## **Setting Up a Batch Prediction Job on VertexAI**

In [1]:
from google.cloud import aiplatform

# Point the Vertex AI SDK at the region used throughout this notebook.
aiplatform.init(location="us-west1")

### Running a batch prediction job requires registering the model in the Model Registry. Once it is registered, find the model ID under the version details of the registered model.

In [2]:
# Replace the placeholder with the ID of the model registered in the Model Registry.
registered_model_id = "$MODEL ID$"

# Full resource name of the registered model (project and region are fixed here).
model_id = (
    "projects/ml-ops-segment-anything/locations/us-west1/models/"
    f"{registered_model_id}"
)

# Handle to the registered model; later cells query it and launch the job from it.
model = aiplatform.Model(model_name=model_id)

In [3]:
# Display the input storage formats the registered model reports as supported.
model.supported_input_storage_formats

['jsonl', 'bigquery', 'csv', 'tf-record', 'tf-record-gzip', 'file-list']

### Here we are going to preprocess the batch of images and prepare a JSON file with all the preprocessed data

In [4]:
import base64
import json

from google.cloud import storage

# Initialize a Cloud Storage client
storage_client = storage.Client()

# Add your bucket name
bucket_name = "$BUCKET_NAME$"

# Object-name prefix ("folder") in the bucket that holds the input images and
# that also receives the generated batch prediction input file.
images_prefix = "batch-prediction-images"

# Name of the batch prediction input file, used both locally and in the bucket.
jsonl_filename = "batch_predict.jsonl"

# Create a bucket object
bucket = storage_client.get_bucket(bucket_name)

# List every object stored under the images prefix
blobs = bucket.list_blobs(prefix=images_prefix)

# Only objects whose name ends with one of these extensions (case-insensitive)
# are treated as images.
image_extensions = ('.png', '.jpg', '.jpeg')

# Collect all image blobs
image_blobs = [blob for blob in blobs if blob.name.lower().endswith(image_extensions)]

# Download every image and build one instance (base64 payload + source path) per image
image_base64_dict = {"instances": []}
for image_blob in image_blobs:
    # Base64-encode the raw image bytes so they can be embedded in JSON
    base64_str = base64.b64encode(image_blob.download_as_bytes()).decode('utf-8')

    image_base64_dict["instances"].append({
        'image': base64_str,
        # blob.name is the full object name and already starts with the prefix,
        # so the prefix must not be prepended a second time.
        'file_path': f"gs://{bucket_name}/{image_blob.name}",
    })

# Save the preprocessed data as JSON Lines: one instance per line, which is the
# layout Vertex AI batch prediction expects for "jsonl" input.
# NOTE(review): assumes the model's handler reads standard per-line instances — confirm.
with open(jsonl_filename, "w") as file:
    for instance in image_base64_dict["instances"]:
        file.write(json.dumps(instance) + "\n")

# Push the local file to gs://<bucket>/batch-prediction-images/batch_predict.jsonl.
# The blob name is the destination object; upload_from_filename takes the local path.
blob = bucket.blob(f"{images_prefix}/{jsonl_filename}")
blob.upload_from_filename(jsonl_filename)

In [5]:
# Display name given to the batch prediction job
job_display_name = 'sam-batch-prediction'

# Cloud Storage path of the preprocessed JSON Lines input file
gcs_source = f'gs://{bucket_name}/batch-prediction-images/batch_predict.jsonl'

# Cloud Storage prefix under which the batch prediction results are stored.
# Built from bucket_name (a string): interpolating the `bucket` object would insert
# its object representation rather than the bare bucket name, giving an invalid URI.
gcs_destination_prefix = f'gs://{bucket_name}/batch-prediction-images'

In [6]:
# Set up and launch the batch prediction job from the registered model.
# Refer to the GCP documentation for the available machine and accelerator types.
batch_prediction_job = model.batch_predict(
    job_display_name=job_display_name,
    gcs_source=gcs_source,
    gcs_destination_prefix=gcs_destination_prefix,
    machine_type="n1-standard-2",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-west1/batch-predictions/3963534908969713664?project=633534855904
BatchPredictionJob projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/633534855904/locations/us-west1/batchPredictionJobs/3963534908969713664 c