In [41]:
# Import required libraries. 
import ndjson
import pandas as pd
from google.cloud import aiplatform
from google.cloud import storage

# Initialize the Vertex AI SDK for the europe-west4 region. Only the location
# is passed; project and credentials are not set explicitly here
# (presumably resolved from the environment -- confirm for your setup).
aiplatform.init(location='europe-west4')

# Create the Cloud Storage client that later cells use to list and download blobs.
client = storage.Client()

In [4]:
# Attach to an already existing batch prediction job, looked up by its ID.
job_id = '751954902724378624'
batch_prediction_job = aiplatform.BatchPredictionJob(batch_prediction_job_name=job_id)

In [11]:
# Read the Cloud Storage directory the job reports as its output location
# (the gcs_output_directory field of the job's output_info).
results_folder = batch_prediction_job.output_info.gcs_output_directory
# Bare expression so the notebook displays the resolved URI.
results_folder

'gs://c4ds-europe-west4/prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z'

In [12]:
# Create the object-name prefix used to filter blobs: everything that follows
# "gs://<bucket>/" in the output URI. The prefix is derived from the URI
# structure rather than by replacing a hard-coded bucket URI, because
# str.replace() is a silent no-op when the literal does not match, which would
# leave the full "gs://..." URI as the prefix and make the listing come back empty.
# 'gs://bucket/path'.split('/', 3) -> ['gs:', '', 'bucket', 'path']
prefix_filter = results_folder.split('/', 3)[3]
prefix_filter

'prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z'

In [42]:


# Get the bucket to which files will be exported. The `client` created in the
# setup cell at the top of the notebook is reused here instead of instantiating
# a second, identical storage.Client().
bucket = client.get_bucket('c4ds-europe-west4')

In [37]:
# List the objects in the bucket whose names start with the job's output prefix.
# The result is consumed by the loop below, which is why later cells call
# list_blobs() again before iterating.
blobs = client.list_blobs('c4ds-europe-west4', prefix=prefix_filter)

# Print the name of every object found under the prefix, one per line.
for blob in blobs:
    print(blob.name)

prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00001.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00002.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00003.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00004.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00005.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00006.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00007.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00008.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00009.jsonl
prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z/predictions_00010.jsonl


In [53]:
blobs = client.list_blobs('c4ds-europe-west4', prefix=prefix_filter)

# Display few predictions to show prediction format.
# Only the first predictions file is needed for this. Objects under the prefix
# whose file name does not start with 'predictions_' are skipped (Vertex AI
# documents additional errors_* files for instances that failed).
blob = next(
    (candidate for candidate in blobs
     if candidate.name.rsplit('/', 1)[-1].startswith('predictions_')),
    None,
)

if blob is None:
    # No predictions file under the prefix: show an empty sample instead of
    # failing with a NameError on the last line of the cell.
    pred_examples = []
else:
    # Load data and transform into json.
    # download_as_bytes() replaces the deprecated download_as_string() alias.
    json_data_string = blob.download_as_bytes()
    json_data = ndjson.loads(json_data_string)
    pred_examples = json_data[0:3]

pred_examples

[{'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/good_quality_722.jpg'},
  'prediction': {'ids': ['1448585678873952256',
    '7213193201908187136',
    '6060271697301340160'],
   'displayNames': ['good', 'bad', 'empty'],
   'confidences': [0.99928063, 0.00071935914, 9.2256196e-11]}},
 {'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/bad_quality_124.jpg'},
  'prediction': {'ids': ['7213193201908187136',
    '1448585678873952256',
    '6060271697301340160'],
   'displayNames': ['bad', 'good', 'empty'],
   'confidences': [0.99999976, 2.2686247e-07, 8.685552e-11]}},
 {'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/bad_quality_591.jpg'},
  'prediction': {'ids': ['7213193201908187136',
    '1448585678873952256',
    '6060271697301340160'],
   'displayNames': ['bad', 'good', 'empty'],
   'confidences': [1.0, 2.3470442e-08, 2.36922e-14]}}]

In [54]:
blobs = client.list_blobs('c4ds-europe-west4', prefix=prefix_filter)

# Create placeholders for prediction results.
cs_uri_list = []
predicted_label_list = []
prediction_confidence_list = []

# Iterate over each file.
for blob in blobs:
    # Only parse prediction files. Other objects can live under the same prefix
    # (Vertex AI documents additional errors_* files for instances that failed),
    # and their records are not expected to follow the prediction format.
    if not blob.name.rsplit('/', 1)[-1].startswith('predictions_'):
        continue

    # Load data and transform into json.
    # download_as_bytes() replaces the deprecated download_as_string() alias.
    json_data_string = blob.download_as_bytes()
    json_data = ndjson.loads(json_data_string)

    # Iterate over each prediction in file.
    for prediction in json_data:
        # Get data from prediction.
        cs_uri = prediction['instance']['content']

        # Pick the label with the highest confidence explicitly instead of
        # relying on the arrays being pre-sorted. On ties max() keeps the first
        # pair, so this matches taking position 0 of a descending-sorted array.
        # A record with no labels yields (None, None) rather than an IndexError.
        predicted_label, prediction_confidence = max(
            zip(prediction['prediction']['displayNames'],
                prediction['prediction']['confidences']),
            key=lambda label_and_confidence: label_and_confidence[1],
            default=(None, None),
        )

        # Append to appropriate list.
        cs_uri_list.append(cs_uri)
        predicted_label_list.append(predicted_label)
        prediction_confidence_list.append(prediction_confidence)


In [55]:
# Assemble the collected per-image results into a single table:
# one row per scored image, one column per collected list.
results_df = pd.DataFrame(
    data={
        'cs_uri': cs_uri_list,
        'predicted_label': predicted_label_list,
        'confidence': prediction_confidence_list,
    }
)
results_df.head()

Unnamed: 0,cs_uri,predicted_label,confidence
0,gs://c4ds-europe-west4/images_to_score/good_qu...,good,0.999281
1,gs://c4ds-europe-west4/images_to_score/bad_qua...,bad,1.0
2,gs://c4ds-europe-west4/images_to_score/bad_qua...,bad,1.0
3,gs://c4ds-europe-west4/images_to_score/bad_qua...,bad,1.0
4,gs://c4ds-europe-west4/images_to_score/bad_qua...,bad,0.999999
