In [1]:
# Import Vertex AI library.
from google.cloud import aiplatform

# Configure the Vertex AI SDK to talk to the europe-west4 location.
LOCATION = 'europe-west4'
aiplatform.init(location=LOCATION)

In [2]:
# Reference the existing model resource that will run the batch prediction.
model_id = 'projects/481904805065/locations/europe-west4/models/5338955785596567552'
model = aiplatform.Model(model_name=model_id)

In [3]:
# Inspect which input storage formats this model accepts for batch prediction jobs.
model.supported_input_storage_formats

['jsonl']

In [4]:
# Arguments for the batch prediction job: the JSONL file to read instances
# from, the Cloud Storage location to write results under, and the job's
# display name.
gcs_source = 'gs://c4ds-europe-west4/input_file.jsonl'
gcs_destination_prefix = 'gs://c4ds-europe-west4'
job_display_name = 'lemon'

In [5]:
# Submit the batch prediction job. The keyword arguments are collected first
# and then unpacked into the call.
batch_predict_args = dict(
    job_display_name=job_display_name,
    gcs_source=gcs_source,
    gcs_destination_prefix=gcs_destination_prefix,
)
batch_prediction_job = model.batch_predict(**batch_predict_args)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/europe-west4/batch-predictions/751954902724378624?project=481904805065
BatchPredictionJob projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/481904805065/locations/europe-west4/batchPredictionJobs

In [40]:
# Read the Cloud Storage directory that the job reports as its output location.
job_output_info = batch_prediction_job.output_info
results_folder = job_output_info.gcs_output_directory
results_folder

'gs://c4ds-europe-west4/prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z'

In [41]:
# Build the blob-name prefix used to filter the bucket listing: the part of
# the results folder URI that follows "gs://<bucket>/".
# The bucket segment is stripped generically instead of replacing a
# hard-coded bucket URI, so this keeps working if the destination bucket
# changes, and (unlike str.replace) it can never alter text further along
# the path.
_bucket_and_path = results_folder.split('gs://', 1)[-1]
prefix_filter = _bucket_and_path.partition('/')[2]
prefix_filter

'prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z'

In [42]:
from google.cloud import storage

# Create the Cloud Storage client and look up the bucket used by this notebook.
bucket_name = 'c4ds-europe-west4'
client = storage.Client()
bucket = client.get_bucket(bucket_name)

In [44]:
# Download the content of the first blob found under the job's output prefix.
# NOTE(review): only the first listed blob is read; any further files under
# the prefix are ignored -- confirm that this is the intended file.
blobs = client.list_blobs('c4ds-europe-west4', prefix=prefix_filter)
first_blob = next(iter(blobs), None)
if first_blob is None:
    # Fail here with a clear message instead of a NameError on
    # json_data_string in a later cell.
    raise FileNotFoundError(f'No blobs found under prefix {prefix_filter!r}')
# download_as_bytes() is the replacement for the deprecated
# download_as_string() alias; both return the blob content as bytes.
json_data_string = first_blob.download_as_bytes()

In [47]:
import ndjson

In [48]:
# Parse the downloaded newline-delimited JSON payload into a list of records.
json_data = ndjson.loads(json_data_string)

In [54]:
# Check which container type the parser returned.
type(json_data)

list

In [60]:
# Print the 'content' value (the image URI) of every scored instance.
for record in json_data:
    instance = record.get('instance')
    print(instance.get('content'))

gs://c4ds-europe-west4/images_to_score/good_quality_722.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_124.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_591.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_649.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_559.jpg
gs://c4ds-europe-west4/images_to_score/good_quality_23.jpg
gs://c4ds-europe-west4/images_to_score/empty_background_169.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_476.jpg
gs://c4ds-europe-west4/images_to_score/empty_background_259.jpg
gs://c4ds-europe-west4/images_to_score/empty_background_203.jpg
gs://c4ds-europe-west4/images_to_score/good_quality_399.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_621.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_499.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_652.jpg
gs://c4ds-europe-west4/images_to_score/bad_quality_929.jpg
gs://c4ds-europe-west4/images_to_score/empty_background_137.jpg
gs://c4ds-europe-west4/images_to_s

In [50]:
list = []
for item in json_data:
    list.append(item)

In [52]:
list

[{'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/good_quality_722.jpg'},
  'prediction': {'ids': ['1448585678873952256',
    '7213193201908187136',
    '6060271697301340160'],
   'displayNames': ['good', 'bad', 'empty'],
   'confidences': [0.99928063, 0.00071935914, 9.2256196e-11]}},
 {'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/bad_quality_124.jpg'},
  'prediction': {'ids': ['7213193201908187136',
    '1448585678873952256',
    '6060271697301340160'],
   'displayNames': ['bad', 'good', 'empty'],
   'confidences': [0.99999976, 2.2686247e-07, 8.685552e-11]}},
 {'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/bad_quality_591.jpg'},
  'prediction': {'ids': ['7213193201908187136',
    '1448585678873952256',
    '6060271697301340160'],
   'displayNames': ['bad', 'good', 'empty'],
   'confidences': [1.0, 2.3470442e-08, 2.36922e-14]}},
 {'instance': {'content': 'gs://c4ds-europe-west4/images_to_score/bad_quality_649.jpg'},
  'prediction': {

In [15]:
# Show the installed google-cloud-storage version. The earlier
# `gcloud.__version__` raised NameError because `gcloud` is not defined;
# `storage` is the package this notebook actually imports.
storage.__version__

In [None]:
def list_blobs(bucket_name, prefix=None):
    """Print the name of every blob in a bucket.

    Args:
        bucket_name: Name of the Cloud Storage bucket to list,
            e.g. "your-bucket-name".
        prefix: Optional blob-name prefix. When given, it is forwarded to
            the listing call so only matching blobs are returned; ``None``
            (the default) lists the whole bucket, as before.

    Returns:
        None. Blob names are written to standard output, one per line.
    """
    storage_client = storage.Client()

    # Note: Client.list_blobs requires at least package version 1.17.0.
    blobs = storage_client.list_blobs(bucket_name, prefix=prefix)

    # Note: The call returns a response only when the iterator is consumed.
    for blob in blobs:
        print(blob.name)

In [7]:
# Inspect the type of the job's output_info attribute.
type(batch_prediction_job.output_info)

google.cloud.aiplatform_v1.types.batch_prediction_job.BatchPredictionJob.OutputInfo

In [9]:
# Display the job's underlying resource record (name, input/output config,
# state, completion stats and timestamps).
batch_prediction_job.gca_resource

name: "projects/481904805065/locations/europe-west4/batchPredictionJobs/751954902724378624"
display_name: "lemon"
model: "projects/481904805065/locations/europe-west4/models/5338955785596567552"
input_config {
  instances_format: "jsonl"
  gcs_source {
    uris: "gs://c4ds-europe-west4/input_file.jsonl"
  }
}
output_config {
  predictions_format: "jsonl"
  gcs_destination {
    output_uri_prefix: "gs://c4ds-europe-west4"
  }
}
output_info {
  gcs_output_directory: "gs://c4ds-europe-west4/prediction-lemon_quality_model-2022-12-06T12:46:06.043433Z"
}
state: JOB_STATE_SUCCEEDED
completion_stats {
  successful_count: 2528
}
create_time {
  seconds: 1670330766
  nanos: 96229000
}
start_time {
  seconds: 1670330766
  nanos: 308936000
}
end_time {
  seconds: 1670333348
  nanos: 808285000
}
update_time {
  seconds: 1670333348
  nanos: 808285000
}
model_version_id: "1"

In [8]:
# List every attribute and method name available on the job object.
dir(batch_prediction_job)

['_FutureManager__latest_future',
 '_FutureManager__latest_future_lock',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_are_futures_done',
 '_assert_gca_resource_is_available',
 '_block_until_complete',
 '_cancel_method',
 '_complete_future',
 '_construct_sdk_resource_from_gapic',
 '_create',
 '_dashboard_uri',
 '_delete_method',
 '_empty_constructor',
 '_exception',
 '_format_resource_name',
 '_format_resource_name_method',
 '_gca_resource',
 '_generate_display_name',
 '_get_and_validate_project_location',
 '_get_gca_resource',
 '_getter_method',
 '_instantiate_client',
 '_job_type',
 '_latest_