#### Batch prediction

In [1]:
# Import Vertex AI library
from google.cloud import aiplatform

# Set up the Vertex AI SDK session; only the region is configured explicitly here
REGION = 'europe-west1'
aiplatform.init(location=REGION)

In [2]:
# Get a handle to the existing model that will make the batch prediction.
# The model is identified by its full resource name (project, location and model ID);
# the location in that name is the same region that was passed to aiplatform.init().
model_id = 'projects/268076997885/locations/europe-west1/models/8895049068707840000'
model = aiplatform.Model(model_id)

In [3]:
# Check which input formats this model supports for batch prediction jobs.
# The job created further down reads its input from BigQuery, so 'bigquery'
# needs to be one of the listed formats.
model.supported_input_storage_formats

['bigquery', 'csv', 'jsonl', 'tf-record']

In [4]:
# Define required arguments for the batch prediction job
# Display name given to the job
job_display_name = 'stroke'
# BigQuery table with the rows to score, written as bq://project.dataset.table
bigquery_source = 'bq://cloud4datascience.test_datasets.new_stroke_data'
# BigQuery project and dataset (bq://project.dataset) passed as the output destination prefix
bigquery_destination_prefix = 'bq://cloud4datascience.test_datasets'

In [5]:
# Create the batch prediction job on the model, reading input from and writing output to BigQuery.
# The log printed below shows the job's resource name and its state being reported repeatedly.
# NOTE(review): the call appears to wait until the job finishes before returning -
# confirm against the `sync` parameter of Model.batch_predict.
batch_prediction_job = model.batch_predict(
    job_display_name=job_display_name,
    bigquery_source=bigquery_source,
    bigquery_destination_prefix=bigquery_destination_prefix,
)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/268076997885/locations/europe-west1/batchPredictionJobs/3678893338771062784
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/268076997885/locations/europe-west1/batchPredictionJobs/3678893338771062784')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/europe-west1/batch-predictions/3678893338771062784?project=268076997885
BatchPredictionJob projects/268076997885/locations/europe-west1/batchPredictionJobs/3678893338771062784 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/268076997885/locations/europe-west1/batchPredictionJobs/3678893338771062784 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/268076997885/locations/europe-west1/batchPredictionJobs/3678893338771062784 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/268076997885/locations/europe-west1/batchPredicti

#### Transforming raw results into final score table

In [18]:
# Get the job output information: the BigQuery dataset and the name of the
# table into which the predictions were written
batch_prediction_job.output_info

bigquery_output_dataset: "bq://cloud4datascience.test_datasets"
bigquery_output_table: "predictions_2022_07_19T08_46_24_441Z_075"

In [19]:
# Get its type (it is a message object, not a plain string or dict)
type(batch_prediction_job.output_info)

google.cloud.aiplatform_v1.types.batch_prediction_job.BatchPredictionJob.OutputInfo

In [20]:
# Convert the job output information into its text representation,
# one `field: "value"` pair per line, so it can be processed as a string
output_info_str = str(batch_prediction_job.output_info)
output_info_str

'bigquery_output_dataset: "bq://cloud4datascience.test_datasets"\nbigquery_output_table: "predictions_2022_07_19T08_46_24_441Z_075"\n'

In [21]:
# Split the text on whitespace; the result alternates between
# field labels (ending in ':') and their quoted values
output_info_list = output_info_str.split()
output_info_list

['bigquery_output_dataset:',
 '"bq://cloud4datascience.test_datasets"',
 'bigquery_output_table:',
 '"predictions_2022_07_19T08_46_24_441Z_075"']

In [22]:
# Drop every double-quote character from each token
output_info_list = [token.replace('"', '') for token in output_info_list]
output_info_list

['bigquery_output_dataset:',
 'bq://cloud4datascience.test_datasets',
 'bigquery_output_table:',
 'predictions_2022_07_19T08_46_24_441Z_075']

In [23]:
# Read the two values directly from the output info fields instead of picking them
# by position out of the whitespace-split text. Positional indexing silently returns
# the wrong element if the text layout changes (different field order, an additional
# populated field, or a value containing whitespace).

# Part which holds project and dataset name (still prefixed with 'bq://')
dataset_str = batch_prediction_job.output_info.bigquery_output_dataset

# Part which holds table name
table_str = batch_prediction_job.output_info.bigquery_output_table

print(f'Project & dataset: {dataset_str} \nTable: {table_str}')

Project & dataset: bq://cloud4datascience.test_datasets 
Table: predictions_2022_07_19T08_46_24_441Z_075


In [24]:
# Get everything after the 'bq://' part of dataset_str
import re

# Define regex pattern to look for: the 'bq://' scheme at the start of the string
pattern = r'^bq://'

# Strip the scheme and show the variable.
# Substituting (instead of searching and calling .group()) keeps this cell safe to
# re-run: dataset_str is overwritten here, so on a second run the prefix is already
# gone. A search would then return None and raise AttributeError, whereas the
# substitution simply leaves the value unchanged.
dataset_str = re.sub(pattern, '', dataset_str)
dataset_str

'cloud4datascience.test_datasets'

In [25]:
# Build the full table ID by joining the project/dataset part and the table name with a dot
table_id = '.'.join([dataset_str, table_str])
table_id

'cloud4datascience.test_datasets.predictions_2022_07_19T08_46_24_441Z_075'

In [26]:
# Import the BigQuery client library and create the client used to run queries
# and manage tables. No project or credentials are passed explicitly here.
from google.cloud import bigquery
client = bigquery.Client()

In [30]:
# Define the query which creates the final score table in BigQuery.
# For every row of the raw prediction table it keeps `id` and the element at
# offset 1 of `predicted_stroke.scores`, exposed as `score`.
# NOTE(review): offset(1) is presumably the score of the positive class - confirm
# against the class order in the raw prediction table.
# The raw table ID is wrapped in backticks so that a project ID containing
# characters such as hyphens still forms a valid table reference.
query = f"""
CREATE OR REPLACE TABLE test_datasets.stroke_predictions AS (
  SELECT id, predicted_stroke.scores[offset(1)] AS score, 
  FROM `{table_id}`
)
"""

# client.query() only submits the job. Wait for it to finish before moving on:
# otherwise the raw table could be deleted while the query is still reading from it,
# and a failed query would go unnoticed.
query_job = client.query(query)
query_job.result()
query_job

QueryJob<project=cloud4datascience, location=EU, id=539fce0d-be25-471b-a979-a478d443c245>

In [28]:
# The raw prediction table is no longer needed, so remove it from BigQuery
client.delete_table(table_id)
deleted_message = "Deleted table '{}'.".format(table_id)
print(deleted_message)

Deleted table 'cloud4datascience.test_datasets.predictions_2022_07_19T08_46_24_441Z_075'.
