#### Batch prediction

In [None]:
# Import Vertex AI library
from google.cloud import aiplatform

# Configure the Vertex AI SDK to work in the europe-west1 region
vertex_location = 'europe-west1'
aiplatform.init(location=vertex_location)

In [None]:
# Load the model that will run the batch prediction.
# The model is addressed by its full resource name.
model_id = 'projects/268076997885/locations/europe-west1/models/8895049068707840000'
model = aiplatform.Model(model_name=model_id)

In [None]:
# Check which input storage formats this model supports for batch prediction jobs
# (kept as the cell's last expression so the notebook displays the value)
model.supported_input_storage_formats

In [None]:
# Arguments required by the batch prediction job.
# The input table and the output location share one BigQuery project and dataset.
bq_project = 'cloud4datascience'
bq_dataset = 'test_datasets'

job_display_name = 'stroke'
bigquery_destination_prefix = f'bq://{bq_project}.{bq_dataset}'
bigquery_source = f'{bigquery_destination_prefix}.new_stroke_data'

In [None]:
# Launch the batch prediction job: the input comes from the BigQuery source
# table and the output goes under the BigQuery destination prefix
batch_predict_kwargs = dict(
    job_display_name=job_display_name,
    bigquery_source=bigquery_source,
    bigquery_destination_prefix=bigquery_destination_prefix,
)
batch_prediction_job = model.batch_predict(**batch_predict_kwargs)

#### Transforming raw results into final score table

In [None]:
# Inspect the job's output information; the following cells extract the
# BigQuery project/dataset and table names from it
batch_prediction_job.output_info

In [None]:
# Check the type of the output information object
type(batch_prediction_job.output_info)

In [None]:
# Convert the job output information into its string representation
# and display it
output_info_str = str(batch_prediction_job.output_info)
output_info_str

In [None]:
# Split the string into tokens on any run of whitespace (spaces, newlines)
# and display the resulting list
output_info_list = output_info_str.split()
output_info_list

In [None]:
# Drop every double-quote character from each token
quote_remover = str.maketrans('', '', '"')
output_info_list = [token.translate(quote_remover) for token in output_info_list]
output_info_list

In [None]:
# Read the output location from the fields of the job's output information
# instead of from fixed positions in its whitespace-split string form, which
# breaks as soon as the text layout or field order of the message changes.
output_info = batch_prediction_job.output_info

# Part which holds project and dataset name (still carries the 'bq://' prefix)
dataset_str = output_info.bigquery_output_dataset

# Part which holds table name
table_str = output_info.bigquery_output_table

print(f'Project & dataset: {dataset_str} \nTable: {table_str}')

In [None]:
# Get everything after the 'bq://' part from the dataset_str
import re

# Define regex pattern for the leading URI scheme that has to be dropped
pattern = r'^bq://'

# Remove the prefix and show variable.
# Substituting (rather than searching and reading the match object) leaves a
# value without the prefix untouched, so re-running this cell does not fail
# with an AttributeError on a missing match.
dataset_str = re.sub(pattern, '', dataset_str)
dataset_str

In [None]:
# Build the table ID by joining the project/dataset part and the table name
# with a dot
table_id = '.'.join([dataset_str, table_str])
table_id

In [None]:
# Import the BigQuery client library and create a client
# (constructed without explicit project or credentials arguments)
from google.cloud import bigquery
client = bigquery.Client()

In [None]:
# Define query which will create a table in BigQuery and run it.
# The final table keeps `id` and the element at offset 1 (zero-based) of
# predicted_stroke.scores, exposed as `score`.
query = f"""
CREATE OR REPLACE TABLE test_datasets.stroke_predictions AS (
  SELECT id, predicted_stroke.scores[offset(1)] AS score, 
  FROM {table_id}
)
"""
# client.query() only submits the job. Block on result() so the new table is
# fully written, and any SQL error is raised in this cell, before later cells
# touch the raw results table.
query_job = client.query(query)
query_job.result()

In [None]:
# The raw results table is no longer needed, so remove it and confirm
# the deletion
client.delete_table(table_id)
print(f"Deleted table '{table_id}'.")