# 07 - Prediction Serving

The purpose of the notebook is to show how to use the deployed model for online and batch prediction.
The notebook covers the following tasks:
1. Test the endpoints for online prediction.
2. Use the uploaded custom model for batch prediction.
3. Run the batch prediction pipeline using `Vertex Pipelines`.

## Setup

### Import libraries

In [2]:
import os
from datetime import datetime
import tensorflow as tf

from google.cloud import aiplatform as vertex_ai

### Setup Google Cloud project

In [9]:
# Project-level settings for this notebook. Leaving a value empty or at its
# "[your-...]" placeholder triggers the gcloud-based fallbacks below.
PROJECT = 'pbalm-cxb-aa'
REGION = 'europe-west4' 
BUCKET = PROJECT + '-eu'
SERVICE_ACCOUNT = "188940921537-compute@developer.gserviceaccount.com"

if PROJECT == "" or PROJECT is None or PROJECT == "[your-project-id]":
    # Fall back to the project id of the active gcloud configuration.
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT = shell_output[0]
    
if BUCKET == "" or BUCKET is None or BUCKET == "[your-bucket-name]":
    # Default the bucket name to the project id.
    BUCKET = PROJECT
    # Try to create the bucket in case it doesn't exist yet.
    ! gsutil mb -l $REGION gs://$BUCKET
    print("")

if SERVICE_ACCOUNT == "" or SERVICE_ACCOUNT is None or SERVICE_ACCOUNT == "[your-service-account]":
    # Fall back to the account of the active gcloud configuration (core.account).
    # NOTE(review): this may be a user account rather than a service account - confirm.
    shell_output = !gcloud config list --format 'value(core.account)' 2>/dev/null
    SERVICE_ACCOUNT = shell_output[0]
    
# Echo the resolved settings so the reader can verify them before continuing.
print("Project ID:", PROJECT)
print("Region:", REGION)
print("Bucket name:", BUCKET)
print("Service Account:", SERVICE_ACCOUNT)

Project ID: pbalm-cxb-aa
Region: europe-west4
Bucket name: pbalm-cxb-aa-eu
Service Account: pbalm@google.com


### Set configurations

In [37]:
# Naming scheme: the endpoint is shared across versions, while each model
# carries the version as a suffix of the endpoint's display name.
VERSION = 'v02'
DATASET_DISPLAY_NAME = 'creditcards'
ENDPOINT_DISPLAY_NAME = DATASET_DISPLAY_NAME + '-classifier'
MODEL_DISPLAY_NAME = ENDPOINT_DISPLAY_NAME + '-' + VERSION

# BigQuery source used for serving data.
SERVE_BQ_DATASET_NAME = 'vertex_eu'
SERVE_BQ_TABLE_NAME = 'creditcards_ml'  # Change to your serving BigQuery table name.

## 1. Making Online Predictions


In [38]:
# Set the SDK-wide defaults (project, region, staging bucket) used by the
# Vertex AI calls in the rest of this notebook.
vertex_ai.init(
    project=PROJECT,
    location=REGION,
    staging_bucket=BUCKET
)

# Look up every endpoint that carries the configured display name.
matching_endpoints = vertex_ai.Endpoint.list(
    filter=f'display_name={ENDPOINT_DISPLAY_NAME}',
    order_by="update_time")

# Fail with an actionable message instead of a bare "list index out of range"
# when no endpoint has been deployed yet.
if not matching_endpoints:
    raise RuntimeError(
        f"No Vertex AI endpoint with display name '{ENDPOINT_DISPLAY_NAME}' was found "
        f"in project '{PROJECT}' ({REGION}). Deploy the model to an endpoint first."
    )

# The last entry is taken as the most recently updated endpoint.
# NOTE(review): relies on order_by="update_time" sorting ascending - confirm.
endpoint_name = matching_endpoints[-1].gca_resource.name

endpoint = vertex_ai.Endpoint(endpoint_name)

In [39]:
# A single request instance: features V1..V28 all carry the same placeholder
# value, followed by the transaction amount. Every value is wrapped in a
# one-element list.
test_instances = [{
    **{f'V{feature_index}': [-0.906611] for feature_index in range(1, 29)},
    'Amount': [15.99],
}]


In [40]:
# Send the test instances to the deployed endpoint and show each returned prediction.
prediction_response = endpoint.predict(test_instances)
predictions = prediction_response.predictions

for predicted in predictions:
    print(predicted)

{'classes': ['legit', 'fraudulent'], 'scores': [0.580836713, 0.419163287]}


In [41]:
# Request explanations for the same test instances and print each one.
explanation_response = endpoint.explain(test_instances)
explanations = explanation_response.explanations

for explained in explanations:
    print(explained)

attributions {
  baseline_output_value: 0.6099663972854614
  instance_output_value: 0.5808367133140564
  feature_attributions {
    struct_value {
      fields {
        key: "Amount"
        value {
          list_value {
            values {
              number_value: -0.0299752414226532
            }
          }
        }
      }
      fields {
        key: "V1"
        value {
          list_value {
            values {
              number_value: 6.837844848632813e-05
            }
          }
        }
      }
      fields {
        key: "V10"
        value {
          list_value {
            values {
              number_value: -0.0001080513000488281
            }
          }
        }
      }
      fields {
        key: "V11"
        value {
          list_value {
            values {
              number_value: 0.0001684725284576416
            }
          }
        }
      }
      fields {
        key: "V12"
        value {
          list_value {
            values {
      

## 2. Batch Prediction

In [42]:
# Cloud Storage layout for batch prediction:
#   <workspace>/serving_data/input_data          - exported instances
#   <workspace>/serving_data/output_predictions  - prediction results
WORKSPACE = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/"
SERVING_DATA_DIR = os.path.join(WORKSPACE, 'serving_data')
SERVING_INPUT_DATA_DIR = os.path.join(WORKSPACE, 'serving_data', 'input_data')
SERVING_OUTPUT_DATA_DIR = os.path.join(WORKSPACE, 'serving_data', 'output_predictions')

In [43]:
# Start from a clean slate: delete any serving data left over from a previous
# run, then recreate the (now empty) serving data directory.
if tf.io.gfile.exists(SERVING_DATA_DIR):
    print("Removing previous serving data...")
    # Recursively removes everything under the serving data directory.
    tf.io.gfile.rmtree(SERVING_DATA_DIR)
    
print("Creating serving data directory...")
tf.io.gfile.mkdir(SERVING_DATA_DIR)
print("Serving data directory is ready.")

Removing previous serving data...
Creating serving data directory...
Serving data directory is ready.


### Extract serving data to Cloud Storage as JSONL

In [44]:
from src.common import datasource_utils
from src.preprocessing import etl

In [45]:
# Maximum number of rows to export for batch prediction.
LIMIT = 10_000

# Build the serving query against the configured BigQuery dataset and table.
serving_query_params = dict(
    bq_dataset_name=SERVE_BQ_DATASET_NAME,
    bq_table_name=SERVE_BQ_TABLE_NAME,
    limit=LIMIT,
)
sql_query = datasource_utils.get_serving_source_query(**serving_query_params)

print(sql_query)


    SELECT *
    
    EXCEPT (Time, ML_use, Class)
    FROM vertex_eu.creditcards_ml 
    LIMIT 10000


In [46]:
# A timestamp suffix keeps the extraction job name unique per run.
extract_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
job_name = f"extract-{DATASET_DISPLAY_NAME}-serving-{extract_timestamp}"

# Arguments handed to the extraction pipeline.
args = dict(
    job_name=job_name,
    # runner='DataflowRunner',  # left disabled, as in the original configuration
    sql_query=sql_query,
    exported_data_prefix=os.path.join(SERVING_INPUT_DATA_DIR, "data-"),
    temporary_dir=os.path.join(WORKSPACE, 'tmp'),
    gcs_location=os.path.join(WORKSPACE, 'bq_tmp'),
    project=PROJECT,
    region=REGION,
    setup_file='./setup.py',
)

In [47]:
# Restrict TensorFlow's logger to ERROR level before running the pipeline.
tf.get_logger().setLevel('ERROR')

print("Data extraction started...")
# Runs the extraction pipeline with the `args` dictionary defined in the previous cell.
etl.run_extract_pipeline(args)
print("Data extraction completed.")



Data extraction started...




Data extraction completed.


In [48]:
# List the exported files; IPython substitutes {SERVING_INPUT_DATA_DIR} with the Python variable's value.
!gsutil ls {SERVING_INPUT_DATA_DIR}

gs://pbalm-cxb-aa-eu/creditcards/serving_data/input_data/data--00000-of-00001.jsonl


### Submit the batch prediction job

In [49]:
# Look up every uploaded model that carries the configured display name.
matching_models = vertex_ai.Model.list(
    filter=f'display_name={MODEL_DISPLAY_NAME}',
    order_by="update_time")

# Fail with an actionable message instead of a bare "list index out of range"
# when the model has not been uploaded yet.
if not matching_models:
    raise RuntimeError(
        f"No Vertex AI model with display name '{MODEL_DISPLAY_NAME}' was found "
        f"in project '{PROJECT}' ({REGION}). Upload the model before running batch prediction."
    )

# The last entry is taken as the most recently updated model.
# NOTE(review): relies on order_by="update_time" sorting ascending - confirm.
model_name = matching_models[-1].gca_resource.name

In [None]:
# Compute resources for the batch prediction workers.
job_resources = dict(
    machine_type='n1-standard-2',
    # accelerator_count=1,
    # accelerator_type='NVIDIA_TESLA_T4',
    starting_replica_count=1,
    max_replica_count=10,
)

# A timestamp suffix keeps the job display name unique per run.
prediction_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
job_display_name = f"{MODEL_DISPLAY_NAME}-prediction-job-{prediction_timestamp}"

# sync=True is passed, so this cell is expected to block until the job finishes.
vertex_ai.BatchPredictionJob.create(
    job_display_name=job_display_name,
    model_name=model_name,
    gcs_source=f"{SERVING_INPUT_DATA_DIR}/*.jsonl",
    gcs_destination_prefix=SERVING_OUTPUT_DATA_DIR,
    instances_format='jsonl',
    predictions_format='jsonl',
    sync=True,
    **job_resources,
)

Creating BatchPredictionJob


INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob


BatchPredictionJob created. Resource name: projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216


To use this BatchPredictionJob in another session:


INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:


bpj = aiplatform.BatchPredictionJob('projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216')


INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216')


View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/europe-west4/batch-predictions/3192367140061577216?project=188940921537


INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/europe-west4/batch-predictions/3192367140061577216?project=188940921537


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_PENDING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_PENDING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/188940921537/locations/europe-west4/batchPredictionJobs/3192367140061577216 current state:
JobState.JOB_STATE_RUNNING


## 3. Run the batch prediction pipeline using Vertex Pipelines

In [None]:
# Root Cloud Storage location for this dataset (same value as defined in the batch prediction section).
WORKSPACE = f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}/"
# Location under the workspace used as the TFX artifact store.
ARTIFACT_STORE = os.path.join(WORKSPACE, 'tfx_artifacts')
# The prediction pipeline is named after the model display name.
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-predict-pipeline'

### Set the pipeline configurations for the Vertex AI run

In [None]:
# Export the pipeline settings as environment variables in one go.
# NOTE(review): presumably read by src.tfx_pipelines.config when it is (re)loaded - confirm.
os.environ.update({
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}",
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": PIPELINE_NAME,
    "ARTIFACT_STORE_URI": ARTIFACT_STORE,
    "BATCH_PREDICTION_BQ_DATASET_NAME": SERVE_BQ_DATASET_NAME,
    "BATCH_PREDICTION_BQ_TABLE_NAME": SERVE_BQ_TABLE_NAME,
    "SERVE_LIMIT": "1000",
    "BEAM_RUNNER": "DirectRunner",
    "TFX_IMAGE_URI": f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}",
})

In [None]:
import importlib
from src.tfx_pipelines import config

# Re-import the module so that it is evaluated again in the current session.
importlib.reload(config)

# Print every module attribute whose name is all upper-case.
for setting_name, setting_value in vars(config).items():
    if not setting_name.isupper():
        continue
    print(f'{setting_name}: {setting_value}')

### (Optional) Build the ML container image

This is the `TFX` runtime environment for the training pipeline steps.

In [None]:
# Print the image URI. No Python variable named TFX_IMAGE_URI is defined in this notebook,
# so the value is presumably resolved by the shell from the environment variable set via os.environ.
!echo $TFX_IMAGE_URI

In [None]:
# Build the container image from the current directory with Cloud Build and tag it
# as $TFX_IMAGE_URI (15-minute timeout, e2-highcpu-8 build machine).
!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=15m --machine-type=e2-highcpu-8

### Compile pipeline

In [None]:
from src.tfx_pipelines import runner

# The pipeline definition file is a JSON file named after the configured pipeline name.
pipeline_definition_file = f'{config.PIPELINE_NAME}.json'
# NOTE(review): presumably compiles the prediction pipeline and writes it to the file above - confirm in src/tfx_pipelines/runner.py.
pipeline_definition = runner.compile_prediction_pipeline(pipeline_definition_file)

### Submit run to Vertex Pipelines

In [None]:
from kfp.v2.google.client import AIPlatformClient

# Client bound to the configured project and region.
pipeline_client = AIPlatformClient(
    project_id=PROJECT, region=REGION)
                 
# Submit a run from the pipeline definition file, executing as SERVICE_ACCOUNT.
pipeline_client.create_run_from_job_spec(
    job_spec_path=pipeline_definition_file,
    service_account=SERVICE_ACCOUNT
)