# 04 - Prediction Serving

The purpose of the notebook is to show how to use the deployed model for online and batch prediction.
The notebook covers the following tasks:
1. Test the endpoints for online prediction.
2. Use the uploaded custom model for batch prediction.

## Setup

In [None]:
import os
import time
from datetime import datetime
import tensorflow as tf

In [None]:
# --- Google Cloud environment (replace with your own values) ---
PROJECT = "ksalama-cloudml"    # Google Cloud project ID.
REGION = "us-central1"         # Region handed to the Vertex client and the extraction pipeline.
BUCKET = "ksalama-cloudml-us"  # Cloud Storage bucket name, without the gs:// prefix.

# --- BigQuery source of the serving data (replace with your own values) ---
BQ_DATASET_NAME = "playground_us"
BQ_TABLE_NAME = "chicago_taxitrips_prep"

# --- Display names used to address the uploaded model and the endpoint ---
MODEL_DISPLAY_NAME = "chicago_taxi_tips_classifier_v1"
ENDPOINT_DISPLAY_NAME = "chicago_taxi_tips_classification"

In [None]:
# Project-local helper used below for both the online prediction call and the
# batch prediction job submission.
from src.utils.vertex_utils import VertexClient
# A single client instance, configured with the project, region and bucket
# from the configuration cell, is shared by the rest of the notebook.
vertex_client = VertexClient(PROJECT, REGION, BUCKET)

## 1. Making Online Predictions


In [None]:
# A single hand-written trip used to smoke-test the online endpoint.
# Every feature value is wrapped in a one-element list.
test_instances = [
    {
        "dropoff_grid": ["POINT(-87.6 41.9)"],
        "euclidean": [2064.2696],
        "loc_cross": [""],
        "payment_type": ["Credit Card"],
        "pickup_grid": ["POINT(-87.6 41.9)"],
        "trip_miles": [1.37],
        "trip_day": [12],
        "trip_hour": [16],
        "trip_month": [2],
        "trip_day_of_week": [4],
        "trip_seconds": [555],
    },
]

In [None]:
# Send the sample instances to the endpoint identified by its display name and
# keep only the `predictions` attribute of the response.
predictions = vertex_client.predict(ENDPOINT_DISPLAY_NAME, test_instances).predictions

# Print one prediction per line.
for prediction in predictions:
    print(prediction)
    

## 2. Batch Prediction

In [None]:
# Cloud Storage locations are URIs, not local file paths, so they are built
# with explicit "/" separators. os.path.join uses the host platform's separator
# and would produce "\" inside the gs:// URIs when run on Windows.
WORKSPACE = f"gs://{BUCKET}/ucaip_demo/chicago_taxi"
SERVING_DATA_DIR = f"{WORKSPACE}/serving_data"
# Where the extraction pipeline writes the instances to score.
SERVING_INPUT_DATA_DIR = f"{SERVING_DATA_DIR}/input_data"
# Where the batch prediction job writes its results.
SERVING_OUTPUT_DATA_DIR = f"{SERVING_DATA_DIR}/output_predictions"

# Relative path of the raw data schema handed to the extraction pipeline.
RAW_SCHEMA_LOCATION = 'src/raw_schema/schema.pbtxt'

In [None]:
# Start from an empty serving-data directory so that files left over from a
# previous run are not mixed with the ones produced by this run.
if tf.io.gfile.exists(SERVING_DATA_DIR):
    print("Removing previous serving data...")
    tf.io.gfile.rmtree(SERVING_DATA_DIR)
print("Creating preprocessing serving data directory...")
# makedirs (unlike mkdir) also creates any missing parent directories, so this
# cell works even when the workspace location has never been written to.
tf.io.gfile.makedirs(SERVING_DATA_DIR)

### Extract serving data to Cloud Storage as JSONL files

In [None]:
from src.utils import datasource_utils

In [None]:
# NOTE(review): DATA_SPLIT is not passed to the query builder in this cell —
# confirm whether it is still needed.
DATA_SPLIT = "TEST"
# Value forwarded as the `limit` argument of the serving query builder.
LIMIT = 10000

# Build the query that selects the serving data from the configured BigQuery
# dataset/table, then show it for inspection.
raw_data_query = datasource_utils.get_serving_source_query(
    bq_dataset_name=BQ_DATASET_NAME,
    bq_table_name=BQ_TABLE_NAME,
    limit=LIMIT,
)
print(raw_data_query)

In [None]:
# Arguments for the extraction pipeline.
# All gs:// locations are joined with an explicit "/" instead of os.path.join,
# which would insert the host platform's separator ("\" on Windows) into the
# Cloud Storage URIs.
args = {
    # Left disabled in this notebook; presumably enables running on Dataflow — confirm in src.preprocessing.etl.
    #'runner': 'DataflowRunner',
    'raw_schema_location': RAW_SCHEMA_LOCATION,
    'raw_data_query': raw_data_query,
    # Output files are written under the serving input directory with the "data-" prefix.
    'exported_data_prefix': f"{SERVING_INPUT_DATA_DIR}/data-",
    'temporary_dir': f"{WORKSPACE}/tmp",
    'gcs_location': f"{WORKSPACE}/bq_tmp",
    'project': PROJECT,
    'region': REGION,
    'setup_file': './setup.py'
}

In [None]:
from src.preprocessing import etl

In [None]:
# Only show TensorFlow log records at ERROR level while the pipeline runs.
tf.get_logger().setLevel("ERROR")

# Run the extraction pipeline with the arguments prepared above, reporting
# progress before and after the call.
print("Data extraction started...")
etl.run_extract_pipeline(args)
print("Data extraction completed.")

In [None]:
# List the contents of the serving input directory, where the extraction
# pipeline was told to write its output files.
!gsutil ls {SERVING_INPUT_DATA_DIR}

### Prepare the batch prediction job

In [None]:
# Compute resources for the batch prediction job; passed to the job submission
# as `other_configurations`.
job_resources = dict(
    machine_type="n1-standard-2",
    # Accelerator settings, left disabled:
    # accelerator_count=1,
    # accelerator_type="NVIDIA_TESLA_T4",
    starting_replica_count=1,
    max_replica_count=10,
)

### Submit the batch prediction job

In [None]:
# Submit a batch prediction job for the uploaded model: read every *.jsonl file
# in the serving input directory and write JSONL predictions under the serving
# output directory, using the machine configuration in `job_resources`.
# NOTE(review): sync=True presumably blocks until the job finishes — confirm in VertexClient.
batch_prediction_job = vertex_client.submit_batch_prediction_job(
    model_display_name=MODEL_DISPLAY_NAME,
    gcs_source_pattern=f"{SERVING_INPUT_DATA_DIR}/*.jsonl",
    gcs_destination_prefix=SERVING_OUTPUT_DATA_DIR,
    instances_format="jsonl",
    predictions_format="jsonl",
    other_configurations=job_resources,
    sync=True,
)

In [None]:
# List the objects under the batch prediction output location.
!gsutil ls {SERVING_OUTPUT_DATA_DIR}/*