# Deploy Query Model to online endpoint with Monitoring

### Steps in this notebook:

* Deploy Query model to online prediction endpoint
* Setup model monitoring for online prediction endpoint

## Load env config

In [1]:
# naming convention for all cloud resources
VERSION        = "v1"                  # TODO
PREFIX         = f'ndr-{VERSION}'      # TODO

print(f"PREFIX = {PREFIX}")

PREFIX = ndr-v1


In [2]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "cpg-cdp"
PROJECT_NUM              = "939655404703"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "genai-haystack-vpc"

VERTEX_SA                = "939655404703-compute@developer.gserviceaccount.com"

PREFIX                   = "ndr-v1"
VERSION                  = "v1"

APP                      = "sp"
MODEL_TYPE               = "2tower"
FRAMEWORK                = "tfrs"
DATA_VERSION             = "v1"
TRACK_HISTORY            = "5"

BUCKET_NAME              = "ndr-v1-cpg-cdp-bucket"
BUCKET_URI               = "gs://ndr-v1-cpg-cdp-bucket"
SOURCE_BUCKET            = "spotify-million-playlist-dataset"

DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://ndr-v1-cpg-cdp-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

CANDIDATE_PREFIX         = "candidates"
TRAIN_DIR_PREFIX         = "train"
VALID_DIR_PREFIX   

#### Edit these:

In [3]:
CREATE_NEW_ASSETS     = True # True | False
ENABLE_XAI_MONITORING = False # True | False

In [4]:
# local-train-v1/run-20230919-150451/candidates/candidate_embeddings.json

EXPERIMENT_NAME       = "scale-training-v1"         # local-train-v1" 
RUN_NAME              = "run-20231116-164100"  # "run-20230919-150451"

RUN_DIR_PATH = f'{EXPERIMENT_NAME}/{RUN_NAME}'

print(f"EXPERIMENT_NAME : {EXPERIMENT_NAME}")
print(f"RUN_NAME        : {RUN_NAME}")
print(f"RUN_DIR_PATH    : {RUN_DIR_PATH}")

EXPERIMENT_NAME : scale-training-v1
RUN_NAME        : run-20231116-164100
RUN_DIR_PATH    : scale-training-v1/run-20231116-164100


## Imports

In [5]:
import os
import sys
import time
import numpy as np
import pickle as pkl

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

# google cloud SDKs
from google.cloud import storage
from google.cloud import aiplatform as vertex_ai
from google.cloud.aiplatform import model_monitoring

import tensorflow as tf

# this repo
from src.two_tower_jt import test_instances as test_instances
from util import feature_set_utils as feature_utils

2023-11-21 13:53:11.535263: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-21 13:53:11.535427: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-21 13:53:11.681360: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
vertex_ai.init(project=PROJECT_ID, location=LOCATION)

storage_client = storage.Client(project=PROJECT_ID)

# Deploy Query Model

## Register Query model to Vertex Model Registry

**TODO:** parametrize new vs existing assets

```
model = vertex_ai.Model.list(filter=f"display_name=bqml_fraud_classifier")[-1]
```

In [7]:
QUERY_MODEL_DIR = f"{BUCKET_URI}/{RUN_DIR_PATH}/model-dir/query_model"

print(f"QUERY_MODEL_DIR: {QUERY_MODEL_DIR}")

QUERY_MODEL_DIR: gs://ndr-v1-cpg-cdp-bucket/scale-training-v1/run-20231116-164100/model-dir/query_model


In [8]:
! gsutil ls $QUERY_MODEL_DIR

gs://ndr-v1-cpg-cdp-bucket/scale-training-v1/run-20231116-164100/model-dir/query_model/
gs://ndr-v1-cpg-cdp-bucket/scale-training-v1/run-20231116-164100/model-dir/query_model/saved_model.pb
gs://ndr-v1-cpg-cdp-bucket/scale-training-v1/run-20231116-164100/model-dir/query_model/assets/
gs://ndr-v1-cpg-cdp-bucket/scale-training-v1/run-20231116-164100/model-dir/query_model/variables/


In [9]:
DISPLAY_NAME = "two-tower-query-model-nov-2023-v2"
if CREATE_NEW_ASSETS == True:
    
    uploaded_query_model = vertex_ai.Model.upload(
        display_name=f'query_model_{DISPLAY_NAME}',
        artifact_uri=QUERY_MODEL_DIR,
        serving_container_image_uri=SERVING_IMAGE_URI_CPU,
        description="Top of the query tower, meant to return an embedding for each playlist instance",
        sync=True,
    )
else:
    # use existing
    uploaded_query_model = vertex_ai.Model('projects/934903580331/locations/us-central1/models/2404541769992634368@1')

print(f"display_name         : {uploaded_query_model.display_name}\n")
print(f"uploaded_query_model : {uploaded_query_model}")

Creating Model
Create Model backing LRO: projects/939655404703/locations/us-central1/models/8534899636983300096/operations/134669764717969408
Model created. Resource name: projects/939655404703/locations/us-central1/models/8534899636983300096@1
To use this Model in another session:
model = aiplatform.Model('projects/939655404703/locations/us-central1/models/8534899636983300096@1')
display_name         : query_model_two-tower-query-model-nov-2023-v2

uploaded_query_model : <google.cloud.aiplatform.models.Model object at 0x7f62c2847610> 
resource name: projects/939655404703/locations/us-central1/models/8534899636983300096


## Deploy registered model to online endpoint

**Create model endpoint**

In [9]:
SAMPLE_RATE = 1.0
BQ_PREDICTION_LOG_TABLE = f"{PROJECT_ID}.{BQ_DATASET}.req_resp"

if CREATE_NEW_ASSETS == True:

    endpoint = vertex_ai.Endpoint.create(
        display_name=f'endpoint_{DISPLAY_NAME}',
        project=PROJECT_ID,
        location=LOCATION,
        enable_request_response_logging=True,
        request_response_logging_sampling_rate=SAMPLE_RATE,
        request_response_logging_bq_destination_table=f"bq://{BQ_PREDICTION_LOG_TABLE}",
        sync=True,
    )

else:
    endpoint = vertex_ai.Endpoint('projects/939655404703/locations/us-central1/endpoints/6476864558437761024')

print(f"display_name : {endpoint.display_name}\n")
print(f"endpoint     : {endpoint}")

display_name : endpoint_two-tower-query-model-nov-2023-v2

endpoint     : <google.cloud.aiplatform.models.Endpoint object at 0x7f2411906c80> 
resource name: projects/939655404703/locations/us-central1/endpoints/6476864558437761024


**Deploy to endpoint**

In [None]:
if CREATE_NEW_ASSETS == False:
    
    deployed_query_model = uploaded_query_model.deploy(
        endpoint=endpoint,
        deployed_model_display_name=f'deployed_qmodel_{DISPLAY_NAME}',
        machine_type="n1-standard-4",
        min_replica_count=1,
        max_replica_count=2,
        accelerator_type=None,
        accelerator_count=0,
        sync=True,
        traffic_percentage=100
    )

else:
    deployed_query_model = vertex_ai.Endpoint('projects/939655404703/locations/us-central1/endpoints/6476864558437761024')

print(f"display_name         : {deployed_query_model.display_name}\n")
print(f"deployed_query_model : {deployed_query_model}")

#### list all model endpoints

In [12]:
# list_of_model_endpoints = deployed_query_model.list()
# list_of_model_endpoints[:5]

#### list all models on a single endpoint

In [13]:
# list_of_model_endpoints = deployed_query_model.list_models()
# list_of_model_endpoints #[:5]

## Centralized Model Monitoring - Create a Model Monitor

In [19]:
from google.cloud.aiplatform.private_preview.centralized_model_monitoring import model_monitor

MODEL_VERSION_ID = "1"
MONITOR_DISPLAY_NAME="query_model_spotify_monitor"

my_model_monitor = model_monitor.ModelMonitor.create(
    project=PROJECT_ID,
    location=REGION,
    display_name=MONITOR_DISPLAY_NAME,
    model_name=uploaded_query_model.resource_name,
    model_version_id=MODEL_VERSION_ID)
MODEL_MONITOR_RESOURCE_NAME = my_model_monitor.name
print(f"MODEL MONITOR {MODEL_MONITOR_RESOURCE_NAME} created.")

proto-plus module google.cloud.aiplatform.private_preview.centralized_model_monitoring.types.types has a declared manifest but Tensor is not in it


Creating ModelMonitor
Create ModelMonitor backing LRO: projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624/operations/7898875522304704512
ModelMonitor created. Resource name: projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624
To use this ModelMonitor in another session:
model_monitor = aiplatform.ModelMonitor('projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624')
MODEL MONITOR 3660300597145370624 created.


#### Define the drift detection configuration

With the drift detection configuration, you define the input features and the associated thresholds for monitoring feature distribution drift.

In [66]:
# feature_names
feature_names = ['pl_duration_ms_new'
                  ,'num_pl_songs_new'
                  ,'num_pl_artists_new'
                  ,'num_pl_albums_new'
                ]

In [67]:
DRIFT_THRESHOLD_VALUE = 0.05
ATTRIBUTION_DRIFT_THRESHOLD_VALUE = 0.05

# =========================== #
##   Feature value drift     ##
# =========================== #
drift_thresholds = dict()

for feature in feature_names:
    if feature in drift_thresholds:
        print("feature name already in dict")
    else:
        drift_thresholds[feature] = DRIFT_THRESHOLD_VALUE
        
print(f"drift_thresholds      : {drift_thresholds}\n")

drift_thresholds      : {'pl_duration_ms_new': 0.05, 'num_pl_songs_new': 0.05, 'num_pl_artists_new': 0.05, 'num_pl_albums_new': 0.05}



In [70]:
import pandas as pd
TIMESTAMP = pd.Timestamp.utcnow().strftime('%Y%m%d%H%M%S')
DEFAULT_THRESHOLD_VALUE = 0.001
EMAIL = 'jwortz@google.com'
JOB_DISPLAY_NAME = f"spotify_query_model_monitoring_job_{TIMESTAMP}"

In [23]:
model_monitoring_job=my_model_monitor.run(
    display_name=JOB_DISPLAY_NAME,
    objective_config=model_monitor.spec.ObjectiveSpec(
        baseline=model_monitor.spec.MonitoringInput(
            table_uri=f"bq://{PROJECT_ID}.{BQ_DATASET}.{BQ_TABLE_VALID}"
        ),
        target=model_monitor.spec.MonitoringInput(
            endpoints=[endpoint.resource_name]
        ),
        feature_distribution_skew=model_monitor.spec.SkewSpec(
            default_threshold=DEFAULT_THRESHOLD_VALUE,
            feature_thresholds=drift_thresholds,
            # The data window of the serving data is "2h", indicating the selection of '2-hour' data windows before the current time for analysis.
            window="2h")
    ),
    notification_config=model_monitor.spec.NotificationSpec(
        user_emails=[EMAIL],
    ),
    output_config=model_monitor.spec.OutputSpec(
        gcs_base_dir=BUCKET_URI
    )
)

CMM_JOB_RESOURCE_NAME = model_monitoring_job.name
print(f"Model Monitoring Job {CMM_JOB_RESOURCE_NAME} created.")

ModelMonitoringJob created. Resource name: projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624/modelMonitoringJobs/5662854318953332736
To use this ModelMonitoringJob in another session:
model_monitoring_job = aiplatform.ModelMonitoringJob('projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624/modelMonitoringJobs/5662854318953332736')
Model Monitoring Job projects/939655404703/locations/us-central1/modelMonitors/3660300597145370624/modelMonitoringJobs/5662854318953332736 created.


## Create Scheduled Model Monitoring Job

In [68]:
CRON="0 * * * *" # @param {type:"string"} Every 1 hour at :00, for example 1:00, 2:00..
DATA_WINDOW="1h"
SCHEDULE_DISPLAY_NAME="query-endpoint-spotify-hourly"

In [71]:
model_monitoring_schedule=my_model_monitor.create_schedule(
    display_name=SCHEDULE_DISPLAY_NAME,
    cron=CRON,
    objective_config=model_monitor.spec.ObjectiveSpec(
        baseline=model_monitor.spec.MonitoringInput(
            table_uri=f"bq://{PROJECT_ID}.{BQ_DATASET}.{BQ_TABLE_VALID}"
        ),
        target=model_monitor.spec.MonitoringInput(
            endpoints=[endpoint.resource_name]
        ),
        feature_distribution_skew=model_monitor.spec.SkewSpec(
            default_threshold=DEFAULT_THRESHOLD_VALUE,
            feature_thresholds=drift_thresholds,
            # The data window of the serving data is "2h", indicating the selection of '2-hour' data windows before the current time for analysis.
            window="2h")
    ),
    notification_config=model_monitor.spec.NotificationSpec(
        user_emails=[EMAIL],
    ),
    output_config=model_monitor.spec.OutputSpec(
        gcs_base_dir=BUCKET_URI
    )
)

SCHEDULE_RESOURCE_NAME = model_monitoring_schedule.name
print(f"Schedule {SCHEDULE_RESOURCE_NAME} created.")

Schedule projects/939655404703/locations/us-central1/schedules/1465305952197541888 created.


# Test endpoint deployment

In [72]:
if TRACK_HISTORY == '5':
    TEST_INSTANCE = test_instances.TEST_INSTANCE_5
elif TRACK_HISTORY == '15':
    TEST_INSTANCE = test_instances.TEST_INSTANCE_15
else:
    TEST_INSTANCE = None
    print("Track History length not supported")
    
# TEST_INSTANCE

### Make prediction request

test single prediction request and response

In [73]:
response = endpoint.predict(instances=[TEST_INSTANCE])

prediction = response[0]

# print the prediction for the first instance
print(prediction[0])

[1.5727675, -0.478426367, -1.48506081, 1.63012981, 0.207987741, -1.07165945, 0.708537221, 0.973639607, -1.31733668, 0.430333912, 0.203536034, -0.991817415, -0.336415708, -0.364583969, 0.748859465, 0.32331267, -0.832917809, -0.1681014, 1.14483774, 1.42354131, -1.06543791, -0.393461972, -0.503103, 0.265378535, -0.196776718, 1.4200505, -0.852416337, 0.825526357, -0.284980953, -0.307609379, -0.0755918175, 0.626654208, -0.809113801, 0.353907973, -0.26276, -0.425157845, 0.919185758, -0.883253396, -0.334266454, 1.21176898, -0.112918615, 1.55699313, 1.79519391, -0.370241284, 0.287336051, -1.1449461, 1.0705148, -1.44784117, -1.67035174, -0.303792179, 0.358365715, 0.329541594, -0.668701887, 1.60419786, 1.69286656, 0.637520313, -1.33965516, -0.627404153, -1.1610347, -0.188887462, -0.617347896, -0.45977819, -1.65789604, -0.817034066, 0.0378097594, 0.906091452, 0.81617105, 1.2639792, 1.71842718, 1.15019393, 0.489773244, -0.546301603, 1.07499707, 0.187670663, 0.961778045, -0.759879291, -0.197109252,

### Write (many) test instances to file

> test endpoint monitoring with >= 1000 prediction requests

In [74]:
PRED_REQUEST_N = 50
INTERVAL       = PRED_REQUEST_N // 2
SKIP_N         = INTERVAL

print(f"PRED_REQUEST_N : {PRED_REQUEST_N}")
print(f"INTERVAL       : {INTERVAL}")

PRED_REQUEST_N : 50
INTERVAL       : 25


In [75]:
valid_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'data/{DATA_VERSION}/{VALID_DIR_PREFIX}/'):
    if '.tfrecords' in blob.name:
        valid_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
    
valid = tf.data.TFRecordDataset(valid_files)

valid_parsed = valid.map(feature_utils.parse_towers_tfrecord)
# valid_parsed

In [76]:
import numpy

subset_val = valid_parsed.skip(SKIP_N).take(PRED_REQUEST_N)

list_of_dicts = []

for tensor_dict in subset_val:
    list_dict = {}
    td_keys = tensor_dict.keys()
    for k in td_keys:
        
        value = tensor_dict[k].numpy()
        
        if type(value) == bytes:

            list_dict.update({k: value.decode()})
        
        elif type(value) == numpy.ndarray:
            
            if type(value[0]) != bytes:
                list_dict.update({k: value.tolist()})
            else:

                tmp_list = []

                for ele in value:
                    tmp_list.append(ele.decode())

                list_dict.update({k: tmp_list})
                
        elif type(value) == numpy.float32:
            list_dict.update({k: value.item()})
                
        else:
            list_dict.update({k: value})
            
        list_of_dicts.append(list_dict)
    
# list_dict
len(list_of_dicts)

2600

In [77]:
count = 0

for test in list_of_dicts:
    response = endpoint.predict(instances=[test])
    
    if count > 0 and count % INTERVAL == 0:
        print(f"{count} prediciton requests..")
        
    count += 1
    
prediction = response[0]
# print the prediction for the first instance
print(prediction[0])

25 prediciton requests..
50 prediciton requests..
75 prediciton requests..
100 prediciton requests..
125 prediciton requests..
150 prediciton requests..
175 prediciton requests..
200 prediciton requests..
225 prediciton requests..
250 prediciton requests..
275 prediciton requests..
300 prediciton requests..
325 prediciton requests..
350 prediciton requests..
375 prediciton requests..
400 prediciton requests..
425 prediciton requests..
450 prediciton requests..
475 prediciton requests..
500 prediciton requests..
525 prediciton requests..
550 prediciton requests..
575 prediciton requests..
600 prediciton requests..
625 prediciton requests..
650 prediciton requests..
675 prediciton requests..
700 prediciton requests..
725 prediciton requests..
750 prediciton requests..
775 prediciton requests..
800 prediciton requests..
825 prediciton requests..
850 prediciton requests..
875 prediciton requests..
900 prediciton requests..
925 prediciton requests..
950 prediciton requests..
975 prediciton 

### Save test instances to pickle file

In [78]:
import pickle as pkl

LOCAL_INSTANCE_FILE = 'test_instance_list.pkl'

In [79]:
filehandler = open(LOCAL_INSTANCE_FILE, 'wb')
pkl.dump(list_of_dicts, filehandler)
filehandler.close()

In [80]:
filehandler = open(LOCAL_INSTANCE_FILE, 'rb')
LIST_OF_INSTANCES = pkl.load(filehandler)
filehandler.close()

In [81]:
# LIST_OF_INSTANCES[200]

In [82]:
ENDPOINT_TEST_SUBDIR = "endpoint-tests"

!gsutil -q cp $LOCAL_INSTANCE_FILE $BUCKET_URI/$ENDPOINT_TEST_SUBDIR/$LOCAL_INSTANCE_FILE

!gsutil ls $BUCKET_URI/$ENDPOINT_TEST_SUBDIR

gs://ndr-v1-cpg-cdp-bucket/endpoint-tests/test_instance_list.pkl


## Create skewed online query traffic

In [83]:
GENERATE_NEW_STATS       = True # True | False
SKEW_FEATURES_STATS_FILE = 'skew_feat_stats.pkl'

In [84]:
if GENERATE_NEW_STATS:
    
    valid_files = []
    for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'data/{DATA_VERSION}/{VALID_DIR_PREFIX}/'):
        if '.tfrecords' in blob.name:
            valid_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))

    valid = tf.data.TFRecordDataset(valid_files)
    valid_parsed = valid.map(feature_utils.parse_towers_tfrecord)
    
    PRED_REQUEST_N = 5000
    valid_parsed = valid_parsed.take(PRED_REQUEST_N)
    
    # feature
    start_time = time.time()

    durations = np.concatenate(list(valid_parsed.map(lambda x: x["pl_duration_ms_new"]).batch(100)))
    mean_durations = durations.mean()
    std_durations = durations.std()

    num_songs = np.concatenate(list(valid_parsed.map(lambda x: x["num_pl_songs_new"]).batch(100)))
    mean_num_songs = num_songs.mean()
    std_num_songs = num_songs.std()

    num_artists = np.concatenate(list(valid_parsed.map(lambda x: x["num_pl_artists_new"]).batch(100)))
    mean_num_artists = num_artists.mean()
    std_num_artists = num_artists.std()

    num_albums = np.concatenate(list(valid_parsed.map(lambda x: x["num_pl_albums_new"]).batch(100)))
    mean_num_albums = num_albums.mean()
    std_num_albums = num_albums.std()
    
    end_time = time.time()
    elapsed_time = int((end_time - start_time) / 60)
    print(f"elapsed_time: {elapsed_time}")
    
    SKEW_FEATURES = {
        'pl_duration_ms_new': (mean_durations, std_durations),
        'num_pl_songs_new': (mean_num_songs, std_num_songs),
        'num_pl_artists_new': (mean_num_artists, std_num_artists),
        'num_pl_albums_new': (mean_num_albums, std_num_albums),
    }
    
else:
    
    filehandler = open(SKEW_FEATURES_STATS_FILE, 'rb')
    SKEW_FEATURES = pkl.load(filehandler)
    filehandler.close()
    
    mean_durations, std_durations = SKEW_FEATURES['pl_duration_ms_new']
    mean_num_songs, std_num_songs = SKEW_FEATURES['num_pl_songs_new']
    mean_num_artists, std_num_artists = SKEW_FEATURES['num_pl_artists_new']
    mean_num_albums, std_num_albums = SKEW_FEATURES['num_pl_albums_new']

print(f"mean_durations : {mean_durations}")
print(f"std_durations : {std_durations}")

print(f"mean_num_songs : {mean_num_songs}")
print(f"std_num_songs  : {std_num_songs}")

print(f"mean_num_artists : {mean_num_artists}")
print(f"std_num_artists  : {std_num_artists}")

print(f"mean_num_albums : {mean_num_albums}")
print(f"std_num_albums  : {std_num_albums}")

elapsed_time: 0
mean_durations : 16840084.0
std_durations : 15045907.0
mean_num_songs : 71.39299774169922
std_num_songs  : 61.79998779296875
mean_num_artists : 35.209999084472656
std_num_artists  : 29.72267723083496
mean_num_albums : 41.86000061035156
std_num_albums  : 36.4605598449707


In [85]:
def monitoring_test(endpoint, instances, skew_feat_stat, start=2, end=4):
    
    mean_durations, std_durations = skew_feat_stat['pl_duration_ms_new']
    mean_num_songs, std_num_songs = skew_feat_stat['num_pl_songs_new']
    mean_num_artists, std_num_artists = skew_feat_stat['num_pl_artists_new']
    mean_num_albums, std_num_albums = skew_feat_stat['num_pl_albums_new']
    print(f"std_durations   : {round(std_durations, 0)}")
    print(f"std_num_songs   : {round(std_num_songs, 0)}")
    print(f"std_num_artists : {round(std_num_artists, 0)}")
    print(f"std_num_albums  : {round(std_num_albums, 0)}\n")
    
    total_preds = 0
    
    for multiplier in range(start, end+1):

        print(f"multiplier: {multiplier}")

        pred_count = 0

        for example in instances:
            list_dict = {}

            example['pl_duration_ms_new'] = round(std_durations * multiplier, 0)
            example['num_pl_songs_new'] = round(std_num_songs * multiplier, 0)
            example['num_pl_artists_new'] = round(std_num_artists * multiplier, 0)
            example['num_pl_albums_new'] = round(std_num_albums * multiplier, 0)
            # list_of_skewed_instances.append(example)

            response = endpoint.predict(instances=[example])

            if pred_count > 0 and pred_count % 250 == 0:
                print(f"pred_count: {pred_count}")

            pred_count += 1
            total_preds += 1

        print(f"sent {pred_count} pred requests with {multiplier}X multiplier")
    
    print(f"sent {total_preds} total pred requests")

In [None]:
for _ in range(10):
    monitoring_test(
        endpoint=endpoint, 
        instances=LIST_OF_INSTANCES,
        skew_feat_stat=SKEW_FEATURES,
        start=2, 
        end=5
    )
    random_sleep_time = np.random.randint(3600*0.5)
    print("Sleeping for ", random_sleep_time, " seconds")
    time.sleep(random_sleep_time)

std_durations   : 15045907.0
std_num_songs   : 62.0
std_num_artists : 30.0
std_num_albums  : 36.0

multiplier: 2
pred_count: 250
pred_count: 500
pred_count: 750
pred_count: 1000
pred_count: 1250
pred_count: 1500
pred_count: 1750
pred_count: 2000
pred_count: 2250
pred_count: 2500
sent 2600 pred requests with 2X multiplier
multiplier: 3
pred_count: 250
pred_count: 500
pred_count: 750
pred_count: 1000
pred_count: 1250
pred_count: 1500
pred_count: 1750
pred_count: 2000
pred_count: 2250
pred_count: 2500
sent 2600 pred requests with 3X multiplier
multiplier: 4
pred_count: 250
pred_count: 500
pred_count: 750
pred_count: 1000
pred_count: 1250
pred_count: 1500
pred_count: 1750
pred_count: 2000
pred_count: 2250
pred_count: 2500
sent 2600 pred requests with 4X multiplier
multiplier: 5
pred_count: 250
pred_count: 500
pred_count: 750
pred_count: 1000
pred_count: 1250
pred_count: 1500
pred_count: 1750
pred_count: 2000
pred_count: 2250
pred_count: 2500
sent 2600 pred requests with 5X multiplier
sent 

# (Optional): Clean-up

In [64]:
from google.cloud import bigquery

# When no jobs are running, delete the schedule and all the jobs.
my_model_monitor.delete_schedule(SCHEDULE_RESOURCE_NAME)
my_model_monitor.delete_all_model_monitoring_jobs()
my_model_monitor.delete()

# Undeploy the model and delete the endpoint
endpoint.undeploy_all()
endpoint.delete()

# Delete the model
uploaded_query_model.delete()

# Delete BQ logging table
bqclient = bigquery.Client(project=PROJECT_ID)
# Delete the dataset (including all tables)
bqclient.delete_table(BQ_PREDICTION_LOG_TABLE, not_found_ok=True)

### Learn more about model monitoring

**Congratulations!** You've now learned what model monitoring is, how to configure and enable it, and how to find and interpret the results. Check out the following resources to learn more about model monitoring and ML Ops.

- [TensorFlow Data Validation](https://www.tensorflow.org/tfx/guide/tfdv)
- [Data Understanding, Validation, and Monitoring At Scale](https://blog.tensorflow.org/2018/09/introducing-tensorflow-data-validation.html)
- [Vertex Product Documentation](https://cloud.google.com/vertex-ai)
- [Vertex AI Model Monitoring Reference Docs](https://cloud.google.com/vertex-ai/docs/reference)
- [Vertex AI Model Monitoring blog article](https://cloud.google.com/blog/topics/developers-practitioners/monitor-models-training-serving-skew-vertex-ai)
- [Explainable AI Whitepaper](https://storage.googleapis.com/cloud-ai-whitepapers/AI%20Explainability%20Whitepaper.pdf)