# Build custom container for Vertex Prediction

In [3]:
!pwd

/home/jupyter/tf_vertex_agents/02-perarm-features-bandit


In [4]:
# PREFIX = 'mabv1'
VERSION        = "v2"                       # TODO
PREFIX         = f'rec-bandits-{VERSION}'   # TODO

print(f"PREFIX: {PREFIX}")

PREFIX: rec-bandits-v2


In [5]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v2"
VERSION                  = "v2"

BUCKET_NAME              = "rec-bandits-v2-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v2-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v2-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"
DATA_PATH_KFP_DEMO       = "gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_NAME    = "mvlens_rec_bandits_v2"
BIGQUERY_TABLE_NA

In [4]:
# # TODO incorporate to 00-env-setup
# DOCKERNAME_02_PRED = 'Dockerfile_predict_mab_02e'
# IMAGE_NAME_02_PRED = "pred-perarm-feats-02e"
# IMAGE_URI_02_PRED  = f"gcr.io/hybrid-vertex/{IMAGE_NAME_02_PRED}"
# REMOTE_IMAGE_NAME  = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE_NAME_02_PRED}"

# print(f"DOCKERNAME_02_PRED = {DOCKERNAME_02_PRED}")
# print(f"REPOSITORY         = {REPOSITORY}")
# print(f"IMAGE_NAME_02_PRED = {IMAGE_NAME_02_PRED}")
# print(f"IMAGE_URI_02_PRED  = {IMAGE_URI_02_PRED}")
# print(f"REMOTE_IMAGE_NAME  = {REMOTE_IMAGE_NAME}")

DOCKERNAME_02_PRED = Dockerfile_predict_mab_02e
REPOSITORY         = rl-movielens-rec-bandits-v2
IMAGE_NAME_02_PRED = pred-perarm-feats-02e
IMAGE_URI_02_PRED  = gcr.io/hybrid-vertex/pred-perarm-feats-02e
REMOTE_IMAGE_NAME  = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/pred-perarm-feats-02e


run this in terminal from root to clear `__pycache__` files...

In [None]:
# find . | grep -E "(/__pycache__$|\.pyc$|\.pyo$)" | xargs rm -rf

### Setup credentials

Setting up credentials is only required to run the custom serving container locally with GCS paths. Credentials set up is required to execute the `Predictor`'s `load` function, which downloads the model artifacts from Google Cloud Storage.

To access Google Cloud Storage in your project, you'll need to set up credentials by using one of the following:

1. User account
2. Service account

You can learn more about each of the above [here](https://cloud.google.com/docs/authentication#principals)

Option 1: Use Google user credentials

In [None]:
# !gcloud auth application-default login
# !gcloud auth login

# USER_ACCOUNT = "TODO_USER_GCP_LOGIN"  # TODO - 00-env-setup

# !gcloud projects add-iam-policy-binding $PROJECT_ID \
#     --member=user:$USER_ACCOUNT \
#     --role=roles/storage.admin

Option 2: Use Google Service Account credentials

In [None]:
# !gcloud services enable iam.googleapis.com
# !gcloud auth login

# !gcloud projects add-iam-policy-binding $PROJECT_ID \
#     --member=serviceAccount:$VERTEX_SA \
#     --role=roles/storage.admin

Create credentials file

In [None]:
path="/home/jupyter/tf_vertex_agents/src"
os.chdir(path)

In [81]:
CREDENTIALS_FILE = "./credentials.json"

# !gcloud iam service-accounts keys create $CREDENTIALS_FILE \
#     --iam-account=$VERTEX_SA

### Create Artifact Repository
If you don't have an existing artifact repository, create one using the gcloud command below

In [5]:
# ! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$LOCATION

# Custom Prediction Routine (CPR)

### references

* [src code](https://github.com/googleapis/python-aiplatform/tree/main/google/cloud/aiplatform/prediction)
* [docs](https://cloud.google.com/vertex-ai/docs/predictions/custom-prediction-routines#run_the_container_locally_optional)
* code examples
  * [SDK_Custom_Predict_and_Handler_SDK_Integration](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/prediction/custom_prediction_routines/SDK_Custom_Predict_and_Handler_SDK_Integration.ipynb)
  * [SDK_Custom_Preprocess](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/ef8b70db32813b8a2f128ab5ef1d170aea739e7f/notebooks/community/prediction/custom_prediction_routines/SDK_Custom_Preprocess.ipynb)
  
**In the built image, user provided files will be copied as follows:**

```
    container_workdir/
    |-- predictor.py
    |-- requirements.txt
    |-- user_code/
    |   |-- utils.py
    |   |-- custom_package.tar.gz
    |   |-- ...
    |-- ...
```

## Imports

In [90]:
import os
import sys
import numpy as np
import pickle as pkl
from pprint import pprint

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["PROJECT_ID"]=PROJECT_ID

# tensorflow
import tensorflow as tf
from tf_agents.policies import py_tf_eager_policy

# google cloud
from google.cloud import storage
from google.cloud import aiplatform as vertex_ai
from google.cloud.aiplatform.utils import prediction_utils

# GPU
from numba import cuda 
import gc

# this repo
# from per_arm_rl import data_utils as data_utils
# # from per_arm_rl import data_config
# from per_arm_rl import train_utils as train_utils
# from pred import emb_features_pred as emb_features
# from perarm_features import reward_factory as reward_factory

sys.path.append("..")
from src.per_arm_rl import data_utils
from src.per_arm_rl import data_config
from src.per_arm_rl import train_utils as train_utils

from src.perarm_features import reward_factory as reward_factory
from src.perarm_features import emb_features as emb_features

In [None]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
device = cuda.get_current_device()
device.reset()
gc.collect()

In [9]:
storage_client = storage.Client(project=PROJECT_ID)

## Set vars

In [10]:
path="/home/jupyter/tf_vertex_agents/src"
os.chdir(path)
print(os.getcwd())

/home/jupyter/tf_vertex_agents/src


In [11]:
# DOCKERNAME_02_PRED_CPR = 'Dockerfile_cpr'
IMAGE_NAME_02_PRED_CPR = "cpr-perarm-bandit-02e"
IMAGE_URI_02_PRED_CPR  = f"gcr.io/hybrid-vertex/{IMAGE_NAME_02_PRED_CPR}"
REMOTE_IMAGE_NAME_CPR  = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE_NAME_02_PRED_CPR}"

print(f"REPOSITORY             = {REPOSITORY}")
# print(f"DOCKERNAME_02_PRED_CPR = {DOCKERNAME_02_PRED_CPR}")
print(f"IMAGE_NAME_02_PRED_CPR = {IMAGE_NAME_02_PRED_CPR}")
print(f"IMAGE_URI_02_PRED_CPR  = {IMAGE_URI_02_PRED_CPR}")
print(f"REMOTE_IMAGE_NAME_CPR  = {REMOTE_IMAGE_NAME_CPR}")

REPOSITORY             = rl-movielens-rec-bandits-v2
IMAGE_NAME_02_PRED_CPR = cpr-perarm-bandit-02e
IMAGE_URI_02_PRED_CPR  = gcr.io/hybrid-vertex/cpr-perarm-bandit-02e
REMOTE_IMAGE_NAME_CPR  = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/cpr-perarm-bandit-02e


## (Optional) Local inference example

> Before writting the CPR container, let's make sure we understand the steps we need to replicate in that container

### Create test data

In [14]:
LOCAL_VOCAB_FILENAME = './vocab_dict.pkl'

filehandler = open(f"{LOCAL_VOCAB_FILENAME}", 'rb')
vocab_dict = pkl.load(filehandler)
filehandler.close()

In [15]:
SPLIT = "val"
eval_batch_size = 1
NUM_EVAL_STEPS = 20

val_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/{SPLIT}'):
    if '.tfrecord' in blob.name:
        val_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
        
val_dataset = tf.data.TFRecordDataset(val_files)
val_dataset = val_dataset.map(data_utils.parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)

In [16]:
NUM_OOV_BUCKETS        = 1
GLOBAL_EMBEDDING_SIZE  = 16
MV_EMBEDDING_SIZE      = 32 #32

embs = emb_features.EmbeddingModel(
    vocab_dict = vocab_dict,
    num_oov_buckets = NUM_OOV_BUCKETS,
    global_emb_size = GLOBAL_EMBEDDING_SIZE,
    mv_emb_size = MV_EMBEDDING_SIZE,
)

embs

<src.perarm_features.emb_features.EmbeddingModel at 0x7f57e2c98ca0>

In [335]:
ARTIFACTS_DIR = "gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/artifacts"

test_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(ARTIFACTS_DIR, load_specs_from_pbtxt=True)
test_policy

<tf_agents.policies.py_tf_eager_policy.SavedModelPyTFEagerPolicy at 0x7f7c635ae890>

### Inference (policy action)

In [1291]:
INFER_SIZE      = 1
EVAL_BATCH_SIZE = 1
NUM_EVAL_STEPS  = 5

PER_ARM_DIM     = 64
GLOBAL_DIM      = 64

eval_ds = val_dataset.batch(EVAL_BATCH_SIZE)
eval_ds = eval_ds.take(NUM_EVAL_STEPS)
eval_ds

<_TakeDataset element_spec={'bucketized_user_age': TensorSpec(shape=(None,), dtype=tf.float32, name=None), 'movie_genres': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'movie_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'timestamp': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'user_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_occupation_text': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}>

In [1292]:
dummy_arm = tf.zeros([INFER_SIZE, PER_ARM_DIM], dtype=tf.float32)

for x in eval_ds.take(INFER_SIZE):
    global_feat_infer = embs._get_global_context_features(x)
    arm_feat_infer = embs._get_per_arm_features(x)
    
    rewards = reward_factory._get_rewards(x)
    actual_reward = rewards.numpy()[0] # get actual reward
    
    # reshape arm features
    arm_feat_infer = tf.reshape(arm_feat_infer, [EVAL_BATCH_SIZE, PER_ARM_DIM]) # perarm_dim
    concat_arm = tf.concat([arm_feat_infer, dummy_arm], axis=0)
    
    # flatten global
    flat_global_infer = tf.reshape(global_feat_infer, [GLOBAL_DIM])
    feature = {'global': flat_global_infer, 'per_arm': concat_arm}
    
    # build trajectory step
    trajectory_step = train_utils._get_eval_step(feature, actual_reward)
    
    prediction = test_policy.action(trajectory_step)

prediction

PolicyStep(action=array(0, dtype=int32), state=(), info=PerArmPolicyInfo(log_probability=(), predicted_rewards_mean=array([2.4577115, 2.4577115], dtype=float32), multiobjective_scalarized_predicted_rewards_mean=(), predicted_rewards_optimistic=(), predicted_rewards_sampled=(), bandit_policy_type=array([1], dtype=int32), chosen_arm_features=array([-2.09365617e-02,  1.48380436e-02,  4.38927449e-02, -1.62509568e-02,
        4.12502177e-02, -1.96358319e-02,  1.89190395e-02, -1.97972544e-02,
        1.07204206e-02, -2.90853623e-02, -5.21492958e-03, -4.96399291e-02,
       -3.84666212e-02,  3.22810523e-02,  6.33295625e-03, -1.96670890e-02,
       -1.56960599e-02,  3.25680114e-02,  1.57541893e-02, -3.18676122e-02,
       -3.89801636e-02, -2.12526806e-02, -1.22037306e-02,  2.59828456e-02,
        4.26850952e-02,  1.31589882e-02, -6.71111420e-03,  1.19521245e-02,
       -7.05723837e-03,  1.45995654e-02, -8.55533034e-03, -3.35612185e-02,
       -1.97723154e-02,  3.51027399e-03,  1.06675625e-02, 

For the endpoint, we need the policy action (prediction) to be JSON serializable, so:
* for `np.array` convert with `tolist()`
* for integer wrap with `int()`

In [1316]:
print(f"bandit_policy_type     : {prediction.info.bandit_policy_type[0]}")
print(f"chosen_arm_features    : {prediction.info.chosen_arm_features.tolist()}")
print(f"predicted_rewards_mean : {prediction.info.predicted_rewards_mean.tolist()}")
print(f"action                 : {prediction.action.tolist()}")

bandit_policy_type     : 1
chosen_arm_features    : [-0.02093656174838543, 0.014838043600320816, 0.043892744928598404, -0.016250956803560257, 0.04125021770596504, -0.01963583193719387, 0.018919039517641068, -0.019797254353761673, 0.010720420628786087, -0.02908536233007908, -0.0052149295806884766, -0.049639929085969925, -0.038466621190309525, 0.0322810523211956, 0.006332956254482269, -0.019667088985443115, -0.01569605991244316, 0.032568011432886124, 0.01575418934226036, -0.0318676121532917, -0.038980163633823395, -0.02125268056988716, -0.012203730642795563, 0.025982845574617386, 0.042685095220804214, 0.013158988207578659, -0.006711114197969437, 0.011952124536037445, -0.00705723837018013, 0.014599565416574478, -0.008555330336093903, -0.03356121852993965, -0.01977231539785862, 0.0035102739930152893, 0.010667562484741211, 0.0403297059237957, 0.011608172208070755, 0.040256235748529434, -4.451349377632141e-05, -0.0020690560340881348, -0.04746899753808975, 0.0052065737545490265, -0.0069525837

In [None]:
# type(prediction.info.chosen_arm_features)
type(prediction.info.bandit_policy_type.tolist())

In [None]:
prediction.info.bandit_policy_type[0]
type(prediction.info.bandit_policy_type[0])

So we'll need to post-process the policy action (prediction) to a dict:

In [1320]:
NEW_DICT = {
    "bandit_policy_type" : int(prediction.info.bandit_policy_type[0]),
    "chosen_arm_features" : prediction.info.chosen_arm_features.tolist(),
    "predicted_rewards_mean" : prediction.info.predicted_rewards_mean.tolist(),
    "action" : int(prediction.action.tolist()),
}
# NEW_DICT

And, the final step of the CPR will be serialized with `json.dumps()` 

In [1321]:
json_instance = json.dumps({"prediction": NEW_DICT})
json_instance

'{"prediction": {"bandit_policy_type": 1, "chosen_arm_features": [-0.02093656174838543, 0.014838043600320816, 0.043892744928598404, -0.016250956803560257, 0.04125021770596504, -0.01963583193719387, 0.018919039517641068, -0.019797254353761673, 0.010720420628786087, -0.02908536233007908, -0.0052149295806884766, -0.049639929085969925, -0.038466621190309525, 0.0322810523211956, 0.006332956254482269, -0.019667088985443115, -0.01569605991244316, 0.032568011432886124, 0.01575418934226036, -0.0318676121532917, -0.038980163633823395, -0.02125268056988716, -0.012203730642795563, 0.025982845574617386, 0.042685095220804214, 0.013158988207578659, -0.006711114197969437, 0.011952124536037445, -0.00705723837018013, 0.014599565416574478, -0.008555330336093903, -0.03356121852993965, -0.01977231539785862, 0.0035102739930152893, 0.010667562484741211, 0.0403297059237957, 0.011608172208070755, 0.040256235748529434, -4.451349377632141e-05, -0.0020690560340881348, -0.04746899753808975, 0.0052065737545490265, 

In [434]:
x

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'211'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'346'], dtype=object)>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'other'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>}

### (optional) execute step-by-step

> only needed if wanting to better understand dims/shapes etc.

In [388]:
# data_list = [] 

for i in range(2):
    
    iterator = iter(val_dataset.batch(1))
    data = next(iterator)
    data_list.append(data)

data
# data_list

[{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
  'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
  'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'211'], dtype=object)>,
  'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>,
  'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'346'], dtype=object)>,
  'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'other'], dtype=object)>,
  'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>},
 {'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
  'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
  'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'211'], dtype=object)>,
  'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>,
  'user_id': <tf.Tens

In [338]:
global_feat_infer = embs._get_global_context_features(data)
print(f"global_feat_infer.shape = {global_feat_infer.shape}")

arm_feat_infer = embs._get_per_arm_features(data)
print(f"arm_feat_infer.shape = {arm_feat_infer.shape}")

rewards = reward_factory._get_rewards(data)
print(f"rewards.numpy()[0] = {rewards.numpy()[0]}")

(1, 64)

In [343]:
dummy_arm = tf.zeros([1, PER_ARM_DIM], dtype=tf.float32)

# reshape arm features
arm_feat_infer = tf.reshape(arm_feat_infer, [EVAL_BATCH_SIZE, PER_ARM_DIM]) # perarm_dim
concat_arm = tf.concat([arm_feat_infer, dummy_arm], axis=0)

# flatten global
flat_global_infer = tf.reshape(global_feat_infer, [GLOBAL_DIM])
feature = {'global': flat_global_infer, 'per_arm': concat_arm}
# feature

In [346]:
from tf_agents.trajectories import time_step as ts

# get trajectory step for prediction
def _get_pred_step(feature, reward_np):
    
    infer_step = ts.TimeStep(
        tf.constant(ts.StepType.FIRST, dtype=tf.int32, shape=[],name='step_type'),
        tf.constant(reward_np, dtype=tf.float32, shape=[], name='reward'),
        tf.constant(1.0, dtype=tf.float32, shape=[], name='discount'),
        feature
    )
    
    return infer_step

trajectory_step = _get_pred_step(feature, actual_reward)
# trajectory_step

In [348]:
prediction = test_policy.action(trajectory_step)
prediction

PolicyStep(action=array(0, dtype=int32), state=(), info=PerArmPolicyInfo(log_probability=(), predicted_rewards_mean=array([2.4577115, 2.4577115], dtype=float32), multiobjective_scalarized_predicted_rewards_mean=(), predicted_rewards_optimistic=(), predicted_rewards_sampled=(), bandit_policy_type=array([1], dtype=int32), chosen_arm_features=array([-2.09365617e-02,  1.48380436e-02,  4.38927449e-02, -1.62509568e-02,
        4.12502177e-02, -1.96358319e-02,  1.89190395e-02, -1.97972544e-02,
        1.07204206e-02, -2.90853623e-02, -5.21492958e-03, -4.96399291e-02,
       -3.84666212e-02,  3.22810523e-02,  6.33295625e-03, -1.96670890e-02,
       -1.56960599e-02,  3.25680114e-02,  1.57541893e-02, -3.18676122e-02,
       -3.89801636e-02, -2.12526806e-02, -1.22037306e-02,  2.59828456e-02,
        4.26850952e-02,  1.31589882e-02, -6.71111420e-03,  1.19521245e-02,
       -7.05723837e-03,  1.45995654e-02, -8.55533034e-03, -3.35612185e-02,
       -1.97723154e-02,  3.51027399e-03,  1.06675625e-02, 

### (optional) understand the pre-processing steps

> this will be the global and arm sampling functions from `EmbeddingModel()` class in `src/../emb_features.py`

```
        self._embs = emb_features.EmbeddingModel(
            vocab_dict = vocab_dict,
            num_oov_buckets = num_oov_buckets,
            global_emb_size = global_embedding_size,
            mv_emb_size = mv_embedding_size,
        )
```

This is the embbedding representation we need:

In [None]:
for x in eval_ds:
    print(x["user_id"])
    print(embs.user_id_model(x["user_id"]))
    
    break
    
# embs.user_id_model(tf.constant(TEST_INSTANCE['instances'][0]['user_id']))

In [None]:
# data_list[0]['user_id'].numpy()[0]
# data_list[0]['user_occupation_text'].numpy()[0]
# value_test = data_list[0]['movie_id'].numpy()[0]
# type(data_list[0]['movie_id'].numpy()[0])
# type(data_list[0]['user_rating'].numpy()[0])
# type(data_list[0]['timestamp'].numpy()[0])
# type(data_list[0]['movie_genres'].numpy()[0])
# data_list[0]['movie_genres'].numpy().tolist()[0]

**sample test instance** (prediction request)

In [None]:
TEST_INSTANCE = {
    "instances": [
        {
            'bucketized_user_age': 25.0,
            'movie_genres': [4],
            'movie_id': '211',
            'timestamp': 874948475,
            'user_id': '346',
            'user_occupation_text': 'other',
            'user_rating': 4.0
        }
    ]
}

TEST_INSTANCE['instances']

**preprocessing steps to implement in CPR**

In [None]:
rebuild_ex = {}

for x in TEST_INSTANCE['instances']:
    rebuild_ex['bucketized_user_age'] = tf.constant([x["bucketized_user_age"]], dtype=tf.float32)
    rebuild_ex['movie_genres'] = tf.constant([x["movie_genres"]], dtype=tf.int64)
    rebuild_ex['movie_id'] = tf.constant([x["movie_id"]], dtype=tf.string)
    rebuild_ex['timestamp'] = tf.constant([x["timestamp"]], dtype=tf.int64)
    rebuild_ex['user_id'] = tf.constant([x["user_id"]], dtype=tf.string)
    rebuild_ex['user_occupation_text'] = tf.constant([x["user_occupation_text"]], dtype=tf.string)
    rebuild_ex['user_rating'] = tf.constant([x["user_rating"]], dtype=tf.float32)
    print(embs.user_id_model(rebuild_ex['user_id']))
    
# rebuild_ex

**...which will be converted to embeddings like this:**

In [None]:
print(embs.user_id_model(rebuild_ex['user_id']).numpy())
print(embs.user_age_model(rebuild_ex['bucketized_user_age']).numpy())
print(embs.user_occ_model(rebuild_ex['user_occupation_text']).numpy())
print(embs.user_ts_model(rebuild_ex['timestamp']).numpy())
print(embs.mv_id_model(rebuild_ex['movie_id']).numpy())
print(embs.mv_gen_model(rebuild_ex['movie_genres']).numpy())

### (optional) understand CPR's model download steps

> one of the steps in our CPR will be to download our trained policy with `prediction_utils.download_model_artifacts()`; let's see what that looks like locally

In [32]:
path="/home/jupyter/tf_vertex_agents/src"
os.chdir(path)
print(os.getcwd())

/home/jupyter/tf_vertex_agents/src


In [33]:
# for testing
LOCAL_MODEL_ARTIFACTS_DIR = "local_model_dir"

! rm -rf ./$LOCAL_MODEL_ARTIFACTS_DIR
! mkdir ./$LOCAL_MODEL_ARTIFACTS_DIR

Ultimately we'll need to call `py_tf_eager_policy.SavedModelPyTFEagerPolicy()` in our CPR...

We can't just pass the `ARTIFACTS_DIR` because that would result in the CPR container's `model_dir` to look like this:

```
cpr_model_dir/
├── fingerprint.pb
├── policy_specs.pbtxt
├── saved_model.pb
└── variables
    ├── variables.data-00000-of-00001
    └── variables.index
```

Instead, we need the CPR container's `model_dir` to have a subdirectory holding these files like:

```
cpr_model_dir/
└── artifacts
    ├── fingerprint.pb
    ├── policy_specs.pbtxt
    ├── saved_model.pb
    └── variables
        ├── variables.data-00000-of-00001
        └── variables.index
```
.. this is compatible with `py_tf_eager_policy.SavedModelPyTFEagerPolicy()`

In [34]:
BASE_OUTPUT_URI = "gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131"

ARTIFACTS_DIR = f"{BASE_OUTPUT_URI}/artifacts"
POLICY_SERVE_DIR_URI = f"{BASE_OUTPUT_URI}/policy-server"

# !gsutil cp -r $ARTIFACTS_DIR $POLICY_SERVE_DIR_URI/

!gsutil ls $BASE_OUTPUT_URI

gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/artifacts/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/logs/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/policy-server/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/root/


Let's test this...

In [45]:
%cd $LOCAL_MODEL_ARTIFACTS_DIR
# prediction_utils.download_model_artifacts(POLICY_SERVE_DIR_URI)

/home/jupyter/tf_vertex_agents/src/local_model_dir


In [46]:
!ls -p

artifacts/


In [47]:
# for testing
deployment_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy('artifacts', load_specs_from_pbtxt=True)
deployment_policy

<tf_agents.policies.py_tf_eager_policy.SavedModelPyTFEagerPolicy at 0x7f58a2608790>

# Create CPR directory

The CPR directory's structure will be the prediction serving container

Becasue we are going to use the `build_cpr_model()` method for `LocalModel()`, it need to resemble:

```
            container_workdir/
            |-- predictor.py
            |-- requirements.txt
            |-- user_code/
            |   |-- utils.py
            |   |-- custom_package.tar.gz
            |   |-- ...
            |-- ...
```

see `build_cpr_model()` [src](https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/prediction/local_model.py#L147)

If you skipped the optional CPR model download steps above, load a trained policy here:

> TODO: edit the BASE_OUTPUT_DIR to reflect a trained policy in your project

In [49]:
BASE_OUTPUT_URI = "gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131"
ARTIFACTS_DIR = f"{BASE_OUTPUT_URI}/artifacts"

deployment_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(ARTIFACTS_DIR, load_specs_from_pbtxt=True)
deployment_policy

<tf_agents.policies.py_tf_eager_policy.SavedModelPyTFEagerPolicy at 0x7f58a21e6170>

In [50]:
path="/home/jupyter/tf_vertex_agents/src"
os.chdir(path)
print(os.getcwd())

/home/jupyter/tf_vertex_agents/src


In [51]:
LOCAL_CPR_DIR = "cpr_dir"
CPR_SUBDIR = "user_code"

In [52]:
! rm -rf ./$LOCAL_CPR_DIR
! mkdir ./$LOCAL_CPR_DIR
! mkdir ./$LOCAL_CPR_DIR/$CPR_SUBDIR

## Predictor

* Implement a custom `Predictor` that loads in the preprocesor. The preprocessor will then be used at `preprocess` time
* Note, the `PredictionHandle`r will be used for prediction request handling, and the following will be executed:

> `self._predictor.postprocess(self._predictor.predict(self._predictor.preprocess(prediction_input)))`

**references**
* predictor_utils - [src](https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/utils/prediction_utils.py)

In [53]:
os.chdir(path)
!pwd

/home/jupyter/tf_vertex_agents/src


In [54]:
!ls $LOCAL_CPR_DIR

user_code


In [55]:
%%writefile $LOCAL_CPR_DIR/predictor.py
import os
import sys
import logging
import numpy as np
import pickle as pkl
from typing import Dict, Any, Tuple

logging.disable(logging.WARNING)

# google cloud
from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils
from google.cloud import storage

# tensorflow
import tensorflow as tf
import tf_agents
from tf_agents.policies import py_tf_eager_policy
from tf_agents.trajectories import time_step as ts

# this repo
sys.path.extend([f'./{name}' for name in os.listdir(".") if os.path.isdir(name)])

import data_config as data_config
import emb_features_pred as emb_features
import reward_factory as reward_factory

os.environ["PROJECT_ID"] = data_config.PROJECT_ID

# ==================================
# get trajectory step for prediction
# ==================================
def _get_pred_step(feature, reward_np):
    
    infer_step = ts.TimeStep(
        tf.constant(ts.StepType.FIRST, dtype=tf.int32, shape=[],name='step_type'),
        tf.constant(reward_np, dtype=tf.float32, shape=[], name='reward'),
        tf.constant(1.0, dtype=tf.float32, shape=[], name='discount'),
        feature
    )
    
    return infer_step

# ==================================
# prediction logic
# ==================================
class BanditPolicyPredictor(Predictor):
    
    """
    Interface of the Predictor class for Custom Prediction Routines.
    
    The Predictor is responsible for the ML logic for processing a prediction request.
    
    Specifically, the Predictor must define:
        (1) How to load all model artifacts used during prediction into memory.
        (2) The logic that should be executed at predict time.
    
    When using the default PredictionHandler, the Predictor will be invoked as follows:
    
      predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input)))
    
    """
    
    def __init__(self):
        
        self._local_vocab_filename = "./vocab_dict.pkl"
        self._num_oov_buckets = data_config.NUM_OOV_BUCKETS
        self._global_embedding_size = data_config.GLOBAL_EMBEDDING_SIZE
        self._mv_embedding_size = data_config.MV_EMBEDDING_SIZE
        return
        
    def load(self, artifacts_uri: str):
        """
        Loads trained policy dir & vocabulary
        Args:
            artifacts_uri (str):
                Required. The value of the environment variable AIP_STORAGE_URI.
                has `artifacts/` as a sub directory 
        
        """
        prediction_utils.download_model_artifacts(artifacts_uri)
        
        # init deploy policy
        self._deployment_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
            'artifacts', load_specs_from_pbtxt=True
        )
        
        # load vocab dict
        filehandler = open(f"{self._local_vocab_filename}", 'rb')
        self._vocab_dict = pkl.load(filehandler)
        filehandler.close()
        
        # only if no custom preprocessor is defined
        # self._preprocessor = preprocessor
        
    def preprocess(self, prediction_input: Dict): # -> Tuple[Dict, float]:
        """
        Args:
            prediction_input (Any):
                Required. The prediction input that needs to be preprocessed.
        Returns:
            The preprocessed prediction input.        
        """
        # inputs = super().preprocess(prediction_input)
        
        dummy_arm = tf.zeros([1, data_config.PER_ARM_DIM], dtype=tf.float32)
        
        batch_size = len(prediction_input) #["instances"])
        assert batch_size == 1, 'prediction batch_size must be == 1'
        
        self._embs = emb_features.EmbeddingModel(
            vocab_dict = self._vocab_dict,
            num_oov_buckets = self._num_oov_buckets,
            global_emb_size = self._global_embedding_size,
            mv_emb_size = self._mv_embedding_size,
        )
        
        # preprocess example
        rebuild_ex = {}

        for x in prediction_input: #["instances"]:
            rebuild_ex['bucketized_user_age'] = tf.constant([x["bucketized_user_age"]], dtype=tf.float32)
            rebuild_ex['movie_genres'] = tf.constant([x["movie_genres"]], dtype=tf.int64)
            rebuild_ex['movie_id'] = tf.constant([x["movie_id"]], dtype=tf.string)
            rebuild_ex['timestamp'] = tf.constant([x["timestamp"]], dtype=tf.int64)
            rebuild_ex['user_id'] = tf.constant([x["user_id"]], dtype=tf.string)
            rebuild_ex['user_occupation_text'] = tf.constant([x["user_occupation_text"]], dtype=tf.string)
            rebuild_ex['user_rating'] = tf.constant([x["user_rating"]], dtype=tf.float32)
        
        global_feat_infer = self._embs._get_global_context_features(rebuild_ex)
        logging.info(f'global_feat_infer: {global_feat_infer}')          # tmp - debugging
        
        arm_feat_infer = self._embs._get_per_arm_features(rebuild_ex)    # tmp - debugging
        logging.info(f'arm_feat_infer: {arm_feat_infer}')
    
        rewards = reward_factory._get_rewards(rebuild_ex)
        logging.info(f'rewards: {rewards}')                              # tmp - debugging
        
        actual_reward = rewards.numpy()[0]
        logging.info(f'actual_reward: {actual_reward}')                  # tmp - debugging
        
        arm_feat_infer = tf.reshape(arm_feat_infer, [1, data_config.PER_ARM_DIM])
        concat_arm = tf.concat([arm_feat_infer, dummy_arm], axis=0)      # tmp - debugging
        
        # flatten global
        flat_global_infer = tf.reshape(global_feat_infer, [data_config.GLOBAL_DIM])
        feature = {'global': flat_global_infer, 'per_arm': concat_arm}
        logging.info(f'feature: {feature}')                              # tmp - debugging
        
        trajectory_step = _get_pred_step(feature, actual_reward)
        logging.info(f'trajectory_step: {trajectory_step}')
        
        # prediction = self._deployment_policy.action(trajectory_step)
        
        return trajectory_step
    
    def predict(self, instances) -> Dict:
        """
        Performs prediction i.e., policy takes action
        """
        # prediction = self._deployment_policy.action(instances) # trajectory_step
        # return {"predictions": prediction}
        return self._deployment_policy.action(instances)
        

    def postprocess(self, prediction_results: Any) -> Any:
        """ 
        Postprocesses the prediction results
        
        TODO:
             Convert predictions to item IDs
             
        """
        processed_pred_dict = {
            "bandit_policy_type" : int(prediction_results.info.bandit_policy_type[0]),
            "chosen_arm_features" : prediction_results.info.chosen_arm_features.tolist(),
            "predicted_rewards_mean" : prediction_results.info.predicted_rewards_mean.tolist(),
            "action" : int(prediction_results.action.tolist()),
        }
        
        return processed_pred_dict

Writing cpr_dir/predictor.py


## Entrypoint / Handler

Custom containers require an **entrypoint** of the image that starts the model server
* With Custom Prediction Routines (CPR), you **don't need to write the entrypoint** anymore. Vertex SDK will populate the entrypoint with the custom predictor you provide
* However, we *can* implement a custom `handler()` method for the CPR model server, instead of using a pre-built http request handler. 
  * The `handler()` method handles the extraction of the prediction request from the HTTP request message
  * Will also, call the `predictor()` method to pass the extraction instances data for the prediction request
  
For implementing our own Docker build process, see "Scenario 4" in [getting started with cpr](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage6/get_started_with_cpr.ipynb) notebook tutorial

In [56]:
!pwd

/home/jupyter/tf_vertex_agents/src


In [57]:
%%writefile $LOCAL_CPR_DIR/handler.py

import json
import logging
from fastapi import Response
from google.cloud.aiplatform.prediction.handler import PredictionHandler

class CprHandler(PredictionHandler):
    """
    Default prediction handler for the pred requests sent to the application
    """

    async def handle(self, request):
        """Handles a prediction request."""
        
        request_body = await request.body()
        logging.info(f'request_body: {request_body}')
        
        request_body_dict = json.loads(request_body)
        logging.info(f'request_body_dict: {request_body_dict}')
        
        instances=request_body_dict["instances"]
        logging.info(f'instances: {instances}')
        
        prediction_results = self._predictor.postprocess(self._predictor.predict(self._predictor.preprocess(instances)))
                                                         
        logging.info(f'prediction: {prediction_results}')

        return Response(content=json.dumps(prediction_results))
        # return {"predictions": prediction_results}

Writing cpr_dir/handler.py


## CPR package

In [58]:
!pwd

/home/jupyter/tf_vertex_agents/src


### data config

> TODO - edit these as needed

In [59]:
%%writefile $LOCAL_CPR_DIR/$CPR_SUBDIR/data_config.py
PROJECT_ID             = "hybrid-vertex"
REGION                 = "us-central1"
PREFIX                 = "rec-bandits-v2"
BUCKET_NAME            = "rec-bandits-v2-hybrid-vertex-bucket"
EXISTING_VOCAB_FILE    = "gs://rec-bandits-v2-hybrid-vertex-bucket/vocabs/vocab_dict.pkl"
eval_batch_size        = 1
PER_ARM_DIM            = 64
GLOBAL_DIM             = 64
NUM_OOV_BUCKETS        = 1
GLOBAL_EMBEDDING_SIZE  = 16
MV_EMBEDDING_SIZE      = 32

Writing cpr_dir/user_code/data_config.py


### requirements.txt

In [60]:
%%writefile $LOCAL_CPR_DIR/requirements.txt
google-cloud-aiplatform[prediction]==1.33.1
google-cloud-storage
numpy
six
typing-extensions
tensorflow==2.13.1
tf-agents==0.17.0
urllib3
pillow
tensorflow-io
tensorflow-datasets
tensorflow-probability

Writing cpr_dir/requirements.txt


### copy remaining files to CPR dir

In [61]:
! cp ./perarm_features/reward_factory.py ./$LOCAL_CPR_DIR/$CPR_SUBDIR/reward_factory.py
! cp ./perarm_features/emb_features.py ./$LOCAL_CPR_DIR/$CPR_SUBDIR/emb_features_pred.py
! cp ./vocab_dict.pkl ./$LOCAL_CPR_DIR/vocab_dict.pkl

In [64]:
!tree $LOCAL_CPR_DIR

[01;34mcpr_dir[00m
├── handler.py
├── predictor.py
├── requirements.txt
├── [01;34muser_code[00m
│   ├── data_config.py
│   ├── emb_features_pred.py
│   └── reward_factory.py
└── vocab_dict.pkl

1 directory, 7 files


# Build and push CPR container to Vertex

* `LocalModel` [src](https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/prediction/local_model.py)

**Build container**
* To build a custom container, we also need to write an entrypoint of the image that starts the model server. 
* However, with the Custom Prediction Routine feature, you don't need to write the entrypoint anymore. 
* Vertex AI SDK will populate the entrypoint with the custom predictor you provide.

### References

**build_cpr_model**
```
    local_model = LocalModel.build_cpr_model(
        "./user_src_dir",
        "us-docker.pkg.dev/$PROJECT/$REPOSITORY/$IMAGE_NAME$",
        predictor=$CUSTOM_PREDICTOR_CLASS,
        requirements_path="./user_src_dir/requirements.txt",
        extra_packages=["./user_src_dir/user_code/custom_package.tar.gz"],
    )
```

```
Args:
    src_dir (str):
        Required. The path to the local directory including all needed files such as
        predictor. The whole directory will be copied to the image.
    output_image_uri (str):
        Required. The image uri of the built image.
    predictor (Type[Predictor]):
        Optional. The custom predictor class consumed by handler to do prediction.
    handler (Type[Handler]):
        Required. The handler class to handle requests in the model server.
    base_image (str):
        Required. The base image used to build the custom images. The base image must
        have python and pip installed where the two commands ``python`` and ``pip`` must be
        available.
    requirements_path (str):
        Optional. The path to the local requirements.txt file. This file will be copied
        to the image and the needed packages listed in it will be installed.
    extra_packages (List[str]):
        Optional. The list of user custom dependency packages to install.
    no_cache (bool):
        Required. Do not use cache when building the image. Using build cache usually
        reduces the image building time. See
        https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#leverage-build-cache
        for more details.
        
Returns:
    local model: Instantiated representation of the local model.
```

## Create example prediction instance

Create two formats:
* json file
* serialized dictionary

In [75]:
import json
import requests

In [73]:
path="/home/jupyter/tf_vertex_agents/src"
os.chdir(path)
print(os.getcwd())

/home/jupyter/tf_vertex_agents/src


In [76]:
TEST_INSTANCE = {
    "instances": [
        {
            'bucketized_user_age': 25.0,
            'movie_genres': [4],
            'movie_id': '211',
            'timestamp': 874948475,
            'user_id': '346',
            'user_occupation_text': 'other',
            'user_rating': 4.0
        }
    ]
}

# TEST_INSTANCE['instances']
json_instance = json.dumps({"instances": TEST_INSTANCE['instances']})
json_instance

'{"instances": [{"bucketized_user_age": 25.0, "movie_genres": [4], "movie_id": "211", "timestamp": 874948475, "user_id": "346", "user_occupation_text": "other", "user_rating": 4.0}]}'

In [77]:
INPUT_FILE = "instances.json"

with open(INPUT_FILE, "w") as f:
    json_dumps_str = json.dumps(TEST_INSTANCE)
    f.write(json_dumps_str)

In [None]:
# %%writefile $INPUT_FILE
# {
#     "instances": [
#         'bucketized_user_age': 25.0,
#         'movie_genres': [4],
#         'movie_id': '211',
#         'timestamp': 874948475,
#         'user_id': '346',
#         'user_occupation_text': 'other',
#         'user_rating': 4.0
#     ]
# }

## Local build

In [65]:
# path ="/home/jupyter/tf_vertex_agents/src"
# os.chdir(path)
!pwd

/home/jupyter/tf_vertex_agents/src


In [66]:
!ls $LOCAL_CPR_DIR

handler.py  predictor.py  requirements.txt  user_code  vocab_dict.pkl


In [68]:
from google.cloud.aiplatform.prediction import LocalModel
from cpr_dir.predictor import BanditPolicyPredictor
from cpr_dir.handler import CprHandler

POLICY_SERVE_DIR_URI = f"{BASE_OUTPUT_URI}/policy-server"

print(f"POLICY_SERVE_DIR_URI   = {POLICY_SERVE_DIR_URI}")
print(f"REPOSITORY             = {REPOSITORY}")
print(f"IMAGE_NAME_02_PRED_CPR = {IMAGE_NAME_02_PRED_CPR}")
print(f"IMAGE_URI_02_PRED_CPR  = {IMAGE_URI_02_PRED_CPR}")
print(f"REMOTE_IMAGE_NAME_CPR  = {REMOTE_IMAGE_NAME_CPR}")

POLICY_SERVE_DIR_URI   = gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/policy-server
REPOSITORY             = rl-movielens-rec-bandits-v2
IMAGE_NAME_02_PRED_CPR = cpr-perarm-bandit-02e
IMAGE_URI_02_PRED_CPR  = gcr.io/hybrid-vertex/cpr-perarm-bandit-02e
REMOTE_IMAGE_NAME_CPR  = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/cpr-perarm-bandit-02e


In [69]:
! gsutil ls $POLICY_SERVE_DIR_URI

gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/policy-server/artifacts/


In [70]:
local_model = LocalModel.build_cpr_model(
    src_dir= f"./{LOCAL_CPR_DIR}",
    # output_image_uri = IMAGE_URI_02_PRED_CPR,
    output_image_uri = REMOTE_IMAGE_NAME_CPR,
    predictor= BanditPolicyPredictor,
    handler= CprHandler,
    base_image = 'tiangolo/uvicorn-gunicorn-fastapi:python3.10', # fastapi referenced in Predictor
    # base_image = 'tensorflow/tensorflow:2.14.0',
    requirements_path=f"./{LOCAL_CPR_DIR}/requirements.txt",
    no_cache=True,
)

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


You can check out the serving container spec of the built image.

In [71]:
local_model.get_serving_container_spec()

image_uri: "us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/cpr-perarm-bandit-02e"
predict_route: "/predict"
health_route: "/health"

Once CPR model built, either (1) test it locally or (2) push image to registry and upload model to Vertex

### (Optional) deploy to local endpoint

> **Deploy `LocalModel` to `LocalEndpoint`**

This cuts the dev cycle iterations significantly!!

In [78]:
!pwd

/home/jupyter/tf_vertex_agents/src


In [82]:
local_endpoint = local_model.deploy_to_local_endpoint(
    artifact_uri=f"{POLICY_SERVE_DIR_URI}",
    credential_path=CREDENTIALS_FILE,
    container_ready_timeout=300,
    container_ready_check_interval=10
    
)

**Call `serve()` to start the conatiner for local traffic** 

In [83]:
local_endpoint.serve()

health_check_response = local_endpoint.run_health_check()

print(f"health_check     : {health_check_response.content}")
print(f"container_status : {local_endpoint.get_container_status()}")
print(f"container_port   : {local_endpoint.container_port}")
print(f"env_vars         : {local_endpoint.serving_container_environment_variables}")
print(f"ready_interval   : {local_endpoint.container_ready_check_interval}")

health_check     : b'{}'
container_status : running
container_port   : 8080
env_vars         : {}
ready_interval   : 10


In [84]:
# TODO: still don't understand how to use this
local_endpoint.print_container_logs()

#### Test locally deployed policy endpoint

In [86]:
predict_response = local_endpoint.predict(
    request_file=INPUT_FILE,
    headers={"Content-Type": "application/json"},
)
# print(predict_response, predict_response.content)
print(f"predict_response: {predict_response.content}")

predict_response: b'{"bandit_policy_type": 1, "chosen_arm_features": [0.022640574723482132, -0.026614343747496605, 0.009350311011075974, -0.009256027638912201, 0.01683845743536949, -0.02970973215997219, -0.004785060882568359, -0.027460742741823196, 0.039387334138154984, 0.03265250846743584, 0.02882300689816475, -0.020852291956543922, -0.004458390176296234, -0.04745906591415405, -0.01072145625948906, 0.04541505500674248, -0.010558567941188812, 0.03181641176342964, -0.03314167261123657, 0.026442501693964005, 0.01832817867398262, 0.012074984610080719, -0.017612121999263763, 0.03968402370810509, 0.04626071825623512, -0.015969157218933105, 0.042623136192560196, 0.0013975389301776886, -0.017309151589870453, 0.018650617450475693, -0.04263586923480034, 0.03596215322613716, 0.047955069690942764, 0.04047132655978203, 0.02982569858431816, -0.01531977578997612, 0.016927648335695267, 0.01658148691058159, 0.03304805979132652, -0.03345393016934395, -0.0012910142540931702, -0.02426820993423462, 0.0168

and to get prediction response as a usable object: `.json()`

In [92]:
preds = predict_response.json()

print(preds['chosen_arm_features'])

[0.022640574723482132, -0.026614343747496605, 0.009350311011075974, -0.009256027638912201, 0.01683845743536949, -0.02970973215997219, -0.004785060882568359, -0.027460742741823196, 0.039387334138154984, 0.03265250846743584, 0.02882300689816475, -0.020852291956543922, -0.004458390176296234, -0.04745906591415405, -0.01072145625948906, 0.04541505500674248, -0.010558567941188812, 0.03181641176342964, -0.03314167261123657, 0.026442501693964005, 0.01832817867398262, 0.012074984610080719, -0.017612121999263763, 0.03968402370810509, 0.04626071825623512, -0.015969157218933105, 0.042623136192560196, 0.0013975389301776886, -0.017309151589870453, 0.018650617450475693, -0.04263586923480034, 0.03596215322613716, 0.047955069690942764, 0.04047132655978203, 0.02982569858431816, -0.01531977578997612, 0.016927648335695267, 0.01658148691058159, 0.03304805979132652, -0.03345393016934395, -0.0012910142540931702, -0.02426820993423462, 0.016876552253961563, -0.025590229779481888, 0.024780478328466415, -0.02881

stop local endpoint container:

In [None]:
# local_endpoint.stop()

## Deploy to Vertex AI

**Push image to registry**

In [93]:
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


**Upload to Vertex Model Registry**

In [94]:
VERSION = "v5"

In [95]:
# !gsutil ls $ARTIFACTS_DIR
!gsutil ls $POLICY_SERVE_DIR_URI

gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/policy-server/artifacts/


In [96]:
uploaded_policy = vertex_ai.Model.upload(
    local_model=local_model,
    display_name=f'cpr-bandit-from-local-{VERSION}',
    artifact_uri=POLICY_SERVE_DIR_URI,
    sync=True,
)

print(f"display_name    : {model.display_name}")
print(f"uploaded_policy : {model}")

display_name    : cpr-bandit-from-local-v5
uploaded_policy : <google.cloud.aiplatform.models.Model object at 0x7f58886894b0> 
resource name: projects/934903580331/locations/us-central1/models/7813171408418111488


In [97]:
endpoint = vertex_ai.Endpoint.create(
    display_name=f'endpoint-cpr-bandit-{VERSION}',
    project=PROJECT_ID,
    location=LOCATION,
    sync=True,
)

print(f"display_name : {endpoint.display_name}")
print(f"endpoint     : {endpoint}")

display_name : endpoint-cpr-bandit-v5
endpoint     : <google.cloud.aiplatform.models.Endpoint object at 0x7f580564d7e0> 
resource name: projects/934903580331/locations/us-central1/endpoints/5168920710407520256


In [98]:
deployed_policy = uploaded_policy.deploy(
    endpoint=endpoint,
    deployed_model_display_name=f'deployed-cpr-bandit-{VERSION}',
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=1,
    accelerator_type=None,
    accelerator_count=0,
    sync=True,
    enable_access_logging=True,
)

print(f"display_name    : {deployed_model.display_name}\n")
print(f"deployed_policy : {deployed_model}")

display_name    : endpoint-cpr-bandit-v5

deployed_policy : <google.cloud.aiplatform.models.Endpoint object at 0x7f580564d7e0> 
resource name: projects/934903580331/locations/us-central1/endpoints/5168920710407520256


### Test deployed policy endpoint

*Note*: to have predictions display in response to the gcloud command, the handler should return a response dictionary like:

> `{"predictions": post_processed_preds}`

See [Send an online prediction request](https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions#predict-request) in docs for more details 

#### gcloud

In [99]:
ENDPOINT_ID = endpoint.resource_name

!gcloud ai endpoints predict $ENDPOINT_ID --region=$REGION --json-request=instances.json

Using endpoint [https://us-central1-prediction-aiplatform.googleapis.com/]
deployedModelId: '9012835351344971776'
model: projects/934903580331/locations/us-central1/models/7813171408418111488
modelDisplayName: cpr-bandit-from-local-v5
modelVersionId: '1'


In [100]:
# json_dumps_str = json.dumps(TEST_INSTANCE)

# json_instance
ENCODED_TEST_INSTANCE = json_instance.encode('utf-8')
ENCODED_TEST_INSTANCE

b'{"instances": [{"bucketized_user_age": 25.0, "movie_genres": [4], "movie_id": "211", "timestamp": 874948475, "user_id": "346", "user_occupation_text": "other", "user_rating": 4.0}]}'

#### Vertex SDK's raw predict

In [102]:
response = deployed_model.raw_predict(
    body = ENCODED_TEST_INSTANCE,
    headers = {'Content-Type':'application/json'}
).json()

# print(response['chosen_arm_features'])
print(response)

{'bandit_policy_type': 1, 'chosen_arm_features': [0.024603907018899918, 0.022303912788629532, -0.029591917991638184, 0.030105959624052048, 0.015452351421117783, 0.0425909049808979, -0.04958895593881607, 0.0021106116473674774, 0.005578480660915375, -0.047885406762361526, 0.03569388762116432, 0.007767736911773682, -0.028136778622865677, 0.0128319151699543, -0.009761642664670944, -0.02994852140545845, -0.01563861221075058, 0.04413098469376564, 0.00020591169595718384, 0.006635785102844238, -0.0350680947303772, -0.020459115505218506, 0.014313783496618271, 0.0007997974753379822, 0.03674118593335152, -0.007525373250246048, -0.006622813642024994, -0.006104696542024612, -0.040216851979494095, 0.03555159643292427, -0.0321384072303772, 0.02723154053092003, -0.017000533640384674, -0.013260770589113235, 0.009485840797424316, -0.019609510898590088, -0.011272478848695755, -0.037256695330142975, -0.016197729855775833, 0.03783922269940376, 0.03956976160407066, -0.012978147715330124, 0.00630347803235054

In [106]:
## TODO - jt

# response_v2 = deployed_model.predict(
#     instances=[[TEST_INSTANCE]], 
#     use_raw_predict=False
# ) #.json()

# print(response_v2)

# Vertex Batch Predictions

> TODO: jt

In [None]:
# TODO

# Clean up

Undeploy model and delete endpoint

In [107]:
# endpoint.delete(force=True)

Delete policy uploaded to Vertex AI Registry

In [108]:
# uploaded_policy.delete()

**Finished**