# Scaling bandit training with Vertex AI 

**prerequisites:**
* build the training image in the `04b-build-training-image` notebook

**Recommendation**

When profiling a training job, we don't need to run a full training job.

> We just need to get multiple iterations of going through the entire Agent graph (i.e., from data iterator --> agent.train a few times)

In [1]:
# protobuf==4.24.4
# protobuf==3.20.3

In [2]:
# Print the google-cloud-aiplatform version seen by the `python3` on the shell PATH.
! python3 -c "import google.cloud.aiplatform; print('aiplatform SDK version: {}'.format(google.cloud.aiplatform.__version__))"

aiplatform SDK version: 1.33.1


## setup notebook environment

In [3]:
# Show the working directory; the relative `sys.path.append("..")` used later depends on it.
!pwd

/home/jupyter/tf_vertex_agents/02-perarm-features-bandit


### Load env config
* use the prefix from `00-env-setup` notebook

In [4]:
# Naming prefix created in the `00-env-setup` notebook; the bucket name is derived from it.
# PREFIX = 'mabv1'
VERSION = "v2"                                # TODO
PREFIX = "rec-bandits-{}".format(VERSION)     # TODO

print("PREFIX: {}".format(PREFIX))

PREFIX: rec-bandits-v2


In [5]:
# Resolve the active gcloud project: `!cmd` captures the command's output lines as a list,
# and the first line is taken as the project id.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Download the environment config written to the bucket and run it in this notebook's namespace.
# `config.n` is the captured output joined with newlines; the exec defines/overwrites the
# variables printed below (PROJECT_ID, REGION, BUCKET_URI, ...).
# NOTE(review): exec() runs whatever is stored at this GCS path, so write access to the bucket
# must be restricted; if `gsutil cat` fails, the error text is what gets exec'd.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v2"
VERSION                  = "v2"

BUCKET_NAME              = "rec-bandits-v2-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v2-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v2-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"
DATA_PATH_KFP_DEMO       = "gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_NAME    = "mvlens_rec_bandits_v2"
BIGQUERY_TABLE_NA

In [6]:
# ! gsutil iam ch serviceAccount:{VERTEX_SA}:roles/storage.objects.get $BUCKET_URI
# ! gsutil iam ch serviceAccount:{VERTEX_SA}:roles/storage.objects.get $BUCKET_URI

### imports

In [7]:
import os

# Configure TensorFlow's native log verbosity through its environment variable.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [8]:
import json
from datetime import datetime
# NOTE: the `import time` further down rebinds the name `time` to the module,
# so this function import is shadowed and `time` refers to the module afterwards.
from time import time
import pandas as pd
import numpy as np

import logging
import time
from pprint import pprint
import pickle as pkl

# Suppress log records of severity WARNING and below (DEBUG, INFO, WARNING) for all loggers.
logging.disable(logging.WARNING)

from google.cloud import aiplatform as vertex_ai
from google.cloud import storage

In [9]:
import sys

# Make the parent directory importable so the `src` package resolves from this notebook's folder.
sys.path.append("..")

from src.per_arm_rl import data_utils, train_utils, data_config

In [10]:
# Set the default project and region for subsequent Vertex AI SDK calls.
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
)

# Cloud Storage client for the same project.
storage_client = storage.Client(project=PROJECT_ID)

# Vertex Training Job

## job compute

Set the variable `TRAIN_COMPUTE` to configure the compute resources for the VMs you will use for training.

**Machine Type:**
* `n1-standard`: 3.75GB of memory per vCPU.
* `n1-highmem`: 6.5GB of memory per vCPU
* `n1-highcpu`: 0.9 GB of memory per vCPU
* `vCPUs`: number of `[2, 4, 8, 16, 32, 64, 96 ]`

**Note:** The following is not supported for training:

* `standard`: 2 vCPUs
* `highcpu`: 2, 4 and 8 vCPUs

> Note: You may also use n2 and e2 machine types for training and deployment, but they do not support GPUs.

relevant docs: 
* [Configure compute resources for training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types) for more details
* [Machine series comparison](https://cloud.google.com/compute/docs/machine-resource#machine_type_comparison)

In [11]:
# Accelerator profile for the training workers.
# Supported values: "a100" | "t4" | "l4" | "tpu" | False (CPU only).
# The choice is stored as a string because the next cell selects a profile by string comparison.
_requested_accelerator = "t4"
ACCELERATOR = f"{_requested_accelerator}"
print("ACCELERATOR: {}".format(ACCELERATOR))

ACCELERATOR: t4


In [12]:
# Resolve the worker-pool compute profile for the chosen accelerator.
# Every branch assigns the same seven settings, and an unknown choice raises instead of
# silently reusing values left in the kernel by a previously selected profile.
_accelerator_choice = str(ACCELERATOR)

if _accelerator_choice == "a100":
    WORKER_MACHINE_TYPE = 'a2-highgpu-1g'
    REPLICA_COUNT = 1
    ACCELERATOR_TYPE = 'NVIDIA_TESLA_A100'
    PER_MACHINE_ACCELERATOR_COUNT = 1
    REDUCTION_SERVER_COUNT = 0
    REDUCTION_SERVER_MACHINE_TYPE = "n1-highcpu-16"
    DISTRIBUTE_STRATEGY = 'single'
elif _accelerator_choice == 't4':
    WORKER_MACHINE_TYPE = 'n1-highcpu-16'
    REPLICA_COUNT = 1
    ACCELERATOR_TYPE = 'NVIDIA_TESLA_T4'
    PER_MACHINE_ACCELERATOR_COUNT = 1
    DISTRIBUTE_STRATEGY = 'single'
    REDUCTION_SERVER_COUNT = 0
    REDUCTION_SERVER_MACHINE_TYPE = "n1-highcpu-16"
elif _accelerator_choice == 'l4':
    WORKER_MACHINE_TYPE = "g2-standard-16"
    REPLICA_COUNT = 1
    ACCELERATOR_TYPE = 'NVIDIA_L4'
    PER_MACHINE_ACCELERATOR_COUNT = 1
    DISTRIBUTE_STRATEGY = 'single'
    REDUCTION_SERVER_COUNT = 0
    REDUCTION_SERVER_MACHINE_TYPE = "n1-highcpu-16"
elif _accelerator_choice == 'tpu':
    WORKER_MACHINE_TYPE = "cloud-tpu"
    REPLICA_COUNT = 1
    # Vertex AI accelerator enum names are upper-case ("TPU_V3", not "TPU_v3").
    ACCELERATOR_TYPE = 'TPU_V3'
    PER_MACHINE_ACCELERATOR_COUNT = 8 # 8 | 32+ for TPU Pods
    DISTRIBUTE_STRATEGY = 'single'
    REDUCTION_SERVER_COUNT = 0
    REDUCTION_SERVER_MACHINE_TYPE = None
elif _accelerator_choice in ("False", "None"):
    # CPU only. "None" is accepted as well because str(None) == "None".
    WORKER_MACHINE_TYPE = 'n2-highmem-32' # alternatives: 'n1-highmem-96' | 'n2-highmem-96'
    REPLICA_COUNT = 1
    ACCELERATOR_TYPE = None
    PER_MACHINE_ACCELERATOR_COUNT = 0
    DISTRIBUTE_STRATEGY = 'single'
    REDUCTION_SERVER_COUNT = 0
    REDUCTION_SERVER_MACHINE_TYPE = "n1-highcpu-16"
else:
    raise ValueError(
        f"Unsupported ACCELERATOR {ACCELERATOR!r}; "
        "expected one of 'a100', 't4', 'l4', 'tpu', False or None."
    )

TF_GPU_THREAD_COUNT   = '4'      # '1' | '4' | '8'

print(f"WORKER_MACHINE_TYPE            : {WORKER_MACHINE_TYPE}")
print(f"REPLICA_COUNT                  : {REPLICA_COUNT}")
print(f"ACCELERATOR_TYPE               : {ACCELERATOR_TYPE}")
print(f"PER_MACHINE_ACCELERATOR_COUNT  : {PER_MACHINE_ACCELERATOR_COUNT}")
print(f"DISTRIBUTE_STRATEGY            : {DISTRIBUTE_STRATEGY}")
print(f"REDUCTION_SERVER_COUNT         : {REDUCTION_SERVER_COUNT}")
print(f"REDUCTION_SERVER_MACHINE_TYPE  : {REDUCTION_SERVER_MACHINE_TYPE}")
print(f"TF_GPU_THREAD_COUNT            : {TF_GPU_THREAD_COUNT}")

WORKER_MACHINE_TYPE            : n1-highcpu-16
REPLICA_COUNT                  : 1
ACCELERATOR_TYPE               : NVIDIA_TESLA_T4
PER_MACHINE_ACCELERATOR_COUNT  : 1
DISTRIBUTE_STRATEGY            : single
REDUCTION_SERVER_COUNT         : 0
REDUCTION_SERVER_MACHINE_TYPE  : n1-highcpu-16
TF_GPU_THREAD_COUNT            : 4


## set Vertex AI Experiment

In [49]:
# Experiment name plus a timestamped run name; re-running this cell starts a new run.
EXPERIMENT_NAME   = '02-scale-compare-v4'

# new experiment
invoke_time       = time.strftime("%Y%m%d-%H%M%S")
RUN_NAME          = 'run-' + invoke_time

# All outputs of the run live under one GCS prefix.
BASE_OUTPUT_DIR   = '/'.join([BUCKET_URI, EXPERIMENT_NAME, RUN_NAME])
LOG_DIR           = BASE_OUTPUT_DIR + "/logs"
ROOT_DIR          = BASE_OUTPUT_DIR + "/root"       # Root directory for writing logs/summaries/checkpoints.
ARTIFACTS_DIR     = BASE_OUTPUT_DIR + "/artifacts"  # Where the trained model will be saved and restored.

vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    experiment=EXPERIMENT_NAME,
)

print(f"EXPERIMENT_NAME   : {EXPERIMENT_NAME}")
print(f"RUN_NAME          : {RUN_NAME}\n")
for _label, _gcs_path in (
    ("BASE_OUTPUT_DIR", BASE_OUTPUT_DIR),
    ("LOG_DIR", LOG_DIR),
    ("ROOT_DIR", ROOT_DIR),
    ("ARTIFACTS_DIR", ARTIFACTS_DIR),
):
    print(f"{_label:<18}: {_gcs_path}")

EXPERIMENT_NAME   : 02-scale-compare-v4
RUN_NAME          : run-20231115-125019

BASE_OUTPUT_DIR   : gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019
LOG_DIR           : gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/logs
ROOT_DIR          : gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/root
ARTIFACTS_DIR     : gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts


## Create Tensorboard

In [50]:
# Create a new Vertex AI TensorBoard instance named after this experiment run.
TENSORBOARD_DISPLAY_NAME = "-".join([EXPERIMENT_NAME, RUN_NAME])

tensorboard = vertex_ai.Tensorboard.create(
    display_name=TENSORBOARD_DISPLAY_NAME,
    project=PROJECT_ID,
    location=REGION,
)

TB_RESOURCE_NAME = tensorboard.resource_name

# To reuse an existing instance instead, set the resource name directly:
# TB_RESOURCE_NAME = 'projects/934903580331/locations/us-central1/tensorboards/6924469145035603968'

print("TB_RESOURCE_NAME: {}".format(TB_RESOURCE_NAME))
print("TB display name: {}".format(tensorboard.display_name))

TB_RESOURCE_NAME: projects/934903580331/locations/us-central1/tensorboards/1713342581284274176
TB display name: 02-scale-compare-v4-run-20231115-125019


## Set training args

In [51]:
# Show the training image that the worker pool spec below will reference.
# print(f"REMOTE_IMAGE_NAME : {REMOTE_IMAGE_NAME}")  # docker (local build)
print("IMAGE_URI_02 : {}".format(IMAGE_URI_02))      # cloud build image uri

IMAGE_URI_02 : gcr.io/hybrid-vertex/train-perarm-feats-v2


In [52]:
# Hyperparameters and data/agent configuration forwarded to the training job.
# VOCAB_SUBDIR and VOCAB_FILENAME come from the environment config loaded earlier.
# VOCAB_SUBDIR         = "vocabs"
# VOCAB_FILENAME       = "vocab_dict.pkl"

# Set hyperparameters.
BATCH_SIZE           = 128          # Training and prediction batch size.
TRAINING_LOOPS       = 100          # Number of training iterations.
STEPS_PER_LOOP       = 1            # Number of driver steps per training iteration.
ASYNC_STEPS_PER_LOOP = 1
LOG_INTERVAL         = 10
LR                   = 0.05

CHKPT_INTERVAL       = 1000
EVAL_BATCH_SIZE      = 1
NUM_EVAL_STEPS       = 2000 #10000

# Set MovieLens simulation environment parameters.
RANK_K               = 10      # Rank for matrix factorization in the MovieLens environment; also the observation dimension.
NUM_ACTIONS          = 2       # Number of actions (movie items) to choose from.
PER_ARM              = True    # Per-arm flag. NOTE(review): only printed in this cell; confirm whether the trainer needs it.

# ================================
# Agent
# ================================
AGENT_TYPE          = 'LinUCB' # 'LinUCB' | 'LinTS' | 'epsGreedy' | 'NeuralLinUCB'
NETWORK_TYPE        = "commontower" # 'commontower' | 'dotproduct'

TIKHONOV_WEIGHT     = 0.001   # LinUCB Tikhonov regularization weight.
AGENT_ALPHA         = 0.1     # LinUCB exploration parameter that multiplies the confidence intervals.
EPSILON             = 0.01
ENCODING_DIM        = 1
EPS_PHASE_STEPS     = 1000

# ================================
# network params
# ================================
GLOBAL_LAYERS       = [64, 32, 16]
ARM_LAYERS          = [64, 32, 16]
COMMON_LAYERS       = [16, 8]

# GLOBAL_LAYERS       = [128, 64, 32]
# ARM_LAYERS          = [128, 64, 32]
# COMMON_LAYERS       = [32, 16, 8]

# NeuralLinUCB forces the common-tower network and takes its encoding size
# from the last common layer.
if AGENT_TYPE == 'NeuralLinUCB':
    NETWORK_TYPE = 'commontower'
    ENCODING_DIM = COMMON_LAYERS[-1]

# ================================
# data config
# ================================
GLOBAL_DIM             = 64       # 16
PER_ARM_DIM            = 64       # 16
NUM_OOV_BUCKETS        = 1
GLOBAL_EMBEDDING_SIZE  = 16
MV_EMBEDDING_SIZE      = 32       # 32
SPLIT                  = "train"  # TODO - remove
RESUME_TRAINING        = None

# Echo the configuration once per setting (AGENT_ALPHA was previously printed twice).
print(f"VOCAB_SUBDIR           : {VOCAB_SUBDIR}")
print(f"VOCAB_FILENAME         : {VOCAB_FILENAME}")
print(f"BATCH_SIZE             : {BATCH_SIZE}")
print(f"TRAINING_LOOPS         : {TRAINING_LOOPS}")
print(f"STEPS_PER_LOOP         : {STEPS_PER_LOOP}")
print(f"ASYNC_STEPS_PER_LOOP   : {ASYNC_STEPS_PER_LOOP}")
print(f"LOG_INTERVAL           : {LOG_INTERVAL}")
print(f"RANK_K                 : {RANK_K}")
print(f"NUM_ACTIONS            : {NUM_ACTIONS}")
print(f"PER_ARM                : {PER_ARM}")
print(f"AGENT_TYPE             : {AGENT_TYPE}")
print(f"NETWORK_TYPE           : {NETWORK_TYPE}")
print(f"TIKHONOV_WEIGHT        : {TIKHONOV_WEIGHT}")
print(f"AGENT_ALPHA            : {AGENT_ALPHA}")
print(f"GLOBAL_DIM             : {GLOBAL_DIM}")
print(f"PER_ARM_DIM            : {PER_ARM_DIM}")
print(f"SPLIT                  : {SPLIT}")
print(f"RESUME_TRAINING        : {RESUME_TRAINING}")
print(f"NUM_OOV_BUCKETS        : {NUM_OOV_BUCKETS}")
print(f"GLOBAL_EMBEDDING_SIZE  : {GLOBAL_EMBEDDING_SIZE}")
print(f"MV_EMBEDDING_SIZE      : {MV_EMBEDDING_SIZE}")
print(f"GLOBAL_LAYERS          : {GLOBAL_LAYERS}")
print(f"ARM_LAYERS             : {ARM_LAYERS}")
print(f"COMMON_LAYERS          : {COMMON_LAYERS}")
print(f"LR                     : {LR}")
print(f"CHKPT_INTERVAL         : {CHKPT_INTERVAL}")
print(f"EVAL_BATCH_SIZE        : {EVAL_BATCH_SIZE}")
print(f"NUM_EVAL_STEPS         : {NUM_EVAL_STEPS}")
print(f"EPSILON                : {EPSILON}")
print(f"ENCODING_DIM           : {ENCODING_DIM}")
print(f"EPS_PHASE_STEPS        : {EPS_PHASE_STEPS}")

VOCAB_SUBDIR           : vocabs
VOCAB_FILENAME         : vocab_dict.pkl
BATCH_SIZE             : 128
TRAINING_LOOPS         : 100
STEPS_PER_LOOP         : 1
ASYNC_STEPS_PER_LOOP   : 1
LOG_INTERVAL           : 10
RANK_K                 : 10
NUM_ACTIONS            : 2
PER_ARM                : True
AGENT_TYPE             : LinUCB
NETWORK_TYPE           : commontower
TIKHONOV_WEIGHT        : 0.001
AGENT_ALPHA            : 0.1
GLOBAL_DIM             : 64
PER_ARM_DIM            : 64
SPLIT                  : train
RESUME_TRAINING        : None
NUM_OOV_BUCKETS        : 1
GLOBAL_EMBEDDING_SIZE  : 16
MV_EMBEDDING_SIZE      : 32
AGENT_ALPHA            : 0.1
GLOBAL_LAYERS          : [64, 32, 16]
ARM_LAYERS             : [64, 32, 16]
COMMON_LAYERS          : [16, 8]
LR                     : 0.05
CHKPT_INTERVAL         : 1000
EVAL_BATCH_SIZE        : 1
NUM_EVAL_STEPS         : 2000
EPSILON                : 0.01
ENCODING_DIM           : 1
EPS_PHASE_STEPS        : 1000


In [53]:
# Keep the project helpers importable even when this cell is run on its own.
import sys
sys.path.append("..")
from src.per_arm_rl import train_utils
from pprint import pprint

# Command-line flags placed in the training container's `args`.
WORKER_ARGS = [
    f"--project={PROJECT_ID}",
    f"--project_number={PROJECT_NUM}",
    f"--bucket_name={BUCKET_NAME}",
    f"--artifacts_dir={ARTIFACTS_DIR}",
    f"--root_dir={ROOT_DIR}",
    f"--log_dir={LOG_DIR}",
    f"--data_dir_prefix_path={DATA_GCS_PREFIX}",
    f"--vocab_prefix_path={VOCAB_SUBDIR}",
    f"--vocab_filename={VOCAB_FILENAME}",
    ### job config
    f"--distribute={DISTRIBUTE_STRATEGY}",
    f"--experiment_name={EXPERIMENT_NAME}",
    f"--experiment_run={RUN_NAME}",
    f"--agent_type={AGENT_TYPE}",
    f"--network_type={NETWORK_TYPE}",
    ### hparams
    f"--batch_size={BATCH_SIZE}",
    f"--eval_batch_size={EVAL_BATCH_SIZE}",
    f"--training_loops={TRAINING_LOOPS}",
    f"--steps_per_loop={STEPS_PER_LOOP}",
    f"--num_eval_steps={NUM_EVAL_STEPS}",
    f"--rank_k={RANK_K}",
    f"--num_actions={NUM_ACTIONS}",
    f"--async_steps_per_loop={ASYNC_STEPS_PER_LOOP}",
    # "--resume_training_loops",
    f"--global_dim={GLOBAL_DIM}",
    f"--per_arm_dim={PER_ARM_DIM}",
    f"--split={SPLIT}",
    f"--log_interval={LOG_INTERVAL}",
    f"--chkpt_interval={CHKPT_INTERVAL}",
    f"--num_oov_buckets={NUM_OOV_BUCKETS}",
    f"--global_emb_size={GLOBAL_EMBEDDING_SIZE}",
    f"--mv_emb_size={MV_EMBEDDING_SIZE}",
    f"--agent_alpha={AGENT_ALPHA}",
    f"--global_layers={GLOBAL_LAYERS}",
    f"--arm_layers={ARM_LAYERS}",
    f"--common_layers={COMMON_LAYERS}",
    f"--learning_rate={LR}",
    f"--epsilon={EPSILON}",
    f"--encoding_dim={ENCODING_DIM}",
    f"--eps_phase_steps={EPS_PHASE_STEPS}",
    f"--tf_gpu_thread_count={TF_GPU_THREAD_COUNT}",
    ### accelerators & profiling
    # NOTE(review): --use_gpu is passed regardless of ACCELERATOR; confirm the trainer tolerates it on CPU/TPU.
    "--use_gpu",
    # "--use_tpu",
    # "--profiler",
    # "--sum_grads_vars",
    "--debug_summaries",
    # "--cache_train",
]

# Build the worker pool specification from the compute profile and the flags above.
WORKER_POOL_SPECS = train_utils.prepare_worker_pool_specs(
    # image_uri=f"{REMOTE_IMAGE_NAME}:latest",
    image_uri="{}:latest".format(IMAGE_URI_02),
    args=WORKER_ARGS,
    replica_count=REPLICA_COUNT,
    machine_type=WORKER_MACHINE_TYPE,
    accelerator_count=PER_MACHINE_ACCELERATOR_COUNT,
    accelerator_type=ACCELERATOR_TYPE,
    reduction_server_count=REDUCTION_SERVER_COUNT,
    reduction_server_machine_type=REDUCTION_SERVER_MACHINE_TYPE,
)

pprint(WORKER_POOL_SPECS)

[{'container_spec': {'args': ['--project=hybrid-vertex',
                              '--project_number=934903580331',
                              '--bucket_name=rec-bandits-v2-hybrid-vertex-bucket',
                              '--artifacts_dir=gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts',
                              '--root_dir=gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/root',
                              '--log_dir=gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/logs',
                              '--data_dir_prefix_path=data',
                              '--vocab_prefix_path=vocabs',
                              '--vocab_filename=vocab_dict.pkl',
                              '--distribute=single',
                              '--experiment_name=02-scale-compare-v4',
                              '--experiment_run=run-20231115-125019',
                  

In [54]:
# !pwd

# Submit training job

In [55]:
# Point the SDK at the project, region and experiment used for this run.
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    experiment=EXPERIMENT_NAME,
    # staging_bucket=ROOT_DIR,
)

# The job display name reuses the timestamped run name.
JOB_NAME = "paf-bandit-" + RUN_NAME
print("JOB_NAME: {}".format(JOB_NAME))

JOB_NAME: paf-bandit-run-20231115-125019


In [56]:
# Define the CustomJob; nothing is submitted until `.run()` is called.
my_custom_job = vertex_ai.CustomJob(
    display_name=JOB_NAME,
    project=PROJECT_ID,
    worker_pool_specs=WORKER_POOL_SPECS,
    base_output_dir=BASE_OUTPUT_DIR,
    staging_bucket=ROOT_DIR,
    # location="asia-southeast1",
)

In [57]:
# Submit the job without blocking the notebook (sync=False).
my_custom_job.run(
    service_account=VERTEX_SA,
    tensorboard=TB_RESOURCE_NAME,
    enable_web_access=True,                # needed for the interactive terminal used in the GPU profiling section
    restart_job_on_worker_restart=False,
    sync=False,
)

In [24]:
# The job was submitted with sync=False, so its backing resource may not exist yet.
# Block until it has been created before reading display_name / resource_name.
my_custom_job.wait_for_resource_creation()

print(f"Job Name: {my_custom_job.display_name}")
print(f"Job Resource Name: {my_custom_job.resource_name}\n")

Job Name: paf-bandit-run-20231115-104146
Job Resource Name: projects/934903580331/locations/us-central1/customJobs/4921246713999523840



### Get link to Vertex AI Experiment console

In [26]:
# Fetch the experiment runs as a DataFrame and keep only the rows for EXPERIMENT_NAME.
experiment_df = vertex_ai.get_experiment_df()
_is_current_experiment = experiment_df["experiment_name"] == EXPERIMENT_NAME
experiment_df = experiment_df.loc[_is_current_experiment]
experiment_df.transpose()

In [None]:
# Use positional access: after boolean filtering the first row's index label is not guaranteed to be 0.
# NOTE(review): assumes the experiment DataFrame has a "metric.lineage" column — confirm.
print("Open the following link", experiment_df["metric.lineage"].iloc[0])

### GPU profiling

> once training job begins, enter these commands in the Vertex interactive terminal:

```bash
sudo apt update
sudo apt -y install nvtop
```

## TensorBoard

### in-notebook TensorBoard

> if `--profiler`, find `PROFILE` in the drop down:

<img src="imgs/getting_profiler.png" 
     align="center" 
     width="850"
     height="850"/>

In [36]:
# Set TensorFlow's native log verbosity before importing it.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

import tensorflow as tf

# Default to the current run's log directory; uncomment the literal path to inspect an older run.
TB_LOGS_PATH = LOG_DIR
# TB_LOGS_PATH = 'gs://rec-bandits-v2-hybrid-vertex-bucket/v2-scale-t4-v1/run-20231010-082727/logs'
print("TB_LOGS_PATH: {}".format(TB_LOGS_PATH))

TB_LOGS_PATH: gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v2/run-20231115-094131/logs


In [37]:
# %load_ext tensorboard
# `reload_ext` is used so the cell can be re-run after the extension has already been loaded.
%reload_ext tensorboard

In [38]:
# Launch TensorBoard inline, reading from the log directory selected in TB_LOGS_PATH.
%tensorboard --logdir=$TB_LOGS_PATH

# Making predictions

* Once a policy is trained, it can be given a new observation request (i.e. a user vector);
* the policy will then infer (produce) actions, which are the recommended movies.
* In TF-Agents, observations are abstracted in a named tuple,

```
TimeStep(‘step_type’, ‘discount’, ‘reward’, ‘observation’)
```

> the policy maps time steps to actions

In [61]:
import tensorflow as tf
from src.perarm_features import emb_features as emb_features
from src.perarm_features import reward_factory as reward_factory

## Load eval dataset

In [62]:
# A dedicated name keeps the training `SPLIT` value from being overwritten,
# which would change the `--split` flag if the training cells are re-run afterwards.
EVAL_SPLIT = "val"

# Build gs:// URIs from bucket and object name. `Blob.public_url` percent-encodes the
# object name, so rewriting its prefix can yield paths that do not exist.
val_files = [
    f"gs://{blob.bucket.name}/{blob.name}"
    for blob in storage_client.list_blobs(BUCKET_NAME, prefix=f'{DATA_GCS_PREFIX}/{EVAL_SPLIT}')
    if '.tfrecord' in blob.name
]
if not val_files:
    raise FileNotFoundError(
        f"No .tfrecord files found under gs://{BUCKET_NAME}/{DATA_GCS_PREFIX}/{EVAL_SPLIT}"
    )

val_dataset = tf.data.TFRecordDataset(val_files)
val_dataset = val_dataset.map(data_utils.parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)

# eval dataset: one example per batch
eval_ds = val_dataset.batch(1)

# if NUM_EVAL_STEPS > 0:
#     eval_ds = eval_ds.take(NUM_EVAL_STEPS)

eval_ds

<_BatchDataset element_spec={'bucketized_user_age': TensorSpec(shape=(None,), dtype=tf.float32, name=None), 'movie_genres': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'movie_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'timestamp': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'user_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_occupation_text': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}>

### Load vocabulary

In [63]:
# Copy the vocabulary pickle from GCS into the working directory and load it.
EXISTING_VOCAB_FILE = f'gs://{BUCKET_NAME}/{VOCAB_SUBDIR}/{VOCAB_FILENAME}'
print(f"Downloading vocab...")

# os.system returns the command's exit status; fail loudly rather than un-pickling
# a stale or missing local file after a failed copy.
copy_status = os.system(f'gsutil -q cp {EXISTING_VOCAB_FILE} .')
if copy_status != 0:
    raise RuntimeError(
        f"gsutil cp failed (exit status {copy_status}) for {EXISTING_VOCAB_FILE}"
    )
print(f"Downloaded vocab from: {EXISTING_VOCAB_FILE}\n")

# NOTE(review): pickle.load can execute code embedded in the file; only load vocab files
# from a bucket you trust.
with open(VOCAB_FILENAME, 'rb') as filehandler:
    vocab_dict = pkl.load(filehandler)

# Show which features have a vocabulary entry.
for key in vocab_dict.keys():
    pprint(key)

Downloading vocab...
Downloaded vocab from: gs://rec-bandits-v2-hybrid-vertex-bucket/vocabs/vocab_dict.pkl

'movie_id'
'user_id'
'user_occupation_text'
'movie_genres'
'bucketized_user_age'
'max_timestamp'
'min_timestamp'
'timestamp_buckets'


## load trained policy

In [64]:
# MODEL_DIR = "gs://mabv1-hybrid-vertex-bucket/scale-perarm-hpt/run-20230717-211248/model"

# List the contents of the run's artifacts directory to confirm a saved policy is present.
!gsutil ls $ARTIFACTS_DIR

gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/fingerprint.pb
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/policy_specs.pbtxt
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/saved_model.pb
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/assets/
gs://rec-bandits-v2-hybrid-vertex-bucket/02-scale-compare-v4/run-20231115-125019/artifacts/variables/


In [65]:
from tf_agents.policies import py_tf_eager_policy

# Load the saved policy from the artifacts directory, reading its specs from the
# `policy_specs.pbtxt` file stored alongside the SavedModel.
_policy_loader = py_tf_eager_policy.SavedModelPyTFEagerPolicy
trained_policy = _policy_loader(ARTIFACTS_DIR, load_specs_from_pbtxt=True)

trained_policy

<tf_agents.policies.py_tf_eager_policy.SavedModelPyTFEagerPolicy at 0x7fa4085c7430>

## call embedding models

In [66]:
# Display the embedding size that is passed to the embedding model in the next cell.
GLOBAL_EMBEDDING_SIZE

16

In [67]:
# Build the embedding model with the vocabulary and embedding sizes set in the training-args cell.
_embedding_kwargs = dict(
    vocab_dict=vocab_dict,
    num_oov_buckets=NUM_OOV_BUCKETS,
    global_emb_size=GLOBAL_EMBEDDING_SIZE,
    mv_emb_size=MV_EMBEDDING_SIZE,
)
embs = emb_features.EmbeddingModel(**_embedding_kwargs)

embs

<src.perarm_features.emb_features.EmbeddingModel at 0x7fa4085c7ac0>

## Run inference with trained policy

In [68]:
# Run the restored policy on INFER_SIZE validation examples.
# NOTE(review): only the result for the last example is kept in `prediction`.
INFER_SIZE = 1
# All-zero arm features appended after the real arm below, giving the policy a second arm
# (presumably to match NUM_ACTIONS = 2 — confirm).
dummy_arm = tf.zeros([INFER_SIZE, PER_ARM_DIM], dtype=tf.float32)

for x in eval_ds.take(INFER_SIZE):
    # get feature tensors
    global_feat_infer = embs._get_global_context_features(x)
    arm_feat_infer = embs._get_per_arm_features(x)
    
    # rewards = _get_rewards(x)
    rewards = reward_factory._get_rewards(x)
    
    # reshape arm features
    # NOTE(review): relies on EVAL_BATCH_SIZE matching the batch(1) used to build eval_ds.
    arm_feat_infer = tf.reshape(arm_feat_infer, [EVAL_BATCH_SIZE, PER_ARM_DIM]) # perarm_dim
    concat_arm = tf.concat([arm_feat_infer, dummy_arm], axis=0)
    
    # flatten global
    flat_global_infer = tf.reshape(global_feat_infer, [GLOBAL_DIM])
    feature = {'global': flat_global_infer, 'per_arm': concat_arm}
    
    # get actual reward
    actual_reward = rewards.numpy()[0]
    
    # build trajectory step
    trajectory_step = train_utils._get_eval_step(feature, actual_reward)
    
    # ask the restored policy for an action on this step
    prediction = trained_policy.action(trajectory_step)

In [69]:
# Inspect the policy output produced by the inference loop above.
prediction

PolicyStep(action=array(0, dtype=int32), state=(), info=PerArmPolicyInfo(log_probability=(), predicted_rewards_mean=(), multiobjective_scalarized_predicted_rewards_mean=(), predicted_rewards_optimistic=(), predicted_rewards_sampled=(), bandit_policy_type=(), chosen_arm_features=array([ 9.3020797e-03, -3.2347158e-02,  4.1276325e-02,  2.6380036e-02,
       -2.6593601e-02, -3.4350384e-02, -3.0565202e-02, -3.2029547e-02,
        1.5862275e-02, -6.1246380e-03,  2.3106385e-02, -2.2312880e-02,
       -6.0982481e-03,  1.2759421e-02,  1.3587091e-02,  2.4498928e-02,
       -3.2554224e-02,  7.3992126e-03,  4.8149824e-03,  4.6623219e-02,
        3.5907898e-02, -3.9207697e-02, -3.0699229e-02, -3.0026352e-02,
        3.5303507e-02, -4.8878707e-02,  2.5514964e-02, -9.9024996e-03,
        4.6273097e-03,  1.7675757e-03,  1.8049750e-02,  2.1221664e-02,
        1.6136181e-02,  2.1091986e-02, -4.5583498e-02, -4.4766020e-02,
       -4.7618970e-03, -2.3394451e-03, -2.1933293e-02, -3.5277437e-02,
       -3.6

# Deploy policy to endpoint

In [70]:
# x

In [71]:
# Names for the prediction (serving) container image.
# TODO incorporate to 00-env-setup
DOCKERNAME_02_PRED = 'Dockerfile_predict_mab_02e'
IMAGE_NAME_02_PRED = "pred-perarm-feats-02e"
# Derive the registry path from PROJECT_ID instead of a hard-coded project,
# matching how REMOTE_IMAGE_NAME is built.
IMAGE_URI_02_PRED  = f"gcr.io/{PROJECT_ID}/{IMAGE_NAME_02_PRED}"
REMOTE_IMAGE_NAME  = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE_NAME_02_PRED}"

print(f"DOCKERNAME_02_PRED = {DOCKERNAME_02_PRED}")
print(f"REPOSITORY         = {REPOSITORY}")
print(f"IMAGE_NAME_02_PRED = {IMAGE_NAME_02_PRED}")
print(f"IMAGE_URI_02_PRED  = {IMAGE_URI_02_PRED}")
print(f"REMOTE_IMAGE_NAME  = {REMOTE_IMAGE_NAME}")

DOCKERNAME_02_PRED = Dockerfile_predict_mab_02e
REPOSITORY         = rl-movielens-rec-bandits-v2
IMAGE_NAME_02_PRED = pred-perarm-feats-02e
IMAGE_URI_02_PRED  = gcr.io/hybrid-vertex/pred-perarm-feats-02e
REMOTE_IMAGE_NAME  = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/pred-perarm-feats-02e


In [72]:
# Upload the saved policy as a Vertex AI Model served by the custom prediction container.
_serving_image = "{}:latest".format(IMAGE_URI_02_PRED)
# _serving_image = f"{REMOTE_IMAGE_NAME}:latest"

uploaded_policy = vertex_ai.Model.upload(
    display_name='trained-{}-v2'.format(AGENT_TYPE),
    artifact_uri=ARTIFACTS_DIR,
    serving_container_image_uri=_serving_image,
    serving_container_predict_route="/predict",
    serving_container_health_route="/health",
    sync=True,
)

print("display_name    : {}".format(uploaded_policy.display_name))
print("uploaded_policy : {}".format(uploaded_policy))

display_name    : trained-LinUCB-v2
uploaded_policy : <google.cloud.aiplatform.models.Model object at 0x7fa3fbc06bc0> 
resource name: projects/934903580331/locations/us-central1/models/2107392155516403712


In [73]:
# Reuse an existing endpoint by its ID. To create a fresh endpoint instead:
# endpoint = vertex_ai.Endpoint.create(
#     display_name=f'endpoint-{AGENT_TYPE}-v1',
#     project=PROJECT_ID,
#     location=LOCATION,
#     sync=True,
# )

_existing_endpoint_id = '211442683091091456'
endpoint = vertex_ai.Endpoint(_existing_endpoint_id)

print("display_name : {}".format(endpoint.display_name))
print("endpoint     : {}".format(endpoint))

display_name : endpoint-epsGreedy-v1
endpoint     : <google.cloud.aiplatform.models.Endpoint object at 0x7fa3fbc46f80> 
resource name: projects/934903580331/locations/us-central1/endpoints/211442683091091456


In [74]:
# Deploy the uploaded model to the endpoint on a single CPU replica.
# `deploy()` hands back the Endpoint, so the display name printed below is the endpoint's.
_deploy_options = dict(
    endpoint=endpoint,
    deployed_model_display_name='deployed-{}-v1'.format(AGENT_TYPE),
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=1,
    accelerator_type=None,
    accelerator_count=0,
    sync=True,
)
deployed_policy = uploaded_policy.deploy(**_deploy_options)

print("display_name    : {}\n".format(deployed_policy.display_name))
print("deployed_policy : {}".format(deployed_policy))

deployed_policy

display_name    : endpoint-epsGreedy-v1

deployed_policy : <google.cloud.aiplatform.models.Endpoint object at 0x7fa3fbc46f80> 
resource name: projects/934903580331/locations/us-central1/endpoints/211442683091091456


<google.cloud.aiplatform.models.Endpoint object at 0x7fa3fbc46f80> 
resource name: projects/934903580331/locations/us-central1/endpoints/211442683091091456

In [None]:
# Send validation examples to the deployed endpoint.
# NOTE(review): `x` is a dict of tf.Tensor values (see the eval_ds element_spec); it presumably
# has to be converted to JSON-serialisable values in whatever schema the serving container
# expects — confirm. The response is not captured or displayed.
for x in eval_ds.take(INFER_SIZE):

    deployed_policy.predict(
        instances=[x],
    )

# Clean up

In [None]:
# endpoint.delete(force=True)
# uploaded_policy.delete()

**Finished**