In [1]:
! python3 -c "import google.cloud.aiplatform; print('aiplatform SDK version: {}'.format(google.cloud.aiplatform.__version__))"

aiplatform SDK version: 1.26.0


In [2]:
PREFIX = 'mabv1'

In [3]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "mabv1"
VERSION                  = "v1"

BUCKET_NAME              = "mabv1-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://mabv1-hybrid-vertex-bucket/data"
BUCKET_URI               = "gs://mabv1-hybrid-vertex-bucket"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid-vertex.movielens_dataset_mabv1"
BIGQUERY_TABLE_ID        = "hybrid-vertex.movielens_dataset_mabv1.training_dataset"

REPO_DOCKER_PATH_PREFIX  = "src"
RL_SUB_DIR               = "per_arm_rl"



In [4]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [5]:
import functools
import json
from collections import defaultdict
from typing import Callable, Dict, List, Optional, TypeVar
from datetime import datetime
import time
from pprint import pprint
import pickle as pkl

import attr

import logging
logging.disable(logging.WARNING)

import matplotlib.pyplot as plt
import numpy as np

# google cloud
from google.cloud import aiplatform, storage

# tensorflow
import tensorflow as tf
from tf_agents.agents import TFAgent
from tf_agents.bandits.agents import lin_ucb_agent
from tf_agents.bandits.agents.examples.v2 import trainer
from tf_agents.bandits.environments import (environment_utilities,
                                            movielens_py_environment,
                                            movielens_per_arm_py_environment)
from tf_agents.bandits.metrics import tf_metrics as tf_bandit_metrics
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import TFEnvironment, tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.metrics.tf_metric import TFStepMetric
from tf_agents.policies import policy_saver


# from tf_agents.trajectories import time_step as ts
# from tf_agents.specs import tensor_spec

# GPU
from numba import cuda 
import gc

# my project
from src.per_arm_rl import data_utils
from src.per_arm_rl import data_config

# tf exceptions and vars
if tf.__version__[0] != "2":
    raise Exception("The trainer only runs with TensorFlow version 2.")

T = TypeVar("T")

In [6]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [8]:
device = cuda.get_current_device()
device.reset()
gc.collect()

14

In [9]:
# cloud storage client
storage_client = storage.Client(project=PROJECT_ID)

# Vertex client
aiplatform.init(project=PROJECT_ID, location=LOCATION)

# get train data

In [10]:
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.AUTO

In [11]:
train_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/val'):
# for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/train'):
    if '.tfrecord' in blob.name:
        train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
        
train_files

['gs://mabv1-hybrid-vertex-bucket/data/val/ml-ratings-100k-val.tfrecord']

In [12]:
train_dataset = tf.data.TFRecordDataset(train_files)
train_dataset = train_dataset.map(data_utils.parse_tfrecord)

for x in train_dataset.batch(1).take(1):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'211'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'346'], dtype=object)>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'other'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>}


In [81]:
# USER_RATING_LOOKUP = data_utils.get_dictionary_lookup_by_tf_data_key(
#     key = 'user_rating'
#     , dataset= train_dataset
# )

# USER_RATING_DIM = len(USER_RATING_LOOKUP)
# print(f"USER_RATING_DIM: {USER_RATING_DIM}")

# USER_RATING_LOOKUP

USER_RATING_DIM: 5


{1.0: 0, 2.0: 1, 3.0: 2, 4.0: 3, 5.0: 4}

# get vocab

In [13]:
GENERATE_VOCABS = False
print(f"GENERATE_VOCABS: {GENERATE_VOCABS}")

VOCAB_SUBDIR   = "vocabs"
VOCAB_FILENAME = "vocab_dict.pkl"

GENERATE_VOCABS: False


In [14]:
if not GENERATE_VOCABS:

    EXISTING_VOCAB_FILE = f'gs://{BUCKET_NAME}/{VOCAB_SUBDIR}/{VOCAB_FILENAME}'
    print(f"Downloading vocab...")
    
    os.system(f'gsutil -q cp {EXISTING_VOCAB_FILE} .')
    print(f"Downloaded vocab from: {EXISTING_VOCAB_FILE}\n")

    filehandler = open(VOCAB_FILENAME, 'rb')
    vocab_dict = pkl.load(filehandler)
    filehandler.close()
    
    for key in vocab_dict.keys():
        pprint(key)

Downloading vocab...
Downloaded vocab from: gs://mabv1-hybrid-vertex-bucket/vocabs/vocab_dict.pkl

'movie_id'
'user_id'
'user_occupation_text'
'movie_genres'
'bucketized_user_age'
'max_timestamp'
'min_timestamp'
'timestamp_buckets'


# get preprocess layers

In [15]:
# hparams_dict = {
#     "XXXXX":XXXX
# }

# feature_emb_dict = {
#     "feature_key":"movie_id"
# }

NUM_OOV_BUCKETS = 1

In [16]:
# def _get_arm_feature_preprocess_layer(hparams, tf_transform_output):
#     """Returns arm feature preprocessing layer."""
#     inputs = []
#     features = []
#     embedding_configs = [
#         _EmbeddingConfig('doc_id', tf.int64, hparams.docid_embedding_dim),
#         _EmbeddingConfig('app_language_tag', tf.string, 
#                          hparams.app_language_embedding_dim),
#         _EmbeddingConfig('app_region_tag', tf.string,
#                          hparams.app_region_embedding_dim)
#     ]
#     for embedding_config in embedding_configs:
#         input_feature = tf.keras.Input(
#             name=embedding_config.feature_key,
#             shape=(1,),
#             dtype=embedding_config.dtype)
#         vocab = tf_transform_output.vocabulary_file_by_name(
#             embedding_config.feature_key)
#         max_tokens = tf_transform_output.vocabulary_size_by_name(
#             embedding_config.feature_key) + NUM_OOV_BUCKETS
#         if embedding_config.dtype == tf.int64:
#             lookup = tf.keras.layers.experimental.preprocessing.IntegerLookup(
#                 vocabulary=vocab, num_oov_indices=NUM_OOV_BUCKETS, oov_value=0)
#         else:
#             lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
#                 max_tokens=max_tokens,
#                 num_oov_indices=NUM_OOV_BUCKETS,
#                 mask_token=None,
#                 vocabulary=vocab)
#         ind_tensor = lookup(input_feature)
#         embedding_input = tf.keras.layers.Embedding(
#             input_dim=max_tokens, output_dim=embedding_config.embedding_dim)(ind_tensor)
#         embedding_input = tf.reduce_sum(embedding_input, axis=-2)
#         inputs.append(input_feature)
#         features.append(embedding_input)
#     outputs = tf.keras.layers.concatenate(features, axis=-1)
#     return tf.keras.Model(inputs=inputs, outputs=outputs)

## user features

In [49]:
global_inputs = []
global_features = []

USER_EMBEDDING_SIZE = 4 # 16

In [50]:
user_id_input_layer = tf.keras.Input(
    name="user_id",
    shape=(1,),
    dtype=tf.string
)

user_id_lookup = tf.keras.layers.StringLookup(
    max_tokens=len(vocab_dict['user_id']) + NUM_OOV_BUCKETS,
    num_oov_indices=NUM_OOV_BUCKETS,
    mask_token=None,
    vocabulary=vocab_dict['user_id'],
)(user_id_input_layer)

user_id_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['user_id']) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(user_id_lookup)

user_id_embedding = tf.reduce_sum(user_id_embedding, axis=-2)

global_inputs.append(user_id_input_layer)
global_features.append(user_id_embedding)

In [51]:
global_inputs

[<KerasTensor: shape=(None, 1) dtype=string (created by layer 'user_id')>]

In [52]:
global_features

[<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'tf.math.reduce_sum_6')>]

In [53]:
user_age_input_layer = tf.keras.Input(
    name="bucketized_user_age",
    shape=(1,),
    dtype=tf.float32
)

user_age_lookup = tf.keras.layers.IntegerLookup(
    vocabulary=vocab_dict['bucketized_user_age'],
    num_oov_indices=NUM_OOV_BUCKETS,
    oov_value=0,
)(user_age_input_layer)

user_age_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['bucketized_user_age']) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(user_age_lookup)

user_age_embedding = tf.reduce_sum(user_age_embedding, axis=-2)

global_inputs.append(user_age_input_layer)
global_features.append(user_age_embedding)

In [54]:
global_inputs

[<KerasTensor: shape=(None, 1) dtype=string (created by layer 'user_id')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'bucketized_user_age')>]

In [55]:
global_features

[<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'tf.math.reduce_sum_6')>,
 <KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'tf.math.reduce_sum_7')>]

In [56]:
user_occ_input_layer = tf.keras.Input(
    name="user_occupation_text",
    shape=(1,),
    dtype=tf.string
)

user_occ_lookup = tf.keras.layers.StringLookup(
    max_tokens=len(vocab_dict['user_occupation_text']) + NUM_OOV_BUCKETS,
    num_oov_indices=NUM_OOV_BUCKETS,
    mask_token=None,
    vocabulary=vocab_dict['user_occupation_text'],
)(user_occ_input_layer)

user_occ_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['user_occupation_text']) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(user_occ_lookup)

user_occ_embedding = tf.reduce_sum(user_occ_embedding, axis=-2)

global_inputs.append(user_occ_input_layer)
global_features.append(user_occ_embedding)

In [57]:
user_ts_input_layer = tf.keras.Input(
    name="timestamp",
    shape=(1,),
    dtype=tf.int64
)

user_ts_lookup = tf.keras.layers.Discretization(
    vocab_dict['timestamp_buckets'].tolist()
)(user_ts_input_layer)

user_ts_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['timestamp_buckets'].tolist()) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(user_ts_lookup)

user_ts_embedding = tf.reduce_sum(user_ts_embedding, axis=-2)

global_inputs.append(user_ts_input_layer)
global_features.append(user_ts_embedding)

### concat global (user) features

In [58]:
global_outputs = tf.keras.layers.concatenate(global_features, axis=-1)
global_outputs

<KerasTensor: shape=(None, 16) dtype=float32 (created by layer 'concatenate_2')>

In [59]:
global_preprocess_layers = tf.keras.Model(inputs=global_inputs, outputs=global_outputs)
global_preprocess_layers

<keras.src.engine.functional.Functional at 0x7fe29851fd60>

## arm features

In [60]:
MV_EMBEDDING_SIZE = 8 #32

arm_inputs = []
arm_features = []

In [61]:
mv_id_input_layer = tf.keras.Input(
    name="movie_id",
    shape=(1,),
    dtype=tf.string
)

mv_id_lookup = tf.keras.layers.StringLookup(
    max_tokens=len(vocab_dict['movie_id']) + NUM_OOV_BUCKETS,
    num_oov_indices=NUM_OOV_BUCKETS,
    mask_token=None,
    vocabulary=vocab_dict['movie_id'],
)(mv_id_input_layer)

mv_id_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['movie_id']) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(mv_id_lookup)

mv_id_embedding = tf.reduce_sum(mv_id_embedding, axis=-2)

arm_inputs.append(mv_id_input_layer)
arm_features.append(mv_id_embedding)

In [62]:
arm_inputs

[<KerasTensor: shape=(None, 1) dtype=string (created by layer 'movie_id')>]

In [63]:
arm_features

[<KerasTensor: shape=(None, 4) dtype=float32 (created by layer 'tf.math.reduce_sum_10')>]

In [64]:
mv_genre_input_layer = tf.keras.Input(
    name="movie_genres",
    shape=(1,),
    dtype=tf.float32
)

mv_genre_lookup = tf.keras.layers.IntegerLookup(
    vocabulary=vocab_dict['movie_genres'],
    num_oov_indices=NUM_OOV_BUCKETS,
    oov_value=0,
)(mv_genre_input_layer)

mv_genre_embedding = tf.keras.layers.Embedding(
    # Let's use the explicit vocabulary lookup.
    input_dim=len(vocab_dict['movie_genres']) + NUM_OOV_BUCKETS,
    output_dim=USER_EMBEDDING_SIZE
)(mv_genre_lookup)

mv_genre_embedding = tf.reduce_sum(mv_genre_embedding, axis=-2)

arm_inputs.append(mv_genre_input_layer)
arm_features.append(mv_genre_embedding)

### concat arm (movie) features

In [65]:
arm_outputs = tf.keras.layers.concatenate(arm_features, axis=-1)
arm_outputs

<KerasTensor: shape=(None, 8) dtype=float32 (created by layer 'concatenate_3')>

In [66]:
arm_preprocess_layers = tf.keras.Model(inputs=arm_inputs, outputs=arm_outputs)
arm_preprocess_layers

<keras.src.engine.functional.Functional at 0x7fe2985472e0>

# Create TF-Agent

In [67]:
# GLOBAL_DIM = XXX
NUM_MVS_TO_RANK = 1
NUM_USER_PER_STEP = 1

In [68]:
from tf_agents.bandits.agents import neural_epsilon_greedy_agent
from tf_agents.bandits.agents import neural_linucb_agent

class NeuralEpsilonGreedyAgentNoFn(
    neural_epsilon_greedy_agent.NeuralEpsilonGreedyAgent
):
    _enable_functions = False

class NeuralLinUCBAgentNoFn(
    neural_linucb_agent.NeuralLinUCBAgent
):
    _enable_functions = False

## observation and action specs

> The `observation_spec`` and the action_spec` methods return a nest of `(Bounded)ArraySpecs` that describe the `name`, `shape`, `datatype` and ranges of the observations and actions respectively.

### observation spec

In [69]:
from tf_agents.bandits.specs import utils as bandit_spec_utils
from tf_agents.specs import tensor_spec

obs_spec = {
    bandit_spec_utils.GLOBAL_FEATURE_KEY: {
        'user_id' : tensor_spec.TensorSpec(
            shape=(NUM_USER_PER_STEP,), dtype=tf.string
        ),
        'bucketized_user_age' : tensor_spec.TensorSpec(
            shape=(NUM_USER_PER_STEP,), dtype=tf.float32
        ),
        'user_occupation_text' : tensor_spec.TensorSpec(
            shape=(NUM_USER_PER_STEP,), dtype=tf.string
        ),
        'timestamp' : tensor_spec.TensorSpec(
            shape=(NUM_USER_PER_STEP,), dtype=tf.int64
        ),
    },
    bandit_spec_utils.PER_ARM_FEATURE_KEY: {
        'movie_id': tensor_spec.TensorSpec(
            shape=(NUM_MVS_TO_RANK,), dtype=tf.string),
        'movie_genres': tensor_spec.TensorSpec(
              shape=(NUM_MVS_TO_RANK,), dtype=tf.int64),
    }
}

obs_spec

{'global': {'user_id': TensorSpec(shape=(1,), dtype=tf.string, name=None),
  'bucketized_user_age': TensorSpec(shape=(1,), dtype=tf.float32, name=None),
  'user_occupation_text': TensorSpec(shape=(1,), dtype=tf.string, name=None),
  'timestamp': TensorSpec(shape=(1,), dtype=tf.int64, name=None)},
 'per_arm': {'movie_id': TensorSpec(shape=(1,), dtype=tf.string, name=None),
  'movie_genres': TensorSpec(shape=(1,), dtype=tf.int64, name=None)}}

In [70]:
from tf_agents.trajectories import time_step

time_step_spec = time_step.time_step_spec(obs_spec)
time_step_spec

TimeStep(
{'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
 'observation': {'global': {'bucketized_user_age': TensorSpec(shape=(1,), dtype=tf.float32, name=None),
                            'timestamp': TensorSpec(shape=(1,), dtype=tf.int64, name=None),
                            'user_id': TensorSpec(shape=(1,), dtype=tf.string, name=None),
                            'user_occupation_text': TensorSpec(shape=(1,), dtype=tf.string, name=None)},
                 'per_arm': {'movie_genres': TensorSpec(shape=(1,), dtype=tf.int64, name=None),
                             'movie_id': TensorSpec(shape=(1,), dtype=tf.string, name=None)}},
 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')})

### action spec

In [71]:
action_spec = tensor_spec.BoundedTensorSpec(
    shape=(),
    dtype=tf.int32,
    minimum=0,
    maximum=NUM_MVS_TO_RANK, # - 1,
    name='action'
)
action_spec

BoundedTensorSpec(shape=(), dtype=tf.int32, name='action', minimum=array(0, dtype=int32), maximum=array(1, dtype=int32))

In [72]:
LEARNING_RATE = 0.05

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

## create model

In [89]:
MODEL_TYPE = "neural_ucb"      # "neural_ucb" | "neural_epsilon_greedy"

BATCH_SIZE = 128

# layers
GLOBAL_LAYERS = [16,4]
ARM_LAYERS = [16,4]
COMMON_LAYERS = [4]

# encoding network
ENCODING_DIM = 5
# How many training steps to run for training the encoding network before
# switching to LinUCB
NETWORK_TRAIN_STEPS = 20

# A float representing the probability of choosing a random action 
# instead of the greedy action
EPSILON_GREEDY = 0.02

# ==============================
# HPARAMS for neural_ucb_model
# ==============================

# Exploration parameter that multiplies the confidence intervals
EXPLORE_ALPHA = 0.1
# A float forgetting factor in [0.0, 1.0]. When set to 1, the algorithm
# doesn't forget.
FORGETTING_FACTOR = 0.99

In [74]:
from tf_agents.bandits.networks import global_and_arm_feature_network

if MODEL_TYPE == 'neural_epsilon_greedy':
    network = global_and_arm_feature_network.create_feed_forward_common_tower_network(
        observation_spec=obs_spec,
        global_layers=GLOBAL_LAYERS,
        arm_layers=ARM_LAYERS,
        common_layers=COMMON_LAYERS,
        global_preprocessing_combiner=global_preprocess_layers,
        arm_preprocessing_combiner=arm_preprocess_layers,
        output_dim=1
    )
elif MODEL_TYPE == 'neural_ucb':
    network = global_and_arm_feature_network.create_feed_forward_common_tower_network(
        observation_spec=obs_spec,
        global_layers=GLOBAL_LAYERS,
        arm_layers=ARM_LAYERS,
        common_layers=COMMON_LAYERS,
        global_preprocessing_combiner=global_preprocess_layers,
        arm_preprocessing_combiner=arm_preprocess_layers,
        output_dim=ENCODING_DIM
    )
    
network

<tf_agents.bandits.networks.global_and_arm_feature_network.GlobalAndArmCommonTowerNetwork at 0x7fe29857e800>

In [75]:
from tf_agents.bandits.policies import policy_utilities

def _agent_fn(
    time_step_spec, 
    action_spec, 
    network,
    optimizer,
    model_type
):
    if model_type == "neural_epsilon_greedy":
        agent = NeuralEpsilonGreedyAgentNoFn(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            reward_network=network,
            optimizer=optimizer,
            epsilon=EPSILON_GREEDY,
            accepts_per_arm_features=True,
            emit_policy_info=(policy_utilities.InfoFields.PREDICTED_REWARDS_MEAN),
            error_loss_fn=tf.compat.v1.losses.mean_squared_error,
            gradient_clipping=None,
            debug_summaries=True,
            summarize_grads_and_vars=True,
            name='NeuralEpsilonGreedyAgent'
        )
    elif model_type == 'neural_ucb':
        agent = NeuralLinUCBAgentNoFn(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            encoding_network=network,
            encoding_network_num_train_steps=NETWORK_TRAIN_STEPS,
            encoding_dim=ENCODING_DIM,
            optimizer=optimizer,
            alpha=EXPLORE_ALPHA,
            gamma=FORGETTING_FACTOR,
            epsilon_greedy=EPSILON_GREEDY,
            accepts_per_arm_features=True,
            emit_policy_info=(
                policy_utilities.InfoFields.PREDICTED_REWARDS_MEAN,
                policy_utilities.InfoFields.PREDICTED_REWARDS_OPTIMISTIC
            ),
            error_loss_fn=tf.compat.v1.losses.mean_squared_error,
            gradient_clipping=None,
            debug_summaries=True,
            summarize_grads_and_vars=True,
            name='NeuralLinUCBAgent'
        )
        
    return agent

In [76]:
_agent_fn(
    time_step_spec=time_step_spec,
    action_spec=action_spec, 
    network=network,
    optimizer=optimizer,
    model_type=MODEL_TYPE
)

<__main__.NeuralLinUCBAgentNoFn at 0x7fe2985f0370>

## get rewards

In [82]:
def _get_rewards(element):
    """Calculates reward for the actions."""

    def _calc_reward(x):
        """Calculates reward for a single action."""
        r0 = lambda: tf.constant(0.0)
        r1 = lambda: tf.constant(-10.0)
        r2 = lambda: tf.constant(2.0)
        r3 = lambda: tf.constant(3.0)
        r4 = lambda: tf.constant(4.0)
        r5 = lambda: tf.constant(10.0)
        c1 = tf.equal(x, 1.0)
        c2 = tf.equal(x, 2.0)
        c3 = tf.equal(x, 3.0)
        c4 = tf.equal(x, 4.0)
        c5 = tf.equal(x, 5.0)
        return tf.case([(c1, r1), (c2, r2), (c3, r3),(c4, r4),(c5, r5)], default=r0, exclusive=True)

    return tf.map_fn(
        fn=_calc_reward, 
        elems=element['user_rating'], 
        dtype=tf.float32
    )

def _add_outer_dimension(x):
    """Adds an extra outer dimension."""
    if isinstance(x, dict):
        for key, value in x.items():
            x[key] = tf.expand_dims(value, 1)
        return x
    return tf.expand_dims(x, 1)

In [112]:
def _get_global_feature_list(input_features):
    """Return list of global features."""
    global_feature_names = [
        'user_id', 'bucketized_user_age', 'user_occupation_text', 'timestamp'
    ]
    
    global_features = []
    
    for global_feature in global_feature_names:
        if global_feature in input_features:
            global_features.append(input_features[global_feature])

    return global_features

def _get_global_feature_dict(input_features):
    """Returns a dictionary mapping feature key to per arm features."""
    global_feature_names = [
        'user_id', 'bucketized_user_age', 'user_occupation_text', 'timestamp'
    ]
    
    global_features = {}
    
    for global_feature_name in global_feature_names:
        if global_feature_name in input_features:
            global_features[global_feature_name] = input_features[global_feature_name]
    
    return global_features

def _get_per_arm_feature_dict(input_features):
    """Returns a dictionary mapping feature key to per arm features."""
    per_arm_feature_names = ['movie_id', 'movie_genres']
    
    arm_features = {}
    
    for per_arm_feature in per_arm_feature_names:
        if per_arm_feature in input_features:
            arm_features[per_arm_feature] = input_features[per_arm_feature]
    
    return arm_features

In [113]:
# test 

for x in train_dataset.batch(1).take(1):
    global_features = _get_global_feature_dict(x)
    arm_features = _get_per_arm_feature_dict(x)
    
    #
    global_features = _add_outer_dimension(global_features)
    arm_features = _add_outer_dimension(arm_features)
    
print(global_features)
print(arm_features)

{'user_id': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'346']], dtype=object)>, 'bucketized_user_age': <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[25.]], dtype=float32)>, 'user_occupation_text': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'other']], dtype=object)>, 'timestamp': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[874948475]])>}
{'movie_id': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'211']], dtype=object)>, 'movie_genres': <tf.Tensor: shape=(1, 1, 1), dtype=int64, numpy=array([[[4]]])>}


In [114]:
global_features

{'user_id': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'346']], dtype=object)>,
 'bucketized_user_age': <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[25.]], dtype=float32)>,
 'user_occupation_text': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'other']], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[874948475]])>}

In [115]:
_add_outer_dimension(global_features)

{'user_id': <tf.Tensor: shape=(1, 1, 1), dtype=string, numpy=array([[[b'346']]], dtype=object)>,
 'bucketized_user_age': <tf.Tensor: shape=(1, 1, 1), dtype=float32, numpy=array([[[25.]]], dtype=float32)>,
 'user_occupation_text': <tf.Tensor: shape=(1, 1, 1), dtype=string, numpy=array([[[b'other']]], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1, 1, 1), dtype=int64, numpy=array([[[874948475]]])>}

In [88]:
# Adds a time dimension.
arm_features = _add_outer_dimension(arm_features)
arm_features

{'movie_id': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'211']], dtype=object)>,
 'movie_genres': <tf.Tensor: shape=(1, 1, 1), dtype=int64, numpy=array([[[4]]])>}

In [118]:
def _trajectory_fn(element):
    """
    Converts a dataset element into a trajectory.
    """
    # global_features = _get_global_feature_list(element)
    global_features = _get_global_feature_dict(element)
    arm_features = _get_per_arm_feature_dict(element)
    # global_features = ['user_id', 'bucketized_user_age', 'user_occupation_text', 'timestamp']
    # arm_features = {'movie_id':'movie_id', 'movie_genres':'movie_genres'}
    
    # tmp
    print(f"global_features: {global_features}")
    
    # Adds a time dimension.
    arm_features = _add_outer_dimension(arm_features)
    # tmp
    print(f"arm_features: {arm_features}")
    
    observation = {
        bandit_spec_utils.GLOBAL_FEATURE_KEY:
            _add_outer_dimension(tf.concat(global_features, axis=1))
            # _add_outer_dimension(global_features, axis=1)
    }
    
    reward = _add_outer_dimension(_get_rewards(element))
    
    # To emit the predicted rewards in policy_info, we need to create dummy
    # rewards to match the definition in TensorSpec for the ones specified in
    # emit_policy_info set.
    dummy_rewards = tf.zeros([BATCH_SIZE, 1, NUM_MVS_TO_RANK])
    
    policy_info = policy_utilities.PerArmPolicyInfo(
        chosen_arm_features=arm_features,
        # Pass dummy mean rewards here to match the model_spec for emitting
        # mean rewards in policy info
        predicted_rewards_mean=dummy_rewards
    )
    if MODEL_TYPE == 'neural_ucb':
        policy_info = policy_info._replace(
            predicted_rewards_optimistic=dummy_rewards
        )
    return trajectory.single_step(
        observation=observation,
        action=tf.zeros_like(
            reward, dtype=tf.int32
        ),  # Arm features are copied from policy info, put dummy zeros here
        policy_info=policy_info,
        reward=reward,
        discount=tf.zeros_like(reward)
    )

In [119]:
for x in train_dataset.batch(1).take(1):
    print(_trajectory_fn(x))

global_features: {'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'346'], dtype=object)>, 'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>, 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'other'], dtype=object)>, 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>}
arm_features: {'movie_id': <tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'211']], dtype=object)>, 'movie_genres': <tf.Tensor: shape=(1, 1, 1), dtype=int64, numpy=array([[[4]]])>}


ValueError: Attempt to convert a value ({'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'346'], dtype=object)>, 'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>, 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'other'], dtype=object)>, 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([874948475])>}) with an unsupported type (<class 'dict'>) to a Tensor.

In [None]:
tf.expand_dims(x, 1)

In [None]:
def _serving_input_fn(input_features):
    """
    Converts input features into Timestep for serving.
    """
    global_features = _get_global_feature_list(input_features)
    arm_features = _get_per_arm_feature_dict(input_features)

    observation = {
        bandit_spec_utils.GLOBAL_FEATURE_KEY: tf.concat(global_features, axis=1),
        # Adds an num_action dimension.
        bandit_spec_utils.PER_ARM_FEATURE_KEY: _add_outer_dimension(arm_features)
    }
    batch_size = tf.reshape(
        tf.shape(observation[bandit_spec_utils.GLOBAL_FEATURE_KEY])[0], [1])
    
    return time_step.restart(observation=observation, batch_size=batch_size)

In [107]:
rb_iterator = iter(train_dataset)
next(rb_iterator)

{'bucketized_user_age': <tf.Tensor: shape=(), dtype=float32, numpy=25.0>,
 'movie_genres': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([4])>,
 'movie_id': <tf.Tensor: shape=(), dtype=string, numpy=b'211'>,
 'timestamp': <tf.Tensor: shape=(), dtype=int64, numpy=874948475>,
 'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'346'>,
 'user_occupation_text': <tf.Tensor: shape=(), dtype=string, numpy=b'other'>,
 'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=4.0>}

In [None]:
from tf_agents.train import learner
from tf_agents.train.utils import strategy_utils
from tf_agents.policies import policy_saver

def create_run_fn(agent_fn, trajectory_fn, serving_input_fn, dataset, artifact_dir):
    """
    Creates the run_fn passed to tensorflow_trainer.

    Args:
      agent_fn: Function to create a TF.Agent.
      trajectory_fn: Function to convert input data into Trajectory.
      serving_input_fn: Function to convert input data into Timestep.
      model_hparams: An `HParams` object, holds hyperparameters to build and train
        the model.

    Returns:
      A function that can be run by the tensorflow_trainer.
    """

    def _run_fn(trainer_inputs):
        """Run_fn passed to the tensorflow_trainer.

        Args:
          trainer_inputs: A `TensorflowTrainerInputs` object.
        """
        policy_save_interval = 5000
        
        tf_transform_output = trainer_util.TFTransformOutput(
            trainer_inputs.transform_dir
        )
        # hparams = tf1.training.merge_hparam(model_hparams, trainer_inputs.hparams)

        dist_strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)
        with dist_strategy.scope():
            summary_writer = tf.summary.create_file_writer(trainer_inputs.working_dir)
            with summary_writer.as_default():
                train_step = train_utils.create_train_step()
                
                model = agent_fn(
                    time_step_spec=time_step_spec,
                    action_spec=action_spec, 
                    network=network,
                    optimizer=optimizer,
                    model_type=MODEL_TYPE
                )
                # replay_buffer = dwrb.DatasetWrappingReplayBuffer(
                #     data_spec=model.training_data_spec,
                #     capacity=1,
                #     buffer_mode=mode.SERVER_CLIENT,
                #     dataset_fn=_create_dataset_fn(
                #         trainer_inputs.examples_input, [TRAIN_EXAMPLE_SPLIT],
                #         tf_transform_output,
                #         functools.partial(trajectory_fn, hparams=hparams),
                #         randomize_input=False
                #     )
                # )
                # rb_iterator = iter(train_dataset)
                # next(rb_iterator)
                experience_dataset_fn = lambda: dataset
                
                # checkpointer = common.Checkpointer(
                #     ckpt_dir=trainer_inputs.working_dir,
                #     agent=model,
                # )
                
#                 policy_learner = off_policy_learner.OffPolicyLearner(
#                     agent=model,
#                     replay_buffer=rb_iterator,
#                     sample_batch_size=BATCH_SIZE,
#                     strategy=dist_strategy,
#                     summary_writers=[summary_writer],
#                     train_checkpointer=checkpointer,
#                     train_checkpoint_steps_frequency=CHECKPOINT_FREQUENCY,
#                     policy_checkpointer=checkpointer,
#                     policy_checkpoint_steps_frequency=CHECKPOINT_FREQUENCY,
#                 )
#                 policy_learner.launch(hparams.train_steps)

                # Triggers to save the agent's policy checkpoints.
    
                saved_model_dir = f"{artifact_dir}/saved_model"
                learning_triggers = [
                    triggers.PolicySavedModelTrigger(
                        saved_model_dir,
                        model,
                        train_step,
                        interval=policy_save_interval),
                    triggers.StepPerSecondLogTrigger(train_step, interval=1000),
                ]
                agent_learner = learner.Learner(
                    root_dir = tempdir,
                    train_step = train_step,
                    agent=model,
                    experience_dataset_fn = experience_dataset_fn,
                    triggers=learning_triggers,
                    strategy=dist_strategy
                )
    
                # if trainer_inputs.tf_job.is_chief:
                logging.info('Exporting trained policy.')
                saver = policy_saver.PolicySaver(
                    policy=model.policy,
                    train_step=model.train_step_counter,
                    # input_fn_and_spec=_create_serving_input_fn_and_spec(
                    #     tf_transform_output, serving_input_fn, model)
                )
                # for key in trainer_inputs.export_dir_by_format.keys():
                #     saver.save(trainer_inputs.export_dir_by_format[key])

    return _run_fn

Options = tensorflow_trainer_inputs.Options


def run_tensorflow_trainer(
    run_fn: Callable[
        [tensorflow_trainer_inputs.TensorflowTrainerInputs], None],
    options: Optional[Options] = None
) -> None:
    
    options = options or Options()
    _run_tensorflow_trainer_internal(run_fn, options)

In [None]:
from google.cloud import aiplatform, storage

EXPERIMENT_NAME   = f'custom-neural-bandits-v3'

invoke_time       = time.strftime("%Y%m%d-%H%M%S")
RUN_NAME          = f'run-{invoke_time}'

LOG_DIR           = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/tb-logs"
ROOT_DIR          = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/root"       # Root directory for writing logs/summaries/checkpoints.
ARTIFACTS_DIR     = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/artifacts"  # Where the trained model will be saved and restored.

aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    experiment=EXPERIMENT_NAME
)

print(f"EXPERIMENT_NAME   : {EXPERIMENT_NAME}")
print(f"RUN_NAME          : {RUN_NAME}")
print(f"LOG_DIR           : {LOG_DIR}")
print(f"ROOT_DIR          : {ROOT_DIR}")
print(f"ARTIFACTS_DIR     : {ARTIFACTS_DIR}")

In [None]:
def create_run_fn():
    return trainer_common.create_run_fn(
        agent_fn = _agent_fn, 
        trajectory_fn = _trajectory_fn, 
        serving_input_fn = _serving_input_fn,
        dataset = train_datset,
        artifact_dir = ARTIFACTS_DIR
    )


In [None]:
create_run_fn()

In [None]:
# def main(unused_argv):
#   tensorflow_trainer.run_tensorflow_trainer(create_run_fn())