# Preparing off-policy training data for RL

> "Off-policy" refers to the situation where, given the observation in a logged data record, the policy currently being trained might not choose the same action as the one stored in that record

## Load env config

* use the prefix from `00-env-setup`

In [1]:
# Naming configuration for this run. PREFIX is derived from VERSION and must
# match the prefix created in `00-env-setup`.
VERSION = "v1"  # TODO
PREFIX = f'rec-bandits-{VERSION}'  # TODO

print(f"PREFIX: {PREFIX}")

PREFIX: rec-bandits-v1


**run the next cell to populate env vars**

In [2]:
# Resolve the active gcloud project; the `!` capture returns the command's
# output lines, and the first line is used as the project ID.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths (derived from PREFIX and the project ID)
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Download the environment file from the bucket, echo it, and execute it in
# the notebook namespace. This (re)defines PROJECT_ID, BUCKET_NAME, DATA_PATH,
# etc. with the values printed below.
# NOTE(review): `exec` runs whatever the bucket object contains — only use
# this with a bucket you control.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v1"
VERSION                  = "v1"

BUCKET_NAME              = "rec-bandits-v1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v1-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid_vertex.movielens_ds_rec_bandits_v1"
BIGQUERY_TABLE_ID        = "hybrid_vertex.movielens_ds_rec_bandits_v1.training_dataset"

REPO

In [3]:
# List the top level of the staging bucket to confirm it exists and is readable.
! gsutil ls $BUCKET_URI

gs://rec-bandits-v1-hybrid-vertex-bucket/config/
gs://rec-bandits-v1-hybrid-vertex-bucket/data/


## imports

In [4]:
import os
# Set before TensorFlow is imported in the next cell; presumably limits
# TensorFlow's C++ logging to errors only — TODO confirm the level semantics.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [5]:
import numpy as np
import sys
import time
from pprint import pprint
from IPython import display
import matplotlib.pyplot as plt

import logging
logging.disable(logging.WARNING)

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# GPU
from numba import cuda
import gc

# google cloud
from google.cloud import aiplatform, storage

from src.per_arm_rl import data_utils
from src.per_arm_rl import data_config # as data_config

In [6]:
# Turn on memory growth for every visible GPU, then display the device list.
gpus = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpus:
    tf.config.experimental.set_memory_growth(physical_gpu, True)

gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [8]:
# Reset the current CUDA device through numba, then run a garbage-collection
# pass. Presumably intended to release GPU memory left over from earlier
# work — TODO confirm this is safe after the TensorFlow GPU setup above.
device = cuda.get_current_device()
device.reset()
# The cell output is gc.collect()'s return value (unreachable objects found).
gc.collect()

14

### Initialize GCP clients

In [9]:
# Cloud Storage client bound to the project loaded from the env config;
# used later to list the TFRecord blobs in the bucket.
storage_client = storage.Client(project=PROJECT_ID)

# Initialize the Vertex AI SDK with the same project and the configured location.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

### Create `data_utils.py`

> this will be used to support data processing throughout the development workflow

## Prepare Movielens dataset

### load data from TensorFlow Datasets

* see [TFDS documentation](https://www.tensorflow.org/datasets/catalog/movielens#movielens100k-ratings) for more details on this dataset, feature descriptions, and other versions

In [10]:
# Load the MovieLens 100k ratings split and pretty-print one example
# (batched to size 1) to inspect the available features.
ratings = tfds.load("movielens/100k-ratings", split="train")

first_example = ratings.batch(1).take(1)
for x in first_example:
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'movie_title': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b"One Flew Over the Cuckoo's Nest (1975)"], dtype=object)>,
 'raw_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([46.], dtype=float32)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_gender': <tf.Tensor: shape=(1,), dtype=bool, numpy=array([ True])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_label': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([4])>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>,

In [11]:
# Seeded 80k / 20k split of the ratings. The shuffle buffer covers both
# subsets, and `reshuffle_each_iteration=False` keeps the shuffled order fixed
# so the `take` / `skip` subsets below do not overlap.
SPLIT_SEED = 42
NUM_TRAIN_EXAMPLES = 80_000
NUM_VAL_EXAMPLES = 20_000

tf.random.set_seed(SPLIT_SEED)
shuffled = ratings.shuffle(
    NUM_TRAIN_EXAMPLES + NUM_VAL_EXAMPLES,
    seed=SPLIT_SEED,
    reshuffle_each_iteration=False,
)

train = shuffled.take(NUM_TRAIN_EXAMPLES)
val = shuffled.skip(NUM_TRAIN_EXAMPLES).take(NUM_VAL_EXAMPLES)

### write dataset to TF Records

In [12]:
# TFRecord file names for the full dataset and the train / val splits
TF_RECORD_FILE_full = "ml-ratings-100k-full.tfrecord"
TF_RECORD_FILE_train = "ml-ratings-100k-train.tfrecord"
TF_RECORD_FILE_val = "ml-ratings-100k-val.tfrecord"

# local copies are written to the current working directory
LOCAL_TF_RECORD_full = f"./{TF_RECORD_FILE_full}"
LOCAL_TF_RECORD_train = f"./{TF_RECORD_FILE_train}"
LOCAL_TF_RECORD_val = f"./{TF_RECORD_FILE_val}"

# GCS destinations: the full file sits at the data root, splits in sub-folders
FULL_DATA_PATH = f"{DATA_PATH}"
TRAIN_DATA_PATH = f"{DATA_PATH}/train"
VAL_DATA_PATH = f"{DATA_PATH}/val"

print(f"FULL_DATA_PATH   : {FULL_DATA_PATH}")
print(f"TRAIN_DATA_PATH  : {TRAIN_DATA_PATH}")
print(f"VAL_DATA_PATH    : {VAL_DATA_PATH}")

FULL_DATA_PATH   : gs://rec-bandits-v1-hybrid-vertex-bucket/data
TRAIN_DATA_PATH  : gs://rec-bandits-v1-hybrid-vertex-bucket/data/train
VAL_DATA_PATH    : gs://rec-bandits-v1-hybrid-vertex-bucket/data/val


In [13]:
# Serialize the complete ratings dataset to a local TFRecord file.
# `list_wise=False` presumably selects the per-rating (non-listwise) record
# layout — see `data_utils.write_tfrecords`.
data_utils.write_tfrecords(TF_RECORD_FILE_full, ratings, list_wise=False)

In [14]:
# Serialize the 80k-example training split to a local TFRecord file.
data_utils.write_tfrecords(TF_RECORD_FILE_train, train, list_wise=False)

In [15]:
# Serialize the 20k-example validation split to a local TFRecord file.
data_utils.write_tfrecords(TF_RECORD_FILE_val, val, list_wise=False)

### save TF Records to GCS

In [16]:
# Copy the three local TFRecord files to their GCS destinations:
# the full dataset to the data root, the splits to `train/` and `val/`.
# `-q` suppresses gsutil's progress output.

! gsutil -q cp $LOCAL_TF_RECORD_full $FULL_DATA_PATH/
! gsutil -q cp $LOCAL_TF_RECORD_train $TRAIN_DATA_PATH/
! gsutil -q cp $LOCAL_TF_RECORD_val $VAL_DATA_PATH/

In [17]:
# Confirm the upload: expect the full-dataset file plus `train/` and `val/`.
! gsutil ls $DATA_PATH

gs://rec-bandits-v1-hybrid-vertex-bucket/data/ml-ratings-100k-full.tfrecord
gs://rec-bandits-v1-hybrid-vertex-bucket/data/train/
gs://rec-bandits-v1-hybrid-vertex-bucket/data/val/


## validate TF Records

In [20]:
# Collect gs:// URIs for the TFRecord files stored directly under the data
# prefix. `delimiter='/'` keeps the listing non-recursive, so objects under
# `train/` and `val/` are excluded and only the full-dataset file is returned
# (despite the `train_files` name).
train_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/', delimiter='/'):
    if '.tfrecord' in blob.name:
        # Build the URI from the bucket and object names rather than rewriting
        # `blob.public_url`: that URL percent-encodes the object name and its
        # host depends on the client's API endpoint, so a string replace on it
        # can yield a wrong or unconverted path.
        train_files.append(f"gs://{blob.bucket.name}/{blob.name}")

train_files

['gs://rec-bandits-v1-hybrid-vertex-bucket/data/ml-ratings-100k-full.tfrecord']

In [21]:
# Raw (still serialized) records read from the files listed above.
# Note: the listing above returned only the full-dataset file, so
# `train_dataset` covers all ratings, not just the training split.
train_dataset = tf.data.TFRecordDataset(train_files)

train_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [22]:
# Parse the serialized records into feature dictionaries.
# The dataset is rebuilt from `train_files` instead of re-mapping
# `train_dataset` in place, so re-running this cell does not apply the parser
# to already-parsed elements.
train_dataset = tf.data.TFRecordDataset(train_files).map(data_utils.parse_tfrecord)

# Print one parsed example to check the round trip through TFRecords.
for x in train_dataset.batch(1).take(1):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>}


## Generate look-up dicts

**TODO** - use a more TensorFlow-native method for generating vocabs and stats

### unique IDs

In [23]:
# Collect the distinct movie IDs present in the dataset
movie_id_ds = train_dataset.map(lambda record: record["movie_id"])
unique_movie_ids = np.unique([movie_id.numpy() for movie_id in movie_id_ds])

# Number of distinct movies; written to data_config.py further down
MOVIELENS_NUM_MOVIES = len(unique_movie_ids)

print(f"len(unique_movie_ids) : {len(unique_movie_ids)}")
print(f"unique_movie_ids      : {unique_movie_ids[:2]}")

len(unique_movie_ids) : 1682
unique_movie_ids      : [b'1' b'10']


In [24]:
# Collect the distinct user occupation labels present in the dataset
occupation_ds = train_dataset.map(lambda record: record["user_occupation_text"])
unique_occ_ids = np.unique([occupation.numpy() for occupation in occupation_ds])

# Number of distinct occupations
NUM_OCCS = len(unique_occ_ids)

print(f"len(unique_occ_ids) : {len(unique_occ_ids)}")
print(f"unique_occ_ids      : {unique_occ_ids[:2]}")

len(unique_occ_ids) : 21
unique_occ_ids      : [b'administrator' b'artist']


In [25]:
# Collect the distinct user IDs present in the dataset
user_id_ds = train_dataset.map(lambda record: record["user_id"])
unique_user_ids = np.unique([user_id.numpy() for user_id in user_id_ds])

# Number of distinct users; written to data_config.py further down
MOVIELENS_NUM_USERS = len(unique_user_ids)

print(f"len(unique_user_ids) : {len(unique_user_ids)}")
print(f"unique_user_ids      : {unique_user_ids[:2]}")

len(unique_user_ids) : 943
unique_user_ids      : [b'1' b'10']


### lookup dictionaries

In [26]:
# Map each distinct age bucket found in the dataset to an integer index
USER_AGE_LOOKUP = data_utils.get_dictionary_lookup_by_tf_data_key(
    key='bucketized_user_age',
    dataset=train_dataset,
)
USER_AGE_DIM = len(USER_AGE_LOOKUP)

print(f"USER_AGE_DIM: {USER_AGE_DIM}")

USER_AGE_LOOKUP

USER_AGE_DIM: 7


{1.0: 0, 35.0: 1, 45.0: 2, 18.0: 3, 50.0: 4, 56.0: 5, 25.0: 6}

In [27]:
# Map each distinct occupation label found in the dataset to an integer index
USER_OCC_LOOKUP = data_utils.get_dictionary_lookup_by_tf_data_key(
    key='user_occupation_text',
    dataset=train_dataset,
)
USER_OCC_DIM = len(USER_OCC_LOOKUP)

print(f"USER_OCC_DIM: {USER_OCC_DIM}")

USER_OCC_DIM: 21


In [28]:
# Map each distinct genre ID found in the dataset to an integer index
MOVIE_GEN_LOOKUP = data_utils.get_dictionary_lookup_by_tf_data_key(
    key='movie_genres',
    dataset=train_dataset,
)
MOVIE_GEN_DIM = len(MOVIE_GEN_LOOKUP)

print(f"MOVIE_GEN_DIM: {MOVIE_GEN_DIM}")

MOVIE_GEN_DIM: 19


## create `data_config.py`

> write data config for subsequent notebooks

In [29]:
# Render the lookup tables and cardinalities computed above as Python source.
# The dict reprs are embedded verbatim, so the generated module can be imported.
config = f'''
USER_AGE_LOOKUP       = {USER_AGE_LOOKUP}
USER_AGE_DIM          = {USER_AGE_DIM}

USER_OCC_LOOKUP       = {USER_OCC_LOOKUP}
USER_OCC_DIM          = {USER_OCC_DIM}

MOVIE_GEN_LOOKUP      = {MOVIE_GEN_LOOKUP}
MOVIE_GEN_DIM         = {MOVIE_GEN_DIM}

MOVIELENS_NUM_MOVIES  = {MOVIELENS_NUM_MOVIES}
MOVIELENS_NUM_USERS   = {MOVIELENS_NUM_USERS}
'''
# TODO - cleanup
# NOTE(review): REPO_DOCKER_PATH_PREFIX and RL_SUB_DIR are presumably defined
# by the env config executed at the top of the notebook — confirm.
data_config_path = f'{REPO_DOCKER_PATH_PREFIX}/{RL_SUB_DIR}/data_config.py'
with open(data_config_path, 'w') as config_file:
    config_file.write(config)

### Validate creating the ratings matrix

In [30]:
import importlib

from src.per_arm_rl import data_config as data_config

# `data_config` was already imported in the imports cell, before the file was
# rewritten by the cell above, so a plain re-import returns the cached module.
# Reload it so the validation cells below read the values just written.
data_config = importlib.reload(data_config)

In [31]:
# Display the age-bucket lookup as read from the `data_config` module.
data_config.USER_AGE_LOOKUP

{1.0: 0, 35.0: 1, 45.0: 2, 18.0: 3, 50.0: 4, 56.0: 5, 25.0: 6}

In [32]:
# Display the occupation lookup as read from the `data_config` module.
data_config.USER_OCC_LOOKUP

{b'none': 0,
 b'technician': 1,
 b'executive': 2,
 b'other': 3,
 b'homemaker': 4,
 b'programmer': 5,
 b'student': 6,
 b'doctor': 7,
 b'administrator': 8,
 b'artist': 9,
 b'entertainment': 10,
 b'healthcare': 11,
 b'librarian': 12,
 b'engineer': 13,
 b'marketing': 14,
 b'lawyer': 15,
 b'educator': 16,
 b'salesman': 17,
 b'writer': 18,
 b'scientist': 19,
 b'retired': 20}

In [33]:
# Display the genre lookup as read from the `data_config` module.
data_config.MOVIE_GEN_LOOKUP

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 12: 11,
 13: 12,
 14: 13,
 15: 14,
 16: 15,
 17: 16,
 18: 17,
 19: 18}

In [34]:
# test_dataset_load = data_utils.load_movielens_ratings(
#     ratings_dataset = train_dataset
#     , num_users = data_config.MOVIELENS_NUM_USERS
#     , num_movies = data_config.MOVIELENS_NUM_MOVIES
#     , user_age_lookup_dict = data_config.USER_AGE_LOOKUP
#     , user_occ_lookup_dict = data_config.USER_OCC_LOOKUP
#     , movie_gen_lookup_dict = data_config.MOVIE_GEN_LOOKUP
# )

# test_dataset_load

In [35]:
# ratings_matrix = test_dataset_load[0]
# print(ratings_matrix.shape)
# ratings_matrix

# Data splits

In [36]:
# tf.random.set_seed(42)
# shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

# train = shuffled.take(80_000)
# test = shuffled.skip(80_000).take(20_000)

In [37]:
# for x in train.batch(1).take(1):
#     pprint(x)

In [38]:
# for x in val.batch(1).take(1):
#     pprint(x)

### write TF Records

In [39]:
# TF_RECORD_FILE_train = "ml-ratings-100k-train.tfrecord"
# LOCAL_TF_RECORD_train = f"./{TF_RECORD_FILE_train}"

# TF_RECORD_FILE_val = "ml-ratings-100k-val.tfrecord"
# LOCAL_TF_RECORD_val = f"./{TF_RECORD_FILE_val}"

# TRAIN_DATA_PATH = f"{DATA_PATH}/train"
# VAL_DATA_PATH = f"{DATA_PATH}/val"

# print(f"TRAIN_DATA_PATH  : {TRAIN_DATA_PATH}")
# print(f"VAL_DATA_PATH    : {VAL_DATA_PATH}")

In [40]:
# data_utils.write_tfrecords(TF_RECORD_FILE_train, train)

In [41]:
# data_utils.write_tfrecords(TF_RECORD_FILE_val, val)

In [42]:
# ! gsutil -q cp $LOCAL_TF_RECORD_train $TRAIN_DATA_PATH/
# ! gsutil -q cp $LOCAL_TF_RECORD_val $VAL_DATA_PATH/

In [43]:
# ! gsutil ls $DATA_PATH

### validate TF Records

In [44]:
# ## validate

# train_files = []
# for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/train'):
#     if '.tfrecord' in blob.name:
#         train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
        
# train_files

In [45]:
# train_dataset = tf.data.TFRecordDataset(train_files)

# train_dataset = train_dataset.map(data_utils.parse_tfrecord)

# for x in train_dataset.batch(1).take(1):
#     pprint(x)

In [46]:
# # Get the unique movies and users
# unique_user_ratings = train_dataset.map(lambda x: x["user_rating"])

# unique_user_ratings = np.unique([x.numpy() for x in unique_user_ratings])

# unique_user_ratings

# Ranking Data (listwise)

In [47]:
# ratings = ratings.map(lambda x: {
#     "movie_title": x["movie_title"],
#     "user_id": x["user_id"],
#     "user_rating": x["user_rating"],
# })

# ratings

In [48]:
# unique_user_ids = np.unique(
#     np.concatenate(list(ratings.batch(1_000).map(lambda x: x["user_id"])))
# )

In [49]:
# for x in ratings.batch(1).take(1):
#     pprint(x)

In [50]:
# movies = tfds.load("movielens/100k-movies", split="train")
# movies = movies.map(lambda x: x["movie_title"])
# movies

In [51]:
# unique_movie_titles = np.unique(
#     np.concatenate(list(movies.batch(1000)))
# )

In [52]:
# tf.random.set_seed(42)

# # Split between train and tests sets, as before.
# shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

# train = shuffled.take(80_000)
# test = shuffled.skip(80_000).take(20_000)

### write tf-record for train (listwise)

In [53]:
# # We sample 50 lists for each user for the training data. For each list we
# # sample 5 movies from the movies the user rated.
# train = tfrs.examples.movielens.sample_listwise(
#     train,
#     num_list_per_user=50,
#     num_examples_per_list=5,
#     seed=42
# )

# train

In [54]:
# for x in train.batch(1).take(1):
#     pprint(x)

In [55]:
# type(x['user_rating'].numpy())

In [56]:
# # train split
# TF_RECORD_listwise_train = "ml-ratings-100k-listwise-train.tfrecord"
# LOCAL_TF_RECORD_listwise_train = f"./{TF_RECORD_listwise_train}"

# # paths
# TRAIN_DATA_PATH_listwise = f"{DATA_PATH}/listwise/train"

# print(f"TRAIN_DATA_PATH_listwise   : {TRAIN_DATA_PATH_listwise}")

In [57]:
# from src.per_arm_rl import data_utils_v3 as data_utils

In [58]:
# train

In [59]:
# for f0,f1,f2 in train.take(1):
#     print(f0)
#     print(f1)
#     print(f2)

In [60]:
# f0

In [61]:
# def tf_serialize_example(f0,f1,f2):
#     tf_string = tf.py_function(
#         serialize_example,
#         (f0, f1, f2),
#         tf.string
#     )   
#     return tf.reshape(tf_string, ())

In [62]:
# tf_serialize_example(f0, f1, f2)

In [63]:
# train = tf.data.TFRecordDataset(train)
# train

In [64]:
# data_utils.write_tfrecords(TF_RECORD_listwise_train, train, list_wise=True)

In [65]:
# ! gsutil -q cp $LOCAL_TF_RECORD_listwise_train $TRAIN_DATA_PATH_listwise/

### write tf-record for val (listwise)

In [66]:
# test = tfrs.examples.movielens.sample_listwise(
#     val,
#     num_list_per_user=1,
#     num_examples_per_list=5,
#     seed=42
# )

# test

In [67]:
# # val split
# TF_RECORD_FILE_val = "ml-ratings-100k-val.tfrecord"
# LOCAL_TF_RECORD_val = f"./{TF_RECORD_FILE_val}"

**Finished**