# Preparing off-policy training data for RL

> "Off-policy" refers to the situation where for a data record, given its observation, the current policy in training might not choose the same action as the one in said data record

**References**
* [internal code example](https://source.corp.google.com/piper///depot/google3/commerce/delivery/recommendation/shop_rl/python/topk_off_policy_reinforce/dataset_analysis.ipynb)
* following generator, ingester code from [JT-example](https://github.com/tottenjordan/tf_vertex_agents/blob/main/src/generator/generator_component.py)
* not correct, but different [BanditDev](https://github.com/alex-seto/RecipeBandit/blob/7ff5c0d8930aaf60f576222be42f5cd3181a1e49/scrap/BanditDev.ipynb)
* (blog) [RL recommender using Tf-Agent](https://medium.com/@yuchengtsai84/reinforcement-learning-based-recommender-systems-using-tf-agent-and-movielens-dataset-ebbf40b3a1a2)

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import sys
import time
import logging
logging.disable(logging.WARNING)

import tensorflow as tf

import tensorflow_datasets as tfds
from pprint import pprint

# google cloud
from google.cloud import aiplatform, storage

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
    
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [4]:
from numba import cuda 
device = cuda.get_current_device()
device.reset()

In [5]:
import gc
gc.collect()

14

### set vars

In [6]:
PREFIX = 'mabv1'

# creds, PROJECT_ID = google.auth.default()
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

PROJECT_NUM              = !gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
PROJECT_NUM              = PROJECT_NUM[0]

VERTEX_SA                = f'{PROJECT_NUM}-compute@developer.gserviceaccount.com'

VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

# locations / regions for cloud resources
LOCATION                 = 'us-central1'        
REGION                   = LOCATION
BQ_LOCATION              = 'US'

print(f"PROJECT_ID       = {PROJECT_ID}")
print(f"PROJECT_NUM      = {PROJECT_NUM}")
print(f"VPC_NETWORK_NAME = {VPC_NETWORK_NAME}")
print(f"LOCATION         = {LOCATION}")
print(f"REGION           = {REGION}")
print(f"BQ_LOCATION      = {BQ_LOCATION}")

PROJECT_ID       = hybrid-vertex
PROJECT_NUM      = 934903580331
VPC_NETWORK_NAME = ucaip-haystack-vpc-network
LOCATION         = us-central1
REGION           = us-central1
BQ_LOCATION      = US


In [7]:
# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Location of the MovieLens 100K dataset's "u.data" file.
DATA_GCS_PREFIX          = "data"
DATA_PATH                = f"{BUCKET_URI}/{DATA_GCS_PREFIX}"

VPC_NETWORK_FULL         = f"projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK_NAME}"

MY_BQ_DATASET            = BUCKET_NAME.lower().replace("-","_")

print(f"BUCKET_NAME       : {BUCKET_NAME}")
print(f"BUCKET_URI        : {BUCKET_URI}")
print(f"DATA_GCS_PREFIX   : {DATA_GCS_PREFIX}")
print(f"DATA_PATH         : {DATA_PATH}")
print(f"VPC_NETWORK_FULL  : {VPC_NETWORK_FULL}")
print(f"MY_BQ_DATASET     : {MY_BQ_DATASET}")

BUCKET_NAME       : mabv1-hybrid-vertex-bucket
BUCKET_URI        : gs://mabv1-hybrid-vertex-bucket
DATA_GCS_PREFIX   : data
DATA_PATH         : gs://mabv1-hybrid-vertex-bucket/data
VPC_NETWORK_FULL  : projects/934903580331/global/networks/ucaip-haystack-vpc-network
MY_BQ_DATASET     : mabv1_hybrid_vertex_bucket


In [8]:
# create bucket
! gsutil mb -l $REGION $BUCKET_URI

Creating gs://mabv1-hybrid-vertex-bucket/...
ServiceException: 409 A Cloud Storage bucket named 'mabv1-hybrid-vertex-bucket' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


In [9]:
# cloud storage client
storage_client = storage.Client(project=PROJECT_ID)

# Vertex client
aiplatform.init(project=PROJECT_ID, location=LOCATION)

# # bigquery client
# bqclient = bigquery.Client(
#     project=PROJECT_ID,
#     # location=LOCATION
# )

## Write TF Records

In [139]:
ratings = tfds.load("movielens/100k-ratings", split="train")

for x in ratings.batch(1).take(1):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'movie_title': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b"One Flew Over the Cuckoo's Nest (1975)"], dtype=object)>,
 'raw_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([46.], dtype=float32)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_gender': <tf.Tensor: shape=(1,), dtype=bool, numpy=array([ True])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_label': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([4])>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>,

In [258]:
for x in ratings.batch(1).skip(3).take(1):
    # value = x['movie_genres']
    # value = x['user_id']
    value = x['user_occupation_text']
    # value = tf.io.serialize_tensor(value)
    print(f"type: {type(value)}")
    # print(f"type: {type(value)}")
    # print(f"type: {type(v) for v in value}")
    print("-------")
    print(f"value: {value[0]}")
    print(f"type: value: {type(value[0])}")
    print("-------")
    value_n = value.numpy() # [0]
    print(f"value_n: {value_n}")
    print(f"type: value_n: {type(value_n)}")
    print("-------")
    value_tl = value.numpy().tolist() # [0]
    print(f"value_tl: {value_tl[0]}")
    print(f"type: value_tl: {type(value_tl[0])}")
    print("-------")
    value_tlv = value.numpy().tolist() # [0]
    print(f"value_tl: {v for value in value_tl[0]}")
    print(f"type: value_tl: {type(v) for v in value_tl[0]}")

type: <class 'tensorflow.python.framework.ops.EagerTensor'>
-------
value: b'healthcare'
type: value: <class 'tensorflow.python.framework.ops.EagerTensor'>
-------
value_n: [b'healthcare']
type: value_n: <class 'numpy.ndarray'>
-------
value_tl: b'healthcare'
type: value_tl: <class 'bytes'>
-------
value_tl: <generator object <genexpr> at 0x7f1c84623920>
type: value_tl: <generator object <genexpr> at 0x7f1c84623d10>


In [326]:
unique_user_occs = ratings.map(lambda x: x["user_occupation_text"])
unique_occ_ids = np.unique([x.numpy() for x in unique_user_occs])
unique_occ_ids

array([b'administrator', b'artist', b'doctor', b'educator', b'engineer',
       b'entertainment', b'executive', b'healthcare', b'homemaker',
       b'lawyer', b'librarian', b'marketing', b'none', b'other',
       b'programmer', b'retired', b'salesman', b'scientist', b'student',
       b'technician', b'writer'], dtype='|S13')

In [141]:
# <class 'tensorflow.python.framework.ops.EagerTensor'>

### Build Examples

> **TODO** - consolidate with `data_utils.py`

In [344]:
def _bytes_feature(value):
    """
    Get byte features
    """
    # value = tf.io.serialize_tensor(value)
    # value = value.numpy()
    if type(value) == list:
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
    else:
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[i.numpy() for i in [value]]))

def _int64_feature(value):
    """
    Get int64 feature
    """
    if type(value) == list:
        # return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[int(v) for v in value]))
    else:
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
    
def _int64_list_feature(value):
    """
    Get int64 list feature
    """
    value = value.numpy().tolist()[0]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
    # return tf.train.Feature(int64_list=tf.train.Int64List(value=[int(v) for v in value]))

def _string_array(value, shape=1):
    """
    Returns a bytes_list from a string / byte.
    """
    value = value.numpy()[0] # .tolist()[0]
    if type(value) == list:
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(v).encode('utf-8') for v in value]))
        # return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(v) for v in value]))
    else:
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value).encode('utf-8')]))
        # return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)]))

def _float_feature(value, shape=1):
    """Returns a float_list from a float / double."""
    if type(value) == list:
        return tf.train.Feature(float_list=tf.train.FloatList(value=value))
    else:
        return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

In [345]:
def build_example(data) -> tf.train.Example:
    """
    Returns: A `tf.train.Example` object holding the same data as `data_row`.
    """
    feature = {
        # user - global context features 
        "user_id": _bytes_feature(data['user_id'])
        , "user_rating": _float_feature(data['user_rating'])
        , "bucketized_user_age": _float_feature(data['bucketized_user_age'])
        , "user_occupation_text": _bytes_feature(data['user_occupation_text'])
        # , "user_occupation_label": _int64_feature(data['user_occupation_label'])
        # , "user_zip_code": _string_array(data['user_zip_code'])
        # , "user_gender": BOOL_TODO(data['user_gender'])
        , "timestamp": _int64_feature(data['timestamp'])
        
        # movie - per arm features
        , "movie_id": _bytes_feature(data['movie_id'])
        # , "movie_title": _string_array(data['movie_title'])
        , "movie_genres": _int64_list_feature(data['movie_genres'])
    }
    example_proto = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return example_proto

In [346]:
def write_tfrecords(tfrecord_file, dataset):
    with tf.io.TFRecordWriter(tfrecord_file) as writer:
        for data_row in dataset:
            example = build_example(data_row)
            writer.write(example.SerializeToString())

In [347]:
TF_RECORD_FILE = "ml-ratings-100k-train.tfrecord"

write_tfrecords(TF_RECORD_FILE, ratings)

In [348]:
LOCAL_TF_RECORD = "./ml-ratings-100k-train.tfrecord"

! gsutil cp $LOCAL_TF_RECORD $DATA_PATH/

Copying file://./ml-ratings-100k-train.tfrecord [Content-Type=application/octet-stream]...
/ [1 files][ 19.4 MiB/ 19.4 MiB]                                                
Operation completed over 1 objects/19.4 MiB.                                     


In [349]:
! gsutil ls $DATA_PATH

gs://mabv1-hybrid-vertex-bucket/data/ml-ratings-100k-train.tfrecord


### validate TF Record

In [350]:
train_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/'):
    if '.tfrecord' in blob.name:
        train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
        
train_files

['gs://mabv1-hybrid-vertex-bucket/data/ml-ratings-100k-train.tfrecord']

In [351]:
train_dataset = tf.data.TFRecordDataset(train_files)

train_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [352]:
# TODO - defined later -fix
from src.per_arm_rl import data_utils_v1 as data_utils
from src.per_arm_rl import data_config

train_dataset = train_dataset.map(data_utils.parse_tfrecord)

train_dataset

<_MapDataset element_spec={'bucketized_user_age': TensorSpec(shape=(), dtype=tf.float32, name=None), 'movie_genres': TensorSpec(shape=(1,), dtype=tf.int64, name=None), 'movie_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'timestamp': TensorSpec(shape=(), dtype=tf.int64, name=None), 'user_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_occupation_text': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(), dtype=tf.float32, name=None)}>

In [353]:
for x in train_dataset.batch(1).take(2):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>}
{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'709'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([875654590])>,
 'user_id': <tf.Tensor: shape=(1

## Dataset Stats

### unique IDs

In [354]:
# Get the unique movies and users
# unique_movie_ids = ratings.map(lambda x: x["movie_id"])
unique_movie_ids = train_dataset.map(lambda x: x["movie_id"])

unique_movie_ids = np.unique([x.numpy() for x in unique_movie_ids])

MOVIELENS_NUM_MOVIES = len(unique_movie_ids)

print(f"len(unique_movie_ids) : {len(unique_movie_ids)}")
print(f"unique_movie_ids      : {unique_movie_ids[:2]}")

len(unique_movie_ids) : 1682
unique_movie_ids      : [b'1' b'10']


In [355]:
# Get the unique movies and users
# unique_movie_ids = ratings.map(lambda x: x["movie_id"])
unique_occ_ids = train_dataset.map(lambda x: x["user_occupation_text"])

unique_occ_ids = np.unique([x.numpy() for x in unique_occ_ids])

NUM_OCCS = len(unique_occ_ids)

print(f"len(unique_occ_ids) : {len(unique_occ_ids)}")
print(f"unique_occ_ids      : {unique_occ_ids[:2]}")

len(unique_occ_ids) : 21
unique_occ_ids      : [b'administrator' b'artist']


In [356]:
# unique_user_ids = ratings.map(lambda x: x["user_id"])
unique_user_ids = train_dataset.map(lambda x: x["user_id"])

unique_user_ids = np.unique([x.numpy() for x in unique_user_ids])

MOVIELENS_NUM_USERS = len(unique_user_ids)

print(f"len(unique_user_ids) : {len(unique_user_ids)}")
print(f"unique_user_ids      : {unique_user_ids[:2]}")

len(unique_user_ids) : 943
unique_user_ids      : [b'1' b'10']


In [360]:
int(unique_user_ids[0])

1

### lookup dictionary

In [362]:
from typing import Dict

def get_dictionary_lookup_by_tf_data_key(key: str) -> Dict:
    # tensor = ratings.map(lambda x: x[key])
    tensor = train_dataset.map(lambda x: x[key])
    unique_elems = set()
    for x in tensor:
        val = x.numpy()
        if type(val) is np.ndarray: # if multi dimesnional only grab first one
            val = val[0]
        unique_elems.add(val)
    
    #return a dictionary of keys by integer values for the feature space
    return {val: i for i, val in enumerate(unique_elems)}

In [363]:
USER_AGE_LOOKUP = get_dictionary_lookup_by_tf_data_key('bucketized_user_age')
USER_AGE_DIM = len(USER_AGE_LOOKUP)

In [364]:
USER_AGE_LOOKUP

{1.0: 0, 35.0: 1, 45.0: 2, 18.0: 3, 50.0: 4, 56.0: 5, 25.0: 6}

In [365]:
USER_OCC_LOOKUP = get_dictionary_lookup_by_tf_data_key('user_occupation_text')
USER_OCC_DIM = len(USER_OCC_LOOKUP)

In [366]:
USER_OCC_LOOKUP

{b'artist': 0,
 b'student': 1,
 b'other': 2,
 b'librarian': 3,
 b'doctor': 4,
 b'engineer': 5,
 b'technician': 6,
 b'programmer': 7,
 b'scientist': 8,
 b'entertainment': 9,
 b'healthcare': 10,
 b'none': 11,
 b'educator': 12,
 b'administrator': 13,
 b'salesman': 14,
 b'retired': 15,
 b'marketing': 16,
 b'executive': 17,
 b'writer': 18,
 b'lawyer': 19,
 b'homemaker': 20}

In [367]:
MOVIE_GEN_LOOKUP = get_dictionary_lookup_by_tf_data_key('movie_genres')
MOVIE_GEN_DIM = len(MOVIE_GEN_LOOKUP)

In [368]:
MOVIE_GEN_LOOKUP

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 12: 11,
 13: 12,
 14: 13,
 15: 14,
 16: 15,
 17: 16,
 18: 17,
 19: 18}

### write data config for subsequent notebooks

In [369]:
config = f'''
USER_AGE_LOOKUP       = {USER_AGE_LOOKUP}
USER_AGE_DIM          = {USER_AGE_DIM}

USER_OCC_LOOKUP       = {USER_OCC_LOOKUP}
USER_OCC_DIM          = {USER_OCC_DIM}

MOVIE_GEN_LOOKUP      = {MOVIE_GEN_LOOKUP}
MOVIE_GEN_DIM         = {MOVIE_GEN_DIM}

MOVIELENS_NUM_MOVIES  = {MOVIELENS_NUM_MOVIES}
MOVIELENS_NUM_USERS   = {MOVIELENS_NUM_USERS}
'''
# TODO - cleanup
with open('src/per_arm_rl/data_config.py', 'w') as f:
    f.write(config)

In [370]:
# test_dict = USER_OCC_LOOKUP

In [371]:
# config = f"""
# USER_AGE_LOOKUP       = {USER_AGE_LOOKUP}
# USER_AGE_DIM          = {USER_AGE_DIM}

# USER_OCC_LOOKUP       = {USER_OCC_LOOKUP}
# USER_OCC_DIM          = {USER_OCC_DIM}

# MOVIE_GEN_LOOKUP      = {MOVIE_GEN_LOOKUP}
# MOVIE_GEN_DIM         = {MOVIE_GEN_DIM}

# MOVIELENS_NUM_MOVIES  = {MOVIELENS_NUM_MOVIES}
# MOVIELENS_NUM_USERS   = {MOVIELENS_NUM_USERS}
# """
# print(config)

In [372]:
# !echo '{config}' | gsutil cp - {BUCKET_URI}/data_stats/notebook_env.py

In [373]:
# !gsutil ls $BUCKET_URI

## Data Utils

In [374]:
REPO_DOCKER_PATH_PREFIX = 'src'
RL_SUB_DIR = 'per_arm_rl'

# ! rm -rf {REPO_DOCKER_PATH_PREFIX}/{RL_SUB_DIR}
# ! mkdir -p {REPO_DOCKER_PATH_PREFIX}/{RL_SUB_DIR}
# ! touch {REPO_DOCKER_PATH_PREFIX}/{RL_SUB_DIR}/__init__.py

In [375]:
# tmp
# USER_AGE_LOOKUP  # dict
# USER_OCC_LOOKUP  # dict
# MOVIE_GEN_LOOKUP # dict

In [386]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{RL_SUB_DIR}/data_utils.py
# Copyright 2021 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#            http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

import os
import numpy as np
from typing import Dict

import tensorflow as tf


EMBEDDING_SIZE = 128

# ============================================
# features
# ============================================
# DEFAULT_FEATURE_MAP = {
#     # user - global context features
#     'user_id': tf.io.FixedLenSequenceFeature([], tf.string),
#     'user_rating': tf.io.FixedLenSequenceFeature([], tf.float32),
#     'bucketized_user_age': tf.io.FixedLenSequenceFeature([], tf.float32),
#     'user_occupation_text': tf.io.FixedLenSequenceFeature([], tf.string),
#     # 'user_occupation_label': tf.io.FixedLenSequenceFeature([], tf.int64),
#     'timestamp': tf.io.FixedLenSequenceFeature([], tf.int64),
#     # 'user_zip_code': tf.io.FixedLenSequenceFeature([], tf.string),
#     # 'user_gender': tf.io.FixedLenSequenceFeature([], tf.bool),
    
#     # movie - per arm features
#     'movie_id': tf.io.FixedLenSequenceFeature([], tf.string),
#     'movie_title': tf.io.FixedLenSequenceFeature([], tf.string),
#     'movie_genres': tf.io.FixedLenSequenceFeature([], tf.int64),
# }

def get_all_features():
    
    feats = {
        # user - global context features
        'user_id': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        'user_rating': tf.io.FixedLenFeature(shape=(), dtype=tf.float32),
        'bucketized_user_age': tf.io.FixedLenFeature(shape=(), dtype=tf.float32),
        'user_occupation_text': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        # 'user_occupation_label': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
        'timestamp': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
        # 'user_zip_code': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        # 'user_gender': tf.io.FixedLenFeature(shape=(), dtype=tf.bool),

        # movie - per arm features
        'movie_id': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        # 'movie_title': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        'movie_genres': tf.io.FixedLenFeature(shape=(1,), dtype=tf.int64),
    }
    
    return feats 

# ============================================
# tf data parsing functions
# ============================================
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA

def parse_tfrecord(example):
    """
    Reads a serialized example from GCS and converts to tfrecord
    """
    feats = get_all_features()
    
    # example = tf.io.parse_single_example(
    example = tf.io.parse_example(
        example,
        feats
        # features=feats
    )
    return example

# data loading and parsing
def full_parse(data):
    # used for interleave - takes tensors and returns a tf.dataset
    data = tf.data.TFRecordDataset(data)
    return data

# ============================================
# Helper function for TF lookup dictionary
# ============================================

def get_dictionary_lookup_by_tf_data_key(key, dataset) -> Dict:
    tensor = dataset.map(lambda x: x[key])
    unique_elems = set()
    for x in tensor:
        val = x.numpy()
        if type(val) is np.ndarray: # if multi dimesnional only grab first one
            val = val[0]
        unique_elems.add(val)
    
    #return a dictionary of keys by integer values for the feature space
    return {val: i for i, val in enumerate(unique_elems)}

# ============================================
# load movielens
# ============================================
def load_movielens_ratings(
    ratings_dataset
    , num_users: int
    , num_movies: int
    , user_age_lookup_dict: dict
    , user_occ_lookup_dict: dict
    , movie_gen_lookup_dict: dict
):
    """
    > loads (wide) movielens ratings data 
    > returns ratings matrix
    """
    # ratings = tfds.load("movielens/100k-ratings", split="train")
    ratings_matrix = np.zeros([num_users, num_movies])
    
    local_data = ratings_dataset.map(
        lambda x: {
            'user_id': x['user_id']
            ,'movie_id':  x['movie_id']
            ,'user_rating':  x['user_rating']
            ,'bucketized_user_age': x['bucketized_user_age']
            ,'user_occupation_text': x['user_occupation_text']
            ,'movie_genres': x['movie_genres'][0]
        }
    )
    user_age_int = []
    user_occ_int = []
    mov_gen_int = []
    
    for row in local_data:
        ratings_matrix[
            int(row['user_id'].numpy()) - 1
            , int(row['movie_id'].numpy()) - 1
        ] = float(row['user_rating'].numpy())
        
        user_age_int.append(
            float(user_age_lookup_dict[row['bucketized_user_age'].numpy()]) + .0001
        )
        user_occ_int.append(
            float(user_occ_lookup_dict[row['user_occupation_text'].numpy()]) + .0001
        )
        mov_gen_int.append(
            float(movie_gen_lookup_dict[row['movie_genres'].numpy()]) + .0001
        ) 
    return ratings_matrix, np.array(user_age_int), np.array(user_occ_int), np.array(mov_gen_int)

Overwriting src/per_arm_rl/data_utils.py


### validate creating ratings matrix

In [377]:
data_config.USER_AGE_LOOKUP

{1.0: 0, 35.0: 1, 45.0: 2, 18.0: 3, 50.0: 4, 56.0: 5, 25.0: 6}

In [378]:
data_config.USER_OCC_LOOKUP

{b'artist': 0,
 b'student': 1,
 b'other': 2,
 b'librarian': 3,
 b'doctor': 4,
 b'engineer': 5,
 b'technician': 6,
 b'programmer': 7,
 b'scientist': 8,
 b'entertainment': 9,
 b'healthcare': 10,
 b'none': 11,
 b'educator': 12,
 b'administrator': 13,
 b'salesman': 14,
 b'retired': 15,
 b'marketing': 16,
 b'executive': 17,
 b'writer': 18,
 b'lawyer': 19,
 b'homemaker': 20}

In [379]:
data_config.MOVIE_GEN_LOOKUP

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 12: 11,
 13: 12,
 14: 13,
 15: 14,
 16: 15,
 17: 16,
 18: 17,
 19: 18}

In [380]:
# TODO - defined later -fix
from src.per_arm_rl import data_utils_v2 as data_utils
from src.per_arm_rl import data_config

In [381]:
train_files = []
for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_GCS_PREFIX}/'):
    if '.tfrecord' in blob.name:
        train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
        
train_files

['gs://mabv1-hybrid-vertex-bucket/data/ml-ratings-100k-train.tfrecord']

In [382]:
train_dataset = tf.data.TFRecordDataset(train_files)

train_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [383]:
train_dataset = train_dataset.map(data_utils.parse_tfrecord)

# train_dataset = tf.data.Dataset.from_tensor_slices(train_files).prefetch(
#     tf.data.AUTOTUNE,
# )
train_dataset

<_MapDataset element_spec={'bucketized_user_age': TensorSpec(shape=(), dtype=tf.float32, name=None), 'movie_genres': TensorSpec(shape=(1,), dtype=tf.int64, name=None), 'movie_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'timestamp': TensorSpec(shape=(), dtype=tf.int64, name=None), 'user_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_occupation_text': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(), dtype=tf.float32, name=None)}>

In [384]:
for x in train_dataset.batch(1).take(2):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>}
{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([25.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'709'], dtype=object)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([875654590])>,
 'user_id': <tf.Tensor: shape=(1

In [385]:
# # TODO - defined later -fix
# from src.per_arm_rl import data_utils_v2 as data_utils
# from src.per_arm_rl import data_config

test_dataset_load = data_utils.load_movielens_ratings(
    ratings_dataset = train_dataset
    , num_users = data_config.MOVIELENS_NUM_USERS
    , num_movies = data_config.MOVIELENS_NUM_MOVIES
    , user_age_lookup_dict = data_config.USER_AGE_LOOKUP
    , user_occ_lookup_dict = data_config.USER_OCC_LOOKUP
    , movie_gen_lookup_dict = data_config.MOVIE_GEN_LOOKUP
)

test_dataset_load

(array([[5., 3., 4., ..., 0., 0., 0.],
        [4., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [5., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 5., 0., ..., 0., 0., 0.]]),
 array([2.0001, 6.0001, 3.0001, ..., 3.0001, 1.0001, 3.0001]),
 array([ 4.0001,  9.0001,  1.0001, ...,  1.0001, 18.0001,  1.0001]),
 array([7.00010e+00, 4.00010e+00, 4.00010e+00, ..., 1.00001e+01,
        1.00000e-04, 4.00010e+00]))

In [389]:
ratings_matrix = test_dataset_load[0]
print(ratings_matrix.shape)
ratings_matrix

(943, 1682)


array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

# Vocab Generation - TODO

following [this internal example](https://source.corp.google.com/piper///depot/google3/commerce/delivery/recommendation/shop_rl/python/topk_off_policy_reinforce/dataset_analysis.ipynb)
* see [data_utils.py](https://source.corp.google.com/piper///depot/google3/commerce/delivery/recommendation/shop_rl/python/topk_off_policy_reinforce/data_utils.py)

In [None]:
FEATURE_ACTION = 'movie_id'
FEATURE_REWARD = 'user_rating'

def process_example(example_proto):
    """
    Returns a dataset of actions for each example.
    """
    _, sequence_feature = tf.io.parse_single_sequence_example(
        example_proto
        , sequence_features={
            FEATURE_ACTION:
                tf.io.FixedLenSequenceFeature([], tf.int64, default_value=None)
            , FEATURE_REWARD:
                tf.io.FixedLenSequenceFeature([], tf.int64, default_value=None)
        }
    )
    actions = sequence_feature[FEATURE_ACTION]
    rewards = sequence_feature[FEATURE_REWARD]
    
    return actions, rewards

In [None]:
# %%time

dataset_files = tf.io.gfile.glob(dataset_path)
dataset_files

In [None]:
example_dataset = tf.data.RecordIODataset(dataset_files)
example_dataset

In [None]:
example_dataset = example_dataset.map(
    process_example
    , num_parallel_calls=tf.data.experimental.AUTOTUNE
)
example_dataset

In [None]:
max_items_to_process = 1000000
num_elements = 0
sequence_lengths = []
actions = []
rewards = []

start_time = time.time()
for elem in example_dataset.as_numpy_iterator():
    action, reward = elem
    sequence_lengths.append(len(action))
    actions.append(action)
    rewards.append(reward)
    num_elements += 1
    if num_elements % 10000 == 0:
        print(num_elements)
    if num_elements > max_items_to_process:
        break

print('Num sequences = ', num_elements)

In [None]:
# A function which processes a TF Example and returns a nested tensor containing
# the data extracted from the Example proto.
ProcessExampleFnType = Callable[[types.Tensor], types.NestedTensor]
FilterExampleFnType = Callable[[types.NestedTensor, types.Tensor], types.Tensor]

DEFAULT_FEATURE_MAP = {
    'user_rating':
        tf.io.FixedLenSequenceFeature([], tf.float32),
    
    # The id of an item. This is what we use as the action. 
    # movie_id's from previous time steps are also used as
    # an observation. The movie_id is unique for each item/movie
    'movie_id':
        tf.io.FixedLenSequenceFeature([], tf.string),
    
    # The title
    'movie_title':
        tf.io.FixedLenSequenceFeature([], tf.string),
    
    # 'movie_genres':
    #     tf.io.FixedLenSequenceFeature([], tf.int64),
    
    'user_id':
        tf.io.FixedLenSequenceFeature([], tf.string),
    
    # user's occupation - encoded label
    'user_occupation_label':
        tf.io.FixedLenSequenceFeature([], tf.int64),
    
    # the timestamp of the ratings, 
    # represented in seconds since midnight UTC of 01/01/1970
    # 'timestamp':
    #     tf.io.FixedLenSequenceFeature([], tf.int64),
}

# Generate Off Policy data

In [None]:
def generate_simulation_data(
    raw_data_path: str
    , batch_size: int
    , rank_k: int
    , num_actions: int
    , driver_steps: int
) -> replay_buffers.TFUniformReplayBuffer:
    """
    Generates `trajectories.Trajectory` data from the simulation environment.

    Constructs a MovieLens simulation environment, and generates a set of
    `trajectories.Trajectory` data using a random policy.

    Args:
      raw_data_path: Path to MovieLens 100K's "u.data" file.
      batch_size: Batch size of environment generated quantities eg. rewards.
      rank_k: Rank for matrix factorization in the MovieLens environment; also
        the observation dimension.
      num_actions: Number of actions (movie items) to choose from.
      driver_steps: Number of steps to run per batch.

    Returns:
      A replay buffer holding randomly generated`trajectories.Trajectory` data.
    """
    # Create MovieLens simulation environment.
    # env = movielens_py_environment.MovieLensPyEnvironment(
    #     raw_data_path,
    #     rank_k,
    #     batch_size,
    #     num_movies=num_actions,
    #     csv_delimiter="\t"
    # )
    # environment = tf_py_environment.TFPyEnvironment(env)
    
    env = movielens_per_arm_py_environment.MovieLensPerArmPyEnvironment(
        data_dir = raw_data_path
        , rank_k = rank_k
        , batch_size = batch_size
        , num_actions = num_actions
        , csv_delimiter = "\t"
    )

    environment = tf_py_environment.TFPyEnvironment(env)

    # Define random policy for collecting data.
    random_policy = random_tf_policy.RandomTFPolicy(
        action_spec=environment.action_spec()
        , time_step_spec=environment.time_step_spec()
    )

    # Use replay buffer and observers to keep track of Trajectory data.
    data_spec = random_policy.trajectory_spec
    replay_buffer = trainer._get_replay_buffer(
        data_spec
        , environment.batch_size
        , driver_steps
        , 1
    )
    observers = [replay_buffer.add_batch]

    # Run driver to apply the random policy in the simulation environment.
    driver = dynamic_step_driver.DynamicStepDriver(
        env=environment
        , policy=random_policy
        , num_steps=driver_steps * environment.batch_size
        , observers=observers
    )
    driver.run()

    return replay_buffer