In [1]:
# !pip install tf-agents --user -q

In [18]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [19]:
import functools
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tf_agents.bandits.agents import lin_ucb_agent
from tf_agents.bandits.environments import stationary_stochastic_per_arm_py_environment as p_a_env
from tf_agents.bandits.metrics import tf_metrics as tf_bandit_metrics
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts
import tensorflow_datasets as tfds
from pprint import pprint

nest = tf.nest

### movies data

In [20]:
movies = tfds.load("movielens/100k-movies", split="train")

for x in movies.batch(1).take(1):
    pprint(x)

{'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[4]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'1681'], dtype=object)>,
 'movie_title': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'You So Crazy (1994)'], dtype=object)>}


### user and ratings data

In [21]:
ratings = tfds.load("movielens/100k-ratings", split="train")

for x in ratings.batch(1).take(1):
    pprint(x)

{'bucketized_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([45.], dtype=float32)>,
 'movie_genres': <tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[7]])>,
 'movie_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'357'], dtype=object)>,
 'movie_title': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b"One Flew Over the Cuckoo's Nest (1975)"], dtype=object)>,
 'raw_user_age': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([46.], dtype=float32)>,
 'timestamp': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([879024327])>,
 'user_gender': <tf.Tensor: shape=(1,), dtype=bool, numpy=array([ True])>,
 'user_id': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'138'], dtype=object)>,
 'user_occupation_label': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([4])>,
 'user_occupation_text': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'doctor'], dtype=object)>,
 'user_rating': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.], dtype=float32)>,

#### Let's make this simple and load up movielens that has features
We will only consider for this example
1) The movie genere as an Arm feature (we will concatenate multiple genres)
2) The user occupation and age bucket labels for the overall context features

We need to load the data, get the ratings - light EDA for us to get cardnality of the dataset as well as lookups for the 

In [22]:
# Get the unique movies and users
unique_movie_ids = ratings.map(lambda x: x["movie_id"])
unique_movie_ids = np.unique([x.numpy() for x in unique_movie_ids])
MOVIELENS_NUM_MOVIES = len(unique_movie_ids)


print(f"len(unique_movie_ids) : {len(unique_movie_ids)}")
print(f"unique_movie_ids      : {unique_movie_ids[:2]}")

len(unique_movie_ids) : 1682
unique_movie_ids      : [b'1' b'10']


In [23]:
unique_user_ids = ratings.map(lambda x: x["user_id"])
unique_user_ids = np.unique([x.numpy() for x in unique_user_ids])
MOVIELENS_NUM_USERS = len(unique_user_ids)


print(f"len(unique_user_ids) : {len(unique_user_ids)}")
print(f"unique_user_ids      : {unique_user_ids[:2]}")

len(unique_user_ids) : 943
unique_user_ids      : [b'1' b'10']


In [24]:
## Get the unnique set of user buckets and create a lookup table

In [25]:
from typing import Dict

def get_dictionary_lookup_by_tf_data_key(key: str) -> Dict:
    tensor = ratings.map(lambda x: x[key])
    unique_elems = set()
    for x in tensor:
        val = x.numpy()
        if type(val) is np.ndarray: # if multi dimesnional only grab first one
            val = val[0]
        unique_elems.add(val)
    
    #return a dictionary of keys by integer values for the feature space
    return {val: i for i, val in enumerate(unique_elems)}


In [26]:
user_age_lookup = get_dictionary_lookup_by_tf_data_key('bucketized_user_age')
user_age_dim = len(user_age_lookup)

In [27]:
user_age_lookup

{1.0: 0, 35.0: 1, 45.0: 2, 18.0: 3, 50.0: 4, 56.0: 5, 25.0: 6}

In [28]:
user_occ_lookup = get_dictionary_lookup_by_tf_data_key('user_occupation_text')
user_occ_dim = len(user_occ_lookup)

In [29]:
user_occ_lookup

{b'entertainment': 0,
 b'salesman': 1,
 b'homemaker': 2,
 b'librarian': 3,
 b'administrator': 4,
 b'other': 5,
 b'none': 6,
 b'student': 7,
 b'executive': 8,
 b'scientist': 9,
 b'artist': 10,
 b'doctor': 11,
 b'engineer': 12,
 b'educator': 13,
 b'programmer': 14,
 b'writer': 15,
 b'technician': 16,
 b'marketing': 17,
 b'healthcare': 18,
 b'lawyer': 19,
 b'retired': 20}

In [30]:
movie_gen_lookup = get_dictionary_lookup_by_tf_data_key('movie_genres')
movie_gen_dim = len(movie_gen_lookup)

In [31]:
movie_gen_lookup

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 12: 11,
 13: 12,
 14: 13,
 15: 14,
 16: 15,
 17: 16,
 18: 17,
 19: 18}

In [72]:

 #from https://github.com/tensorflow/agents/blob/master/tf_agents/bandits/environments/dataset_utilities.py#L153
    
# def load_movielens_data(data_file, delimiter=','):
#     """Loads the movielens data and returns the ratings matrix."""
#     ratings_matrix = np.zeros([MOVIELENS_NUM_USERS, MOVIELENS_NUM_MOVIES])
#     with tf.io.gfile.GFile(data_file, 'r') as infile:
#     # The file is a csv with rows containing:
#     # user id | item id | rating | timestamp
#     reader = csv.reader(infile, delimiter=delimiter)
#     for row in reader:
#         user_id, item_id, rating, _ = row
#         ratings_matrix[int(user_id) - 1, int(item_id) - 1] = float(rating)
#     return ratings_matrix



def load_movielens_data(ratings_dataset):
    # ratings = tfds.load("movielens/100k-ratings", split="train")
    ratings_matrix = np.zeros([MOVIELENS_NUM_USERS, MOVIELENS_NUM_MOVIES])
    local_data = ratings_dataset.map(lambda x: {'user_id': x['user_id']
                                                 ,'movie_id':  x['movie_id']
                                                 ,'user_rating':  x['user_rating']
                                                 ,'bucketized_user_age': x['bucketized_user_age']
                                                 ,'user_occupation_text': x['user_occupation_text']
                                                 ,'movie_genres': x['movie_genres'][0]
                                               }
                                                                         )
    user_age_int = []
    user_occ_int = []
    mov_gen_int = []
    for row in local_data:
        ratings_matrix[int(row['user_id'].numpy()) - 1, int(row['movie_id'].numpy()) - 1] = float(row['user_rating'].numpy())
        user_age_int.append(user_age_lookup[row['bucketized_user_age'].numpy()])
        user_occ_int.append(user_occ_lookup[row['user_occupation_text'].numpy()])
        mov_gen_int.append(movie_gen_lookup[row['movie_genres'].numpy()])
    return ratings_matrix, np.array(user_age_int), np.array(user_occ_int), np.array(mov_gen_int)
    

In [73]:
ratings_matrix, user_age_int, user_occ_int, mov_gen_int = load_movielens_data(ratings)

In [74]:
from tf_agents.bandits.specs import utils as bandit_spec_utils

## Replicate an agent using the above data

https://github.com/tensorflow/agents/blob/master/tf_agents/bandits/environments/movielens_per_arm_py_environment.py

Create an arm spec from this utility function
https://www.tensorflow.org/agents/api_docs/python/tf_agents/specs/bandit_spec_utils/create_per_arm_observation_spec

In [75]:
# Example observation spec from above
# There are 20 user occupations and 7 age buckets. This makes our global dimension 27
# There are 19 genres, and that will be the arm dimension for this example

from tf_agents.specs.bandit_spec_utils import create_per_arm_observation_spec as create_obs_spec
create_obs_spec(
    global_dim = 1,
    per_arm_dim = 2,
    max_num_actions = 10,
    add_num_actions_feature = False
) 

{'global': TensorSpec(shape=(1,), dtype=tf.float32, name=None),
 'per_arm': TensorSpec(shape=(10, 2), dtype=tf.float32, name=None)}

In [76]:
ratings.cardinality().numpy()

100000

In [86]:
"""Class implementation of the per-arm MovieLens Bandit environment."""
from __future__ import absolute_import

import random
from typing import Optional, Text
import gin
import numpy as np

from tf_agents.bandits.environments import bandit_py_environment
from tf_agents.bandits.environments import dataset_utilities
from tf_agents.bandits.specs import utils as bandit_spec_utils
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as ts


GLOBAL_KEY = bandit_spec_utils.GLOBAL_FEATURE_KEY
PER_ARM_KEY = bandit_spec_utils.PER_ARM_FEATURE_KEY


# @gin.configurable
class MovieLensPerArmPyEnvironment(bandit_py_environment.BanditPyEnvironment):
    """Implements the per-arm version of the MovieLens Bandit environment.

    This environment implements the MovieLens 100K dataset, available at:
    https://www.kaggle.com/prajitdatta/movielens-100k-dataset

    This dataset contains 100K ratings from 943 users on 1682 items.
    This csv list of:
    user id | item id | rating | timestamp.
    This environment computes a low-rank matrix factorization (using SVD) of the
    data matrix `A`, such that: `A ~= U * Sigma * V^T`.

    The environment uses the rows of `U` as global (or user) features, and the
    rows of `V` as per-arm (or movie) features.

    The reward of recommending movie `v` to user `u` is `u * Sigma * v^T`.
    """

    def __init__(self,
               dataset = ratings,
               rank_k: int = 2,
               user_feature_dim: int = 2,
               movie_feature_dim: int = 2,
               batch_size: int = 10,
               num_actions: int = 100,
               name: Optional[Text] = 'movielens_per_arm'):
        """Initializes the Per-arm MovieLens Bandit environment.

        Args:
          data_dir: (string) Directory where the data lies (in text form).
          rank_k : (int) Which rank to use in the matrix factorization. This will
            also be the feature dimension of both the user and the movie features.
          batch_size: (int) Number of observations generated per call.
          num_actions: (int) How many movies to choose from per round.
          csv_delimiter: (string) The delimiter to use in loading the data csv file.
          name: (string) The name of this environment instance.
        """
        self._batch_size = batch_size
        self._movie_context_dim = movie_feature_dim
        self._user_context_dim = user_feature_dim
        self._num_actions = num_actions

        # Compute the matrix factorization.
        # self._data_matrix = dataset_utilities.load_movielens_data(
        #     data_dir, delimiter=csv_delimiter)

        self._data_matrix, self._user_age_int, self._user_occ_int, self._mov_gen_int = load_movielens_data(ratings)
        self._num_users, self._num_movies = self._data_matrix.shape

        # Compute the SVD.
        u, s, vh = np.linalg.svd(self._data_matrix, full_matrices=False)

        # Keep only the largest singular values.
        self._u_hat = u[:, :rank_k].astype(np.float32)
        self._s_hat = s[:rank_k].astype(np.float32)
        self._v_hat = np.transpose(vh[:rank_k]).astype(np.float32)

        self._approx_ratings_matrix = np.matmul(self._u_hat * self._s_hat,
                                                np.transpose(self._v_hat))

        self._action_spec = array_spec.BoundedArraySpec(
            shape=(),
            dtype=np.int32,
            minimum=0,
            maximum=num_actions - 1,
            name='action')
        observation_spec = {
            GLOBAL_KEY:
                array_spec.ArraySpec(shape=[user_feature_dim], dtype=np.float32),
            PER_ARM_KEY:
                array_spec.ArraySpec(
                    shape=[num_actions, movie_feature_dim+1], dtype=np.float32),
        }
        self._time_step_spec = ts.time_step_spec(observation_spec)

        self._current_user_indices = np.zeros(batch_size, dtype=np.int32)
        self._previous_user_indices = np.zeros(batch_size, dtype=np.int32)

        self._current_movie_indices = np.zeros([batch_size, num_actions],
                                               dtype=np.int32)
        self._previous_movie_indices = np.zeros([batch_size, num_actions],
                                                dtype=np.int32)

        self._observation = {
            GLOBAL_KEY:
                np.zeros([batch_size, user_feature_dim]),
            PER_ARM_KEY:
                np.zeros([batch_size, num_actions, movie_feature_dim+1]),
        }

        super(MovieLensPerArmPyEnvironment, self).__init__(
            observation_spec, self._action_spec, name=name)

    @property
    def batch_size(self):
        return self._batch_size

    @property
    def batched(self):
        return True

    def _observe(self):
        sampled_user_indices = np.random.randint(
            self._num_users, size=self._batch_size)
        self._previous_user_indices = self._current_user_indices
        self._current_user_indices = sampled_user_indices

        sampled_movie_indices = np.array([
            random.sample(range(self._num_movies), self._num_actions)
            for _ in range(self._batch_size)
        ])
        
        movie_index_vector = sampled_movie_indices.reshape(-1)
        print(movie_index_vector.shape)
        flat_genre_list = self._mov_gen_int[movie_index_vector] #shape of 1
        flat_movie_list = self._v_hat[movie_index_vector] #shape of 2
        combined_movie_features = np.concatenate((flat_movie_list,flat_genre_list), axis=0)
        current_movies = combined_movie_features.reshape(
            [self._batch_size, self._num_actions, self._movie_context_dim+1])

        self._previous_movie_indices = self._current_movie_indices
        self._current_movie_indices = sampled_movie_indices

        batched_observations = {
            GLOBAL_KEY:
                self._u_hat[sampled_user_indices],
            PER_ARM_KEY:
                current_movies,
        }
        return batched_observations

    def _apply_action(self, action):
        chosen_arm_indices = self._current_movie_indices[range(self._batch_size),
                                                         action]
        return self._approx_ratings_matrix[self._current_user_indices,
                                           chosen_arm_indices]

    def _rewards_for_all_actions(self):
        rewards_matrix = self._approx_ratings_matrix[
            np.expand_dims(self._previous_user_indices, axis=-1),
            self._previous_movie_indices]
        return rewards_matrix

    def compute_optimal_action(self):
        return np.argmax(self._rewards_for_all_actions(), axis=-1)

    def compute_optimal_reward(self):
        return np.max(self._rewards_for_all_actions(), axis=-1)

In [87]:
my_ml_env = MovieLensPerArmPyEnvironment()

In [88]:
print('observation spec: ', my_ml_env.observation_spec())
print('\nAn observation: ', my_ml_env.reset().observation)

observation spec:  {'global': ArraySpec(shape=(2,), dtype=dtype('float32'), name=None), 'per_arm': ArraySpec(shape=(100, 3), dtype=dtype('float32'), name=None)}
(1000,)


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [55]:
my_ml_env._v_hat.shape

(1682, 2)

In [59]:
sampled_movie_indices = np.array([
            random.sample(range(my_ml_env._num_movies), my_ml_env._num_actions)
            for _ in range(my_ml_env._batch_size)
        ])
movie_index_vector = sampled_movie_indices.reshape(-1)

In [69]:
flat_genre_list = my_ml_env._mov_gen_int[movie_index_vector] #shape of 1

TypeError: only integer scalar arrays can be converted to a scalar index

In [65]:
flat_genre_list

4

In [68]:
genre_array = np.array(my_ml_env._mov_gen_int)

In [70]:
flat_genre_list = genre_array[movie_index_vector] #shape of 1

In [71]:
flat_genre_list

array([ 4,  3,  7, 18,  0,  0,  7,  4,  7,  4,  0, 14,  0,  7,  9,  7,  1,
        4,  7,  0,  4,  7,  7, 14,  5,  7,  7, 12, 11,  0,  0,  0,  7,  2,
        0,  1,  7,  0,  0,  7,  7,  0,  7,  7,  4,  0,  0,  0,  7,  7,  0,
        0,  0,  7,  7,  4,  7, 14,  4,  5,  0,  0,  2,  7,  1,  7,  0,  0,
        7,  4,  7,  0, 10,  0,  0,  7,  7,  4,  7,  7,  7,  4,  7,  7,  7,
        7,  0,  7,  5,  7,  4,  0,  7,  0,  0,  7,  0,  0,  7,  7,  4,  5,
        1,  1,  4,  4,  2,  7,  0,  4, 10,  6, 14, 14, 12,  4,  7,  1,  5,
        0,  0,  0,  0,  2,  0,  7,  7,  0,  7,  4,  7,  4,  0,  0,  7,  7,
       15,  7,  7,  7,  0,  7,  0,  4,  7,  4,  4,  7,  4,  0, 10,  4,  4,
        0,  4,  0,  0,  0,  4, 12,  7,  4,  4, 14,  7,  2,  0,  1,  0,  5,
        1,  7,  4,  4,  0, 11,  4,  0,  7,  1,  0,  4,  4,  7,  0,  3,  7,
        7, 11,  7,  4,  0,  7,  4, 10,  7,  1,  7,  4,  7,  0,  7,  0,  5,
        7,  4,  1,  4,  0,  0,  7,  0,  1,  7,  4,  7,  0,  7,  7,  4,  7,
        0,  7,  4,  7, 10