In [None]:
import collections
import torch
import pandas as pd
import numpy as np
import random
import scipy.sparse as sp

Reference: https://github.com/rail-berkeley/rlkit/tree/master/rlkit/exploration_strategies

In [None]:
class GaussianStrategy():
  """Exploration strategy that perturbs an action with Gaussian noise.

  The noise standard deviation is interpolated linearly from ``max_sigma``
  to ``min_sigma`` over ``decay_period`` steps and then stays at
  ``min_sigma``.
  """

  def __init__(self, max_sigma=1.0, min_sigma=None, decay_period=10):
    """
    Args:
      max_sigma: standard deviation used at step 0.
      min_sigma: standard deviation reached after ``decay_period`` steps;
        defaults to ``max_sigma`` (i.e. no decay).
      decay_period: number of steps over which sigma is interpolated.
    """
    self._max_sigma = max_sigma
    if min_sigma is None:
        min_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period

  # add gaussian noise to action
  def get_action(self, action, t=None):
    """Return ``action`` plus Gaussian noise, clipped to [-1, 1].

    Args:
      action: torch tensor holding the raw action.
      t: current step used for the sigma schedule; ``None`` (the default)
        is treated as step 0, so ``max_sigma`` is used.

    Returns:
      A float32 tensor with one extra leading dimension of size 1.
    """
    # The default t=None must not reach the arithmetic below.
    step = 0 if t is None else t
    progress = min(1.0, step * 1.0 / self._decay_period)
    sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * progress

    noise = np.random.normal(size=len(action)) * sigma
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid otherwise.
    noisy = np.clip(action.detach().cpu().numpy() + noise, -1.0, 1.0)
    # Add the leading dimension on the ndarray instead of wrapping it in a
    # Python list, which is the slow tensor-construction path.
    return torch.as_tensor(noisy[np.newaxis], dtype=torch.float32)

In [None]:
class OUStrategy():
  """Exploration strategy that adds Ornstein-Uhlenbeck noise to an action.

  The internal noise state is pulled towards ``mu`` at rate ``theta`` and
  perturbed by Gaussian noise scaled by ``sigma``. ``sigma`` is interpolated
  linearly from ``max_sigma`` to ``min_sigma`` over ``decay_period`` steps.
  """

  def __init__(self, action_dim, mu=0.0, theta=0.15, max_sigma=0.4, min_sigma=0.4, decay_period=10):
    """
    Args:
      action_dim: length of the noise state vector.
      mu: value the noise state reverts to (also its initial value).
      theta: mean-reversion rate.
      max_sigma: noise scale at step 0 (also the initial ``sigma``).
      min_sigma: noise scale reached after ``decay_period`` steps.
      decay_period: number of steps over which sigma is interpolated.
    """
    self.mu = mu
    self.theta = theta
    self.sigma = max_sigma
    self.max_sigma = max_sigma
    self.min_sigma = min_sigma
    self.decay_period = decay_period
    self.action_dim = action_dim
    self.reset()

  def reset(self):
    """Reset the noise state to ``mu`` in every dimension."""
    self.state = np.ones(self.action_dim) * self.mu

  def evolve_state(self):
    """Advance the noise state by one step and return the new state."""
    x  = self.state
    dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
    self.state = x + dx
    return self.state

  def get_action(self, action, t=0):
    """Return ``action`` plus the evolved noise state (no clipping).

    Args:
      action: torch tensor holding the raw action.
      t: current step used for the sigma schedule; ``None`` is treated as 0.

    Returns:
      A float32 tensor with one extra leading dimension of size 1.
    """
    ou_state = self.evolve_state()
    # sigma is refreshed after the state has evolved, so the value computed
    # here is the one used by the next call to evolve_state().
    step = 0 if t is None else t
    self.sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * min(1.0, step / self.decay_period)
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid otherwise.
    noisy = action.detach().cpu().numpy() + ou_state
    # Add the leading dimension on the ndarray instead of wrapping it in a
    # Python list, which is the slow tensor-construction path.
    return torch.as_tensor(noisy[np.newaxis], dtype=torch.float32)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Pairs every positive (user, book) interaction with sampled negatives.

    ``self.data`` is a 2-D array whose rows are ``[user, book, label]``. Each
    positive row (label 1) is followed by ``negative_interactions`` rows with
    label 0 for the same user.
    """

    def __init__(self, positive_interactions, num_of_books, interaction_matrix, negative_interactions=99):
        """
        Args:
            positive_interactions: sequence of ``(user, book)`` pairs.
            num_of_books: exclusive upper bound for sampled negative book ids
                (ids are drawn with ``np.random.randint(num_of_books)``).
            interaction_matrix: container supporting
                ``(user, book) in interaction_matrix``; pairs found in it are
                never used as negatives.
            negative_interactions: number of negatives drawn per positive.
        """
        super(Dataset, self).__init__()
        self.positive_interactions = positive_interactions
        self.num_of_books = num_of_books
        self.interaction_matrix = interaction_matrix
        self.negative_interactions = negative_interactions

        self.reset()

    def reset(self):
        """Resample the negatives and rebuild ``self.data``."""
        # reshape(-1, 2) keeps the pair array 2-D even when there are no
        # positives, so the concatenation below yields an empty (0, 3) array
        # instead of failing on mismatched dimensions.
        pairs = np.array(self.create_data()).reshape(-1, 2)
        group_size = 1 + self.negative_interactions
        labels = np.zeros(len(self.positive_interactions) * group_size)
        # The first row of every group is the positive interaction.
        labels[::group_size] = 1
        self.data = np.concatenate([pairs, labels[:, np.newaxis]], axis=1)

    def create_data(self):
        """Return a list of ``[user, book]`` rows: each positive, then its negatives."""
        rows = []
        for user, positive in self.positive_interactions:
            rows.append([user, positive])
            for _ in range(self.negative_interactions):
                negative = np.random.randint(self.num_of_books)
                # if user has rated the book, find another book which hasn't been rated by this user
                while (user, negative) in self.interaction_matrix:
                    negative = np.random.randint(self.num_of_books)
                rows.append([user, negative])
        return rows

    def __len__(self):
        """Number of rows (positives plus negatives)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx`` as a dict with keys 'user', 'book' and 'label'."""
        user, book, label = self.data[idx]
        return {'user': user, 'book': book, 'label': np.float32(label)}

In [None]:
def filter_data(df, rating_threshold=6, min_books=100, max_books=500):
    """Keep well-rated interactions of moderately active users and add dense ids.

    Args:
        df: DataFrame with at least the columns 'user_id', 'isbn' and 'rating'.
        rating_threshold: only rows with ``rating > rating_threshold`` are kept.
        min_books: minimum number of kept ratings a user must have (inclusive).
        max_books: maximum number of kept ratings a user may have (inclusive).

    Returns:
        A new DataFrame (``df`` is not modified) containing the kept rows plus
        two columns: 'user' and 'book', contiguous integer ids starting at 1
        derived from 'user_id' and 'isbn' respectively.
    """
    # keep only ratings strictly above the threshold (7 or higher by default);
    # a user's lower-rated interactions with the same books are dropped
    well_rated = df[df.rating > rating_threshold]

    # take users who have between min_books and max_books such ratings
    # (100-500 by default) due to RAM limitations
    ratings_per_user = well_rated.groupby(['user_id'])['isbn'].count()
    in_range = (ratings_per_user >= min_books) & (ratings_per_user <= max_books)
    kept_users = ratings_per_user[in_range].index.tolist()

    # Explicit copy: the new columns are added to an independent frame rather
    # than to a slice of `df`, which avoids pandas' SettingWithCopyWarning.
    filtered_df = well_rated[well_rated['user_id'].isin(kept_users)].copy()

    # map user_id and isbn to contiguous values to avoid OOM issues
    filtered_df['user'] = pd.factorize(filtered_df['user_id'])[0] + 1
    filtered_df['book'] = pd.factorize(filtered_df['isbn'])[0] + 1

    return filtered_df

In [None]:
def _build_interaction_matrix(pairs, shape):
    """Build a DOK matrix with 1.0 at every (user, book) pair in ``pairs``."""
    # De-duplicate first: converting COO to DOK sums repeated coordinates,
    # whereas every observed pair must be stored as exactly 1.0.
    unique_pairs = pairs.drop_duplicates().to_numpy()
    values = np.ones(len(unique_pairs), dtype=np.float32)
    coordinates = (unique_pairs[:, 0], unique_pairs[:, 1])
    return sp.coo_matrix((values, coordinates), shape=shape).todok()


def preprocess_data(filtered_df):
    """Split interactions into train/test and build sparse interaction matrices.

    Args:
        filtered_df: DataFrame with integer 'user' and 'book' id columns.

    Returns:
        Tuple ``(train_data, train_matrix, test_data, test_matrix,
        num_of_users, num_of_books, apt_users)`` where the ``*_data`` values
        are DataFrames with columns 'user' and 'book', the ``*_matrix`` values
        are ``(num_of_users, num_of_books)`` DOK matrices holding 1.0 for each
        interaction, and ``apt_users`` is a 1-D array of user ids with 137-185
        training interactions.
    """
    data = filtered_df[['user', 'book']]

    # ids are used directly as matrix indices, so sizes are max id + 1
    num_of_users = data['user'].max() + 1
    num_of_books = data['book'].max() + 1

    # train-test split (90/10, fixed seed)
    train_data = data.sample(frac=0.9, random_state=101)
    test_data = data.drop(train_data.index)

    # create sparse matrix for user-book interaction
    # Built through COO -> DOK rather than dict.update() on the DOK matrix,
    # which depends on dok_matrix being a dict subclass. float32 is used
    # because float16 is not a dtype scipy.sparse supports.
    shape = (int(num_of_users), int(num_of_books))
    train_matrix = _build_interaction_matrix(train_data, shape)
    test_matrix = _build_interaction_matrix(test_data, shape)

    # apt users are those who have rated 137-185 books
    interactions_per_user = np.asarray(train_matrix.sum(axis=1)).ravel()
    apt_mask = (interactions_per_user >= 137) & (interactions_per_user <= 185)
    apt_users = np.arange(num_of_users)[apt_mask]

    return (train_data, train_matrix, test_data, test_matrix, num_of_users, num_of_books, apt_users)

In [None]:
def process_user_features(user_features_df, filtered_df):
  """Attach encoded age and country features to each interaction row.

  Args:
    user_features_df: DataFrame with columns 'user_id', 'age' and 'country'.
    filtered_df: DataFrame with columns 'user_id' and 'user'.

  Returns:
    Tuple ``(df, num_of_countries, num_of_age_buckets)``. ``df`` has the
    columns 'user_id', 'user', 'age', 'country', 'country_encoded' and
    'age_encoded'; ``num_of_countries`` is the number of distinct country
    values in ``df``; ``num_of_age_buckets`` is the requested bucket count.
  """
  # Keep only the needed features, indexed by user_id, and left-join them
  # onto the (user_id, user) pairs.
  features_by_id = user_features_df[['user_id', 'age', 'country']].set_index('user_id')
  df = filtered_df[['user_id', 'user']].join(features_by_id, on='user_id', how='left')

  # Country -> integer code, numbered in order of first appearance.
  unique_countries = df['country'].unique()
  num_of_countries = len(unique_countries)
  mapping_country = dict(zip(unique_countries, range(num_of_countries)))

  # Age -> quantile bucket index; duplicate bin edges are dropped by qcut.
  num_of_age_buckets = 5
  age_codes = pd.qcut(df['age'], q=num_of_age_buckets, labels=False, duplicates='drop')

  df = df.assign(
      country_encoded=df['country'].map(mapping_country),
      age_encoded=age_codes,
  )
  return (df, num_of_countries, num_of_age_buckets)

In [None]:
def get_user_features(user, user_features_df):
  """Look up the encoded country and age for ``user``.

  Args:
    user: value matched against the 'user' column.
    user_features_df: DataFrame with columns 'user', 'country_encoded' and
      'age_encoded'.

  Returns:
    Tuple ``(country_encoded, age_encoded)`` taken from the first matching row.

  Raises:
    IndexError: if no row matches ``user``.
  """
  matching_rows = user_features_df.loc[user_features_df['user'] == user]
  first_match = matching_rows.iloc[0]
  return (first_match['country_encoded'], first_match['age_encoded'])

In [None]:
def hit_metric(recommended, actual, label, df):
  """Return True if any recommended book counts as a hit.

  A recommendation is a hit when the first row of ``df`` whose 'book' equals
  it has a truthy 'best_seller', or when it is one of the entries of
  ``actual`` whose corresponding ``label`` is truthy.

  Args:
    recommended: iterable of recommended book ids.
    actual: sequence of candidate book ids.
    label: sequence indexable by position in ``actual``.
    df: DataFrame with columns 'book' and 'best_seller'.

  Returns:
    True on the first hit, otherwise False.
  """
  relevant = [candidate for position, candidate in enumerate(actual) if label[position]]

  for candidate in recommended:
    matches = df[df.book == candidate]
    # Best-seller check comes first, exactly as a short-circuit before the
    # relevance lookup.
    if len(matches) > 0 and matches.iloc[0].best_seller:
      return True
    if candidate in relevant:
      return True

  return False

def dcg_metric(recommended, actual, label, df):
  """Return the discounted gain of the first hit in ``recommended``.

  A recommendation is a hit when the first row of ``df`` whose 'book' equals
  it has a truthy 'best_seller', or when it is one of the entries of
  ``actual`` whose corresponding ``label`` is truthy.

  The discount is ``1 / log2(rank + 2)`` where ``rank`` is the 0-based
  position of the hit within ``recommended``, so a hit in the first slot
  scores 1.0 and later hits score less. Using the rank in ``recommended``
  (rather than a lookup in ``actual``) also means a best-seller hit that is
  not part of ``actual`` is scored instead of raising ``ValueError``.

  Args:
    recommended: iterable of recommended book ids, best first.
    actual: sequence of candidate book ids.
    label: sequence indexable by position in ``actual``.
    df: DataFrame with columns 'book' and 'best_seller'.

  Returns:
    The discounted gain of the first hit, or 0 when there is no hit.
  """
  related_books = [a for idx, a in enumerate(actual) if label[idx]]

  for rank, rec in enumerate(recommended):
    book = df[df.book == rec]
    # Evaluated per recommendation so one best-seller cannot mark later
    # recommendations as hits.
    is_best_seller = len(book) > 0 and bool(book.iloc[0].best_seller)

    if is_best_seller or (rec in related_books):
      return np.reciprocal(np.log2(rank + 2))

  return 0