# Movie Lens in RL Framework

*State*: as the browsing history of a user, i.e., previous $N$ items
that a user liked before time $t$


DCF:
https://towardsdatascience.com/deep-learning-based-recommender-systems-3d120201db7e

DCF will output if recommend or not.

Co-Trained-RL will tell which subset to sample from to maximize probability of DCF

Shortcomings: Can only evaluate in Offline fashion...

User eval: online simulator... would need to simulate user


In [7]:
import pandas as pd
import numpy as np
import os
import time

In [2]:
# def read_data_ml100k():
#     data_dir = 'ml-100k/'
#     names = ['user_id', 'item_id', 'rating', 'timestamp']
#     data = pd.read_csv(os.path.join(data_dir, 'u.data'), '\t', names=names,
#                        engine='python')
#     num_users = data.user_id.unique().shape[0]
#     num_items = data.item_id.unique().shape[0]
#     return data, num_users, num_items

In [3]:
def read_data_ml1M():
    data_dir = 'ml-1m/'
    names = ['user_id', 'item_id', 'rating', 'timestamp']
    data = pd.read_csv(os.path.join(data_dir, 'ratings.dat'), '::', names=names,
                       engine='python')
    num_users = data.user_id.unique().shape[0]
    num_items = 3953 #from README
    return data, num_users, num_items

In [31]:
# def read_item_data_ml100k():
#     data_dir = 'ml-100k/'
#     names = ['movie id', 'movie title', 'release date', 'video release date',
#               'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
#               "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
#               'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
#               'Thriller', 'War', 'Western']
#     data = pd.read_csv(os.path.join(data_dir, 'u.item'), '|', names=names,
#                        engine='python')
#     return data

In [51]:
# def read_item_data_ml1M():
#     data_dir = 'ml-1m/'
#     names = ['movie id', 'movie title', 'Action', 'Adventure', 'Animation',
#               "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
#               'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
#               'Thriller', 'War', 'Western']
#     data = pd.read_csv(os.path.join(data_dir, 'movies.dat'), '::', names=names,
#                        engine='python')
#     return data

In [4]:
data, num_users, num_items = read_data_ml1M()

In [33]:
data.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [8]:
#https://striatum.readthedocs.io/en/latest/auto_examples/movielens_preprocess.html
import itertools
def movie_preprocessing(movie):
    movie_col = list(movie.columns)
    movie_tag = [doc.split('|') for doc in movie['tag']]
    tag_table = {token: idx for idx, token in enumerate(set(itertools.chain.from_iterable(movie_tag)))}
    movie_tag = pd.DataFrame(movie_tag)
    tag_table = pd.DataFrame(tag_table.items())
    tag_table.columns = ['Tag', 'Index']

    # use one-hot encoding for movie genres (here called tag)
    tag_dummy = np.zeros([len(movie), len(tag_table)])

    for i in range(len(movie)):
        for j in range(len(tag_table)):
            if tag_table['Tag'][j] in list(movie_tag.iloc[i, :]):
                tag_dummy[i, j] = 1

    # combine the tag_dummy one-hot encoding table to original movie files
    movie = pd.concat([movie, pd.DataFrame(tag_dummy)], 1)
    movie_col.extend(['tag' + str(i) for i in range(len(tag_table))])
    movie.columns = movie_col
    movie = movie.drop('tag', 1)
    return movie


In [9]:
# read and preprocess the movie data
item_data = pd.read_table('ml-1m/movies.dat', sep='::', names=['movie id', 'movie title', 'tag'], engine='python')
item_data = movie_preprocessing(item_data)
#item_data = read_item_data_ml1M()

In [10]:
item_data.head()

Unnamed: 0,movie id,movie title,tag0,tag1,tag2,tag3,tag4,tag5,tag6,tag7,tag8,tag9,tag10,tag11,tag12,tag13,tag14,tag15,tag16,tag17
0,1,Toy Story (1995),0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,2,Jumanji (1995),0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,3,Grumpier Old Men (1995),0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,4,Waiting to Exhale (1995),0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II (1995),0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Data Preprocessing

In [11]:
import numpy as np

np.random.seed(272)


rand_userIds = np.random.choice(data['user_id'].unique(), 
                                size=int(len(data['user_id'].unique())*0.8), 
                                replace=False)

data_30 = data.loc[data['user_id'].isin(rand_userIds)]

## Leave one out train-test split

To avoid look-ahead bias/data leakage

In [12]:
data_30['rank_latest'] = data_30.groupby(['user_id'])['timestamp'].rank(method='first', ascending=False)

train_ratings = data_30[data_30['rank_latest'] != 1]
test_ratings = data_30[data_30['rank_latest'] == 1]

# drop columns that we no longer need
train_ratings = train_ratings[['user_id', 'item_id', 'rating']]
test_ratings = test_ratings[['user_id', 'item_id', 'rating']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_30['rank_latest'] = data_30.groupby(['user_id'])['timestamp'].rank(method='first', ascending=False)


## Converting Dataset to Implicit Feedback

In [13]:
#train_ratings.loc[:, 'rating'] = 1

In [14]:
import torch
from torch.utils.data import Dataset

class MovieLensTrainDataset(Dataset):
    """MovieLens PyTorch Dataset for Training
    
    Args:
        ratings (pd.DataFrame): Dataframe containing the movie ratings
        all_movieIds (list): List containing all movieIds
    
    """

    def __init__(self, ratings, all_movieIds):
        self.users, self.items, self.labels = self.get_dataset(ratings, all_movieIds)

    def __len__(self):
        return len(self.users)
  
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

    def get_dataset(self, ratings, all_movieIds):
        users, items, labels = [], [], []
        user_item_set = set(zip(ratings['user_id'], ratings['item_id']))

        num_negatives = 4
        for u, i in user_item_set:
            users.append(u)
            items.append(i)
            labels.append(1)
            for _ in range(num_negatives):
                negative_item = np.random.choice(all_movieIds)
                while (u, negative_item) in user_item_set:
                    negative_item = np.random.choice(all_movieIds)
                users.append(u)
                items.append(negative_item)
                labels.append(0)

        return torch.tensor(users), torch.tensor(items), torch.tensor(labels)

In [15]:
import torch.nn as nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader

# class NCF(pl.LightningModule):
#     """ Neural Collaborative Filtering (NCF)
    
#         Args:
#             num_users (int): Number of unique users
#             num_items (int): Number of unique items
#             ratings (pd.DataFrame): Dataframe containing the movie ratings for training
#             all_movieIds (list): List containing all movieIds (train + test)
#     """
    
#     def __init__(self, num_users, num_items, ratings, all_movieIds):
#         super().__init__()
#         self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=8)
#         self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=8)
#         self.fc1 = nn.Linear(in_features=16, out_features=64)
#         self.fc2 = nn.Linear(in_features=64, out_features=32)
#         self.output = nn.Linear(in_features=32, out_features=1)
#         self.ratings = ratings
#         self.all_movieIds = all_movieIds
        
#     def forward(self, user_input, item_input):
        
#         # Pass through embedding layers
#         user_embedded = self.user_embedding(user_input)
#         item_embedded = self.item_embedding(item_input)

#         # Concat the two embedding layers
#         vector = torch.cat([user_embedded, item_embedded], dim=-1)

#         # Pass through dense layer
#         vector = nn.ReLU()(self.fc1(vector))
#         vector = nn.ReLU()(self.fc2(vector))

#         # Output layer
#         pred = nn.Sigmoid()(self.output(vector))

#         return pred
    
#     def training_step(self, batch, batch_idx):
#         user_input, item_input, labels = batch
#         predicted_labels = self(user_input, item_input)
#         loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
#         return loss

#     def configure_optimizers(self):
#         return torch.optim.Adam(self.parameters())

#     def train_dataloader(self):
#         return DataLoader(MovieLensTrainDataset(self.ratings, self.all_movieIds),
#                           batch_size=128, num_workers=0)

In [16]:
num_users = data_30['user_id'].max()+1
num_items = data_30['item_id'].max()+1
all_movieIds = data['item_id'].unique()

# model = NCF(num_users, num_items, train_ratings, all_movieIds)

# trainer = pl.Trainer(max_epochs=1, gpus=0, reload_dataloaders_every_epoch=True,
#                      progress_bar_refresh_rate=50, logger=False, checkpoint_callback=False)

# trainer.fit(model)

# Reinforced Co-Training Framework

Define classifiers: 

* $C_1, C_2$
* Parition documents into $K$ subsets based similarity
* Feed classifiers representative document concat user embedding
* Based on classifer output build initial state $s_0$, concat of output from $C_1,C_2$,
* feed state to Q-learner



## Partioning by LSH

https://santhoshhari.github.io/Locality-Sensitive-Hashing/

In [18]:
class HashTable:
    def __init__(self, hash_size, inp_dimensions):
        self.hash_size = hash_size
        self.inp_dimensions = inp_dimensions
        self.hash_table = dict()
        self.projections = np.random.randn(self.hash_size, inp_dimensions)
        self.subset_counter = 0
        
    def generate_hash(self, inp_vector):
        bools = (np.dot(inp_vector, self.projections.T) > 0).astype('int')
        return ''.join(bools.astype('str'))

    def __setitem__(self, inp_vec, label):
        hash_value = self.generate_hash(inp_vec)
        if hash_value not in self.hash_table:
            self.hash_table[hash_value] = self.hash_table\
                .get(hash_value, list()) + [self.subset_counter,label]
            self.subset_counter+=1
        else:
            self.hash_table[hash_value] = self.hash_table\
                .get(hash_value, list()) + [label]
        
    def __getitem__(self, inp_vec):
        hash_value = self.generate_hash(inp_vec)
        return self.hash_table.get(hash_value, [])

In [19]:
class LSH:
    def __init__(self, num_tables, hash_size, inp_dimensions):
        self.num_tables = num_tables
        self.hash_size = hash_size
        self.inp_dimensions = inp_dimensions
        self.hash_tables = list()
        for i in range(self.num_tables):
            self.hash_tables.append(HashTable(self.hash_size, self.inp_dimensions))
    
    def __setitem__(self, inp_vec, label):
        for table in self.hash_tables:
            table[inp_vec] = label
    
    def __getitem__(self, inp_vec):
        results = list()
        for table in self.hash_tables:
            results.extend(table[inp_vec])
        return list(set(results))


In [20]:
movie_hasher = HashTable(hash_size=10, inp_dimensions=18)
start = time.time()
for i,row in item_data.iterrows():
    movie_label = row['movie id']
    movie_feature_vec = item_data.iloc[i, 2:]
    movie_hasher.__setitem__(movie_feature_vec, movie_label)
end = time.time()
print(f'Creating movie LSH took {end-start} seconds.')

Creating movie LSH took 1.3886330127716064 seconds.


In [21]:
movie_feature_vec.values

array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
       0.0, 0.0, 0.0, 0.0, 0.0], dtype=object)

In [22]:
len(movie_hasher.hash_table)

154

In [23]:
import gym
from gym import spaces

In [24]:
#item_data.head()

In [25]:
#item_data[item_data['movie id'] == 1].iloc[0,5:].values

In [26]:
#hash_movie_id = movie_hasher.generate_hash()
#movie_hasher.__getitem__(item_data[item_data['movie id'] == 1].iloc[:,5:].values[0])


In [27]:
# # we have item_data and a user_id
# # Generate Labeled seeding data by
# # for each item_id associated with current user get from subset
np.random.seed(270)
co_train_user = np.random.randint(0,num_users)


# u_1|1, ... u_k|0 etc
# This is L the seeding dataset
def create_L(co_train_user, df):
    co_train_user_ratings = df[df['user_id'] == co_train_user]
    co_train_u_ks = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
    L_item_ids = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
    user_ids = np.ones(shape=len(movie_hasher.hash_table),dtype=int)*co_train_user
    for i, row in co_train_user_ratings.iterrows():
        item_id = row['item_id']
        item_vec = item_data[item_data['movie id'] == item_id].iloc[:,2:].values[0]
        subset_id = movie_hasher.__getitem__(item_vec)[0]
        co_train_u_ks[subset_id] = 1
        L_item_ids[subset_id] = item_id
    return user_ids, L_item_ids, co_train_u_ks

L_user_ids, L_item_ids, L_u_ks = create_L(co_train_user, train_ratings)
L_val_user_ids, L_val_item_ids, L_val_u_ks = create_L(co_train_user, test_ratings)
# consists of current user_id|positive examples|negative exapmples
# Generate labeled val data from test set.


In [28]:
L_user_ids, L_item_ids, L_u_ks

(array([3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104, 3104,
        3104, 3104, 3104, 3104, 3104, 

In [46]:
c1 = Classifier(num_users,num_items)
c2 = Classifier(num_users,num_items)
pred = np.round(c1(torch.tensor(L_user_ids), torch.tensor(L_item_ids)).squeeze(1).detach().numpy()) 
pred,L_u_ks

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.], dtype=float32),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [29]:
class CoTrainDataset(Dataset):
    """
    Producing datasets for cotraining process
    
    """
    def __init__(self, users, item_ids, u_k_labels):
        self.users = torch.tensor(users)
        self.items =  torch.tensor(item_ids)
        self.labels = torch.tensor(u_k_labels)

    def __len__(self):
        return len(self.users)
  
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]


In [30]:
class CotrainEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, c1, c2):
        super(CotrainEnv, self).__init__()
        self.co_train_user = None
        #sample representatives
        self.sample_reps = [ v[0] for v in movie_hasher.hash_table.values() ]
        self.k = len(self.sample_reps)
        self.obs_shape = 2
        self.action_space = spaces.Discrete(self.k)# one of the k subsets
        self.observation_space = spaces.Box(low=0, high=1,
                                        shape=(self.obs_shape,))
        self.c1, self.c2 = c1,c2
        self.trainer = pl.Trainer(max_epochs=1, gpus=0, reload_dataloaders_every_epoch=True,
                     progress_bar_refresh_rate=0, logger=False, checkpoint_callback=False)
        
        self.t = 0
        self.num_steps = 10

    def step(self, action):
        
        # accuracy t-1
        L_val_user_ids, L_val_item_ids, L_val_u_ks = self.create_L(self.co_train_user, test_ratings)
        L_val_user_ids = torch.tensor(L_val_user_ids)
        L_val_item_ids = torch.tensor(L_val_item_ids)
        L_val_u_ks = torch.tensor(L_val_u_ks)
#         print('user',L_val_user_ids)
#         print('item',L_val_item_ids)
#         print('uks',L_val_u_ks)
        prev_c1_label = np.round(self.c1(L_val_user_ids,L_val_item_ids).detach().numpy())
        prev_c2_label = np.round(self.c2(L_val_user_ids,L_val_item_ids).detach().numpy())
        
        # action is subset to sample from..
        u_at = self.sample_reps[action]
        self.cotrain(self.c1, self.c2, u_at)
        
        # accuracy t
        prob_c1 = self.c1(torch.tensor(self.co_train_user),torch.tensor(u_at)).detach().numpy()
        prob_c2 = self.c2(torch.tensor(self.co_train_user),torch.tensor(u_at)).detach().numpy()
        post_c1_label = np.round(self.c1(L_val_user_ids,L_val_item_ids).detach().numpy())
        post_c2_label = np.round(self.c2(L_val_user_ids,L_val_item_ids).detach().numpy())
        
        reward = self.calc_reward(prev_c1_label, prev_c2_label, post_c1_label, post_c2_label, L_val_u_ks)
        # update state
        observation = np.concatenate([prob_c1,prob_c2])
        done = self.t == self.num_steps
        self.t+=1
        info = {}
        return observation, reward, done, info
    
    def create_L(self, co_train_user, df):
        co_train_user_ratings = df[df['user_id'] == co_train_user]
        #maybe pass movie_hasher to env as well
        co_train_u_ks = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
        L_item_ids = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
        user_ids = np.ones(shape=len(movie_hasher.hash_table),dtype=int)*co_train_user
        for i, row in co_train_user_ratings.iterrows():
            item_id = row['item_id']
            item_vec = item_data[item_data['movie id'] == item_id].iloc[:,2:].values[0]
            subset_id = movie_hasher.__getitem__(item_vec)[0]
            co_train_u_ks[subset_id] = 1
            L_item_ids[subset_id] = item_id
        return user_ids, L_item_ids, co_train_u_ks
    
    def train_on_L(self):
        L_user_ids, L_item_ids, L_u_ks = self.create_L(self.co_train_user, train_ratings)
        
        # fit c1 on L
        L = CoTrainDataset(L_user_ids, L_item_ids, L_u_ks)
        self.c1.dataset = L
        self.trainer.fit(c1)
        
        # fit c2 on L
        self.c2.dataset = L
        self.trainer.fit(c2)
        return
    
    def cotrain(self, c1, c2, u_at):
        # u_at is subset
        # c1 labels u_at then train c2 using that label
        user_input = torch.tensor(self.co_train_user)
        u_at_c1_label = np.round(c1(user_input, torch.tensor(u_at)).detach().numpy())
        u_at_c1_dataset = CoTrainDataset(self.co_train_user, u_at, u_at_c1_label)
        c2.dataset = u_at_c1_dataset
        self.trainer.fit(c2)
        
        # c2 labels u_at then train c1 using that label
        u_at_c2_label = np.round(c2(user_input, torch.tensor(u_at)).detach().numpy())
        u_at_c2_dataset = CoTrainDataset(self.co_train_user, u_at, u_at_c2_label)
        c1.dataset = u_at_c2_dataset
        self.trainer.fit(c1)
        return
        
        
    def calc_reward(self, prev_c1, prev_c2, post_c1, post_c2, true_labels):
        post_c1, post_c2 = post_c1.squeeze(1),post_c2.squeeze(1)
        prev_c1, prev_c2 = prev_c1.squeeze(1), prev_c2.squeeze(1)
        assert post_c1.shape == true_labels.shape
        assert post_c2.shape == true_labels.shape
        assert prev_c1.shape == true_labels.shape
        assert prev_c2.shape == true_labels.shape
#         print('pred label',post_c1.shape,prev_c1.shape)
#         print('true label',true_labels.shape)
        r_1 = np.sum(post_c1 == true_labels) - np.sum(prev_c1 == true_labels)
        r_2 = np.sum(post_c2 == true_labels) - np.sum(prev_c2 == true_labels)
        print(r_1,r_2)
        r = r_1*r_2
        reward = r if r > 0 else 0
        #print(reward)
        return reward
    
    def reset(self):
        self.t = 0
        self.co_train_user = np.random.randint(0,num_users)
        observation = np.ones(shape=(self.obs_shape,))*0.5
        self.train_on_L()
        return observation  # reward, done, info can't be included

    def render(self, mode='human'):
        pass
    
    def close (self):
         pass

In [31]:
# Define Classifier
class Classifier(pl.LightningModule):
    """ Neural Collaborative Filtering (NCF)
    
        Args:
            num_users (int): Number of unique users
            num_items (int): Number of unique items
    """
    
    def __init__(self, num_users, num_items):
        super().__init__()
        self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=8)
        self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=8)
        self.fc1 = nn.Linear(in_features=16, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=1)
        self.dataset = None
        
    def forward(self, user_input, item_input):
        
        # Pass through embedding layers
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)

        # Concat the two embedding layers
        vector = torch.cat([user_embedded, item_embedded], dim=-1)

        # Pass through dense layer
        vector = nn.ReLU()(self.fc1(vector))
        vector = nn.ReLU()(self.fc2(vector))

        # Output layer
        pred = nn.Sigmoid()(self.output(vector))

        return pred
    
    def training_step(self, batch, batch_idx):
        user_input, item_input, labels = batch
        predicted_labels = self(user_input, item_input)
        loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def train_dataloader(self):
        return DataLoader(self.dataset,
                          batch_size=16, num_workers=0)

In [32]:
c1 = Classifier(num_users,num_items)
c2 = Classifier(num_users,num_items)

In [114]:
from stable_baselines3.common.env_checker import check_env
env = CotrainEnv(c1, c2)
check_env(env)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

0 0
0 0
0 0
0 0
0 0



  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------

0 0
0 0
0 0
0 0
0 0



  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------

0 0


In [23]:
# from stable_baselines3 import PPO
# from stable_baselines3.common.monitor import Monitor
# from stable_baselines3.common.results_plotter import load_results, ts2xy
# from stable_baselines3.common.callbacks import BaseCallback
# import matplotlib.pyplot as plt

In [24]:
# class SaveOnBestTrainingRewardCallback(BaseCallback):
#     """
#     Callback for saving a model (the check is done every ``check_freq`` steps)
#     based on the training reward (in practice, we recommend using ``EvalCallback``).

#     :param check_freq: (int)
#     :param log_dir: (str) Path to the folder where the model will be saved.
#       It must contains the file created by the ``Monitor`` wrapper.
#     :param verbose: (int)
#     """
#     def __init__(self, check_freq: int, log_dir: str, verbose=1):
#         super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
#         self.check_freq = check_freq
#         self.log_dir = log_dir
#         self.save_path = os.path.join(log_dir, 'best_model')
#         self.best_mean_reward = -np.inf

#     def _init_callback(self) -> None:
#         # Create folder if needed
#         if self.save_path is not None:
#             os.makedirs(self.save_path, exist_ok=True)

#     def _on_step(self) -> bool:
#         if self.n_calls % self.check_freq == 0:

#           # Retrieve training reward
#             x, y = ts2xy(load_results(self.log_dir), 'timesteps')
#             if len(x) > 0:
#               # Mean training reward over the last 100 episodes
#                 mean_reward = np.mean(y[-100:])
#                 if self.verbose > 0:
#                     print(f"Num timesteps: {self.num_timesteps}")
#                     print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

#                   # New best model, you could save the agent here
#                 if mean_reward > self.best_mean_reward:
#                     self.best_mean_reward = mean_reward
#                     # Example for saving best model
#                     if self.verbose > 0:
#                         print(f"Saving new best model to {self.save_path}.zip")
#                         self.model.save(self.save_path)

#         return True

In [25]:
# # Create log dir
# log_dir = "/tmp/gym/"
# os.makedirs(log_dir, exist_ok=True)
# # Logs will be saved in log_dir/monitor.csv
# env = Monitor(env, log_dir)

In [26]:
# # Create the callback: check every 1000 steps
# callback = SaveOnBestTrainingRewardCallback(check_freq=10, log_dir=log_dir)
# model = PPO("MlpPolicy", env, verbose=0)

In [49]:
# model.learn(total_timesteps=10, callback=callback)

In [None]:
# from stable_baselines3.common import results_plotter

# # Helper from the library
# results_plotter.plot_results([log_dir], 1e5, results_plotter.X_TIMESTEPS, "MiniGrid-DistShift1-v0")

In [28]:
# def moving_average(values, window):
#     """
#     Smooth values by doing a moving average
#     :param values: (numpy array)
#     :param window: (int)
#     :return: (numpy array)
#     """
#     weights = np.repeat(1.0, window) / window
#     return np.convolve(values, weights, 'valid')


# def plot_results(log_folder, title='Learning Curve'):
#     """
#     plot the results

#     :param log_folder: (str) the save location of the results to plot
#     :param title: (str) the title of the task to plot
#     """
#     x, y = ts2xy(load_results(log_folder), 'timesteps')
#     y = moving_average(y, window=50)
#     # Truncate x
#     x = x[len(x) - len(y):]

#     fig = plt.figure(title)
#     plt.plot(x, y)
#     plt.xlabel('Number of Timesteps')
#     plt.ylabel('Rewards')
#     plt.title(title + " Smoothed")
#     plt.show()


In [None]:
# plot_results(log_dir)

In [46]:
from d3rlpy.algos import DiscreteCQL
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy
cql = DiscreteCQL()

In [47]:
# setup replay buffer
buffer = ReplayBuffer(maxlen=1000, env=env)
# setup explorers
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                    end_epsilon=0.1,
                                    duration=100)
cql.fit_online(env,buffer=buffer, explorer=explorer, n_steps=100)

2021-05-18 01:40.01 [info     ] Directory is created at d3rlpy_logs/DiscreteCQL_online_20210518014001
2021-05-18 01:40.01 [debug    ] Building model...
2021-05-18 01:40.01 [debug    ] Model has been built.
2021-05-18 01:40.01 [info     ] Parameters are saved to d3rlpy_logs/DiscreteCQL_online_20210518014001/params.json params={'action_scaler': None, 'augmentation': {'params': {'n_mean': 1}, 'augmentations': []}, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'scaler': None, 'target_reduction_type': 'min', 'target_update_interval': 8000, 'use_gpu': None, 'algorith


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
  0%|          | 0/100 [00:00<?, ?it/s]
  | Name           | Type      | P


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------

 17%|█▋        | 17/100 [00:00<00:04, 18.45it/s]
  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
 27%|██▋       | 27/100 [00:01<00:04, 18.16it/s]
  | Name           | Type


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
 36%|███▌      | 36/100 [00:01<00:03, 16.59it/s]
  | Name           | Type

 44%|████▍     | 44/100 [00:02<00:03, 18.10it/s]
  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
 54%|█████▍    | 54/100 [00:02<00:02, 18.48it/s]
  | Name           | Type

 62%|██████▏   | 62/100 [00:03<00:02, 17.15it/s]
  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
 72%|███████▏  | 72/100 [00:04<00:01, 15.74it/s]
  | Name           | Type

 80%|████████  | 80/100 [00:04<00:01, 17.63it/s]
  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type

 89%|████████▉ | 89/100 [00:04<00:00, 18.55it/s]
  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type


  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 7.6 K 
1 | item_embedding | Embedding | 13.5 K
2 | fc1            | Linear    | 1.1 K 
3 | fc2            | Linear    | 2.1 K 
4 | output         | Linear    | 33    
---------------------------------------------
24.2 K    Trainable params
0         Non-trainable params
24.2 K    Total params
0.097     Total estimated model params size (MB)
 99%|█████████▉| 99/100 [00:05<00:00, 14.87it/s]
  | Name           | Type

In [48]:
buffer.to_mdp_dataset().rewards

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

In [228]:
# in an episode sample a user and interact a few times so Q-leaner develops user based policy
co_train_user = np.random.randint(0,num_users)
# Generate representative samples for each subset U_k
#sample_reps = [ v[0] for v in movie_hasher.hash_table.values() ]
# Define initial state 
#s_0 = np.ones(shape=(len(sample_reps)*2,))*0.5 #uniform for all subsets at beginning
# Allow Q-learner to pick a subset U_k

# Have pseudo-labeled U_k labeled by C_1 and then used to train C_2 and smalled labeled set L
# vice versa
# Update state to fed to Q-learner
# Evaluate classifers by using U_k Classifiers say we should sample from. Sample from it
# Give the sampled item to our trained NCF(has notion of user pref),


# Connecting Reinforced Co-Trainer and NCF

* Measure Hit Rate@10

* Will implement NDCG

Need to evaluate with and without Co-Trainer

In [233]:
# User-item pairs for testing
test_user_item_set = set(zip(test_ratings['user_id'], test_ratings['item_id']))

# Dict of all items that are interacted with by each user
user_interacted_items = data_30.groupby('user_id')['item_id'].apply(list).to_dict()

hits = []
for (u,i) in test_user_item_set:
    interacted_items = user_interacted_items[u]
    not_interacted_items = set(all_movieIds) - set(interacted_items)
    selected_not_interacted = list(np.random.choice(list(not_interacted_items), 99))
    test_items = selected_not_interacted + [i]
    
    predicted_labels = np.squeeze(model(torch.tensor([u]*100), 
                                        torch.tensor(test_items)).detach().numpy())
    
    top10_items = [test_items[i] for i in np.argsort(predicted_labels)[::-1][0:10].tolist()]
    
    if i in top10_items:
        hits.append(1)
    else:
        hits.append(0)
        
print("The Hit Ratio @ 10 is {:.2f}".format(np.average(hits)))

The Hit Ratio @ 10 is 0.40


In [None]:
# Co-Trainer + NCF
#