# Movie Lens in RL Framework

*State*: as the browsing history of a user, i.e., previous $N$ items
that a user liked before time $t$


DCF:
https://towardsdatascience.com/deep-learning-based-recommender-systems-3d120201db7e

DCF will output if recommend or not.

Co-Trained-RL will tell which subset to sample from to maximize probability of DCF

Shortcomings: Can only evaluate in Offline fashion...

User eval: online simulator... would need to simulate user


In [1]:
import pandas as pd
import os
import time

In [2]:
def read_data_ml100k():
    data_dir = 'ml-100k/'
    names = ['user_id', 'item_id', 'rating', 'timestamp']
    data = pd.read_csv(os.path.join(data_dir, 'u.data'), '\t', names=names,
                       engine='python')
    num_users = data.user_id.unique().shape[0]
    num_items = data.item_id.unique().shape[0]
    return data, num_users, num_items

In [3]:
def read_item_data_ml100k():
    data_dir = 'ml-100k/'
    names = ['movie id', 'movie title', 'release date', 'video release date',
              'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
              "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
              'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
              'Thriller', 'War', 'Western']
    data = pd.read_csv(os.path.join(data_dir, 'u.item'), '|', names=names,
                       engine='python')
    return data

In [4]:
data, num_users, num_items = read_data_ml100k()

In [5]:
item_data = read_item_data_ml100k()

In [6]:
# Create item_data dictionary to get movie_id feature vectors
movie_to_onehot = {}
for i,row in item_data.iterrows():
    movie_to_onehot[row['movie id']] = row[5:].values

# Data Preprocessing

In [7]:
import numpy as np

np.random.seed(272)


rand_userIds = np.random.choice(data['user_id'].unique(), 
                                size=int(len(data['user_id'].unique())*0.8), 
                                replace=False)

data_30 = data#.loc[data['user_id'].isin(rand_userIds)]

## Leave one out train-test split

To avoid look-ahead bias/data leakage

In [8]:
data_30['rank_latest'] = data_30.groupby(['user_id'])['timestamp'].rank(method='first', ascending=False)

train_ratings = data_30[data_30['rank_latest'] != 1]
test_ratings = data_30[data_30['rank_latest'] == 1]

# drop columns that we no longer need
train_ratings = train_ratings[['user_id', 'item_id', 'rating']]
test_ratings = test_ratings[['user_id', 'item_id', 'rating']]

## Converting Dataset to Implicit Feedback

In [9]:
#train_ratings.loc[:, 'rating'] = 1

In [10]:
import torch
from torch.utils.data import Dataset

class MovieLensTrainDataset(Dataset):
    """MovieLens PyTorch Dataset for Training
    
    Args:
        ratings (pd.DataFrame): Dataframe containing the movie ratings
        all_movieIds (list): List containing all movieIds
    
    """

    def __init__(self, ratings, all_movieIds):
        self.users, self.items, self.labels = self.get_dataset(ratings, all_movieIds)

    def __len__(self):
        return len(self.users)
  
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

    def get_dataset(self, ratings, all_movieIds):
        users, items, labels = [], [], []
        user_item_set = set(zip(ratings['user_id'], ratings['item_id']))

        num_negatives = 4
        for u, i in user_item_set:
            users.append(u)
            items.append(i)
            labels.append(1)
            for _ in range(num_negatives):
                negative_item = np.random.choice(all_movieIds)
                while (u, negative_item) in user_item_set:
                    negative_item = np.random.choice(all_movieIds)
                users.append(u)
                items.append(negative_item)
                labels.append(0)

        return torch.tensor(users), torch.tensor(items), torch.tensor(labels)

In [11]:
import torch.nn as nn
from torch.utils.data import DataLoader

In [12]:
num_users = data_30['user_id'].max()+1
num_items = data_30['item_id'].max()+1
all_movieIds = data['item_id'].unique()

# Reinforced Co-Training Framework

Define classifiers: 

* $C_1, C_2$
* Parition documents into $K$ subsets based similarity
* Feed classifiers representative document concat user embedding
* Based on classifer output build initial state $s_0$, concat of output from $C_1,C_2$,
* feed state to Q-learner



## Partioning by LSH

https://santhoshhari.github.io/Locality-Sensitive-Hashing/

In [13]:
class HashTable:
    def __init__(self, hash_size, inp_dimensions):
        self.hash_size = hash_size
        self.inp_dimensions = inp_dimensions
        self.hash_table = dict()
        self.projections = np.random.randn(self.hash_size, inp_dimensions)
        self.subset_counter = 0
        
    def generate_hash(self, inp_vector):
        bools = (np.dot(inp_vector, self.projections.T) > 0).astype('int')
        return ''.join(bools.astype('str'))

    def __setitem__(self, inp_vec, label):
        hash_value = self.generate_hash(inp_vec)
        if hash_value not in self.hash_table:
            self.hash_table[hash_value] = self.hash_table\
                .get(hash_value, list()) + [self.subset_counter,label]
            self.subset_counter+=1
        else:
            self.hash_table[hash_value] = self.hash_table\
                .get(hash_value, list()) + [label]
        
    def __getitem__(self, inp_vec):
        hash_value = self.generate_hash(inp_vec)
        return self.hash_table.get(hash_value, [])

In [14]:
class LSH:
    def __init__(self, num_tables, hash_size, inp_dimensions):
        self.num_tables = num_tables
        self.hash_size = hash_size
        self.inp_dimensions = inp_dimensions
        self.hash_tables = list()
        for i in range(self.num_tables):
            self.hash_tables.append(HashTable(self.hash_size, self.inp_dimensions))
    
    def __setitem__(self, inp_vec, label):
        for table in self.hash_tables:
            table[inp_vec] = label
    
    def __getitem__(self, inp_vec):
        results = list()
        for table in self.hash_tables:
            results.extend(table[inp_vec])
        return list(set(results))


In [15]:
movie_hasher = HashTable(hash_size=10, inp_dimensions=19)
start = time.time()
for i,row in item_data.iterrows():
    movie_label = row['movie id']
    movie_feature_vec = item_data.iloc[i, 5:]
    movie_hasher.__setitem__(movie_feature_vec, movie_label)
end = time.time()
print(f'Creating movie LSH took {end-start} seconds.')

Creating movie LSH took 1.2041378021240234 seconds.


In [None]:
movie_hasher.

In [16]:
movie_feature_vec.values

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=object)

In [17]:
len(movie_hasher.hash_table)

141

In [18]:
import gym
from gym import spaces

In [19]:
# # we have item_data and a user_id
# # Generate Labeled seeding data by
# # for each item_id associated with current user get from subset
np.random.seed(272)
co_train_user = np.random.randint(0,num_users)


# u_1|1, ... u_k|0 etc
# This is L the seeding dataset
def create_L(co_train_user, df):
    co_train_user_ratings = df[df['user_id'] == co_train_user]
    co_train_u_ks = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
    L_item_ids = np.zeros(shape=len(movie_hasher.hash_table),dtype=int)
    user_ids = np.ones(shape=len(movie_hasher.hash_table),dtype=int)*co_train_user
    for i, row in co_train_user_ratings.iterrows():
        item_id = row['item_id']
        item_vec = item_data[item_data['movie id'] == item_id].iloc[:,5:].values[0]
        subset_id = movie_hasher.__getitem__(item_vec)[0]
        co_train_u_ks[subset_id] = 1
        L_item_ids[subset_id] = item_id
    return user_ids, L_item_ids, co_train_u_ks

L_user_ids, L_item_ids, L_u_ks = create_L(co_train_user, train_ratings)
L_val_user_ids, L_val_item_ids, L_val_u_ks = create_L(co_train_user, test_ratings)
# consists of current user_id|positive examples|negative exapmples
# Generate labeled val data from test set.


In [218]:
np.append(L_user_ids,[0])

array([242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,
       242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242,   0])

In [20]:
# c1 = Classifier(num_users,num_items)
# c2 = Classifier(num_users,num_items)
# pred = np.round(c1(torch.tensor(L_user_ids), torch.tensor(L_item_ids)).squeeze(1).detach().numpy()) == L_u_ks
# pred.mean(),L_u_ks

In [386]:
class CoTrainDataset(Dataset):
    """
    Producing datasets for cotraining process
    
    """
    def __init__(self, users, item_ids, u_k_labels):
        self.users = torch.tensor(users)
        self.items =  torch.tensor(item_ids)
        self.labels = torch.tensor(u_k_labels)

    def __len__(self):
        return len(self.users)
  
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]


In [435]:
train_ratings[train_ratings['user_id'].isin([2,3])].sort_values(by='user_id')

Unnamed: 0,user_id,item_id,rating
700,2,292,4
37026,2,306,4
36334,2,309,1
35981,2,310,4
35337,2,294,1
...,...,...,...
38670,3,322,3
14381,3,327,4
40899,3,352,2
14654,3,307,3


In [457]:
# Should extend to have multiple users
class CotrainEnv(gym.Env):
    #metadata = {'render.modes': ['human']}

    def __init__(self, c1, c2):
        super(CotrainEnv, self).__init__()
        self.co_train_user = None
        #sample representatives v[1] because I used v[0] to mark subset number
        self.sample_reps = [ v[1] for v in movie_hasher.hash_table.values() ]
        self.u_subs = [ v[1:] for v in movie_hasher.hash_table.values() ]
        self.k = len(self.sample_reps)
        self.obs_shape = (2*self.k,)
        self.action_space = spaces.Discrete(self.k)# one of the k subsets
        self.observation_space = spaces.Box(low=0, high=1,
                                        shape=self.obs_shape)
        self.c1, self.c2 = c1,c2
        self.c1_optim = torch.optim.Adam(self.c1.parameters())
        self.c2_optim = torch.optim.Adam(self.c2.parameters())

        self.t = 0
        self.num_steps = 10
        self.c1_losses = []
        self.c2_losses = []

    def step(self, action):
        
        # accuracy t-1
        L_val_user_ids, L_val_item_ids, L_val_u_ks = self.create_L(self.co_train_user, test_ratings)
        L_val_user_ids = torch.tensor(L_val_user_ids)
        L_val_item_ids = torch.tensor(L_val_item_ids)
        L_val_u_ks = torch.tensor(L_val_u_ks)

        prev_c1_label = np.round(self.c1(L_val_user_ids,L_val_item_ids).detach().numpy())
        prev_c2_label = np.round(self.c2(L_val_user_ids,L_val_item_ids).detach().numpy())
        
        # action is subset to sample from..
        # Create 
        u_at = self.u_subs[action]
        self.cotrain(self.c1, self.c2, u_at)
        
        # accuracy t

        post_c1_label = np.round(self.c1(L_val_user_ids,L_val_item_ids).detach().numpy())
        post_c2_label = np.round(self.c2(L_val_user_ids,L_val_item_ids).detach().numpy())
        
        reward = self.calc_reward(prev_c1_label, prev_c2_label, post_c1_label, post_c2_label, L_val_u_ks)
        # update state
        state_holder = []
        for rep_sample in self.sample_reps:
            c1_pred = self.c1(torch.tensor([self.co_train_user]),torch.tensor([rep_sample]))
            prob_c1 = torch.sigmoid(c1_pred).detach().numpy()
            c2_pred = self.c2(torch.tensor([self.co_train_user]),torch.tensor([rep_sample]))
            prob_c2 = torch.sigmoid(c2_pred).detach().numpy()
            state_holder+=[prob_c1,prob_c2]
        observation = np.concatenate(state_holder).reshape(-1)
        done = self.t == self.num_steps
        self.t+=1
        info = {}
        self.report_losses()
        return observation, reward, done, info
    
    def report_losses(self):
        if self.t % 10 == 0:
            print('Current user: {}'.format(self.co_train_user))
            print('====> C1 loss: {:.4f}'.format(np.mean(self.c1_losses)))
            print('====> C2 loss: {:.4f}'.format(np.mean(self.c2_losses)))
    
    def create_L(self, co_train_user, df):
        # Change this to have 4 negative examples for every positive
        num_neg = 4
        co_train_user_ratings = df[df['user_id']==co_train_user]#.sort_values(by='user_id')
        #maybe pass movie_hasher to env as well
        L_labels = []
        L_item_ids = []
        user_ids = [co_train_user]
        items_user_liked = set()
        all_movie_ids = set(item_data['movie id'].values)
        #current_user_id = co_train_user[0]
        for i, row in co_train_user_ratings.iterrows():
#             if current_user_id != row['user_id']:
#                 #reset items liked
#                 items_user_liked.clear()
            item_id = row['item_id']
            items_user_liked.add(item_id)
           
            L_labels.append(1)
            L_item_ids.append(item_id)
        # Add negative item examples
        movie_selection_set = all_movie_ids - items_user_liked
        for _ in range(num_neg):
            movie_selection_list = list(movie_selection_set)
            negative_example = np.random.choice(movie_selection_list, size=1)[0]
            L_item_ids.append(negative_example)
            L_labels.append(0)
            movie_selection_set.remove(negative_example)
              
        return user_ids*len(L_item_ids), L_item_ids, L_labels
    
    def train_classifier(self, model, optimizer, dataloader, model_name, loss_collater):
        model.train()
        for data in dataloader:
            user_input, item_input, labels = data
            optimizer.zero_grad()
            predicted_labels = model(user_input, item_input)
            #pos_weight= num_negative examples torch.tensor([4.])
            loss = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([4.]))(predicted_labels, labels.view(-1, 1).float())
            loss_collater.append(loss.item())
            loss.backward()
            optimizer.step()
#         if self.t % 10==0:
#             print('====> {} Final loss: {:.4f}'.format(model_name,loss.item()))
    
    def train_on_L(self):
        L_user_ids, L_item_ids, L_u_ks = self.create_L(self.co_train_user, train_ratings)
        # fit c1 on L
        L = CoTrainDataset(L_user_ids, L_item_ids, L_u_ks)
        L_dataloader = DataLoader(L, batch_size=16, num_workers=0)
        self.train_classifier(self.c1,self.c1_optim, L_dataloader, 'c1 L-train', self.c1_losses)
        
        # fit c2 on L
        self.train_classifier(self.c2, self.c2_optim, L_dataloader, 'c2 L-train', self.c2_losses)
        return
    
    def cotrain(self, c1, c2, u_at):
        L_user_ids, L_item_ids, L_u_ks = self.create_L(self.co_train_user, train_ratings)
        L = CoTrainDataset(L_user_ids, L_item_ids, L_u_ks)
        user_input = [self.co_train_user]*len(u_at)
        
        # Used for u_at + L dataset
        u_L_user_ids = np.append(L_user_ids, user_input)
        u_L_item_ids = np.append(L_item_ids, u_at)
        
        # u_at is subset
        # c1 labels u_at then train c2 using that label
        c1_preds = torch.sigmoid(c1(torch.tensor(user_input), torch.tensor(u_at))).detach().numpy()
        u_at_c1_label = np.round(c1_preds)
        # Make u_at + L dataset
        u_at_c1_L_labels = np.append(L_u_ks, u_at_c1_label)
        u_at_c1_dataset = CoTrainDataset(u_L_user_ids, u_L_item_ids, u_at_c1_L_labels)
        u_at_c1_dataloader = DataLoader(u_at_c1_dataset, batch_size=4, num_workers=0)
        self.train_classifier(self.c2, self.c2_optim, u_at_c1_dataloader, 'c1 co-train', self.c2_losses)
        
        # c2 labels u_at then train c1 using that label
        c2_preds = torch.sigmoid(c2(torch.tensor(user_input), torch.tensor(u_at))).detach().numpy()
        u_at_c2_label = np.round(c2_preds)
        u_at_c2_L_labels = np.append(L_u_ks, u_at_c2_label)
        u_at_c2_dataset = CoTrainDataset(u_L_user_ids, u_L_item_ids, u_at_c2_L_labels)
        u_at_c2_dataloader = DataLoader(u_at_c2_dataset, batch_size=4, num_workers=0)
        self.train_classifier(self.c1,self.c1_optim, u_at_c2_dataloader, 'c2 co-train', self.c1_losses)
        return
        
        
    def calc_reward(self, prev_c1, prev_c2, post_c1, post_c2, true_labels):
        post_c1, post_c2 = post_c1.squeeze(1),post_c2.squeeze(1)
        prev_c1, prev_c2 = prev_c1.squeeze(1), prev_c2.squeeze(1)
        assert post_c1.shape == true_labels.shape
        assert post_c2.shape == true_labels.shape
        assert prev_c1.shape == true_labels.shape
        assert prev_c2.shape == true_labels.shape
       
        true_labels_np = true_labels.detach().numpy()

        r_1 = np.mean(post_c1 == true_labels_np) - np.mean(prev_c1 == true_labels_np)
        r_2 = np.mean(post_c2 == true_labels_np) - np.mean(prev_c2 == true_labels_np)
        r = r_1*r_2
        reward = r if r > 0 else 0
        return reward
    
    def reset(self):
        self.t = 0
        self.c1_losses = []
        self.c2_losses = []
        self.co_train_user = np.random.choice(np.arange(num_users),size=1)[0]#np.random.randint(0,num_users)
        observation = np.ones(shape=self.obs_shape)*0.5
        self.train_on_L()
        return observation  # reward, done, info can't be included

    def render(self, mode='human'):
        pass
    
    def close (self):
         pass

In [458]:
# Define Classifier
class Classifier(nn.Module):
    """ Neural Collaborative Filtering (NCF)
    
        Args:
            num_users (int): Number of unique users
            num_items (int): Number of unique items
    """
    
    def __init__(self, num_users, num_items, movie_onehot):
        super().__init__()
        self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=8)
        self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=8)
        self.movie_onehot = movie_onehot
        self.one_hot_dim = len(movie_onehot[1])
        input_dim = 16 + self.one_hot_dim
        
        self.fc1 = nn.Linear(in_features=input_dim, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=1)
        
    def forward(self, user_input, item_input):
        #print(item_input)
        #Build batch of movie features
        movie_features = []
        for item_id in item_input:
            movie_features.append(self.movie_onehot[item_id.item()].astype('float'))
        # Pass through embedding layers
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)
        #print(movie_features)
        movie_features = torch.tensor(movie_features, requires_grad=True,dtype=torch.float)
        # Concat the two embedding layers
        vector = torch.cat([user_embedded, item_embedded,movie_features], dim=-1)

        # Pass through dense layer
        vector = nn.ReLU()(self.fc1(vector))
        vector = nn.ReLU()(self.fc2(vector))

        # Output layer
        pred = self.output(vector)

        return pred

In [459]:
# Train classifier
def train_classifier(model, optimizer, dataloader):
    model.train()
    for data in dataloader:
        user_input, item_input, labels = data
        optimizer.zero_grad()
        predicted_labels = model(user_input, item_input)
        loss = nn.BCEWithLogitsLoss()(predicted_labels, labels.view(-1, 1).float())
        loss.backward()
        optimizer.step()
    print('====>Final loss: {:.4f}'.format(loss.item()))

In [460]:
c1 = Classifier(num_users,num_items, movie_to_onehot)
c2 = Classifier(num_users,num_items,movie_to_onehot)

In [461]:
from stable_baselines3.common.env_checker import check_env
env = CotrainEnv(c1, c2)
#check_env(env)

In [462]:
from d3rlpy.algos import DiscreteCQL
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy
cql = DiscreteCQL()

In [463]:
# # setup replay buffer
# buffer = ReplayBuffer(maxlen=1000, env=env)
# # setup explorers
# explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
#                                     end_epsilon=0.01,
#                                     duration=100)
# cql.fit_online(env,buffer=buffer, explorer=explorer, n_steps=100)

2021-05-20 18:52.23 [info     ] Directory is created at d3rlpy_logs/DiscreteCQL_online_20210520185223
2021-05-20 18:52.23 [debug    ] Building model...
2021-05-20 18:52.23 [debug    ] Model has been built.
2021-05-20 18:52.23 [info     ] Parameters are saved to d3rlpy_logs/DiscreteCQL_online_20210520185223/params.json params={'action_scaler': None, 'augmentation': {'params': {'n_mean': 1}, 'augmentations': []}, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'scaler': None, 'target_reduction_type': 'min', 'target_update_interval': 8000, 'use_gpu': None, 'algorith

 10%|█         | 10/100 [00:02<00:20,  4.33it/s]

Current user: 894
====> C1 loss: 0.2323
====> C2 loss: 0.2268


 24%|██▍       | 24/100 [00:03<00:07,  9.53it/s]

Current user: 283
====> C1 loss: 0.2949
====> C2 loss: 0.3062


 35%|███▌      | 35/100 [00:05<00:10,  6.02it/s]

Current user: 693
====> C1 loss: 0.1714
====> C2 loss: 0.1689


 46%|████▌     | 46/100 [00:08<00:12,  4.35it/s]

Current user: 774
====> C1 loss: 0.1128
====> C2 loss: 0.1189


 59%|█████▉    | 59/100 [00:10<00:05,  7.22it/s]

Current user: 835
====> C1 loss: 0.2168
====> C2 loss: 0.2353


 71%|███████   | 71/100 [00:11<00:03,  8.81it/s]

Current user: 582
====> C1 loss: 0.3352
====> C2 loss: 0.3250


 83%|████████▎ | 83/100 [00:12<00:01,  9.28it/s]

Current user: 146
====> C1 loss: 0.3753
====> C2 loss: 0.3738


 95%|█████████▌| 95/100 [00:13<00:00, 11.44it/s]

Current user: 572
====> C1 loss: 0.5660
====> C2 loss: 0.5983


100%|██████████| 100/100 [00:14<00:00,  6.72it/s]


In [426]:
#buffer.to_mdp_dataset().rewards
#buffer.to_mdp_dataset().observations[-1]
#buffer.to_mdp_dataset().actions
buffer.to_mdp_dataset().episodes

9

# Evaluating Reinforced Co-Trainer 

* Measure Hit Rate@10

* Will implement NDCG

In [465]:
# load classifiers
# Bring Images into same latent space, also preprocess
device = torch.device('cpu')

checkpoint = torch.load('classifiers_15000.pt',map_location=device)
c1.load_state_dict(checkpoint['c1_state_dict'])
c2.load_state_dict(checkpoint['c2_state_dict'])

c1.eval()
c2.eval()

Classifier(
  (user_embedding): Embedding(944, 8)
  (item_embedding): Embedding(1683, 8)
  (fc1): Linear(in_features=35, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=1, bias=True)
)

In [483]:
# User-item pairs for testing
test_user_item_set = set(zip(test_ratings['user_id'], test_ratings['item_id']))

# Dict of all items that are interacted with by each user
user_interacted_items = data_30.groupby('user_id')['item_id'].apply(list).to_dict()

hits = []
for (u,i) in test_user_item_set:
    interacted_items = user_interacted_items[u]
    not_interacted_items = set(all_movieIds) - set(interacted_items)
    selected_not_interacted = list(np.random.choice(list(not_interacted_items), 99))
    test_items = selected_not_interacted + [i]
    
    c1_predicted_labels = c1(torch.tensor([u]*100), torch.tensor(test_items))
    
    c2_predicted_labels = c2(torch.tensor([u]*100), torch.tensor(test_items))
    logits_labels = torch.sigmoid((c1_predicted_labels+c2_predicted_labels)/2)
    predicted_labels = np.squeeze(logits_labels.detach().numpy())
    
    top10_items = [test_items[i] for i in np.argsort(predicted_labels)[::-1][0:10].tolist()]
    
    if i in top10_items:
        hits.append(1)
    else:
        hits.append(0)
        
print("The Hit Ratio @ 10 is {:.4f}".format(np.average(hits)))

The Hit Ratio @ 10 is 0.3913


In [None]:
# Co-Trainer + NCF
#