## Import Package

In [1]:
import os 
import numpy as np 
import pandas as pd 
from scipy.sparse import csr_matrix

import torch 
import torch.nn as nn 
import torch.nn.functional as F
from torch.optim import SGD

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity


import random
from matplotlib import pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


## Training Setting

In [2]:
df = pd.read_csv('preprocessed_df_with_timestamp.csv')

In [3]:
class Netflix(Dataset):
    def __init__(self, df):
        self.df = df
        self.users = self.df['Cust_ID'].values
        self.items = self.df['Movie_Id'].values
        self.ratings = self.df['Rating'].values
        
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        user = self.users[index]
        item = self.items[index]
        rating = self.ratings[index]
        
        return user, item, rating

In [5]:
sample_df = df.loc[(df['Cust_ID'] < 2500) & (df['Movie_Id'] < 97)]
sample_df

Unnamed: 0,Cust_ID,Movie_Id,Rating,Timestamp
0,0,0,3.0,2005-09-06
1,1,0,5.0,2005-05-13
2,2,0,4.0,2005-10-19
3,3,0,4.0,2005-12-26
4,4,0,3.0,2004-05-03
...,...,...,...,...
351307,2490,96,4.0,2004-06-08
351312,2489,96,4.0,2005-01-19
351328,2494,96,4.0,2003-03-12
352251,538,96,2.0,2004-06-07


In [6]:
# sample_df = sample_df[['Cust_ID','Movie_Id','Rating']]
sample_df_user2idx = {user:idx for idx, user in enumerate(sample_df['Cust_ID'].unique())} 
sample_df_item2idx = {item:idx for idx, item in enumerate(sample_df['Movie_Id'].unique())}
sample_df['Cust_ID'] = sample_df['Cust_ID'].map(sample_df_user2idx)
sample_df['Movie_Id'] = sample_df['Movie_Id'].map(sample_df_item2idx)
sample_df['Timestamp'] = pd.to_datetime(sample_df['Timestamp'])
bins = pd.date_range(start = '1999-11-01', end = '2005-12-31', freq = 'M')
sample_df.loc[:,'bins'] = pd.cut(sample_df['Timestamp'], bins=bins, labels=False)

# sample_df = sample_df[['Cust_ID','Movie_Id','Rating']]
train_df, test_df = train_test_split(sample_df, test_size = 0.2, random_state = 42)
temporal_train_df = train_df[['Cust_ID','Movie_Id','Rating','bins']]
temporal_test_df = test_df[['Cust_ID','Movie_Id','Rating','bins']]
train_df = train_df[['Cust_ID','Movie_Id','Rating']]
test_df = test_df[['Cust_ID','Movie_Id','Rating']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_df['Cust_ID'] = sample_df['Cust_ID'].map(sample_df_user2idx)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_df['Movie_Id'] = sample_df['Movie_Id'].map(sample_df_item2idx)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_df['Timestamp'] = pd.to_datetime(sample_df['Timestamp'])
A v

In [7]:
train_dataset = Netflix(train_df)
test_dataset = Netflix(test_df)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [9]:
R = csr_matrix(
    (np.array(train_df['Rating'].values, dtype = np.int32),
    (np.array(train_df['Cust_ID'].values, dtype = np.int32),np.array(train_df['Movie_Id'].values, dtype = np.int32))
    ), shape = (len(sample_df_user2idx), len(sample_df_item2idx)))

mu = train_df.Rating.mean() 
F = 15
k = 15

In [10]:
device = torch.device('cpu')

In [11]:
def train(model, train_loader, criterion ,optimizer):
    model.train() 
    total_loss = 0 
    for user, item, rating in train_loader:
        user = user.to(device)
        item = item.to(device)
        rating = rating.to(device)
        
        optimizer.zero_grad()
        pred = model(user, item)
        loss = criterion(pred, rating)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_loader)

def evaluate(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for user, item, rating in test_loader:
            try:
                user = user.to(device)
                item = item.to(device)
                rating = rating.to(device)

                pred = model(user, item)
                loss = criterion(pred, rating)

                total_loss += loss.item()
            except:
                pass
    
    return total_loss / len(test_loader)

In [12]:
# create a function (this my favorite choice)
def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2)+1e-6)


## Models

In [13]:
class BaselineEstimates(nn.Module):
    def __init__(self, num_users, num_items, mu):
        super(BaselineEstimates, self).__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.mu = mu
        
        self.user_biases = nn.Embedding(num_users, 1)
        self.item_biases = nn.Embedding(num_items, 1)
        
        self.user_biases.weight.data.normal_(0,1)
        self.item_biases.weight.data.normal_(0,1)
    
    def forward(self, user, item):
        bu = self.user_biases(user)
        bi = self.item_biases(item)
        
        rui = self.mu + torch.squeeze(bu) + torch.squeeze(bi)
        
        return rui

In [14]:
class NeighborhoodModel(nn.Module):
    def __init__(self, R, mu, k):
        super(NeighborhoodModel, self).__init__()
        self.R = R 
        self.k = k
        self.num_users, self.num_items = R.shape
        self.Base = BaselineEstimates(self.num_users, self.num_items, mu)
        self.item_weights = nn.Parameter(torch.normal(0,1,size=(self.num_items,self.num_items)))
        self.implicit_offset = nn.Parameter(torch.normal(0,1,size=(self.num_items,self.num_items)))
        self.mu = mu
        self.S = cosine_similarity(R.T)
        
        self.get_top_k()
        self.get_implicit()
        
    def get_top_n_indices(self, list, n):
        sorted_indices = sorted(range(len(list)), key=lambda i: list[i], reverse=True)
        top_n_indices = sorted_indices[:n]
        
        return top_n_indices

    def get_top_k(self):
        self.similar_k = {}
        for item in range(self.num_items):
            self.similar_k[item] = self.get_top_n_indices(self.S[item], self.k)
            
    def get_implicit(self):
        self.implicit_data = {} 
        users, items = R.toarray().nonzero()
        for user, item in zip(users, items):
            if user not in self.implicit_data:
                self.implicit_data[user] = []
            self.implicit_data[user].append(item)
  
    def forward(self, user, item):
        bui = self.Base(user, item)
        user_idx = int(user)
        item_idx = int(item)
        
        sum_of_item_weights = 0
        sum_of_implicit_offset = 0
        num_k = 0
        
        self.used_items = self.implicit_data[user_idx]
        
        for implicit in self.implicit_data[user_idx]:
            if implicit in self.similar_k[item_idx]:
                implicit_tensor = torch.LongTensor([implicit]).to(device)
                num_k += 1
                
                with torch.no_grad():
                    buj = self.Base(user, implicit_tensor)
                    
                sum_of_item_weights += (int(self.R[user,implicit].data)-buj) * self.item_weights[item][0][implicit]
                sum_of_implicit_offset += self.implicit_offset[item][0][implicit]        
            
        norm = num_k ** -0.5

        rui = bui + norm * sum_of_item_weights + norm * sum_of_implicit_offset
        
        return rui

In [15]:
class AsymmetricSVD(nn.Module):
    def __init__(self, R, mu, F):
        super(AsymmetricSVD, self).__init__()
        self.num_users, self.num_items = R.shape
        self.Base = BaselineEstimates(self.num_users, self.num_items, mu)
        self.R = R 
        self.Q = nn.Embedding(self.num_items, F)
        self.X = nn.Embedding(self.num_items, F)
        self.Y = nn.Embedding(self.num_items, F)
        
        self.Q.weight.data.normal_(0, 1/F)
        self.X.weight.data.normal_(0, 1/F)
        self.Y.weight.data.normal_(0, 1/F)
        
    def get_implicit(self):
        self.implicit_data = {} 
        users, items = R.toarray().nonzero()
        for user, item in zip(users, items):
            if user not in self.implicit_data:
                self.implicit_data[user] = []
            self.implicit_data[user].append(item)
        
    def forward(self, user, item):
        user_idx = int(user)
        
        bui = self.Base(user, item)
        Q_i = self.Q(item)
        
        sum_of_item_weights = 0
        sum_of_implicit_offset = 0
        
        for implicit in self.implicit_data[user_idx]:
            implicit_tensor = torch.LongTensor([implicit]).to(device)
            with torch.no_grad():
                buj = self.Base(user, implicit_tensor)
                
            sum_of_item_weights += (int(self.R[user,implicit].data) - buj) * self.X(implicit_tensor)
            sum_of_implicit_offset += self.Y(implicit_tensor)
            
        norm = len(self.implicit_data[user_idx]) ** -0.5        
        
        rui = bui + torch.sum(Q_i * (norm * (sum_of_item_weights + sum_of_implicit_offset)), dim = 1)
        
        return rui

In [16]:
class SVDPlusPlus(nn.Module):
    def __init__(self, R, mu, F, is_layer=False):
        super(SVDPlusPlus, self).__init__()
        self.is_layer = is_layer
        self.R = R 
        self.num_users, self.num_items = R.shape
        self.Base = BaselineEstimates(self.num_users, self.num_items, mu)
        
        self.user_embedding = nn.Embedding(self.num_users, F)
        self.item_embedding = nn.Embedding(self.num_items, F)
        
        self.Y = nn.Embedding(self.num_items, F)
        
        self.user_embedding.weight.data.normal_(0,1/F)
        self.item_embedding.weight.data.normal_(0,1/F)
        self.Y.weight.data.normal_(0,1/F)
        self.get_implicit()
        
    def get_implicit(self):
        self.implicit_data = {} 
        users, items = R.toarray().nonzero()
        for user, item in zip(users, items):
            if user not in self.implicit_data:
                self.implicit_data[user] = []
            self.implicit_data[user].append(item)
        
    def forward(self, user, item):
        user_idx = int(user)
        
        bui = self.Base(user, item)
        
        P_u = self.user_embedding(user)
        Q_i = self.item_embedding(item)
        
        sum_of_implicit_offset = 0
        for implicit in self.implicit_data[user_idx]:
            implicit_tensor = torch.LongTensor([implicit]).to(device)
            sum_of_implicit_offset += self.Y(implicit_tensor)
        
        norm = len(self.implicit_data[user_idx]) ** -0.5
        
        if self.is_layer:
            rui = torch.sum(P_u * (Q_i + norm * sum_of_implicit_offset), dim = 1)
        else:
            rui = bui + torch.sum(P_u * (Q_i + norm * sum_of_implicit_offset), dim = 1)
        
        return rui

In [17]:
class IntergratedModel(nn.Module):
    def __init__(self, R, mu, F, k):
        super(IntergratedModel, self).__init__()
        self.neighbor = NeighborhoodModel(R,mu,k)
        self.SVD = SVDPlusPlus(R,mu,F, is_layer=True)
        
        self.neighbor.get_implicit()
        self.neighbor.get_top_k()
        self.SVD.get_implicit()
        
    def forward(self, user, item):
        rui = self.neighbor(user, item) + self.SVD(user, item)
        
        return rui

## Test

In [18]:
summary = pd.read_csv('summary_with_10000user.csv')
model = AsymmetricSVD(R, mu, F)
model.get_implicit()
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01,weight_decay=0.001)
criterion = RMSELoss
early_stop_cnt = 0
early_stop_loss = 100000
idx = 3
for epoch in range(0,150):
    print(f'{idx} model | {epoch} epoch start')
    train_loss = train(model, train_dataloader, criterion, optimizer)
    val_loss = evaluate(model, test_dataloader, criterion)
    
    print(f'{idx} model epoch: {epoch}, train_loss: {train_loss}, val_loss: {val_loss}')
    summary = pd.concat([summary, pd.DataFrame([[idx, epoch, train_loss, val_loss]], columns=['model', 'epoch', 'train_rmse', 'test_rmse'])])
    
    if early_stop_loss > val_loss:
        early_stop_cnt = 0
        early_stop_loss = val_loss
    else:
        early_stop_cnt += 1 
        
    if early_stop_loss < val_loss and early_stop_cnt > 10:
        break

3 model | 0 epoch start
3 model epoch: 0, train_loss: 0.7320651157961088, val_loss: 0.8166272692952101
3 model | 1 epoch start
3 model epoch: 1, train_loss: 0.5224120013170584, val_loss: 0.8025903856490808
3 model | 2 epoch start
3 model epoch: 2, train_loss: 0.48399020533229786, val_loss: 0.8014221069244849
3 model | 3 epoch start
3 model epoch: 3, train_loss: 0.4679526876029694, val_loss: 0.8053773985699615
3 model | 4 epoch start
3 model epoch: 4, train_loss: 0.4599326261347945, val_loss: 0.8029520039860228
3 model | 5 epoch start
3 model epoch: 5, train_loss: 0.45505499284093276, val_loss: 0.8057474320815282
3 model | 6 epoch start
3 model epoch: 6, train_loss: 0.45213195224590824, val_loss: 0.8070488541111683
3 model | 7 epoch start
3 model epoch: 7, train_loss: 0.4510357283332333, val_loss: 0.8070527789020878
3 model | 8 epoch start
3 model epoch: 8, train_loss: 0.4498033411166584, val_loss: 0.8073434048612655
3 model | 9 epoch start
3 model epoch: 9, train_loss: 0.44844342519255

KeyboardInterrupt: 

In [43]:
summary = pd.DataFrame(columns=['model', 'epoch', 'train_rmse', 'test_rmse'])
models = [NeighborhoodModel(R,mu,k), SVDPlusPlus(R,mu,F), IntergratedModel(R,mu,F,k)]

for idx, model in enumerate(models):
    model = model.to(device) 
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01,weight_decay=0.001)
    criterion = RMSELoss
    early_stop_cnt = 0
    early_stop_loss = 100000
    for epoch in range(0,150):
        print(f'{idx} model | {epoch} epoch start')
        train_loss = train(model, train_dataloader, criterion, optimizer)
        val_loss = evaluate(model, test_dataloader, criterion)
        
        print(f'{idx} model epoch: {epoch}, train_loss: {train_loss}, val_loss: {val_loss}')
        summary = pd.concat([summary, pd.DataFrame([[idx, epoch, train_loss, val_loss]], columns=['model', 'epoch', 'train_rmse', 'test_rmse'])])
        
        if early_stop_loss > val_loss:
            early_stop_cnt = 0
            early_stop_loss = val_loss
        else:
            early_stop_cnt += 1 
            
        if early_stop_loss < val_loss and early_stop_cnt > 10:
            break

0 model | 0 epoch start
0 model epoch: 0, train_loss: 0.7947843706805141, val_loss: 0.8426133349641028
0 model | 1 epoch start
0 model epoch: 1, train_loss: 0.3598788026932397, val_loss: 0.7712650429135254
0 model | 2 epoch start
0 model epoch: 2, train_loss: 0.3178387293911942, val_loss: 0.7626618496286546
0 model | 3 epoch start
0 model epoch: 3, train_loss: 0.31046705799605145, val_loss: 0.7623558970829867
0 model | 4 epoch start
0 model epoch: 4, train_loss: 0.30875893647714076, val_loss: 0.7615189188263898
0 model | 5 epoch start
0 model epoch: 5, train_loss: 0.3088090806006443, val_loss: 0.7597735770572772
0 model | 6 epoch start
0 model epoch: 6, train_loss: 0.3086766629163971, val_loss: 0.7621098483800143
0 model | 7 epoch start
0 model epoch: 7, train_loss: 0.3086436802870316, val_loss: 0.7607888448591054
0 model | 8 epoch start
0 model epoch: 8, train_loss: 0.3089161259212105, val_loss: 0.7608994148480405
0 model | 9 epoch start
0 model epoch: 9, train_loss: 0.308223097695983

## Temporal Model Test

In [69]:
class TemporalDynamics(nn.Module):
    def __init__(self, R, F, mu, T):
        super(TemporalDynamics, self).__init__()
        self.R = R 
        self.mu = mu
        self.num_users, self.num_items = R.shape
        self.Q = nn.Embedding(self.num_items, F)
        self.temporal_user_biases = nn.Parameter(torch.normal(0,1,size=(self.num_users, T)))
        self.temporal_item_biases = nn.Parameter(torch.normal(0,1,size=(self.num_items, T)))
        self.temporal_user_factors = nn.Parameter(torch.normal(0,1/F,size=(self.num_users, T, F)))
        
        
    def forward(self, user, item, time_bin):
        Q_i = self.Q(item)
        P_ut = self.temporal_user_factors[user,time_bin,:]
        
        but = self.temporal_user_biases[user,time_bin]
        bit = self.temporal_item_biases[item,time_bin]
    

        rui = self.mu + torch.squeeze(but) + torch.squeeze(bit) + torch.sum(Q_i * P_ut, dim = 1)
        
        return rui

In [70]:
class Temporal_Netflix(Dataset):
    def __init__(self, df):
        self.df = df
        self.users = self.df['Cust_ID'].values
        self.items = self.df['Movie_Id'].values
        self.ratings = self.df['Rating'].values
        self.time = self.df['bins'].values
        
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        user = self.users[index]
        item = self.items[index]
        time = self.time[index]

        rating = self.ratings[index]
        
        return user, item,time, rating
    
def temporal_train(model, train_loader):
    model.train() 
    total_loss = 0 
    for user, item, time,rating in train_loader:
        user = user.to(device)
        item = item.to(device)
        time = time.to(device)
        rating = rating.to(device)
        
        optimizer.zero_grad()
        pred = model(user, item,time)
        loss = criterion(pred, rating)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    total_loss = total_loss / len(train_loader)
    
    return total_loss

def temporal_evaluate(model, test_loader):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for user, item, time,rating in test_loader:
            user = user.to(device)
            item = item.to(device)
            time = time.to(device)
            rating = rating.to(device)

            pred = model(user, item,time)
            loss = criterion(pred, rating)

            total_loss += loss.item()
    
    return total_loss / len(test_loader)

device = torch.device('cpu')

In [71]:
train_dataset = Temporal_Netflix(temporal_train_df)
test_dataset = Temporal_Netflix(temporal_test_df)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [72]:
T = sample_df['bins'].nunique()

model = TemporalDynamics(R,F,mu,T)

model = model.to(device) 
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.001)
criterion = RMSELoss
early_stop_cnt = 0
early_stop_loss = 100000
for epoch in range(0,70):
    print(f'{epoch} epoch start')
    train_loss = temporal_train(model, train_dataloader)
    val_loss = temporal_evaluate(model, test_dataloader)
    
    print(f'model epoch: {epoch}, train_loss: {train_loss}, val_loss: {val_loss}')
    summary = pd.concat([summary, pd.DataFrame([[4, epoch, train_loss, val_loss]], columns=['model', 'epoch', 'train_rmse', 'test_rmse'])])
    
    if early_stop_loss > val_loss:
        early_stop_cnt = 0
        early_stop_loss = val_loss
    else:
        early_stop_cnt += 1 
        
    if early_stop_loss < val_loss and early_stop_cnt > 10:
        break

0 epoch start
model epoch: 0, train_loss: 1.1588983413921712, val_loss: 0.9978441043604176
1 epoch start
model epoch: 1, train_loss: 0.9350918661978832, val_loss: 0.939495644723088
2 epoch start
model epoch: 2, train_loss: 0.9164878167127912, val_loss: 0.9293509276474643
3 epoch start
model epoch: 3, train_loss: 0.9119749419117924, val_loss: 0.9256476845200995
4 epoch start
model epoch: 4, train_loss: 0.9105434819861761, val_loss: 0.9249227700910538
5 epoch start
model epoch: 5, train_loss: 0.9098915659558795, val_loss: 0.9247681179052938
6 epoch start
model epoch: 6, train_loss: 0.9098770002461978, val_loss: 0.9243884682618714
7 epoch start
model epoch: 7, train_loss: 0.9099273049964394, val_loss: 0.9244546344045707
8 epoch start
model epoch: 8, train_loss: 0.9095748652809786, val_loss: 0.9245960744519783
9 epoch start
model epoch: 9, train_loss: 0.9098863587048485, val_loss: 0.9241361260797243
10 epoch start


In [49]:
summary.to_csv('summary_with_20000user.csv', index=False)

In [55]:
summary.groupby('model').agg({'train_rmse':'min', 'test_rmse':'min'})

Unnamed: 0_level_0,train_rmse,test_rmse
model,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.143589,0.495449
1,0.393381,0.691296
2,0.110757,0.520523
4,0.343014,0.907792


In [48]:
summary

Unnamed: 0,model,epoch,train_rmse,test_rmse
0,0,0,0.794784,0.842613
0,0,1,0.359879,0.771265
0,0,2,0.317839,0.762662
0,0,3,0.310467,0.762356
0,0,4,0.308759,0.761519
0,0,5,0.308809,0.759774
0,0,6,0.308677,0.76211
0,0,7,0.308644,0.760789
0,0,8,0.308916,0.760899
0,0,9,0.308223,0.763466


In [18]:
model1 = NeighborhoodModel(R,mu,k)
model2 = SVDPlusPlus(R,mu,F)
model3 = IntergratedModel(R,mu,F,k)
model4 = AsymmetricSVD(R,mu,F)

models = [model1, model2, model3, model4]

for idx, model in enumerate(models):
    model = model.to(device) 
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01,weight_decay=0.001)
    criterion = RMSELoss
    early_stop_cnt = 0
    early_stop_loss = 100000
    for epoch in range(0,150):
        print(f'{idx} model | {epoch} epoch start')
        train_loss = train(model, train_dataloader, criterion, optimizer)
        val_loss = evaluate(model, test_dataloader, criterion)
        
        print(f'{idx} model epoch: {epoch}, train_loss: {train_loss}, val_loss: {val_loss}')
        # summary = pd.concat([summary, pd.DataFrame([[idx, epoch, train_loss, val_loss]], columns=['model', 'epoch', 'train_rmse', 'test_rmse'])])
        
        if early_stop_loss > val_loss:
            early_stop_cnt = 0
            early_stop_loss = val_loss
        else:
            early_stop_cnt += 1 
            
        if early_stop_loss < val_loss and early_stop_cnt > 10:
            break

0 model | 0 epoch start
0 model epoch: 0, train_loss: 1.3408062031743364, val_loss: 1.2343095534505546
0 model | 1 epoch start
0 model epoch: 1, train_loss: 0.870291771758033, val_loss: 1.0616667870821468
0 model | 2 epoch start
0 model epoch: 2, train_loss: 0.6509648654441307, val_loss: 0.9572716262857647
0 model | 3 epoch start
0 model epoch: 3, train_loss: 0.5221291180559742, val_loss: 0.891603656858786
0 model | 4 epoch start
0 model epoch: 4, train_loss: 0.4390028937857809, val_loss: 0.8504392013570943
0 model | 5 epoch start
0 model epoch: 5, train_loss: 0.379468073676493, val_loss: 0.8117040222101667
0 model | 6 epoch start
0 model epoch: 6, train_loss: 0.33586110169076355, val_loss: 0.7821918901973912
0 model | 7 epoch start
0 model epoch: 7, train_loss: 0.30322334351387786, val_loss: 0.7589800064003424
0 model | 8 epoch start
0 model epoch: 8, train_loss: 0.27811256926170164, val_loss: 0.732978934327587
0 model | 9 epoch start
0 model epoch: 9, train_loss: 0.2589141240241999, 

AttributeError: 'AsymmetricSVD' object has no attribute 'implicit_data'

In [None]:
class Netflix(Dataset):
    def __init__(self, df):
        self.df = df
        self.users = self.df['Cust_ID'].values
        self.items = self.df['Movie_Id'].values
        self.ratings = self.df['Rating'].values
        
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        user = self.users[index]
        item = self.items[index]
        rating = self.ratings[index]
        
        return user, item, rating

In [None]:
def evaluation_through_a_top_k_recommender(df, model):
    summary = []
    rating_5_df = df.loc[df['Rating'] == 5]
    rating_no_df = df.loc[df['Rating'] != 5]
    
    rating_5_dataset = Netflix(rating_5_df)
    rating_no_dataset = Netflix(rating_no_df) 

    rating_5_dataloader = DataLoader(rating_5_dataset, batch_size=1, shuffle=True)
    rating_no_dataloader = DataLoader(rating_no_dataset, batch_size=1, shuffle=True)
    
    for user,item,rating in rating_5_dataloader:
        temp = []
        user = user.to(device)
        item = item.to(device)
        
        pred_5 = model(user, item)
        temp.append(pred_5)
        
        for user, item, rating in rating_no_dataloader:
            user = user.to(device)
            item = item.to(device)
            
            pred_no = model(user, item)
            temp.append(pred_no)
        summary.append(temp)
    
    return summary

In [None]:
def calculate_first_index_rank(lists):
    percentage = 0
    for i in lists:
        cnt = 0
        first_index = i[0]
        for idx in i:
            if idx > first_index:
                cnt += 1
        percentage += cnt / len(i)
    
    return percentage / len(lists)

In [None]:
for idx,model in enumerate(models):
    summary = evaluation_through_a_top_k_recommender(sample_df, model)
    percentage = calculate_first_index_rank(summary)
    print(f'{idx} model percentage: {percentage}')