In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import json
import os
from collections import Counter
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import re
import seaborn as sns
import matplotlib.pyplot as plt
import swifter
import multiprocessing
import time
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.formula.api as smf
from collections import defaultdict
from dataclasses import dataclass
from torch import nn
import torch.nn.functional as F
import torch
import random
import copy
from tqdm import tqdm

sns.set_style("darkgrid")

# Loading Processed Reviews

In [3]:
# Paths
PROCESSED_FOLDER = './data/processed/'
# PROCESSED_REVIEWS_FILE = 'processed_reviews.csv'
PROCESSED_REVIEWS_FILE = 'processed_reviews_with_sentiment.csv'

In [4]:
reviews = pd.read_csv(os.path.join(PROCESSED_FOLDER, PROCESSED_REVIEWS_FILE))

In [5]:
reviews.head()

Unnamed: 0,review_id,user_id,item_id,text,rating,tokenized_text,neg_sent_avg,neu_sent_avg,pos_sent_avg,compound_sent_avg
0,255938,0,1,"First things first. My ""reviews"" system is exp...",8,"[['First', 'things', 'first', '.'], ['My', '``...",0.073333,0.757708,0.168937,0.208675
1,259117,1,2,Let me start off by saying that Made in Abyss ...,10,"[['Let', 'me', 'start', 'off', 'by', 'saying',...",0.062741,0.759333,0.177963,0.302804
2,253664,2,3,"Art 9/10: It is great, especially the actions ...",7,"[['Art', '9/10', ':', 'It', 'is', 'great', ','...",0.047278,0.792889,0.159833,0.220767
3,247454,3,4,As someone who loves Studio Ghibli and its mov...,6,"[['As', 'someone', 'who', 'loves', 'Studio', '...",0.055577,0.809192,0.135269,0.187896
4,23791,4,5,code geass is one of those series that everybo...,10,"[['code', 'geass', 'is', 'one', 'of', 'those',...",0.028857,0.723,0.248143,0.534129


# Converting Data for Modeling

In [6]:
# Convert item_id to 0 indexed
if min(reviews['item_id']) != 0:
    reviews['item_id'] = reviews['item_id'] - 1
    print("Done")

Done


In [7]:
@dataclass
class Review:
    user_id: int
    item_id: int
    rating: int
    text: str
    pos_sent: float
    neg_sent: float
    compound_sent: float

In [8]:
USER_KEY = 'user_id'
ITEM_KEY = 'item_id'
RATING_KEY = 'rating'

In [9]:
reviews.head()

Unnamed: 0,review_id,user_id,item_id,text,rating,tokenized_text,neg_sent_avg,neu_sent_avg,pos_sent_avg,compound_sent_avg
0,255938,0,0,"First things first. My ""reviews"" system is exp...",8,"[['First', 'things', 'first', '.'], ['My', '``...",0.073333,0.757708,0.168937,0.208675
1,259117,1,1,Let me start off by saying that Made in Abyss ...,10,"[['Let', 'me', 'start', 'off', 'by', 'saying',...",0.062741,0.759333,0.177963,0.302804
2,253664,2,2,"Art 9/10: It is great, especially the actions ...",7,"[['Art', '9/10', ':', 'It', 'is', 'great', ','...",0.047278,0.792889,0.159833,0.220767
3,247454,3,3,As someone who loves Studio Ghibli and its mov...,6,"[['As', 'someone', 'who', 'loves', 'Studio', '...",0.055577,0.809192,0.135269,0.187896
4,23791,4,4,code geass is one of those series that everybo...,10,"[['code', 'geass', 'is', 'one', 'of', 'those',...",0.028857,0.723,0.248143,0.534129


In [10]:
user_to_reviews = defaultdict(list)
for _, row in reviews.iterrows():
    user_id, item_id, rating, text = row[USER_KEY], row[ITEM_KEY], row[RATING_KEY], row['text']
    pos_sent, neg_sent, compound_sent = row['pos_sent_avg'], row['neg_sent_avg'], row['compound_sent_avg']
    user_to_reviews[user_id].append(Review(user_id, item_id, rating, text, pos_sent, neg_sent, compound_sent))

## Creating the score matrix

In [11]:
# users by items
X = np.zeros(shape=(reviews['user_id'].nunique(), reviews['item_id'].nunique()))

In [12]:
for _, row in reviews.iterrows():
    user_id, item_id, rating = row[USER_KEY], row[ITEM_KEY], row[RATING_KEY]
    X[user_id][item_id] = rating

## Train/Test Split

In [13]:
train_X = copy.deepcopy(X)
valid_X = np.zeros(shape=X.shape)
test_X = np.zeros(shape=X.shape)

for user_id, reviews in user_to_reviews.items():
    # can confirm this actually shuffles properly (this code block works)
    random.shuffle(reviews)

    # Leave one out for valid
    valid_review = reviews[0]
    train_X[valid_review.user_id][valid_review.item_id] = 0
    valid_X[valid_review.user_id][valid_review.item_id] = valid_review.rating
    
    # Leave one out for test
    test_review = reviews[1]
    train_X[test_review.user_id][test_review.item_id] = 0
    test_X[test_review.user_id][test_review.item_id] = test_review.rating
    
    # Rest for train

## Creating bias terms for users / items from the training data

In [14]:
# users
user_to_pos_sent = defaultdict(list)
user_to_neg_sent = defaultdict(list)
user_to_compound_sent = defaultdict(list)

# items
item_to_pos_sent = defaultdict(list)
item_to_neg_sent = defaultdict(list)
item_to_compound_sent = defaultdict(list)

# loadding
for user_id, reviews in user_to_reviews.items():
    for r in reviews:
        # skip if not in train
        if train_X[user_id, r.item_id] == 0:
            continue
        user_to_pos_sent[user_id].append(r.pos_sent)
        user_to_neg_sent[user_id].append(r.neg_sent)
        user_to_compound_sent[user_id].append(r.compound_sent)
        item_to_pos_sent[r.item_id].append(r.pos_sent)
        item_to_neg_sent[r.item_id].append(r.neg_sent)
        item_to_compound_sent[r.item_id].append(r.compound_sent)

In [15]:
# Averaging values to get bias term
def list_mapping_to_float_mapping(hm: dict):
    id_to_sent_term = defaultdict(float)
    for k, v in hm.items():
        id_to_sent_term[k] = np.mean(v)
    return id_to_sent_term

user_to_pos_sent_term = list_mapping_to_float_mapping(user_to_pos_sent)
user_to_neg_sent_term = list_mapping_to_float_mapping(user_to_neg_sent)
user_to_compound_sent_term = list_mapping_to_float_mapping(user_to_compound_sent)
item_to_pos_sent_term = list_mapping_to_float_mapping(item_to_pos_sent)
item_to_neg_sent_term = list_mapping_to_float_mapping(item_to_neg_sent)
item_to_compound_sent_term = list_mapping_to_float_mapping(item_to_compound_sent)

## Actual NCF (w/ Separate GMF and MLP)

In [16]:
def l2_regularization(values):
    return torch.sum(torch.square(values))

 
class GMF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=20, regularization_constant=1e-6, sentiment_regularization_constant=1e-6, eps=1e-8):
        super().__init__()
        self.user_factors = nn.Embedding(num_users, embedding_dim)
        self.item_factors = nn.Embedding(num_items, embedding_dim)
        self.regularization_constant = regularization_constant
        self.sentiment_regularization_constant = sentiment_regularization_constant # unused
        self.eps = eps
        
    def forward(self, users: torch.LongTensor, items: torch.LongTensor):
        # (users, emb_dim) * (items, emb_dim) = (interactions, emb_dim)
        result_tensor = self.user_factors(users) * self.item_factors(items)
        user_latent_factors = self.user_factors(users)
        item_latent_factors = self.item_factors(items)
        pred_ratings = user_latent_factors @ item_latent_factors.T
        pred_ratings = 1 + 9 * torch.sigmoid(pred_ratings)
        return pred_ratings.diagonal()
    
    def loss(self, pred_rating: torch.LongTensor, rating: torch.LongTensor, rmse=False):
        if rmse:
            loss = torch.sqrt(F.mse_loss(pred_rating, rating) + self.eps)
        else:
            loss = F.mse_loss(pred_rating, rating) + self.eps
        
        # L2 Regularization
        sum_of_squared_values = l2_regularization(self.user_factors.weight) + l2_regularization(self.item_factors.weight)
        l2_penalty = (1/len(rating)) * self.regularization_constant * sum_of_squared_values

        # Total Loss
        total_loss = loss + l2_penalty
        return total_loss
    
    def predict_single_interaction(self, user_id: int, item_id: int):
        user = torch.LongTensor([user_id]).cuda()
        item = torch.LongTensor([item_id]).cuda()
        return self.forward(user, item)

class MLP(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=20, regularization_constant=1e-6, sentiment_regularization_constant=1e-6, eps=1e-8):
        super().__init__()
        self.user_factors = nn.Embedding(num_users, embedding_dim)
        self.item_factors = nn.Embedding(num_items, embedding_dim)
        self.regularization_constant = regularization_constant
        self.sentiment_regularization_constant = sentiment_regularization_constant # unused
        self.eps = eps
        
        # MLP layers
        self.fc1 = nn.Linear(2*embedding_dim, 128)
        self.relu1 = nn.LeakyReLU()
        self.fc2 = nn.Linear(128, 10)
        self.relu2 = nn.LeakyReLU()
        self.fc3 = nn.Linear(10, 1)
        
        
    def forward(self, users: torch.LongTensor, items: torch.LongTensor):
        # user is shape (users, 1)
        # item is shape (items, 1)
        # embedding output shape is (*, emb_dim) = (users/items, emb_dim)
        user_latent_factors = self.user_factors(users)
        item_latent_factors = self.item_factors(items)
        
        # Concat latent facts together => (*, 2*emb_dim)
        user_item_latent_factors = torch.cat((user_latent_factors, item_latent_factors), dim=1)

        # FC takes (*, in_dim) and outputs (*, out_dim)
        output = self.fc1(user_item_latent_factors)
        output = self.relu1(output)
        output = self.fc2(output)
        output = self.relu2(output)
        output = self.fc3(output)
        
        # Clip in the desired range
        pred_ratings = 1 + 9 * torch.sigmoid(output).squeeze()  

        return pred_ratings
    
    def loss(self, pred_rating: torch.LongTensor, rating: torch.LongTensor, rmse=False):
        if rmse:
            loss = torch.sqrt(F.mse_loss(pred_rating, rating) + self.eps)
        else:
            loss = F.mse_loss(pred_rating, rating) + self.eps
        
        
        # L2 Regularization
        sum_of_squared_values = l2_regularization(self.user_factors.weight) + l2_regularization(self.item_factors.weight)
        l2_penalty = (1/len(rating)) * self.regularization_constant * sum_of_squared_values
        
        # Total Loss
        total_loss = loss + l2_penalty
        return total_loss
    
    def predict_single_interaction(self, user_id: int, item_id: int):
        user = torch.LongTensor([user_id]).cuda()
        item = torch.LongTensor([item_id]).cuda()
        return self.forward(user, item)

    
class NeuMF(nn.Module):
    def __init__(self, num_users, num_items, MF_embedding_dim=20, MLP_embedding_dim=20, regularization_constant=1e-6, sentiment_regularization_constant=1e-6, eps=1e-8):
        super().__init__()
        # User factors
        self.MF_user_factors = nn.Embedding(num_users, MF_embedding_dim)
        self.MLP_user_factors = nn.Embedding(num_users, MLP_embedding_dim)
        
        # Item Factors
        self.MF_item_factors = nn.Embedding(num_users, MF_embedding_dim)
        self.MLP_item_factors = nn.Embedding(num_users, MLP_embedding_dim)

        
        # GMF Layer
        # element wise product
        
        # MLP Layers
        self.fc1 = nn.Linear(2*embedding_dim, 128)
        self.relu1 = nn.LeakyReLU()
        self.fc2 = nn.Linear(128, 10)
        self.relu2 = nn.LeakyReLU()
        self.fc3 = nn.Linear(10, 1)

        # Trainable NeuMF Layer (in the original paper it's just a hyperparameter)
        # Initialize to [0.5, 0.5]
        self.neumf_layer = nn.Linear(2, 1)
        self.neumf_layer.weight.data = torch.Tensor([[0.5, 0.5]]).to(device)
        
        ## The usual constants
        self.regularization_constant = regularization_constant
        self.sentiment_regularization_constant = sentiment_regularization_constant # unused
        self.eps = eps
        
    def forward(self, users: torch.LongTensor, items: torch.LongTensor):
        # There are all (users x emb_dim), as usual
        MF_latent_user_factors = self.MF_user_factors(users)
        MLP_latent_user_factors = self.MLP_user_factors(users)
        
        MLP_latent_item_factors = self.MLP_item_factors(items)
        MF_latent_item_factors = self.MF_item_factors(items)
        
        # GMF Layer
        gmf_output = (MF_latent_user_factors @ MF_latent_item_factors.T).diagonal().unsqueeze(1)
        
        # MLP Layers
        concat_latent_factors = torch.cat((MLP_latent_user_factors, MLP_latent_item_factors), dim=1)
        mlp_output = self.fc1(concat_latent_factors)
        mlp_output = self.relu1(mlp_output)
        mlp_output = self.fc2(mlp_output)
        mlp_output = self.relu2(mlp_output)
        mlp_output = self.fc3(mlp_output)
        
        # NeuMF Layer
        combined_output = torch.cat((gmf_output, mlp_output), dim=1)
        pred_ratings = self.neumf_layer(combined_output).squeeze()
        pred_ratings = 1 + 9 * torch.sigmoid(pred_ratings)
        
        return pred_ratings
    
    def loss(self, pred_rating: torch.LongTensor, rating: torch.LongTensor, rmse=False):
        if rmse:
            loss = torch.sqrt(F.mse_loss(pred_rating, rating) + self.eps)
        else:
            loss = F.mse_loss(pred_rating, rating) + self.eps
        
        # L2 Regularization
        sum_of_squared_values = l2_regularization(self.MF_user_factors.weight) + l2_regularization(self.MF_item_factors.weight)
        sum_of_squared_values += l2_regularization(self.MLP_user_factors.weight)
        sum_of_squared_values += l2_regularization(self.MLP_item_factors.weight)
        for f in [self.fc1, self.fc2, self.fc3]:
            sum_of_squared_values += l2_regularization(f.weight)
        l2_penalty = (1/len(rating)) * self.regularization_constant * sum_of_squared_values

        # Total Loss
        total_loss = loss + l2_penalty
        return total_loss
    
    def predict_single_interaction(self, user_id: int, item_id: int):
        user = torch.LongTensor([user_id]).cuda()
        item = torch.LongTensor([item_id]).cuda()
        return self.forward(user, item)

In [17]:
def eval_MSE_loss(eval_X, model, round_digits=3):
    """Uses reduction mean"""
    user_ids_list, item_ids_list = eval_X.nonzero()
    gt_ratings = torch.FloatTensor([eval_X[user_id, item_id] for user_id, item_id in zip(user_ids_list, item_ids_list)]).to(device)
    curr_users_tensor = torch.LongTensor(user_ids_list).to(device)
    curr_items_tensor = torch.LongTensor(item_ids_list).to(device)
    pred_ratings = model.forward(curr_users_tensor, curr_items_tensor)
    
    return round(F.mse_loss(pred_ratings, gt_ratings).item(), 3)

def eval_RMSE_loss(eval_X, model):
    """Uses reduction mean"""
    user_ids_list, item_ids_list = eval_X.nonzero()
    gt_ratings = torch.FloatTensor([eval_X[user_id, item_id] for user_id, item_id in zip(user_ids_list, item_ids_list)]).to(device)
    curr_users_tensor = torch.LongTensor(user_ids_list).to(device)
    curr_items_tensor = torch.LongTensor(item_ids_list).to(device)
    pred_ratings = model.forward(curr_users_tensor, curr_items_tensor)
    
    return round(torch.sqrt(F.mse_loss(pred_ratings, gt_ratings)).item(), 3)

def eval_MAE_loss(eval_X, model):
    """Uses reduction mean"""
    user_ids_list, item_ids_list = eval_X.nonzero()
    gt_ratings = torch.FloatTensor([eval_X[user_id, item_id] for user_id, item_id in zip(user_ids_list, item_ids_list)]).to(device)
    curr_users_tensor = torch.LongTensor(user_ids_list).to(device)
    curr_items_tensor = torch.LongTensor(item_ids_list).to(device)
    pred_ratings = model.forward(curr_users_tensor, curr_items_tensor)
    
    return round(F.l1_loss(pred_ratings, gt_ratings).item(), 3)


def train_v2(train_X, valid_X, model, optimizer, n_epochs=10, batch_size=5, rmse=False):
    """Training Function, calculates training and validation loss"""
    
    for epoch in (range(1, n_epochs+1)):
        users, items = train_X.nonzero()
        num_examples = len(users)
        permuted_indices = np.random.permutation(num_examples)
        users, items = users[permuted_indices], items[permuted_indices]
        

        total_train_loss = 0
        
        
        for i in tqdm(range(num_examples // batch_size)):
            user_ids_list = users[i*batch_size:i*batch_size+batch_size]
            item_ids_list = items[i*batch_size:i*batch_size+batch_size]

            # Set gradients to zero
            optimizer.zero_grad()

            # Turn data into tensors
            ratings = torch.FloatTensor([train_X[user_id, item_id] for user_id, item_id in zip(user_ids_list, item_ids_list)]).to(device)
            curr_users_tensor = torch.LongTensor(user_ids_list).to(device)
            curr_items_tensor = torch.LongTensor(item_ids_list).to(device)

            # Predict and calculate loss
            pred_ratings = model.forward(curr_users_tensor, curr_items_tensor)
            assert pred_ratings.shape == ratings.shape
            
            ## SELECTING LOSS HERE
            # loss = model.loss(pred_rating, rating)
            loss = model.loss(pred_ratings, ratings, rmse=rmse)

            # Backpropagate
            loss.backward()

            # Update the parameters
            optimizer.step()

            # MSE Loss w/o regularization (just for status updates)
            total_train_loss += F.mse_loss(pred_ratings, ratings, reduction='sum')

        # Computing validation loss for display
        total_valid_loss = eval_MSE_loss(valid_X, model)
        total_valid_RMSE_loss = eval_RMSE_loss(valid_X, model)
        total_valid_MAE_loss = eval_MAE_loss(valid_X, model)
        
        print(f"Epoch {epoch} MSE Loss: {round(total_train_loss.item() / (batch_size * (num_examples//batch_size)), 3)}, valid MSE Loss: {total_valid_loss}, valid RMSE Loss: {total_valid_RMSE_loss}, valid MAE Loss: {total_valid_MAE_loss}")

In [18]:
def eval_model(test_X, model):
    total_test_loss = eval_MSE_loss(test_X, model)
    total_test_RMSE_loss = eval_RMSE_loss(test_X, model)
    total_test_MAE_loss = eval_MAE_loss(test_X, model)
    print(f"test MSE Loss: {total_test_loss}, test RMSE Loss: {total_test_RMSE_loss}, test MAE Loss: {total_test_MAE_loss}")

## Config Cuda

In [19]:
!nvidia-smi

Tue May  3 04:59:53 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   66C    P0   294W / 300W |  32444MiB / 32510MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   48C    P0    87W / 300W |  23318MiB / 32510MiB |     10%      Default |
|       

In [20]:
## Forcing GPU
assert torch.cuda.is_available()
torch.cuda.set_device("cuda:5")
device = torch.device("cuda")
a = torch.tensor([[1., 2.], [3., 4.]]).to(device)
a

tensor([[1., 2.],
        [3., 4.]], device='cuda:5')

## Training GMF and MLP Independently
As per the NCF paper, we must train GMF and MLP separately. Then, we initialize NeuMF using the pretrained models of GMF and MLP

### Training GMF

In [21]:
# Params
embedding_dim=200
lr=.01
regularization_constant=.01

# Test
gmf = GMF(num_users=X.shape[0], num_items=X.shape[1], embedding_dim=embedding_dim, regularization_constant=regularization_constant)
optimizer = torch.optim.Adam(gmf.parameters(), lr=lr)
gmf.to(device)
gmf.predict_single_interaction(0,0)

tensor([8.9524], device='cuda:5', grad_fn=<DiagonalBackward0>)

In [22]:
train_v2(train_X, valid_X, gmf, optimizer, n_epochs=25, batch_size=64, rmse=True)

100%|████████████████████████████████| 508/508 [00:01<00:00, 355.80it/s]


Epoch 1 MSE Loss: 15.632, valid MSE Loss: 8.869, valid RMSE Loss: 2.978, valid MAE Loss: 2.45


100%|████████████████████████████████| 508/508 [00:01<00:00, 354.33it/s]


Epoch 2 MSE Loss: 4.077, valid MSE Loss: 4.958, valid RMSE Loss: 2.227, valid MAE Loss: 1.841


100%|████████████████████████████████| 508/508 [00:01<00:00, 285.26it/s]


Epoch 3 MSE Loss: 2.379, valid MSE Loss: 4.184, valid RMSE Loss: 2.045, valid MAE Loss: 1.679


100%|████████████████████████████████| 508/508 [00:01<00:00, 445.80it/s]


Epoch 4 MSE Loss: 2.195, valid MSE Loss: 4.014, valid RMSE Loss: 2.004, valid MAE Loss: 1.624


100%|████████████████████████████████| 508/508 [00:01<00:00, 396.18it/s]


Epoch 5 MSE Loss: 2.307, valid MSE Loss: 4.027, valid RMSE Loss: 2.007, valid MAE Loss: 1.596


100%|████████████████████████████████| 508/508 [00:01<00:00, 398.67it/s]


Epoch 6 MSE Loss: 2.458, valid MSE Loss: 4.032, valid RMSE Loss: 2.008, valid MAE Loss: 1.607


100%|████████████████████████████████| 508/508 [00:01<00:00, 307.35it/s]


Epoch 7 MSE Loss: 2.537, valid MSE Loss: 4.17, valid RMSE Loss: 2.042, valid MAE Loss: 1.629


100%|████████████████████████████████| 508/508 [00:01<00:00, 395.69it/s]


Epoch 8 MSE Loss: 2.541, valid MSE Loss: 3.995, valid RMSE Loss: 1.999, valid MAE Loss: 1.598


100%|████████████████████████████████| 508/508 [00:01<00:00, 348.58it/s]


Epoch 9 MSE Loss: 2.467, valid MSE Loss: 4.03, valid RMSE Loss: 2.008, valid MAE Loss: 1.612


100%|████████████████████████████████| 508/508 [00:01<00:00, 374.65it/s]


Epoch 10 MSE Loss: 2.363, valid MSE Loss: 3.861, valid RMSE Loss: 1.965, valid MAE Loss: 1.57


100%|████████████████████████████████| 508/508 [00:01<00:00, 324.82it/s]


Epoch 11 MSE Loss: 2.255, valid MSE Loss: 3.929, valid RMSE Loss: 1.982, valid MAE Loss: 1.599


100%|████████████████████████████████| 508/508 [00:01<00:00, 348.41it/s]


Epoch 12 MSE Loss: 2.15, valid MSE Loss: 3.749, valid RMSE Loss: 1.936, valid MAE Loss: 1.551


100%|████████████████████████████████| 508/508 [00:01<00:00, 304.72it/s]


Epoch 13 MSE Loss: 2.047, valid MSE Loss: 3.809, valid RMSE Loss: 1.952, valid MAE Loss: 1.561


100%|████████████████████████████████| 508/508 [00:01<00:00, 325.73it/s]


Epoch 14 MSE Loss: 1.963, valid MSE Loss: 3.739, valid RMSE Loss: 1.934, valid MAE Loss: 1.557


100%|████████████████████████████████| 508/508 [00:01<00:00, 317.56it/s]


Epoch 15 MSE Loss: 1.923, valid MSE Loss: 3.679, valid RMSE Loss: 1.918, valid MAE Loss: 1.53


100%|████████████████████████████████| 508/508 [00:01<00:00, 333.72it/s]


Epoch 16 MSE Loss: 1.845, valid MSE Loss: 3.682, valid RMSE Loss: 1.919, valid MAE Loss: 1.534


100%|████████████████████████████████| 508/508 [00:01<00:00, 377.83it/s]


Epoch 17 MSE Loss: 1.793, valid MSE Loss: 3.688, valid RMSE Loss: 1.92, valid MAE Loss: 1.525


100%|████████████████████████████████| 508/508 [00:01<00:00, 335.24it/s]


Epoch 18 MSE Loss: 1.733, valid MSE Loss: 3.737, valid RMSE Loss: 1.933, valid MAE Loss: 1.547


100%|████████████████████████████████| 508/508 [00:01<00:00, 300.84it/s]


Epoch 19 MSE Loss: 1.708, valid MSE Loss: 3.669, valid RMSE Loss: 1.915, valid MAE Loss: 1.518


100%|████████████████████████████████| 508/508 [00:01<00:00, 316.76it/s]


Epoch 20 MSE Loss: 1.681, valid MSE Loss: 3.647, valid RMSE Loss: 1.91, valid MAE Loss: 1.521


100%|████████████████████████████████| 508/508 [00:01<00:00, 372.58it/s]


Epoch 21 MSE Loss: 1.642, valid MSE Loss: 3.65, valid RMSE Loss: 1.91, valid MAE Loss: 1.517


100%|████████████████████████████████| 508/508 [00:01<00:00, 352.64it/s]


Epoch 22 MSE Loss: 1.625, valid MSE Loss: 3.635, valid RMSE Loss: 1.907, valid MAE Loss: 1.512


100%|████████████████████████████████| 508/508 [00:01<00:00, 349.58it/s]


Epoch 23 MSE Loss: 1.623, valid MSE Loss: 3.661, valid RMSE Loss: 1.913, valid MAE Loss: 1.516


100%|████████████████████████████████| 508/508 [00:01<00:00, 450.97it/s]


Epoch 24 MSE Loss: 1.614, valid MSE Loss: 3.625, valid RMSE Loss: 1.904, valid MAE Loss: 1.513


100%|████████████████████████████████| 508/508 [00:01<00:00, 366.07it/s]


Epoch 25 MSE Loss: 1.581, valid MSE Loss: 3.702, valid RMSE Loss: 1.924, valid MAE Loss: 1.532


In [23]:
eval_model(test_X, gmf)

test MSE Loss: 3.83, test RMSE Loss: 1.957, test MAE Loss: 1.562


## Training MLP

In [24]:
embedding_dim=200
lr=1e-2
regularization_constant=.011

mlp = MLP(num_users=X.shape[0], num_items=X.shape[1], 
                    embedding_dim=embedding_dim, 
                    regularization_constant=regularization_constant,
                )
optimizer = torch.optim.Adam(mlp.parameters(), lr=lr)
mlp.to(device)
mlp.predict_single_interaction(0,0)

tensor(5.2639, device='cuda:5', grad_fn=<AddBackward0>)

In [25]:
train_v2(train_X, valid_X, mlp, optimizer, n_epochs=25, batch_size=64, rmse=True)

100%|████████████████████████████████| 508/508 [00:02<00:00, 225.16it/s]


Epoch 1 MSE Loss: 3.856, valid MSE Loss: 3.522, valid RMSE Loss: 1.877, valid MAE Loss: 1.445


100%|████████████████████████████████| 508/508 [00:02<00:00, 195.33it/s]


Epoch 2 MSE Loss: 3.239, valid MSE Loss: 3.286, valid RMSE Loss: 1.813, valid MAE Loss: 1.364


100%|████████████████████████████████| 508/508 [00:02<00:00, 204.30it/s]


Epoch 3 MSE Loss: 2.951, valid MSE Loss: 3.107, valid RMSE Loss: 1.763, valid MAE Loss: 1.342


100%|████████████████████████████████| 508/508 [00:02<00:00, 225.59it/s]


Epoch 4 MSE Loss: 2.808, valid MSE Loss: 3.024, valid RMSE Loss: 1.739, valid MAE Loss: 1.316


100%|████████████████████████████████| 508/508 [00:02<00:00, 207.80it/s]


Epoch 5 MSE Loss: 2.691, valid MSE Loss: 3.058, valid RMSE Loss: 1.749, valid MAE Loss: 1.363


100%|████████████████████████████████| 508/508 [00:02<00:00, 194.56it/s]


Epoch 6 MSE Loss: 2.651, valid MSE Loss: 2.938, valid RMSE Loss: 1.714, valid MAE Loss: 1.291


100%|████████████████████████████████| 508/508 [00:02<00:00, 192.17it/s]


Epoch 7 MSE Loss: 2.591, valid MSE Loss: 2.947, valid RMSE Loss: 1.717, valid MAE Loss: 1.29


100%|████████████████████████████████| 508/508 [00:02<00:00, 187.39it/s]


Epoch 8 MSE Loss: 2.549, valid MSE Loss: 2.996, valid RMSE Loss: 1.731, valid MAE Loss: 1.285


100%|████████████████████████████████| 508/508 [00:02<00:00, 217.60it/s]


Epoch 9 MSE Loss: 2.572, valid MSE Loss: 3.016, valid RMSE Loss: 1.737, valid MAE Loss: 1.295


100%|████████████████████████████████| 508/508 [00:02<00:00, 197.69it/s]


Epoch 10 MSE Loss: 2.514, valid MSE Loss: 2.901, valid RMSE Loss: 1.703, valid MAE Loss: 1.287


100%|████████████████████████████████| 508/508 [00:02<00:00, 193.78it/s]


Epoch 11 MSE Loss: 2.476, valid MSE Loss: 3.009, valid RMSE Loss: 1.735, valid MAE Loss: 1.306


100%|████████████████████████████████| 508/508 [00:02<00:00, 232.97it/s]


Epoch 12 MSE Loss: 2.488, valid MSE Loss: 2.923, valid RMSE Loss: 1.71, valid MAE Loss: 1.271


100%|████████████████████████████████| 508/508 [00:02<00:00, 231.62it/s]


Epoch 13 MSE Loss: 2.461, valid MSE Loss: 2.89, valid RMSE Loss: 1.7, valid MAE Loss: 1.284


100%|████████████████████████████████| 508/508 [00:02<00:00, 218.80it/s]


Epoch 14 MSE Loss: 2.437, valid MSE Loss: 2.919, valid RMSE Loss: 1.708, valid MAE Loss: 1.27


100%|████████████████████████████████| 508/508 [00:02<00:00, 197.00it/s]


Epoch 15 MSE Loss: 2.429, valid MSE Loss: 3.0, valid RMSE Loss: 1.732, valid MAE Loss: 1.282


100%|████████████████████████████████| 508/508 [00:02<00:00, 189.48it/s]


Epoch 16 MSE Loss: 2.406, valid MSE Loss: 3.015, valid RMSE Loss: 1.736, valid MAE Loss: 1.301


100%|████████████████████████████████| 508/508 [00:02<00:00, 196.73it/s]


Epoch 17 MSE Loss: 2.393, valid MSE Loss: 2.921, valid RMSE Loss: 1.709, valid MAE Loss: 1.285


100%|████████████████████████████████| 508/508 [00:02<00:00, 206.05it/s]


Epoch 18 MSE Loss: 2.368, valid MSE Loss: 3.016, valid RMSE Loss: 1.737, valid MAE Loss: 1.28


100%|████████████████████████████████| 508/508 [00:02<00:00, 192.12it/s]


Epoch 19 MSE Loss: 2.365, valid MSE Loss: 2.931, valid RMSE Loss: 1.712, valid MAE Loss: 1.275


100%|████████████████████████████████| 508/508 [00:02<00:00, 216.39it/s]


Epoch 20 MSE Loss: 2.344, valid MSE Loss: 2.985, valid RMSE Loss: 1.728, valid MAE Loss: 1.306


100%|████████████████████████████████| 508/508 [00:02<00:00, 197.26it/s]


Epoch 21 MSE Loss: 2.325, valid MSE Loss: 2.962, valid RMSE Loss: 1.721, valid MAE Loss: 1.285


100%|████████████████████████████████| 508/508 [00:02<00:00, 210.23it/s]


Epoch 22 MSE Loss: 2.315, valid MSE Loss: 3.02, valid RMSE Loss: 1.738, valid MAE Loss: 1.31


100%|████████████████████████████████| 508/508 [00:02<00:00, 200.40it/s]


Epoch 23 MSE Loss: 2.314, valid MSE Loss: 3.08, valid RMSE Loss: 1.755, valid MAE Loss: 1.298


100%|████████████████████████████████| 508/508 [00:02<00:00, 190.72it/s]


Epoch 24 MSE Loss: 2.272, valid MSE Loss: 3.127, valid RMSE Loss: 1.768, valid MAE Loss: 1.332


100%|████████████████████████████████| 508/508 [00:01<00:00, 256.55it/s]


Epoch 25 MSE Loss: 2.272, valid MSE Loss: 2.98, valid RMSE Loss: 1.726, valid MAE Loss: 1.287


In [26]:
eval_model(test_X, mlp)

test MSE Loss: 3.196, test RMSE Loss: 1.788, test MAE Loss: 1.335


## Training the combined model

### Hyper Params

In [27]:
MF_embedding_dim=200
MLP_embedding_dim=200
lr=1e-2
regularization_constant=1e-2

neumf = NeuMF(num_users=X.shape[0], num_items=X.shape[1], 
                    MF_embedding_dim=MF_embedding_dim, 
                    MLP_embedding_dim=MLP_embedding_dim,
                    regularization_constant=regularization_constant,
                )

optimizer = torch.optim.SGD(mlp.parameters(), lr=lr)
neumf.to(device)
neumf.predict_single_interaction(0,0)

tensor(1.0486, device='cuda:5', grad_fn=<AddBackward0>)

### Assign the pretrained weights

In [28]:
def init_pretrained(NeuMF_model, GMF_model, MLP_model):
    # Latent weights
    NeuMF_model.MF_user_factors.weight = GMF_model.user_factors.weight
    NeuMF_model.MF_item_factors.weight = GMF_model.item_factors.weight
    NeuMF_model.MLP_user_factors.weight = MLP_model.user_factors.weight
    NeuMF_model.MLP_item_factors.weight = MLP_model.item_factors.weight
    
    # MLP weights
    NeuMF_model.fc1.weight = MLP_model.fc1.weight
    NeuMF_model.fc2.weight = MLP_model.fc2.weight
    NeuMF_model.fc3.weight = MLP_model.fc3.weight
    
init_pretrained(neumf, gmf, mlp)

### Train!

In [31]:
train_v2(train_X, valid_X, neumf, optimizer, n_epochs=25, batch_size=64, rmse=True)

100%|████████████████████████████████| 508/508 [00:02<00:00, 184.47it/s]


Epoch 1 MSE Loss: 0.997, valid MSE Loss: 3.125, valid RMSE Loss: 1.768, valid MAE Loss: 1.331


100%|████████████████████████████████| 508/508 [00:02<00:00, 182.54it/s]


Epoch 2 MSE Loss: 0.989, valid MSE Loss: 3.137, valid RMSE Loss: 1.771, valid MAE Loss: 1.334


100%|████████████████████████████████| 508/508 [00:02<00:00, 189.21it/s]


Epoch 3 MSE Loss: 0.985, valid MSE Loss: 3.138, valid RMSE Loss: 1.772, valid MAE Loss: 1.332


100%|████████████████████████████████| 508/508 [00:02<00:00, 182.80it/s]


Epoch 4 MSE Loss: 0.979, valid MSE Loss: 3.14, valid RMSE Loss: 1.772, valid MAE Loss: 1.337


100%|████████████████████████████████| 508/508 [00:02<00:00, 191.04it/s]


Epoch 5 MSE Loss: 0.973, valid MSE Loss: 3.117, valid RMSE Loss: 1.766, valid MAE Loss: 1.328


100%|████████████████████████████████| 508/508 [00:02<00:00, 185.17it/s]


Epoch 6 MSE Loss: 0.967, valid MSE Loss: 3.126, valid RMSE Loss: 1.768, valid MAE Loss: 1.332


100%|████████████████████████████████| 508/508 [00:02<00:00, 181.89it/s]


Epoch 7 MSE Loss: 0.962, valid MSE Loss: 3.119, valid RMSE Loss: 1.766, valid MAE Loss: 1.33


100%|████████████████████████████████| 508/508 [00:02<00:00, 177.68it/s]


Epoch 8 MSE Loss: 0.956, valid MSE Loss: 3.128, valid RMSE Loss: 1.769, valid MAE Loss: 1.335


100%|████████████████████████████████| 508/508 [00:02<00:00, 204.88it/s]


Epoch 9 MSE Loss: 0.951, valid MSE Loss: 3.126, valid RMSE Loss: 1.768, valid MAE Loss: 1.331


100%|████████████████████████████████| 508/508 [00:02<00:00, 206.20it/s]


Epoch 10 MSE Loss: 0.946, valid MSE Loss: 3.097, valid RMSE Loss: 1.76, valid MAE Loss: 1.322


100%|████████████████████████████████| 508/508 [00:02<00:00, 208.30it/s]


Epoch 11 MSE Loss: 0.941, valid MSE Loss: 3.12, valid RMSE Loss: 1.766, valid MAE Loss: 1.326


100%|████████████████████████████████| 508/508 [00:02<00:00, 206.62it/s]


Epoch 12 MSE Loss: 0.936, valid MSE Loss: 3.102, valid RMSE Loss: 1.761, valid MAE Loss: 1.321


100%|████████████████████████████████| 508/508 [00:02<00:00, 191.13it/s]


Epoch 13 MSE Loss: 0.931, valid MSE Loss: 3.11, valid RMSE Loss: 1.764, valid MAE Loss: 1.323


100%|████████████████████████████████| 508/508 [00:02<00:00, 187.01it/s]


Epoch 14 MSE Loss: 0.926, valid MSE Loss: 3.108, valid RMSE Loss: 1.763, valid MAE Loss: 1.326


100%|████████████████████████████████| 508/508 [00:02<00:00, 198.49it/s]


Epoch 15 MSE Loss: 0.921, valid MSE Loss: 3.137, valid RMSE Loss: 1.771, valid MAE Loss: 1.33


100%|████████████████████████████████| 508/508 [00:02<00:00, 187.76it/s]


Epoch 16 MSE Loss: 0.917, valid MSE Loss: 3.106, valid RMSE Loss: 1.762, valid MAE Loss: 1.321


100%|████████████████████████████████| 508/508 [00:03<00:00, 168.32it/s]


Epoch 17 MSE Loss: 0.912, valid MSE Loss: 3.118, valid RMSE Loss: 1.766, valid MAE Loss: 1.326


100%|████████████████████████████████| 508/508 [00:02<00:00, 183.26it/s]


Epoch 18 MSE Loss: 0.908, valid MSE Loss: 3.127, valid RMSE Loss: 1.768, valid MAE Loss: 1.33


100%|████████████████████████████████| 508/508 [00:02<00:00, 181.09it/s]


Epoch 19 MSE Loss: 0.902, valid MSE Loss: 3.128, valid RMSE Loss: 1.769, valid MAE Loss: 1.325


100%|████████████████████████████████| 508/508 [00:02<00:00, 191.66it/s]


Epoch 20 MSE Loss: 0.898, valid MSE Loss: 3.106, valid RMSE Loss: 1.762, valid MAE Loss: 1.323


100%|████████████████████████████████| 508/508 [00:02<00:00, 199.91it/s]


Epoch 21 MSE Loss: 0.893, valid MSE Loss: 3.109, valid RMSE Loss: 1.763, valid MAE Loss: 1.324


100%|████████████████████████████████| 508/508 [00:02<00:00, 205.94it/s]


Epoch 22 MSE Loss: 0.888, valid MSE Loss: 3.134, valid RMSE Loss: 1.77, valid MAE Loss: 1.327


100%|████████████████████████████████| 508/508 [00:02<00:00, 215.59it/s]


Epoch 23 MSE Loss: 0.884, valid MSE Loss: 3.11, valid RMSE Loss: 1.764, valid MAE Loss: 1.323


100%|████████████████████████████████| 508/508 [00:02<00:00, 203.18it/s]


Epoch 24 MSE Loss: 0.879, valid MSE Loss: 3.112, valid RMSE Loss: 1.764, valid MAE Loss: 1.322


100%|████████████████████████████████| 508/508 [00:02<00:00, 181.09it/s]


Epoch 25 MSE Loss: 0.874, valid MSE Loss: 3.112, valid RMSE Loss: 1.764, valid MAE Loss: 1.321


## Evaluation

In [32]:
total_test_loss = eval_MSE_loss(test_X, neumf)
total_test_RMSE_loss = eval_RMSE_loss(test_X, neumf)
total_test_MAE_loss = eval_MAE_loss(test_X, neumf)
print(f"test MSE Loss: {total_test_loss}, test RMSE Loss: {total_test_RMSE_loss}, test MAE Loss: {total_test_MAE_loss}")

test MSE Loss: 3.323, test RMSE Loss: 1.823, test MAE Loss: 1.376
