In [None]:
# Importing initial packages
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import seaborn as sns
sns.set_style('whitegrid')
sns.set_palette("deep")
import matplotlib.pyplot as plt

# Pick the compute device: prefer the MPS backend, then CUDA, and fall back to the CPU
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# Hyperparameters: batch_size is the DataLoader batch size;
# epochs is presumably the number of training passes (training loop not shown here)
batch_size, epochs = 64, 15

In [None]:
# Both CSV files live in a local `data/` folder that is git-ignored because of its size
# (they are not pushed; the repo README says where to find them).
movies, ratings = pd.read_csv('data/movies.csv'), pd.read_csv('data/ratings.csv')

In [None]:
# Lookup tables for translating between movie ids and movie titles
movie_ids = [*movies['movieId']]

# Decoder: movieId -> title
moviesid_to_title = {
    movie_id: title
    for movie_id, title in zip(movies['movieId'], movies['title'])
}

# Encoder: title -> movieId (a title that occurs more than once keeps only its last id)
movietitle_to_id = dict(zip(moviesid_to_title.values(), moviesid_to_title.keys()))

movies.head()

## Data Preprocessing

In [None]:
# Pivot the long ratings table into a dense user x movie matrix.
# Missing (user, movie) pairs are assumed to be unwatched and are stored as a rating of 0.
user_rating_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)
user_rating_matrix_np = user_rating_matrix.values

# NOTE(review): train_test_split receives the 2-D matrix, so it appears to hold out whole
# user rows rather than individual ratings; row i of test_data is then a different user
# than row i of train_data. Confirm this is intended, since RatingsDataset uses the row
# position as the user index.
train_data, test_data = (
    torch.FloatTensor(split)
    for split in train_test_split(user_rating_matrix_np, test_size=0.2, random_state=42)
)

In [None]:
# Matrix dimensions: rows are users, columns are movies
num_users = user_rating_matrix_np.shape[0]
num_items = user_rating_matrix_np.shape[1]

In [None]:
class RatingsDataset(Dataset):
    """Dataset over the non-zero entries of a rating matrix.

    Each sample is a ``(user, item, rating)`` triple, where ``user`` and ``item``
    are the row and column positions of a non-zero entry of ``data`` and
    ``rating`` is the value stored there.

    Attributes:
        data: tuple of index tensors returned by ``data.nonzero(as_tuple=True)``.
        ratings: the non-zero values, in the same order as the indices.
    """

    def __init__(self, data):
        """Record the positions and values of every non-zero entry of ``data``."""
        rated_positions = data.nonzero(as_tuple=True)
        self.data = rated_positions
        # Indexing with the tuple of index tensors gathers exactly those entries
        self.ratings = data[rated_positions]

    def __len__(self):
        """Return the number of non-zero entries."""
        return self.data[0].size(0)

    def __getitem__(self, idx):
        """Return ``(row index, column index, rating)`` for sample ``idx``."""
        return self.data[0][idx], self.data[1][idx], self.ratings[idx]
    
# Wrap both splits as datasets of (user, item, rating) samples
train_dataset, test_dataset = RatingsDataset(train_data), RatingsDataset(test_data)

# Only the training batches are shuffled; the test order stays fixed
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Mean squared error loss (averaged over all elements)
criterion = nn.MSELoss(reduction='mean')

## Models

### Model 1

- Embed_Dim = 64
- 4 Linear Layers (128>128>64>16>1)
- 2 Dropouts (0.5, 0.4)
- 3 Batch Norms

In [None]:
class Model1(nn.Module):
    """Embedding + MLP rating predictor.

    A user embedding and an item embedding are concatenated and passed through
    four fully connected layers (2*n_embd > 128 > 64 > 16 > 1), with ReLU,
    batch norm and dropout in between. The final value is squashed by a sigmoid
    and multiplied by 5.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        n_embd: size of each user/item embedding vector.
    """

    def __init__(self, num_users, num_items, n_embd = 64):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, n_embd) # One learnable vector per user
        self.item_embedding = nn.Embedding(num_items, n_embd) # One learnable vector per item

        self.fc1 = nn.Linear(n_embd * 2, 128)                 # First fully connected (fc) layer
        self.fc2 = nn.Linear(128, 64)                         # Second fc layer
        self.fc3 = nn.Linear(64, 16)                          # Third fc layer
        # Final fc layer: its input size must match fc3's 16 outputs (it was 64,
        # which made forward() fail with a shape mismatch)
        self.fc4 = nn.Linear(16, 1)

        self.sigmoid = nn.Sigmoid()                           # Squashes the output into the range 0..1

        self.dropout1 = nn.Dropout(0.5)                       # Dropout layer to prevent overfitting
        self.dropout2 = nn.Dropout(0.4)                       # Dropout layer to prevent overfitting

        self.bn1 = nn.BatchNorm1d(128)
        self.bn2 = nn.BatchNorm1d(64)
        self.bn3 = nn.BatchNorm1d(16)


    def forward(self, user, item):
        """Predict a rating for each (user, item) index pair.

        Args:
            user: tensor of user indices into the user embedding table.
            item: tensor of item indices into the item embedding table.

        Returns:
            Tensor of predictions between 0 and 5 whose last dimension has size 1.
        """
        user_embed = self.user_embedding(user)          # Embedding our users
        item_embed = self.item_embedding(item)          # Embedding the movies

        x = torch.cat([user_embed, item_embed], dim=-1) # Concatenating the users and items
        x = torch.relu(self.fc1(x))                     # Applying first fc layer, with a ReLU activation function
        x = self.bn1(x)
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.bn2(x)
        x = self.dropout2(x)
        x = torch.relu(self.fc3(x))
        x = self.bn3(x)
        x = self.fc4(x)                                 # Final layer to get predicted rating
        x = self.sigmoid(x)                             # Applying the sigmoid function
        x = x*5                                         # Scale the output from [0,1] to [0,5]
        return x

def initialise_model1(num_users=num_users, num_items=num_items, lr=0.03, wd=2.5e-5):
    """(Re)create the global `model` and `optimizer` for a fresh Model1 run.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        lr: Adam learning rate.
        wd: Adam weight decay.

    Side effects:
        Rebinds the module-level names `model` (a Model1 moved to `device`)
        and `optimizer` (Adam over that model's parameters). Returns None.
    """
    global model, optimizer
    model = Model1(num_users, num_items).to(device)
    optimizer = optim.Adam(model.parameters(), weight_decay=wd, lr=lr)

initialise_model1()

### Model 2

- Embed_Dim = 32
- 4 Linear Layers (64>128>64>16>1)
- 2 Dropouts (0.5, 0.4)
- 3 Batch Norms

In [None]:
class Model2(nn.Module):
    """Embedding + MLP rating predictor with a smaller embedding than Model1.

    A user embedding and an item embedding are concatenated and passed through
    four fully connected layers (2*n_embd > 128 > 64 > 16 > 1), with ReLU,
    batch norm and dropout in between. The final value is squashed by a sigmoid
    and multiplied by 5.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        n_embd: size of each user/item embedding vector. Defaults to 32, as
            stated in the notebook's Model 2 description (Embed_Dim = 32).
    """

    def __init__(self, num_users, num_items, n_embd = 32):
        # Zero-argument super(): the previous super(Model1, self) raised a TypeError
        # because Model2 is not a subclass of Model1
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, n_embd) # One learnable vector per user
        self.item_embedding = nn.Embedding(num_items, n_embd) # One learnable vector per item

        self.fc1 = nn.Linear(n_embd * 2, 128)                 # First fully connected (fc) layer
        self.fc2 = nn.Linear(128, 64)                         # Second fc layer
        self.fc3 = nn.Linear(64, 16)                          # Third fc layer
        # Final fc layer: its input size must match fc3's 16 outputs (it was 64,
        # which would make forward() fail with a shape mismatch)
        self.fc4 = nn.Linear(16, 1)

        self.sigmoid = nn.Sigmoid()                           # Squashes the output into the range 0..1

        self.dropout1 = nn.Dropout(0.5)                       # Dropout layer to prevent overfitting
        self.dropout2 = nn.Dropout(0.4)                       # Dropout layer to prevent overfitting

        self.bn1 = nn.BatchNorm1d(128)
        self.bn2 = nn.BatchNorm1d(64)
        self.bn3 = nn.BatchNorm1d(16)


    def forward(self, user, item):
        """Predict a rating for each (user, item) index pair.

        Args:
            user: tensor of user indices into the user embedding table.
            item: tensor of item indices into the item embedding table.

        Returns:
            Tensor of predictions between 0 and 5 whose last dimension has size 1.
        """
        user_embed = self.user_embedding(user)          # Embedding our users
        item_embed = self.item_embedding(item)          # Embedding the movies

        x = torch.cat([user_embed, item_embed], dim=-1) # Concatenating the users and items
        x = torch.relu(self.fc1(x))                     # Applying first fc layer, with a ReLU activation function
        x = self.bn1(x)
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.bn2(x)
        x = self.dropout2(x)
        x = torch.relu(self.fc3(x))
        x = self.bn3(x)
        x = self.fc4(x)                                 # Final layer to get predicted rating
        x = self.sigmoid(x)                             # Applying the sigmoid function
        x = x*5                                         # Scale the output from [0,1] to [0,5]
        return x

def initialise_model2(num_users=num_users, num_items=num_items, lr=0.03, wd=2.5e-5):
    """(Re)create the global `model` and `optimizer` for a fresh Model2 run.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        lr: Adam learning rate.
        wd: Adam weight decay.

    Side effects:
        Rebinds the module-level names `model` (a Model2 moved to `device`)
        and `optimizer` (Adam over that model's parameters). Returns None.
    """
    global model, optimizer
    model = Model2(num_users, num_items).to(device)
    optimizer = optim.Adam(model.parameters(), weight_decay=wd, lr=lr)

initialise_model2()