In [122]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# pick device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:", device)

Using device: mps


In [123]:
movieData = pd.read_csv("data/ratings.csv")
movieData.drop("timestamp", axis = 1, inplace=True)

In [124]:
# Scaling ratings

ratings, min_rating, max_rating = movieData["rating"], movieData["rating"].min(), movieData["rating"].max()
movieData["rating"] = (ratings - min_rating)/ (max_rating - min_rating)


In [125]:
# Reindexing user Ids and Movie Ids

unqusers = movieData["userId"].unique()
unqmovies = movieData["movieId"].unique()

nUsers = movieData["userId"].nunique()
nMovies = movieData["movieId"].nunique()


userIndex = {old : new for new, old in enumerate(unqusers)}
movieIndex = {old : new for new, old in enumerate(unqmovies)}
reverse_userIndex = {v : k for k, v in userIndex.items()}
reverse_movieIndex = {v : k for k, v in movieIndex.items()}

movieData["userId"] = movieData["userId"].map(userIndex)
movieData["movieId"] = movieData["movieId"].map(movieIndex)

In [126]:
movieData.head(5)

Unnamed: 0,userId,movieId,rating
0,0,0,0.777778
1,0,1,0.777778
2,0,2,0.777778
3,0,3,1.0
4,0,4,1.0


In [127]:
# Preparing the data

users = torch.LongTensor(movieData["userId"].values)
items = torch.LongTensor(movieData["movieId"].values)
ratings = torch.FloatTensor(movieData["rating"].values)

dataset = TensorDataset(users, items, ratings)
loader = DataLoader(dataset, batch_size=512, shuffle=True)

In [None]:
# Matrix Factorization Model

class MFModel(nn.Module):
    "Matrix Factorization Model"
    
    def __init__(self, num_users: int, num_items: int, embedding_dim: int):
        "Initializing the parameters"
        super().__init__()
        
        self.user_embeddings = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
        self.item_embeddings = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)
        
        nn.init.normal_(self.user_embeddings.weight, std=0.01)
        nn.init.normal_(self.item_embeddings.weight, std=0.01)
        

    def forward(self, users: torch.tensor, items: torch.tensor):
        "Performs the forward pass"
        
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)
        
        out = (user_embeddings * item_embeddings).sum(dim = 1)
        
        return out
        

In [129]:
# Setting up the model

model = MFModel(nUsers, nMovies, embedding_dim=20)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

In [130]:
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    
    for batch_users, batch_items, batch_ratings in loader:
        optimizer.zero_grad()
        
        preds = model(batch_users, batch_items)
        loss = criterion(preds, batch_ratings)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(loader):.4f}")


Epoch 1/10, Loss: 0.4497
Epoch 2/10, Loss: 0.1641
Epoch 3/10, Loss: 0.0911
Epoch 4/10, Loss: 0.0744
Epoch 5/10, Loss: 0.0674
Epoch 6/10, Loss: 0.0638
Epoch 7/10, Loss: 0.0616
Epoch 8/10, Loss: 0.0604
Epoch 9/10, Loss: 0.0595
Epoch 10/10, Loss: 0.0588


In [131]:
import math

def evaluate_rmse(model, data_loader, criterion=nn.MSELoss()):
    model.eval()
    total_loss = 0.0
    count = 0
    
    with torch.no_grad():
        for users, items, ratings in data_loader:
            preds = model(users, items)
            # make sure preds and ratings have same shape
            if preds.dim() > 1:
                preds = preds.view(-1)
            loss = criterion(preds, ratings)
            total_loss += loss.item() * len(ratings)  # accumulate weighted by batch size
            count += len(ratings)
    
    mse = total_loss / count
    rmse = math.sqrt(mse)
    return rmse

In [132]:
mf_rmse = evaluate_rmse(model, loader)
print("MF RMSE:", mf_rmse)

MF RMSE: 0.23313689092011203
