In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [2]:
# Load the headerless, "::"-delimited ratings file. A separator longer than one
# character is treated as a regex by pandas, which requires the python engine.
df = pd.read_csv(
    "data/ratings.dat",
    sep="::",
    engine="python",
    names=["user", "item", "rating", "timestamp"],
)
df.head()

Unnamed: 0,user,item,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


 ### Cleaning & Encoding the Data

In [None]:
# Dropping timestamp (in place).
# NOTE: this cell is not re-runnable on its own -- a second run raises KeyError
# because the column is already gone; reload `df` first.
df.drop(columns=["timestamp"], inplace=True)

# Mapping users and items to contiguous 0-based indices, numbered in order of
# first appearance in the ratings frame
user_ids = df["user"].unique()
item_ids = df["item"].unique()

user2idx = dict(zip(user_ids, range(len(user_ids))))
item2idx = dict(zip(item_ids, range(len(item_ids))))

# Overwrite the raw ids with their new indices
df["user"] = df["user"].map(user2idx)
df["item"] = df["item"].map(item2idx)

# Vocabulary sizes
n_users, n_items = len(user2idx), len(item2idx)

df.head()

Unnamed: 0,user,item,rating
0,0,0,5
1,0,1,3
2,0,2,3
3,0,3,4
4,0,4,5


### Dataset Class

In [5]:
class RatingsDataset(Dataset):
    """Map-style dataset over a ratings frame.

    `df` must provide "user", "item" and "rating" columns. The columns are
    kept as NumPy arrays; ratings are cast to float32.
    """

    def __init__(self, df):
        self.users = df["user"].values
        self.items = df["item"].values
        self.ratings = df["rating"].values.astype(np.float32)

    def __len__(self):
        # One sample per rating row.
        return self.ratings.shape[0]

    def __getitem__(self, idx):
        # Return (user, item, rating), each wrapped in its own tensor.
        columns = (self.users, self.items, self.ratings)
        return tuple(torch.tensor(column[idx]) for column in columns)

### Train-Test Split and DataLoaders

In [None]:
# Splitting the dataset: 20% held out for testing, fixed random_state so the
# split itself is repeatable
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Creating datasets
train_dataset, test_dataset = RatingsDataset(train_df), RatingsDataset(test_df)

# Creating data loaders: only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=256, shuffle=False)

### MLP Model

In [13]:
class MLPRecommender(nn.Module):
    """Predict a rating from a (user, item) index pair.

    The user and item embeddings are concatenated and passed through a
    three-layer MLP (128 -> 64 -> 1) with ReLU activations and dropout.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        embedding_dim: Width of each embedding vector.
    """

    def __init__(self, n_users, n_items, embedding_dim=64):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

        self.layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
        )

    def forward(self, user, item):
        """Return one predicted rating per (user, item) pair.

        Args:
            user: 1-D integer tensor of user indices, shape (batch,).
            item: 1-D integer tensor of item indices, shape (batch,).

        Returns:
            Tensor of shape (batch,).
        """
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=1)
        # Drop only the trailing feature axis. A bare squeeze() would also
        # remove the batch axis when batch == 1 and return a 0-dim scalar,
        # which no longer matches the shape of the target tensor.
        return self.layers(x).squeeze(-1)

### Initializing Model, Loss Function, and Optimizer

In [15]:

# Seed PyTorch's global RNG before anything random happens, so weight
# initialisation, DataLoader shuffling and dropout masks repeat across
# Restart & Run All. 42 matches the random_state used for the train/test split.
# NOTE: some CUDA kernels are still non-deterministic, so GPU runs may differ
# slightly even with a fixed seed.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiating the model
model = MLPRecommender(n_users, n_items, embedding_dim=128).to(device)

# Loss and optimizer (weight_decay adds L2 regularisation to all parameters)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

### Training Loop

In [16]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # training mode: dropout layers are active
    total_loss = 0

    for batch in train_loader:
        # Move the whole (user, item, rating) batch to the model's device
        user, item, rating = (tensor.to(device) for tensor in batch)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        preds = model(user, item)
        loss = loss_fn(preds, rating)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch mean loss for the epoch report
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {total_loss / len(train_loader):.4f}")

Epoch 1/10, Training Loss: 1.2363
Epoch 2/10, Training Loss: 0.9355
Epoch 3/10, Training Loss: 0.8676
Epoch 4/10, Training Loss: 0.8357
Epoch 5/10, Training Loss: 0.8157
Epoch 6/10, Training Loss: 0.8005
Epoch 7/10, Training Loss: 0.7871
Epoch 8/10, Training Loss: 0.7773
Epoch 9/10, Training Loss: 0.7674
Epoch 10/10, Training Loss: 0.7580


### Evaluating the Model using RMSE

In [17]:
def evaluate_rmse(model, dataloader, device=None):
    """Compute the root-mean-squared error of `model` over `dataloader`.

    Args:
        model: Module called as ``model(user, item)``; returns one prediction
            per sample.
        dataloader: Iterable of ``(user, item, rating)`` tensor batches.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not depend on a notebook-level ``device``.

    Returns:
        The RMSE over every sample the dataloader yielded.

    Raises:
        ValueError: If the dataloader yields no samples.

    Note:
        The model is switched to eval mode and left there.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    squared_error = 0.0
    n_samples = 0
    with torch.no_grad():
        for user, item, rating in dataloader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            # Align shapes explicitly: a model that squeezes a batch of one
            # down to a scalar would otherwise mismatch the (1,) target.
            preds = model(user, item).reshape(rating.shape)
            squared_error += nn.functional.mse_loss(preds, rating, reduction='sum').item()
            # Count what was actually evaluated; len(dataloader.dataset) is
            # wrong when the loader uses drop_last or a sampler.
            n_samples += rating.numel()

    if n_samples == 0:
        raise ValueError("dataloader yielded no samples; RMSE is undefined")

    rmse = np.sqrt(squared_error / n_samples)
    return rmse

# Score the held-out split and report it to four decimals
rmse = evaluate_rmse(model=model, dataloader=test_loader)
print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 0.8859


 ### Evaluating the Model using MAE 

In [18]:
def evaluate_mae(model, dataloader, device=None):
    """Compute the mean absolute error of `model` over `dataloader`.

    Args:
        model: Module called as ``model(user, item)``; returns one prediction
            per sample.
        dataloader: Iterable of ``(user, item, rating)`` tensor batches.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not depend on a notebook-level ``device``.

    Returns:
        The MAE over every sample the dataloader yielded.

    Raises:
        ValueError: If the dataloader yields no samples.

    Note:
        The model is switched to eval mode and left there.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    abs_error = 0.0
    n_samples = 0
    with torch.no_grad():
        for user, item, rating in dataloader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            # Align shapes explicitly: a scalar prediction for a batch of one
            # would otherwise rely on broadcasting against the (1,) target.
            preds = model(user, item).reshape(rating.shape)
            abs_error += torch.sum(torch.abs(preds - rating)).item()
            # Count what was actually evaluated; len(dataloader.dataset) is
            # wrong when the loader uses drop_last or a sampler.
            n_samples += rating.numel()

    if n_samples == 0:
        raise ValueError("dataloader yielded no samples; MAE is undefined")

    mae = abs_error / n_samples
    return mae


# Score the held-out split and report it to four decimals
mae = evaluate_mae(model=model, dataloader=test_loader)
print(f"Test MAE: {mae:.4f}")

Test MAE: 0.6970


## Recommendation System

### Loading Movie Metadata

In [None]:
# Load the headerless, "::"-delimited movie metadata file.
movies = pd.read_csv(
    "data/movies.dat",
    sep="::",
    engine="python",
    names=["item", "title", "genres"],
    encoding="latin-1"
)

# Build the original-id -> title lookup once, instead of filtering the whole
# movies frame for every item. Keeping the first row per id matches the
# previous `.values[0]` behaviour if an id were ever duplicated.
title_by_item = (
    movies.drop_duplicates(subset="item")
    .set_index("item")["title"]
    .to_dict()
)

# Fail with a readable message if any rated item has no metadata row.
missing_ids = [orig_id for orig_id in item2idx if orig_id not in title_by_item]
if missing_ids:
    raise KeyError(
        f"{len(missing_ids)} rated item id(s) have no title in data/movies.dat, "
        f"e.g. {missing_ids[:5]}"
    )

# Invert the encoding: model index -> original item id -> title
idx2item = {new_idx: orig_id for orig_id, new_idx in item2idx.items()}
idx2title = {new_idx: title_by_item[orig_id] for new_idx, orig_id in idx2item.items()}

list(idx2title.items())[:5]

[(0, "One Flew Over the Cuckoo's Nest (1975)"),
 (1, 'James and the Giant Peach (1996)'),
 (2, 'My Fair Lady (1964)'),
 (3, 'Erin Brockovich (2000)'),
 (4, "Bug's Life, A (1998)")]

### Recommendation Function

In [None]:
def recommend_movies(model, user_id, df, n=10):
    """Return the titles of the top-`n` items the user has not rated yet.

    Reads the notebook-level ``n_items``, ``device`` and ``idx2title``.

    Args:
        model: Module called as ``model(user, item)``; returns one score per pair.
        user_id: Encoded user index (the value stored in ``df["user"]``).
        df: Ratings frame with encoded "user" and "item" columns; every item
            the user has a row for is excluded from the candidates.
        n: Maximum number of titles to return.

    Returns:
        A list of at most `n` titles, best score first. Empty when `n` is not
        positive or the user has already rated every item.

    Note:
        The model is switched to eval mode and left there.
    """
    model.eval()

    # Candidate items = all item indices minus the ones this user already rated.
    # setdiff1d returns a sorted array, so candidate order is deterministic.
    seen = df.loc[df["user"] == user_id, "item"].to_numpy()
    unseen = np.setdiff1d(np.arange(n_items), seen)

    # topk raises if k exceeds the number of candidates, so clamp it.
    k = min(n, len(unseen))
    if k <= 0:
        return []

    user_tensor = torch.full((len(unseen),), int(user_id), dtype=torch.long, device=device)
    item_tensor = torch.as_tensor(unseen, dtype=torch.long, device=device)

    with torch.no_grad():
        # reshape(-1) keeps a 1-D score vector even if the model returns a
        # scalar for a single candidate.
        scores = model(user_tensor, item_tensor).reshape(-1)

    topk = torch.topk(scores, k).indices.cpu().numpy()
    rec_idxs = [int(unseen[i]) for i in topk]
    return [idx2title[idx] for idx in rec_idxs]

# SET THE USER ID HERE
# This is the encoded user index (the value in df["user"]), not the raw id
# from the ratings file. Example: user with index 0.
user_id = 0
top10 = recommend_movies(model, user_id, df, n=10)

# Print a 1-based ranked list
for i, title in enumerate(top10, start=1):
    print(f"{i}. {title}")

1. Shawshank Redemption, The (1994)
2. Raiders of the Lost Ark (1981)
3. Usual Suspects, The (1995)
4. Great Escape, The (1963)
5. It's a Wonderful Life (1946)
6. Sting, The (1973)
7. Rear Window (1954)
8. Star Wars: Episode V - The Empire Strikes Back (1980)
9. Good Will Hunting (1997)
10. Silence of the Lambs, The (1991)
