In [10]:
!wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip ml-100k.zip

In [3]:
import pandas as pd

# Load the ratings file
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

# Map user and item IDs to 0-based indices
ratings['user_id'] = ratings['user_id'].astype('category').cat.codes
ratings['item_id'] = ratings['item_id'].astype('category').cat.codes

# Normalize ratings to 0–1
ratings['rating'] = ratings['rating'] / 5.0

# Drop timestamp
ratings = ratings.drop(columns=['timestamp'])

# Split into train and test
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)


In [7]:
import torch
from torch.utils.data import Dataset

class RatingsDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['user_id'].values, dtype=torch.long)
        self.items = torch.tensor(df['item_id'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]



import torch.nn as nn
## Step 4: Build Neural Collaborative Filtering Model
class NeuralCF(nn.Module):
    def __init__(self, num_users, num_items, embed_dim=64):
        super().__init__()
        self.user_embed = nn.Embedding(num_users, embed_dim)
        self.item_embed = nn.Embedding(num_items, embed_dim)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)

        self.fc_layers = nn.Sequential(
            nn.Linear(embed_dim * 2, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1)
        )

    def forward(self, user, item):
        u = self.user_embed(user)
        i = self.item_embed(item)
        x = torch.cat([u, i], dim=1)
        out = self.fc_layers(x)
        bias = self.user_bias(user).squeeze() + self.item_bias(item).squeeze()
        return out.squeeze() + bias

from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, epochs=10):
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch}")
        for user, item, rating in loop:
            optimizer.zero_grad()
            pred = model(user, item)
            loss = loss_fn(pred, rating)

            # Embedding regularization
            reg = model.user_embed(user).norm(2) + model.item_embed(item).norm(2)
            loss += 1e-6 * reg  ## eg: .000001 * reg
            #  Adds a small penalty to the loss, scaled by 1e-6.
            #  This discourages the model from learning overly large embeddings, which helps prevent overfitting.
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())
        avg_loss = total_loss / len(train_loader.dataset)
        print(f"Epoch {epoch} completed. Total Loss: {avg_loss:.4f}")


In [8]:
num_users = ratings['user_id'].nunique()
num_items = ratings['item_id'].nunique()

train_dataset = RanytingsDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)

model = NeuralCF(num_users, num_items)
train_model(model, train_loader)



Epoch 1: 100%|██████████| 157/157 [00:04<00:00, 32.18it/s, loss=2.64]


Epoch 1 completed. Total Loss: 0.0044


Epoch 2: 100%|██████████| 157/157 [00:04<00:00, 32.21it/s, loss=2.12]


Epoch 2 completed. Total Loss: 0.0038


Epoch 3: 100%|██████████| 157/157 [00:05<00:00, 28.67it/s, loss=1.98]


Epoch 3 completed. Total Loss: 0.0035


Epoch 4: 100%|██████████| 157/157 [00:04<00:00, 33.85it/s, loss=1.45]


Epoch 4 completed. Total Loss: 0.0032


Epoch 5: 100%|██████████| 157/157 [00:05<00:00, 28.44it/s, loss=1.54]


Epoch 5 completed. Total Loss: 0.0029


Epoch 6: 100%|██████████| 157/157 [00:04<00:00, 32.90it/s, loss=1.17]


Epoch 6 completed. Total Loss: 0.0027


Epoch 7: 100%|██████████| 157/157 [00:04<00:00, 32.33it/s, loss=1.08]


Epoch 7 completed. Total Loss: 0.0025


Epoch 8: 100%|██████████| 157/157 [00:05<00:00, 28.39it/s, loss=1.04]


Epoch 8 completed. Total Loss: 0.0023


Epoch 9: 100%|██████████| 157/157 [00:04<00:00, 32.70it/s, loss=1]


Epoch 9 completed. Total Loss: 0.0021


Epoch 10: 100%|██████████| 157/157 [00:05<00:00, 29.07it/s, loss=0.848]

Epoch 10 completed. Total Loss: 0.0019



