In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [None]:
# Read the "::"-delimited ratings file. A multi-character separator is not supported by
# the default C parser, hence the explicit Python engine. The file carries no header row,
# so the column names are supplied here.
df = pd.read_csv(
    "data/ratings.dat",
    sep="::",
    engine="python",
    names=["user", "item", "rating", "timestamp"],
)
df.head()

Unnamed: 0,user,item,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


 Clean & Encode the Data

In [4]:
# NOTE: this cell rewrites `df` in place, so it cannot be re-run on its own: a second run
# fails at the drop because "timestamp" is already gone. Re-run the load cell first.

# Remove the timestamp column
df.drop(columns="timestamp", inplace=True)

# Distinct raw IDs, in order of first appearance
user_ids, item_ids = df["user"].unique(), df["item"].unique()

# Lookup tables: raw ID -> contiguous 0-based index
user2idx = dict(zip(user_ids, range(len(user_ids))))
item2idx = dict(zip(item_ids, range(len(item_ids))))

# Swap the raw IDs in the frame for their indices
df["user"] = df["user"].map(user2idx)
df["item"] = df["item"].map(item2idx)

# Number of distinct users / items
n_users, n_items = len(user2idx), len(item2idx)

df.head()

Unnamed: 0,user,item,rating
0,0,0,5
1,0,1,3
2,0,2,3
3,0,3,4
4,0,4,5


PyTorch Dataset Class

In [5]:
class RatingsDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(user, item, rating)`` tensor triples.

    Args:
        df: Frame with ``"user"``, ``"item"`` and ``"rating"`` columns. The
            rating column is converted to ``float32``; the other two are
            kept in whatever dtype the frame stores them.
    """

    def __init__(self, df):
        # Extract the underlying arrays once so per-sample access bypasses pandas.
        self.users, self.items = df["user"].values, df["item"].values
        self.ratings = df["rating"].values.astype(np.float32)

    def __len__(self):
        """Return the number of rating rows."""
        return len(self.ratings)

    def __getitem__(self, idx):
        """Return the ``idx``-th row as a tuple of three tensors."""
        columns = (self.users, self.items, self.ratings)
        return tuple(torch.tensor(column[idx]) for column in columns)

Train-Test Split + DataLoaders

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Wrap each partition so individual samples can be indexed as tensors
train_dataset, test_dataset = RatingsDataset(train_df), RatingsDataset(test_df)

# Batches of 256; only the training loader shuffles, the test loader keeps its default order
train_loader = DataLoader(dataset=train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=256)

Define the MLP Model

In [None]:
class MLPRecommender(nn.Module):
    """Rating predictor: user and item embeddings concatenated and fed through an MLP.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        embedding_dim: Width of each embedding vector (default 64).
    """

    def __init__(self, n_users, n_items, embedding_dim=64):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

        # The input width is doubled because the user and item vectors are concatenated.
        self.layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Predicts a rating
        )

    def forward(self, user, item):
        """Predict one rating per (user, item) pair.

        Args:
            user: 1-D integer tensor of user indices, one per sample.
            item: 1-D integer tensor of item indices, same length as ``user``.

        Returns:
            1-D tensor with one prediction per sample, including when the
            batch holds a single sample.
        """
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=1)
        # Drop only the trailing size-1 feature axis. A bare ``squeeze()`` would also
        # remove the batch axis for a single-sample batch, yielding a 0-d tensor whose
        # shape no longer matches a ``(1,)`` target.
        return self.layers(x).squeeze(-1)