In [54]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from sklearn.metrics import accuracy_score

In [55]:
# Read the tab-separated ratings file (presumably MovieLens-100k, judging by the
# path -- confirm), discard the timestamp column, and shift every rating down by
# one so the values can be used directly as zero-based class indices by the
# CrossEntropyLoss defined further down.
df = (
    pd.read_csv(
        'ml-100k/u.data',
        sep='\t',
        names=['user_id', 'item_id', 'rating', 'timestamp'],
    )
    .drop(columns=['timestamp'])
    .assign(rating=lambda frame: frame['rating'] - 1)
)

# Distinct-id counts. The embedding tables below are sized as count + 1, which
# only covers every id if ids are contiguous and start at 1 -- TODO confirm.
num_users, num_items = df['user_id'].nunique(), df['item_id'].nunique()

In [56]:
class MLDataset(Dataset):
    """Map-style dataset exposing (user, item, rating) triples as long tensors.

    Args:
        df: frame providing 'user_id', 'item_id' and 'rating' columns; each
            column is copied into its own ``torch.long`` tensor at construction.
    """

    def __init__(self, df):
        def column_as_long(name):
            # torch.tensor copies the underlying array, so later edits to the
            # frame do not leak into the dataset.
            return torch.tensor(df[name].values, dtype=torch.long)

        self.users = column_as_long('user_id')
        self.ratings = column_as_long('rating')
        self.items = column_as_long('item_id')

    def __len__(self):
        """Number of rating rows held by the dataset."""
        return self.ratings.size(0)

    def __getitem__(self, idx):
        """Return the ``(user, item, rating)`` entries stored at position ``idx``."""
        user = self.users[idx]
        item = self.items[idx]
        rating = self.ratings[idx]
        return user, item, rating
# Wrap the whole ratings frame (no rows are held out here) and serve it in
# reshuffled mini-batches of 64 triples.
train_dataset = MLDataset(df)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)

In [57]:
class MLDataNN(nn.Module):
    """Embedding-plus-MLP classifier that scores rating classes for (user, item) pairs.

    A user index and an item index are each looked up in their own embedding
    table, the two vectors are concatenated, and the result is passed through a
    stack of fully connected layers (128 -> 64 -> 32 -> 16 -> ``n_classes``)
    with ReLU activations between them. The output is one raw, un-normalised
    score per class.

    Args:
        n_users: number of distinct users. The user table gets ``n_users + 1``
            rows so that ids in ``0..n_users`` are valid indices. When ``None``
            (the default) the notebook-level ``num_users`` is used, which keeps
            the original zero-argument ``MLDataNN()`` call working.
        n_items: same as ``n_users`` but for items; defaults to the
            notebook-level ``num_items``.
        embedding_dim: width of each embedding vector (default 50, as before).
        n_classes: number of output scores per pair (default 5, as before).

    Raises:
        NameError: if a count is omitted and the corresponding notebook-level
            variable has not been defined yet.
    """

    def __init__(self, n_users=None, n_items=None, embedding_dim=50, n_classes=5):
        super().__init__()
        # Fall back to the module-level counts only when the caller did not
        # supply explicit sizes.
        if n_users is None:
            n_users = num_users
        if n_items is None:
            n_items = num_items

        # Submodule names and construction order are kept as they were so that
        # state-dict keys and seeded initialisation are unchanged.
        self.user_embedding = nn.Embedding(n_users + 1, embedding_dim)
        self.item_embedding = nn.Embedding(n_items + 1, embedding_dim)
        self.fc_layers = nn.Sequential(
            nn.Linear(2 * embedding_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, n_classes)
        )

    def forward(self, user, item):
        """Return class scores for index tensors ``user`` and ``item``.

        Both tensors must be integer index tensors of the same shape; the result
        has that shape plus a trailing dimension of size ``n_classes``.
        """
        user_embed = self.user_embedding(user)
        item_embed = self.item_embedding(item)
        # Joining along the last axis equals the previous dim=1 for 1-D batches
        # and additionally works for scalar or multi-dimensional index tensors.
        x = torch.cat([user_embed, item_embed], dim=-1)
        return self.fc_layers(x)
# Instantiate the network with no arguments; its embedding tables are sized from
# the notebook-level num_users / num_items, so those must already be defined.
model = MLDataNN()

In [58]:
# Multi-class objective over the model's raw per-class scores, optimised with
# Adam at a step size of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [59]:
# Use the GPU when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the parameters to the chosen device; as the final expression of the cell
# this also displays the module summary.
model.to(device)

MLDataNN(
  (user_embedding): Embedding(944, 50)
  (item_embedding): Embedding(1683, 50)
  (fc_layers): Sequential(
    (0): Linear(in_features=100, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=16, bias=True)
    (7): ReLU()
    (8): Linear(in_features=16, out_features=5, bias=True)
  )
)

In [60]:
# Run a fixed number of passes over train_loader. After each pass, report the
# mean of the per-batch losses (sum of batch losses divided by batch count).
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for user, item, rating in train_loader:
        # Inputs and targets have to sit on the same device as the model.
        user = user.to(device)
        item = item.to(device)
        rating = rating.to(device)

        # Clear stale gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        output = model(user, item)
        loss = criterion(output, rating)
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is retained.
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")

Epoch 1/10, Loss: 1.3843
Epoch 2/10, Loss: 1.2780
Epoch 3/10, Loss: 1.2387
Epoch 4/10, Loss: 1.2130
Epoch 5/10, Loss: 1.1939
Epoch 6/10, Loss: 1.1752
Epoch 7/10, Loss: 1.1557
Epoch 8/10, Loss: 1.1370
Epoch 9/10, Loss: 1.1186
Epoch 10/10, Loss: 1.0972


In [61]:
def evaluate_model(model, loader):
    """Measure the classification accuracy of ``model`` over ``loader``.

    The model is switched to eval mode (and left there), every batch is moved to
    the device the model's parameters live on, and the arg-max class of each
    output row is compared with the target. The result is printed with the
    fixed label ``"Test Accuracy"`` -- whichever loader is supplied -- and also
    returned.

    Args:
        model: module called as ``model(user, item)`` that returns one row of
            class scores per sample.
        loader: iterable yielding ``(user, item, rating)`` tensor triples.

    Returns:
        float: fraction of samples whose predicted class equals ``rating``, or
        ``nan`` when ``loader`` yields no samples.
    """
    model.eval()

    # Follow the model's own device rather than relying on a notebook-level
    # variable; modules without parameters are evaluated on the CPU.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    # Running counts avoid building per-sample Python lists just to take a mean.
    correct, total = 0, 0
    with torch.no_grad():
        for user, item, rating in loader:
            user = user.to(eval_device)
            item = item.to(eval_device)
            rating = rating.to(eval_device)

            pred = model(user, item).argmax(1)
            correct += (pred == rating).sum().item()
            total += rating.numel()

    # An empty loader has no defined accuracy; report NaN instead of dividing by zero.
    acc = correct / total if total else float("nan")
    print(f"Test Accuracy: {acc:.4f}")
    return acc
# NOTE: train_loader is the same loader the model was fitted on above, so the
# figure printed under the "Test Accuracy" label is accuracy on training data.
evaluate_model(model=model, loader=train_loader)

Test Accuracy: 0.5465
