In [None]:

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Seed NumPy's and PyTorch's global random number generators with the same
# fixed value.
np.random.seed(42)
torch.manual_seed(42)

# Select the CUDA device when PyTorch reports one is available, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Loading & Preprocessing Ratings

In [None]:

# Read the "::"-separated ratings file (column names are supplied here, so the
# file is read without a header row) and discard the timestamp column.
ratings = pd.read_csv(
    "data/ratings.dat",
    names=["user", "item", "rating", "timestamp"],
    sep="::",
    engine="python",
).drop(columns="timestamp")

# Distinct raw IDs, in order of first appearance in the file.
user_ids = ratings["user"].unique()
item_ids = ratings["item"].unique()

# Lookup tables from raw ID -> contiguous 0-based index.
user2idx = {raw_id: pos for pos, raw_id in enumerate(user_ids)}
item2idx = {raw_id: pos for pos, raw_id in enumerate(item_ids)}

# Replace the raw IDs with their contiguous indices.
ratings = ratings.assign(
    user=ratings["user"].map(user2idx),
    item=ratings["item"].map(item2idx),
)

n_users = len(user_ids)
n_items = len(item_ids)

### Train/Test Split

In [3]:
from sklearn.model_selection import train_test_split

# Random 80/20 split over individual rating rows; the fixed random_state makes
# the partition repeatable.
train_df, test_df = train_test_split(
    ratings,
    random_state=42,
    test_size=0.2,
)

### User–Item Matrix

In [None]:
# Dense (n_users, n_items) rating matrix; cells with no training rating stay 0.
R = np.zeros((n_users, n_items), dtype=np.float32)

# Scatter every training rating into R with a single vectorized assignment
# rather than a Python-level loop over rows. If a (user, item) pair were to
# occur more than once, the last occurrence wins, exactly as with a sequential
# loop.
R[
    train_df["user"].to_numpy(),
    train_df["item"].to_numpy(),
] = train_df["rating"].to_numpy()

# count_nonzero is used as the "known entries" count, so a stored rating of
# exactly 0 would not be counted.
print(f"Built R matrix of shape {R.shape}, "
      f"with {np.count_nonzero(R)} known entries.")

Built R matrix of shape (6040, 3706), with 800167 known entries.


### UserDataset and DataLoader

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class UserDataset(Dataset):
    """Dataset that serves one row of a rating matrix per sample.

    Args:
        R_matrix: NumPy array whose first axis indexes users; each row is
            that user's rating vector over all items.
    """

    def __init__(self, R_matrix):
        # from_numpy wraps the array as a tensor that shares its memory.
        self.R = torch.from_numpy(R_matrix)

    def __len__(self):
        """Return the number of rows (users) in the matrix."""
        return self.R.shape[0]

    def __getitem__(self, idx):
        """Return the rating vector stored at row ``idx``."""
        return self.R[idx]

# Instantiating dataset and loader
# Wrap the rating matrix in a dataset and a shuffling mini-batch loader.
batch_size = 64
user_dataset = UserDataset(R)
user_loader = DataLoader(
    dataset=user_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Sanity check: pull only the first batch and show its shape, which is
# expected to be (batch_size, n_items).
for batch in user_loader:
    print(batch.shape)
    break

torch.Size([64, 3706])


### AutoRec Model