In [2]:
pip install torch



In [5]:
import pandas as pd
import urllib.request
import os

# Download the MovieLens dataset
url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
dataset_path = "ml-latest-small.zip"

if not os.path.exists(dataset_path):
    urllib.request.urlretrieve(url, dataset_path)

import zipfile
with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
    zip_ref.extractall("ml-latest-small")

# Load the ratings dataset using pandas
ratings = pd.read_csv("/content/ml-latest-small/ml-latest-small/ratings.csv")
ratings = ratings[['userId', 'movieId', 'rating']]
ratings.head()


Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [6]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split

# Create a user-item interaction matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
user_item_matrix = torch.tensor(user_item_matrix.values, dtype=torch.float32)

# Split the dataset into train and test
train_matrix, test_matrix = train_test_split(user_item_matrix.numpy(), test_size=0.2)
train_matrix = torch.tensor(train_matrix, dtype=torch.float32)
test_matrix = torch.tensor(test_matrix, dtype=torch.float32)

# Moving data to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_matrix = train_matrix.to(device)
test_matrix = test_matrix.to(device)

In [8]:
from torch.utils.data import DataLoader, Dataset

# Custom dataset class for user-item interactions
class UserItemDataset(Dataset):
    def __init__(self, matrix):
        self.user_item_pairs = torch.nonzero(matrix, as_tuple=False)  # Only non-zero entries
        self.ratings = matrix[self.user_item_pairs[:, 0], self.user_item_pairs[:, 1]]

    def __len__(self):
        return len(self.user_item_pairs)

    def __getitem__(self, idx):
        user = self.user_item_pairs[idx, 0]
        item = self.user_item_pairs[idx, 1]
        rating = self.ratings[idx]
        return user, item, rating

# Create the dataset
train_dataset = UserItemDataset(train_matrix)

# Create a DataLoader for batch processing
batch_size = 1024
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [9]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, num_users, num_items, num_factors, dropout_rate=0.2):
        super(MatrixFactorization, self).__init__()
        self.user_factors = torch.nn.Embedding(num_users, num_factors)
        self.item_factors = torch.nn.Embedding(num_items, num_factors)
        self.dropout = torch.nn.Dropout(dropout_rate)

        # Initialize embeddings with small random values
        torch.nn.init.normal_(self.user_factors.weight, std=0.01)
        torch.nn.init.normal_(self.item_factors.weight, std=0.01)

    def forward(self, user, item):
        user_embedding = self.dropout(self.user_factors(user))
        item_embedding = self.dropout(self.item_factors(item))
        return (user_embedding * item_embedding).sum(1)

# Initialize the model
num_factors = 20  # Number of latent factors
num_users, num_items = user_item_matrix.shape
model = MatrixFactorization(num_users, num_items, num_factors).to(device)

In [10]:
# Optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=1e-5)
loss_function = torch.nn.MSELoss()

# Training loop using batch processing
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    # Iterate over mini-batches from the DataLoader
    for batch in train_loader:
        users, items, ratings = batch
        users = users.to(device)
        items = items.to(device)
        ratings = ratings.to(device)

        # Forward pass
        optimizer.zero_grad()
        predictions = model(users, items)

        # Compute the loss
        loss = loss_function(predictions, ratings)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss/len(train_loader)}")

Epoch 1/50, Loss: 9.694267686591092
Epoch 2/50, Loss: 2.0870184840926207
Epoch 3/50, Loss: 1.3884353163730667
Epoch 4/50, Loss: 1.2109439057039928
Epoch 5/50, Loss: 1.1500054885105915
Epoch 6/50, Loss: 1.1157046542110214
Epoch 7/50, Loss: 1.0963098794580943
Epoch 8/50, Loss: 1.0876021485730827
Epoch 9/50, Loss: 1.0787888363183262
Epoch 10/50, Loss: 1.072368762579309
Epoch 11/50, Loss: 1.0641206043312348
Epoch 12/50, Loss: 1.0591677061046463
Epoch 13/50, Loss: 1.050395710640643
Epoch 14/50, Loss: 1.0443474427763237
Epoch 15/50, Loss: 1.0350892378623227
Epoch 16/50, Loss: 1.0292102256453182
Epoch 17/50, Loss: 1.0238022193851242
Epoch 18/50, Loss: 1.0226145142532257
Epoch 19/50, Loss: 1.0248547892972648
Epoch 20/50, Loss: 1.0158042957983822
Epoch 21/50, Loss: 1.0170058098184056
Epoch 22/50, Loss: 1.0087695387472588
Epoch 23/50, Loss: 1.0068387417908173
Epoch 24/50, Loss: 1.0046893465949829
Epoch 25/50, Loss: 1.0131825797529106
Epoch 26/50, Loss: 1.0003098041178233
Epoch 27/50, Loss: 0.999

In [15]:
# Get top N recommendations for a specific user
def get_top_n_recommendations(user_id, top_n=5):
    model.eval()
    with torch.no_grad():
        user_tensor = torch.tensor([user_id], device=device)
        scores = []
        for item_id in range(num_items):
            item_tensor = torch.tensor([item_id], device=device)
            prediction = model(user_tensor, item_tensor)
            scores.append((item_id, prediction.item()))

    # Sort by predicted score
    scores.sort(key=lambda x: x[1], reverse=True)
    return [item for item, score in scores[:top_n]]

# Example: Get top 7 recommendations for user 3
user_id = 3
recommendations = get_top_n_recommendations(user_id, top_n=7)
print(f"Top recommendations for user {user_id}: {recommendations}")

Top recommendations for user 3: [9241, 6711, 4583, 6034, 7638, 8573, 7764]
