In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Create a dummy user_ratings.csv file for demonstration.
# NOTE: this overwrites any existing 'user_ratings.csv' in the working directory.
data = {
    'user_id': [1, 1, 2, 2, 3, 3],
    'item_id': [101, 102, 101, 103, 102, 103],
    'rating': [4, 5, 3, 4, 5, 4],
}
dummy_df = pd.DataFrame(data)
dummy_df.to_csv('user_ratings.csv', index=False)

print("Dummy 'user_ratings.csv' created.")

# Load dataset (user-item interactions such as user ratings for movies)
df = pd.read_csv('user_ratings.csv')

# Convert user_id and item_id into zero-based categorical indices so they can
# be used directly as row numbers of an embedding table.
df['user_id'] = df['user_id'].astype('category').cat.codes
df['item_id'] = df['item_id'].astype('category').cat.codes

# Shuffle before splitting. The rows are ordered by user, so a purely
# positional 80/20 split puts every rating of the last user into the
# validation set and leaves that user unseen during training. The fixed seed
# keeps the split reproducible between runs.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Create training and validation datasets (first 80% / remaining 20%)
train_size = int(0.8 * len(df))
train_data = df.iloc[:train_size]
val_data = df.iloc[train_size:]

# Custom Dataset Class
class UserItemRatingDataset(Dataset):
    """Map-style dataset over the rows of a user/item/rating table.

    The frame passed in must provide 'user_id', 'item_id' and 'rating'
    columns; the values of each column are stored on the instance.
    """

    def __init__(self, df):
        self.users, self.items, self.ratings = (
            df[column].values for column in ('user_id', 'item_id', 'rating')
        )

    def __len__(self):
        # One sample per rating row.
        return len(self.ratings)

    def __getitem__(self, idx):
        """Return the (user, item, rating) tensors for row ``idx``."""
        # Ids are cast to long tensors, the rating to a float tensor.
        user = torch.tensor(self.users[idx], dtype=torch.long)
        item = torch.tensor(self.items[idx], dtype=torch.long)
        rating = torch.tensor(self.ratings[idx], dtype=torch.float)
        return user, item, rating

# Prepare DataLoaders: wrap each split in a dataset, then batch it.
# Only the training batches are shuffled.
train_dataset, val_dataset = (
    UserItemRatingDataset(split) for split in (train_data, val_data)
)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64)

# Build the Recommendation Model Using Matrix Factorization
class MatrixFactorization(nn.Module):
    """Matrix-factorization model: score = dot(user vector, item vector).

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_size: Length of each user/item embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_size=50):
        super().__init__()

        # Embedding layers for users and items
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

        # NOTE: this linear layer is not used by forward(). It is kept so the
        # module's parameters (and therefore its state_dict keys) stay the
        # same for checkpoints saved with this class.
        self.fc = nn.Linear(embedding_size, 1)

    def forward(self, user, item):
        """Return the predicted score for each (user, item) index pair.

        Args:
            user: Long tensor of user indices, any shape.
            item: Long tensor of item indices, same shape as ``user``.

        Returns:
            Tensor with the same shape as the index tensors, holding the dot
            product of the matching user and item embeddings.
        """
        # Extract embeddings: shape (*index_shape, embedding_size)
        user_embedded = self.user_embedding(user)
        item_embedded = self.item_embedding(item)

        # Element-wise multiply user and item embeddings (interaction features)
        interaction = user_embedded * item_embedded

        # Sum over the embedding axis. It is always the last axis, so reducing
        # over dim=-1 (rather than dim=1) stays correct for index tensors that
        # are not exactly one-dimensional.
        return torch.sum(interaction, dim=-1)

# Define model, loss, and optimizer
num_users = df['user_id'].nunique()
num_items = df['item_id'].nunique()

# Move the model to the GPU (if available) *before* building the optimizer, so
# the optimizer is constructed over the parameters as they will be trained.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MatrixFactorization(num_users, num_items, embedding_size=50)
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    num_samples = 0
    for user, item, rating in train_loader:
        user = user.to(device)
        item = item.to(device)
        rating = rating.float().to(device)

        optimizer.zero_grad()

        # Forward pass
        predictions = model(user, item)
        loss = criterion(predictions, rating)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = rating.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size

    # An empty loader has no defined loss; report nan instead of dividing by zero.
    epoch_loss = total_loss / num_samples if num_samples else float('nan')
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')

# Validation (evaluate the model on the validation set)
model.eval()
val_loss = 0.0
val_samples = 0
with torch.no_grad():
    for user, item, rating in val_loader:
        user = user.to(device)
        item = item.to(device)
        rating = rating.float().to(device)

        predictions = model(user, item)
        loss = criterion(predictions, rating)

        # Same per-sample weighting as in the training loop.
        batch_size = rating.size(0)
        val_loss += loss.item() * batch_size
        val_samples += batch_size

mean_val_loss = val_loss / val_samples if val_samples else float('nan')
print(f'Validation Loss: {mean_val_loss:.4f}')

# Example: Recommend items for a specific user (user_id=0)
def recommend_items(user_id, model, num_recommendations=5):
    """Return the indices of the items with the highest predicted score for a user.

    Args:
        user_id: User index to score items for, as used by the model's
            user embedding table.
        model: Module exposing an ``item_embedding`` table and callable as
            ``model(user_ids, item_ids)``.
        num_recommendations: Maximum number of item indices to return.

    Returns:
        numpy array of item indices ordered by predicted score, highest
        first. It holds fewer than ``num_recommendations`` entries when the
        model has fewer items.

    Note:
        Puts ``model`` into eval mode. Items are not filtered, so items the
        user has already rated can be returned.
    """
    # Take the item count and the device from the model itself rather than
    # from module-level globals, so a model that lives on a different device
    # (or was built with a different item count) is scored correctly.
    item_count = model.item_embedding.num_embeddings
    model_device = next(model.parameters()).device

    # One (user, item) pair per item: the same user against every item index.
    user_ids = torch.tensor([user_id], dtype=torch.long, device=model_device).repeat(item_count)
    item_ids = torch.arange(0, item_count, dtype=torch.long, device=model_device)

    model.eval()
    with torch.no_grad():
        # reshape(-1) keeps the scores one-dimensional even for a single item;
        # squeeze() would collapse that case to a 0-dim tensor that cannot be sliced.
        predictions = model(user_ids, item_ids).reshape(-1)

    # Sort items by predicted rating, best first
    top_items = torch.argsort(predictions, descending=True)[:num_recommendations]

    return top_items.cpu().numpy()

# Call the recommend_items function
recommended_items = recommend_items(user_id=0, model=model, num_recommendations=5)
print(f'Recommended items for user 0: {recommended_items}')

# Save the model
torch.save(model.state_dict(), 'recommnedation_model.pth')

# Load the model for inference. Rebuild it with the trained model's own
# embedding size instead of relying on the constructor default matching it.
loaded_model = MatrixFactorization(
    num_users,
    num_items,
    embedding_size=model.user_embedding.embedding_dim,
)
# map_location='cpu' lets a checkpoint saved from a GPU model be read on a
# CPU-only machine; the freshly built loaded_model lives on the CPU.
loaded_model.load_state_dict(torch.load('recommnedation_model.pth', map_location='cpu'))
loaded_model.eval()

Dummy 'user_ratings.csv' created.
Epoch 1/10, Loss: 30.2993
Epoch 2/10, Loss: 24.8893
Epoch 3/10, Loss: 20.1634
Epoch 4/10, Loss: 16.0938
Epoch 5/10, Loss: 12.6193
Epoch 6/10, Loss: 9.6694
Epoch 7/10, Loss: 7.1971
Epoch 8/10, Loss: 5.1714
Epoch 9/10, Loss: 3.5614
Epoch 10/10, Loss: 2.3302
Validation Loss: 7.1410
Recommended items for user 0: [1 2 0]


MatrixFactorization(
  (user_embedding): Embedding(3, 50)
  (item_embedding): Embedding(3, 50)
  (fc): Linear(in_features=50, out_features=1, bias=True)
)