### Section 1: Install and Import Dependencies

In [42]:
!pip install torch torchvision torchaudio
!pip install torch-geometric==2.3.0
!pip install pandas numpy scikit-learn
!pip install torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "import torch; print(torch.__version__)").html





[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\renee\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\renee\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\renee\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'import torch; print(torch.__version__)).html'

[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\renee\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


### Section 2: Imports

In [43]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.utils import degree
from torch_geometric.nn import MessagePassing
from torch_scatter import scatter_softmax
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import random
import os


### Section 3: Setting Device and Reproducibility

In [44]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# For reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(42)

Using device: cpu


### Section 4: Load and Preprocess the MovieLens Dataset

In [45]:
# Ensure the dataset is in the working directory: 'ml-latest-small/ratings.csv' and 'movies.csv'
ratings = pd.read_csv('ml-latest-small/ratings.csv')
movies = pd.read_csv('ml-latest-small/movies.csv')

# Filter out any rows with missing userIds (shouldn't happen, but just in case)
ratings = ratings[ratings['userId'].notna()]

# Map user and movie IDs to consecutive integers
user_id_mapping = {id: idx for idx, id in enumerate(ratings['userId'].unique())}
item_id_mapping = {id: idx for idx, id in enumerate(ratings['movieId'].unique())}

ratings['userId'] = ratings['userId'].map(user_id_mapping)
ratings['movieId'] = ratings['movieId'].map(item_id_mapping)

num_users = ratings['userId'].nunique()
num_items = ratings['movieId'].nunique()
num_nodes = num_users + num_items

print("Number of users:", num_users)
print("Number of items:", num_items)
print("Number of total nodes:", num_nodes)
print("Number of interactions:", len(ratings))

Number of users: 610
Number of items: 9724
Number of total nodes: 10334
Number of interactions: 100836


### Section 5: Create Graph Data

In [46]:
# Create edge index for the entire dataset
# Users are [0, num_users-1], Items are [num_users, num_users+num_items-1]
user_nodes = ratings['userId'].to_numpy()
item_nodes = ratings['movieId'].to_numpy() + num_users

edge_index = np.vstack((user_nodes, item_nodes))
edge_index = torch.tensor(edge_index, dtype=torch.long)

# Edge attributes are the ratings
edge_attr = torch.tensor(ratings['rating'].to_numpy(), dtype=torch.float32)

# Feature matrix: we can start with a simple identity or zero embeddings, as LightGCN learns embeddings directly
# We'll rely solely on the learned embeddings from the model
data = Data(edge_index=edge_index, num_nodes=num_nodes)

# Move to device
data = data.to(device)
edge_attr = edge_attr.to(device)

### Section 6: Train/Test Split

In [47]:
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

train_user = torch.tensor(train_data['userId'].values, dtype=torch.long, device=device)
train_item = torch.tensor(train_data['movieId'].values + num_users, dtype=torch.long, device=device)
train_rating = torch.tensor(train_data['rating'].values, dtype=torch.float32, device=device)

test_user = torch.tensor(test_data['userId'].values, dtype=torch.long, device=device)
test_item = torch.tensor(test_data['movieId'].values + num_users, dtype=torch.long, device=device)
test_rating = torch.tensor(test_data['rating'].values, dtype=torch.float32, device=device)

# For Recall@K calculation, we will need the test edges separately
test_edge_index = torch.stack([test_user, test_item], dim=0)

### Section 7: Utility Functions

In [48]:
def rmse(true_ratings, pred_ratings):
    return np.sqrt(mean_squared_error(true_ratings, pred_ratings))

def recall_at_k(model, k=10):
    """
    Compute Recall@K on the test set:
    - We consider all items and see if the items the user actually interacted with (in test set)
      appear in the top K recommendations for that user.
    """
    model.eval()
    with torch.no_grad():
        # Get embeddings
        embeddings = model.get_embedding(data.edge_index)
        user_emb = embeddings[:num_users]
        item_emb = embeddings[num_users:num_users+num_items]

        # Compute scores [num_users x num_items]
        scores = user_emb @ item_emb.T

        # Get top-k items for each user
        _, top_k_items = torch.topk(scores, k, dim=1)

        # Convert test set into a dict: user -> set of test items
        test_user_items = {}
        for u, i, r in zip(test_data['userId'], test_data['movieId'], test_data['rating']):
            # Only consider items where user interacted positively (rating > 0)
            # In MovieLens all ratings > 0 by definition, but we keep the check for generality
            if u not in test_user_items:
                test_user_items[u] = set()
            test_user_items[u].add(i)

        recalls = []
        for u in range(num_users):
            if u in test_user_items and len(test_user_items[u]) > 0:
                recommended = set((top_k_items[u].cpu().numpy()))
                relevant = test_user_items[u]
                hit_count = len(recommended & relevant)
                recall_u = hit_count / len(relevant)
                recalls.append(recall_u)
            else:
                # If a user has no test items, skip them or consider recall as 0
                # Usually, we consider only users with test interactions
                pass

        if len(recalls) == 0:
            return 0.0
        return float(np.mean(recalls))

### Section 8: LightGCN Model (Baseline)

In [49]:
class LightGCNConv(MessagePassing):
    def __init__(self):
        super().__init__(aggr='add')

    def forward(self, x, edge_index):
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = (deg + 1e-7).pow(-0.5) 
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

class LightGCN(nn.Module):
    def __init__(self, num_nodes, embedding_dim=64, num_layers=3):
        super().__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(num_nodes, embedding_dim)
        nn.init.xavier_uniform_(self.embedding.weight)

        self.convs = nn.ModuleList([LightGCNConv() for _ in range(num_layers)])

    def forward(self, edge_index):
        x = self.embedding.weight
        all_embeddings = [x]
        for conv in self.convs:
            x = conv(x, edge_index)
            all_embeddings.append(x)
        # Mean of all layer embeddings
        x = torch.mean(torch.stack(all_embeddings, dim=0), dim=0)
        return x

    def get_embedding(self, edge_index):
        return self.forward(edge_index)

### Section 9: LightGCN with Attention

In [50]:
class LightGCNConvWithAttention(MessagePassing):
    def __init__(self, in_channels):
        super().__init__(aggr='add')
        self.att = nn.Parameter(torch.Tensor(1, in_channels * 2))
        nn.init.xavier_uniform_(self.att)

    def forward(self, x, edge_index):
        return self.propagate(edge_index, x=x)

    def message(self, x_i, x_j):
        x_cat = torch.cat([x_i, x_j], dim=-1)
        alpha = F.leaky_relu((x_cat * self.att).sum(dim=-1))
        alpha = F.softmax(alpha, dim=0)
        return alpha.unsqueeze(-1) * x_j


class LightGCNWithAttention(nn.Module):
    def __init__(self, num_nodes, embedding_dim=256, num_layers=4, dropout=0.2):
        super().__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(num_nodes, embedding_dim)
        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)
        nn.init.xavier_uniform_(self.embedding.weight)

        self.convs = nn.ModuleList([
            LightGCNConvWithAttention(embedding_dim) for _ in range(num_layers)
        ])

    def forward(self, edge_index):
        x = self.embedding.weight
        all_embeddings = [x]

        for conv in self.convs:
            x = conv(x, edge_index)
            x = self.layer_norm(x)
            x = self.dropout(x)
            all_embeddings.append(x)

        return torch.stack(all_embeddings, dim=0).sum(dim=0)

    def get_embedding(self, edge_index):
        embeddings = self.forward(edge_index)
        return embeddings

### Section 10: Training and Evaluation Functions

In [None]:
def bpr_loss(user_emb, pos_item_emb, neg_item_emb):
    pos_scores = (user_emb * pos_item_emb).sum(dim=1)
    neg_scores = (user_emb * neg_item_emb).sum(dim=1)
    loss = -torch.log(torch.sigmoid(pos_scores - neg_scores)).mean()
    return loss

def hybrid_loss(user_emb, pos_item_emb, neg_item_emb, pred_ratings, true_ratings, alpha=0.3):
    # RMSE Loss
    rmse_loss = torch.sqrt(F.mse_loss(pred_ratings, true_ratings))
    
    # BPR Loss
    pos_scores = (user_emb * pos_item_emb).sum(dim=1)
    neg_scores = (user_emb * neg_item_emb).sum(dim=1)
    bpr_loss = -torch.log(torch.sigmoid(pos_scores - neg_scores)).mean()
    
    # Weighted combination
    return alpha * rmse_loss + (1 - alpha) * bpr_loss



def train_one_epoch(model, optimizer, edge_index, user, item, rating, num_items, alpha=0.3):
    model.train()
    optimizer.zero_grad()

    # Get embeddings
    embeddings = model.get_embedding(edge_index)
    user_emb = embeddings[user]
    pos_item_emb = embeddings[item]

    # Negative sampling: randomly select negative items
    neg_items = torch.randint(0, num_items, (len(user),), device=device)
    neg_item_emb = embeddings[neg_items + num_users]

    # Predict ratings for positive samples
    pred_ratings = (user_emb * pos_item_emb).sum(dim=1)

    # Calculate hybrid loss
    loss = hybrid_loss(user_emb, pos_item_emb, neg_item_emb, pred_ratings, rating, alpha)

    # Add L2 regularization (on embeddings)
    l2_reg = 1e-4 * torch.norm(model.embedding.weight)
    loss += l2_reg

    # Add L2 regularization for attention (only for LightGCNWithAttention)
    if hasattr(model, 'convs') and isinstance(model.convs[0], LightGCNConvWithAttention):
        for conv in model.convs:
            if hasattr(conv, 'att'):  # Check if 'att' exists
                loss += 1e-5 * torch.norm(conv.att)

    # Backpropagation and optimizer step
    loss.backward()
    optimizer.step()
    return loss.item()



def evaluate_rmse(model, edge_index, user, item, rating):
    model.eval()
    with torch.no_grad():
        embeddings = model.get_embedding(edge_index)
        user_emb = embeddings[user]
        item_emb = embeddings[item]
        pred = (user_emb * item_emb).sum(dim=1)
        pred = torch.clamp(pred, min=0.0, max=5.0)
        true = rating.cpu().numpy()
        return rmse(true, pred.cpu().numpy())

### Section 11: Training Loops and Comparison

In [82]:
import copy
def train_model(model, edge_index, train_user, train_item, train_rating, 
                test_user, test_item, test_rating, 
                num_items, alpha=0.2, epochs=50, lr=0.001, weight_decay=1e-5):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    best_rmse = float('inf')
    best_state = None
    patience = 5
    patience_counter = 0

    for epoch in range(1, epochs + 1):
        loss = train_one_epoch(model, optimizer, edge_index, train_user, train_item, train_rating, num_items, alpha)
        tr_rmse = evaluate_rmse(model, edge_index, train_user, train_item, train_rating)
        val_rmse = evaluate_rmse(model, edge_index, test_user, test_item, test_rating)
        
        print(f"Epoch {epoch:03d}: Loss={loss:.4f}, Train_RMSE={tr_rmse:.4f}, Test_RMSE={val_rmse:.4f}")

        # Early stopping on validation RMSE
        if tr_rmse < best_rmse:
            best_rmse = tr_rmse
            best_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter > patience:
                print("Early stopping triggered.")
                break

    model.load_state_dict(best_state)
    return model


# Train the baseline model
baseline_model = LightGCN(num_nodes=num_nodes, embedding_dim=64, num_layers=3).to(device)
print("Training Baseline LightGCN...")
baseline_model = train_model(baseline_model, data.edge_index, train_user, train_item, train_rating, 
                             test_user, test_item, test_rating, num_items, alpha=0.3)

baseline_rmse = evaluate_rmse(baseline_model, data.edge_index, test_user, test_item, test_rating)
baseline_recall = recall_at_k(baseline_model, k=10)
print(f"Baseline LightGCN - Test RMSE: {baseline_rmse:.4f}, Recall@10: {baseline_recall:.4f}")

# Train the LightGCN with Attention model
att_model = LightGCNWithAttention(num_nodes=num_nodes, embedding_dim=64, num_layers=3, dropout=0.4).to(device)
print("\nTraining LightGCN with Attention...")
att_model = train_model(att_model, data.edge_index, train_user, train_item, train_rating, 
                        test_user, test_item, test_rating, num_items, alpha=0.2, 
                        epochs=400, lr=0.0005, weight_decay=1e-4)

att_rmse = evaluate_rmse(att_model, data.edge_index, test_user, test_item, test_rating)
att_recall = recall_at_k(att_model, k=10)
print(f"LightGCN+Attention - Test RMSE: {att_rmse:.4f}, Recall@10: {att_recall:.4f}")

Training Baseline LightGCN...
Epoch 001: Loss=1.3215, Train_RMSE=3.0027, Test_RMSE=3.0015
Epoch 002: Loss=1.2366, Train_RMSE=2.7394, Test_RMSE=2.7404
Epoch 003: Loss=1.1133, Train_RMSE=2.3881, Test_RMSE=2.3909
Epoch 004: Loss=0.9597, Train_RMSE=1.9871, Test_RMSE=1.9905
Epoch 005: Loss=0.7950, Train_RMSE=1.6344, Test_RMSE=1.6367
Epoch 006: Loss=0.6682, Train_RMSE=1.4315, Test_RMSE=1.4327
Epoch 007: Loss=0.6464, Train_RMSE=1.3615, Test_RMSE=1.3626
Epoch 008: Loss=0.6841, Train_RMSE=1.3326, Test_RMSE=1.3333
Epoch 009: Loss=0.6914, Train_RMSE=1.2955, Test_RMSE=1.2970
Epoch 010: Loss=0.6531, Train_RMSE=1.2398, Test_RMSE=1.2438
Epoch 011: Loss=0.5840, Train_RMSE=1.1801, Test_RMSE=1.1883
Epoch 012: Loss=0.5219, Train_RMSE=1.1577, Test_RMSE=1.1716
Epoch 013: Loss=0.4891, Train_RMSE=1.1981, Test_RMSE=1.2164
Epoch 014: Loss=0.4959, Train_RMSE=1.2665, Test_RMSE=1.2874
Epoch 015: Loss=0.5182, Train_RMSE=1.3165, Test_RMSE=1.3391
Epoch 016: Loss=0.5354, Train_RMSE=1.3291, Test_RMSE=1.3527
Epoch 017:

### Section 12: Results Comparison

In [83]:
print("===== Final Comparison =====")
print(f"Baseline LightGCN    : RMSE = {baseline_rmse:.4f}, Recall@10 = {baseline_recall:.4f}")
print(f"LightGCN + Attention : RMSE = {att_rmse:.4f}, Recall@10 = {att_recall:.4f}")

===== Final Comparison =====
Baseline LightGCN    : RMSE = 1.1716, Recall@10 = 0.0800
LightGCN + Attention : RMSE = 1.1186, Recall@10 = 0.0531
