In [1]:
# Install needed packages if not already installed
# !pip install implicit lightfm torch scikit-learn pandas

import pandas as pd
import numpy as np
from lightfm.datasets import fetch_movielens
from scipy.sparse import coo_matrix
from implicit.als import AlternatingLeastSquares
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# ----------------------------
# 1. Load Dataset
# ----------------------------
data = fetch_movielens(min_rating=4.0)  # implicit version
interactions = data['train']

# Convert to COO matrix
coo = interactions.tocoo()
df = pd.DataFrame({'user': coo.row, 'item': coo.col, 'rating': coo.data})

user_ids = df['user'].unique()
item_ids = df['item'].unique()
num_users = len(user_ids)
num_items = len(item_ids)

# ----------------------------
# 2. Candidate Generation (Implicit ALS)
# ----------------------------
model = AlternatingLeastSquares(factors=64, regularization=0.1, iterations=15)
# implicit ALS requires (item-user) matrix
model.fit(interactions.T)

# For each user, generate top-N candidate items
def generate_candidates(model, user_id, N=100):
    ids, scores = model.recommend(user_id, interactions[user_id], N=N)
    return list(zip(ids, scores))

# Create a candidate set with labels
def generate_labeled_candidates(df, model, N=100):
    labeled = []
    for user_id in df['user'].unique():
        positives = set(df[df['user'] == user_id]['item'])
        candidates = generate_candidates(model, user_id, N=N)
        for item_id, score in candidates:
            label = int(item_id in positives)
            labeled.append((user_id, item_id, label))
    return pd.DataFrame(labeled, columns=['user', 'item', 'label'])

labeled_df = generate_labeled_candidates(df, model, N=100)

# ----------------------------
# 3. Neural Ranker
# ----------------------------

class RecommenderDataset(Dataset):
    def __init__(self, df, num_users, num_items):
        self.users = df['user'].values
        self.items = df['item'].values
        self.labels = df['label'].values.astype(np.float32)
        self.num_users = num_users
        self.num_items = num_items

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

class NeuralRanker(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=32):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.fc = nn.Sequential(
            nn.Linear(2 * embedding_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, users, items):
        u = self.user_embedding(users)
        i = self.item_embedding(items)
        x = torch.cat([u, i], dim=1)
        return self.fc(x).squeeze()

# Split into train/test
train_df, test_df = train_test_split(labeled_df, test_size=0.2, random_state=42)

train_dataset = RecommenderDataset(train_df, num_users, num_items)
test_dataset = RecommenderDataset(test_df, num_users, num_items)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512)

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_nn = NeuralRanker(num_users, num_items).to(device)
optimizer = optim.Adam(model_nn.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(5):
    model_nn.train()
    total_loss = 0
    for users, items, labels in train_loader:
        users = users.to(device)
        items = items.to(device)
        labels = labels.to(device)

        preds = model_nn(users, items)
        loss = criterion(preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# ----------------------------
# 4. Evaluation
# ----------------------------
model_nn.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for users, items, labels in test_loader:
        users = users.to(device)
        items = items.to(device)
        preds = model_nn(users, items).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

from sklearn.metrics import roc_auc_score
print("Test AUC:", roc_auc_score(all_labels, all_preds))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




TypeError: 'coo_matrix' object is not subscriptable