In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

#########################################
# Dataset loader: ML-100K integration
#########################################
def load_implicit_data(data_path='ml-100k/u.data', test_size=0.2, random_state=42):
    """
    Loads a tab-separated ratings file and splits it into implicit-feedback
    train/test interaction dictionaries.

    Every (user, item) row is treated as a positive interaction; the rating
    value itself is never used for filtering.

    - data_path: Path to the tab-separated file with the columns
      user_id, item_id, rating, timestamp (no header row).
    - test_size: Fraction of interactions held out for testing.
    - random_state: Seed forwarded to train_test_split for a reproducible split.

    Returns (train_data, test_data, n_users, n_items), where both dictionaries
    map a 0-based user id to the list of 0-based item ids of that user, and
    the counts are derived from the largest id in the full dataset.

    Raises ValueError if the file contains no interactions.
    """
    # Only the id columns are needed; rating and timestamp are not loaded.
    df = pd.read_csv(data_path, sep='\t',
                     names=['user_id', 'item_id', 'rating', 'timestamp'],
                     usecols=['user_id', 'item_id'])
    if df.empty:
        raise ValueError(f"No interactions found in {data_path!r}")

    # Adjust indices to start at 0
    df['user_id'] -= 1
    df['item_id'] -= 1

    # Split into training and testing sets.
    train_df, test_df = train_test_split(df, test_size=test_size, random_state=random_state)

    # Build a dictionary of user interactions.
    def build_dict(frame):
        interactions = {}
        # tolist() yields plain Python ints and avoids per-row tuple creation.
        pairs = zip(frame['user_id'].tolist(), frame['item_id'].tolist())
        for user, item in pairs:
            interactions.setdefault(user, []).append(item)
        return interactions

    train_data = build_dict(train_df)
    test_data = build_dict(test_df)

    # Calculate number of users and items from the full dataset, so ids that
    # only occur in one of the splits are still covered.
    n_users = int(df['user_id'].max() + 1)
    n_items = int(df['item_id'].max() + 1)

    return train_data, test_data, n_users, n_items

#########################################
# Sampling Triplets
#########################################
def sample_triplets(pos_data_train, n_users, n_items, num_samples=128):
    """
    Uniformly samples triplets (user, positive item, negative item) from training data.

    - pos_data_train: Dict mapping a user id to the list of that user's positive item ids.
    - n_users: Users are drawn from the ids 0 .. n_users - 1.
    - n_items: Negative items are drawn from the ids 0 .. n_items - 1.
    - num_samples: Number of triplets to draw.

    Users are drawn uniformly among those that have at least one positive item,
    so the batch is not silently shortened by users without training data.
    A triplet is only dropped when the drawn user has interacted with every
    item, because no negative item exists for that user.

    Returns three LongTensors of equal length: user ids, positive item ids and
    negative item ids. They are empty when no user has positives or when
    n_items is not positive.
    """
    user_ids = []
    pos_item_ids = []
    neg_item_ids = []

    # Only users with at least one positive can produce a triplet.
    eligible_users = [u for u in range(n_users) if pos_data_train.get(u)]

    if eligible_users and n_items > 0:
        # Positive sets are built lazily, once per distinct user, so membership
        # tests during rejection sampling are O(1).
        seen_by_user = {}
        for _ in range(num_samples):
            user = eligible_users[np.random.randint(len(eligible_users))]
            pos_items = pos_data_train[user]
            seen = seen_by_user.get(user)
            if seen is None:
                seen = seen_by_user[user] = set(pos_items)

            if len(seen) < n_items:
                # At least one id in range is unseen, so rejection sampling
                # is guaranteed to terminate.
                neg_item = np.random.randint(0, n_items)
                while neg_item in seen:
                    neg_item = np.random.randint(0, n_items)
            else:
                # The user may have seen everything; enumerate explicitly
                # instead of looping forever.
                candidates = [i for i in range(n_items) if i not in seen]
                if not candidates:
                    continue
                neg_item = candidates[np.random.randint(len(candidates))]

            pos_item = pos_items[np.random.randint(len(pos_items))]
            user_ids.append(int(user))
            pos_item_ids.append(int(pos_item))
            neg_item_ids.append(int(neg_item))

    return (torch.LongTensor(user_ids),
            torch.LongTensor(pos_item_ids),
            torch.LongTensor(neg_item_ids))

#########################################
# Loss Function
#########################################
class MarginLoss(nn.Module):
    """
    Hinge-style ranking loss: penalizes every pair whose positive score does
    not exceed the negative score by at least `margin`.
    """

    def __init__(self, margin):
        super().__init__()
        self.margin = margin

    def forward(self, pos_score, neg_score):
        # How far the negative score is ahead of the positive one.
        gap = neg_score - pos_score
        # Pairs that already satisfy the margin contribute zero.
        hinge = F.relu(gap + self.margin)
        # Average over the batch.
        return hinge.mean()

#########################################
# 1. Multi-Layer Perceptron (MLP)
#########################################
class MLP(nn.Module):
    def __init__(self, input_dim, n_hidden=1, hidden_size=64, dropout=0., l2_reg=None):
        """
        Builds a feed-forward scorer: input dropout, `n_hidden` Linear+ReLU
        blocks, then a single-unit linear output.
        - input_dim: Width of the input vectors.
        - n_hidden: How many Linear+ReLU blocks to stack.
        - hidden_size: Width of every hidden block.
        - dropout: Probability used by the dropout layer applied to the input.
        - l2_reg: Accepted but not used anywhere in this class.
        """
        super().__init__()
        # Layer widths from the input through the last hidden layer.
        widths = [input_dim] + [hidden_size] * n_hidden
        # Dropout acts on the raw input, before the first linear layer.
        stack = [nn.Dropout(dropout)]
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Output head: one raw score per sample, no activation.
        stack.append(nn.Linear(widths[-1], 1))
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        scores = self.network(x)
        # Drop the trailing singleton dimension left by the output layer.
        return scores.squeeze(-1)

#########################################
# 2. DeepTripletModel
#########################################
class DeepTripletModel(nn.Module):
    """
    Training-time model: embeds a user together with one positive and one
    negative item, scores both (user, item) pairs with a shared MLP and
    returns the margin loss between the two scores.
    """

    def __init__(self, n_users, n_items, user_dim=32, item_dim=64, margin=1.,
                 n_hidden=1, hidden_size=64, dropout=0, l2_reg=None):
        super().__init__()
        # One embedding table per entity type.
        self.user_layer = nn.Embedding(n_users, user_dim)
        self.item_layer = nn.Embedding(n_items, item_dim)
        # The scorer consumes a user vector concatenated with an item vector.
        concat_dim = user_dim + item_dim
        self.mlp = MLP(concat_dim, n_hidden, hidden_size, dropout, l2_reg)
        self.margin_loss = MarginLoss(margin)
        self.margin = margin

    def forward(self, user_ids, pos_item_ids, neg_item_ids):
        # The user embedding is shared by both pairs.
        users = self.user_layer(user_ids)
        scores = []
        # Positive pair first, negative pair second.
        for item_ids in (pos_item_ids, neg_item_ids):
            pair = torch.cat([users, self.item_layer(item_ids)], dim=1)
            scores.append(self.mlp(pair))
        pos_score, neg_score = scores
        # Margin (triplet) loss between the positive and negative scores.
        return self.margin_loss(pos_score, neg_score)

#########################################
# DeepMatchModel for Evaluation
#########################################
class DeepMatchModel(nn.Module):
    """
    Scoring-only wrapper: holds references to an existing user embedding,
    item embedding and MLP, and returns the MLP score of (user, item) pairs.
    """

    def __init__(self, user_layer, item_layer, mlp):
        super().__init__()
        # The given layers are stored as-is, not copied.
        self.user_layer, self.item_layer = user_layer, item_layer
        self.mlp = mlp

    def forward(self, user_ids, item_ids):
        # Embed each side, join them feature-wise, and score the result.
        features = torch.cat(
            [self.user_layer(user_ids), self.item_layer(item_ids)],
            dim=1,
        )
        return self.mlp(features)

#########################################
# 3. Evaluation Function: Average ROC AUC
#########################################
def average_roc_auc(deep_match_model, pos_data_test, n_users, n_items):
    """
    Computes the average ROC AUC across users.

    For each user, all items 0 .. n_items - 1 are scored in one batched forward
    pass and compared against that user's test positives.

    - deep_match_model: Module called as model(user_ids, item_ids) that returns
      one score per pair. It is switched to eval mode and left in eval mode.
    - pos_data_test: Dict mapping a user id to the list of test-positive item ids.
    - n_users: Users 0 .. n_users - 1 are evaluated.
    - n_items: Items 0 .. n_items - 1 are ranked for every user.

    Users whose labels are all 0 or all 1 are skipped, since AUC is undefined
    for them. Returns the mean AUC over the remaining users, or 0.0 if there
    are none.

    NOTE: every item that is not a test positive is labelled 0, including items
    the user interacted with in the training split.
    """
    deep_match_model.eval()

    # Build the index tensors on the model's own device; CPU tensors would fail
    # once the model has been moved to CUDA.
    try:
        device = next(deep_match_model.parameters()).device
    except StopIteration:
        device = torch.device("cpu")

    all_items = torch.arange(n_items, dtype=torch.long, device=device)
    all_auc = []
    with torch.no_grad():
        for user in range(n_users):
            # Deduplicate and keep only ids that are inside the scored range.
            positives = [i for i in set(pos_data_test.get(user, ()))
                         if 0 <= i < n_items]
            # Skip users who have only one type of label, before scoring.
            if not positives or len(positives) == n_items:
                continue

            y_true = np.zeros(n_items, dtype=np.int64)
            y_true[positives] = 1

            # One forward pass per user instead of one per (user, item) pair.
            user_batch = torch.full((n_items,), user, dtype=torch.long, device=device)
            y_scores = deep_match_model(user_batch, all_items).reshape(-1).cpu().numpy()

            all_auc.append(roc_auc_score(y_true, y_scores))
    return np.mean(all_auc) if all_auc else 0.0

#########################################
# 4. Training Loop for Deep Recommender System
#########################################
def train_deep_recsys(n_epochs=200, batch_size=128, lr=0.001, eval_every=1):
    """
    Trains the triplet-based recommender on ML-100K and reports the test ROC AUC.

    - n_epochs: Number of training iterations. Each iteration performs a single
      optimizer step on one freshly sampled mini-batch of triplets.
    - batch_size: Number of triplets requested per mini-batch.
    - lr: Learning rate of the Adam optimizer.
    - eval_every: Evaluate on the test set every `eval_every` iterations;
      0 or None disables evaluation and only the loss is printed.

    Progress is printed to stdout; nothing is returned.
    """
    # Load the ML-100K data.
    pos_data_train, pos_data_test, n_users, n_items = load_implicit_data()

    hyper_parameters = {
        'user_dim': 32,
        'item_dim': 64,
        'n_hidden': 2,
        'hidden_size': 128,
        'dropout': 0.1,
        'l2_reg': 0.001,
        'margin': 1.0
    }
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the triplet-based model.
    deep_triplet_model = DeepTripletModel(n_users, n_items, **hyper_parameters).to(device)

    # The evaluation model shares the embedding layers and the MLP with the
    # triplet model, so it always reflects the latest training step.
    deep_match_model = DeepMatchModel(deep_triplet_model.user_layer,
                                      deep_triplet_model.item_layer,
                                      deep_triplet_model.mlp).to(device)

    # The model itself never applies l2_reg, so the penalty is applied here
    # through the optimizer's weight decay; otherwise the setting has no effect.
    optimizer = torch.optim.Adam(deep_triplet_model.parameters(), lr=lr,
                                 weight_decay=hyper_parameters['l2_reg'] or 0.0)

    for epoch in range(n_epochs):
        deep_triplet_model.train()
        # Sample a mini-batch of triplets.
        user_ids, pos_item_ids, neg_item_ids = sample_triplets(pos_data_train, n_users, n_items, num_samples=batch_size)
        if user_ids.numel() == 0:
            # The mean over an empty batch is NaN and would corrupt the weights.
            print(f"Epoch {epoch+1}/{n_epochs}, skipped: no triplets sampled")
            continue
        user_ids = user_ids.to(device)
        pos_item_ids = pos_item_ids.to(device)
        neg_item_ids = neg_item_ids.to(device)

        optimizer.zero_grad()
        loss = deep_triplet_model(user_ids, pos_item_ids, neg_item_ids)
        loss.backward()
        optimizer.step()

        if eval_every and (epoch + 1) % eval_every == 0:
            # Evaluate using ROC AUC on test data.
            auc = average_roc_auc(deep_match_model, pos_data_test, n_users, n_items)
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item():.4f}, Test ROC AUC: {auc:.4f}")
        else:
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item():.4f}")

if __name__ == "__main__":
    # Run training exactly once; a second, unguarded call used to repeat the
    # whole run and also triggered training on import.
    train_deep_recsys()

Epoch 1/200, Loss: 0.9948, Test ROC AUC: 0.4898
Epoch 2/200, Loss: 0.9940, Test ROC AUC: 0.5014
Epoch 3/200, Loss: 0.9871, Test ROC AUC: 0.5129
Epoch 4/200, Loss: 0.9852, Test ROC AUC: 0.5224
Epoch 5/200, Loss: 0.9856, Test ROC AUC: 0.5310
Epoch 6/200, Loss: 0.9948, Test ROC AUC: 0.5382
Epoch 7/200, Loss: 0.9829, Test ROC AUC: 0.5440
Epoch 8/200, Loss: 0.9713, Test ROC AUC: 0.5481
Epoch 9/200, Loss: 0.9918, Test ROC AUC: 0.5523
Epoch 10/200, Loss: 0.9765, Test ROC AUC: 0.5569
Epoch 11/200, Loss: 1.0006, Test ROC AUC: 0.5610
Epoch 12/200, Loss: 1.0003, Test ROC AUC: 0.5646
Epoch 13/200, Loss: 0.9410, Test ROC AUC: 0.5680
Epoch 14/200, Loss: 0.9628, Test ROC AUC: 0.5714
Epoch 15/200, Loss: 0.9326, Test ROC AUC: 0.5741
Epoch 16/200, Loss: 0.9345, Test ROC AUC: 0.5767
Epoch 17/200, Loss: 0.9479, Test ROC AUC: 0.5792
Epoch 18/200, Loss: 0.9094, Test ROC AUC: 0.5814
Epoch 19/200, Loss: 0.9754, Test ROC AUC: 0.5836
Epoch 20/200, Loss: 0.9576, Test ROC AUC: 0.5856
Epoch 21/200, Loss: 0.9622, T