In [1]:
import pickle
import pandas as pd

In [2]:
# Single place to repoint the notebook at a different copy of the preprocessed
# data; every file below lives directly inside this directory.
DATA_DIR = '/data/sukhanna/cse258/processed_100'

# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# were produced by a trusted pipeline.

# Per-user interaction records for the three splits.
train_data = pd.read_pickle(f'{DATA_DIR}/train_data.pkl')
val_data = pd.read_pickle(f'{DATA_DIR}/val_data.pkl')
test_data = pd.read_pickle(f'{DATA_DIR}/test_data.pkl')

# Item id mappings (presumably raw item identifier <-> integer id; verify
# against the preprocessing script).
id2item = pd.read_pickle(f'{DATA_DIR}/id2item.pkl')
item2id = pd.read_pickle(f'{DATA_DIR}/item2id.pkl')

# User id mappings (presumably raw user identifier <-> integer id; verify
# against the preprocessing script).
id2user = pd.read_pickle(f'{DATA_DIR}/id2user.pkl')
user2id = pd.read_pickle(f'{DATA_DIR}/user2id.pkl')


In [3]:
# Inspect the training record of sample user 8551: a 'sequence' of
# {item_id, timestamp, rating} interactions plus an 'is_autoregressive' flag.
train_data[8551]

{'sequence': [{'item_id': 239726, 'timestamp': 1373155244.0, 'rating': 4.0},
  {'item_id': 100927, 'timestamp': 1398238497.0, 'rating': 5.0},
  {'item_id': 239727, 'timestamp': 1447851667.0, 'rating': 5.0}],
 'is_autoregressive': True}

In [4]:
# Same user in the validation split: the record additionally carries a
# 'target' interaction to be predicted from the 'sequence'.
val_data[8551]

{'sequence': [{'item_id': 239726, 'timestamp': 1373155244.0, 'rating': 4.0},
  {'item_id': 100927, 'timestamp': 1398238497.0, 'rating': 5.0},
  {'item_id': 239727, 'timestamp': 1447851667.0, 'rating': 5.0}],
 'target': {'item_id': 239728, 'timestamp': 1448406034.0, 'rating': 4.0},
 'is_autoregressive': False}

In [5]:
# Same user in the test split: for this user the validation target has been
# appended to the 'sequence', and 'target' is a later interaction.
test_data[8551]

{'sequence': [{'item_id': 239726, 'timestamp': 1373155244.0, 'rating': 4.0},
  {'item_id': 100927, 'timestamp': 1398238497.0, 'rating': 5.0},
  {'item_id': 239727, 'timestamp': 1447851667.0, 'rating': 5.0},
  {'item_id': 239728, 'timestamp': 1448406034.0, 'rating': 4.0}],
 'target': {'item_id': 135537, 'timestamp': 1569107616.096, 'rating': 4.0},
 'is_autoregressive': False}

In [None]:
import torch
from torch.utils.data import Dataset
import random
import numpy as np

class BPRDataset(Dataset):
    """Yields (user, positive item, negative item) triples for BPR training.

    Each dataset row corresponds to one user that has a non-empty training
    sequence. Users with an empty sequence are skipped as training rows, but
    all per-user bookkeeping that other code indexes by user stays aligned
    with the ORIGINAL user index, so embeddings trained here line up with an
    evaluator that iterates the raw validation/test data.
    """

    def __init__(self, train_data, val_data, test_data, num_items):
        """
        Args:
            train_data: Container indexable by 0..len(train_data)-1 whose entries
                        hold 'sequence' (list of dicts with 'item_id').
            val_data: Container with the same indexing; entries hold
                      'target' ('item_id').
            test_data: Container with the same indexing; entries hold
                       'target' ('item_id').
            num_items: Total count of items in catalog (e.g., 349,000).
                       Assumes IDs range from 1 to num_items.

        Raises:
            ValueError: If a trainable user's exclusion set covers every id in
                        1..num_items, which would make negative sampling loop
                        forever.
        """
        self.num_items = num_items

        # Train sequences of trainable users only (row-indexed).
        self.user_history = []
        # One exclusion set per user, indexed by the ORIGINAL user index
        # (includes users that have no training sequence).
        self.exclusion_sets = []
        # Dataset row -> original user index.
        self.valid_user_indices = []

        # train_data, val_data and test_data are assumed to be aligned by user index.
        for u_idx in range(len(train_data)):
            train_seq = [x['item_id'] for x in train_data[u_idx]['sequence']]
            val_target = val_data[u_idx]['target']['item_id']
            test_target = test_data[u_idx]['target']['item_id']

            # Items that must never be drawn as negatives: Train + Val + Test.
            # Stored for EVERY user so that position == original user index.
            exclude = set(train_seq)
            exclude.add(val_target)
            exclude.add(test_target)
            self.exclusion_sets.append(exclude)

            # Only users with at least one positive can produce training triples.
            if train_seq:
                # Rejection sampling in __getitem__ needs at least one
                # admissible id, otherwise it would never terminate.
                blocked = sum(1 for item in exclude if 1 <= item <= num_items)
                if blocked >= num_items:
                    raise ValueError(
                        f"User {u_idx} has no item left to sample as a negative "
                        f"(num_items={num_items})."
                    )
                self.user_history.append(train_seq)
                self.valid_user_indices.append(u_idx)

    def __len__(self):
        """Number of trainable users (users with a non-empty train sequence)."""
        return len(self.user_history)

    def __getitem__(self, idx):
        """
        Args:
            idx: Dataset row (0 to len(self)-1).

        Returns:
            user_id: original user index of this row (long tensor)
            pos_item: (0 to M-1), drawn from the user's train sequence
            neg_item: (0 to M-1), drawn outside the user's exclusion set
        """
        # Use the original user index so the embedding row trained here is the
        # same row an evaluator looks up when it iterates the raw data.
        user_id = self.valid_user_indices[idx]

        # Positive: uniformly from the user's training history.
        pos_id_raw = random.choice(self.user_history[idx])

        # Negative: rejection-sample from 1..num_items (inclusive) until the
        # candidate is not a known positive of this user.
        excluded = self.exclusion_sets[user_id]
        while True:
            neg_id_raw = random.randint(1, self.num_items)
            if neg_id_raw not in excluded:
                break

        # Input IDs are 1-based; embedding layers are 0-based, so subtract 1.
        return (
            torch.tensor(user_id, dtype=torch.long),
            torch.tensor(pos_id_raw - 1, dtype=torch.long),
            torch.tensor(neg_id_raw - 1, dtype=torch.long)
        )

# class BPRDataset(Dataset):
#     def __init__(self, train_data, val_data, test_data, num_items, n_neg=1): # <--- Added n_neg
#         # ... (previous init code is same) ...
#         self.n_neg = n_neg # Store it

#     def __getitem__(self, idx):
#         user_id = idx 
#         pos_id_raw = random.choice(self.user_history[idx])
        
#         # --- MODIFIED BLOCK START ---
#         neg_samples = []
#         for _ in range(self.n_neg): # Loop N times
#             while True:
#                 neg_id_raw = random.randint(1, self.num_items)
#                 if neg_id_raw not in self.exclusion_sets[idx]:
#                     neg_samples.append(neg_id_raw - 1)
#                     break
#         # --- MODIFIED BLOCK END ---

#         return (
#             torch.tensor(user_id, dtype=torch.long),
#             torch.tensor(pos_id_raw - 1, dtype=torch.long),
#             torch.tensor(neg_samples, dtype=torch.long) # Shape: [n_neg]
#         )

In [None]:
import torch
import torch.nn as nn

class BPRMF(nn.Module):
    """Matrix-factorisation scorer: score(u, i) = <user_vector[u], item_vector[i]>."""

    def __init__(self, num_users, num_items, embedding_dim=64):
        """
        Args:
            num_users: Number of rows in the user embedding table.
            num_items: Number of rows in the item embedding table.
            embedding_dim: Width of every latent vector.
        """
        super().__init__()

        # Lookup tables of shape [num_users, embedding_dim] and
        # [num_items, embedding_dim] respectively.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # Re-initialise both tables from a narrow normal distribution
        # (user table first, then item table).
        for table in (self.user_embedding, self.item_embedding):
            nn.init.normal_(table.weight, std=0.01)

    def forward(self, user_indices, item_indices):
        """
        Scores each (user, item) pair by the dot product of their embeddings.

        Args:
            user_indices: Long tensor of shape [batch_size].
            item_indices: Long tensor of shape [batch_size].

        Returns:
            Tensor of shape [batch_size] with one score per pair.
        """
        # Both lookups yield [batch_size, embedding_dim].
        latent_users = self.user_embedding(user_indices)
        latent_items = self.item_embedding(item_indices)

        # Element-wise product reduced over the embedding axis == dot product.
        return torch.sum(latent_users * latent_items, dim=1)
        

In [24]:
import torch
import numpy as np
import random
import math

class BPREvaluator:
    """Sampled-ranking evaluator: ranks each user's target item against a set
    of randomly drawn negatives and reports Hit Rate and NDCG at several cutoffs.
    """

    def __init__(self, eval_data, exclusion_rules, num_items, k_list=(5, 10),
                 num_negatives=100):
        """
        Args:
            eval_data: Validation or Test data. Either a mapping
                       {user_idx: entry} or a sequence of entries (position is
                       then used as user_idx). Each entry holds
                       'target' -> 'item_id' (1-based).
            exclusion_rules: Mapping or sequence indexable by user_idx that
                             yields the set of all positive item ids (1-based)
                             for that user.
            num_items: Total catalog size (for random negative sampling).
            k_list: Iterable of K values for metrics (e.g., [5, 10]).
            num_negatives: How many distinct negatives to rank the target
                           against (default 100).
        """
        self.eval_data = eval_data
        self.exclusion_rules = exclusion_rules
        self.num_items = num_items
        # Copy so the instance never shares state with the caller's (or the
        # default) sequence.
        self.k_list = list(k_list)
        self.num_negatives = num_negatives

    def _iter_users(self):
        """Yields (user_idx, entry) pairs for both mapping and sequence inputs."""
        if hasattr(self.eval_data, 'items'):
            return self.eval_data.items()
        return enumerate(self.eval_data)

    def _sample_negatives(self, gt_item_raw, u_exclusion):
        """Draws `num_negatives` distinct 0-based item indices that are neither
        in `u_exclusion` nor equal to the ground-truth item.

        Raises:
            ValueError: If the catalog does not contain enough eligible items,
                        which would otherwise make the sampling loop endless.
        """
        blocked = {item for item in u_exclusion if 1 <= item <= self.num_items}
        if 1 <= gt_item_raw <= self.num_items:
            blocked.add(gt_item_raw)
        available = self.num_items - len(blocked)
        if available < self.num_negatives:
            raise ValueError(
                f"Cannot sample {self.num_negatives} distinct negatives: only "
                f"{available} eligible items out of {self.num_items}."
            )

        negatives = []
        already_drawn = set()
        while len(negatives) < self.num_negatives:
            candidate = random.randint(1, self.num_items)
            if candidate in u_exclusion or candidate == gt_item_raw:
                continue
            if candidate in already_drawn:
                # Reject duplicates so the target is ranked against distinct items.
                continue
            already_drawn.add(candidate)
            negatives.append(candidate - 1)  # 1-based id -> 0-based index
        return negatives

    def evaluate(self, model, device='cpu'):
        """Computes average HR@K and NDCG@K over all users in `eval_data`.

        Args:
            model: Callable `model(user_tensor, item_tensor) -> scores` exposing
                   `.eval()`; it is switched to evaluation mode and left there.
            device: Device on which the index tensors are placed.

        Returns:
            (avg_hr, avg_ndcg): two dicts keyed by K. Values are NaN when
            `eval_data` is empty.
        """
        model.eval()  # Switch model to evaluation mode

        hr_results = {k: [] for k in self.k_list}
        ndcg_results = {k: [] for k in self.k_list}
        num_candidates = self.num_negatives + 1

        with torch.no_grad():
            for u_idx, entry in self._iter_users():
                # Ground truth, converted from 1-based id to 0-based index.
                gt_item_raw = entry['target']['item_id']
                gt_item = gt_item_raw - 1

                negatives = self._sample_negatives(gt_item_raw, self.exclusion_rules[u_idx])

                # Candidate order: [GT, Neg1, ..., NegN]; the user index is
                # repeated once per candidate.
                candidate_items = [gt_item] + negatives
                candidate_tensor = torch.tensor(candidate_items, dtype=torch.long).to(device)
                user_tensor = torch.tensor([u_idx] * num_candidates, dtype=torch.long).to(device)

                scores = model(user_tensor, candidate_tensor)
                scores = scores.cpu().numpy()

                # 0-based rank of the ground truth = number of negatives that
                # score strictly higher. Exact ties are resolved in favour of
                # the ground truth, deterministically.
                gt_rank = int(np.sum(scores[1:] > scores[0]))

                for k in self.k_list:
                    if gt_rank < k:
                        hr_results[k].append(1)
                        # Single relevant item: NDCG = 1 / log2(rank + 2),
                        # so rank 0 (1st place) gives exactly 1.
                        ndcg_results[k].append(1 / math.log2(gt_rank + 2))
                    else:
                        hr_results[k].append(0)
                        ndcg_results[k].append(0)

        # Average the per-user results; avoid numpy's empty-mean warning.
        avg_hr = {k: (np.mean(v) if v else float('nan')) for k, v in hr_results.items()}
        avg_ndcg = {k: (np.mean(v) if v else float('nan')) for k, v in ndcg_results.items()}

        return avg_hr, avg_ndcg

In [25]:
# Catalog size: random item ids are drawn from 1..num_items_total when sampling.
num_items_total = len(item2id)

# Training dataset. Constructing it also builds, for every user, the set of
# items (Train + Val + Test) that may not be used as negatives.
train_dataset = BPRDataset(train_data, val_data, test_data, num_items_total)

# Reuse those per-user sets in the evaluator so known positives are never
# sampled as "negatives" during the ranking test.
exclusion_rules = train_dataset.exclusion_sets

# Validation evaluator, run after every epoch to report HR/NDCG at K=1 and K=10.
val_evaluator = BPREvaluator(val_data, exclusion_rules, num_items_total, k_list=[1, 10])

In [None]:
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm
import torch
import random

# --- Hyperparameters ---
BATCH_SIZE = 128      # Reasonably small for 1 GPU
LEARNING_RATE = 0.001 # Standard starting point for Adam
NUM_EPOCHS = 10
EMBEDDING_DIM = 64
SEED = 42             # Fixes sampling, shuffling and weight initialisation
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Device selected: {DEVICE}")

# --- Reproducibility ---
# Positive/negative sampling in the dataset and the evaluator's negatives use
# Python's `random`; DataLoader shuffling and embedding initialisation use
# torch's RNG. Seed both before anything random happens so a fresh
# Restart & Run All repeats the same run.
random.seed(SEED)
torch.manual_seed(SEED)

# --- Setup Model & Data ---
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = BPRMF(num_users=len(train_data), num_items=len(item2id), embedding_dim=EMBEDDING_DIM)
model.to(DEVICE)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --- Training Loop ---
print("Starting Training...")
print("=" * 60)

for epoch in range(1, NUM_EPOCHS + 1):
    
    # 1. Training Phase
    # Re-enter train mode every epoch because evaluation leaves the model in eval mode.
    model.train()
    total_loss = 0.0
    
    # Tqdm progress bar for the batch loop
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}", leave=False)
    
    for user_ids, pos_items, neg_items in progress_bar:
        # Move the batch to the training device
        user_ids = user_ids.to(DEVICE)
        pos_items = pos_items.to(DEVICE)
        neg_items = neg_items.to(DEVICE)
        
        # Clear gradients
        optimizer.zero_grad()
        
        # Forward Pass
        # We calculate scores for (User, Positive) and (User, Negative)
        pos_scores = model(user_ids, pos_items)
        neg_scores = model(user_ids, neg_items)

        # --- Disabled variant: several negatives per positive ---
        # (requires a dataset that returns neg_items of shape [Batch_Size, N_Neg])
        # # Unpack data
        # # neg_items shape: [Batch_Size, N_Neg]
        # batch_size = user_ids.size(0)
        # n_neg = neg_items.size(1) 

        # # 1. Flatten Negatives to fit into Model
        # # We effectively treat these as (Batch * N) separate pairs
        # flat_users = user_ids.repeat_interleave(n_neg) # Shape: [Batch * N]
        # flat_negs = neg_items.view(-1)                 # Shape: [Batch * N]

        # # 2. Compute Scores
        # pos_scores = model(user_ids, pos_items)        # Shape: [Batch]
        # flat_neg_scores = model(flat_users, flat_negs) # Shape: [Batch * N]

        # # 3. Reshape Back for Loss
        # neg_scores = flat_neg_scores.view(batch_size, n_neg) # Shape: [Batch, N]

        # # 4. Calculate Loss with Broadcasting
        # # pos_scores: [Batch] -> Unsqueeze to [Batch, 1] to broadcast against [Batch, N]
        # loss = -torch.mean(torch.nn.functional.logsigmoid(pos_scores.unsqueeze(1) - neg_scores))


        # BPR Loss Calculation
        # Loss = - mean( log( sigmoid( pos_score - neg_score ) ) ) over the batch.
        # The optimizer minimizes, so we take the negative log likelihood.
        loss = -torch.mean(torch.nn.functional.logsigmoid(pos_scores - neg_scores))
        
        # Backward Pass
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Update progress bar
        progress_bar.set_postfix({'loss': loss.item()})
        
    # Mean of the per-batch losses for this epoch
    avg_loss = total_loss / len(train_loader)
    
    # 2. Evaluation Phase
    # We evaluate on the Validation set at the end of every epoch
    hr_metrics, ndcg_metrics = val_evaluator.evaluate(model, device=DEVICE)
    
    # 3. Reporting
    print(f"Epoch {epoch:02d} Completed")
    print("-" * 30)
    print(f"  Training Loss: {avg_loss:.4f}")
    print(f"  Validation HR@1:   {hr_metrics[1]:.4f} | HR@10:   {hr_metrics[10]:.4f}")
    print(f"  Validation NDCG@1: {ndcg_metrics[1]:.4f} | NDCG@10: {ndcg_metrics[10]:.4f}")
    print("=" * 60)

# --- Save Model ---
# Persists the weights of the final epoch (not the best validation epoch).
save_path = "/data/sukhanna/cse258/bpr_mf_model.pth"
torch.save(model.state_dict(), save_path)
print(f"\nTraining Complete. Model saved to: {save_path}")

Device selected: cuda
Starting Training...


                                                                          

Epoch 01 Completed
------------------------------
  Training Loss: 0.6931
  Validation HR@1:   0.0256 | HR@10:   0.1495
  Validation NDCG@1: 0.0256 | NDCG@10: 0.0769


                                                                          

Epoch 02 Completed
------------------------------
  Training Loss: 0.6915
  Validation HR@1:   0.0360 | HR@10:   0.1671
  Validation NDCG@1: 0.0360 | NDCG@10: 0.0903


                                                                          

Epoch 03 Completed
------------------------------
  Training Loss: 0.6858
  Validation HR@1:   0.0471 | HR@10:   0.1747
  Validation NDCG@1: 0.0471 | NDCG@10: 0.1006


                                                                          

Epoch 04 Completed
------------------------------
  Training Loss: 0.6755
  Validation HR@1:   0.0547 | HR@10:   0.1816
  Validation NDCG@1: 0.0547 | NDCG@10: 0.1082


                                                                          

Epoch 05 Completed
------------------------------
  Training Loss: 0.6605
  Validation HR@1:   0.0613 | HR@10:   0.1893
  Validation NDCG@1: 0.0613 | NDCG@10: 0.1159


                                                                          

Epoch 06 Completed
------------------------------
  Training Loss: 0.6411
  Validation HR@1:   0.0675 | HR@10:   0.1958
  Validation NDCG@1: 0.0675 | NDCG@10: 0.1227


                                                                          

Epoch 07 Completed
------------------------------
  Training Loss: 0.6191
  Validation HR@1:   0.0730 | HR@10:   0.2028
  Validation NDCG@1: 0.0730 | NDCG@10: 0.1290


                                                                          

Epoch 08 Completed
------------------------------
  Training Loss: 0.5942
  Validation HR@1:   0.0766 | HR@10:   0.2099
  Validation NDCG@1: 0.0766 | NDCG@10: 0.1343


                                                                          

Epoch 09 Completed
------------------------------
  Training Loss: 0.5674
  Validation HR@1:   0.0797 | HR@10:   0.2152
  Validation NDCG@1: 0.0797 | NDCG@10: 0.1387


                                                                           

Epoch 10 Completed
------------------------------
  Training Loss: 0.5408
  Validation HR@1:   0.0834 | HR@10:   0.2201
  Validation NDCG@1: 0.0834 | NDCG@10: 0.1432

Training Complete. Model saved to: /data/sukhanna/cse258/bpr_mf_model.pth


In [27]:
# Test evaluator. It shares the per-user exclusion sets built by the training
# dataset, so neither the test target nor any Train/Val item is drawn as a negative.
test_evaluator = BPREvaluator(
    test_data,
    train_dataset.exclusion_sets,
    len(item2id),
    k_list=[1, 10],
)

# Score the trained model on the held-out test targets.
print("Running Test Evaluation...")
test_hr, test_ndcg = test_evaluator.evaluate(model, device=DEVICE)

# Report HR and NDCG at K=1 and K=10 between two divider lines.
print("-" * 30)
print(f"Test HR@1:   {test_hr[1]:.4f} | HR@10:   {test_hr[10]:.4f}")
print(f"Test NDCG@1: {test_ndcg[1]:.4f} | NDCG@10: {test_ndcg[10]:.4f}")
print("-" * 30)

Running Test Evaluation...
------------------------------
Test HR@1:   0.0691 | HR@10:   0.2007
Test NDCG@1: 0.0691 | NDCG@10: 0.1261
------------------------------
