# Neural Recommendation with Personalized Attention (NPA)

This notebook implements an Attention-based Neural Recommender System. 
Since the dataset consists only of user-item interactions (no text content), we adapt the NPA concept to use **User Attention over Interaction History**.

## Architecture
1.  **User Embedding**: Represents the user's intrinsic preference.
2.  **Item Embedding**: Represents items.
3.  **Attention Mechanism**: The User Embedding is used as the query that attends over the Item Embeddings in the user's history; the attention-weighted sum of those embeddings is added to the User Embedding to form a dynamic user representation.
4.  **Prediction**: Sigmoid of the dot product between the dynamic user representation and the target item embedding.

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import time
import random
from tqdm import tqdm

## 1. Data Loading and Preprocessing

In [2]:
def load_data(file_path):
    """Read a whitespace-separated interaction file into a per-user history map.

    Every line is parsed as a sequence of integers: the first one is the user
    ID and the remaining ones are that user's item IDs, kept in file order.
    Blank lines and lines that carry a user ID but no items are skipped. When
    a user ID appears on more than one line, the last line replaces the
    earlier ones.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A tuple ``(user_history, max_user_id, max_item_id)`` where
        ``user_history`` maps user ID -> list of item IDs, and the two maxima
        are the largest IDs among the stored rows (0 if nothing was stored).

    Raises:
        ValueError: If a token on any line cannot be parsed as an integer.
    """
    print("Loading data...")
    histories = {}
    top_user, top_item = 0, 0

    with open(file_path, 'r') as handle:
        for raw_line in handle:
            tokens = [int(tok) for tok in raw_line.split()]
            # Fewer than two tokens means a blank line or a user without items.
            if len(tokens) < 2:
                continue

            uid, *interactions = tokens
            histories[uid] = interactions
            top_user = max(top_user, uid)
            top_item = max(top_item, *interactions)

    print(f"Loaded {len(histories)} users. Max User ID: {top_user}, Max Item ID: {top_item}")
    return histories, top_user, top_item

# Location of the training interactions (machine-specific absolute path).
train_file = '/Users/riteshsingh/Documents/SJSU/Recommender System/projectrec/train-2.txt'
user_history, max_user_id, max_item_id = load_data(train_file)

# IDs are used directly as embedding indices further down, so each embedding
# table needs (largest ID + 1) rows.
n_users = max_user_id + 1
n_items = max_item_id + 1

Loading data...
Loaded 52643 users. Max User ID: 52642, Max Item ID: 91604


In [3]:
class RecommenderDataset(Dataset):
    """Pointwise training samples ``(user, padded_history, item, label)``.

    For every user with at least two interactions, the most recent
    ``targets_per_user`` interactions (never the very first one, which has no
    history) become positive targets with label 1.0. Each positive is paired
    with the interactions that precede it, truncated to the last
    ``history_len`` entries and right-padded with 0. For every positive,
    ``num_negatives`` items the user never interacted with are drawn uniformly
    from ``[0, n_items)`` and stored with label 0.0 and the same history.

    Note: 0 is used as the padding value but also lies inside the sampled item
    ID range, so a padded slot is indistinguishable from item 0.

    Args:
        user_history: Mapping user ID -> list of item IDs in interaction order.
        n_items: Number of item IDs; negatives are sampled from ``[0, n_items)``.
        history_len: Fixed length of the history sequence of each sample.
        num_negatives: Negatives generated per positive sample.
        targets_per_user: Maximum number of trailing interactions per user that
            are used as positive targets.
    """

    def __init__(self, user_history, n_items, history_len=20, num_negatives=4, targets_per_user=5):
        self.user_history = user_history
        self.n_items = n_items
        self.history_len = history_len
        self.num_negatives = num_negatives
        self.targets_per_user = targets_per_user
        self.users = list(user_history.keys())

        # Flat list of (user, hist_seq, item, label) tuples, filled eagerly.
        self.samples = []
        self._generate_samples()

    def _generate_samples(self):
        """Populate ``self.samples`` from every user's interaction list."""
        print("Generating training samples...")
        for user in tqdm(self.users):
            self.samples.extend(self._build_user_samples(user, self.user_history[user]))

    def _build_user_samples(self, user, items):
        """Return the positive and negative sample tuples for a single user."""
        if len(items) < 2:
            return []

        # Set membership keeps the negative rejection test O(1) per draw.
        seen = set(items)
        # If the user has touched every valid item ID there is nothing left to
        # sample, and the rejection loop below would never terminate.
        seen_in_range = sum(1 for item in seen if 0 <= item < self.n_items)
        can_sample_negatives = seen_in_range < self.n_items

        # Targets are addressed by position rather than looked up by value, so
        # an item that occurs several times keeps the history that actually
        # precedes each occurrence.
        first_target_pos = max(len(items) - self.targets_per_user, 1)

        rows = []
        for pos in range(first_target_pos, len(items)):
            history = list(items[max(0, pos - self.history_len):pos])
            hist_seq = history + [0] * (self.history_len - len(history))

            # Positive sample
            rows.append((user, hist_seq, items[pos], 1.0))

            if not can_sample_negatives:
                continue

            # Negative samples: redraw until the item is unseen by this user.
            for _ in range(self.num_negatives):
                neg = random.randint(0, self.n_items - 1)
                while neg in seen:
                    neg = random.randint(0, self.n_items - 1)
                rows.append((user, hist_seq, neg, 0.0))

        return rows

    def __len__(self):
        """Number of stored samples (positives plus negatives)."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(user, history, item, label)`` tensors."""
        user, history, item, label = self.samples[idx]
        return torch.tensor(user), torch.tensor(history), torch.tensor(item), torch.tensor(label, dtype=torch.float32)

In [4]:
# Hyperparameters
EMBEDDING_DIM = 32   # width of the user and item embedding vectors
HISTORY_LEN = 20     # number of past interactions kept per training sample
BATCH_SIZE = 512
EPOCHS = 3
LR = 0.001           # learning rate handed to the Adam optimizer
SEED = 42

# Seed the RNGs this notebook draws from so that Restart & Run All reproduces
# the same run: `random` drives negative sampling in the dataset, `torch`
# drives embedding initialisation and the DataLoader shuffle order.
random.seed(SEED)
torch.manual_seed(SEED)

dataset = RecommenderDataset(user_history, n_items, history_len=HISTORY_LEN)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

Generating training samples...


100%|██████████| 52643/52643 [00:01<00:00, 48952.16it/s]


## 2. Model Definition (NPA-style)

In [5]:
class NPARecommender(nn.Module):
    """Attention-pooled interaction-history recommender.

    The user embedding is used as the attention query over the embeddings of
    the items in the user's history. The attention-weighted sum of the history
    embeddings is added to the user embedding, and the score for a target item
    is the sigmoid of the dot product between that combined vector and the
    target item's embedding.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        embedding_dim: Width of both embedding tables.
        history_len: History length the model is used with (stored only).
    """

    def __init__(self, n_users, n_items, embedding_dim, history_len):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

        # NOTE: this projection is not applied anywhere in the forward pass.
        # It is kept so the module's parameter set / state_dict is unchanged.
        self.attention_fc = nn.Linear(embedding_dim, embedding_dim)
        self.history_len = history_len

    def user_representation(self, user, history, history_mask=None):
        """Build the history-aware user vector.

        Args:
            user: (batch,) user indices.
            history: (batch, history_len) item indices.
            history_mask: Optional (batch, history_len) tensor, truthy where
                the history slot holds a real interaction and falsy where it
                is padding. When omitted, every slot takes part in attention.

        Returns:
            (batch, dim) tensor: user embedding plus attention-pooled history.
            Rows whose mask is entirely falsy get the plain user embedding.
        """
        u_emb = self.user_embedding(user)        # (batch, dim)
        h_emb = self.item_embedding(history)     # (batch, history_len, dim)

        # Dot product of every history item with the user query: (batch, history_len)
        scores = torch.bmm(h_emb, u_emb.unsqueeze(2)).squeeze(2)

        if history_mask is None:
            attn_weights = torch.softmax(scores, dim=1)
        else:
            keep = history_mask.to(device=scores.device, dtype=torch.bool)
            # A large finite fill (instead of -inf) keeps softmax and its
            # gradient free of NaNs even when a whole row is masked out.
            scores = scores.masked_fill(~keep, torch.finfo(scores.dtype).min)
            attn_weights = torch.softmax(scores, dim=1)
            # Zero the weights of masked slots; this only changes rows that
            # were fully masked, where softmax fell back to uniform weights.
            attn_weights = attn_weights * keep.to(attn_weights.dtype)

        # (batch, 1, history_len) x (batch, history_len, dim) -> (batch, dim)
        pooled_history = torch.bmm(attn_weights.unsqueeze(1), h_emb).squeeze(1)

        return u_emb + pooled_history

    def forward(self, user, history, target_item, history_mask=None):
        """Score target items for the given users.

        Args:
            user: (batch,) user indices.
            history: (batch, history_len) item indices.
            target_item: (batch,) item indices to score.
            history_mask: Optional (batch, history_len) validity mask, see
                ``user_representation``.

        Returns:
            (batch,) tensor of sigmoid-activated scores in (0, 1).
        """
        final_user_vector = self.user_representation(user, history, history_mask)
        t_emb = self.item_embedding(target_item)  # (batch, dim)
        prediction = (final_user_vector * t_emb).sum(dim=1)
        return torch.sigmoid(prediction)

# Prefer a GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = NPARecommender(
    n_users=n_users,
    n_items=n_items,
    embedding_dim=EMBEDDING_DIM,
    history_len=HISTORY_LEN,
)
model = model.to(device)

# The model's forward pass already applies a sigmoid, so the loss is computed
# on probabilities rather than on raw logits.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=LR)

## 3. Training

In [6]:
print("Starting training...")
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        # Each batch is (user, history, item, label); move all four to the
        # device the model lives on.
        user, history, item, label = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        prediction = model(user, history, item)
        loss = criterion(prediction, label)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    mean_batch_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {mean_batch_loss:.4f}")

Starting training...


100%|██████████| 2571/2571 [00:27<00:00, 92.87it/s]


Epoch 1/3, Loss: 5.1565


100%|██████████| 2571/2571 [00:27<00:00, 92.22it/s]


Epoch 2/3, Loss: 4.0923


100%|██████████| 2571/2571 [00:28<00:00, 91.46it/s]

Epoch 3/3, Loss: 3.3441





## 4. Generate Recommendations

In [8]:
def generate_recommendations(model, user_history, n_users, n_items, output_file, top_k=20, history_len=None):
    """Write the top-K unseen items for every known user to ``output_file``.

    For each user ID in ``range(n_users)`` that has an entry in
    ``user_history``, the user vector is built exactly as in the model's
    forward pass (user embedding plus attention-pooled history, using the last
    ``history_len`` interactions right-padded with 0), scored against every
    item embedding, and the best-scoring items the user has not interacted
    with are written as one line: ``"<user_id> <item> <item> ..."``.

    Args:
        model: Module exposing ``user_embedding``, ``item_embedding`` and,
            unless ``history_len`` is given, a ``history_len`` attribute.
        user_history: Mapping user ID -> list of item IDs in interaction order.
        n_users: User IDs ``0 .. n_users - 1`` are considered.
        n_items: Item IDs ``0 .. n_items - 1`` are scored.
        output_file: Path of the text file to (over)write.
        top_k: Maximum number of items per user. Fewer are written when the
            catalogue or the user's set of unseen items is smaller.
        history_len: History length fed to the attention step; defaults to
            ``model.history_len``.
    """
    print("Generating recommendations...")
    model.eval()

    # Take the device and history length from the model itself instead of
    # relying on notebook-level globals.
    run_device = next(model.parameters()).device
    if history_len is None:
        history_len = model.history_len
    # torch.topk raises when asked for more entries than exist.
    k = min(top_k, n_items)

    with torch.no_grad(), open(output_file, 'w') as f:
        # Pre-compute item embeddings once; inside no_grad so that no autograd
        # graph is kept alive for the whole loop.
        all_items = torch.arange(n_items, device=run_device)
        item_embeddings = model.item_embedding(all_items)  # (n_items, dim)

        for user_id in tqdm(range(n_users)):
            if user_id not in user_history:
                continue

            # Prepare input: last `history_len` interactions, right-padded with 0.
            items = user_history[user_id]
            recent = list(items[max(0, len(items) - history_len):])
            hist_seq = recent + [0] * (history_len - len(recent))

            user_tensor = torch.tensor([user_id], device=run_device)
            hist_tensor = torch.tensor([hist_seq], device=run_device)

            # Compute user vector (same maths as the model's forward pass).
            u_emb = model.user_embedding(user_tensor)    # (1, dim)
            h_emb = model.item_embedding(hist_tensor)    # (1, history_len, dim)
            scores = torch.bmm(h_emb, u_emb.unsqueeze(2)).squeeze(2)
            attn_weights = torch.softmax(scores, dim=1)
            user_history_vector = torch.bmm(attn_weights.unsqueeze(1), h_emb).squeeze(1)
            final_user_vector = u_emb + user_history_vector  # (1, dim)

            # (1, dim) x (n_items, dim)^T -> (n_items,)
            item_scores = torch.matmul(final_user_vector, item_embeddings.T).squeeze(0)

            # Mask items the user already interacted with.
            seen = torch.tensor(items, dtype=torch.long, device=run_device)
            item_scores[seen] = float('-inf')

            # Top K, dropping masked entries that only surface when fewer than
            # k unseen items remain.
            top_k_scores, top_k_indices = torch.topk(item_scores, k)
            recs = top_k_indices[top_k_scores != float('-inf')].tolist()

            f.write(f"{user_id} {' '.join(map(str, recs))}\n")
                
# Destination of the per-user top-K lists (machine-specific absolute path).
output_file = '/Users/riteshsingh/Documents/SJSU/Recommender System/projectrec/output_npa.txt'
generate_recommendations(
    model=model,
    user_history=user_history,
    n_users=n_users,
    n_items=n_items,
    output_file=output_file,
)

Generating recommendations...


100%|██████████| 52643/52643 [00:16<00:00, 3181.31it/s]
