## setup


In [13]:
import os
import math
import random
from collections import defaultdict, Counter
from typing import List, Dict, Tuple, Optional

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Set seed for reproducibility: Python's `random`, NumPy and PyTorch each keep
# their own generator, so all of them are seeded with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Device configuration
# The notebook is pinned to the CPU. To pick CUDA automatically when it is
# available, replace the assignment below with:
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DEVICE is also used as the default `device` argument of several functions
# defined further down, so it must exist before those definitions run.
DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")


Using device: cpu


## data preprocessing


In [None]:
def load_and_preprocess_data(
    ratings_path: str = "data/ratings_small.csv",
    movies_path: str = "data/movies_metadata.csv",
    min_user_interactions: int = 5,
    min_movie_interactions: int = 5
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[int, int], Dict[int, int], Dict[int, str]]:
    """Load ratings and movie metadata and build contiguous index mappings.

    Args:
        ratings_path: CSV with at least the columns ``userId`` and ``movieId``.
        movies_path: CSV with at least the columns ``id`` and ``title``.
        min_user_interactions: Users with fewer ratings are dropped.
        min_movie_interactions: Movies with fewer ratings (counted after the
            user filter) are dropped.

    Returns:
        A tuple ``(ratings, movies, user2idx, movie2idx, idx2movie_title)``:
        the filtered ratings, the cleaned metadata, a user id -> 0-based index
        map, a movie id -> 1-based index map (0 is reserved for padding) and a
        movie index -> title map.
    """
    ratings = pd.read_csv(ratings_path)
    print(f"Loaded {len(ratings)} ratings")

    movies = pd.read_csv(movies_path, low_memory=False)
    # Rows whose id is not purely numeric are dropped. The explicit copy makes
    # the column assignment below act on an independent frame instead of on a
    # slice of the raw one (which triggers SettingWithCopyWarning).
    numeric_id = movies['id'].apply(lambda x: str(x).isdigit())
    movies = movies[numeric_id].copy()
    movies['id'] = movies['id'].astype(int)
    print(f"Loaded {len(movies)} movies")

    user_counts = ratings['userId'].value_counts()
    valid_users = user_counts[user_counts >= min_user_interactions].index
    ratings = ratings[ratings['userId'].isin(valid_users)]
    print(f"After user filtering: {len(ratings)} ratings")

    # Movie counts are taken on the already user-filtered ratings; the user
    # filter is not re-applied afterwards.
    movie_counts = ratings['movieId'].value_counts()
    valid_movies = movie_counts[movie_counts >= min_movie_interactions].index
    ratings = ratings[ratings['movieId'].isin(valid_movies)]
    print(f"After movie filtering: {len(ratings)} ratings")

    unique_users = ratings['userId'].unique()
    unique_movies = ratings['movieId'].unique()

    user2idx = {user: idx for idx, user in enumerate(unique_users)}
    movie2idx = {movie: idx + 1 for idx, movie in enumerate(unique_movies)}  # 0 - padding

    # NOTE(review): titles are looked up by matching ratings' `movieId` against
    # metadata `id`; this assumes both columns share one id space - TODO confirm.
    movie_id_to_title = dict(zip(movies['id'], movies['title']))
    idx2movie_title = {}
    for movie_id, idx in movie2idx.items():
        title = movie_id_to_title.get(movie_id)
        # A missing CSV cell is read as NaN (a float); fall back to the
        # placeholder so that every value in the mapping really is a string.
        if not isinstance(title, str):
            title = f"Movie_{movie_id}"
        idx2movie_title[idx] = title

    print(f"Unique users: {len(unique_users)}")
    print(f"Unique movies: {len(unique_movies)}")

    return ratings, movies, user2idx, movie2idx, idx2movie_title


def create_user_sequences(
    ratings: pd.DataFrame,
    user2idx: Dict[int, int],
    movie2idx: Dict[int, int],
    min_rating: float = 3.5
) -> Dict[int, List[int]]:
    """Build one chronologically ordered list of liked movies per user.

    Args:
        ratings: Frame with the columns ``userId``, ``movieId``, ``rating``
            and ``timestamp``.
        user2idx: Maps raw user ids to user indices.
        movie2idx: Maps raw movie ids to movie indices.
        min_rating: Only ratings greater than or equal to this value count as
            positive interactions.

    Returns:
        ``{user_idx: [movie_idx, ...]}`` ordered by timestamp, containing only
        users with at least three positive interactions. The dict is empty when
        no user qualifies.

    Raises:
        KeyError: If a user or movie id in ``ratings`` is missing from the
            corresponding mapping.
    """
    print(f"Creating sequences (min_rating={min_rating})...")

    positive_ratings = ratings[ratings['rating'] >= min_rating]
    print(f"Positive interactions: {len(positive_ratings)}")

    positive_ratings = positive_ratings.sort_values(['userId', 'timestamp'])

    # Walk the two id columns directly instead of DataFrame.iterrows(), which
    # builds a Series per row. The loop is fast enough that it needs no
    # progress bar.
    user_sequences = defaultdict(list)
    id_pairs = zip(
        positive_ratings['userId'].tolist(),
        positive_ratings['movieId'].tolist(),
    )
    for user_id, movie_id in id_pairs:
        user_sequences[user2idx[user_id]].append(movie2idx[movie_id])

    # At least three items are needed to leave one for validation and one for
    # test while keeping a non-empty training history.
    user_sequences = {k: v for k, v in user_sequences.items() if len(v) >= 3}

    print(f"Users with sequences >= 3: {len(user_sequences)}")

    # min()/max() raise on an empty collection, so the statistics are only
    # reported when at least one user survived the filter.
    if user_sequences:
        seq_lengths = [len(v) for v in user_sequences.values()]
        print(f"Sequence length statistics:")
        print(f"  Min: {min(seq_lengths)}, Max: {max(seq_lengths)}, Mean: {np.mean(seq_lengths):.2f}")

    return user_sequences


## dataset & dataloader


In [None]:
class MovieSequenceDataset(Dataset):
    """Next-item prediction samples derived from per-user item sequences.

    Every position ``i >= 1`` of every sequence yields one sample: the (at
    most ``max_seq_len``) items that precede position ``i`` as input and the
    item at position ``i`` as target.
    """

    def __init__(
        self,
        user_sequences: Dict[int, List[int]],
        max_seq_len: int = 50,
        num_items: int = None
    ):
        self.max_seq_len = max_seq_len
        self.num_items = num_items  # stored only; not read by this class
        self.samples = []

        for history in user_sequences.values():
            self.samples.extend(
                (history[max(0, end - max_seq_len):end], history[end])
                for end in range(1, len(history))
            )

    def __len__(self):
        """Return the number of (input, target) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the ``(input_sequence, target)`` pair stored at ``idx``."""
        history, next_item = self.samples[idx]
        return history, next_item


def collate_fn(batch):
    """Turn a list of ``(input_sequence, target)`` pairs into padded tensors.

    Sequences are left-padded with 0 up to the longest sequence of the batch,
    so the most recent item always sits in the last column.

    Returns:
        A tuple ``(padded_sequences, attention_masks, targets)`` where the
        sequences and targets are ``torch.long``, and the mask is
        ``torch.float`` with 1 on real items and 0 on padding.
    """
    input_seqs, targets = zip(*batch)

    width = max(map(len, input_seqs))
    gaps = [width - len(seq) for seq in input_seqs]

    padded_rows = [[0] * gap + list(seq) for gap, seq in zip(gaps, input_seqs)]
    mask_rows = [[0] * gap + [1] * (width - gap) for gap in gaps]

    padded_tensor = torch.tensor(padded_rows, dtype=torch.long)
    mask_tensor = torch.tensor(mask_rows, dtype=torch.float)
    target_tensor = torch.tensor(targets, dtype=torch.long)
    return padded_tensor, mask_tensor, target_tensor


def split_sequences(
    user_sequences: Dict[int, List[int]],
    val_ratio: float = 0.1,
    test_ratio: float = 0.1
) -> Tuple[Dict[int, List[int]], Dict[int, List[int]], Dict[int, List[int]]]:
    """
    Split every user sequence with a leave-last-out strategy.

    For a sequence of length n (users with n < 3 are skipped):
    - train: the first n - 2 items
    - val:   the first n - 1 items (its last item is the validation target)
    - test:  the whole sequence (its last item is the test target)

    `val_ratio` and `test_ratio` are accepted but not used by this function.
    """
    eligible = {
        user_idx: seq
        for user_idx, seq in user_sequences.items()
        if len(seq) >= 3
    }

    train_seqs = {user_idx: seq[:-2] for user_idx, seq in eligible.items()}
    # The validation and test splits keep the full preceding history so it can
    # be fed to the model as context.
    val_seqs = {user_idx: seq[:-1] for user_idx, seq in eligible.items()}
    test_seqs = dict(eligible)

    return train_seqs, val_seqs, test_seqs


## transformer decoder model


In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch of embeddings.

    Even feature columns hold sines and odd columns hold cosines of the
    position scaled by geometrically spaced frequencies. The table is a
    registered buffer, so it follows the module across devices and is part of
    the state dict, but it is not trained.
    """

    def __init__(self, d_model: int, max_len: int = 1024, dropout: float = 0.1):
        """
        Args:
            d_model: Feature size of the embeddings (odd values are supported).
            max_len: Number of positions to precompute; inputs passed to
                `forward` must not be longer than this.
            dropout: Dropout probability applied after adding the encoding.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so the surplus frequency is dropped for the cosine half.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        self.register_buffer('pe', pe)

    def forward(self, x):
        """Args: x: Tensor of shape [batch_size, seq_len, d_model]"""
        # The [seq_len, d_model] slice broadcasts over the batch dimension.
        x = x + self.pe[:x.size(1)]
        return self.dropout(x)


class TransformerRecommender(nn.Module):
    """Causal Transformer that scores every item as the next item of a sequence.

    Item index 0 is reserved for padding, so the embedding table and the
    output layer both have ``num_items + 1`` rows/outputs.
    """

    def __init__(
        self,
        num_items: int,
        d_model: int = 64,
        nhead: int = 2,
        num_layers: int = 2,
        dim_feedforward: int = 256,
        dropout: float = 0.1,
        max_seq_len: int = 50
    ):
        """
        Args:
            num_items: Number of real items (padding excluded).
            d_model: Embedding / hidden size.
            nhead: Number of attention heads.
            num_layers: Number of stacked decoder layers.
            dim_feedforward: Hidden size of each feed-forward sub-layer.
            dropout: Dropout probability used throughout the network.
            max_seq_len: Stored on the instance; the positional table itself
                is always built for 1024 positions.
        """
        super().__init__()

        self.num_items = num_items
        self.d_model = d_model
        self.max_seq_len = max_seq_len

        # item embedding (0 padding)
        self.item_embedding = nn.Embedding(num_items + 1, d_model, padding_idx=0)

        # positional encoding (use larger max_len to flex)
        self.pos_encoding = PositionalEncoding(d_model, max_len=1024, dropout=dropout)

        # layers
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)

        # output layer
        self.output_layer = nn.Linear(d_model, num_items + 1)

        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise all weight matrices, keeping the padding row zero."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        # The loop above also overwrites the embedding table, including the
        # padding row that nn.Embedding had zeroed. That row receives no
        # gradient, so it must be reset here or it stays random forever.
        padding_idx = self.item_embedding.padding_idx
        if padding_idx is not None:
            with torch.no_grad():
                self.item_embedding.weight[padding_idx].zero_()

    def generate_causal_mask(self, seq_len: int, device: Optional[torch.device] = None) -> torch.Tensor:
        """Return a ``[seq_len, seq_len]`` bool mask, True where attention is blocked.

        Entries strictly above the diagonal are True, so a position can only
        attend to itself and to earlier positions. ``device`` selects where the
        mask is created (default: PyTorch's default device).
        """
        mask = torch.triu(torch.ones(seq_len, seq_len, device=device), diagonal=1).bool()
        return mask

    def forward(self, input_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Compute next-item logits for every position.

        Args:
            input_ids: Long tensor ``[batch_size, seq_len]`` of item indices.
            attention_mask: Optional tensor of the same shape, 0 on padding.

        Returns:
            Tensor ``[batch_size, seq_len, num_items + 1]`` of logits.
        """
        batch_size, seq_len = input_ids.shape

        x = self.item_embedding(input_ids)
        x = self.pos_encoding(x)

        # Built directly on the input's device to avoid a host-to-device copy.
        causal_mask = self.generate_causal_mask(seq_len, device=input_ids.device)

        padding_mask = (attention_mask == 0) if attention_mask is not None else None
        # NOTE(review): with left padding, a padded query position can only
        # see padded keys, i.e. its attention row is fully masked. How that
        # row is evaluated depends on the PyTorch version - TODO confirm it
        # cannot leak NaNs into the last position on the version in use.

        # There is no encoder: the decoder stack cross-attends to a single
        # all-zero memory vector so that only the causal self-attention
        # carries sequence information.
        memory = torch.zeros(batch_size, 1, self.d_model, device=input_ids.device)
        output = self.transformer_decoder(
            x, memory, tgt_mask=causal_mask, tgt_key_padding_mask=padding_mask
        )

        logits = self.output_layer(output)
        return logits

    def predict_next(
        self, input_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, top_k: int = 10
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the ``top_k`` most likely unseen next items per sequence.

        The padding index and every item already present in ``input_ids`` are
        excluded from the ranking.

        Returns:
            ``(top_k_items, top_k_scores)``, both of shape ``[batch_size, k]``
            where ``k = min(top_k, num_items + 1)``. Scores are softmax
            probabilities over the non-excluded items.
        """
        logits = self.forward(input_ids, attention_mask)

        last_logits = logits[:, -1, :]

        last_logits[:, 0] = float('-inf')

        # One scatter masks the items of every row at once. Padding entries
        # (index 0) simply hit column 0 again, which is already excluded.
        last_logits.scatter_(1, input_ids, float('-inf'))

        # top-k (never ask for more entries than there are columns)
        k = min(top_k, last_logits.size(-1))
        probs = F.softmax(last_logits, dim=-1)
        top_k_scores, top_k_items = torch.topk(probs, k, dim=-1)

        return top_k_items, top_k_scores


In [None]:
def save_model(
    model: nn.Module,
    movie2idx: Dict[int, int],
    idx2movie_title: Dict[int, str],
    filepath: str = "model_checkpoint.pt"
):
    """Write the model weights, its hyperparameters and the id mappings to disk.

    Args:
        model: Model exposing ``num_items``, ``d_model``, ``max_seq_len`` and
            a ``transformer_decoder`` built from ``nn.TransformerDecoderLayer``.
        movie2idx: Raw movie id -> model index mapping.
        idx2movie_title: Model index -> title mapping.
        filepath: Destination of the checkpoint file.
    """
    first_layer = model.transformer_decoder.layers[0]
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'model_config': {
            'num_items': model.num_items,
            'd_model': model.d_model,
            'nhead': first_layer.self_attn.num_heads,
            'num_layers': len(model.transformer_decoder.layers),
            'dim_feedforward': first_layer.linear1.out_features,
            # Stored as well so that a reloaded model can be rebuilt with the
            # same settings instead of constructor defaults.
            'dropout': first_layer.dropout.p,
            'max_seq_len': model.max_seq_len,
        },
        # Keys/values may be numpy integers when the mapping was built from
        # pandas data; plain ints keep the checkpoint free of numpy objects,
        # which restricted (weights-only) unpickling may refuse to load.
        'movie2idx': {int(movie_id): int(idx) for movie_id, idx in movie2idx.items()},
        'idx2movie_title': {int(idx): title for idx, title in idx2movie_title.items()},
    }
    torch.save(checkpoint, filepath)
    print(f"Model saved to '{filepath}'")


def load_model(
    filepath: str = "model_checkpoint.pt",
    device: torch.device = DEVICE
) -> Tuple[nn.Module, Dict[int, int], Dict[int, str]]:
    """Rebuild a `TransformerRecommender` from a checkpoint file.

    Args:
        filepath: Checkpoint containing the keys ``model_state_dict``,
            ``model_config``, ``movie2idx`` and ``idx2movie_title``.
        device: Device the tensors are mapped to and the model is moved to.

    Returns:
        ``(model, movie2idx, idx2movie_title)`` with the model in eval mode.
    """
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(filepath, map_location=device)

    # recreate model with saved config
    config = checkpoint['model_config']
    model = TransformerRecommender(
        num_items=config['num_items'],
        d_model=config['d_model'],
        nhead=config['nhead'],
        num_layers=config['num_layers'],
        dim_feedforward=config['dim_feedforward'],
        # Checkpoints that do not carry these two entries fall back to the
        # constructor defaults, which is what was always used before.
        dropout=config.get('dropout', 0.1),
        max_seq_len=config.get('max_seq_len', 50),
    )

    # load weights
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    print(f"Model loaded from '{filepath}'")
    print(f"  - Number of items: {config['num_items']}")
    print(f"  - Model dimension: {config['d_model']}")
    print(f"  - Number of layers: {config['num_layers']}")

    return model, checkpoint['movie2idx'], checkpoint['idx2movie_title']


## baseline models


In [None]:
class PopularityBaseline:
    """Recommend the globally most frequent items the user has not seen yet."""

    def __init__(self, user_sequences: Dict[int, List[int]]):
        tally = Counter()
        for history in user_sequences.values():
            tally.update(history)

        self.item_counts = tally
        # Most frequent first; ties keep the order of first occurrence.
        self.popular_items = [item for item, _count in tally.most_common()]

    def predict(self, input_seq: List[int], top_k: int = 10) -> List[int]:
        """Return up to `top_k` popular items that are absent from `input_seq`."""
        already_seen = set(input_seq)
        unseen_ranked = [
            candidate
            for candidate in self.popular_items
            if candidate not in already_seen
        ]
        return unseen_ranked[:top_k]


class MarkovBaseline:
    """First-order Markov recommender with a popularity fallback.

    Counts how often each item directly follows another one and ranks
    candidates by the transitions observed out of the last item of a history.
    """

    def __init__(self, user_sequences: Dict[int, List[int]]):
        self.transitions = defaultdict(Counter)
        overall = Counter()

        for history in user_sequences.values():
            for current, following in zip(history, history[1:]):
                self.transitions[current][following] += 1
            overall.update(history)

        # popularity as fallback
        self.popular_items = [item for item, _count in overall.most_common()]

    def predict(self, input_seq: List[int], top_k: int = 10) -> List[int]:
        """Rank unseen items by how often they followed the last item of `input_seq`."""
        if not input_seq:
            return self.popular_items[:top_k]

        seen = set(input_seq)
        anchor = input_seq[-1]

        if anchor not in self.transitions:
            # No outgoing transition was observed: pure popularity ranking.
            return [item for item in self.popular_items if item not in seen][:top_k]

        ranked = [
            follower
            for follower, _count in self.transitions[anchor].most_common()
            if follower not in seen
        ]

        # Top up with popular items when the transitions alone are too few.
        for item in self.popular_items:
            if len(ranked) >= top_k:
                break
            if item not in seen and item not in ranked:
                ranked.append(item)

        return ranked[:top_k]


class RandomBaseline:
    """Baseline that recommends uniformly sampled unseen items."""

    def __init__(self, num_items: int):
        self.num_items = num_items

    def predict(self, input_seq: List[int], top_k: int = 10) -> List[int]:
        """Sample up to `top_k` distinct items from 1..num_items not in `input_seq`."""
        watched = set(input_seq)
        pool = [item for item in range(1, self.num_items + 1) if item not in watched]
        sample_size = min(top_k, len(pool))
        return random.sample(pool, sample_size)


## metrics


In [None]:
def hit_rate_at_k(predictions: List[int], target: int, k: int = 10) -> float:
    """Return 1.0 if `target` is among the first `k` predictions, otherwise 0.0."""
    return float(target in predictions[:k])


def mrr(predictions: List[int], target: int) -> float:
    """Return the reciprocal of the 1-based rank of `target`, or 0.0 if absent."""
    if target not in predictions:
        return 0.0
    return 1.0 / (predictions.index(target) + 1)


def ndcg_at_k(predictions: List[int], target: int, k: int = 10) -> float:
    """Return NDCG@k for a single relevant item.

    With exactly one relevant item the ideal DCG is 1, so the score is
    ``1 / log2(rank + 1)`` for a hit within the first `k` predictions and 0.0
    otherwise.
    """
    window = predictions[:k]
    if target not in window:
        return 0.0

    rank = window.index(target) + 1
    ideal = 1.0
    return (1.0 / math.log2(rank + 1)) / ideal


def evaluate_model(
    model: nn.Module,
    test_sequences: Dict[int, List[int]],
    top_k: int = 10,
    max_seq_len: int = 50,
    device: torch.device = DEVICE
) -> Dict[str, float]:
    """Score the model on the last item of every sequence.

    For each sequence with at least two items, everything but the last item
    (truncated to the most recent `max_seq_len` entries) is fed to
    `model.predict_next` and the last item is the target. Users are processed
    one at a time.

    Returns:
        Mean ``HR@{top_k}``, ``MRR`` (computed over the top-k list only) and
        ``NDCG@{top_k}`` across the evaluated users.
    """
    model.eval()

    per_user = []  # one (hit, reciprocal rank, ndcg) triple per evaluated user

    with torch.no_grad():
        for _user, seq in tqdm(test_sequences.items(), desc="Evaluating model"):
            if len(seq) < 2:
                continue

            target = seq[-1]
            # Keep only the most recent max_seq_len items of the history.
            history = seq[:-1][-max_seq_len:]

            input_tensor = torch.tensor([history], dtype=torch.long, device=device)
            # A single, unpadded sequence: every position is a real item.
            attention_mask = torch.ones_like(input_tensor, dtype=torch.float)

            ranked_items, _ = model.predict_next(input_tensor, attention_mask, top_k=top_k)
            ranked = ranked_items[0].cpu().tolist()

            per_user.append((
                hit_rate_at_k(ranked, target, top_k),
                mrr(ranked, target),
                ndcg_at_k(ranked, target, top_k),
            ))

    return {
        f'HR@{top_k}': np.mean([row[0] for row in per_user]),
        'MRR': np.mean([row[1] for row in per_user]),
        f'NDCG@{top_k}': np.mean([row[2] for row in per_user])
    }


def evaluate_baseline(
    baseline,
    test_sequences: Dict[int, List[int]],
    top_k: int = 10
) -> Dict[str, float]:
    """Score a baseline on the last item of every sequence.

    `baseline` must provide ``predict(input_seq, top_k=...)`` returning a
    ranked list of item indices. The full history before the last item is
    passed as input.

    Returns:
        Mean ``HR@{top_k}``, ``MRR`` (over the returned list) and
        ``NDCG@{top_k}`` across all sequences with at least two items.
    """
    per_user = []  # one (hit, reciprocal rank, ndcg) triple per evaluated user

    for _user, seq in tqdm(test_sequences.items(), desc="Evaluating baseline"):
        if len(seq) < 2:
            continue

        history, target = seq[:-1], seq[-1]
        ranked = baseline.predict(history, top_k=top_k)

        per_user.append((
            hit_rate_at_k(ranked, target, top_k),
            mrr(ranked, target),
            ndcg_at_k(ranked, target, top_k),
        ))

    return {
        f'HR@{top_k}': np.mean([row[0] for row in per_user]),
        'MRR': np.mean([row[1] for row in per_user]),
        f'NDCG@{top_k}': np.mean([row[2] for row in per_user])
    }


## training


In [None]:
def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_sequences: Dict[int, List[int]],
    num_epochs: int = 10,
    lr: float = 0.001,
    device: torch.device = DEVICE,
    patience: int = 3,
    max_seq_len: int = 50,
    checkpoint_path: str = 'best_model.pt'
) -> nn.Module:
    """Train model with early stopping.

    Args:
        model: Network whose forward pass returns per-position logits.
        train_loader: Yields ``(input_ids, attention_mask, targets)`` batches.
        val_sequences: Sequences evaluated after every epoch.
        num_epochs: Maximum number of epochs.
        lr: Adam learning rate.
        device: Device used for training and validation.
        patience: Number of consecutive epochs without a better validation
            HR@10 after which training stops.
        max_seq_len: History length limit used during validation.
        checkpoint_path: File that receives the weights of the best epoch.

    Returns:
        The model carrying the weights of the best validation epoch, or the
        model as it is when no checkpoint was written during this call.
    """
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    # Starting below every possible score guarantees that the first epoch is
    # checkpointed even when its HR@10 is exactly 0.
    best_val_hr = float('-inf')
    checkpoint_written = False
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        num_batches = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for input_ids, attention_mask, targets in pbar:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()

            # Only the logits of the last position are trained: with left
            # padding that column always holds the most recent item.
            logits = model(input_ids, attention_mask)
            last_logits = logits[:, -1, :]

            loss = criterion(last_logits, targets)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1
            pbar.set_postfix({'loss': total_loss / num_batches})

        # max(..., 1) keeps an empty loader from dividing by zero.
        avg_loss = total_loss / max(num_batches, 1)
        print(f"Epoch {epoch + 1}: Train Loss = {avg_loss:.4f}")

        # validation
        val_metrics = evaluate_model(model, val_sequences, top_k=10, max_seq_len=max_seq_len, device=device)
        print(f"Validation: HR@10 = {val_metrics['HR@10']:.4f}, MRR = {val_metrics['MRR']:.4f}, NDCG@10 = {val_metrics['NDCG@10']:.4f}")

        # early stopping
        if val_metrics['HR@10'] > best_val_hr:
            best_val_hr = val_metrics['HR@10']
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1}")
                break

    # load best model - only when this call produced the file, so that a
    # leftover checkpoint from an earlier run is never picked up by accident
    if checkpoint_written:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    return model


In [None]:
def demonstrate_recommendations(
    model: nn.Module,
    test_sequences: Dict[int, List[int]],
    idx2movie_title: Dict[int, str],
    num_examples: int = 5,
    top_k: int = 5,
    max_seq_len: int = 50,
    device: torch.device = DEVICE
):
    """Print the model's recommendations for a few randomly chosen users.

    For each sampled user with at least three items, the last item is held
    out as the true next movie. The output shows the last five movies of the
    history, the model's `top_k` suggestions with their scores, and whether
    the held-out movie was among them.
    """
    def title_of(movie_idx):
        # Unknown indices get a readable placeholder instead of failing.
        return idx2movie_title.get(movie_idx, f"Movie_{movie_idx}")

    model.eval()

    banner = "=" * 80
    print("\n" + banner)
    print("RECOMMENDATION DEMONSTRATION")
    print(banner)

    candidates = list(test_sequences.keys())
    chosen = random.sample(candidates, min(num_examples, len(candidates)))

    with torch.no_grad():
        for user_idx in chosen:
            seq = test_sequences[user_idx]
            if len(seq) < 3:
                continue

            history, target = seq[:-1], seq[-1]

            print(f"\n{'─' * 60}")
            print(f"User {user_idx}")
            print(f"Watch history ({len(history)} movies):")
            for i, movie_idx in enumerate(history[-5:], 1):
                print(f"  {i}. {title_of(movie_idx)}")

            omitted = len(history) - 5
            if omitted > 0:
                print(f"  ... and {omitted} more movies")

            # The model only receives the most recent max_seq_len items.
            model_input = history[-max_seq_len:]

            input_tensor = torch.tensor([model_input], dtype=torch.long, device=device)
            attention_mask = torch.ones_like(input_tensor, dtype=torch.float)
            top_k_items, top_k_scores = model.predict_next(input_tensor, attention_mask, top_k=top_k)

            predictions = top_k_items[0].cpu().tolist()
            scores = top_k_scores[0].cpu().tolist()

            print(f"\nModel recommendations:")
            for i, (movie_idx, score) in enumerate(zip(predictions, scores), 1):
                hit_marker = "✓" if movie_idx == target else " "
                print(f"  {i}. {hit_marker} {title_of(movie_idx)} (score: {score:.4f})")

            print(f"\nActual next movie: {title_of(target)}")
            if target not in predictions:
                print(f"→ Not in top-{top_k}")
            else:
                rank = predictions.index(target) + 1
                print(f"→ Position in recommendations: {rank}")


## prepare data


In [None]:
# Load the raw CSV files with the default paths and thresholds.
ratings, movies, user2idx, movie2idx, idx2movie_title = load_and_preprocess_data()
# Number of real items; movie indices start at 1 because 0 is the padding index.
num_items = len(movie2idx)

# One chronologically ordered list of positively rated movies per user.
user_sequences = create_user_sequences(ratings, user2idx, movie2idx)

# Leave-last-out split: every user appears in all three splits, with
# progressively longer histories.
train_seqs, val_seqs, test_seqs = split_sequences(user_sequences)
print(f"\nDataset sizes:")
print(f"  Train: {len(train_seqs)} users")
print(f"  Val: {len(val_seqs)} users")
print(f"  Test: {len(test_seqs)} users")

# NOTE: the literal 50 below is the same value that the training cell later
# assigns to MAX_SEQ_LEN; keep the two in sync when changing either.
train_dataset = MovieSequenceDataset(train_seqs, max_seq_len=50, num_items=num_items)
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    collate_fn=collate_fn,  # left-pads each batch to its longest sequence
    num_workers=0
)
print(f"\nTraining samples: {len(train_dataset)}")


Loading data...
Loaded 100004 ratings
Loaded 45463 movies
After user filtering: 100004 ratings
After movie filtering: 90072 ratings
Unique users: 671
Unique movies: 3496
Creating sequences (min_rating=3.5)...
Positive interactions: 56892


Creating sequences: 100%|██████████| 56892/56892 [00:02<00:00, 22979.92it/s]


Users with sequences >= 3: 669
Sequence length statistics:
  Min: 3, Max: 913, Mean: 85.04

Dataset sizes:
  Train: 669 users
  Val: 669 users
  Test: 669 users

Training samples: 54882


## train


In [None]:

# Hyperparameters are declared once so that the model, the printed summary and
# the training call cannot drift apart.
MAX_SEQ_LEN = 50
D_MODEL = 64
N_HEAD = 2
NUM_LAYERS = 2
DIM_FEEDFORWARD = 256
DROPOUT = 0.1

model = TransformerRecommender(
    num_items=num_items,
    d_model=D_MODEL,
    nhead=N_HEAD,
    num_layers=NUM_LAYERS,
    dim_feedforward=DIM_FEEDFORWARD,
    dropout=DROPOUT,
    max_seq_len=MAX_SEQ_LEN
)

print("Model Architecture:")
print(f"  Number of items: {num_items}")
print(f"  Embedding dimension: {D_MODEL}")
print(f"  Number of heads: {N_HEAD}")
print(f"  Number of layers: {NUM_LAYERS}")
print(f"  FFN dimension: {DIM_FEEDFORWARD}")

total_params = sum(p.numel() for p in model.parameters())
print(f"  Total parameters: {total_params:,}")

# Train with early stopping on validation HR@10; the returned model carries
# the weights of the best validation epoch.
model = train_model(
    model,
    train_loader,
    val_seqs,
    num_epochs=15,
    lr=0.001,
    device=DEVICE,
    patience=3,
    max_seq_len=MAX_SEQ_LEN
)


Model Architecture:
  Number of items: 3496
  Embedding dimension: 64
  Number of heads: 2
  Number of layers: 2
  FFN dimension: 256
  Total parameters: 584,617


Epoch 1/15:   0%|          | 0/858 [00:00<?, ?it/s]

Epoch 1/15: 100%|██████████| 858/858 [03:33<00:00,  4.03it/s, loss=7.55]


Epoch 1: Train Loss = 7.5479


Evaluating model: 100%|██████████| 669/669 [00:03<00:00, 186.04it/s]


Validation: HR@10 = 0.0224, MRR = 0.0039, NDCG@10 = 0.0081


Epoch 2/15: 100%|██████████| 858/858 [04:35<00:00,  3.12it/s, loss=7.3] 


Epoch 2: Train Loss = 7.2976


Evaluating model: 100%|██████████| 669/669 [00:07<00:00, 92.27it/s] 


Validation: HR@10 = 0.0314, MRR = 0.0173, NDCG@10 = 0.0205


Epoch 3/15: 100%|██████████| 858/858 [05:30<00:00,  2.59it/s, loss=7.2] 


Epoch 3: Train Loss = 7.2004


Evaluating model: 100%|██████████| 669/669 [00:06<00:00, 102.80it/s]


Validation: HR@10 = 0.0314, MRR = 0.0155, NDCG@10 = 0.0192


Epoch 4/15:  43%|████▎     | 371/858 [01:28<01:44,  4.66it/s, loss=7.07]

In [None]:

# Persist the trained weights together with the id mappings needed to
# interpret the model's item indices.
save_model(model, movie2idx, idx2movie_title, "model_checkpoint.pt")


## init baseline models


In [None]:
# The two count-based baselines are fitted on the training histories only, so
# they never see the held-out validation/test items.
popularity_baseline = PopularityBaseline(train_seqs)
markov_baseline = MarkovBaseline(train_seqs)
# The random baseline only needs the size of the item catalogue.
random_baseline = RandomBaseline(num_items)

## evaluate all


In [None]:
# Evaluate all models on the test split (target = last item of each sequence).
# `results` maps a display name to that model's metric dict.
results = {}

print("Evaluating Transformer model...")
transformer_metrics = evaluate_model(model, test_seqs, top_k=10, max_seq_len=MAX_SEQ_LEN, device=DEVICE)
results['Transformer'] = transformer_metrics

print("\nEvaluating Popularity Baseline...")
popularity_metrics = evaluate_baseline(popularity_baseline, test_seqs, top_k=10)
results['Popularity'] = popularity_metrics

print("\nEvaluating Markov Baseline...")
markov_metrics = evaluate_baseline(markov_baseline, test_seqs, top_k=10)
results['Markov'] = markov_metrics

print("\nEvaluating Random Baseline...")
random_metrics = evaluate_baseline(random_baseline, test_seqs, top_k=10)
results['Random'] = random_metrics


## results


In [None]:
# Plain-text comparison table: one row per model, one column per metric.
print("=" * 60)
print("RESULTS COMPARISON")
print("=" * 60)

print(f"\n{'Model':<20} {'HR@10':<12} {'MRR':<12} {'NDCG@10':<12}")
print("-" * 56)

for model_name, metrics in results.items():
    print(f"{model_name:<20} {metrics['HR@10']:<12.4f} {metrics['MRR']:<12.4f} {metrics['NDCG@10']:<12.4f}")

# Transposed so that models become rows and metrics become columns.
results_df = pd.DataFrame(results).T
print("\n")
# `display` is not imported in this file; it is presumably the helper that
# IPython/Jupyter injects into notebook sessions - TODO confirm when running
# this outside a notebook.
display(results_df.round(4))

results_df.to_csv('results.csv')
print("\nResults saved to 'results.csv'")


## demonstration


In [None]:
# Print the top-5 recommendations for five randomly sampled test users.
demonstrate_recommendations(
    model, 
    test_seqs, 
    idx2movie_title, 
    num_examples=5, 
    top_k=5, 
    max_seq_len=MAX_SEQ_LEN,
    device=DEVICE
)


In [None]:
# Round-trip check: reload the checkpoint written earlier and evaluate the
# restored model on the same test split.
loaded_model, loaded_movie2idx, loaded_idx2movie_title = load_model("model_checkpoint.pt")

test_metrics = evaluate_model(loaded_model, test_seqs, top_k=10, max_seq_len=MAX_SEQ_LEN, device=DEVICE)
print(f"Loaded model - HR@10: {test_metrics['HR@10']:.4f}, MRR: {test_metrics['MRR']:.4f}")
