# NCF with Focal Loss - ML-1M Validation
This notebook validates the implementation of Neural Collaborative Filtering (NeuMF) with Focal Loss.

**Paper**: "Addressing Class Imbalance in NCF with Focal Loss" (AAMAS 2025)

**Objective**: Compare NeuMF trained with BCE vs Focal Loss on MovieLens 1M dataset.

**Success Criteria**:
1. Both models train without errors
2. HR@10 > 0.5 (reasonable performance)
3. Focal Loss performs >= BCE
4. Proper convergence curves

## Cell 1: Setup & Dependencies

In [None]:
# ============================================
# CELL 1: Install Dependencies
# ============================================
# Run this cell, then RESTART the runtime before continuing!

!pip uninstall numpy -y
!pip install numpy==1.26.4
!pip install recbole==1.2.0
!pip install kmeans-pytorch

# Verify numpy version
import numpy as np
print(f"\nNumPy version: {np.__version__}")
if np.__version__.startswith("2."):
    print("ERROR: NumPy 2.x detected! Please RESTART the runtime now.")
    print("Go to: Runtime -> Restart session")
else:
    print("NumPy version OK. You can continue to the next cell.")

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jax 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.
pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is in


NumPy version: 1.26.4
NumPy version OK. You can continue to the next cell.


In [None]:
# ============================================
# CELL 2: Imports
# ============================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from collections import defaultdict
import os
import logging

# Compatibility shim for PyTorch 2.6+: calls to `torch.load` that do not
# specify `weights_only` are forwarded with `weights_only=False`.
# The `_load_patched` marker stored on the torch module makes the patch
# idempotent, so re-running this cell never wraps the wrapper again.
if not hasattr(torch, '_load_patched'):
    _original_torch_load = torch.load

    def _patched_torch_load(*args, **kwargs):
        """Delegate to the original torch.load, defaulting weights_only to False."""
        kwargs.setdefault('weights_only', False)
        return _original_torch_load(*args, **kwargs)

    torch._load_patched = True
    torch.load = _patched_torch_load

# RecBole imports
from recbole.quick_start import run_recbole
from recbole.model.general_recommender.neumf import NeuMF
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

# Prefer the GPU when one is visible, but fall back to CPU instead of
# crashing: `torch.cuda.get_device_name(0)` raises on a CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(f"Using: {torch.cuda.get_device_name(0)}")
else:
    print("Using: CPU (no CUDA device available; training will be slow)")

ModuleNotFoundError: No module named 'ray'

## Cell 2: Custom Focal Loss Implementation

Focal Loss formula: $FL(p_t) = -\alpha_t (1-p_t)^\gamma \log(p_t)$

Where:
- $p_t$ = model's estimated probability for the ground-truth class
- $\gamma$ = focusing parameter (default: 2.0)
- $\alpha_t$ = class balancing weight: $\alpha$ for positive examples and $1-\alpha$ for negative examples ($\alpha$ default: 0.25)

In [None]:
# ============================================
# CELL 3: Focal Loss Implementation
# ============================================
class FocalLoss(nn.Module):
    """
    Binary focal loss operating on predicted probabilities.

    Implements FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), where p_t is
    the probability assigned to the true class and alpha_t is `alpha` for
    positives and `1 - alpha` for negatives.

    Reference: Lin et al., "Focal Loss for Dense Object Detection", ICCV 2017

    Args:
        gamma (float): Focusing exponent; larger values shrink the loss of
                      well-classified examples more. Default: 2.0
        alpha (float): Weight applied to the positive class (negatives get
                      1 - alpha). Default: 0.25
        reduction (str): 'mean' or 'sum' aggregate the per-example losses;
                      any other value returns them unreduced. Default: 'mean'
    """

    def __init__(self, gamma=2.0, alpha=0.25, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        """
        Args:
            inputs: Probabilities in [0, 1] (i.e. already passed through a
                sigmoid), shape [batch_size]
            targets: Binary labels (0 or 1), shape [batch_size]

        Returns:
            The reduced loss (scalar tensor) for 'mean'/'sum', otherwise the
            per-example loss tensor.
        """
        # Keep probabilities strictly inside (0, 1) so log() stays finite.
        probs = inputs.clamp(min=1e-7, max=1-1e-7)
        negatives = 1 - targets

        # Probability the model assigns to the true class.
        p_t = targets * probs + negatives * (1 - probs)

        # Per-example class weight: alpha for y=1, (1 - alpha) for y=0.
        alpha_t = targets * self.alpha + negatives * (1 - self.alpha)

        # Modulating factor (1 - p_t)^gamma scaled by the class weight.
        weight = alpha_t * (1 - p_t).pow(self.gamma)
        per_example = -weight * p_t.log()

        if self.reduction == 'mean':
            return per_example.mean()
        if self.reduction == 'sum':
            return per_example.sum()
        return per_example


# Verify Focal Loss implementation
def test_focal_loss():
    """Sanity-check FocalLoss against nn.BCELoss on a four-example batch.

    Two properties are asserted:
      1. With gamma=0 and alpha=0.5 the focal loss is half of the BCE loss
         (both classes are weighted by 0.5 and no focusing is applied).
      2. Raising gamma from 0 to 2 (alpha fixed at 0.5) lowers the loss.

    Raises:
        AssertionError: if either property does not hold.
    """
    # Hand-picked probabilities and their binary labels
    probabilities = torch.tensor([0.9, 0.1, 0.5, 0.8])
    labels = torch.tensor([1.0, 0.0, 1.0, 0.0])

    reference_bce = nn.BCELoss()(probabilities, labels)
    half_bce = 0.5 * reference_bce.item()

    # gamma=0 switches the focusing term off; alpha=0.5 weights both classes equally
    unfocused = FocalLoss(gamma=0.0, alpha=0.5)(probabilities, labels)

    print(f"BCE Loss: {reference_bce.item():.4f}")
    print(f"Focal Loss (gamma=0, alpha=0.5): {unfocused.item():.4f}")
    print(f"Expected (0.5 * BCE): {half_bce:.4f}")

    assert abs(unfocused.item() - half_bce) < 0.01, "FL(gamma=0, alpha=0.5) should equal 0.5*BCE"
    print("Test 1 PASSED: FL(gamma=0, alpha=0.5) = 0.5 * BCE")

    # Same batch with focusing enabled: the loss must shrink
    focused = FocalLoss(gamma=2.0, alpha=0.5)(probabilities, labels)

    print(f"\nFocal Loss (gamma=2, alpha=0.5): {focused.item():.4f}")
    assert focused.item() < unfocused.item(), "Higher gamma should reduce loss"
    print("Test 2 PASSED: FL(gamma=2) < FL(gamma=0)")

    print("\nFocal Loss implementation PASSED!")

test_focal_loss()

In [None]:
# ============================================
# CELL 4: Demonstrate Focal Loss Effect
# ============================================
def demonstrate_focal_loss_effect():
    """Print BCE and focal loss side by side for four single-example cases.

    For each case (easy/hard x positive/negative) the per-example BCE loss,
    the per-example focal loss (gamma=2, alpha=0.25) and their ratio are
    printed, showing how much focal loss shrinks each kind of example.
    """
    elementwise_bce = nn.BCELoss(reduction='none')
    elementwise_fl = FocalLoss(gamma=2.0, alpha=0.25, reduction='none')

    # (description, predicted probability, true label) -- described in the
    # original notebook as the scenarios from the paper's toy example
    cases = (
        ("Easy negative (model predicts 0.05 for y=0)", 0.05, 0.0),
        ("Hard positive (model predicts 0.3 for y=1)", 0.30, 1.0),
        ("Hard negative (model predicts 0.7 for y=0)", 0.70, 0.0),
        ("Easy positive (model predicts 0.95 for y=1)", 0.95, 1.0),
    )

    print("Comparing BCE vs Focal Loss (gamma=2, alpha=0.25):")
    print("=" * 70)

    for description, probability, truth in cases:
        pred = torch.tensor([probability])
        target = torch.tensor([truth])

        bce_value = elementwise_bce(pred, target).item()
        fl_value = elementwise_fl(pred, target).item()

        # Guard against a zero focal loss before dividing
        if fl_value > 0:
            ratio = bce_value / fl_value
        else:
            ratio = float('inf')

        print(f"\n{description}")
        print(f"  BCE Loss:   {bce_value:.4f}")
        print(f"  Focal Loss: {fl_value:.4f}")
        print(f"  BCE/FL ratio: {ratio:.1f}x (Focal Loss reduces by {ratio:.0f}x)")

demonstrate_focal_loss_effect()

## Cell 3: Data Configuration (ML-1M)

Using RecBole's built-in MovieLens 1M dataset with:
- Binarization: ratings >= 4 -> positive
- Leave-one-out evaluation
- 4 negatives per positive (training)
- 99 negatives (evaluation)

In [None]:
# ============================================
# CELL 5: Base Configuration (ML-1M)
# ============================================
# Shared RecBole settings used by both the BCE baseline and the Focal Loss runs.
base_config = {
    # Dataset
    'dataset': 'ml-1m',
    'data_path': './dataset/',

    # Data preprocessing (from methodology)
    'load_col': {'inter': ['user_id', 'item_id', 'rating', 'timestamp']},
    'threshold': {'rating': 4},  # Binarize: ratings >= 4 are positive
    'val_interval': {'rating': '[4,inf)'},  # Only consider ratings >= 4 as positive
    # NOTE(review): `val_interval` already drops every rating < 4, so the
    # threshold labels all remaining interactions as positive -- confirm that
    # combining `threshold` with training negative sampling is intended.

    # Evaluation settings (from methodology)
    'eval_args': {
        'split': {'LS': 'valid_and_test'},  # Leave-one-out
        'group_by': 'user',
        'order': 'TO',  # Temporal order (most recent for test)
        # Rank the held-out item against 99 uniformly sampled negatives.
        # This notebook's stated protocol is "99 negatives (evaluation)" and
        # its HR@10 > 0.5 success criterion presumes sampled evaluation;
        # the previous value 'full' ranked against the whole catalogue instead.
        'mode': 'uni99',
    },

    # Training negative sampling
    'train_neg_sample_args': {
        'distribution': 'uniform',
        'sample_num': 4,  # 4 negatives per positive
        'dynamic': False,
    },

    # Evaluation settings
    'metrics': ['Hit', 'NDCG'],
    'topk': [5, 10, 20],
    'valid_metric': 'NDCG@10',

    # Training settings
    'epochs': 100,
    'stopping_step': 10,  # Early stopping patience
    'train_batch_size': 256,
    'eval_batch_size': 4096,
    'learning_rate': 0.001,

    # Reproducibility
    'seed': 42,

    # Device
    'device': device,

    # Logging
    'show_progress': True,
}

print("Base configuration loaded.")
print(f"Dataset: {base_config['dataset']}")
print(f"Binarization threshold: rating >= {base_config['threshold']['rating']}")
print(f"Training negatives per positive: {base_config['train_neg_sample_args']['sample_num']}")
print(f"Early stopping patience: {base_config['stopping_step']} epochs")

## Cell 4: NeuMF with BCE (Baseline)

Standard NeuMF architecture with Binary Cross-Entropy loss.

In [None]:
# ============================================
# CELL 6: NeuMF-BCE Configuration
# ============================================
# Baseline configuration: the shared settings plus the NeuMF architecture.
# Built as a new dict (shallow copy), so `base_config` itself is not modified.
neumf_bce_config = {
    **base_config,
    'model': 'NeuMF',

    # NeuMF architecture (from methodology)
    'mf_embedding_size': 64,
    'mlp_embedding_size': 64,
    'mlp_hidden_size': [128, 64, 32],
    'dropout_prob': 0.0,

    # Use default BCE loss
    'loss_type': 'BCE',
}

print(
    "NeuMF-BCE Configuration:",
    f"  MF Embedding Size: {neumf_bce_config['mf_embedding_size']}",
    f"  MLP Embedding Size: {neumf_bce_config['mlp_embedding_size']}",
    f"  MLP Hidden Layers: {neumf_bce_config['mlp_hidden_size']}",
    f"  Loss: BCE",
    sep="\n",
)

In [None]:
# ============================================
# CELL 7: Train NeuMF with BCE
# ============================================
print("=" * 60, "Training NeuMF with BCE Loss", "=" * 60, sep="\n")

# Train and evaluate the baseline through RecBole's quick-start entry point.
result_bce = run_recbole(model='NeuMF', dataset='ml-1m', config_dict=neumf_bce_config)

# Keep only the fields the comparison cells read later.
bce_results = dict(
    model='NeuMF-BCE',
    best_valid_score=result_bce['best_valid_score'],
    test_result=result_bce['test_result'],
)

print(
    "\nNeuMF-BCE Results:",
    f"  Best Validation NDCG@10: {result_bce['best_valid_score']:.4f}",
    f"  Test Results: {result_bce['test_result']}",
    sep="\n",
)

## Cell 5: NeuMF with Focal Loss

Custom NeuMF with Focal Loss (gamma=2.0, alpha=0.25).

We need to create a custom model class that extends RecBole's NeuMF.

In [None]:
# ============================================
# CELL 8: NeuMF with Focal Loss Class
# ============================================
# NeuMF is already imported above as direct import

class NeuMF_FocalLoss(NeuMF):
    """
    NeuMF model trained with Focal Loss instead of BCE.

    Extends RecBole's NeuMF and overrides only the training loss; the
    architecture and the inherited `predict` path are untouched.

    Args:
        config: RecBole Config object passed through to NeuMF.
        dataset: RecBole dataset passed through to NeuMF.
        gamma (float): Focal Loss focusing parameter. Default: 2.0
        alpha (float): Focal Loss positive-class weight. Default: 0.25
    """

    def __init__(self, config, dataset, gamma=2.0, alpha=0.25):
        super(NeuMF_FocalLoss, self).__init__(config, dataset)

        # Replace BCE loss with Focal Loss
        self.gamma = gamma
        self.alpha = alpha
        self.focal_loss = FocalLoss(gamma=gamma, alpha=alpha, reduction='mean')

        print(f"Initialized NeuMF with Focal Loss (gamma={gamma}, alpha={alpha})")

    def calculate_loss(self, interaction):
        """
        Calculate Focal Loss for the given interaction batch.

        This overrides the parent class's calculate_loss method.

        Args:
            interaction: RecBole interaction batch providing the user ids,
                item ids and labels under USER_ID / ITEM_ID / LABEL.

        Returns:
            Scalar tensor with the mean focal loss over the batch.
        """
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        label = interaction[self.LABEL]

        # RecBole 1.x NeuMF.forward returns raw logits (the stock model trains
        # with BCEWithLogitsLoss and applies the sigmoid only in predict()).
        # FocalLoss expects probabilities; feeding it logits would clamp them
        # into (0, 1) and zero the gradient for every logit outside that range.
        logits = self.forward(user, item)
        probabilities = torch.sigmoid(logits)

        # Apply Focal Loss on probabilities
        loss = self.focal_loss(probabilities, label)

        return loss

In [None]:
# ============================================
# CELL 9: Training Function for Focal Loss
# ============================================
def train_neumf_focal_loss(config_dict, gamma=2.0, alpha=0.25, seed=42):
    """
    Train NeuMF with Focal Loss using RecBole's infrastructure.

    Follows the same steps as RecBole's quick-start flow (seed, config,
    logger, dataset, dataloaders, model, trainer, fit, evaluate) but
    instantiates `NeuMF_FocalLoss` instead of the stock model.

    Args:
        config_dict: Configuration dictionary (not modified; a copy with the
            requested seed is handed to RecBole)
        gamma: Focal Loss focusing parameter
        alpha: Focal Loss class balancing weight
        seed: Random seed for reproducibility

    Returns:
        Dictionary with keys 'best_valid_score', 'best_valid_result',
        'test_result', 'model' and 'trainer'.
    """
    # Propagate the requested seed into the config. Previously only the global
    # RNGs were seeded while config['seed'] stayed at the value from
    # `config_dict`, so anything RecBole seeds from the config ignored `seed`.
    run_config = {**config_dict, 'seed': seed}

    # Set seed
    init_seed(seed, reproducibility=True)

    # Create config
    config = Config(model='NeuMF', dataset='ml-1m', config_dict=run_config)

    # Initialize logger
    init_logger(config)
    logger = logging.getLogger()

    # Create dataset and dataloaders
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # Re-seed right before building the model so weight initialisation does
    # not depend on how much randomness data preparation consumed. This mirrors
    # run_recbole, which is used for the BCE baseline.
    init_seed(seed, reproducibility=True)

    # Create model with Focal Loss
    model = NeuMF_FocalLoss(config, dataset, gamma=gamma, alpha=alpha).to(config['device'])
    logger.info(model)

    # Create trainer
    trainer = Trainer(config, model)

    # Honour the configured progress-bar setting, as the baseline run does
    show_progress = config['show_progress']

    # Train
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, show_progress=show_progress
    )

    # Evaluate on test set
    test_result = trainer.evaluate(test_data, show_progress=show_progress)

    return {
        'best_valid_score': best_valid_score,
        'best_valid_result': best_valid_result,
        'test_result': test_result,
        'model': model,
        'trainer': trainer
    }

In [None]:
# ============================================
# CELL 10: NeuMF-FocalLoss Configuration
# ============================================
# Focal Loss run: shared settings plus the same architecture as the baseline.
# Built as a new dict (shallow copy), so `base_config` itself is not modified.
neumf_fl_config = {
    **base_config,
    'model': 'NeuMF',

    # NeuMF architecture (same as BCE for fair comparison)
    'mf_embedding_size': 64,
    'mlp_embedding_size': 64,
    'mlp_hidden_size': [128, 64, 32],
    'dropout_prob': 0.0,
}

# Focal Loss hyperparameters (from methodology):
# GAMMA is the focusing parameter, ALPHA the class balancing weight.
GAMMA, ALPHA = 2.0, 0.25

print(
    "NeuMF-FocalLoss Configuration:",
    f"  MF Embedding Size: {neumf_fl_config['mf_embedding_size']}",
    f"  MLP Embedding Size: {neumf_fl_config['mlp_embedding_size']}",
    f"  MLP Hidden Layers: {neumf_fl_config['mlp_hidden_size']}",
    f"  Focal Loss gamma: {GAMMA}",
    f"  Focal Loss alpha: {ALPHA}",
    sep="\n",
)

In [None]:
# ============================================
# CELL 11: Train NeuMF with Focal Loss
# ============================================
print(
    "=" * 60,
    f"Training NeuMF with Focal Loss (gamma={GAMMA}, alpha={ALPHA})",
    "=" * 60,
    sep="\n",
)

# Train and evaluate the focal-loss variant with the configured hyperparameters.
result_fl = train_neumf_focal_loss(neumf_fl_config, gamma=GAMMA, alpha=ALPHA, seed=42)

# Keep only the fields the comparison cells read later.
fl_results = dict(
    model=f'NeuMF-FL(g={GAMMA},a={ALPHA})',
    best_valid_score=result_fl['best_valid_score'],
    test_result=result_fl['test_result'],
)

print(
    f"\nNeuMF-FocalLoss Results:",
    f"  Best Validation NDCG@10: {result_fl['best_valid_score']:.4f}",
    f"  Test Results: {result_fl['test_result']}",
    sep="\n",
)

## Cell 6: Evaluation & Comparison

Compare BCE vs Focal Loss results:
- HR@5, HR@10, HR@20
- NDCG@5, NDCG@10, NDCG@20

In [None]:
# ============================================
# CELL 12: Comparison Table
# ============================================
def create_comparison_table(bce_results, fl_results):
    """Build a BCE-vs-Focal-Loss table of test metrics.

    Args:
        bce_results: dict whose 'test_result' entry maps metric names
            (e.g. 'hit@10') to scores for the BCE model.
        fl_results: same structure for the Focal Loss model.

    Returns:
        pandas DataFrame with one row per metric (HIT/NDCG at 5, 10, 20) and
        the columns Metric, NeuMF-BCE, NeuMF-FL, Difference and % Change,
        all pre-formatted as strings. Missing metrics count as 0, and the
        percentage change is reported as 0 when the BCE score is not positive.
    """

    def _row(metric_key):
        # One formatted table row for a single metric.
        baseline = bce_results['test_result'].get(metric_key, 0)
        focal = fl_results['test_result'].get(metric_key, 0)
        delta = focal - baseline
        if baseline > 0:
            relative = delta / baseline * 100
        else:
            relative = 0
        return {
            'Metric': metric_key.upper(),
            'NeuMF-BCE': f'{baseline:.4f}',
            'NeuMF-FL': f'{focal:.4f}',
            'Difference': f'{delta:+.4f}',
            '% Change': f'{relative:+.2f}%'
        }

    metric_keys = [f'{family}@{k}' for family in ('hit', 'ndcg') for k in (5, 10, 20)]
    return pd.DataFrame([_row(key) for key in metric_keys])

# Render the BCE-vs-Focal-Loss table under a banner
print("=" * 70, "COMPARISON: NeuMF-BCE vs NeuMF-FocalLoss on ML-1M", "=" * 70, sep="\n")

comparison_df = create_comparison_table(bce_results, fl_results)
print(comparison_df.to_string(index=False))

In [None]:
# ============================================
# CELL 13: Validation Checks
# ============================================
def validate_results(bce_results, fl_results):
    """Print whether the two runs meet the notebook's success criteria.

    Three checks are reported (nothing is raised or returned):
      1. both runs produced a non-empty test result,
      2. HR@10 of each run exceeds 0.5,
      3. Focal Loss NDCG@10 is at least the BCE NDCG@10.

    Args:
        bce_results: dict with a 'test_result' mapping of metric name -> score
            for the BCE model.
        fl_results: same structure for the Focal Loss model.

    Returns:
        None. All findings are printed.
    """
    # Treat a missing or None test result as "no metrics" so the remaining
    # checks still run instead of failing on `.get`.
    bce_test = bce_results.get('test_result') or {}
    fl_test = fl_results.get('test_result') or {}

    print("\n" + "="*50)
    print("VALIDATION CHECKS")
    print("="*50)

    # Check 1: Both models trained without errors
    print("\n[CHECK 1] Both models trained successfully")
    if bce_test and fl_test:
        print("  PASSED: Both models have test results")
    else:
        print("  FAILED: One or both models failed to produce results")

    # Check 2: HR@10 > 0.5 (reasonable performance)
    print("\n[CHECK 2] Reasonable performance (HR@10 > 0.5)")
    bce_hr10 = bce_test.get('hit@10', 0)
    fl_hr10 = fl_test.get('hit@10', 0)
    print(f"  BCE HR@10: {bce_hr10:.4f} {'PASSED' if bce_hr10 > 0.5 else 'BELOW THRESHOLD'}")
    print(f"  FL HR@10:  {fl_hr10:.4f} {'PASSED' if fl_hr10 > 0.5 else 'BELOW THRESHOLD'}")

    # Check 3: Focal Loss >= BCE
    print("\n[CHECK 3] Focal Loss performance >= BCE")
    bce_ndcg10 = bce_test.get('ndcg@10', 0)
    fl_ndcg10 = fl_test.get('ndcg@10', 0)
    if fl_ndcg10 >= bce_ndcg10:
        print(f"  PASSED: FL NDCG@10 ({fl_ndcg10:.4f}) >= BCE NDCG@10 ({bce_ndcg10:.4f})")
        # A zero baseline (e.g. the metric is missing) has no meaningful
        # relative improvement; skip the division instead of raising.
        if bce_ndcg10 > 0:
            improvement = (fl_ndcg10 - bce_ndcg10) / bce_ndcg10 * 100
            print(f"  Improvement: {improvement:+.2f}%")
        else:
            print("  Improvement: n/a (BCE NDCG@10 is 0)")
    else:
        print(f"  NOTE: FL NDCG@10 ({fl_ndcg10:.4f}) < BCE NDCG@10 ({bce_ndcg10:.4f})")
        print("  This may indicate need for hyperparameter tuning")

    print("\n" + "="*50)
    print("Validation complete. Review results above.")
    print("="*50)

# Run the success-criteria checks on the stored BCE and Focal Loss results.
validate_results(bce_results, fl_results)

## Cell 7: Additional Validation - Gamma=0 Test

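Verify that Focal Loss with gamma=0 produces similar results to BCE. With gamma=0 and alpha=0.5 the loss equals 0.5 × BCE, so its gradients differ from BCE only by a constant factor; the ranking metrics should therefore be close to the BCE baseline, though not necessarily identical.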

In [None]:
# ============================================
# CELL 14: Gamma=0 Validation Test
# ============================================
print(
    "=" * 60,
    "VALIDATION: Focal Loss with gamma=0 should approximate BCE",
    "=" * 60,
    sep="\n",
)

# gamma=0 removes the focusing term (weighted BCE); alpha=0.5 weights both
# classes equally, matching standard BCE up to a constant factor.
result_fl_gamma0 = train_neumf_focal_loss(neumf_fl_config, gamma=0.0, alpha=0.5, seed=42)

print(
    f"\nComparison:",
    f"  BCE NDCG@10:           {bce_results['test_result'].get('ndcg@10', 0):.4f}",
    f"  FL(gamma=0) NDCG@10:   {result_fl_gamma0['test_result'].get('ndcg@10', 0):.4f}",
    f"  FL(gamma=2) NDCG@10:   {fl_results['test_result'].get('ndcg@10', 0):.4f}",
    sep="\n",
)

## Cell 8: Summary & Next Steps

In [None]:
# ============================================
# CELL 15: Summary & Next Steps
# ============================================
# Header, experiment description and the comparison table built earlier
print(
    "\n" + "=" * 70,
    "EXPERIMENT SUMMARY: NCF with Focal Loss on ML-1M",
    "=" * 70,
    "\nDataset: MovieLens 1M",
    "Model: NeuMF (GMF + MLP hybrid)",
    "\nResults:",
    comparison_df.to_string(index=False),
    sep="\n",
)

# Follow-up work, framed by divider lines
print(
    "\n" + "-" * 70,
    "NEXT STEPS:",
    "-" * 70,
    "1. If validation passed: Proceed to full hyperparameter grid search",
    "2. Run same experiment on larger datasets (Amazon, Yelp)",
    "3. Add BPR loss comparison",
    "4. Run ablation studies (varying gamma and alpha)",
    "5. Add negative sampling ratio experiments (1:4, 1:10, 1:50)",
    "=" * 70,
    sep="\n",
)

---

## Optional: Grid Search (Run after validation passes)

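The cell below performs the full hyperparameter search: 4 gamma values × 3 alpha values = 12 configurations, each trained with 10 seeds (120 training runs in total). Only run it once the validation checks above have passed.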

In [None]:
# ============================================
# CELL 16: Grid Search (Optional - set RUN_GRID_SEARCH = True to run)
# ============================================
# The search trains len(GAMMA_VALUES) * len(ALPHA_VALUES) * len(SEEDS) = 120
# models, so it is opt-in: with the flag off, "Run All" skips it instead of
# silently launching every run.
RUN_GRID_SEARCH = False

GAMMA_VALUES = [0.5, 1.0, 2.0, 3.0]
ALPHA_VALUES = [0.25, 0.5, 0.75]
SEEDS = list(range(10))  # 10 random seeds for statistical testing

grid_search_results = []

if RUN_GRID_SEARCH:
    for gamma in GAMMA_VALUES:
        for alpha in ALPHA_VALUES:
            print(f"\nRunning: gamma={gamma}, alpha={alpha}")

            seed_results = []
            for seed in SEEDS:
                result = train_neumf_focal_loss(
                    config_dict=neumf_fl_config,
                    gamma=gamma,
                    alpha=alpha,
                    seed=seed
                )
                # Keep only the metrics; the returned model and trainer are
                # not needed for aggregation.
                seed_results.append(result['test_result'])

            # Aggregate results across seeds (missing metric counts as 0)
            ndcg10_scores = [r.get('ndcg@10', 0) for r in seed_results]
            avg_ndcg10 = np.mean(ndcg10_scores)
            std_ndcg10 = np.std(ndcg10_scores)

            grid_search_results.append({
                'gamma': gamma,
                'alpha': alpha,
                'ndcg@10_mean': avg_ndcg10,
                'ndcg@10_std': std_ndcg10
            })

            print(f"  NDCG@10: {avg_ndcg10:.4f} +/- {std_ndcg10:.4f}")
else:
    print("Grid search skipped. Set RUN_GRID_SEARCH = True to run it.")

# Display grid search results (an empty frame with the expected columns when skipped)
grid_df = pd.DataFrame(
    grid_search_results,
    columns=['gamma', 'alpha', 'ndcg@10_mean', 'ndcg@10_std'],
)
if RUN_GRID_SEARCH:
    print("\nGrid Search Results:")
    print(grid_df.to_string(index=False))