# Process-Aware Benchmarking (PAB) Tutorial

This notebook demonstrates how to use the PAB toolkit to track and analyze a model's learning trajectory during training. We'll use a simple CNN trained on CIFAR-10 as an example.

## Setup

First, let's import the necessary libraries and set up our environment:

In [None]:
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# Add the parent directory to the path to import PAB
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from pab import ProcessAwareBenchmark
from pab.visualization import (
    plot_learning_trajectory,
    plot_class_progression,
    plot_robustness_curve,
    plot_pab_summary
)
from pab.utils import compute_class_accuracies, evaluate_adversarial_robustness

# Set random seeds for reproducibility.
# PyTorch and NumPy keep separate global RNG state, so seeding only one of
# them leaves any NumPy-based randomness different from run to run.
torch.manual_seed(42)
np.random.seed(42)

# Create directory for results (no error if it already exists)
os.makedirs('./notebook_results', exist_ok=True)

## Load and Prepare Data

We'll use the CIFAR-10 dataset for this example:

In [None]:
# Data transformations
# Both pipelines finish with the same tensor conversion and per-channel
# scaling; only the training pipeline applies random crop/flip augmentation.
channel_stats = (0.5, 0.5, 0.5)  # used as both the mean and the std


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps shared by train and test."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(channel_stats, channel_stats),
    ]


transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    + _tensor_and_normalize()
)
transform_test = transforms.Compose(_tensor_and_normalize())

# Load datasets (downloaded into ./data on first use)
cifar_location = dict(root='./data', download=True)
train_dataset = torchvision.datasets.CIFAR10(
    train=True, transform=transform_train, **cifar_location
)
test_dataset = torchvision.datasets.CIFAR10(
    train=False, transform=transform_test, **cifar_location
)

# Create data loaders; only the training split is shuffled
loader_options = dict(batch_size=128, num_workers=2)
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, **loader_options
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, **loader_options
)

# Class names for CIFAR-10, keyed by integer label
class_names = dict(enumerate([
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]))

print(f"Training dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

## Define the Model

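We'll use a simple CNN for this example: three convolution + ReLU + max-pooling stages followed by a two-layer fully connected classifier with dropout: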

In [None]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 32x32 images.

    Three Conv-ReLU-MaxPool stages (32, 64 and 128 channels) feed a
    two-layer fully connected head with dropout.

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
        in_channels: Number of channels in the input images. Defaults to 3.

    The first linear layer is sized for 32x32 inputs: three 2x2 poolings
    reduce the spatial size to 4x4 with 128 channels.
    """

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch ``x``."""
        x = self.conv_layers(x)
        # flatten() copies when needed, so non-contiguous activations
        # (e.g. channels_last inputs) work where view() would raise.
        x = x.flatten(1)
        x = self.fc_layers(x)
        return x

# Pick the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the network on that device, then the loss and the optimizer
model = SimpleCNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Using device: {device}")
print(model)

## Initialize Process-Aware Benchmarking

Now, let's set up PAB to track the learning trajectory:

In [None]:
# Create the PAB tracker that the training loop reports to
pab = ProcessAwareBenchmark(
    track_representations=True,
    save_frequency=2,  # Save checkpoints every 2 epochs
    checkpoint_dir='./notebook_results/checkpoints',
)

print(f"PAB initialized with checkpoint directory at {pab.checkpoint_dir}")

## Training Loop with PAB Tracking

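Let's train our model while tracking the learning trajectory with PAB. After every epoch we record the losses, accuracies, and per-class accuracies; adversarial robustness is evaluated only every second epoch to save time: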

In [None]:
# Training function
def train(epoch):
    """Train the global ``model`` for one pass over ``train_loader``.

    Args:
        epoch: Epoch number, used only to label the progress bar.

    Returns:
        tuple: ``(loss, accuracy)`` where ``loss`` is the summed per-batch
        loss divided by ``len(train_loader)`` and ``accuracy`` is the
        fraction (0-1) of training samples predicted correctly.
    """
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch}')
    # Count batches with enumerate rather than reading progress_bar.n:
    # tqdm only refreshes ``n`` on its throttled display updates, so using
    # it as the denominator overstates the running loss between refreshes.
    for batches_done, (inputs, targets) in enumerate(progress_bar, start=1):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Running averages for display only (accuracy shown in percent)
        progress_bar.set_postfix({
            'loss': train_loss / batches_done,
            'acc': 100. * correct / total
        })

    return train_loss / len(train_loader), correct / total

# Evaluation function
def evaluate():
    """Score the global ``model`` on ``test_loader`` with gradients disabled.

    Returns:
        tuple: ``(loss, accuracy)`` where ``loss`` is the summed per-batch
        loss divided by ``len(test_loader)`` and ``accuracy`` is the
        fraction (0-1) of test samples predicted correctly.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            loss_sum += criterion(logits, labels).item()

            # Index of the largest logit is the predicted class
            predictions = logits.max(1)[1]
            num_seen += labels.size(0)
            num_correct += predictions.eq(labels).sum().item()

    mean_loss = loss_sum / len(test_loader)
    accuracy = num_correct / num_seen
    return mean_loss, accuracy

# Number of epochs to train
num_epochs = 10

# Per-epoch pipeline: optimize, score on the test split, gather the extra
# PAB signals, then hand everything to the tracker.
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train(epoch)
    test_loss, test_acc = evaluate()

    # Accuracy broken down by class on the test split
    class_accuracies = compute_class_accuracies(
        model, test_loader, num_classes=10, device=device
    )

    # The adversarial evaluation only runs on even epochs to save time;
    # odd epochs report None for that metric.
    if epoch % 2 != 0:
        adversarial_acc = None
    else:
        print("Evaluating adversarial robustness...")
        adv_metrics = evaluate_adversarial_robustness(
            model, test_loader, device=device, epsilon=0.03
        )
        adversarial_acc = adv_metrics['adversarial_accuracy']
        print(f"Adversarial accuracy: {adversarial_acc:.4f}")

    # Everything PAB records for this epoch; test-split numbers are passed
    # as the validation metrics.
    epoch_record = dict(
        model=model,
        epoch=epoch,
        train_loss=train_loss,
        val_loss=test_loss,
        train_acc=train_acc,
        val_acc=test_acc,
        class_accuracies=class_accuracies,
        adversarial_acc=adversarial_acc,
    )
    pab.track_epoch(**epoch_record)

    print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, "
          f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}")

## Analyze Learning Trajectory with PAB

Now that we've trained our model and tracked the learning trajectory with PAB, let's analyze the results:

In [None]:
# Run PAB's trajectory analysis; eval_results is inspected by later cells
eval_results = pab.evaluate_trajectory()

# Fetch the summary and print it under a heading
summary = pab.summarize()
print("PAB Summary:", summary, sep="\n")

## Visualize Learning Trajectory

Let's create some visualizations to better understand the learning trajectory:

In [None]:
# Loss and accuracy curves for the train and validation series
# NOTE(review): if plot_learning_trajectory opens its own figure, the one
# created here stays empty — confirm against pab.visualization.
plt.figure(figsize=(12, 5))
history = pab.metrics
plot_learning_trajectory(
    train_losses=history['train_loss'],
    val_losses=history['val_loss'],
    train_accs=history['train_acc'],
    val_accs=history['val_acc'],
    save_path="./notebook_results/learning_trajectory.png",
    title="Learning Trajectory",
)
plt.show()

In [None]:
# Per-class accuracy over the epochs, labelled with the CIFAR-10 class names
plt.figure(figsize=(12, 6))
plot_class_progression(
    save_path="./notebook_results/class_progression.png",
    class_names=class_names,
    class_accuracies=pab.metrics['class_accuracy'],
)
plt.show()

In [None]:
# Plot adversarial robustness curve, but only when PAB recorded a non-empty
# robustness series.
# NOTE(review): the training loop measures adversarial accuracy on even
# epochs only, so this series may not line up one-to-one with val_acc —
# confirm how plot_robustness_curve aligns the two.
if 'adversarial_robustness' in pab.metrics:
    robustness_history = pab.metrics['adversarial_robustness']
else:
    robustness_history = None

if robustness_history:
    plt.figure(figsize=(12, 6))
    plot_robustness_curve(
        clean_accuracies=pab.metrics['val_acc'],
        adversarial_accuracies=robustness_history,
        save_path="./notebook_results/robustness_curve.png",
    )
    plt.show()

In [None]:
# Combined summary figure built from every metric PAB tracked
plt.figure(figsize=(15, 10))
plot_pab_summary(
    save_path="./notebook_results/pab_summary.png",
    metrics=pab.metrics,
)
plt.show()

## Analyze Early vs. Late Learning Classes

Let's look at which classes were learned early and which were learned late:

In [None]:
# List the classes PAB reports as early and late learners, when available
if 'class_patterns' not in eval_results:
    print("Class patterns not available in evaluation results.")
else:
    class_patterns = eval_results['class_patterns']

    # Same listing for both groups; a missing group prints only its heading
    for heading, group_key in (
        ("Early learning classes:", 'early_classes'),
        ("\nLate learning classes:", 'late_classes'),
    ):
        print(heading)
        for class_id in class_patterns.get(group_key, []):
            print(f"  - Class {class_id} ({class_names[class_id]})")

## Analyze Generalization

Let's look at the generalization behavior of the model:

In [None]:
# Report the generalization section of the PAB results, when available.
# Fields missing from that section are shown as 'Unknown'.
if 'generalization' not in eval_results:
    print("Generalization metrics not available in evaluation results.")
else:
    gen = eval_results['generalization']

    print("Generalization Analysis:")
    print(f"  - Final generalization gap: {gen.get('final_gap', 'Unknown')}")
    print(f"  - Gap trend: {gen.get('gap_trend', 'Unknown')}")
    print(f"  - Optimal early stopping epoch: {gen.get('early_stopping_epoch', 'Unknown')}")

## Analyze Robustness

Let's look at the adversarial robustness of the model:

In [None]:
# Report the robustness section of the PAB results, when available.
# Missing fields fall back to 0; degradation is shown as a percentage.
if 'robustness' not in eval_results:
    print("Robustness metrics not available in evaluation results.")
else:
    rob = eval_results['robustness']

    print("Adversarial Robustness Analysis:")
    print(f"  - Peak robustness: {rob.get('peak_value', 0):.4f} at epoch {rob.get('peak_epoch', 0)}")
    print(f"  - Final robustness: {rob.get('final_value', 0):.4f}")
    print(f"  - Robustness degradation: {rob.get('degradation', 0)*100:.2f}%")

## PAB Recommendations

Based on the PAB analysis, let's see what recommendations we can make for this model:

In [None]:
# Turn the PAB results into plain-language suggestions. Each rule reads one
# section of eval_results and is skipped when that section is missing.
print("PAB Recommendations:")

recommendations = []

# A growing train/validation gap is treated as a sign of overfitting
if 'generalization' in eval_results and eval_results['generalization'].get('gap_trend') == 'increasing':
    recommendations.append("Model shows signs of overfitting, consider early stopping or regularization.")

# Flag a robustness degradation above 0.1
if 'robustness' in eval_results and eval_results['robustness'].get('degradation', 0) > 0.1:
    recommendations.append("Adversarial robustness peaks before final epoch, suggesting robustness-accuracy tradeoff.")

# Flag a stability spread ('std') above 0.1
if 'overall_stability' in eval_results and eval_results['overall_stability'].get('std', 0) > 0.1:
    recommendations.append("Training exhibits instability, consider more stable optimization strategy.")

# Flag the run when more than two classes are reported as late learners
if 'class_patterns' in eval_results:
    num_late = len(eval_results['class_patterns'].get('late_classes', []))
    if num_late > 2:
        recommendations.append(f"{num_late} classes are learned late, consider class-balanced training or focal loss.")

if recommendations:
    for recommendation in recommendations:
        print(f"  • {recommendation}")
else:
    # Without this line the heading would be printed with nothing under it
    print("  • No recommendations: none of the checks above flagged an issue.")

## Conclusion

In this notebook, we've demonstrated how to use the Process-Aware Benchmarking (PAB) toolkit to track, analyze, and visualize a model's learning trajectory during training. PAB provides insights beyond traditional static benchmarking, helping you understand how your model learns, when it generalizes, and how its robustness evolves.

Key takeaways:
- PAB helps identify which classes are learned early and which are learned late
- It tracks generalization efficiency throughout training
- It monitors adversarial robustness over time
- Its trajectory metrics can be turned into actionable recommendations for improving model performance

By incorporating PAB into your ML workflow, you can gain deeper insights into your models and make more informed decisions about training, architecture, and hyperparameter selection.