# 12. Regularization Practice

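This notebook trains three MLPs on MNIST — an unregularized baseline, one with dropout, and one with L2 weight decay — each with early stopping, and compares their training and validation losses.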

## Experiment Overview
- **Goal**: Compare different regularization techniques
- **Model**: MLP with various regularization methods
- **Features**: Dropout, L2 regularization, early stopping
- **Learning**: Understanding regularization effects on overfitting

## What You'll Learn
- Dropout regularization
- L2 weight decay
- Early stopping
- Regularization trade-offs


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import sys
import os

# Add scripts directory to path
sys.path.append('../scripts')
from utils import load_mnist_data, get_device, set_seed

# Set random seed for reproducibility
# (set_seed is imported from the project's utils module; presumably it seeds
# the Python/NumPy/PyTorch RNGs — TODO confirm against ../scripts/utils.py)
set_seed(42)

# Get device
# The value returned here is reused below with .to(device) for every model.
device = get_device()
print(f"Using device: {device}")

# Load MNIST dataset
# NOTE(review): the helper returns three loaders, unpacked as train/val/test.
# Which split the `test_split=0.2` fraction controls is not visible from this
# notebook — confirm in utils.load_mnist_data.
print("Loading MNIST dataset...")
train_loader, val_loader, test_loader = load_mnist_data(batch_size=64, test_split=0.2)

# Report the size of each split via the loader's underlying dataset
print(f"Training samples: {len(train_loader.dataset)}")
print(f"Validation samples: {len(val_loader.dataset)}")
print(f"Test samples: {len(test_loader.dataset)}")


In [None]:
# Define models with different regularization techniques
class NoRegularizationMLP(nn.Module):
    """Baseline MLP: two ReLU hidden layers and a linear output, no regularization.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=784, hidden_size=256, num_classes=10):
        super().__init__()
        # Layers are created in fc1 -> fc2 -> fc3 order so seeded
        # initialisation stays reproducible.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten each sample, apply the hidden layers, and return raw logits."""
        hidden = x.view(x.size(0), -1)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

class DropoutMLP(nn.Module):
    """MLP with dropout applied after each of its two ReLU hidden layers.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
        dropout_rate: Probability passed to ``nn.Dropout``.
    """

    def __init__(self, input_size=784, hidden_size=256, num_classes=10, dropout_rate=0.5):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        # One dropout module is shared by both hidden layers.
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Flatten each sample and return raw logits; dropout only acts in train mode."""
        features = x.view(x.size(0), -1)
        for hidden_layer in (self.fc1, self.fc2):
            features = self.dropout(F.relu(hidden_layer(features)))
        return self.fc3(features)

class L2RegularizationMLP(nn.Module):
    """MLP used for the L2 experiment: two ReLU hidden layers and a linear output.

    The network itself adds no penalty term; in this notebook the L2
    regularization is supplied through the optimizer's ``weight_decay``.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=784, hidden_size=256, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten each sample and return raw logits."""
        flat = x.view(x.size(0), -1)
        first_hidden = F.relu(self.fc1(flat))
        second_hidden = F.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Create models
# Instantiate one network per experiment, in a fixed order, and move each to
# the selected device. The dict keys double as plot labels and file-name stems.
models = {}
for label, factory in (
    ('No Regularization', NoRegularizationMLP),
    ('Dropout', DropoutMLP),
    ('L2 Regularization', L2RegularizationMLP),
):
    models[label] = factory().to(device)

# Report the total parameter count of every model
print("Models created:")
for name, model in models.items():
    print(f"{name}: {sum(p.numel() for p in model.parameters()):,} parameters")


In [None]:
# Training helpers: a single-epoch pass plus the early-stopping training loop
def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Run one pass over ``loader`` and return the per-sample mean loss.

    When ``optimizer`` is given, the model is updated after every batch;
    otherwise the pass only evaluates. The caller is responsible for
    setting train/eval mode and for wrapping evaluation in ``torch.no_grad()``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    total_loss = 0.0
    n_samples = 0
    for data, target in loader:
        data, target = data.to(run_device), target.to(run_device)
        if optimizer is not None:
            optimizer.zero_grad()
        loss = criterion(model(data), target)
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        # The criterion returns a batch mean; weight it by batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = target.size(0)
        total_loss += loss.item() * batch_size
        n_samples += batch_size
    if n_samples == 0:
        raise ValueError("loader yielded no samples; cannot compute a mean loss")
    return total_loss / n_samples


def train_with_early_stopping(model, train_loader, val_loader, epochs=50, lr=0.001, weight_decay=0.0, patience=10, restore_best_weights=False):
    """Train ``model`` with Adam and stop once validation loss stops improving.

    Args:
        model: Network to train in place; batches are moved to the device of
            its parameters.
        train_loader: Iterable of ``(data, target)`` batches used for updates.
        val_loader: Iterable of ``(data, target)`` batches used for validation.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        weight_decay: Value forwarded to Adam's ``weight_decay`` argument.
        patience: Number of consecutive epochs without a strictly lower
            validation loss after which training stops.
        restore_best_weights: If True, reload the weights from the epoch with
            the lowest validation loss before returning. The default (False)
            leaves the model with its last-epoch weights.

    Returns:
        A ``(train_losses, val_losses)`` tuple of per-epoch, per-sample mean
        cross-entropy losses.

    Raises:
        ValueError: If either loader yields no samples.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss()
    # Follow the model's own device instead of relying on a notebook global.
    run_device = next(model.parameters()).device

    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_state = None
    patience_counter = 0

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = _run_epoch(model, train_loader, criterion, run_device, optimizer)

        # Validation
        model.eval()
        with torch.no_grad():
            val_loss = _run_epoch(model, val_loader, criterion, run_device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            if restore_best_weights:
                # Clone the tensors: state_dict() returns live references
                # that later optimizer steps would overwrite.
                best_state = {key: value.detach().clone() for key, value in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    if best_state is not None:
        model.load_state_dict(best_state)

    return train_losses, val_losses

# Train all models
# Every model goes through the same trainer; only the L2 variant overrides the
# trainer's default weight decay.
results = {}
for name, model in models.items():
    print(f"\nTraining {name}...")
    extra_kwargs = {'weight_decay': 0.01} if name == 'L2 Regularization' else {}
    train_losses, val_losses = train_with_early_stopping(model, train_loader, val_loader, **extra_kwargs)

    # Keep the per-epoch loss curves for plotting and the final report
    results[name] = {'train_losses': train_losses, 'val_losses': val_losses}

# Plot results
# One figure with three side-by-side panels: training curves, validation
# curves, and both overlaid.
plt.figure(figsize=(15, 5))

# Panel 1: training loss per epoch for every model
plt.subplot(1, 3, 1)
for name, result in results.items():
    plt.plot(result['train_losses'], label=f'{name} (Train)')
plt.title('Training Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

# Panel 2: validation loss per epoch for every model
plt.subplot(1, 3, 2)
for name, result in results.items():
    plt.plot(result['val_losses'], label=f'{name} (Val)')
plt.title('Validation Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

# Panel 3: training (solid, translucent) and validation (dashed) together
plt.subplot(1, 3, 3)
for name, result in results.items():
    plt.plot(result['train_losses'], label=f'{name} (Train)', alpha=0.7)
    plt.plot(result['val_losses'], label=f'{name} (Val)', linestyle='--')
plt.title('All Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.tight_layout()
# savefig does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail here after training has finished.
plot_path = '../results/plots/regularization_comparison.png'
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.show()

# Print final results
# Report the last recorded epoch's losses for each model (the epoch at which
# training ended, whether by early stopping or by reaching the epoch limit).
print("\nFinal Results:")
for name, result in results.items():
    final_train_loss, final_val_loss = (
        result[curve][-1] for curve in ('train_losses', 'val_losses')
    )
    print(f"{name}: Train Loss: {final_train_loss:.4f}, Val Loss: {final_val_loss:.4f}")

# Save models
# Create the log directory up front: torch.save does not create missing
# parent folders, and failing here would discard the freshly trained weights.
log_dir = '../results/logs'
os.makedirs(log_dir, exist_ok=True)
for name, model in models.items():
    # File stem is the model label lower-cased with spaces replaced by underscores
    file_stem = name.lower().replace(" ", "_")
    torch.save(model.state_dict(), f'{log_dir}/regularization_{file_stem}.pth')

print("\nModels saved successfully!")
