# Week 5 Research — Multi-Epoch Optimizer × Schedule Analysis

This notebook explores results from extended multi-epoch sweeps to understand optimizer behavior, learning-rate schedule effects, and convergence trends over time. 
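Data is loaded from two files written by the earlier sweep into `reports/scaling_validation_v3/`: `optimizer_schedule_multi_epoch.csv` (one summary row per experiment) and `epoch_histories.pkl` (per-epoch history records).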

In [None]:
# Required imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from pathlib import Path
import numpy as np

# Plot styling: seaborn theme first, then override the default figure size and DPI
sns.set(context="talk", style="whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'figure.dpi': 100,
})

# Every figure produced by this notebook is written beneath OUTPUT_DIR / "week5_plots";
# the directory (and any missing parents) is created on first run.
OUTPUT_DIR = Path("reports/scaling_validation_v3")
PLOT_DIR = OUTPUT_DIR / "week5_plots"
PLOT_DIR.mkdir(exist_ok=True, parents=True)

print(f"Plot output directory: {PLOT_DIR}")

In [None]:
# Load data from existing experiments.
# OUTPUT_DIR is defined once in the setup cell; it is intentionally not re-declared here
# so the input location and the plot directory cannot drift apart.
summary_path = OUTPUT_DIR / "optimizer_schedule_multi_epoch.csv"
history_path = OUTPUT_DIR / "epoch_histories.pkl"

# One row per experiment summary
df = pd.read_csv(summary_path)

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load history pickles produced by your own training runs.
with open(history_path, "rb") as f:
    histories = pickle.load(f)

# Columns the later cells of this notebook read from df. A missing column is reported
# here (non-fatally) so the cause is obvious before a plotting cell fails with a KeyError.
required_columns = {
    "config_id", "optimizer", "schedule", "epochs",
    "val_accuracy", "val_loss", "train_accuracy", "val_ece",
}
missing_columns = sorted(required_columns - set(df.columns))
if missing_columns:
    print(f"WARNING: {summary_path} is missing expected columns: {missing_columns}")

print(f"Loaded {len(df)} experiment summaries and {len(histories)} history records.")
display(df.head())

In [None]:
# Plot 1: Accuracy vs Epochs (grouped by optimizer/schedule)
# Visual encoding: solid line = AdamW, dashed line = any other optimizer;
# marker shape identifies the learning-rate schedule.
fig, ax = plt.subplots(figsize=(14, 8))

# Marker per schedule; any schedule not listed here falls back to 'D'
schedule_markers = {'fixed': 'o', 'step': 's', 'cosine': '^'}

# Draw one line for every optimizer/schedule pair that has at least one row
for opt_name in df["optimizer"].unique():
    opt_rows = df[df["optimizer"] == opt_name]
    for sched_name in df["schedule"].unique():
        curve = opt_rows[opt_rows["schedule"] == sched_name].sort_values("epochs")
        if curve.empty:
            continue
        ax.plot(
            curve["epochs"], curve["val_accuracy"],
            linestyle='-' if opt_name == 'AdamW' else '--',
            marker=schedule_markers.get(sched_name, 'D'),
            markersize=8, linewidth=2, alpha=0.8,
            label=f"{opt_name} | {sched_name}",
        )

ax.set_xlabel("Epochs", fontsize=14)
ax.set_ylabel("Validation Accuracy", fontsize=14)
ax.set_title("Validation Accuracy vs Epochs by Optimizer and Schedule", fontsize=16, fontweight='bold')
# Legend sits outside the axes, to the right, so it never covers the curves
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
ax.grid(True, alpha=0.3)
fig.tight_layout()

# Save plot
acc_plot_path = PLOT_DIR / "accuracy_vs_epochs.png"
fig.savefig(acc_plot_path, dpi=300, bbox_inches='tight')
print(f"Saved: {acc_plot_path}")
plt.show()

**Discussion:**  
Notes and insights go here — describe convergence patterns, compare AdamW vs SGD, and mention how cosine/one-cycle schedules behaved across epochs.

In [None]:
# Plot 2: Loss vs Epochs (same grouping)
# Same encoding as the accuracy figure: line style follows the optimizer
# (solid for AdamW, dashed otherwise) and marker shape follows the schedule.
loss_fig, loss_ax = plt.subplots(figsize=(14, 8))

marker_for = {'fixed': 'o', 'step': 's', 'cosine': '^'}  # unlisted schedules use 'D'

# Walk every optimizer/schedule pair and skip the pairs with no rows
for opt in df["optimizer"].unique():
    is_opt = df["optimizer"] == opt
    for sched in df["schedule"].unique():
        runs = df[is_opt & (df["schedule"] == sched)].sort_values("epochs")
        if runs.empty:
            continue
        dash = '-' if opt == 'AdamW' else '--'
        loss_ax.plot(runs["epochs"], runs["val_loss"],
                     linestyle=dash, marker=marker_for.get(sched, 'D'), markersize=8,
                     label=f"{opt} | {sched}", linewidth=2, alpha=0.8)

loss_ax.set_xlabel("Epochs", fontsize=14)
loss_ax.set_ylabel("Validation Loss", fontsize=14)
loss_ax.set_title("Validation Loss vs Epochs by Optimizer and Schedule", fontsize=16, fontweight='bold')
# Legend is anchored outside the plotting area on the right
loss_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
loss_ax.grid(True, alpha=0.3)
loss_fig.tight_layout()

# Save plot
loss_plot_path = PLOT_DIR / "loss_vs_epochs.png"
loss_fig.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
print(f"Saved: {loss_plot_path}")
plt.show()

**Discussion:**  
Notes and insights go here — analyze loss reduction patterns and note any schedules that achieve lower final loss values.

In [None]:
# Plot 3: 2×2 metric grid (validation accuracy, validation loss, training accuracy, validation ECE)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle("Multi-Epoch Optimizer Comparison: 2×2 Metric Grid", fontsize=18, fontweight='bold')

# Final-performance comparison: keep only the runs trained for the largest epoch count in df
longest_run = df["epochs"].max()
final_data = df[df["epochs"] == longest_run]

# (df column, panel title, y-axis label) in row-major order:
# top-left, top-right, bottom-left, bottom-right.
# ECE = Expected Calibration Error.
panels = [
    ("val_accuracy", "Final Validation Accuracy", "Accuracy"),
    ("val_loss", "Final Validation Loss", "Loss"),
    ("train_accuracy", "Final Training Accuracy", "Accuracy"),
    ("val_ece", "Final Validation ECE (lower is better)", "ECE"),
]

# Each panel is a grouped bar chart: schedules on the x-axis, one bar color per optimizer
for ax, (metric, panel_title, y_label) in zip(axes.flat, panels):
    sns.barplot(data=final_data, x="schedule", y=metric,
                hue="optimizer", ax=ax, palette="Set2")
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.set_xlabel("Learning Rate Schedule", fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.legend(title="Optimizer", fontsize=10)
    # Tilt the schedule names so they do not collide
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

fig.tight_layout()

# Save grid plot
grid_plot_path = PLOT_DIR / "metric_grid_2x2.png"
fig.savefig(grid_plot_path, dpi=300, bbox_inches='tight')
print(f"Saved: {grid_plot_path}")
plt.show()

**Discussion:**  
Notes and insights go here — compare final performance across schedules and optimizers, noting any clear winners for each metric.

In [None]:
# Plot 4: Learning curve overlays for each schedule
# One panel per schedule; inside a panel, one validation-accuracy curve per optimizer.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle("Learning Curve Overlays by Schedule", fontsize=18, fontweight='bold')

# The 2×2 grid is sized for exactly these four schedule names
schedules = ['fixed', 'step', 'cosine', 'onecycle']
colors = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D']

# Give every optimizer its own color. 'SGD+Momentum' keeps the first color and the
# next optimizer keeps the second, matching the previous two-optimizer output; any
# further optimizers take the remaining colors (cycling) instead of all sharing one.
optimizer_names = list(df["optimizer"].dropna().unique())
other_optimizers = [name for name in optimizer_names if name != 'SGD+Momentum']
optimizer_colors = {'SGD+Momentum': colors[0]}
for k, name in enumerate(other_optimizers):
    optimizer_colors[name] = colors[1 + k % (len(colors) - 1)]

for ax, schedule in zip(axes.flat, schedules):
    schedule_rows = df[df["schedule"] == schedule]

    # Plot learning curves for this schedule
    for optimizer in optimizer_names:
        subset = schedule_rows[schedule_rows["optimizer"] == optimizer].sort_values("epochs")
        if subset.empty:
            continue
        ax.plot(subset["epochs"], subset["val_accuracy"],
                marker='o', markersize=6, linewidth=2,
                label=optimizer, color=optimizer_colors[optimizer], alpha=0.8)

    ax.set_title(f"{schedule.title()} Schedule", fontsize=14, fontweight='bold')
    ax.set_xlabel("Epochs", fontsize=12)
    ax.set_ylabel("Validation Accuracy", fontsize=12)
    ax.grid(True, alpha=0.3)

    # Only build a legend when something was drawn; otherwise say why the panel is blank
    # (e.g. the schedule name in df is spelled differently from the list above).
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(fontsize=10)
    else:
        ax.text(0.5, 0.5, f"No runs found for schedule '{schedule}'",
                transform=ax.transAxes, ha='center', va='center', fontsize=12)

fig.tight_layout()

# Save learning curves plot
curves_plot_path = PLOT_DIR / "learning_curve_overlays.png"
fig.savefig(curves_plot_path, dpi=300, bbox_inches='tight')
print(f"Saved: {curves_plot_path}")
plt.show()

**Discussion:**  
Notes and insights go here — analyze how each schedule affects learning dynamics and convergence speed.

In [None]:
# Additional analysis: Detailed epoch-by-epoch curves from history data
# `histories` is looked up by config id; each entry is iterated as a sequence of
# per-epoch records that are indexed with "val_accuracy" and "train_accuracy".
if histories:
    # Select representative configurations for detailed analysis
    # (config ids are '|'-separated: model|optimizer|schedule|epochs=N)
    key_configs = [
        "TinyCNN|SGD+Momentum|fixed|epochs=12",
        "TinyCNN|SGD+Momentum|cosine|epochs=12",
        "TinyCNN|AdamW|fixed|epochs=12",
        "TinyCNN|AdamW|cosine|epochs=12"
    ]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle("Detailed Epoch-by-Epoch Training Curves", fontsize=18, fontweight='bold')

    plot_colors = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D']

    for ax, config, color in zip(axes.flat, key_configs, plot_colors):
        # Extract optimizer and schedule from config
        parts = config.split('|')
        optimizer = parts[1]
        schedule = parts[2]

        # Label every panel up front so a panel without data is still identifiable
        ax.set_title(f"{optimizer} | {schedule}", fontsize=14, fontweight='bold')
        ax.set_xlabel("Epoch", fontsize=12)
        ax.set_ylabel("Accuracy", fontsize=12)
        ax.grid(True, alpha=0.3)
        ax.set_ylim(0, 1)

        history = histories[config] if config in histories else []
        if len(history) == 0:
            # Missing or empty history: explain the blank panel instead of leaving it bare
            ax.text(0.5, 0.5, "No history recorded for this configuration",
                    transform=ax.transAxes, ha='center', va='center', fontsize=12)
            continue

        # Epochs are numbered from 1 in the order the records appear
        epochs = list(range(1, len(history) + 1))
        val_acc = [h["val_accuracy"] for h in history]
        train_acc = [h["train_accuracy"] for h in history]

        # Plot validation accuracy
        ax.plot(epochs, val_acc, color=color, marker='o', linewidth=2,
                markersize=6, label='Val Accuracy', alpha=0.8)

        # Plot training accuracy (dashed, same color, so the gap to validation is easy to read)
        ax.plot(epochs, train_acc, color=color, marker='s', linewidth=2,
                linestyle='--', markersize=6, label='Train Accuracy', alpha=0.7)

        ax.legend(fontsize=10)

    fig.tight_layout()

    # Save detailed curves
    detailed_path = PLOT_DIR / "detailed_epoch_curves.png"
    fig.savefig(detailed_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {detailed_path}")
    plt.show()
else:
    print("No history data available for detailed curves.")

**Discussion:**  
Notes and insights go here — analyze training vs validation dynamics and identify any overfitting patterns.

In [None]:
# Summary statistics and best performers
epoch_budgets = sorted(df['epochs'].unique())
optimizer_list = list(df['optimizer'].unique())
schedule_list = list(df['schedule'].unique())

print("=== Multi-Epoch Results Summary ===")
print(f"Total experiments: {len(df)}")
print(f"Epochs tested: {epoch_budgets}")
print(f"Optimizers: {optimizer_list}")
print(f"Schedules: {schedule_list}")

# Single best row per metric: highest accuracy, lowest loss, lowest ECE
print("\n=== Best Performing Configurations ===")
print("\nBy Validation Accuracy:")
top_acc_label = df['val_accuracy'].idxmax()
best_acc = df.loc[top_acc_label]
print(f"  {best_acc['config_id']}: {best_acc['val_accuracy']:.4f}")

print("\nBy Validation Loss (lowest):")
low_loss_label = df['val_loss'].idxmin()
best_loss = df.loc[low_loss_label]
print(f"  {best_loss['config_id']}: {best_loss['val_loss']:.4f}")

print("\nBy ECE (lowest):")
low_ece_label = df['val_ece'].idxmin()
best_ece = df.loc[low_ece_label]
print(f"  {best_ece['config_id']}: {best_ece['val_ece']:.4f}")

# Mean and standard deviation of each validation metric, shared by both group-bys below
spread = ['mean', 'std']
metric_aggs = {metric: spread for metric in ('val_accuracy', 'val_loss', 'val_ece')}

# Performance by optimizer (averaged across schedules and epochs)
print("\n=== Performance by Optimizer (Averaged) ===")
optimizer_stats = df.groupby('optimizer').agg(metric_aggs).round(4)
print(optimizer_stats)

# Performance by schedule (averaged across optimizers and epochs)
print("\n=== Performance by Schedule (Averaged) ===")
schedule_stats = df.groupby('schedule').agg(metric_aggs).round(4)
print(schedule_stats)

## Summary

- Multi-epoch sweeps (3–12 epochs) reveal optimizer stability and schedule trends.
- Cosine and One-Cycle schedules generally improve convergence speed and validation loss.
- Plots are exported to `reports/scaling_validation_v3/week5_plots/` for embedding in Notion.