# Learning Rate Warmup Study - Jupyter Notebook

Complete notebook for running the learning rate warmup experiments.

**Timeline**: 4 weeks | **GPU Hours**: 20-24 | **Experiments**: 155 (or 124 without MedMNIST)

## Step 1: Install Dependencies

In [None]:
# Install core packages
!pip install torch torchvision numpy pandas matplotlib seaborn tqdm scikit-learn pyyaml pillow scipy tensorboard -q

# Try to install medmnist (optional)
try:
    !pip install medmnist --no-deps -q
    !pip install fire -q
    print("✅ MedMNIST installed")
except:
    print("⚠️  MedMNIST not available - will run 124 experiments instead of 155")

print("\n✅ Installation complete!")

## Step 2: Upload Project Files

**Option A**: Upload the ZIP file using the file upload button, then extract it and make the extracted `warmup_study` folder the working directory

**Option B**: If using Google Colab, run this cell:

In [None]:
# For Google Colab: upload one or more ZIP archives and unpack them in place
import os
import zipfile
from google.colab import files

# The upload result is iterated by file name below.
uploaded = files.upload()

for archive_name in uploaded.keys():
    # Unpack each uploaded archive into the current directory.
    with zipfile.ZipFile(archive_name, mode='r') as archive:
        archive.extractall(path='.')
    print(f"✅ Extracted {archive_name}")

# Switch into the project directory so later cells can import its modules
project_dir = 'warmup_study'
os.chdir(project_dir)
print(f"Working directory: {os.getcwd()}")

## Step 3: Verify Installation

In [None]:
# Import the core stack; np, pd and plt are reused by later cells.
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Report the PyTorch build and whether a CUDA device is visible.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# MedMNIST is optional: report its availability without failing the cell.
try:
    import medmnist
except ImportError:
    print("⚠️  MedMNIST not available")
else:
    print("✅ MedMNIST available")

## Step 4: Quick Test (5 epochs, ~5 minutes)

In [None]:
from config import ExperimentConfig, DataConfig, ModelConfig, TrainingConfig
from train import train

# Quick test experiment: build each config section separately, then combine.
quick_data = DataConfig(name='cifar10', examples_per_class=100)
quick_model = ModelConfig(num_classes=10)
quick_training = TrainingConfig(
    optimizer='sgd',
    warmup_epochs=1,
    epochs=5,
    batch_size=128,
    seed=42,
    track_interpretability=False,
)

config = ExperimentConfig(
    experiment_name='quick_test',
    data=quick_data,
    model=quick_model,
    training=quick_training,
    output_dir='./results',
)

# train() returns a summary that is indexed by 'best_val_acc' below.
summary = train(config)
best_acc = summary['best_val_acc']
print(f"\n✅ Test complete! Accuracy: {best_acc:.2f}%")

## Step 5: Single Full Experiment (~30 minutes)

In [None]:
# Full experiment with 50 epochs.
# Uses the config classes and train() imported in the Step 4 cell.
training_options = dict(
    optimizer='sgd',
    warmup_epochs=5,
    epochs=50,
    batch_size=128,
    seed=42,
    track_interpretability=True,
)

config = ExperimentConfig(
    experiment_name='cifar10_500epc_w5_sgd',
    data=DataConfig(name='cifar10', examples_per_class=500),
    model=ModelConfig(num_classes=10),
    training=TrainingConfig(**training_options),
)

summary = train(config)
best_acc = summary['best_val_acc']
print(f"\n✅ Best accuracy: {best_acc:.2f}%")

## Step 6: Compare Warmup Durations (~2-3 hours)

In [None]:
# Compare different warmup values: one training run per warmup duration,
# with every other setting held fixed.
results = []
banner = '=' * 60

for warmup in [0, 1, 5, 10]:
    print(f"\n{banner}")
    print(f"Warmup: {warmup} epochs")
    print(banner)

    run_training = TrainingConfig(
        optimizer='sgd',
        warmup_epochs=warmup,
        epochs=30,
        batch_size=128,
        seed=42,
    )
    config = ExperimentConfig(
        experiment_name=f'comparison_w{warmup}',
        data=DataConfig(name='cifar10', examples_per_class=1000),
        model=ModelConfig(num_classes=10),
        training=run_training,
    )

    summary = train(config)
    row = {
        'warmup': warmup,
        'accuracy': summary['best_val_acc'],
        'time_min': summary['training_time_seconds'] / 60,  # seconds -> minutes
    }
    results.append(row)

# Display results as a table
df_results = pd.DataFrame(results)
print("\n" + banner)
print("RESULTS")
print(banner)
print(df_results)

# Plot best validation accuracy against warmup duration
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    df_results['warmup'],
    df_results['accuracy'],
    marker='o',
    linewidth=2,
    markersize=10,
)
ax.set_xlabel('Warmup Epochs', fontsize=12)
ax.set_ylabel('Best Validation Accuracy (%)', fontsize=12)
ax.set_title('Warmup Effect on Accuracy', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
plt.show()

## Step 7: Run All Experiments (20-24 hours)

**WARNING**: This will take 20-24 GPU hours. Only run if you have:
- Google Colab Pro, or
- Kaggle notebook, or
- Local GPU

On the free Colab tier, run the experiments in batches instead, using the week-by-week cells under "Alternative: Run by Week" below.

In [None]:
from run_experiments import run_all_experiments

# Run all experiments
# Output goes to './results', the same directory the progress and analysis
# cells later in this notebook read from.
run_all_experiments(output_dir='./results')

## Alternative: Run by Week

In [None]:
from run_experiments import run_week_experiments

# Week 1: CIFAR-10 baseline (~4 hours)
# Presumably runs only the experiments assigned to week 1 (TODO confirm in
# run_experiments). Output goes to the shared './results' directory.
run_week_experiments(week=1, output_dir='./results')

In [None]:
# Week 2: CIFAR-10 full (~8 hours)
# NOTE: relies on run_week_experiments having been imported by the Week 1 cell.
run_week_experiments(week=2, output_dir='./results')

In [None]:
# Week 3: Cross-dataset (~12 hours)
# NOTE: relies on run_week_experiments having been imported by the Week 1 cell.
run_week_experiments(week=3, output_dir='./results')

## Step 8: Monitor Progress

In [None]:
from utils import check_progress

# check_progress is given the results directory; its return value is read
# below as a mapping with 'completed', 'total', 'percentage' and 'remaining'
# entries.
progress = check_progress('./results')
print(f"Completed: {progress['completed']}/{progress['total']}")
print(f"Progress: {progress['percentage']:.1f}%")
print(f"Remaining: {progress['remaining']} experiments")

## Step 9: Analyze Results

In [None]:
from analysis import WarmupAnalyzer

# Load results from ./results; `analyzer` is reused by the cells that follow.
analyzer = WarmupAnalyzer('./results')
print(f"Loaded {len(analyzer.df)} experiments\n")

rule = "=" * 60
stat_names = ['count', 'mean', 'std']

# One summary table per grouping column: (leading text, heading, column).
for lead, heading, group_col in (
    ("", "DATASET SUMMARY", 'dataset'),
    ("\n", "WARMUP SUMMARY", 'warmup_epochs'),
):
    print(lead + rule)
    print(heading)
    print(rule)
    print(analyzer.df.groupby(group_col)['best_val_acc'].agg(stat_names))

## Step 10: Create Heatmap

In [None]:
# Create heatmap for CIFAR-10
# Requires `analyzer` from the Step 9 cell and `plt` from the Step 3 cell.
# The two positional arguments are presumably the dataset and optimizer to
# plot (TODO confirm against WarmupAnalyzer); `metric` names the plotted value.
analyzer.plot_warmup_heatmap('cifar10', 'sgd', metric='best_val_acc')
plt.show()

## Step 11: Find Optimal Warmup

In [None]:
# Query the analyzer (created in the Step 9 cell) and print its answer under
# a heading.
optimal = analyzer.find_optimal_warmup('cifar10', 'sgd')
print("\nOptimal Warmup Durations:", optimal, sep="\n")

## Step 12: Test Hypotheses

In [None]:
# Run the analyzer's hypothesis tests; the result is iterated as a mapping of
# hypothesis name -> result, and each result is indexed by 'conclusion'.
hypotheses = analyzer.test_hypotheses()

divider = "=" * 60
print(divider, "HYPOTHESIS TESTING", divider, sep="\n")

for label, outcome in hypotheses.items():
    print(f"\n{label}:")
    print(f"  {outcome['conclusion']}")

## Step 13: Generate All Visualizations

In [None]:
from generate_visualizations import generate_all_visualizations

# Reads from './results' and writes to './visualizations', the directory the
# display and download cells below look in.
generate_all_visualizations(
    results_dir='./results',
    output_dir='./visualizations'
)

# Printed as soon as the call above returns; no per-file check is done here.
print("✅ All visualizations created!")

## Step 14: Display Visualizations

In [None]:
from IPython.display import Image, display
import os

# Display key plots
viz_files = [
    './visualizations/comparisons/warmup_comparison_across_datasets.png',
    './visualizations/comparisons/sgd_vs_adamw_comparison.png',
    './visualizations/comparisons/optimal_warmup_vs_dataset_size.png'
]

# Show every plot that exists and remember the ones that do not, so an empty
# output is explained instead of being silently skipped.
missing_files = []
for viz_file in viz_files:
    if os.path.exists(viz_file):
        print(f"\n{os.path.basename(viz_file)}:")
        display(Image(filename=viz_file))
    else:
        missing_files.append(viz_file)

if missing_files:
    print("\n⚠️  Visualizations not found (run Step 13 first):")
    for viz_file in missing_files:
        print(f"  - {viz_file}")

## Step 15: Download Results (Google Colab)

In [None]:
# Package and download results
import shutil
from google.colab import files

# (archive base name, archive file name, directory to pack)
bundles = (
    ('warmup_results', 'warmup_results.zip', './results'),
    ('warmup_visualizations', 'warmup_visualizations.zip', './visualizations'),
)

# Create every ZIP first ...
for base_name, _, source_dir in bundles:
    shutil.make_archive(base_name, 'zip', source_dir)

# ... then trigger the browser downloads
for _, zip_name, _ in bundles:
    files.download(zip_name)

print("✅ Downloads started!")

## Bonus: Save to Google Drive

In [None]:
# Mount Google Drive
import shutil
from google.colab import drive
drive.mount('/content/drive')

# Copy results to Drive.
# `cp -r src dst` nests src inside dst when dst already exists, so re-running
# the cell would create e.g. warmup_study_results/results. copytree with
# dirs_exist_ok=True (Python 3.8+) merges into the same destination on every
# run. It also raises if a source directory is missing, so the success message
# below is only printed after both copies succeed.
shutil.copytree(
    './results',
    '/content/drive/MyDrive/warmup_study_results',
    dirs_exist_ok=True,
)
shutil.copytree(
    './visualizations',
    '/content/drive/MyDrive/warmup_study_visualizations',
    dirs_exist_ok=True,
)

print("✅ Results saved to Google Drive!")