In [None]:
%%capture
!pip install --upgrade pip
!git clone -b feat/new_BO_protocol --single-branch https://github.com/josephmargaryan/ATDL2.git
%cd ATDL2
!pip install -e . --no-deps

# Pareto Multi-Objective Bayesian Optimization

This notebook runs Bayesian optimization followed by full retraining with diagnostics.

## Workflow
1. **Bayesian Optimization**: Find Pareto-optimal hyperparameters
2. **Visualize Pareto Front**: Inspect trade-offs between compression rate (CR) and accuracy
3. **Full Retrain**: Use best parameters for complete training with diagnostics
4. **Generate Plots**: Training curves, mixture dynamics, weight evolution GIF

## Optimization Strategy
- **Multi-objective**: Optimizes CR (Compression Rate) and Accuracy simultaneously
- **Sampler**: NSGA-II (automatic for Pareto mode)
- **Seed**: 42 (consistent across all runs)
- **Output**: Pareto front of optimal trade-off solutions

---
# LeNet-300-100 Optimization

## 1. Run Bayesian Optimization

In [None]:
!python scripts/tune_optuna.py \
    --preset lenet_300_100 \
    --use-pareto \
    --n-trials 50 \
    --save-dir runs/bo_pareto_lenet3002 \
    --load-pretrained runs/mnist_300/mnist_lenet_300_100_pre.pt \
    --pretrain-epochs 0 \
    --retrain-epochs 40 \
    --batch-size 128 \
    --num-workers 4 \
    --quant-skip-last \
    --eval-every 20 \
    --cr-every 20 \
    --seed 42

## 2. Visualize Pareto Front

In [None]:
!python scripts/tune_optuna_pareto_viz.py \
    --pareto-json runs/bo_pareto_lenet3002/*_pareto_results.json \
    --annotate

## 3. Full Retrain with Best Parameters

**TODO**: After inspecting the Pareto front, replace the `<INSERT_VALUE>` placeholders in the cell below with the chosen hyperparameters.

In [None]:
# TODO: Insert best hyperparameters from Pareto front
BEST_TAU = "<INSERT_VALUE>"
BEST_GAMMA_ALPHA = "<INSERT_VALUE>"
BEST_GAMMA_BETA = "<INSERT_VALUE>"
BEST_GAMMA_ALPHA_ZERO = "<INSERT_VALUE>"
BEST_GAMMA_BETA_ZERO = "<INSERT_VALUE>"

!python run_sws.py \
    --preset lenet_300_100 \
    --load-pretrained runs/mnist_300/mnist_lenet_300_100_pre.pt \
    --pretrain-epochs 0 \
    --retrain-epochs 100 \
    --tau {BEST_TAU} \
    --gamma-alpha {BEST_GAMMA_ALPHA} \
    --gamma-beta {BEST_GAMMA_BETA} \
    --gamma-alpha-zero {BEST_GAMMA_ALPHA_ZERO} \
    --gamma-beta-zero {BEST_GAMMA_BETA_ZERO} \
    --num-components 17 \
    --merge-kl-thresh 1e-10 \
    --quant-assign map \
    --complexity-mode keras \
    --tau-warmup-epochs 0 \
    --quant-skip-last \
    --batch-size 128 \
    --num-workers 4 \
    --eval-every 1 \
    --log-mixture-every 1 \
    --make-gif \
    --gif-fps 3 \
    --run-name lenet300_best_full \
    --save-dir runs/bo_pareto_lenet300 \
    --seed 42

## 4. Generate Diagnostic Plots

In [None]:
RUN_DIR = "runs/bo_pareto_lenet300/lenet300_best_full"

# Training curves (accuracy, loss, compression rate)
!python scripts/plot_curves.py --run-dir {RUN_DIR}

# Mixture evolution over epochs
!python scripts/plot_mixture_dynamics.py --run-dir {RUN_DIR}

# Weight scatter plot (w0 → wT movement)
!python scripts/plot_weights_scatter.py --run-dir {RUN_DIR} --sample 20000

# Final mixture + weight histogram
!python scripts/plot_mixture.py --run-dir {RUN_DIR} --checkpoint prequant

print(f"\n✓ All plots saved to {RUN_DIR}/figures/")
print(f"✓ Training GIF saved to {RUN_DIR}/figures/")

---
# LeNet5 Optimization

## 1. Run Bayesian Optimization

In [None]:
!python scripts/tune_optuna.py \
      --preset lenet5 \
      --use-pareto \
      --n-trials 50 \
      --save-dir runs/bo_pareto_lenet52 \
      --load-pretrained runs/mnist_caffe/mnist_lenet5_pre.pt \
      --pretrain-epochs 0 \
      --retrain-epochs 40 \
      --batch-size 128 \
      --num-workers 4 \
      --quant-skip-last \
      --eval-every 20 \
      --cr-every 20 \
      --seed 42


## 2. Visualize Pareto Front

In [None]:
!python scripts/tune_optuna_pareto_viz.py \
    --pareto-json runs/bo_pareto_lenet52/*_pareto_results.json \
    --annotate

## 3. Full Retrain with Best Parameters

**TODO**: After inspecting the Pareto front, replace the `<INSERT_VALUE>` placeholders in the cell below with the chosen hyperparameters.

In [None]:
# TODO: Insert best hyperparameters from Pareto front
BEST_TAU = "<INSERT_VALUE>"
BEST_GAMMA_ALPHA = "<INSERT_VALUE>"
BEST_GAMMA_BETA = "<INSERT_VALUE>"
BEST_GAMMA_ALPHA_ZERO = "<INSERT_VALUE>"
BEST_GAMMA_BETA_ZERO = "<INSERT_VALUE>"

!python run_sws.py \
    --preset lenet5 \
    --load-pretrained runs/mnist_caffe/mnist_lenet_5_caffe_pre.pt \
    --pretrain-epochs 0 \
    --retrain-epochs 100 \
    --tau {BEST_TAU} \
    --gamma-alpha {BEST_GAMMA_ALPHA} \
    --gamma-beta {BEST_GAMMA_BETA} \
    --gamma-alpha-zero {BEST_GAMMA_ALPHA_ZERO} \
    --gamma-beta-zero {BEST_GAMMA_BETA_ZERO} \
    --num-components 17 \
    --merge-kl-thresh 1e-10 \
    --quant-assign map \
    --complexity-mode keras \
    --tau-warmup-epochs 0 \
    --quant-skip-last \
    --batch-size 128 \
    --num-workers 4 \
    --eval-every 1 \
    --log-mixture-every 1 \
    --make-gif \
    --gif-fps 3 \
    --run-name lenet5_best_full \
    --save-dir runs/bo_pareto_lenet5 \
    --seed 42

## 4. Generate Diagnostic Plots

In [None]:
RUN_DIR = "runs/bo_pareto_lenet5/lenet5_best_full"

# Training curves (accuracy, loss, compression rate)
!python scripts/plot_curves.py --run-dir {RUN_DIR}

# Mixture evolution over epochs
!python scripts/plot_mixture_dynamics.py --run-dir {RUN_DIR}

# Weight scatter plot (w0 → wT movement)
!python scripts/plot_weights_scatter.py --run-dir {RUN_DIR} --sample 20000

# Convolutional filters (pre vs quantized)
!python scripts/plot_filters.py --run-dir {RUN_DIR} --checkpoint pre
!python scripts/plot_filters.py --run-dir {RUN_DIR} --checkpoint quantized

# Final mixture + weight histogram
!python scripts/plot_mixture.py --run-dir {RUN_DIR} --checkpoint prequant

print(f"\n✓ All plots saved to {RUN_DIR}/figures/")
print(f"✓ Training GIF saved to {RUN_DIR}/figures/")