# 10 — Multi-Seed Training (Variance Quantification)

**Purpose:** Retrain the best-performing model with multiple random seeds
to quantify training stochasticity and report mean ± std AP.

**Plan:** 3 seeds × 2 models = **6 runs total** (~12-18 hours on Colab T4)
- Best model: Faster R-CNN R101 on `total` dataset
- Baseline: Faster R-CNN R50 on `total` dataset

**Training config matches original notebook exactly:**
- `batch_size = 8`
- `iterations = 2800` (hardcoded, not epochs × images / batch)
- `base_lr = 0.005`, `momentum = 0.9`, `weight_decay = 0.0005`
- `steps = [840]` (single LR decay at 30% = 3×2800/10)
- `warmup_iters = min(1000, save_checkpoint) = 280` (where `save_checkpoint = 2800 / 10 = 280` iterations)
- `ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512`
- `FILTER_EMPTY_ANNOTATIONS = False`
- `num_classes = 3`

**Run on:** Google Colab with GPU

In [None]:
import sys, os

# Mount Google Drive (for data and model weights)
from google.colab import drive
drive.mount('/content/drive')

# Clone project repo (code, config, utils)
REPO_URL = "https://github.com/jozedu/deep-microbiology-colony-detection.git"
REPO_DIR = "/content/deep-microbiology-colony-detection"

if not os.path.exists(REPO_DIR):
    !git clone {REPO_URL} {REPO_DIR}
else:
    !cd {REPO_DIR} && git pull

if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

In [None]:
# Install detectron2 if needed
try:
    import detectron2
except ImportError:
    !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -q

In [None]:
import datetime
import json
import numpy as np
import pandas as pd
import torch
import yaml

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog

import config
from config import (
    AGAR_IMG_DIR, OUTPUTS_DIR, MODELS, AGAR_DATASETS,
)
from utils.training import MyTrainer

# Report the library version and the compute device this session will use
print(f"Detectron2 version: {detectron2.__version__}")
if torch.cuda.is_available():
    print(f"CUDA: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA: CPU")

In [None]:
# Register the 'total' dataset splits (shared by both models).
# Splits that are already in the catalog are left alone so the cell can be
# re-run without a duplicate-registration error.
total_paths = AGAR_DATASETS['total']

for split in ('train', 'val', 'test'):
    ds_name = f"total_{split}"
    if ds_name in DatasetCatalog.list():
        continue
    register_coco_instances(ds_name, {}, total_paths[split], AGAR_IMG_DIR)
    print(f"Registered: {ds_name}")

# Training-set size, read from the annotation file's 'images' list
# (reported for logging only; it does not drive the schedule)
with open(total_paths['train'], 'r') as f:
    train_data = json.load(f)
NUM_TRAIN_IMAGES = len(train_data['images'])
print(f"Training images: {NUM_TRAIN_IMAGES}")

## Configuration

These values **exactly match** the original training in `detectron2.ipynb`.

In [None]:
# ── Experiment configuration ──
# One training run is launched for every (experiment, seed) pair.
SEEDS = [42, 123, 456]

# Per experiment:
#   name        – stem used in output directory names and as the result key
#   config_file – model-zoo config path, looked up in MODELS
#   label       – human-readable description
EXPERIMENTS = [
    dict(
        name="total_faster_rcnn_R101",
        config_file=MODELS["faster_rcnn_R101"],
        label="Faster R-CNN R101 (best)",
    ),
    dict(
        name="total_faster_rcnn_R50",
        config_file=MODELS["faster_rcnn_R50"],
        label="Faster R-CNN R50 (baseline)",
    ),
]

# ── Training hyperparameters (exactly as in original notebook) ──

# Schedule: a fixed iteration budget split into NUM_EPOCHS equal slices.
NUM_EPOCHS = 10
ITERATIONS = 2800  # hardcoded in original notebook
BATCH_SIZE = 8
SAVE_CHECKPOINT = ITERATIONS // NUM_EPOCHS  # 280 — checkpoint / eval interval
STEPS = [3 * ITERATIONS // NUM_EPOCHS]  # [840] — single LR decay at 30% of training

# Optimizer
BASE_LR = 0.005
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005

# Warmup: never longer than one checkpoint interval
WARMUP_ITERS = min(SAVE_CHECKPOINT, 1000)
WARMUP_FACTOR = 0.001

# Detection head / inference limits
ROI_BATCH_SIZE = 512
NUM_CLASSES = 3
MAX_DETS = 100
SCORE_THRESH = 0.5

# Registered dataset split names
TRAIN_NAME, VAL_NAME, TEST_NAME = "total_train", "total_val", "total_test"

# Echo the resolved run plan so it is captured in the notebook output
print("\n".join([
    f"Seeds: {SEEDS}",
    f"Iterations: {ITERATIONS}",
    f"LR steps: {STEPS}",
    f"Checkpoint every: {SAVE_CHECKPOINT} iters",
    f"Warmup: {WARMUP_ITERS} iters",
    f"Total runs: {len(SEEDS) * len(EXPERIMENTS)} = {len(SEEDS)} seeds × {len(EXPERIMENTS)} models",
]))

## Training loop

In [None]:
def build_cfg(config_file: str, seed: int, output_dir: str) -> object:
    """Assemble the Detectron2 config for one seeded training run.

    Starts from the model-zoo config named by ``config_file`` and overrides
    it with the notebook-level hyperparameter constants, so runs differ only
    in the model config, ``seed`` and ``output_dir``.

    Args:
        config_file: Model-zoo config path; resolved with
            ``model_zoo.get_config_file`` and also used to look up the
            initial checkpoint URL.
        seed: Value stored in ``cfg.SEED``.
        output_dir: Value stored in ``cfg.OUTPUT_DIR``.

    Returns:
        The populated config node.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_file))

    # Run identity: the seed under study and where artifacts are written
    cfg.SEED = seed
    cfg.OUTPUT_DIR = output_dir

    # Weights from model zoo (COCO pretrained)
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_file)

    # Data: train on the training split; the TEST slot holds the validation
    # split for the periodic evaluation configured below
    cfg.DATASETS.TRAIN = (TRAIN_NAME,)
    cfg.DATASETS.TEST = (VAL_NAME,)
    loader = cfg.DATALOADER
    loader.NUM_WORKERS = 4
    loader.FILTER_EMPTY_ANNOTATIONS = False

    # Solver — matches original notebook exactly
    solver = cfg.SOLVER
    solver.IMS_PER_BATCH = BATCH_SIZE
    solver.MAX_ITER = ITERATIONS
    solver.BASE_LR = BASE_LR
    solver.MOMENTUM = MOMENTUM
    solver.WEIGHT_DECAY = WEIGHT_DECAY
    solver.STEPS = tuple(STEPS)
    solver.WARMUP_METHOD = "linear"
    solver.WARMUP_ITERS = WARMUP_ITERS
    solver.WARMUP_FACTOR = WARMUP_FACTOR

    # Checkpoint and evaluate on the same cadence
    solver.CHECKPOINT_PERIOD = SAVE_CHECKPOINT
    cfg.TEST.EVAL_PERIOD = SAVE_CHECKPOINT

    # Model head and inference limits
    roi_heads = cfg.MODEL.ROI_HEADS
    roi_heads.NUM_CLASSES = NUM_CLASSES
    roi_heads.BATCH_SIZE_PER_IMAGE = ROI_BATCH_SIZE
    roi_heads.SCORE_THRESH_TEST = SCORE_THRESH
    cfg.TEST.DETECTIONS_PER_IMAGE = MAX_DETS

    return cfg


def train_and_evaluate(config_file: str, seed: int, run_name: str):
    """Run one seeded training job and score the final model on the test set.

    Creates a timestamped directory under ``OUTPUTS_DIR``, seeds the Python,
    NumPy and PyTorch RNGs, writes the resolved config to
    ``full_config.yaml``, trains with ``MyTrainer``, then evaluates
    ``model_final.pth`` on ``TEST_NAME`` with ``COCOEvaluator`` and writes
    the metrics to ``test_results.json``.

    Args:
        config_file: Model-zoo config path passed on to ``build_cfg``.
        seed: Random seed for this run.
        run_name: Experiment name; used in the output directory name and
            reported as ``"model"`` in the returned dict.

    Returns:
        Dict with ``seed``, ``model``, ``output_dir`` and the bbox metrics
        ``AP``, ``AP50``, ``AP75``, ``APs``, ``APm``, ``APl``.
    """
    import random

    timestamp = datetime.datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
    output_dir = os.path.join(OUTPUTS_DIR, f"seed{seed}_{run_name}_{timestamp}")
    os.makedirs(output_dir, exist_ok=True)

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Training: {run_name} | Seed: {seed}")
    print(f"Output: {output_dir}")
    print(banner)

    # Seed every RNG source up front for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    cfg = build_cfg(config_file, seed, output_dir)

    # Persist the fully resolved config BEFORE training starts
    config_path = os.path.join(output_dir, "full_config.yaml")
    with open(config_path, 'w') as cfg_file:
        cfg_file.write(cfg.dump())
    print(f"Saved config to: {config_path}")

    # Train from the configured initial weights (no resume)
    trainer = MyTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Switch the config over to the TEST split and the final checkpoint
    print("\nEvaluating on test set...")
    cfg.DATASETS.TEST = (TEST_NAME,)
    cfg.MODEL.WEIGHTS = os.path.join(output_dir, "model_final.pth")

    test_output = os.path.join(output_dir, "test")
    os.makedirs(test_output, exist_ok=True)

    # The predictor is used to load the weights; inference runs on its model
    predictor = DefaultPredictor(cfg)
    evaluator = COCOEvaluator(
        TEST_NAME, output_dir=test_output, max_dets_per_image=MAX_DETS
    )
    test_loader = build_detection_test_loader(cfg, TEST_NAME)
    results = inference_on_dataset(predictor.model, test_loader, evaluator)

    # Keep the raw evaluator output next to the model
    results_path = os.path.join(output_dir, "test_results.json")
    with open(results_path, 'w') as out_file:
        json.dump(results, out_file, indent=2)

    bbox = results['bbox']
    print(f"AP={bbox['AP']:.1f}  AP50={bbox['AP50']:.1f}")

    # Drop the model-holding objects and release cached GPU memory before
    # the next run
    del trainer, predictor
    torch.cuda.empty_cache()

    run_summary = {"seed": seed, "model": run_name, "output_dir": output_dir}
    for metric_key in ("AP", "AP50", "AP75", "APs", "APm", "APl"):
        run_summary[metric_key] = bbox[metric_key]
    return run_summary

In [None]:
# ── Run all experiments ──
# Results are checkpointed to disk after every run. When this cell is
# re-executed (e.g. after a Colab disconnect), runs already recorded in the
# partial-results file are reloaded and skipped instead of being retrained
# and overwritten.
RESUME_FROM_PARTIAL = True  # set to False to ignore earlier results and retrain everything

os.makedirs(config.RESULTS_DIR, exist_ok=True)
intermediate_path = os.path.join(config.RESULTS_DIR, "multi_seed_results_partial.json")

# Every (model, seed) pair this cell is expected to produce
planned_runs = {(exp["name"], seed) for exp in EXPERIMENTS for seed in SEEDS}

all_results = []
if RESUME_FROM_PARTIAL and os.path.exists(intermediate_path):
    try:
        with open(intermediate_path, 'r') as f:
            previous_results = json.load(f)
    except json.JSONDecodeError:
        # A disconnect in the middle of a write can leave a truncated file
        print(f"Could not parse {intermediate_path}; starting from scratch")
        previous_results = []
    # Keep only entries that belong to the current experiment plan
    all_results = [
        r for r in previous_results
        if (r.get("model"), r.get("seed")) in planned_runs
    ]
    print(f"Resuming: {len(all_results)} completed run(s) loaded from {intermediate_path}")

completed_runs = {(r["model"], r["seed"]) for r in all_results}

for exp in EXPERIMENTS:
    for seed in SEEDS:
        if (exp["name"], seed) in completed_runs:
            print(f"Skipping {exp['name']} | Seed: {seed} (already completed)")
            continue

        result = train_and_evaluate(
            config_file=exp["config_file"],
            seed=seed,
            run_name=exp["name"],
        )
        all_results.append(result)

        # Save intermediate results (in case Colab disconnects)
        with open(intermediate_path, 'w') as f:
            json.dump(all_results, f, indent=2)
        print(f"Saved intermediate results to: {intermediate_path}")

print(f"\n\n{'='*60}")
print(f"ALL EXPERIMENTS COMPLETE ({len(all_results)} runs)")
print(f"{'='*60}")

## Results: Mean ± Std across seeds

In [None]:
# One row per completed run (columns: seed, model, output_dir, metrics)
df = pd.DataFrame(all_results)

metrics = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl']

# Wide summary table: one row per model, columns "<metric>_mean" and
# "<metric>_std". pandas' std is the sample standard deviation (ddof=1).
summary = df.groupby('model')[metrics].agg(['mean', 'std'])
summary.columns = [f"{m}_{s}" for m, s in summary.columns]

# Take the seed count from the data so the header stays correct when SEEDS
# is changed
n_seeds = df['seed'].nunique()

# Format nicely: per model, each metric as mean ± std plus the raw
# per-seed values
print(f"\n=== Multi-Seed Results (mean ± std across {n_seeds} seeds) ===")
print()
# sort=False keeps models in order of first appearance in the results
for model, model_df in df.groupby('model', sort=False):
    print(f"\n{model}:")
    for m in metrics:
        mean_val = model_df[m].mean()
        std_val = model_df[m].std()
        values = model_df[m].tolist()
        print(f"  {m}: {mean_val:.1f} ± {std_val:.1f}  (seeds: {[f'{v:.1f}' for v in values]})")

# Save final results (make sure the target directory exists first, so this
# cell does not depend on the training cell having created it)
os.makedirs(config.RESULTS_DIR, exist_ok=True)

final_path = os.path.join(config.RESULTS_DIR, "multi_seed_results.json")
with open(final_path, 'w') as f:
    json.dump(all_results, f, indent=2)

csv_path = os.path.join(config.RESULTS_DIR, "multi_seed_results.csv")
df.to_csv(csv_path, index=False)

print(f"\nSaved to: {final_path}")
print(f"Saved to: {csv_path}")

In [None]:
# ── Visualization: bar chart with error bars ──
# One subplot per metric; bar height = mean across seeds, error bar = sample
# std, black dots = the individual per-seed values.
import matplotlib.pyplot as plt

models = df['model'].unique()
positions = range(len(models))
tick_labels = [m.replace('total_', '') for m in models]
# Seed count comes from the data so the titles stay correct if SEEDS changes
n_seeds = df['seed'].nunique()

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, metric in zip(axes, ['AP', 'AP50', 'AP75']):
    per_model = [df.loc[df['model'] == m, metric] for m in models]
    means = [series.mean() for series in per_model]
    stds = [series.std() for series in per_model]

    ax.bar(positions, means, yerr=stds, capsize=5,
           color=['#3878a2', '#e2a83e'], edgecolor='black', linewidth=0.5)

    # Add individual seed points on top of each bar
    for i, series in enumerate(per_model):
        values = series.values
        ax.scatter([i] * len(values), values, color='black', s=30, zorder=5)

    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, rotation=20, ha='right')
    ax.set_ylabel(metric)
    ax.set_title(f'{metric} across {n_seeds} seeds')
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()

# Save before show(); make sure the results directory exists first
os.makedirs(config.RESULTS_DIR, exist_ok=True)
plot_path = os.path.join(config.RESULTS_DIR, 'multi_seed_variance.png')
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
print(f"Saved plot to: {plot_path}")
plt.show()

## Update config.py with new model paths

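After training, add the new model directories to `config.py` manually. Each run writes to a directory named `seed<seed>_<model name>_<timestamp>`, where the timestamp has the form `DD-MM-YYYY_HH-MM-SS` and is printed on the `Output:` line at the start of the run; replace each `...` below with that timestamp, e.g.: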

```python
MULTI_SEED_MODELS = {
    "total_faster_rcnn_R101_seed42":  os.path.join(OUTPUTS_DIR, "seed42_total_faster_rcnn_R101_..."),
    "total_faster_rcnn_R101_seed123": os.path.join(OUTPUTS_DIR, "seed123_total_faster_rcnn_R101_..."),
    "total_faster_rcnn_R101_seed456": os.path.join(OUTPUTS_DIR, "seed456_total_faster_rcnn_R101_..."),
    # ... etc
}
```