# 9 — Batch Inference (Standardized Test-Set Predictions)

**Purpose:** Run standardized inference on the **test set** for all trained models,
saving `coco_instances_results.json` consistently to each model's output directory.

This is a **prerequisite** for:
- Bootstrap confidence intervals (notebook 8)
- Multi-threshold evaluation (notebook 8)
- Filter sensitivity analysis

**What it does:**
1. Iterates over all trained AGAR + Roboflow models in `config.py`
2. Loads each `model_final.pth`
3. Runs inference on the corresponding test set
4. Saves predictions to `{model_dir}/test/coco_instances_results.json`

**Run on:** Google Colab with GPU

In [None]:
# Environment bootstrap for Colab: mount Drive, fetch/update the project repo,
# and make the checkout importable from this notebook.
import sys, os

# Mount Google Drive (for data and model weights)
from google.colab import drive
drive.mount('/content/drive')

# Clone project repo (code, config, utils)
REPO_URL = "https://github.com/jozedu/deep-microbiology-colony-detection.git"
REPO_DIR = "/content/deep-microbiology-colony-detection"

# First run: clone the repo. Later runs: update the existing checkout in place.
# (`!` runs a shell command from the notebook; `{NAME}` is filled in with the
# value of the Python variable NAME before the command is executed.)
if not os.path.exists(REPO_DIR):
    !git clone {REPO_URL} {REPO_DIR}
else:
    !cd {REPO_DIR} && git pull

# Put the checkout at the front of sys.path so modules inside it can be
# imported by later cells; the membership test avoids duplicate entries when
# this cell is re-run.
if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

In [None]:
# Install detectron2 if needed
try:
    import detectron2
except ImportError:
    !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -q

In [None]:
import json
import torch
# Bind the package name explicitly. The install cell only binds `detectron2`
# when the package was already present; after a fresh `pip install` the name
# is undefined, and the `from detectron2 import ...` lines below do not bind
# it either, so the version print would raise NameError.
import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.data.datasets import register_coco_instances

# Project configuration: dataset locations, trained-model directories and
# architecture config names.
import config
from config import (
    AGAR_IMG_DIR, OUTPUTS_DIR, MODELS,
    AGAR_DATASETS, AGAR_TRAINED_MODELS,
    ROBOFLOW_DATASETS, ROBOFLOW_TRAINED_MODELS,
    is_retinanet,
)

# Report the runtime so the notebook output records which library version and
# device produced the predictions.
print(f"Detectron2 version: {detectron2.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

## Register all datasets

In [None]:
# ‚îÄ‚îÄ Register AGAR datasets ‚îÄ‚îÄ
from detectron2.data import DatasetCatalog

for subset_name, paths in AGAR_DATASETS.items():
    for split in ['train', 'val', 'test']:
        ds_name = f"{subset_name}_{split}"
        if ds_name not in DatasetCatalog.list():
            register_coco_instances(ds_name, {}, paths[split], AGAR_IMG_DIR)
            print(f"Registered: {ds_name}")

# ‚îÄ‚îÄ Register Roboflow datasets ‚îÄ‚îÄ
robo = ROBOFLOW_DATASETS['curated']
for split, dir_key in [('train', 'train_dir'), ('valid', 'val_dir'), ('test', 'test_dir')]:
    ds_name = f"roboflow_{split}"
    if ds_name not in DatasetCatalog.list():
        register_coco_instances(ds_name, {}, robo[split], robo[dir_key])
        print(f"Registered: {ds_name}")

print(f"\nTotal registered datasets: {len(DatasetCatalog.list())}")

## Define inference function

In [None]:
def run_inference_and_save(
    model_dir: str,
    config_file: str,
    test_dataset_name: str,
    num_classes: int = 3,
    score_thresh: float = 0.5,
    max_dets: int = 100,
    output_subfolder: str = "test",
    force_rerun: bool = False,
):
    """Evaluate one trained model on a registered test set and save its predictions.

    Loads ``{model_dir}/model_final.pth``, runs COCO evaluation on
    ``test_dataset_name`` and lets the evaluator write
    ``coco_instances_results.json`` into ``{model_dir}/{output_subfolder}/``.

    Args:
        model_dir: Directory containing ``model_final.pth``.
        config_file: Detectron2 model-zoo config name for the architecture.
            A name containing "retinanet" (case-insensitive) selects the
            RetinaNet head settings; anything else uses ROI-heads settings.
        test_dataset_name: Name of an already registered dataset.
        num_classes: Number of foreground classes the model was trained with.
        score_thresh: Test-time score threshold written to the config.
            NOTE(review): detections scoring below this value are not emitted,
            so the saved predictions (and AP) depend on it. Confirm 0.5 is
            intended for downstream multi-threshold analyses.
        max_dets: Maximum detections per image (model config and evaluator).
        output_subfolder: Sub-directory of ``model_dir`` that receives results.
        force_rerun: Re-run inference even if a results file already exists.

    Returns:
        * ``None`` when ``model_final.pth`` is missing (model skipped).
        * ``{"status": "cached", "path": <results file>}`` when predictions
          already exist and ``force_rerun`` is false. No metrics are loaded.
        * Otherwise the metrics mapping returned by ``inference_on_dataset``.
    """
    weights_path = os.path.join(model_dir, "model_final.pth")
    output_dir = os.path.join(model_dir, output_subfolder)
    results_file = os.path.join(output_dir, "coco_instances_results.json")

    # Nothing to evaluate without trained weights.
    if not os.path.exists(weights_path):
        print(f"  ⚠️ SKIP: model_final.pth not found at {weights_path}")
        return None

    # Reuse existing predictions unless the caller asks for a fresh run.
    # Only a status marker is returned; the file itself is not read.
    if os.path.exists(results_file) and not force_rerun:
        print(f"  ✅ Already exists: {results_file}")
        return {"status": "cached", "path": results_file}

    # Build config
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_file))
    cfg.DATASETS.TEST = (test_dataset_name,)
    cfg.MODEL.WEIGHTS = weights_path
    cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False
    cfg.TEST.DETECTIONS_PER_IMAGE = max_dets
    # Fall back to CPU instead of failing at model construction when no GPU
    # is attached to the runtime.
    if not torch.cuda.is_available():
        cfg.MODEL.DEVICE = "cpu"

    # The class count and score threshold live under different config nodes
    # depending on the architecture family.
    if 'retinanet' in config_file.lower():
        cfg.MODEL.RETINANET.NUM_CLASSES = num_classes
        cfg.MODEL.RETINANET.SCORE_THRESH_TEST = score_thresh
    else:
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh

    cfg.OUTPUT_DIR = output_dir
    os.makedirs(output_dir, exist_ok=True)

    # Run inference. The predictor is used only to obtain a model with the
    # weights loaded; evaluation goes through the standard test loader.
    predictor = None
    try:
        predictor = DefaultPredictor(cfg)
        evaluator = COCOEvaluator(
            test_dataset_name, output_dir=output_dir, max_dets_per_image=max_dets
        )
        test_loader = build_detection_test_loader(cfg, test_dataset_name)
        results = inference_on_dataset(predictor.model, test_loader, evaluator)
    finally:
        # Drop the model reference and release cached GPU memory even when
        # inference raises, so a failing model in a batch run is cleaned up
        # the same way as a successful one.
        del predictor
        torch.cuda.empty_cache()

    # Predictions are already on disk at this point, so a missing 'bbox'
    # entry should not turn the run into a failure.
    bbox = (results or {}).get('bbox')
    if bbox:
        print(f"  📊 AP={bbox['AP']:.1f}  AP50={bbox['AP50']:.1f}")
    else:
        print("  ⚠️ Evaluator returned no 'bbox' metrics")

    return results

## Map model keys → architecture config files + test set names

We need to know which Detectron2 config file and test dataset correspond to each trained model.

In [None]:
def get_arch_config(model_key: str) -> str:
    """Resolve a trained-model key to the Detectron2 config of its architecture.

    The key is matched case-insensitively against known architecture
    substrings, and the corresponding entry of ``MODELS`` is returned.

    Raises:
        ValueError: If no known architecture substring occurs in the key.
    """
    needle = model_key.lower()

    # Ordered (accepted substrings, MODELS key) pairs; the first match wins.
    arch_aliases = (
        (('mask_rcnn_r101',), 'mask_rcnn_R101'),
        (('mask_rcnn_r50',), 'mask_rcnn_R50'),
        (('retinanet_r101', 'retinanet_r_101'), 'retinanet_R101'),
        (('retinanet_r50', 'retinanet_r_50'), 'retinanet_R50'),
        (('faster_rcnn_r101', 'faster_r101'), 'faster_rcnn_R101'),
        (('faster_rcnn_r50', 'faster_r50'), 'faster_rcnn_R50'),
    )
    for aliases, models_key in arch_aliases:
        if any(alias in needle for alias in aliases):
            return MODELS[models_key]

    raise ValueError(f"Cannot determine architecture for model key: {model_key}")


def get_test_dataset(model_key: str, source: str = 'agar') -> str:
    """Return the registered test-dataset name for a trained model key.

    Roboflow models always map to ``'roboflow_test'``. For any other source
    the key must start with one of the AGAR subset names (for example
    ``'bright_faster_rcnn_R50'`` maps to ``'bright_test'``).

    Raises:
        ValueError: If the key does not start with a known AGAR subset name.
    """
    if source == 'roboflow':
        return 'roboflow_test'

    agar_subsets = ('total', 'bright', 'dark', 'vague', 'lowres')
    matched = next(
        (name for name in agar_subsets if model_key.startswith(name)),
        None,
    )
    if matched is None:
        raise ValueError(f"Cannot determine test dataset for model key: {model_key}")
    return f"{matched}_test"


def get_num_classes(source: str) -> int:
    """Return the class count for a dataset source: 4 for Roboflow, 3 otherwise (AGAR)."""
    if source == 'roboflow':
        return 4
    return 3


# Sanity-check the key → (architecture config, test dataset) mapping on the
# first three models of each family before launching the long batch runs.
# A key that cannot be mapped raises ValueError here.
print("=== AGAR Models ===")
for key in list(AGAR_TRAINED_MODELS)[:3]:
    print(f"  {key} → arch={get_arch_config(key)}, test={get_test_dataset(key)}")

print("\n=== Roboflow Models ===")
for key in list(ROBOFLOW_TRAINED_MODELS)[:3]:
    print(f"  {key} → arch={get_arch_config(key)}, test={get_test_dataset(key, 'roboflow')}")

## Run batch inference on all AGAR models

In [None]:
FORCE_RERUN = False  # Set True to re-generate all predictions

# model_key -> return value of run_inference_and_save:
#   metrics mapping (fresh run), {"status": "cached", ...} (reused), or
#   None (model_final.pth missing, model skipped).
agar_results = {}
# (model_key, error message) for every model whose run raised.
failed = []

print(f"Running inference on {len(AGAR_TRAINED_MODELS)} AGAR models...")
print("=" * 70)

for i, (model_key, model_dir) in enumerate(AGAR_TRAINED_MODELS.items(), start=1):
    print(f"\n[{i}/{len(AGAR_TRAINED_MODELS)}] {model_key}")
    print(f"  Dir: {model_dir}")

    # Per-model boundary: one broken model must not abort the whole batch,
    # so any error is reported and recorded and the loop continues.
    try:
        arch_config = get_arch_config(model_key)
        test_ds = get_test_dataset(model_key, 'agar')
        num_classes = get_num_classes('agar')

        result = run_inference_and_save(
            model_dir=model_dir,
            config_file=arch_config,
            test_dataset_name=test_ds,
            num_classes=num_classes,
            force_rerun=FORCE_RERUN,
        )
        agar_results[model_key] = result

    except Exception as e:
        print(f"  ❌ ERROR: {e}")
        failed.append((model_key, str(e)))

# A None result means the model was skipped for missing weights; it must not
# be reported as completed.
skipped = [key for key, res in agar_results.items() if res is None]

print(f"\n{'=' * 70}")
print(f"Completed: {len(agar_results) - len(skipped)}/{len(AGAR_TRAINED_MODELS)}")
if skipped:
    print(f"Skipped (missing weights): {len(skipped)}")
    for key in skipped:
        print(f"  - {key}")
if failed:
    print(f"Failed: {len(failed)}")
    for key, err in failed:
        print(f"  - {key}: {err}")

## Run batch inference on all Roboflow models

In [None]:
# model_key -> return value of run_inference_and_save:
#   metrics mapping (fresh run), {"status": "cached", ...} (reused), or
#   None (model_final.pth missing, model skipped).
robo_results = {}
# (model_key, error message) for every model whose run raised.
robo_failed = []

print(f"Running inference on {len(ROBOFLOW_TRAINED_MODELS)} Roboflow models...")
print("=" * 70)

for i, (model_key, model_dir) in enumerate(ROBOFLOW_TRAINED_MODELS.items(), start=1):
    print(f"\n[{i}/{len(ROBOFLOW_TRAINED_MODELS)}] {model_key}")
    print(f"  Dir: {model_dir}")

    # Per-model boundary: one broken model must not abort the whole batch,
    # so any error is reported and recorded and the loop continues.
    try:
        arch_config = get_arch_config(model_key)
        test_ds = get_test_dataset(model_key, 'roboflow')
        num_classes = get_num_classes('roboflow')

        result = run_inference_and_save(
            model_dir=model_dir,
            config_file=arch_config,
            test_dataset_name=test_ds,
            num_classes=num_classes,
            # FORCE_RERUN is the notebook-level switch set in the AGAR batch cell.
            force_rerun=FORCE_RERUN,
        )
        robo_results[model_key] = result

    except Exception as e:
        print(f"  ❌ ERROR: {e}")
        robo_failed.append((model_key, str(e)))

# A None result means the model was skipped for missing weights; it must not
# be reported as completed.
robo_skipped = [key for key, res in robo_results.items() if res is None]

print(f"\n{'=' * 70}")
print(f"Completed: {len(robo_results) - len(robo_skipped)}/{len(ROBOFLOW_TRAINED_MODELS)}")
if robo_skipped:
    print(f"Skipped (missing weights): {len(robo_skipped)}")
    for key in robo_skipped:
        print(f"  - {key}")
if robo_failed:
    print(f"Failed: {len(robo_failed)}")
    for key, err in robo_failed:
        print(f"  - {key}: {err}")

## Verify all prediction files exist

In [None]:
from config import get_predictions_path

# Report, per trained model, whether its test-set predictions file exists at
# the path the project config resolves for it, and how large it is.
print("=== Prediction file status ===")

# Both model families are checked the same way, so one loop is driven by
# (section label, model mapping, source name) triples.
for label, trained_models, source in (
    ("AGAR", AGAR_TRAINED_MODELS, 'agar'),
    ("Roboflow", ROBOFLOW_TRAINED_MODELS, 'roboflow'),
):
    print(f"\n--- {label} Models ---")
    for key in trained_models:
        path = get_predictions_path(key, source=source, subfolder='test')
        exists = os.path.exists(path)
        status = '✅' if exists else '❌'
        size = f"{os.path.getsize(path)/1024:.0f}KB" if exists else 'missing'
        print(f"  {status} {key}: {size}")

## Summary table of all test-set AP scores

In [None]:
# Collect results into a summary DataFrame
summary_rows = []

for key, result in {**agar_results, **robo_results}.items():
    if result and isinstance(result, dict) and 'bbox' in result:
        bbox = result['bbox']
        summary_rows.append({
            'Model': key,
            'AP': bbox.get('AP', None),
            'AP50': bbox.get('AP50', None),
            'AP75': bbox.get('AP75', None),
            'APs': bbox.get('APs', None),
            'APm': bbox.get('APm', None),
            'APl': bbox.get('APl', None),
        })

if summary_rows:
    import pandas as pd
    df_summary = pd.DataFrame(summary_rows)
    df_summary = df_summary.sort_values('AP50', ascending=False)
    
    # Save to CSV
    csv_path = os.path.join(config.RESULTS_DIR, 'all_models_test_ap.csv')
    os.makedirs(config.RESULTS_DIR, exist_ok=True)
    df_summary.to_csv(csv_path, index=False)
    print(f"Saved summary to: {csv_path}")
    
    display(df_summary.style.format(precision=1))
else:
    print("No new results generated (all cached). Set FORCE_RERUN=True to re-evaluate.")