# 4. Model Evaluation

This notebook evaluates trained models using:
- **COCO mAP** metrics (AP, AP50, AP75, AR).
- **Colony counting** accuracy: per-image absolute error (AE), symmetric absolute percentage error (sAPE), and per-class mean absolute error (MAE).
- **Visual inspection** of predictions on test images.

**Prerequisites:** Run `1_setup.ipynb` first, and make sure the weights of the trained model you want to evaluate are available.

## 4.1 Configuration

In [None]:
import os
import json

import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

import config
from utils.evaluation import evaluate_model, evaluate_bbox_counting, plot_training_curves
from utils.visualization import show_predictions

In [None]:
# ---------------------------------------------------------------------
# Evaluation settings -- edit the values in this cell, then re-run the
# cells below. Nothing else in the notebook needs to be changed.
# ---------------------------------------------------------------------

# Which dataset the test split is taken from.
DATASET_SOURCE = "agar"  # 'agar' or 'roboflow'
SUBSET = "total"  # For AGAR: 'total', 'bright', 'dark', 'vague', 'lowres'

# Which trained weights to load.
# TRAINED_MODEL_KEY is a key from config.AGAR_TRAINED_MODELS or
# config.ROBOFLOW_TRAINED_MODELS; MODEL_SOURCE selects between the two.
TRAINED_MODEL_KEY = "total_faster_rcnn_R101"
MODEL_SOURCE = "agar"  # 'agar' or 'roboflow'

# Architecture of the network; it must match the architecture that
# TRAINED_MODEL_KEY was trained with.
MODEL_KEY = "faster_rcnn_R101"

# Inference / scoring parameters.
NUM_CLASSES = 3  # AGAR: 3, Roboflow: 4
SCORE_THRESHOLD = 0.5
MAX_DETECTIONS = 100

# ---------------------------------------------------------------------

print(f"Evaluating: {TRAINED_MODEL_KEY} on {DATASET_SOURCE}/{SUBSET}")

## 4.2 Dataset & Model Setup

In [None]:
# --- Dataset registration ---
# Resolve the COCO annotation file, the image directory and the catalog name
# of the test split for the selected dataset source.
if DATASET_SOURCE == "agar":
    dataset = config.AGAR_DATASETS[SUBSET]
    test_path = dataset["test"]
    img_dir_test = config.AGAR_IMG_DIR
    test_name = f"{SUBSET}_test"
elif DATASET_SOURCE == "roboflow":
    # SUBSET is not consulted here: the "curated" Roboflow dataset is always used.
    dataset = config.ROBOFLOW_DATASETS["curated"]
    test_path = dataset["test"]
    img_dir_test = dataset["test_dir"]
    test_name = "robo_test"
else:
    # Fail early with a clear message; without this branch a typo in
    # DATASET_SOURCE would surface below as a NameError on test_name.
    raise ValueError(
        f"Unknown DATASET_SOURCE {DATASET_SOURCE!r}; expected 'agar' or 'roboflow'."
    )

# Re-running this cell must not fail on an already-registered name, so drop
# any previous registration (dataset and its metadata) before registering.
if test_name in DatasetCatalog.list():
    DatasetCatalog.remove(test_name)
    MetadataCatalog.remove(test_name)
register_coco_instances(test_name, {}, test_path, img_dir_test)

# --- Build predictor ---
config_file = config.MODELS[MODEL_KEY]
model_weights = config.get_model_weights(TRAINED_MODEL_KEY, MODEL_SOURCE)

# Evaluation artifacts are written to a "test" folder next to the weights file.
output_dir = os.path.dirname(model_weights)
eval_output = os.path.join(output_dir, "test")
os.makedirs(eval_output, exist_ok=True)

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_file))
cfg.DATASETS.TEST = (test_name,)
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False
cfg.MODEL.WEIGHTS = model_weights
cfg.TEST.DETECTIONS_PER_IMAGE = MAX_DETECTIONS

# RetinaNet keeps its class count and score threshold under MODEL.RETINANET;
# every other architecture handled here reads them from MODEL.ROI_HEADS.
if config.is_retinanet(MODEL_KEY):
    cfg.MODEL.RETINANET.NUM_CLASSES = NUM_CLASSES
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = SCORE_THRESHOLD
else:
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SCORE_THRESHOLD

predictor = DefaultPredictor(cfg)
print(f"Model loaded from: {model_weights}")

## 4.3 COCO mAP Evaluation

In [None]:
# Evaluate the predictor on the registered test split, capping the number of
# detections per image at MAX_DETECTIONS.
results = evaluate_model(
    cfg,
    predictor,
    test_name,
    eval_output,
    max_dets=MAX_DETECTIONS,
)

# Pretty-print the returned metrics as indented JSON.
print(json.dumps(results, indent=2))

## 4.4 Colony Counting Evaluation

In [None]:
# Counting metrics are computed from the predictions JSON expected in eval_output.
predictions_path = os.path.join(eval_output, "coco_instances_results.json")

if not os.path.exists(predictions_path):
    # Nothing to score yet: tell the user how to produce the predictions file.
    print(f"Predictions file not found at {predictions_path}")
    print("Run the COCO evaluation cell first.")
else:
    # Compare predictions against the ground-truth annotations of the test split.
    df = evaluate_bbox_counting(
        annotations_file=test_path,
        predictions_file=predictions_path,
        score_threshold=SCORE_THRESHOLD,
        iou_threshold=0.5,
        output_dir=eval_output,
        num_classes=NUM_CLASSES,
    )

    # Summary statistics over all rows, then a preview of the first ten rows.
    print(f"\nMean Absolute Error: {df['AE'].mean():.2f}")
    print(f"Mean sAPE: {df['sAPE'].mean():.4f}")
    display(df.head(10))

## 4.5 Visualize Predictions

In [None]:
# Visual check: run show_predictions on the test split with the sample count,
# display scale and seed fixed below.
show_predictions(
    predictor,
    test_name,
    num_samples=5,
    scale=0.5,
    seed=42,
)

## 4.6 Training Curves (Optional)

In [None]:
# metrics.json is looked up in the same directory as the model weights.
metrics_path = os.path.join(output_dir, "metrics.json")

if not os.path.exists(metrics_path):
    # Optional step: just report the missing file instead of failing.
    print(f"No metrics.json found at {metrics_path}")
else:
    plot_training_curves(metrics_path)