# Results comparison

# Task 3: Results comparison

This notebook implements an evaluation to check whether the model is able to detect Maltese traffic signs. It covers:
1. **Evaluation**: calculating F1-scores and recall.
2. **Inference & analytics**: visualizing detections and counting signs per image.

## Import all the necessary libraries

In [1]:
from __future__ import annotations

#  Standard library 
import importlib
import json
import subprocess
import sys

#  Package bootstrap helper
def ensure_package(pkg: str, import_name: str | None = None, pip_name: str | None = None):
    """Import a module, installing it with pip first if it is missing.

    Args:
        pkg: Default name used for both the import and the pip install.
        import_name: Module name to import when it differs from ``pkg``.
        pip_name: Distribution name to install when it differs from ``pkg``.

    Returns:
        The imported module object.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
        ImportError: If the module still cannot be imported after installation.
    """
    module_name = import_name or pkg
    try:
        return importlib.import_module(module_name)
    except ImportError:
        pip_target = pip_name or pkg
        print(f"Installing missing package: {pip_target}")
        # Install into the interpreter running this kernel, not whatever
        # "pip" happens to be first on PATH.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", pip_target],
            stdout=subprocess.DEVNULL,
        )
        # The import system caches directory listings; without this the
        # freshly installed package may not be visible to the running process.
        importlib.invalidate_caches()
        return importlib.import_module(module_name)

# Ensure third-party packages 
# ensure_package returns the imported module, so the usual aliases can be
# bound in one pass; packages are checked in the same order as listed.
torch, np, torchvision = (
    ensure_package(name) for name in ("torch", "numpy", "torchvision")
)
# Only the presence of pycocotools matters here; its return value is unused.
ensure_package("pycocotools")

# Explicit imports (clear & IDE-friendly) 
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torch
import numpy as np
import torchvision.transforms as T
from torch.utils.data import DataLoader

# Custom/local utilities 
# NOTE(review): later cells use SignsDataset, get_faster_rcnn and get_retinanet,
# which are presumably provided by this wildcard import — consider importing
# them by name so their origin is explicit.
try:
    from stefania_livori_utils import *
except ImportError as exc:
    # Re-raise with setup guidance, keeping the original failure as the cause
    # so a missing dependency inside the utils module is still visible.
    raise ImportError(
        "❌ 'stefania_livori_utils' not found.\n"
        "Make sure 'stefania_livori_utils.py' is in the working directory "
        "or on PYTHONPATH."
    ) from exc

#  Reproducibility 
# Seed both RNG sources imported by this notebook, not just torch, so that
# repeated runs start from the same random state.
torch.manual_seed(42)
np.random.seed(42)

print("Environment ready: all dependencies installed and imported.")


Environment ready: all dependencies installed and imported.


## Declare the device

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Variable to hold the type of evaluation we are doing

In [3]:
# Selects which evaluation the following cells set up: "sign" or "mounting".
# NOTE(review): this name shadows Python's built-in eval() for the rest of the
# kernel session; the dataset cells below read it, so rename them together.
eval = "sign"

## Dataset if using signs dataset

In [None]:
if eval == "sign":
    # Validation / test splits of the COCO-format sign dataset.
    ANNOTATION_FILE_VAL = "Assignment Material/COCO-based_COCO/annotations/val.json"
    DATA_DIR_VAL = "Assignment Material/COCO-based_COCO/images/val"
    ANNOTATION_FILE_TEST = "Assignment Material/COCO-based_COCO/annotations/test.json"
    DATA_DIR_TEST = "Assignment Material/COCO-based_COCO/images/test"

    # Read the file as UTF-8 explicitly so category names decode the same way
    # on every platform instead of depending on the locale's default encoding.
    with open(ANNOTATION_FILE_VAL, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Map each category id in the annotation file to its display name.
    CLASS_ID_TO_NAME = {cat["id"]: cat["name"] for cat in data["categories"]}
    # One extra class on top of the annotated categories — presumably the
    # background label expected by the detector; TODO confirm against
    # get_faster_rcnn.
    NUM_CLASSES = len(CLASS_ID_TO_NAME) + 1

    model = get_faster_rcnn(NUM_CLASSES)
    # NOTE(review): torch.load unpickles the checkpoint, so only load files
    # from a trusted source.
    model.load_state_dict(
        torch.load("./models/faster_rcnn_stefania_livori.pt", map_location=device)
    )



## Dataset if using mounting attributes

In [None]:

if eval == "mounting":
    # Validation / test splits of the COCO-format mounting-attribute dataset.
    DATA_DIR_VAL = "Assignment Material/COCO-based_COCO_mounting/images/val"
    ANNOTATION_FILE_VAL = "Assignment Material/COCO-based_COCO_mounting/annotations/val.json"
    ANNOTATION_FILE_TEST = "Assignment Material/COCO-based_COCO_mounting/annotations/test.json"
    DATA_DIR_TEST = "Assignment Material/COCO-based_COCO_mounting/images/test"

    # The "sign" cell is skipped in this mode, so the class count has to be
    # derived here from this dataset's own categories; otherwise NUM_CLASSES
    # is undefined on a fresh kernel.
    with open(ANNOTATION_FILE_VAL, "r", encoding="utf-8") as f:
        data = json.load(f)
    CLASS_ID_TO_NAME = {cat["id"]: cat["name"] for cat in data["categories"]}
    # Same "+1" convention as the sign cell — TODO confirm it matches the
    # class count the saved RetinaNet checkpoint was trained with.
    NUM_CLASSES = len(CLASS_ID_TO_NAME) + 1

    model = get_retinanet(NUM_CLASSES)
    # NOTE(review): torch.load unpickles the checkpoint, so only load files
    # from a trusted source.
    model.load_state_dict(
        torch.load("./models/mounting_retinanet_stefania_livori.pt", map_location=device)
    )
    

## mAP evaluation (COCO-based models only)

In [6]:
@torch.no_grad()
def evaluate_map(model, data_loader, device, coco_gt):
    """Run the model over a data loader and score its detections with COCOeval.

    Args:
        model: Detection model. It is called with a list of image tensors and
            must return one dict per image with "boxes", "scores" and "labels".
        data_loader: Iterable yielding ``(images, targets)`` batches, where each
            target provides an "image_id".
        device: Device the images are moved to before inference.
        coco_gt: Ground-truth ``COCO`` object used to load and score the results.

    Returns:
        ``coco_eval.stats`` (the values printed by ``summarize()``), or an array
        of 12 zeros when the model produced no detections at all.
    """
    model.eval()
    coco_results = []

    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for target, output in zip(targets, outputs):
            image_id = int(target["image_id"])

            # Convert [x1, y1, x2, y2] to COCO's [x, y, width, height] in one
            # vectorised step; clone so the model output is left untouched.
            boxes_xywh = output["boxes"].detach().cpu().reshape(-1, 4).clone()
            boxes_xywh[:, 2:] -= boxes_xywh[:, :2]
            scores = output["scores"].detach().cpu().tolist()
            labels = output["labels"].detach().cpu().tolist()

            for box, score, label in zip(boxes_xywh.tolist(), scores, labels):
                coco_results.append({
                    "image_id": image_id,
                    "category_id": int(label),
                    "bbox": [float(coord) for coord in box],
                    "score": float(score),
                })

    if not coco_results:
        # COCO.loadRes() inspects the first result entry and fails on an empty
        # list, so report zero metrics instead of crashing.
        print("No detections were produced; reporting all-zero COCO metrics.")
        return np.zeros(12)

    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats  # contains mAP values

Run the evaluation on the validation set (COCO-based models only).

In [None]:
# Move the model to the selected device and switch it to inference mode.
model.to(device).eval()

# Ground-truth annotations that the detections are scored against.
coco_gt = COCO(ANNOTATION_FILE_VAL)

# The only preprocessing applied is conversion to a tensor.
transform = T.Compose([T.ToTensor()])
val_dataset = SignsDataset(
    root=DATA_DIR_VAL, annFile=ANNOTATION_FILE_VAL, transforms=transform
)

# Images vary in size, so each batch is regrouped into parallel tuples
# (images, targets) by the collate function below.
val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

map_stats = evaluate_map(model, val_loader, device, coco_gt)

# The first two entries of the stats are AP@[0.50:0.95] and AP@0.50.
print("mAP@0.5:0.95:", map_stats[0])
print("mAP@0.5:", map_stats[1])

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.07s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.059
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.160
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.012
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.100
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.199
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.254
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets