In [1]:
# Config
import torch
import os
# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Monitoring
from tqdm.notebook import tqdm
# IO
from os.path import join, exists, basename, dirname, splitext, expanduser
from glob import glob
# Parallel processing
from joblib import Parallel, delayed
import re
from PIL import Image
import supervision as sv


from supervision.metrics import MeanAveragePrecision


from supervision.metrics.core import Metric, MetricTarget
from tempfile import mkdtemp

from tempfile import mkdtemp
from ultralytics import RTDETR

from ultralytics import YOLO


In [3]:
# --- Configuration ---
# Project root. Every dataset path below is derived from it (the model
# registry cell derives its weight paths from it as well), so relocating the
# project only requires editing this one line.
home_dir = "/home/suruchi.hardaha/cosmos"

# Ground-truth images, shared by every evaluated model.
gt_image_dir = f"{home_dir}/ijcai_2025_data/LOLO/images"
# Ground-truth labels used when the evaluated model is flagged as OBB.
gt_obb_label_dir = f"{home_dir}/ijcai_2025_data/LOLO/labels"
# Ground-truth labels used for every non-OBB model.
gt_aa_labels_dir = f"{home_dir}/ijcai_2025_data/symlinked_AA_DATA/train_afghanistan_bangladesh_pakistan_test_india/test/labels"

# NOTE(review): `task` and `epochs` are not read by any cell visible in this
# notebook; kept in case cells outside this view depend on them.
task = "obb"
epochs = 100

# Forwarded to the model call in the evaluation loop as `conf=` and `iou=`.
conf = 0.05
iou = 0.50
# Forwarded to the model call as `imgsz=`. The spelling `imagsz` is kept
# because the evaluation loop references this exact name.
imagsz = 128

In [4]:
# /home/suruchihardaha/Active_learning/rishabh

In [5]:
# --- Model registry ---
# Maps a short display key to a record with three fields:
#   path      - checkpoint file, rooted at `home_dir`
#   type      - loader family name ("YOLO" or "RTDETR")
#   bbox_type - box flavour tag ("obb" or "aa"; "aa" presumably means
#               axis-aligned - confirm with the dataset owner)
# Insertion order is the order in which the loops below visit the models.
models = dict(
    yolo_11l_obb=dict(
        path=f"{home_dir}/ijcai_2025_data/runs/train_afg_ban_pak_test_ind/weights/best.pt",
        type="YOLO",
        bbox_type="obb",
    ),
    yolo_worldv2=dict(
        path=f"{home_dir}/ijcai_2025_data/runs_v2/train_afghanistan_bangladesh_pakistan_test_india/weights/best.pt",
        type="YOLO",
        bbox_type="aa",
    ),
    # Disabled entry, kept for reference:
    # yolo_12l_aa=dict(
    #     path=f"{home_dir}/runs_sentinel/detect/train_stratified_train_val_stratified_val__detect_yolo_v12-aa_128_64_100_val_True/weights/best.pt",
    #     type="YOLO",
    #     bbox_type="aa",
    # ),
    rtdetr=dict(
        path=f"{home_dir}/ijcai_2025_data/runs_rtdetr/train_afghanistan_bangladesh_pakistan_test_india/weights/best.pt",
        type="RTDETR",
        bbox_type="aa",
    ),
)

In [6]:
# --- Dataset config ---
data_yml_save_path = mkdtemp()
data_yml = """train: dummy
val: dummy
nc: 3
names: ["CFCBK", "FCBK", "Zigzag"]
"""
data_yml_path = f"{data_yml_save_path}/data.yml"
with open(data_yml_path, "w") as f:
    f.write(data_yml)
print(f"Data yml saved to {data_yml_path}")


Data yml saved to /tmp/tmpm977_0qs/data.yml


In [7]:
# Pre-flight check: walk the model registry, fail fast if a checkpoint file is
# missing, and echo each entry's path, loader type and box type.
for model_key, model_info in models.items():
    print(f"\nEvaluating model: {model_key}")
    model_type = model_info["type"]
    model_path = model_info["path"]
    assert exists(model_path), f"Model path {model_path} does not exist."
    bbox_type = model_info["bbox_type"]
    print(
        f"Model path: {model_path}",
        f"Model type: {model_type}",
        f"Bounding box type: {bbox_type}",
        sep="\n",
    )


Evaluating model: yolo_11l_obb
Model path: /home/suruchi.hardaha/cosmos/ijcai_2025_data/runs/train_afg_ban_pak_test_ind/weights/best.pt
Model type: YOLO
Bounding box type: obb

Evaluating model: yolo_worldv2
Model path: /home/suruchi.hardaha/cosmos/ijcai_2025_data/runs_v2/train_afghanistan_bangladesh_pakistan_test_india/weights/best.pt
Model type: YOLO
Bounding box type: aa

Evaluating model: rtdetr
Model path: /home/suruchi.hardaha/cosmos/ijcai_2025_data/runs_rtdetr/train_afghanistan_bangladesh_pakistan_test_india/weights/best.pt
Model type: RTDETR
Bounding box type: aa


In [8]:
# --- Evaluation loop ---
# For every registered model: load the matching ground-truth dataset, run
# inference image by image, compute class-wise and class-agnostic mAP with
# supervision, and print one LaTeX table row:
#   <model_key> & <CA-mAP50> & <per-class value for each class> \\
for model_key, model_info in models.items():
    print(f"\nEvaluating model: {model_key}")
    model_type = model_info["type"]
    model_path = model_info["path"]

    # Use the declared box type from the registry rather than sniffing the
    # key name, so renaming a key cannot silently switch the label set.
    is_obb = model_info["bbox_type"].lower() == "obb"
    label_dir = gt_obb_label_dir if is_obb else gt_aa_labels_dir

    # Load the appropriate dataset
    sv_dataset = sv.DetectionDataset.from_yolo(gt_image_dir, label_dir, data_yml_path, is_obb=is_obb)
    # Size the per-class result from the dataset instead of hard-coding 3.
    num_classes = len(sv_dataset.classes)

    # Load model; an unrecognised type is an error rather than an implicit
    # fall-through to RTDETR.
    loader_name = model_type.upper()
    if loader_name == "YOLO":
        model = YOLO(model_path)
    elif loader_name == "RTDETR":
        model = RTDETR(model_path)
    else:
        raise ValueError(
            f"Unsupported model type {model_type!r} for {model_key!r}; expected 'YOLO' or 'RTDETR'."
        )

    targets, predictions = [], []

    # The dataset yields (image path, image, ground-truth detections); only
    # the path and the ground truth are needed here.
    for name, _, gt_detection in tqdm(sv_dataset):
        result = model(
            name,
            imgsz=imagsz,
            iou=iou,
            conf=conf,
            exist_ok=True,
            save_txt=False,
            max_det=300,
            verbose=False
        )[0]
        sv_detection = sv.Detections.from_ultralytics(result)
        targets.append(gt_detection)
        predictions.append(sv_detection)

    print(f"target length: {len(targets)}, prediction length: {len(predictions)}")

    # NOTE(review): both metrics below use the default metric target. For the
    # OBB model this presumably scores the detections' `xyxy` boxes rather
    # than rotated-box IoU - confirm against the installed supervision version
    # and pass `metric_target=MetricTarget.ORIENTED_BOUNDING_BOXES` if that is
    # supported and intended. Left unchanged to keep reported numbers stable.

    # --- Class-wise mAP ---
    mAP_metric = MeanAveragePrecision(class_agnostic=False)
    mAP_result = mAP_metric.update(predictions, targets).compute()

    # First column of `ap_per_class` (presumably the IoU=0.50 threshold -
    # verify against the supervision docs), one row per matched class.
    class_wise_mAP = mAP_result.ap_per_class[:, 0].tolist()
    matched_classes = mAP_result.matched_classes.tolist()

    # Classes that never matched keep a value of 0 so every row has the same
    # number of columns.
    final_class_wise_mAP = [0] * num_classes
    for cls, mAP in zip(matched_classes, class_wise_mAP):
        final_class_wise_mAP[cls] = mAP

    # --- CA-mAP (class agnostic) ---
    mAP_metric = MeanAveragePrecision(class_agnostic=True)
    mAP_result = mAP_metric.update(predictions, targets).compute()
    ca_map_50 = mAP_result.map50

    # --- Format output ---
    final_class_wise_mAP_percentage = [round(m * 100, 2) for m in final_class_wise_mAP]
    ca_map_50_percentage = round(ca_map_50 * 100, 2)
    row_cells = [model_key, f"{ca_map_50_percentage:.2f}"]
    row_cells.extend(f"{val:.2f}" for val in final_class_wise_mAP_percentage)
    formatted_output = " & ".join(row_cells) + " \\\\"

    print("Formatted LaTeX row:")
    print(formatted_output)


Evaluating model: yolo_11l_obb


  0%|          | 0/54470 [00:00<?, ?it/s]

target length: 54470, prediction length: 54470
Formatted LaTeX row:
yolo_11l_obb & 53.39 & 0.00 & 22.34 & 9.04 \\

Evaluating model: yolo_worldv2


  0%|          | 0/54470 [00:00<?, ?it/s]

target length: 54470, prediction length: 54470
Formatted LaTeX row:
yolo_worldv2 & 58.94 & 0.00 & 25.66 & 14.02 \\

Evaluating model: rtdetr


  0%|          | 0/54470 [00:00<?, ?it/s]

target length: 54470, prediction length: 54470
Formatted LaTeX row:
rtdetr & 57.63 & 0.00 & 23.84 & 14.43 \\
