In [23]:
# Config
import torch
import os
# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Monitoring
from tqdm.notebook import tqdm
# IO
from os.path import join, exists, basename, dirname, splitext, expanduser
from glob import glob
# Parallel processing
from joblib import Parallel, delayed
import re
from PIL import Image
import supervision as sv


from supervision.metrics import MeanAveragePrecision


from supervision.metrics.core import Metric, MetricTarget

from tempfile import mkdtemp
import pandas as pd
from tabulate import tabulate
from IPython.display import display, HTML


from ultralytics import YOLO
from ultralytics import RTDETR


In [24]:

# Set CUDA device
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Parameters
region = "dhaka_airshed"  # First region (change this for the second region)
task = "aa"
type="detect"
image_size = 128  # Image size (128 or 640)
sepecific_feature="none"
satellite_type = "sentinel"
class_names = ["CFCBK", "FCBK", "Zigzag"]
num_classes = len(class_names)
CLASSES = class_names  # For sv.ConfusionMatrix
model="yolo12l"  # Model type (e.g., "yolov8n", "yolov8s", etc.)
model_name = "yolo12l-aa"  # Model name (e.g., "yolov8n", "yolov8s", etc.)

# Define paths
base_fold_path = f"/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/brick_kilns_neurips_2025/runs/{type}"
print(f"Base fold path: {base_fold_path}")
crossval_base_path = f"/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/crossval/{region}_{task}_labels_{satellite_type}"
print(f"Cross-validation base path: {crossval_base_path}")
model_suffix = f"{region}_{task}_labels_sentinel_model_{model}_epochs_100_{{}}_{image_size}/weights/best.pt"
print(f"Model suffix: {model_suffix}")
# Check if the model path exists
model_path = os.path.join(base_fold_path, model_suffix.format(1))

print(f"Model path: {model_path}")  
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model path does not exist: {model_path}")
# Check if the cross-validation path exists
if not os.path.exists(crossval_base_path):
    raise FileNotFoundError(f"Cross-validation path does not exist: {crossval_base_path}")
# Check if the base fold path exists
if not os.path.exists(base_fold_path):
    raise FileNotFoundError(f"Base fold path does not exist: {base_fold_path}")
# Check if the image size is valid
if image_size not in [128, 640]:
    raise ValueError(f"Invalid image size: {image_size}. Must be one of [128, 640].")
# Check if the region is valid
valid_regions = ["wb_small_airshed", "lucknow_airshed", "delhi_airshed", "dhaka_airshed"]
if region not in valid_regions:
    raise ValueError(f"Invalid region: {region}. Must be one of {valid_regions}.")


Base fold path: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/brick_kilns_neurips_2025/runs/detect
Cross-validation base path: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/crossval/dhaka_airshed_aa_labels_sentinel
Model suffix: dhaka_airshed_aa_labels_sentinel_model_yolo12l_epochs_100_{}_128/weights/best.pt
Model path: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/brick_kilns_neurips_2025/runs/detect/dhaka_airshed_aa_labels_sentinel_model_yolo12l_epochs_100_1_128/weights/best.pt


In [25]:


# Create a temporary YAML file
data_yml_save_path = mkdtemp()
data_yml_path = os.path.join(data_yml_save_path, "data.yml")
with open(data_yml_path, "w") as f:
    f.write(f"""train: dummy
val: dummy
nc: {num_classes}
names: {class_names}
""")

# Initialize results DataFrame
results_df = pd.DataFrame(columns=[
    'Fold', 'IoU', 'Precision', 'Recall', 'F1 score', 
    'TP', 'FP', 'FN', 'Kiln instances', 'mAP_cfcbk', 'mAP_fcbk', 'mAP_zigzag'
])

# Evaluate each fold
for fold in range(0,4):
    print(f"\n🟩 Fold {fold} Evaluation")

    # Paths
    model_path = os.path.join(base_fold_path, model_suffix.format(fold))
    test_image_dir = os.path.join(crossval_base_path, str(fold), "test/images")
    test_label_dir = os.path.join(crossval_base_path, str(fold), "test/labels")

    # Load dataset
    sv_dataset = sv.DetectionDataset.from_yolo(test_image_dir, test_label_dir, data_yml_path,is_obb=False)
    print(f"Loaded {len(sv_dataset)} test samples")

    # Load model
    model = YOLO(model_path)

    targets, predictions = [], []

    # Inference loop
    for name, _, gt_detection in tqdm(sv_dataset):
        result = model(
            name,
            imgsz=image_size,
            iou=0.33,
            conf=0.25,
            max_det=300,
            verbose=False
        )[0]
        prediction = sv.Detections.from_ultralytics(result)
        predictions.append(prediction)
        targets.append(gt_detection)

    # mAP Calculation
    mAP_metric = MeanAveragePrecision(class_agnostic=False)
    mAP_result = mAP_metric.update(predictions, targets).compute()
    class_wise_mAP = [0] * num_classes
    for cls, mAP in zip(mAP_result.matched_classes.tolist(), mAP_result.ap_per_class[:, 0].tolist()):
        class_wise_mAP[cls] = mAP
    print(f"Class-wise mAP: {class_wise_mAP}")

    # Confusion Matrix and Metrics at IoU=0.5
    iou_threshold = 0.33
    cm = sv.ConfusionMatrix.from_detections(
        predictions=predictions,
        targets=targets,
        classes=CLASSES,
        conf_threshold=0.25,
        iou_threshold=iou_threshold
    ).matrix

    # True Positives
    tp = sum(cm[i][i] for i in range(num_classes))

    # Predicted Positives (Columns sum)
    predicted_positives = cm.sum(axis=0).sum()

    # Actual Positives (Rows sum)
    actual_positives = cm.sum(axis=1).sum()

    # Precision, Recall, F1 Score
    precision = tp / (predicted_positives + 1e-9)
    recall = tp / (actual_positives + 1e-9)
    f1_score = 2 * precision * recall / (precision + recall + 1e-9)

    fp = predicted_positives - tp
    fn = actual_positives - tp

    # Append results
    # Append results
    results_df = pd.concat([
        results_df,
        pd.DataFrame([{
            'Fold': fold,
            'IoU': round(iou_threshold, 2),
            'Precision': round(precision, 2),
            'Recall': round(recall, 2),
            'F1 score': round(f1_score, 2),
            'TP': tp,
            'FP': fp,
            'FN': fn,
            'Kiln instances': actual_positives,
            'mAP_cfcbk': round(class_wise_mAP[0], 2),
            'mAP_fcbk': round(class_wise_mAP[1], 2),
            'mAP_zigzag': round(class_wise_mAP[2], 2)
        }])
    ], ignore_index=True)

# Compute mean and variance for all numeric columns (excluding 'Fold' and 'IoU')
metrics_to_summarize = ['Precision', 'Recall', 'F1 score', 'TP', 'FP', 'FN', 'Kiln instances', 'mAP_cfcbk', 'mAP_fcbk', 'mAP_zigzag']
mean_values = results_df[metrics_to_summarize].mean()
std_values = results_df[metrics_to_summarize].std()

# Format as "mean ± std"
summary_row = {
    'Fold': 'mean ± std',
    'IoU': '-'
}
for metric in metrics_to_summarize:
    summary_row[metric] = f"{mean_values[metric]:.2f} ± {std_values[metric]:.2f}"

# Append the formatted summary row
results_df = pd.concat([
    results_df,
    pd.DataFrame([summary_row])
], ignore_index=True)

# Save to CSV
csv_path = f"{region}_crossval_results.csv"
results_df.to_csv(csv_path, index=False)

# Create formatted table with grid lines
region_title = f"📍 Cross-Validation Results — {model_name} - {region.replace('_', ' ').upper()}-{image_size}"
table_string = tabulate(results_df, headers='keys', tablefmt='grid', showindex=False)

# Save to TXT (append mode)
txt_path = "summary.txt"  # Change this to your desired path
with open(txt_path, "a") as f:  # Open in append mode
    f.write(f"\n{region_title.center(120)}\n\n")
    f.write(table_string)

print(f"✅ Results saved with grid formatting:\n- Text: {txt_path}\n- CSV : {csv_path}")
# Display the table in Jupyter Notebook
display(HTML(f"<h3>{region_title}</h3>"))
# Display the table
print("\n" + table_string)
# Display the summary
print("\n" + tabulate(pd.DataFrame([summary_row]), headers='keys', tablefmt='grid', showindex=False))
# Display the summary in Jupyter Notebook
# display(HTML(tabulate(pd.DataFrame([summary_row]), headers='keys', tablefmt='grid', showindex=False).replace('\n', '<br>')))




🟩 Fold 0 Evaluation
Loaded 96 test samples


  0%|          | 0/96 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.0, 0.3030363792113265]

🟩 Fold 1 Evaluation
Loaded 95 test samples




  0%|          | 0/95 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.0, 0.4653407553483264]

🟩 Fold 2 Evaluation
Loaded 95 test samples


  0%|          | 0/95 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.0, 0.40072656767546677]

🟩 Fold 3 Evaluation
Loaded 95 test samples


  0%|          | 0/95 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.0, 0.408158844265575]
✅ Results saved with grid formatting:
- Text: summary.txt
- CSV : dhaka_airshed_crossval_results.csv



+------------+-------+-------------+-------------+-------------+--------------+----------------+----------------+------------------+-------------+-------------+--------------+
| Fold       | IoU   | Precision   | Recall      | F1 score    | TP           | FP             | FN             | Kiln instances   | mAP_cfcbk   | mAP_fcbk    | mAP_zigzag   |
| 0          | 0.33  | 0.37        | 0.37        | 0.37        | 85.0         | 144.0          | 144.0          | 229.0            | 0           | 0.0         | 0.3          |
+------------+-------+-------------+-------------+-------------+--------------+----------------+----------------+------------------+-------------+-------------+--------------+
| 1          | 0.33  | 0.5         | 0.5         | 0.5         | 96.0         | 97.0           | 97.0           | 193.0            | 0           | 0.0         | 0.47         |
+------------+-------+-------------+-------------+-------------+--------------+----------------+----------------+------