In [36]:
# Config
import torch
import os
# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Monitoring
from tqdm.notebook import tqdm
# IO
from os.path import join, exists, basename, dirname, splitext, expanduser
from glob import glob
# Parallel processing
from joblib import Parallel, delayed
import re
from PIL import Image
import supervision as sv


from supervision.metrics import MeanAveragePrecision


from supervision.metrics.core import Metric, MetricTarget

from tempfile import mkdtemp
import pandas as pd
from tabulate import tabulate
from IPython.display import display, HTML


from ultralytics import YOLO

In [38]:

# ---------------------------------------------------------------------------
# Runtime configuration
# ---------------------------------------------------------------------------
# Expose only CUDA device index 3 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# ---------------------------------------------------------------------------
# Experiment parameters
# ---------------------------------------------------------------------------
# `region` selects which cross-validation split is evaluated; change it to
# evaluate the second region.
region, task, satellite_type = "wb_small_airshed", "obb", "sentinel"

class_names = ["CFCBK", "FCBK", "Zigzag"]
CLASSES = class_names  # alias passed to sv.ConfusionMatrix
num_classes = len(CLASSES)

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Root directory holding one training run per fold.
base_fold_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/brick_kilns_neurips_2025/runs/obb"

# Root directory of the cross-validation splits for this region/task/satellite.
crossval_base_path = (
    "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/crossval/"
    + "_".join([region, task, "labels", satellite_type])
)

# Relative path to a fold's best checkpoint; the `{}` placeholder is filled
# with the fold index via `model_suffix.format(fold)`.
model_suffix = region + "_obb_labels_sentinel_model_yolov8l-obb_epochs_100_{}_128/weights/best.pt"

# ---------------------------------------------------------------------------
# Temporary dataset YAML
# ---------------------------------------------------------------------------
# The file only carries the class count and class names; the train/val
# entries are placeholders.
data_yml_save_path = mkdtemp()
data_yml_path = os.path.join(data_yml_save_path, "data.yml")

yaml_lines = [
    "train: dummy",
    "val: dummy",
    f"nc: {num_classes}",
    f"names: {class_names}",
]
with open(data_yml_path, "w") as yaml_file:
    yaml_file.write("\n".join(yaml_lines) + "\n")

# Thresholds used when scoring detections.
conf_threshold = 0.25  # minimum confidence for a prediction to be kept
iou_threshold = 0.33   # IoU above which a prediction is matched to a target

# Column order of the per-fold results table.
result_columns = [
    'Fold', 'IoU', 'Precision', 'Recall', 'F1 score',
    'TP', 'FP', 'FN', 'Kiln instances', 'mAP_cfcbk', 'mAP_fcbk', 'mAP_zigzag'
]

# One dict per fold; turned into a DataFrame once after the loop instead of
# re-concatenating a growing frame on every iteration.
fold_records = []

# Evaluate each fold
for fold in range(4):
    print(f"\n🟩 Fold {fold} Evaluation")

    # Paths for this fold's checkpoint and held-out test split
    model_path = os.path.join(base_fold_path, model_suffix.format(fold))
    test_image_dir = os.path.join(crossval_base_path, str(fold), "test/images")
    test_label_dir = os.path.join(crossval_base_path, str(fold), "test/labels")

    # Load dataset
    sv_dataset = sv.DetectionDataset.from_yolo(test_image_dir, test_label_dir, data_yml_path)
    print(f"Loaded {len(sv_dataset)} test samples")

    # Load model
    model = YOLO(model_path)

    targets, predictions = [], []

    # Inference loop: the first item of each dataset entry is handed to the
    # model and the third is kept as ground truth.
    # NOTE(review): presumably (image_path, image, annotations) — confirm
    # against the installed supervision version.
    for name, _, gt_detection in tqdm(sv_dataset):
        result = model(
            name,
            imgsz=128,
            iou=0.33,  # Ultralytics NMS IoU threshold
            conf=conf_threshold,
            max_det=300,
            verbose=False
        )[0]
        prediction = sv.Detections.from_ultralytics(result)
        predictions.append(prediction)
        targets.append(gt_detection)

    # mAP calculation. Classes that were never matched keep the default 0.
    # NOTE(review): column 0 of ap_per_class is presumably AP at the first
    # IoU threshold (0.50) — confirm against supervision docs.
    mAP_metric = MeanAveragePrecision(class_agnostic=False)
    mAP_result = mAP_metric.update(predictions, targets).compute()
    class_wise_mAP = [0] * num_classes
    for cls, mAP in zip(mAP_result.matched_classes.tolist(), mAP_result.ap_per_class[:, 0].tolist()):
        class_wise_mAP[cls] = mAP
    print(f"Class-wise mAP: {class_wise_mAP}")

    # Confusion matrix and metrics at IoU = iou_threshold (0.33).
    # The matrix has shape (num_classes + 1, num_classes + 1): rows are
    # ground-truth classes, columns are predicted classes, and the extra last
    # row/column is "background" (unmatched predictions / missed targets).
    cm = sv.ConfusionMatrix.from_detections(
        predictions=predictions,
        targets=targets,
        classes=CLASSES,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold
    ).matrix

    # True positives: matched pairs whose predicted class equals the true class.
    tp = int(cm[:num_classes, :num_classes].trace())

    # Predicted positives: every prediction, i.e. the real-class columns only.
    # The background column holds missed targets, which are not predictions.
    predicted_positives = int(cm[:, :num_classes].sum())

    # Actual positives: every ground-truth instance, i.e. the real-class rows
    # only. The background row holds spurious predictions, which are not targets.
    actual_positives = int(cm[:num_classes, :].sum())

    # Precision, recall, F1 (epsilon guards against division by zero)
    precision = tp / (predicted_positives + 1e-9)
    recall = tp / (actual_positives + 1e-9)
    f1_score = 2 * precision * recall / (precision + recall + 1e-9)

    # FP/FN include class confusions as well as background errors.
    fp = predicted_positives - tp
    fn = actual_positives - tp

    # Record this fold's results
    fold_records.append({
        'Fold': fold,
        'IoU': round(iou_threshold, 2),
        'Precision': round(precision, 2),
        'Recall': round(recall, 2),
        'F1 score': round(f1_score, 2),
        'TP': tp,
        'FP': fp,
        'FN': fn,
        'Kiln instances': actual_positives,
        'mAP_cfcbk': round(class_wise_mAP[0], 2),
        'mAP_fcbk': round(class_wise_mAP[1], 2),
        'mAP_zigzag': round(class_wise_mAP[2], 2)
    })

# Per-fold results table (one row per fold, numeric dtypes)
results_df = pd.DataFrame(fold_records, columns=result_columns)

# Compute mean and standard deviation across folds for every metric column
# (everything except 'Fold' and 'IoU'). Values are cast to float first so the
# aggregation does not depend on the frame's column dtypes. pandas' .std()
# uses the sample standard deviation (ddof=1) by default.
metrics_to_summarize = ['Precision', 'Recall', 'F1 score', 'TP', 'FP', 'FN', 'Kiln instances', 'mAP_cfcbk', 'mAP_fcbk', 'mAP_zigzag']
fold_metrics = results_df[metrics_to_summarize].astype(float)
mean_values = fold_metrics.mean()
std_values = fold_metrics.std()

# Format as "mean ± std"
summary_row = {
    'Fold': 'mean ± std',
    'IoU': '-'
}
for metric in metrics_to_summarize:
    summary_row[metric] = f"{mean_values[metric]:.2f} ± {std_values[metric]:.2f}"

# Append the formatted summary row (metric columns hold strings in that row)
results_df = pd.concat([
    results_df,
    pd.DataFrame([summary_row])
], ignore_index=True)

# Save to CSV
csv_path = f"{region}_crossval_results.csv"
results_df.to_csv(csv_path, index=False)

# Create formatted table with grid lines
region_title = f"📍 Cross-Validation Results — {region.replace('_', ' ').upper()}"
table_string = tabulate(results_df, headers='keys', tablefmt='grid', showindex=False)

# Save to TXT (append mode, so repeated runs / regions accumulate in one file).
# The title contains non-ASCII characters, so the encoding is set explicitly
# rather than relying on the platform default.
txt_path = "summary.txt"  # Change this to your desired path
with open(txt_path, "a", encoding="utf-8") as f:
    f.write(f"\n{region_title.center(120)}\n\n")
    f.write(table_string)

print(f"✅ Results saved with grid formatting:\n- Text: {txt_path}\n- CSV : {csv_path}")

# Display the title and the results table in the notebook
display(HTML(f"<h3>{region_title}</h3>"))
display(results_df)




🟩 Fold 0 Evaluation
Loaded 31 test samples


  0%|          | 0/31 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.30164225213730167, 0.3684006558895774]

🟩 Fold 1 Evaluation
Loaded 31 test samples




  0%|          | 0/31 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.12036775106082037, 0.20713032841745715]

🟩 Fold 2 Evaluation
Loaded 31 test samples


  0%|          | 0/31 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.1995865520617996, 0.41495652840243014]

🟩 Fold 3 Evaluation
Loaded 31 test samples


  0%|          | 0/31 [00:00<?, ?it/s]

Class-wise mAP: [0, 0.17232800203097234, 0.2218988107601969]
✅ Results saved with grid formatting:
- Text: summary.txt
- CSV : wb_small_airshed_crossval_results.csv
