Here's a Python script to:

Load prediction files from both models.

Load ground truth.

Compare their formats (number of columns, data types, column meanings).

In [6]:
import numpy as np

def load_predictions(file_path):
    try:
        data = np.loadtxt(file_path)
        if data.ndim == 1:
            data = data.reshape(1, -1)
        return data
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

def describe_format(name, data):
    if data is None or data.shape[0] == 0:
        print(f"{name}: No data found or file empty.")
        return None
    
    print(f"{name} shape: {data.shape}")
    sample = data[0]
    print(f"Sample row from {name}: {sample}")

    num_columns = data.shape[1]

    if num_columns == 6:
        print(f"🔍 {name} appears to be a **prediction** with 6 columns:")
        print("   Likely format: [class_id, x_center, y_center, width, height, confidence]")
    elif num_columns == 5:
        print(f"📌 {name} appears to be a **ground truth** with 5 columns:")
        print("   Likely format: [class_id, x_center, y_center, width, height]")
    else:
        print(f"⚠️ {name} has {num_columns} columns, format unknown. Please inspect manually.")

    print()

def check_all_formats(yolo9_pred, kdvit_pred, gt):
    print("========== FORMAT CHECK ==========\n")
    describe_format("YOLOv9 Prediction", yolo9_pred)
    describe_format("KD-YOLO-ViT Prediction", kdvit_pred)
    describe_format("Ground Truth", gt)
    
    # Check if predictions can be ensembled directly
    if yolo9_pred is not None and kdvit_pred is not None:
        if yolo9_pred.shape[1] != kdvit_pred.shape[1]:
            print("❌ Prediction formats mismatch! Cannot ensemble directly.")
        else:
            print("✅ Prediction formats match. You can proceed to ensemble.")

# ======= Example usage =======
yolov9_pred_file = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/yolov9e_predictions/0045.txt'
kd_yolo_vit_pred_file = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/kd-yolox-vit_predictions/0045.txt'
ground_truth_file = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/test_labels/0045.txt'

yolov9_preds = load_predictions(yolov9_pred_file)
kd_yolo_vit_preds = load_predictions(kd_yolo_vit_pred_file)
ground_truth = load_predictions(ground_truth_file)

check_all_formats(yolov9_preds, kd_yolo_vit_preds, ground_truth)


YOLOv9 Prediction shape: (17, 6)
Sample row from YOLOv9 Prediction: [0.        0.462345  0.633205  0.0858389 0.0691288 0.558853 ]
🔍 YOLOv9 Prediction appears to be a **prediction** with 6 columns:
   Likely format: [class_id, x_center, y_center, width, height, confidence]

KD-YOLO-ViT Prediction shape: (292, 6)
Sample row from KD-YOLO-ViT Prediction: [0.       0.462891 0.631641 0.090625 0.072656 0.686064]
🔍 KD-YOLO-ViT Prediction appears to be a **prediction** with 6 columns:
   Likely format: [class_id, x_center, y_center, width, height, confidence]

Ground Truth shape: (1, 0)
Sample row from Ground Truth: []
⚠️ Ground Truth has 0 columns, format unknown. Please inspect manually.

✅ Prediction formats match. You can proceed to ensemble.


  data = np.loadtxt(file_path)


🔁 You currently have: [class_id, x_center, y_center, width, height, confidence]
For NMS convert: [x1, y1, x2, y2, confidence, class_id]

x1 = x_center - width / 2

y1 = y_center - height / 2

x2 = x_center + width / 2

y2 = y_center + height / 2


In [1]:
import os
import numpy as np

def convert_to_corners(data, is_prediction=True):
    """Convert [class_id, cx, cy, w, h, (conf)] -> [class_id, x1, y1, x2, y2, (conf)]"""
    converted = []
    for row in data:
        class_id, cx, cy, w, h = row[:5]
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = cx + w / 2
        y2 = cy + h / 2
        if is_prediction:
            conf = row[5]
            converted.append([class_id, x1, y1, x2, y2, conf])
        else:
            converted.append([class_id, x1, y1, x2, y2])
    return np.array(converted)

def process_folder(input_folder, output_folder, is_prediction=True):
    os.makedirs(output_folder, exist_ok=True)
    for file in os.listdir(input_folder):
        if not file.endswith('.txt'):
            continue
        input_path = os.path.join(input_folder, file)
        output_path = os.path.join(output_folder, file)

        try:
            data = np.loadtxt(input_path)
            if data.ndim == 1:
                data = data.reshape(1, -1)
            converted = convert_to_corners(data, is_prediction=is_prediction)
            np.savetxt(output_path, converted, fmt='%.6f')
            #print(f"✅ Converted and saved: {file}")
        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

# === File paths ===
yolov9_input = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/yolov9e_predictions'
kdvit_input = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/kd-yolox-vit_predictions'
gt_input = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/test_labels'

yolov9_output = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/yolov9_corners'
kdvit_output = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/kdvit_corners'
gt_output = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/gt_corners'

# === Run Conversion ===
process_folder(yolov9_input, yolov9_output, is_prediction=True)
process_folder(kdvit_input, kdvit_output, is_prediction=True)
process_folder(gt_input, gt_output, is_prediction=False)


❌ Error processing 0045.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0050.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0057.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0062.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0077.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0089.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0095.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 0098.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 1-0047.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 1-0075.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 1-0089.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 2-0030.txt: not enough values to unpack (expected 5, got 0)
❌ Error processing 2-0039.txt: not enough values to unpack (expected

  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)
  data = np.loadtxt(input_path)


Improved ensemble by nms

In [2]:
import os
import numpy as np
import torch
from torchvision.ops import nms
from collections import defaultdict

def weighted_box_fusion(predictions_list, model_weights, iou_thresh=0.5, conf_thresh=0.1):
    """
    Apply Weighted Box Fusion (WBF) to combine predictions from multiple models
    
    Parameters:
    - predictions_list: List of numpy arrays, each containing predictions from one model
                       Each prediction has format [class_id, x1, y1, x2, y2, conf]
    - model_weights: List of weights for each model
    - iou_thresh: IoU threshold for clustering boxes
    - conf_thresh: Confidence threshold for filtering weak predictions
    
    Returns:
    - Array of fused predictions [class_id, x1, y1, x2, y2, conf]
    """
    # Filter empty predictions and apply confidence threshold
    filtered_preds = []
    filtered_weights = []
    
    for i, preds in enumerate(predictions_list):
        if len(preds) > 0:
            mask = preds[:, 5] >= conf_thresh
            if np.any(mask):
                filtered_preds.append(preds[mask])
                filtered_weights.append(model_weights[i])
    
    if not filtered_preds:
        return np.empty((0, 6))
    
    # Convert all to tensors
    pred_tensors = [torch.tensor(p, dtype=torch.float32) for p in filtered_preds]
    
    # Process each class separately
    final_predictions = []
    
    # Get all unique class ids across all predictions
    all_classes = set()
    for preds in pred_tensors:
        if len(preds) > 0:
            all_classes.update(preds[:, 0].int().tolist())
    
    for class_id in all_classes:
        # Extract predictions for this class from each model
        class_preds = []
        class_weights = []
        
        for i, preds in enumerate(pred_tensors):
            if len(preds) > 0:
                class_mask = preds[:, 0] == class_id
                if torch.any(class_mask):
                    class_preds.append(preds[class_mask])
                    class_weights.append(filtered_weights[i])
        
        if not class_preds:
            continue
        
        # Combine all predictions for this class
        all_boxes = torch.cat([p[:, 1:5] for p in class_preds])
        all_scores = torch.cat([p[:, 5] * w for p, w in zip(class_preds, class_weights)])
        all_labels = torch.ones(len(all_boxes)) * class_id
        
        # Group boxes by IoU
        clusters = []
        cluster_scores = []
        used_indices = set()
        
        for i in range(len(all_boxes)):
            if i in used_indices:
                continue
                
            # Start a new cluster
            cluster_boxes = [all_boxes[i]]
            cluster_s = [all_scores[i]]
            used_indices.add(i)
            
            # Find all overlapping boxes
            for j in range(i+1, len(all_boxes)):
                if j in used_indices:
                    continue
                    
                box1 = all_boxes[i]
                box2 = all_boxes[j]
                
                # Calculate IoU
                x1 = max(box1[0], box2[0])
                y1 = max(box1[1], box2[1])
                x2 = min(box1[2], box2[2])
                y2 = min(box1[3], box2[3])
                
                if x2 < x1 or y2 < y1:
                    iou = 0.0
                else:
                    intersection = (x2 - x1) * (y2 - y1)
                    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
                    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
                    iou = intersection / (area1 + area2 - intersection)
                
                if iou >= iou_thresh:
                    cluster_boxes.append(all_boxes[j])
                    cluster_s.append(all_scores[j])
                    used_indices.add(j)
            
            # Average the boxes in the cluster, weighted by confidence
            if len(cluster_boxes) > 0:
                cluster_boxes = torch.stack(cluster_boxes)
                cluster_s = torch.stack(cluster_s)
                
                # Apply weights based on confidence
                weights = cluster_s / cluster_s.sum()
                weights = weights.unsqueeze(1).repeat(1, 4)
                
                # Calculate weighted average box
                fused_box = (cluster_boxes * weights).sum(dim=0)
                fused_score = cluster_s.mean()  # Average score
                
                clusters.append(fused_box)
                cluster_scores.append(fused_score)
        
        # Create final predictions for this class
        for box, score in zip(clusters, cluster_scores):
            final_predictions.append(torch.cat([torch.tensor([class_id]), box, torch.tensor([score])]))
    
    if final_predictions:
        return torch.stack(final_predictions).numpy()
    else:
        return np.empty((0, 6))

def soft_weighted_nms(predictions, iou_thresh=0.5, sigma=0.5, score_threshold=0.001):
    """
    Apply Soft-NMS to predictions
    
    Parameters:
    - predictions: numpy array of predictions [class_id, x1, y1, x2, y2, conf]
    - iou_thresh: IoU threshold for NMS
    - sigma: Parameter for Gaussian penalty function
    - score_threshold: Minimum score threshold to keep a box
    
    Returns:
    - Filtered predictions
    """
    if len(predictions) == 0:
        return np.empty((0, 6))
    
    # Group by class
    class_groups = defaultdict(list)
    for pred in predictions:
        class_groups[int(pred[0])].append(pred)
    
    final_predictions = []
    
    for class_id, preds in class_groups.items():
        preds = np.array(preds)
        if len(preds) == 1:
            final_predictions.append(preds[0])
            continue
            
        # Sort by confidence score
        order = np.argsort(-preds[:, 5])
        preds = preds[order]
        
        boxes = preds[:, 1:5]
        scores = preds[:, 5].copy()
        
        for i in range(len(boxes)):
            if scores[i] < score_threshold:
                continue
                
            # Keep the current box
            box_i = boxes[i]
            
            # Update scores of all other boxes
            for j in range(i+1, len(boxes)):
                if scores[j] < score_threshold:
                    continue
                    
                box_j = boxes[j]
                
                # Calculate IoU
                xx1 = max(box_i[0], box_j[0])
                yy1 = max(box_i[1], box_j[1])
                xx2 = min(box_i[2], box_j[2])
                yy2 = min(box_i[3], box_j[3])
                
                w = max(0, xx2 - xx1)
                h = max(0, yy2 - yy1)
                
                intersection = w * h
                area_i = (box_i[2] - box_i[0]) * (box_i[3] - box_i[1])
                area_j = (box_j[2] - box_j[0]) * (box_j[3] - box_j[1])
                union = area_i + area_j - intersection
                
                iou = intersection / union if union > 0 else 0
                
                # Apply Gaussian penalty to overlapping boxes
                if iou > iou_thresh:
                    scores[j] *= np.exp(-(iou * iou) / sigma)
        
        # Add boxes that are still above the threshold
        for i in range(len(preds)):
            if scores[i] >= score_threshold:
                pred_i = preds[i].copy()
                pred_i[5] = scores[i]  # Update with new score
                final_predictions.append(pred_i)
    
    if final_predictions:
        return np.array(final_predictions)
    else:
        return np.empty((0, 6))

def advanced_ensemble(yolo_folder, kdvit_folder, save_folder, 
                     iou_thresh=0.5, conf_thresh=0.1, 
                     model_weights=None, use_wbf=True):
    """
    Advanced ensemble combining YOLOv9 and KD-YOLOX-ViT predictions
    
    Parameters:
    - yolo_folder: Directory containing YOLOv9 predictions
    - kdvit_folder: Directory containing KD-YOLOX-ViT predictions
    - save_folder: Directory to save ensemble results
    - iou_thresh: IoU threshold for fusion
    - conf_thresh: Confidence threshold for filtering weak predictions
    - model_weights: List of weights for each model [yolo_weight, kdvit_weight]
    - use_wbf: If True, use Weighted Box Fusion, otherwise use Soft-NMS
    """
    os.makedirs(save_folder, exist_ok=True)
    
    # Default weights favor the better model (YOLOv9)
    if model_weights is None:
        model_weights = [0.7, 0.3]  # YOLOv9 has higher weight
    
    # Get files from yolo folder
    files = [f for f in os.listdir(yolo_folder) if f.endswith('.txt')]
    
    for file in files:
        yolov9_path = os.path.join(yolo_folder, file)
        kdvit_path = os.path.join(kdvit_folder, file)
        save_path = os.path.join(save_folder, file)
        
        # Skip if kdvit prediction doesn't exist
        if not os.path.exists(kdvit_path):
            print(f"⚠️ Missing KD-ViT prediction for {file}, skipping")
            continue
            
        try:
            # Load predictions
            yolov9_preds = np.loadtxt(yolov9_path).reshape(-1, 6) if os.path.getsize(yolov9_path) > 0 else np.empty((0, 6))
            kdvit_preds = np.loadtxt(kdvit_path).reshape(-1, 6) if os.path.getsize(kdvit_path) > 0 else np.empty((0, 6))
            
            # Handle empty files or single detection
            if yolov9_preds.size > 0 and yolov9_preds.ndim == 1:
                yolov9_preds = yolov9_preds.reshape(1, -1)
            if kdvit_preds.size > 0 and kdvit_preds.ndim == 1:
                kdvit_preds = kdvit_preds.reshape(1, -1)
                
            # Class-specific weighting for YOLOv9
            # YOLOv9 performs better on most classes, but KD-ViT is better on epithn and leuko
            for i in range(len(yolov9_preds)):
                class_id = int(yolov9_preds[i, 0])
                # Boost 'cast' and 'mycete' classes where YOLOv9 is significantly better
                if class_id == 0:  # cast
                    yolov9_preds[i, 5] *= 1.1
                elif class_id == 6:  # mycete
                    yolov9_preds[i, 5] *= 1.05
            
            # Boost certain classes for KD-ViT where it performs better
            for i in range(len(kdvit_preds)):
                class_id = int(kdvit_preds[i, 0])
                # Boost 'leuko' class where KD-ViT is better
                if class_id == 5:  # leuko
                    kdvit_preds[i, 5] *= 1.1
            
            # Apply fusion method
            if use_wbf:
                ensemble_preds = weighted_box_fusion(
                    [yolov9_preds, kdvit_preds],
                    model_weights,
                    iou_thresh,
                    conf_thresh
                )
            else:
                # Combine with confidence-weighted approach
                if len(yolov9_preds) == 0 and len(kdvit_preds) == 0:
                    ensemble_preds = np.empty((0, 6))
                else:
                    # Apply model-specific weights to confidence scores
                    if len(yolov9_preds) > 0:
                        yolov9_preds[:, 5] *= model_weights[0]
                    if len(kdvit_preds) > 0:
                        kdvit_preds[:, 5] *= model_weights[1]
                    
                    # Combine predictions
                    combined_preds = np.vstack((yolov9_preds, kdvit_preds)) if len(yolov9_preds) > 0 and len(kdvit_preds) > 0 else (
                        yolov9_preds if len(yolov9_preds) > 0 else kdvit_preds
                    )
                    
                    # Apply Soft-NMS
                    ensemble_preds = soft_weighted_nms(combined_preds, iou_thresh)
            
            # Save results
            np.savetxt(save_path, ensemble_preds, fmt='%.6f')
            #print(f"✅ Advanced ensemble saved: {file} | Detections: {len(ensemble_preds)}")
            
        except Exception as e:
            print(f"❌ Error processing {file}: {str(e)}")

# === Configuration ===
# Folders
model1_dir = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/yolov9_corners'
model2_dir = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/kdvit_corners'
ensemble_dir = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/advanced_ensemble_output'

# Model weights - give more weight to YOLOv9 as it performs better overall
model_weights = [0.7, 0.3]  # [YOLOv9, KD-ViT]

# === Run Advanced Ensemble ===
advanced_ensemble(
    model1_dir, 
    model2_dir, 
    ensemble_dir,
    iou_thresh=0.5, 
    conf_thresh=0.1,
    model_weights=model_weights,
    use_wbf=True  # Set to True for Weighted Box Fusion, False for Soft-NMS
)

mAP calculation

In [3]:
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Only allow these two classes
valid_class_ids = {0, 1}
class_names = {0: "epith", 1: "rbc/wbc"}


def calculate_iou(box1, box2):
    x1_min = box1[0] - box1[2] / 2
    x1_max = box1[0] + box1[2] / 2
    y1_min = box1[1] - box1[3] / 2
    y1_max = box1[1] + box1[3] / 2

    x2_min = box2[0] - box2[2] / 2
    x2_max = box2[0] + box2[2] / 2
    y2_min = box2[1] - box2[3] / 2
    y2_max = box2[1] + box2[3] / 2

    inter_xmin = max(x1_min, x2_min)
    inter_ymin = max(y1_min, y2_min)
    inter_xmax = min(x1_max, x2_max)
    inter_ymax = min(y1_max, y2_max)

    inter_area = max(0, inter_xmax - inter_xmin) * max(0, inter_ymax - inter_ymin)
    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)

    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0


def evaluate_map50_coco(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001):
    all_predictions = defaultdict(list)
    gt_counter_per_class = defaultdict(int)

    files = [f for f in os.listdir(pred_folder) if f.endswith('.txt') and os.path.exists(os.path.join(gt_folder, f))]

    for file in files:
        gt_path = os.path.join(gt_folder, file)
        pred_path = os.path.join(pred_folder, file)

        gt_data = np.loadtxt(gt_path, ndmin=2) if os.path.getsize(gt_path) > 0 else np.empty((0, 5))
        pred_data = np.loadtxt(pred_path, ndmin=2) if os.path.getsize(pred_path) > 0 else np.empty((0, 6))

        gt_this_image = {}
        for gt_box in gt_data:
            class_id = int(gt_box[0])
            if class_id not in valid_class_ids:
                continue
            gt_counter_per_class[class_id] += 1
            gt_this_image.setdefault(class_id, []).append([False, *gt_box[1:]])

        for pred_box in pred_data:
            class_id = int(pred_box[0])
            confidence = float(pred_box[5])
            if confidence < conf_thresh or class_id not in valid_class_ids:
                continue
            pred_bbox = [float(x) for x in pred_box[1:5]]
            all_predictions[class_id].append([file, confidence] + pred_bbox + [False])

            if class_id in gt_this_image:
                max_iou = -1
                max_idx = -1
                for idx, gt_box in enumerate(gt_this_image[class_id]):
                    if gt_box[0]:
                        continue
                    iou = calculate_iou(pred_bbox, gt_box[1:])
                    if iou > max_iou:
                        max_iou = iou
                        max_idx = idx
                if max_iou >= iou_thresh and max_idx >= 0:
                    gt_this_image[class_id][max_idx][0] = True
                    all_predictions[class_id][-1][-1] = True

    sum_ap = 0
    ap_dictionary = {}
    valid_classes = 0

    print("\nClass-wise Average Precision:")
    print("-" * 50)

    for class_id in sorted(valid_class_ids):
        if gt_counter_per_class[class_id] == 0:
            continue

        predictions = all_predictions.get(class_id, [])
        if not predictions:
            ap_dictionary[class_id] = 0.0
            sum_ap += 0.0
            valid_classes += 1
            print(f"{class_names[class_id]:<10} - AP: 0.0000, GT count: {gt_counter_per_class[class_id]}")
            continue

        predictions.sort(key=lambda x: x[1], reverse=True)
        tp = np.array([p[-1] for p in predictions], dtype=np.float64)
        fp = np.logical_not(tp).astype(np.float64)

        cumsum_tp = np.cumsum(tp)
        cumsum_fp = np.cumsum(fp)

        precision = cumsum_tp / (cumsum_tp + cumsum_fp + 1e-10)
        recall = cumsum_tp / gt_counter_per_class[class_id]

        for i in range(len(precision) - 2, -1, -1):
            precision[i] = max(precision[i], precision[i + 1])

        recall_points = np.unique(np.concatenate(([0], recall, [1])))
        interpolated_precision = np.zeros_like(recall_points)

        for i, r in enumerate(recall_points):
            precisions_at_recall = precision[recall >= r]
            if len(precisions_at_recall):
                interpolated_precision[i] = np.max(precisions_at_recall)

        ap = np.sum((recall_points[1:] - recall_points[:-1]) * interpolated_precision[:-1])
        ap_dictionary[class_id] = ap
        sum_ap += ap
        valid_classes += 1

        print(f"{class_names[class_id]:<10} - AP: {ap:.4f}, GT count: {gt_counter_per_class[class_id]}")

    mAP = sum_ap / valid_classes if valid_classes else 0.0
    print("-" * 50)
    print(f"Final mAP@50: {mAP:.4f}")
    return mAP, all_predictions, gt_counter_per_class


def plot_precision_recall_curves(all_predictions, gt_counter_per_class):
    for class_id in sorted(valid_class_ids):
        predictions = all_predictions.get(class_id, [])
        if not predictions or gt_counter_per_class[class_id] == 0:
            continue

        predictions.sort(key=lambda x: x[1], reverse=True)
        tp = np.array([p[-1] for p in predictions], dtype=np.float64)
        fp = np.logical_not(tp).astype(np.float64)

        cumsum_tp = np.cumsum(tp)
        cumsum_fp = np.cumsum(fp)

        precision = cumsum_tp / (cumsum_tp + cumsum_fp + 1e-10)
        recall = cumsum_tp / gt_counter_per_class[class_id]

        plt.plot(recall, precision, marker='.', label=class_names[class_id])

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    gt_folder = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/gt_corners'
    pred_folder = 'C:/Mansura/UTI-Revision2/ExternalValidation/NMS/advanced_ensemble_output'
    
    # Calculate mAP@50 using COCO method with same confidence threshold as YOLO (0.001)
    map50 = evaluate_map50_coco(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001)
    
    # Optionally plot precision-recall curves
    #plot_precision_recall_curves(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001)



Class-wise Average Precision:
--------------------------------------------------
epith      - AP: 0.9441, GT count: 412
rbc/wbc    - AP: 0.9487, GT count: 2038
--------------------------------------------------
Final mAP@50: 0.9464
