Here's a Python script to:

Load prediction files from both models.

Load ground truth.

Compare their formats (number of columns, data types, column meanings).

In [1]:
import numpy as np

def load_predictions(file_path):
    try:
        data = np.loadtxt(file_path)
        if data.ndim == 1:
            data = data.reshape(1, -1)
        return data
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

def describe_format(name, data):
    if data is None or data.shape[0] == 0:
        print(f"{name}: No data found or file empty.")
        return None
    
    print(f"{name} shape: {data.shape}")
    sample = data[0]
    print(f"Sample row from {name}: {sample}")

    num_columns = data.shape[1]

    if num_columns == 6:
        print(f"🔍 {name} appears to be a **prediction** with 6 columns:")
        print("   Likely format: [class_id, x_center, y_center, width, height, confidence]")
    elif num_columns == 5:
        print(f"📌 {name} appears to be a **ground truth** with 5 columns:")
        print("   Likely format: [class_id, x_center, y_center, width, height]")
    else:
        print(f"⚠️ {name} has {num_columns} columns, format unknown. Please inspect manually.")

    print()

def check_all_formats(yolo9_pred, kdvit_pred, gt):
    print("========== FORMAT CHECK ==========\n")
    describe_format("YOLOv9 Prediction", yolo9_pred)
    describe_format("KD-YOLO-ViT Prediction", kdvit_pred)
    describe_format("Ground Truth", gt)
    
    # Check if predictions can be ensembled directly
    if yolo9_pred is not None and kdvit_pred is not None:
        if yolo9_pred.shape[1] != kdvit_pred.shape[1]:
            print("❌ Prediction formats mismatch! Cannot ensemble directly.")
        else:
            print("✅ Prediction formats match. You can proceed to ensemble.")

# ======= Example usage =======
yolov9_pred_file = 'C:/Mansura/UTI-Revision2/NMS/yolov9e_predictions/nh00010.txt'
kd_yolo_vit_pred_file = 'C:/Mansura/UTI-Revision2/NMS/yolov8x_predictions/nh00010.txt'
ground_truth_file = 'C:/Mansura/UTI-Revision2/NMS/test_labels/nh00010.txt'

yolov9_preds = load_predictions(yolov9_pred_file)
kd_yolo_vit_preds = load_predictions(kd_yolo_vit_pred_file)
ground_truth = load_predictions(ground_truth_file)

check_all_formats(yolov9_preds, kd_yolo_vit_preds, ground_truth)

Error reading C:/Mansura/UTI-Revision2/NMS/yolov8x_predictions/nh00010.txt: C:/Mansura/UTI-Revision2/NMS/yolov8x_predictions/nh00010.txt not found.

YOLOv9 Prediction shape: (39, 6)
Sample row from YOLOv9 Prediction: [6.        0.711089  0.545911  0.038479  0.0845022 0.852539 ]
🔍 YOLOv9 Prediction appears to be a **prediction** with 6 columns:
   Likely format: [class_id, x_center, y_center, width, height, confidence]

KD-YOLO-ViT Prediction: No data found or file empty.
Ground Truth shape: (11, 5)
Sample row from Ground Truth: [5.         0.27625    0.13333333 0.035      0.06      ]
📌 Ground Truth appears to be a **ground truth** with 5 columns:
   Likely format: [class_id, x_center, y_center, width, height]



🔁 You currently have: [class_id, x_center, y_center, width, height, confidence]
For NMS convert: [x1, y1, x2, y2, confidence, class_id]

x1 = x_center - width / 2

y1 = y_center - height / 2

x2 = x_center + width / 2

y2 = y_center + height / 2


In [1]:
import os
import numpy as np

def convert_to_corners(data, is_prediction=True):
    """Convert [class_id, cx, cy, w, h, (conf)] -> [class_id, x1, y1, x2, y2, (conf)]"""
    converted = []
    for row in data:
        class_id, cx, cy, w, h = row[:5]
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = cx + w / 2
        y2 = cy + h / 2
        if is_prediction:
            conf = row[5]
            converted.append([class_id, x1, y1, x2, y2, conf])
        else:
            converted.append([class_id, x1, y1, x2, y2])
    return np.array(converted)

def process_folder(input_folder, output_folder, is_prediction=True):
    os.makedirs(output_folder, exist_ok=True)
    for file in os.listdir(input_folder):
        if not file.endswith('.txt'):
            continue
        input_path = os.path.join(input_folder, file)
        output_path = os.path.join(output_folder, file)

        try:
            data = np.loadtxt(input_path)
            if data.ndim == 1:
                data = data.reshape(1, -1)
            converted = convert_to_corners(data, is_prediction=is_prediction)
            np.savetxt(output_path, converted, fmt='%.6f')
            #print(f"✅ Converted and saved: {file}")
        except Exception as e:
            print(f"❌ Error processing {file}: {e}")

# === File paths ===
yolov9_input = 'C:/Mansura/UTI-Revision2/NMS/yolov9e_predictions'
yolov8_input = 'C:/Mansura/UTI-Revision2/NMS/yolov8x_predictions'
yolov10_input = 'C:/Mansura/UTI-Revision2/NMS/yolov10x_predictions'
kdvit_input = 'C:/Mansura/UTI-Revision2/NMS/kd-yolox-vit_predictions'
gt_input = 'C:/Mansura/UTI-Revision2/NMS/test_labels'

yolov9_output = 'C:/Mansura/UTI-Revision2/NMS/yolov9_corners'
yolov8_output = 'C:/Mansura/UTI-Revision2/NMS/yolov8_corners'
yolov10_output = 'C:/Mansura/UTI-Revision2/NMS/yolov10_corners'
kdvit_output = 'C:/Mansura/UTI-Revision2/NMS/kdvit_corners'
gt_output = 'C:/Mansura/UTI-Revision2/NMS/gt_corners'

# === Run Conversion ===
process_folder(yolov9_input, yolov9_output, is_prediction=True)
process_folder(yolov8_input, yolov8_output, is_prediction=True)
process_folder(yolov10_input, yolov10_output, is_prediction=True)
process_folder(kdvit_input, kdvit_output, is_prediction=True)
process_folder(gt_input, gt_output, is_prediction=False)


Improved ensemble by nms (wbf)

In [22]:
import os
import numpy as np
import torch
from torchvision.ops import nms
from collections import defaultdict

def weighted_box_fusion(predictions_list, model_weights, iou_thresh=0.5, conf_thresh=0.1):
    """
    Apply Weighted Box Fusion (WBF) to combine predictions from multiple models
    
    Parameters:
    - predictions_list: List of numpy arrays, each containing predictions from one model
                       Each prediction has format [class_id, x1, y1, x2, y2, conf]
    - model_weights: List of weights for each model
    - iou_thresh: IoU threshold for clustering boxes
    - conf_thresh: Confidence threshold for filtering weak predictions
    
    Returns:
    - Array of fused predictions [class_id, x1, y1, x2, y2, conf]
    """
    # Filter empty predictions and apply confidence threshold
    filtered_preds = []
    filtered_weights = []
    
    for i, preds in enumerate(predictions_list):
        if len(preds) > 0:
            mask = preds[:, 5] >= conf_thresh
            if np.any(mask):
                filtered_preds.append(preds[mask])
                filtered_weights.append(model_weights[i])
    
    if not filtered_preds:
        return np.empty((0, 6))
    
    # Convert all to tensors
    pred_tensors = [torch.tensor(p, dtype=torch.float32) for p in filtered_preds]
    
    # Process each class separately
    final_predictions = []
    
    # Get all unique class ids across all predictions
    all_classes = set()
    for preds in pred_tensors:
        if len(preds) > 0:
            all_classes.update(preds[:, 0].int().tolist())
    
    for class_id in all_classes:
        # Extract predictions for this class from each model
        class_preds = []
        class_weights = []
        
        for i, preds in enumerate(pred_tensors):
            if len(preds) > 0:
                class_mask = preds[:, 0] == class_id
                if torch.any(class_mask):
                    class_preds.append(preds[class_mask])
                    class_weights.append(filtered_weights[i])
        
        if not class_preds:
            continue
        
        # Combine all predictions for this class
        all_boxes = torch.cat([p[:, 1:5] for p in class_preds])
        all_scores = torch.cat([p[:, 5] * w for p, w in zip(class_preds, class_weights)])
        all_labels = torch.ones(len(all_boxes)) * class_id
        
        # Group boxes by IoU
        clusters = []
        cluster_scores = []
        used_indices = set()
        
        for i in range(len(all_boxes)):
            if i in used_indices:
                continue
                
            # Start a new cluster
            cluster_boxes = [all_boxes[i]]
            cluster_s = [all_scores[i]]
            used_indices.add(i)
            
            # Find all overlapping boxes
            for j in range(i+1, len(all_boxes)):
                if j in used_indices:
                    continue
                    
                box1 = all_boxes[i]
                box2 = all_boxes[j]
                
                # Calculate IoU
                x1 = max(box1[0], box2[0])
                y1 = max(box1[1], box2[1])
                x2 = min(box1[2], box2[2])
                y2 = min(box1[3], box2[3])
                
                if x2 < x1 or y2 < y1:
                    iou = 0.0
                else:
                    intersection = (x2 - x1) * (y2 - y1)
                    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
                    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
                    iou = intersection / (area1 + area2 - intersection)
                
                if iou >= iou_thresh:
                    cluster_boxes.append(all_boxes[j])
                    cluster_s.append(all_scores[j])
                    used_indices.add(j)
            
            # Average the boxes in the cluster, weighted by confidence
            if len(cluster_boxes) > 0:
                cluster_boxes = torch.stack(cluster_boxes)
                cluster_s = torch.stack(cluster_s)
                
                # Apply weights based on confidence
                weights = cluster_s / cluster_s.sum()
                weights = weights.unsqueeze(1).repeat(1, 4)
                
                # Calculate weighted average box
                fused_box = (cluster_boxes * weights).sum(dim=0)
                fused_score = cluster_s.mean()  # Average score
                
                clusters.append(fused_box)
                cluster_scores.append(fused_score)
        
        # Create final predictions for this class
        for box, score in zip(clusters, cluster_scores):
            final_predictions.append(torch.cat([torch.tensor([class_id]), box, torch.tensor([score])]))
    
    if final_predictions:
        return torch.stack(final_predictions).numpy()
    else:
        return np.empty((0, 6))

def soft_weighted_nms(predictions, iou_thresh=0.5, sigma=0.5, score_threshold=0.001):
    """
    Apply Soft-NMS to predictions
    
    Parameters:
    - predictions: numpy array of predictions [class_id, x1, y1, x2, y2, conf]
    - iou_thresh: IoU threshold for NMS
    - sigma: Parameter for Gaussian penalty function
    - score_threshold: Minimum score threshold to keep a box
    
    Returns:
    - Filtered predictions
    """
    if len(predictions) == 0:
        return np.empty((0, 6))
    
    # Group by class
    class_groups = defaultdict(list)
    for pred in predictions:
        class_groups[int(pred[0])].append(pred)
    
    final_predictions = []
    
    for class_id, preds in class_groups.items():
        preds = np.array(preds)
        if len(preds) == 1:
            final_predictions.append(preds[0])
            continue
            
        # Sort by confidence score
        order = np.argsort(-preds[:, 5])
        preds = preds[order]
        
        boxes = preds[:, 1:5]
        scores = preds[:, 5].copy()
        
        for i in range(len(boxes)):
            if scores[i] < score_threshold:
                continue
                
            # Keep the current box
            box_i = boxes[i]
            
            # Update scores of all other boxes
            for j in range(i+1, len(boxes)):
                if scores[j] < score_threshold:
                    continue
                    
                box_j = boxes[j]
                
                # Calculate IoU
                xx1 = max(box_i[0], box_j[0])
                yy1 = max(box_i[1], box_j[1])
                xx2 = min(box_i[2], box_j[2])
                yy2 = min(box_i[3], box_j[3])
                
                w = max(0, xx2 - xx1)
                h = max(0, yy2 - yy1)
                
                intersection = w * h
                area_i = (box_i[2] - box_i[0]) * (box_i[3] - box_i[1])
                area_j = (box_j[2] - box_j[0]) * (box_j[3] - box_j[1])
                union = area_i + area_j - intersection
                
                iou = intersection / union if union > 0 else 0
                
                # Apply Gaussian penalty to overlapping boxes
                if iou > iou_thresh:
                    scores[j] *= np.exp(-(iou * iou) / sigma)
        
        # Add boxes that are still above the threshold
        for i in range(len(preds)):
            if scores[i] >= score_threshold:
                pred_i = preds[i].copy()
                pred_i[5] = scores[i]  # Update with new score
                final_predictions.append(pred_i)
    
    if final_predictions:
        return np.array(final_predictions)
    else:
        return np.empty((0, 6))

def advanced_ensemble(yolo_folder, kdvit_folder, save_folder, 
                     iou_thresh=0.5, conf_thresh=0.1, 
                     model_weights=None, use_wbf=True):
    """
    Advanced ensemble combining YOLOv9 and KD-YOLOX-ViT predictions
    
    Parameters:
    - yolo_folder: Directory containing YOLOv9 predictions
    - kdvit_folder: Directory containing KD-YOLOX-ViT predictions
    - save_folder: Directory to save ensemble results
    - iou_thresh: IoU threshold for fusion
    - conf_thresh: Confidence threshold for filtering weak predictions
    - model_weights: List of weights for each model [yolo_weight, kdvit_weight]
    - use_wbf: If True, use Weighted Box Fusion, otherwise use Soft-NMS
    """
    os.makedirs(save_folder, exist_ok=True)
    
    # Default weights favor the better model (YOLOv9)
    if model_weights is None:
        model_weights = [0.7, 0.3]  # YOLOv9 has higher weight
    
    # Get files from yolo folder
    files = [f for f in os.listdir(yolo_folder) if f.endswith('.txt')]
    
    for file in files:
        yolov9_path = os.path.join(yolo_folder, file)
        kdvit_path = os.path.join(kdvit_folder, file)
        save_path = os.path.join(save_folder, file)
        
        # Skip if kdvit prediction doesn't exist
        if not os.path.exists(kdvit_path):
            print(f"⚠️ Missing KD-ViT prediction for {file}, skipping")
            continue
            
        try:
            # Load predictions
            yolov9_preds = np.loadtxt(yolov9_path).reshape(-1, 6) if os.path.getsize(yolov9_path) > 0 else np.empty((0, 6))
            kdvit_preds = np.loadtxt(kdvit_path).reshape(-1, 6) if os.path.getsize(kdvit_path) > 0 else np.empty((0, 6))
            
            # Handle empty files or single detection
            if yolov9_preds.size > 0 and yolov9_preds.ndim == 1:
                yolov9_preds = yolov9_preds.reshape(1, -1)
            if kdvit_preds.size > 0 and kdvit_preds.ndim == 1:
                kdvit_preds = kdvit_preds.reshape(1, -1)
                
            # Class-specific weighting for YOLOv9
            # YOLOv9 performs better on most classes, but KD-ViT is better on epithn and leuko
            for i in range(len(yolov9_preds)):
                class_id = int(yolov9_preds[i, 0])
                # Boost 'cast' and 'mycete' classes where YOLOv9 is significantly better
                if class_id == 0:  # cast
                    yolov9_preds[i, 5] *= 1.1
                elif class_id == 6:  # mycete
                    yolov9_preds[i, 5] *= 1.05
            
            # Boost certain classes for KD-ViT where it performs better
            for i in range(len(kdvit_preds)):
                class_id = int(kdvit_preds[i, 0])
                # Boost 'leuko' class where KD-ViT is better
                if class_id == 5:  # leuko
                    kdvit_preds[i, 5] *= 1.1
            
            # Apply fusion method
            if use_wbf:
                ensemble_preds = weighted_box_fusion(
                    [yolov9_preds, kdvit_preds],
                    model_weights,
                    iou_thresh,
                    conf_thresh
                )
            else:
                # Combine with confidence-weighted approach
                if len(yolov9_preds) == 0 and len(kdvit_preds) == 0:
                    ensemble_preds = np.empty((0, 6))
                else:
                    # Apply model-specific weights to confidence scores
                    if len(yolov9_preds) > 0:
                        yolov9_preds[:, 5] *= model_weights[0]
                    if len(kdvit_preds) > 0:
                        kdvit_preds[:, 5] *= model_weights[1]
                    
                    # Combine predictions
                    combined_preds = np.vstack((yolov9_preds, kdvit_preds)) if len(yolov9_preds) > 0 and len(kdvit_preds) > 0 else (
                        yolov9_preds if len(yolov9_preds) > 0 else kdvit_preds
                    )
                    
                    # Apply Soft-NMS
                    ensemble_preds = soft_weighted_nms(combined_preds, iou_thresh)
            
            # Save results
            np.savetxt(save_path, ensemble_preds, fmt='%.6f')
            #print(f"✅ Advanced ensemble saved: {file} | Detections: {len(ensemble_preds)}")
            
        except Exception as e:
            print(f"❌ Error processing {file}: {str(e)}")

# === Configuration ===
# Folders
model1_dir = 'C:/Mansura/UTI-Revision2/NMS/yolov10_corners'
model2_dir = 'C:/Mansura/UTI-Revision2/NMS/kdvit_corners'
ensemble_dir = 'C:/Mansura/UTI-Revision2/NMS/advanced_ensemble_output'

# Model weights - give more weight to YOLOv9 as it performs better overall
model_weights = [0.6, 0.4]  # [YOLOv9, KD-ViT]

# === Run Advanced Ensemble ===
advanced_ensemble(
    model1_dir, 
    model2_dir, 
    ensemble_dir,
    iou_thresh=0.5, 
    conf_thresh=0.1,
    model_weights=model_weights,
    use_wbf=True  # Set to True for Weighted Box Fusion, False for Soft-NMS
)

nms and soft-nms

In [23]:
import os
import numpy as np
import time
from collections import defaultdict
import matplotlib.pyplot as plt

def calculate_iou(box1, box2):
    """Calculate IoU between two boxes [x1, y1, x2, y2]"""
    # Calculate intersection
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    # Check if boxes overlap
    if x2 < x1 or y2 < y1:
        return 0.0
    
    intersection = (x2 - x1) * (y2 - y1)
    
    # Calculate union
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection
    
    # Calculate IoU
    return intersection / union if union > 0 else 0

def standard_nms(predictions, iou_thresh=0.5, score_threshold=0.001):
    """
    Apply standard NMS to predictions
    
    Parameters:
    - predictions: numpy array of predictions [class_id, x1, y1, x2, y2, conf]
    - iou_thresh: IoU threshold for NMS
    - score_threshold: Minimum score threshold to keep a box
    
    Returns:
    - Filtered predictions
    """
    if len(predictions) == 0:
        return np.empty((0, 6))
    
    # Group by class
    class_groups = defaultdict(list)
    for pred in predictions:
        class_groups[int(pred[0])].append(pred)
    
    final_predictions = []
    
    for class_id, preds in class_groups.items():
        preds = np.array(preds)
        if len(preds) == 1:
            final_predictions.append(preds[0])
            continue
            
        # Sort by confidence score
        order = np.argsort(-preds[:, 5])
        preds = preds[order]
        
        keep = []
        while len(preds) > 0:
            # Keep highest score box
            keep.append(preds[0])
            
            # Exit if no more boxes
            if len(preds) == 1:
                break
                
            # Calculate IoU of first box with all others
            ious = []
            for i in range(1, len(preds)):
                iou = calculate_iou(preds[0, 1:5], preds[i, 1:5])
                ious.append(iou)
            
            # Keep only boxes with IoU less than threshold
            mask = np.array(ious) < iou_thresh
            preds = preds[1:][mask]
    
        final_predictions.extend(keep)
    
    if final_predictions:
        return np.array(final_predictions)
    else:
        return np.empty((0, 6))

def soft_nms(predictions, method='gaussian', iou_thresh=0.5, sigma=0.5, score_threshold=0.001):
    """
    Apply Soft-NMS to predictions
    
    Parameters:
    - predictions: numpy array of predictions [class_id, x1, y1, x2, y2, conf]
    - method: 'gaussian' or 'linear' penalty function
    - iou_thresh: IoU threshold for NMS
    - sigma: Parameter for Gaussian penalty function
    - score_threshold: Minimum score threshold to keep a box
    
    Returns:
    - Filtered predictions
    """
    if len(predictions) == 0:
        return np.empty((0, 6))
    
    # Group by class
    class_groups = defaultdict(list)
    for pred in predictions:
        class_groups[int(pred[0])].append(pred)
    
    final_predictions = []
    
    for class_id, preds in class_groups.items():
        preds = np.array(preds)
        if len(preds) == 1:
            final_predictions.append(preds[0])
            continue
            
        # Sort by confidence score
        order = np.argsort(-preds[:, 5])
        preds = preds[order]
        
        boxes = preds[:, 1:5]
        scores = preds[:, 5].copy()
        
        keep = []
        
        while len(scores) > 0:
            # Save highest scoring box
            if scores[0] >= score_threshold:
                # Save the original prediction with updated score
                new_pred = preds[0].copy()
                new_pred[5] = scores[0]
                keep.append(new_pred)
                
            # Break if only one box remains
            if len(scores) == 1:
                break
                
            # Get first box
            first_box = boxes[0]
            
            # Calculate IoU with all remaining boxes
            ious = []
            for i in range(1, len(boxes)):
                iou = calculate_iou(first_box, boxes[i])
                ious.append(iou)
            
            ious = np.array(ious)
            
            # Apply penalty to scores based on IoU
            for i in range(1, len(scores)):
                if ious[i-1] > iou_thresh:
                    if method == 'gaussian':
                        # Gaussian penalty
                        scores[i] *= np.exp(-(ious[i-1] * ious[i-1]) / sigma)
                    elif method == 'linear':
                        # Linear penalty
                        scores[i] *= (1 - ious[i-1])
            
            # Remove first box
            boxes = boxes[1:]
            scores = scores[1:]
            preds = preds[1:]
            
            # Re-sort by updated scores
            order = np.argsort(-scores)
            boxes = boxes[order]
            scores = scores[order]
            preds = preds[order]
    
        final_predictions.extend(keep)
    
    if final_predictions:
        return np.array(final_predictions)
    else:
        return np.empty((0, 6))

def combine_predictions(yolo_preds, kdvit_preds, model_weights=[0.6, 0.4]):
    """Combine predictions from two models with class-specific weighting"""
    if len(yolo_preds) > 0:
        # Class-specific weighting for YOLOv9
        for i in range(len(yolo_preds)):
            class_id = int(yolo_preds[i, 0])
            # Boost 'cast' and 'mycete' classes where YOLOv9 is significantly better
            if class_id == 0:  # cast
                yolo_preds[i, 5] *= 1.1
            elif class_id == 6:  # mycete
                yolo_preds[i, 5] *= 1.05
        
        # Apply model weight
        yolo_preds[:, 5] *= model_weights[0]
    
    if len(kdvit_preds) > 0:
        # Boost certain classes for KD-ViT where it performs better
        for i in range(len(kdvit_preds)):
            class_id = int(kdvit_preds[i, 0])
            # Boost 'leuko' class where KD-ViT is better
            if class_id == 5:  # leuko
                kdvit_preds[i, 5] *= 1.1
        
        # Apply model weight
        kdvit_preds[:, 5] *= model_weights[1]
    
    # Combine predictions
    if len(yolo_preds) == 0 and len(kdvit_preds) == 0:
        return np.empty((0, 6))
    elif len(yolo_preds) == 0:
        return kdvit_preds
    elif len(kdvit_preds) == 0:
        return yolo_preds
    else:
        return np.vstack((yolo_preds, kdvit_preds))

def process_files(yolo_folder, kdvit_folder, nms_output, soft_nms_output, 
                 iou_thresh=0.5, sigma=0.5, conf_thresh=0.1):
    """
    Process all files applying both NMS and Soft-NMS for comparison
    
    Parameters:
    - yolo_folder: Directory containing YOLOv9 predictions
    - kdvit_folder: Directory containing KD-YOLOX-ViT predictions
    - nms_output: Directory to save standard NMS results
    - soft_nms_output: Directory to save Soft-NMS results
    - iou_thresh: IoU threshold for NMS
    - sigma: Parameter for Gaussian penalty function in Soft-NMS
    - conf_thresh: Confidence threshold for filtering weak predictions
    """
    os.makedirs(nms_output, exist_ok=True)
    os.makedirs(soft_nms_output, exist_ok=True)
    
    # Get files from yolo folder
    files = [f for f in os.listdir(yolo_folder) if f.endswith('.txt')]
    
    nms_stats = {'files': 0, 'total_detections': 0, 'time': 0}
    soft_nms_stats = {'files': 0, 'total_detections': 0, 'time': 0}
    
    for file in files:
        yolov9_path = os.path.join(yolo_folder, file)
        kdvit_path = os.path.join(kdvit_folder, file)
        nms_save_path = os.path.join(nms_output, file)
        soft_nms_save_path = os.path.join(soft_nms_output, file)
        
        # Skip if kdvit prediction doesn't exist
        if not os.path.exists(kdvit_path):
            print(f"⚠️ Missing KD-ViT prediction for {file}, skipping")
            continue
            
        try:
            # Load predictions
            yolov9_preds = np.loadtxt(yolov9_path).reshape(-1, 6) if os.path.getsize(yolov9_path) > 0 else np.empty((0, 6))
            kdvit_preds = np.loadtxt(kdvit_path).reshape(-1, 6) if os.path.getsize(kdvit_path) > 0 else np.empty((0, 6))
            
            # Handle empty files or single detection
            if yolov9_preds.size > 0 and yolov9_preds.ndim == 1:
                yolov9_preds = yolov9_preds.reshape(1, -1)
            if kdvit_preds.size > 0 and kdvit_preds.ndim == 1:
                kdvit_preds = kdvit_preds.reshape(1, -1)
            
            # Combine predictions from both models
            combined_preds = combine_predictions(
                yolov9_preds.copy() if len(yolov9_preds) > 0 else np.empty((0, 6)), 
                kdvit_preds.copy() if len(kdvit_preds) > 0 else np.empty((0, 6))
            )
            
            # Apply standard NMS
            start_time = time.time()
            nms_preds = standard_nms(
                combined_preds,
                iou_thresh=iou_thresh,
                score_threshold=conf_thresh
            )
            nms_time = time.time() - start_time
            
            # Apply Soft-NMS
            start_time = time.time()
            soft_nms_preds = soft_nms(
                combined_preds,
                method='gaussian',
                iou_thresh=iou_thresh,
                sigma=sigma,
                score_threshold=conf_thresh
            )
            soft_nms_time = time.time() - start_time
            
            # Save results
            np.savetxt(nms_save_path, nms_preds, fmt='%.6f')
            np.savetxt(soft_nms_save_path, soft_nms_preds, fmt='%.6f')
            
            # Update stats
            nms_stats['files'] += 1
            nms_stats['total_detections'] += len(nms_preds)
            nms_stats['time'] += nms_time
            
            soft_nms_stats['files'] += 1
            soft_nms_stats['total_detections'] += len(soft_nms_preds)
            soft_nms_stats['time'] += soft_nms_time
            
            #print(f"✅ Processed {file} | NMS: {len(nms_preds)} detections | Soft-NMS: {len(soft_nms_preds)} detections")
            
        except Exception as e:
            print(f"❌ Error processing {file}: {str(e)}")
    
    # Calculate averages
    if nms_stats['files'] > 0:
        nms_stats['avg_detections'] = nms_stats['total_detections'] / nms_stats['files']
        nms_stats['avg_time'] = nms_stats['time'] / nms_stats['files']
        
    if soft_nms_stats['files'] > 0:
        soft_nms_stats['avg_detections'] = soft_nms_stats['total_detections'] / soft_nms_stats['files']
        soft_nms_stats['avg_time'] = soft_nms_stats['time'] / soft_nms_stats['files']
    
    return nms_stats, soft_nms_stats

def generate_comparison_report(nms_stats, soft_nms_stats, output_file):
    """Generate a comparison report between NMS and Soft-NMS"""
    with open(output_file, 'w') as f:
        f.write("# NMS vs Soft-NMS Comparison Report\n\n")
        
        f.write("## Performance Statistics\n\n")
        f.write("| Metric | Standard NMS | Soft-NMS |\n")
        f.write("|--------|-------------|----------|\n")
        f.write(f"| Files Processed | {nms_stats['files']} | {soft_nms_stats['files']} |\n")
        f.write(f"| Total Detections | {nms_stats['total_detections']} | {soft_nms_stats['total_detections']} |\n")
        f.write(f"| Average Detections per File | {nms_stats['avg_detections']:.2f} | {soft_nms_stats['avg_detections']:.2f} |\n")
        f.write(f"| Total Processing Time (s) | {nms_stats['time']:.4f} | {soft_nms_stats['time']:.4f} |\n")
        f.write(f"| Average Processing Time per File (s) | {nms_stats['avg_time']:.6f} | {soft_nms_stats['avg_time']:.6f} |\n\n")
        
        f.write("## Analysis\n\n")
        
        # Detection difference percentage
        detection_diff_pct = ((soft_nms_stats['total_detections'] - nms_stats['total_detections']) / 
                             nms_stats['total_detections'] * 100) if nms_stats['total_detections'] > 0 else 0
        
        f.write(f"### Detection Count Analysis\n\n")
        f.write(f"Soft-NMS detected {detection_diff_pct:.2f}% ")
        if detection_diff_pct > 0:
            f.write("more objects than standard NMS.\n\n")
        elif detection_diff_pct < 0:
            f.write("fewer objects than standard NMS.\n\n")
        else:
            f.write("the same number of objects as standard NMS.\n\n")
        
        # Time comparison
        time_diff_pct = ((soft_nms_stats['time'] - nms_stats['time']) / 
                        nms_stats['time'] * 100) if nms_stats['time'] > 0 else 0
        
        f.write(f"### Processing Time Analysis\n\n")
        f.write(f"Soft-NMS was {abs(time_diff_pct):.2f}% ")
        if time_diff_pct > 0:
            f.write("slower than standard NMS.\n\n")
        elif time_diff_pct < 0:
            f.write("faster than standard NMS.\n\n")
        else:
            f.write("the same speed as standard NMS.\n\n")
        
        f.write("### Key Differences\n\n")
        f.write("- **Standard NMS** completely removes overlapping boxes, which can be a problem for objects that are close together.\n")
        f.write("- **Soft-NMS** reduces the confidence of overlapping boxes instead of removing them completely.\n")
        f.write("- This usually results in **better recall** for Soft-NMS, especially in crowded scenes.\n")
        f.write("- Soft-NMS typically preserves more detections in areas where objects overlap.\n\n")
        
        f.write("### Recommendations\n\n")
        f.write("- Use **Standard NMS** when:\n")
        f.write("  - Processing speed is critical\n")
        f.write("  - Objects are well-separated\n")
        f.write("  - False positives are a concern\n\n")
        f.write("- Use **Soft-NMS** when:\n")
        f.write("  - Objects frequently overlap\n")
        f.write("  - Recall is more important than precision\n")
        f.write("  - You're working with crowded scenes\n")
        f.write("  - Missing detections is more problematic than having duplicate detections\n")

def plot_comparison(nms_stats, soft_nms_stats, output_image):
    """Create comparison plots between NMS and Soft-NMS"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    
    # Plot 1: Average Detections
    methods = ['Standard NMS', 'Soft-NMS']
    avg_detections = [nms_stats['avg_detections'], soft_nms_stats['avg_detections']]
    
    ax1.bar(methods, avg_detections, color=['blue', 'green'])
    ax1.set_title('Average Detections per File')
    ax1.set_ylabel('Count')
    ax1.grid(axis='y', linestyle='--', alpha=0.7)
    
    for i, v in enumerate(avg_detections):
        ax1.text(i, v + 0.1, f"{v:.2f}", ha='center')
    
    # Plot 2: Average Processing Time
    avg_times = [nms_stats['avg_time'], soft_nms_stats['avg_time']]
    
    ax2.bar(methods, avg_times, color=['blue', 'green'])
    ax2.set_title('Average Processing Time per File')
    ax2.set_ylabel('Time (seconds)')
    ax2.grid(axis='y', linestyle='--', alpha=0.7)
    
    for i, v in enumerate(avg_times):
        ax2.text(i, v + 0.0001, f"{v:.6f}", ha='center')
    
    plt.tight_layout()
    plt.savefig(output_image)
    plt.close()

if __name__ == "__main__":
    # === Configuration ===
    # Folders
    model1_dir = 'C:/Mansura/UTI-Revision2/NMS/yolov10_corners'
    model2_dir = 'C:/Mansura/UTI-Revision2/NMS/kdvit_corners'
    standard_nms_dir = 'C:/Mansura/UTI-Revision2/NMS/standard_nms_output'
    soft_nms_dir = 'C:/Mansura/UTI-Revision2/NMS/soft_nms_output'
    
    # Report outputs
    #report_file = 'C:/Mansura/UTI-Revision2/NMS/nms_comparison_report.md'
    #comparison_plot = 'C:/Mansura/UTI-Revision2/NMS/nms_comparison_plot.png'
    
    print("\n=== Starting NMS vs Soft-NMS Comparison ===")
    
    # Process files with both methods
    nms_stats, soft_nms_stats = process_files(
        model1_dir,
        model2_dir,
        standard_nms_dir,
        soft_nms_dir,
        iou_thresh=0.5,
        sigma=0.5,
        conf_thresh=0.1
    )
    
    # Generate comparison report
    #generate_comparison_report(nms_stats, soft_nms_stats, report_file)
    
    # Create comparison plots
    #plot_comparison(nms_stats, soft_nms_stats, comparison_plot)
    
    print(f"\n=== Comparison Complete ===")
    print(f"Standard NMS found {nms_stats['total_detections']} detections across {nms_stats['files']} files")
    print(f"Soft-NMS found {soft_nms_stats['total_detections']} detections across {soft_nms_stats['files']} files")
    #print(f"Detailed report saved to: {report_file}")
    #print(f"Comparison plot saved to: {comparison_plot}")


=== Starting NMS vs Soft-NMS Comparison ===

=== Comparison Complete ===
Standard NMS found 20876 detections across 852 files
Soft-NMS found 8983 detections across 852 files


mAP calculation

In [26]:
import os
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt

def calculate_iou(box1, box2):
    """
    Calculate IoU between two bounding boxes
    
    Args:
        box1: [x1, y1, x2, y2]
        box2: [x1, y1, x2, y2]
    
    Returns:
        iou: intersection over union
    """
    # Get coordinates of intersection
    x1_inter = max(box1[0], box2[0])
    y1_inter = max(box1[1], box2[1])
    x2_inter = min(box1[2], box2[2])
    y2_inter = min(box1[3], box2[3])
    
    # Calculate area of intersection
    width_inter = max(0, x2_inter - x1_inter)
    height_inter = max(0, y2_inter - y1_inter)
    area_inter = width_inter * height_inter
    
    # Calculate area of both boxes
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    # Calculate area of union
    area_union = area_box1 + area_box2 - area_inter
    
    # Return IoU
    if area_union > 0:
        return area_inter / area_union
    else:
        return 0.0

def evaluate_map50_coco(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001):
    """
    Calculate mAP@50 for predictions against ground truth using COCO-style AP
    
    Args:
        pred_folder: folder containing prediction files in format [class_id, x1, y1, x2, y2, confidence]
        gt_folder: folder containing ground truth files in format [class_id, x1, y1, x2, y2]
        iou_thresh: IoU threshold for considering a prediction as correct
        conf_thresh: Confidence threshold for filtering predictions
    
    Returns:
        mAP@50: mean Average Precision at IoU threshold of 0.5
    """
    # Dictionary to store all predictions for each class
    all_predictions = defaultdict(list)
    # Dictionary to store ground truth count for each class
    gt_counter_per_class = defaultdict(int)
    
    # Map class_ids to class names if available
    class_names = {
        0: "cast",
        1: "cryst",
        2: "epith",
        3: "epithn",
        4: "eryth",
        5: "leuko",
        6: "mycete"
    }
    
    files = [f for f in os.listdir(pred_folder) if f.endswith('.txt') and os.path.exists(os.path.join(gt_folder, f))]
    
    # Process each file
    for file in files:
        # Load ground truth
        gt_path = os.path.join(gt_folder, file)
        if os.path.getsize(gt_path) > 0:
            gt_data = np.loadtxt(gt_path, ndmin=2)  # ndmin=2 ensures it's 2D even with single detection
        else:
            gt_data = np.empty((0, 5))
            
        # Load predictions
        pred_path = os.path.join(pred_folder, file)
        if os.path.getsize(pred_path) > 0:
            pred_data = np.loadtxt(pred_path, ndmin=2)  # ndmin=2 ensures it's 2D even with single detection
        else:
            pred_data = np.empty((0, 6))
        
        # Process ground truth for this image
        gt_this_image = {}
        for gt_box in gt_data:
            class_id = int(gt_box[0])
            # Increment ground truth count for this class
            gt_counter_per_class[class_id] += 1
            
            # Add ground truth box to dictionary, format: [used_flag, x1, y1, x2, y2]
            # used_flag: whether this gt box has been matched with a prediction
            if class_id not in gt_this_image:
                gt_this_image[class_id] = []
                
            gt_this_image[class_id].append([False, gt_box[1], gt_box[2], gt_box[3], gt_box[4]])
        
        # Process predictions for this image
        for pred_box in pred_data:
            class_id = int(pred_box[0])
            confidence = float(pred_box[5])
            
            # Skip predictions below confidence threshold
            if confidence < conf_thresh:
                continue
                
            pred_bbox = [float(x) for x in pred_box[1:5]]  # x1, y1, x2, y2
            
            # Add prediction to all_predictions list
            # Format: [file_name, confidence, x1, y1, x2, y2, tp/fp]
            # tp/fp: whether this prediction is true positive or false positive, initially set to False (fp)
            all_predictions[class_id].append([file, confidence] + pred_bbox + [False])
            
            # If there are ground truths for this class in this image
            if class_id in gt_this_image and len(gt_this_image[class_id]) > 0:
                # Find the ground truth box with highest IoU
                max_iou = -1
                max_idx = -1
                for idx, gt_box in enumerate(gt_this_image[class_id]):
                    # Skip if this ground truth box has already been matched
                    if gt_box[0]:
                        continue
                        
                    # Calculate IoU
                    iou = calculate_iou(pred_bbox, gt_box[1:])
                    
                    # Update if this IoU is higher
                    if iou > max_iou:
                        max_iou = iou
                        max_idx = idx
                
                # If we found a match with IoU > threshold
                if max_iou >= iou_thresh and max_idx >= 0:
                    # Mark this ground truth box as used
                    gt_this_image[class_id][max_idx][0] = True
                    # Mark this prediction as true positive
                    all_predictions[class_id][-1][-1] = True
    
    # Calculate AP for each class using COCO-style AP
    sum_ap = 0
    ap_dictionary = {}
    valid_classes = 0
    
    print("\nClass-wise Average Precision:")
    print("-" * 50)
    
    # Process each class
    for class_id in sorted(gt_counter_per_class.keys()):
        # If no ground truth exists for this class
        if gt_counter_per_class[class_id] == 0:
            continue
            
        # If no predictions for this class
        if class_id not in all_predictions:
            ap_dictionary[class_id] = 0.0
            sum_ap += 0.0
            valid_classes += 1
            class_name = class_names.get(class_id, f"Class {class_id}")
            print(f"{class_name:<10} - AP: 0.0000, GT count: {gt_counter_per_class[class_id]}")
            continue
            
        # Sort predictions by confidence
        predictions = all_predictions[class_id]
        predictions.sort(key=lambda x: x[1], reverse=True)
        
        # Initialize true positives and false positives array
        tp = np.array([pred[-1] for pred in predictions], dtype=np.float64)
        fp = np.logical_not(tp).astype(np.float64)
        
        # Calculate cumulative false positives and true positives
        cumsum_fp = np.cumsum(fp)
        cumsum_tp = np.cumsum(tp)
        
        # Calculate precision and recall
        precision = cumsum_tp / (cumsum_fp + cumsum_tp + 1e-10)
        recall = cumsum_tp / gt_counter_per_class[class_id]
        
        # Ensure precision is monotonically decreasing (COCO method)
        for i in range(len(precision) - 2, -1, -1):
            precision[i] = max(precision[i], precision[i + 1])
            
        # Find all unique recall points
        recall_points = np.concatenate(([0], recall, [1]))
        recall_points = np.unique(recall_points)
        
        # Interpolate precision at each recall point
        interpolated_precision = np.zeros_like(recall_points)
        
        for i, r in enumerate(recall_points):
            # Precision at recall >= r
            precisions_at_recall = precision[recall >= r]
            if len(precisions_at_recall) > 0:
                interpolated_precision[i] = np.max(precisions_at_recall)
        
        # Calculate AP as area under precision-recall curve
        ap = np.sum((recall_points[1:] - recall_points[:-1]) * interpolated_precision[:-1])
            
        # Store AP for this class
        ap_dictionary[class_id] = ap
        sum_ap += ap
        valid_classes += 1
        
        class_name = class_names.get(class_id, f"Class {class_id}")
        print(f"{class_name:<10} - AP: {ap:.4f}, GT count: {gt_counter_per_class[class_id]}")
        
    # Calculate mAP
    if valid_classes > 0:
        mAP = sum_ap / valid_classes
    else:
        mAP = 0.0
        
    print("-" * 50)
    print(f"Final mAP@50: {mAP:.4f}")
        
    return mAP

def plot_precision_recall_curves(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001, output_file='pr_curves.png'):
    """
    Plot precision-recall curves for each class using COCO-style AP calculation
    """
    # Dictionary to store all predictions for each class
    all_predictions = defaultdict(list)
    # Dictionary to store ground truth count for each class
    gt_counter_per_class = defaultdict(int)
    
    # Map class_ids to class names if available
    class_names = {
        0: "cast",
        1: "cryst",
        2: "epith",
        3: "epithn",
        4: "eryth",
        5: "leuko",
        6: "mycete"
    }
    
    files = [f for f in os.listdir(pred_folder) if f.endswith('.txt') and os.path.exists(os.path.join(gt_folder, f))]
    
    # Process each file (same as in evaluate_map50_coco)
    # ... (same processing code as in evaluate_map50_coco)
    
    # Set up the plot
    plt.figure(figsize=(10, 8))
    
    # Process each class and plot its precision-recall curve
    for class_id in all_predictions:
        if gt_counter_per_class[class_id] == 0:
            continue
            
        # Sort predictions by confidence
        predictions = all_predictions[class_id]
        predictions.sort(key=lambda x: x[1], reverse=True)
        
        # Get tp/fp arrays
        tp = np.array([pred[-1] for pred in predictions], dtype=np.float64)
        fp = np.logical_not(tp).astype(np.float64)
        
        # Calculate cumulative values
        cumsum_fp = np.cumsum(fp)
        cumsum_tp = np.cumsum(tp)
        
        # Calculate precision and recall
        precision = cumsum_tp / (cumsum_fp + cumsum_tp + 1e-10)
        recall = cumsum_tp / gt_counter_per_class[class_id]
        
        # Ensure precision is monotonically decreasing (COCO method)
        for i in range(len(precision) - 2, -1, -1):
            precision[i] = max(precision[i], precision[i + 1])
        
        # Calculate AP
        # Find all unique recall points
        recall_points = np.concatenate(([0], recall, [1]))
        recall_points = np.unique(recall_points)
        
        # Interpolate precision at each recall point
        interpolated_precision = np.zeros_like(recall_points)
        
        for i, r in enumerate(recall_points):
            # Precision at recall >= r
            precisions_at_recall = precision[recall >= r]
            if len(precisions_at_recall) > 0:
                interpolated_precision[i] = np.max(precisions_at_recall)
        
        # Calculate AP as area under precision-recall curve
        ap = np.sum((recall_points[1:] - recall_points[:-1]) * interpolated_precision[:-1])
        
        # Plot precision-recall curve
        class_name = class_names.get(class_id, f"Class {class_id}")
        plt.plot(recall, precision, '-', label=f'{class_name} (AP: {ap:.4f})')
    
    # Customize plot
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curves (IoU={iou_thresh}, Conf={conf_thresh})')
    plt.xlim(0, 1.0)
    plt.ylim(0, 1.01)
    plt.grid(True)
    plt.legend(loc='lower left')
    
    # Save the plot
    plt.savefig(output_file)
    print(f"Precision-recall curves saved to {output_file}")
    plt.close()

# Example usage:
if __name__ == "__main__":
    gt_folder = 'C:/Mansura/UTI-Revision2/NMS/gt_corners'
    pred_folder = 'C:/Mansura/UTI-Revision2/NMS/soft_nms_output'
    
    # Calculate mAP@50 using COCO method with same confidence threshold as YOLO (0.001)
    map50 = evaluate_map50_coco(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001)
    
    # Optionally plot precision-recall curves
    plot_precision_recall_curves(pred_folder, gt_folder, iou_thresh=0.5, conf_thresh=0.001)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



Class-wise Average Precision:
--------------------------------------------------
cast       - AP: 0.7898, GT count: 545
cryst      - AP: 0.8772, GT count: 317
epith      - AP: 0.8882, GT count: 972
epithn     - AP: 0.9263, GT count: 77
eryth      - AP: 0.9440, GT count: 3008
leuko      - AP: 0.9295, GT count: 796
mycete     - AP: 0.8973, GT count: 233
--------------------------------------------------
Final mAP@50: 0.8932
Precision-recall curves saved to pr_curves.png
