# Uncertainty Estimation
  - **0:** class
  - **1:** x_mean
  - **2:** x_std
  - **3:** y_mean
  - **4:** y_std
  - **5:** w_mean
  - **6:** w_std
  - **7:** h_mean
  - **8:** h_std
  - **9:** conf
  - **10:** uncertainty
  - **11:** IoU

  **pred** holds the predictions of all images. \
  **p** holds all predictions of a single image. \
  **p2** is a single prediction in an image.


## Libraries

In [1]:
import os
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from ultralytics.utils.ops import xywh2xyxy
from torchvision import ops

## Functions

In [2]:
def load(predictions_path, gt_path):
    """Load per-image predictions and, optionally, the matching ground truths.

    Every file in ``predictions_path`` is read (in ``os.listdir`` order) and
    yields one entry in the returned ``predictions`` list:

    * If the file parses as JSON (ensemble predictions), the decoded object is
      stored unchanged.
    * Otherwise the file is treated as an ensemble member's text file with one
      whitespace-separated row ``class x y w h conf`` per line. Each row is
      expanded to ``[class, x, 0, y, 0, w, 0, h, 0, conf]``, i.e. zeros are
      inserted where the ensemble format carries standard deviations.

    Args:
        predictions_path: Directory containing one prediction file per image.
        gt_path: Directory containing ``<prediction file stem>.txt`` label
            files, or a falsy value to skip loading ground truths.

    Returns:
        A ``(predictions, gt)`` tuple. ``gt`` holds, per prediction file, a list
        of rows of floats; it is empty when ``gt_path`` is falsy.

    Raises:
        FileNotFoundError: If a ground-truth file for a prediction is missing.
        ValueError: If a text row contains a non-numeric value.
        IndexError: If a non-blank text row has fewer than six values.
    """
    predictions = []
    gt = []

    for file in os.listdir(predictions_path):
        prediction_file_path = os.path.join(predictions_path, file)
        prediction_name = os.path.splitext(file)[0]

        # read once; the same content is tried as JSON first, then as text
        with open(prediction_file_path, "r") as f:
            content = f.read()

        try:
            # load ensemble predictions
            predictions.append(json.loads(content))
        except json.JSONDecodeError:
            # load ensemble member predictions
            p = []
            for line in content.split("\n"):
                if not line.strip():
                    # tolerate blank / trailing empty lines
                    continue
                pred_line_data = [float(value) for value in line.split()]
                p.append([int(pred_line_data[0]),
                          pred_line_data[1], 0,
                          pred_line_data[2], 0,
                          pred_line_data[3], 0,
                          pred_line_data[4], 0,
                          pred_line_data[5]])
            predictions.append(p)

        # load ground truths
        if gt_path:
            with open(os.path.join(gt_path, prediction_name + ".txt"), "r") as f:
                g = [[float(value) for value in line.split()] for line in f]
            gt.append(g)

    return predictions, gt

def fuzzy(pred, std):
    """Append a box-based uncertainty value to every prediction, in place.

    For each prediction the original box ``(x, y, w, h)`` is compared with an
    enlarged box that shares the same centre but whose width and height are
    grown by ``std`` standard deviations of the size (``w_std``, ``h_std``)
    plus ``std`` standard deviations of the centre position (``x_std``,
    ``y_std``). The value ``1 - IoU(original, enlarged)`` is appended as the
    last element of the prediction.

    Args:
        pred: List of images, each a list of predictions laid out as
            ``[class, x, x_std, y, y_std, w, w_std, h, h_std, ...]``.
        std: Number of standard deviations used to enlarge the box.

    Returns:
        None. ``pred`` is modified in place.
    """
    for image in pred:
        for p in image:

            # x, y are coordinates
            # w, h are distances
            x, y, w, h = p[1], p[3], p[5], p[7]
            x_std, y_std, w_std, h_std = p[2], p[4], p[6], p[8]

            # grow w and h by `std` standard deviations of the size, then by
            # the distance the centre moves when shifted by `std` standard
            # deviations (|x + x_std * std - x| == |x_std * std|)
            w_fuzzy = w + w_std * std + abs(x_std * std)
            h_fuzzy = h + h_std * std + abs(y_std * std)

            box1 = xywh2xyxy(torch.tensor([[x, y, w, h]], dtype=torch.float))
            box2 = xywh2xyxy(torch.tensor([[x, y, w_fuzzy, h_fuzzy]], dtype=torch.float))

            # identical boxes give IoU 1, i.e. uncertainty 0
            uncertainty = 1 - ops.box_iou(box1, box2).numpy()[0][0]
            p.append(uncertainty)

def match(pred, gt, iou_threshold=0.55):
    """Append the IoU of the first matching ground truth to each prediction.

    For every prediction the ground truths of the same image are scanned in
    order. The first ground truth with the same class and an IoU strictly
    greater than ``iou_threshold`` is taken as the match; its IoU is appended
    as the last element of the prediction. Predictions without a match are left
    unchanged. A ground truth may be matched by several predictions.

    Args:
        pred: List of images, each a list of predictions laid out as
            ``[class, x, x_std, y, y_std, w, w_std, h, h_std, ...]``.
        gt: List of images (same order as ``pred``), each a list of
            ``[class, x, y, w, h]`` rows.
        iou_threshold: Minimum (exclusive) IoU for a match.

    Returns:
        None. ``pred`` is modified in place.
    """
    for image_idx, image_preds in enumerate(pred):
        for p in image_preds:
            # the prediction box does not depend on the ground truth: build it once
            box1 = xywh2xyxy(torch.tensor([[p[1], p[3], p[5], p[7]]], dtype=torch.float))

            for g in gt[image_idx]:
                if p[0] != g[0]:
                    # different class can never match; skip the IoU computation
                    continue

                box2 = xywh2xyxy(torch.tensor([g[1:]], dtype=torch.float))
                iou = ops.box_iou(box1, box2).numpy()[0][0]

                if iou > iou_threshold:
                    p.append(iou)
                    break

def binning(pred):
    """Group predictions into 20 equal-width confidence bins over [0, 1].

    Each prediction contributes a ``(confidence, iou)`` tuple to the bin that
    contains its confidence (index 9). ``iou`` is the value at index 11 when
    the prediction has one, otherwise ``None``. Bins are half-open
    ``[start, end)`` except the last one, which also includes ``1.0`` so that a
    confidence of exactly 1 is binned instead of running past the last edge.

    Args:
        pred: List of images, each a list of predictions with the confidence
            at index 9 and, optionally, an IoU at index 11.

    Returns:
        A list of 20 lists of ``(confidence, iou_or_None)`` tuples.

    Raises:
        ValueError: If a confidence lies outside ``[0, 1]`` (or is NaN).
    """
    bins = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    n_bins = len(bins) - 1
    bins_conf = [[] for _ in range(n_bins)]

    # bin confidence scores
    for p in pred:
        for p2 in p:
            conf = p2[9]
            iou = p2[11] if len(p2) > 11 else None

            for j in range(n_bins):
                is_last = j == n_bins - 1
                below_end = conf < bins[j + 1] or (is_last and conf == bins[-1])
                if bins[j] <= conf and below_end:
                    bins_conf[j].append((conf, iou))
                    break
            else:
                raise ValueError(f"confidence {conf!r} is outside [0, 1]")

    return bins_conf

def calibration(conf_binned):
    """Compute per-bin calibration statistics and the expected calibration error.

    Args:
        conf_binned: List of bins, each a list of ``(confidence, iou)`` tuples.
            An entry counts as a positive when its second element is truthy.

    Returns:
        A tuple ``(conf_means, positives_ratios, ece)`` where the two lists
        hold, for every non-empty bin, the mean confidence and the fraction of
        positives, and ``ece`` is the sum over non-empty bins of
        ``|fraction - mean confidence|`` weighted by the bin's share of samples.
    """
    n_samples = sum(map(len, conf_binned))
    conf_means, positives_ratios = [], []
    ece = 0

    # empty bins are skipped entirely (they contribute no point and no error)
    for bucket in filter(None, conf_binned):
        size = len(bucket)

        # mean conf per bin
        conf_mean = np.mean([entry[0] for entry in bucket])

        # TP per bin
        positives = sum(1 for entry in bucket if entry[1])
        positives_ratio = positives / (size + 1e-16)

        conf_means.append(conf_mean)
        positives_ratios.append(positives_ratio)
        ece += 1 / n_samples * size * abs(positives_ratio - conf_mean)

    return conf_means, positives_ratios, ece

## Load Data and Save Metrics
 - Loads the predictions and ground truths into two lists.
 - Adds a fuzzy uncertainty value at the end of each prediction.
 - Adds an IoU score at the end of each correct prediction.

In [3]:
# Number of ensemble members; the plotting cells iterate over range(1, m + 1).
m = 10

# Example usage for a single ensemble member (kept for reference):
# pred, gt = load("YOLOv9c_predictions_0.15\\1\labels", "datasets\crystals\labels\\test")
# fuzzy(pred, 1)
# match(pred, gt)


# Example usage for the full ensemble (kept for reference):
# pred, gt = load("YOLOv9c_predictions_0.01\ensemble_10", "datasets\crystals\labels\\test")
# fuzzy(pred, 1)
# match(pred, gt)

## LINE GRAPH: ensemble marco AP50

In [4]:
# Output folder; built with os.path.join so the path contains no invalid
# backslash escapes and is not tied to the Windows separator.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# AP@50 per class (rows) for ensemble sizes m = 1..10 (columns)
ap50 = [
    [0.51466, 0.53805, 0.54445, 0.554, 0.53853, 0.55546, 0.55116, 0.55082, 0.5433, 0.54572],  # CO
    [0.76113, 0.74806, 0.76948, 0.7745, 0.76738, 0.76061, 0.76526, 0.76702, 0.76507, 0.76456],  # C
    [0.63499, 0.64195, 0.67644, 0.66844, 0.68662, 0.69785, 0.69789, 0.70418, 0.69876, 0.70895],  # DC
    [0.7561, 0.75115, 0.75418, 0.74995, 0.76367, 0.76516, 0.75868, 0.75663, 0.7628, 0.76404],  # P
    [0.50789, 0.4748, 0.50451, 0.47635, 0.49702, 0.51582, 0.50735, 0.51933, 0.50977, 0.5166],  # CC
    [0.35416, 0.33228, 0.35894, 0.36279, 0.37465, 0.37599, 0.37182, 0.37523, 0.37087, 0.38107]   # DO
]

class_labels = ['CO', 'C', 'DC', 'P', 'CC', 'DO']
ensemble_numbers = list(range(1, 11))

# Plotting: one line per class
plt.figure(figsize=(12, 8))

for label, class_data in zip(class_labels, ap50):
    plt.plot(ensemble_numbers, class_data, marker='o', label=label)

plt.xlabel('m')
plt.ylabel('AP@50')
# legend is placed outside the axes, to the right
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.savefig(os.path.join(directory, 'ap50.png'))
plt.close()


## LINE GRAPH: ensemble icebear AP50

In [None]:
# Output folder; built with os.path.join so the path contains no invalid
# backslash escapes and is not tied to the Windows separator.
directory = os.path.join('plots_ensemble', 'line_ensemble_icebear')
os.makedirs(directory, exist_ok=True)

class_labels = ['CO', 'C', 'DC', 'P', 'CC', 'DO']
ensemble_numbers = list(range(1, 11))

# NOTE(review): this cell defines no AP values of its own; it plots whatever
# `ap50` currently holds (the only `ap50` assignment visible in this notebook
# is in the marco cell). Define the icebear values before relying on this plot.

# Plotting: one line per class
plt.figure(figsize=(12, 8))

for label, class_data in zip(class_labels, ap50):
    plt.plot(ensemble_numbers, class_data, marker='o', label=label)

plt.xlabel('m')
plt.ylabel('AP@50')
# legend is placed outside the axes, to the right
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.savefig(os.path.join(directory, 'ap50.png'))
plt.close()

## LINE GRAPH: ensemble marco AP50-95

In [5]:
# Output folder; built with os.path.join so the path contains no invalid
# backslash escapes and is not tied to the Windows separator.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# AP@50-95 per class (rows, same order as class_labels) for m = 1..10 (columns)
ap50_95 = [
    [0.37803, 0.39993, 0.41041, 0.42429, 0.41917, 0.43021, 0.43159, 0.43556, 0.43276, 0.43621],
    [0.62568, 0.61972, 0.63969, 0.6446, 0.64719, 0.64731, 0.65138, 0.64869, 0.64919, 0.64463],
    [0.31443, 0.3405, 0.36067, 0.35771, 0.36026, 0.36314, 0.36693, 0.3648, 0.36269, 0.37036],
    [0.54816, 0.56127, 0.57083, 0.56616, 0.57355, 0.57781, 0.57281, 0.57242, 0.57612, 0.57656],
    [0.46865, 0.43697, 0.46434, 0.43985, 0.45447, 0.46889, 0.45759, 0.47323, 0.45896, 0.47645],
    [0.15201, 0.15205, 0.17925, 0.18203, 0.18873, 0.18223, 0.18251, 0.18776, 0.18347, 0.18828]
]

class_labels = ['CO', 'C', 'DC', 'P', 'CC', 'DO']
ensemble_numbers = list(range(1, 11))

# Plotting: one line per class
plt.figure(figsize=(12, 8))

for label, class_data in zip(class_labels, ap50_95):
    plt.plot(ensemble_numbers, class_data, marker='o', label=label)

plt.xlabel('m')
plt.ylabel('AP@50-95')
# legend is placed outside the axes, to the right
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.savefig(os.path.join(directory, 'ap50_95.png'))
plt.close()

## LINE GRAPH: ensemble mAP50 and mAP50-95

In [6]:
# Output folder; built with os.path.join so the path contains no invalid
# backslash escapes and is not tied to the Windows separator.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# mAP@50 for ensemble sizes m = 1..10
mAP50 = [
    0.5881560695350004,
    0.5810491240762302,
    0.6013339227616193,
    0.5976730390733397,
    0.604644946767772,
    0.6118167600960082,
    0.6086957300745839,
    0.6122024801273492,
    0.6084286872474781,
    0.613490171224863
]

# mAP@50-95 for ensemble sizes m = 1..10
mAP50_95 = [
    0.41449089642074044,
    0.4184056209535339,
    0.4375304800271527,
    0.4357746687133396,
    0.44055931145634075,
    0.4449336908209816,
    0.44380210196494635,
    0.44707498183903294,
    0.44386615864176915,
    0.4487471580725686
]

ensemble_numbers = list(range(1, 11))

# Plotting: both metrics on the same axes
plt.figure(figsize=(12, 8))
plt.plot(ensemble_numbers, mAP50, marker='o', label='mAP@50')
plt.plot(ensemble_numbers, mAP50_95, marker='o', label='mAP@50-95')
plt.xlabel('m')
plt.ylabel('mAP')
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.legend()
plt.savefig(os.path.join(directory, 'map50_map50_95.png'))
plt.close()

## CALIBRATION PLOT: model/ensemble error

In [7]:
# Calibration curves of every ensemble member plus the full ensemble, drawn on
# a single figure. Paths are built with os.path.join so they contain no invalid
# backslash escapes and are not tied to the Windows separator.
directory = os.path.join('plots_uncertainty', 'calibration_accuracy_conf_model_ensemble')
os.makedirs(directory, exist_ok=True)

predictions_root = "YOLOv9c_predictions_0.01"
gt_dir = os.path.join("datasets", "crystals", "labels", "test")

plt.figure(figsize=(10, 8))

# one thin curve per ensemble member
for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, str(i), "labels"), gt_dir)
    fuzzy(pred, 1)
    match(pred, gt)

    conf_binned = binning(pred)
    conf_means, positives_ratios, ece = calibration(conf_binned)

    plt.plot(conf_means, positives_ratios, linewidth=1, marker='o', markersize=2, label=f'Model {i} (ECE: {ece:.3f})')

# thick black curve for the ensemble of all m members
# (the folder was hard-coded as "ensemble_10", which equals ensemble_{m} for m = 10)
pred, gt = load(os.path.join(predictions_root, f"ensemble_{m}"), gt_dir)
fuzzy(pred, 1)
match(pred, gt)
conf_binned = binning(pred)
conf_means, positives_ratios, ece = calibration(conf_binned)

plt.plot(conf_means, positives_ratios, linewidth=2, marker='o', markersize=4, label=f'Ensemble (ECE: {ece:.3f})', color='black')
# dashed diagonal marks perfect calibration
plt.plot([0, 1], [0, 1], color='0.7', linestyle='--')
plt.xlabel('Means of Binned Confidences')
plt.ylabel('Fraction of Positives')
# use m directly instead of the loop variable left over from the loop above
plt.title(f"m = {m}")
plt.grid(True)
plt.legend(loc='best')
plt.savefig(os.path.join(directory, f'{m}.png'))
plt.close()

## CALIBRATION PLOT: ensemble error

In [8]:
# One calibration plot per ensemble size. Paths are built with os.path.join so
# they contain no invalid backslash escapes and are not tied to the Windows
# separator.
directory = os.path.join('plots_uncertainty', 'calibration_accuracy_conf_ensemble')
os.makedirs(directory, exist_ok=True)

predictions_root = "YOLOv9c_predictions_0.01"
gt_dir = os.path.join("datasets", "crystals", "labels", "test")

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f"ensemble_{i}"), gt_dir)
    fuzzy(pred, 1)
    match(pred, gt)

    conf_binned = binning(pred)
    conf_means, positives_ratios, ece = calibration(conf_binned)

    # start a fresh default-sized figure for this ensemble size
    plt.figure()
    plt.plot(conf_means, positives_ratios, linewidth=2, marker='o', markersize=5,label=f'ECE: {ece:.3f}', color='black')
    # dashed diagonal marks perfect calibration
    plt.plot([0, 1], [0, 1], color='0.7', linestyle='--')
    plt.xlabel('Means of Binned Confidences')
    plt.ylabel('Fraction of Positives')
    plt.title(f"m = {i}")
    plt.grid(True)
    plt.legend(loc='best')
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

## SCATTER PLOT: error_conf
Error as a function of confidence, highlighting correct classifications (green), misclassifications (red), and the number of ensemble members (m).
 


In [9]:
# Scatter of uncertainty ("error", index 10) against confidence (index 9), one
# plot per ensemble size. Paths are built with os.path.join so they contain no
# invalid backslash escapes and are not tied to the Windows separator.
directory = os.path.join('plots_uncertainty', 'scatter_error_conf')
os.makedirs(directory, exist_ok=True)

predictions_root = "YOLOv9c_predictions_0.01"
gt_dir = os.path.join("datasets", "crystals", "labels", "test")

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f"ensemble_{i}"), gt_dir)
    fuzzy(pred, 1)
    match(pred, gt)

    # length 12 = matched to a ground truth (IoU appended), length 11 = unmatched
    data = [(subsub[9], subsub[10], len(subsub)) for sublist in pred for subsub in sublist if len(subsub) in {11, 12}]
    x, y, subsub_length = zip(*data)
    colors = ['green' if length == 12 else 'red' for length in subsub_length]

    # linear least-squares trendline
    coefficients = np.polyfit(x, y, 1)
    trendline = np.poly1d(coefficients)

    # The former plt.clf() call here created an extra empty figure that was
    # never closed; plt.figure() below is the only figure needed.
    plt.figure(dpi=500)
    plt.scatter(x, y, c=colors, s=5)
    plt.plot(x, trendline(x), color='black', linestyle='-', label='Trendline')
    plt.xlabel('Confidence')
    plt.ylabel('Error')
    plt.title(f"m = {i}")
    plt.xlim(0, 1)
    plt.ylim(0, 0.45)
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

<Figure size 640x480 with 0 Axes>

## SCATTER PLOT: error_iou

In [10]:
# Scatter of uncertainty ("error", index 10) against IoU (index 11) for the
# predictions that were matched to a ground truth, one plot per ensemble size.
# Paths are built with os.path.join so they contain no invalid backslash
# escapes and are not tied to the Windows separator.
directory = os.path.join('plots_uncertainty', 'scatter_error_iou')
os.makedirs(directory, exist_ok=True)

predictions_root = "YOLOv9c_predictions_0.01"
gt_dir = os.path.join("datasets", "crystals", "labels", "test")

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f"ensemble_{i}"), gt_dir)
    fuzzy(pred, 1)
    match(pred, gt)

    # only matched predictions (length 12) carry an IoU
    data = [(subsub[11], subsub[10]) for sublist in pred for subsub in sublist if len(subsub) == 12]
    x, y = zip(*data)

    # linear least-squares trendline
    coefficients = np.polyfit(x, y, 1)
    trendline = np.poly1d(coefficients)

    # The former plt.clf() call here created an extra empty figure that was
    # never closed; plt.figure() below is the only figure needed.
    plt.figure(dpi=500)
    # every plotted point is a matched prediction, hence a single colour
    plt.scatter(x, y, c='green', s=5)
    plt.plot(x, trendline(x), color='black', linestyle='-', label='Trendline')
    plt.xlabel('IoU')
    plt.ylabel('Error')
    plt.title(f"m = {i}")
    plt.xlim(0.55, 1)
    plt.ylim(0, 0.45)
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

<Figure size 640x480 with 0 Axes>

# OOD: marco vs coco

In [12]:
# Histograms of the uncertainty ("error", index 10) for in-distribution and
# out-of-distribution predictions, one plot per ensemble size. Paths are built
# with os.path.join so they contain no invalid backslash escapes and are not
# tied to the Windows separator.
directory = os.path.join('plots_uncertainty', 'histogram_ood_0.15_noother')
os.makedirs(directory, exist_ok=True)

# classes left out of the histograms
# NOTE(review): presumably the "other" classes, given the "noother" folder name - confirm
excluded_classes = (0, 5)

for i in range(1, m + 1):

    pred_id, _ = load(os.path.join("YOLOv9c_predictions_0.15", f"ensemble_{i}"), None)
    pred_ood, _ = load(os.path.join("YOLOv9c_predictions_0.15_coco", f"ensemble_{i}"), None)
    fuzzy(pred_id, 2)
    fuzzy(pred_ood, 2)
    # The former match(pred_id, gt) call was dropped: no ground truth is loaded
    # in this cell (it relied on a `gt` left over from another cell) and the
    # appended IoU was never read below.

    # The two sets are collected independently; zipping them would silently
    # drop the extra images of whichever set is larger.
    error_id = [p[10] for image in pred_id for p in image if p[0] not in excluded_classes]
    error_ood = [p[10] for image in pred_ood for p in image if p[0] not in excluded_classes]

    # The former plt.clf() call here created an extra empty figure that was
    # never closed; plt.figure() below is the only figure needed.
    plt.figure(dpi=500)
    # 50 edges from 0.02 to 1.0: the first edge (0) is dropped, so values
    # below 0.02 are not drawn
    bin_edges = np.linspace(0, 1, 51)[1:]
    plt.hist(error_id, bins=bin_edges, density=True, alpha=0.5, label='In-Distribution', edgecolor='black', color='green')
    plt.hist(error_ood, bins=bin_edges, density=True, alpha=0.5, label='Out-of-Distribution', edgecolor='black', color='red')
    plt.xlabel('Error')
    plt.ylabel('Density')
    plt.title(f"m = {i}")
    plt.xlim(0, 1)
    plt.ylim(0, 12)
    plt.legend()
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

<Figure size 640x480 with 0 Axes>