# Uncertainty Estimation
  - **0:** class
  - **1:** x_mean
  - **2:** x_std
  - **3:** y_mean
  - **4:** y_std
  - **5:** w_mean
  - **6:** w_std
  - **7:** h_mean
  - **8:** h_std
  - **9:** conf
  - **10:** uncertainty
  - **11:** IoU

  **pred** holds all the predictions of all images. \
  **p** holds all predictions of a single image. \
  **p2** is a single prediction in an image.


## Libraries

In [16]:
import os
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from ultralytics.utils.ops import xywh2xyxy
from torchvision import ops

## Functions

In [17]:
def load(predictions_path, gt_path):
    """Load per-image predictions and, optionally, the matching ground truths.

    Every file in ``predictions_path`` is treated as the predictions of one
    image. A file is first parsed as JSON (ensemble predictions) and appended
    as stored. If it is not valid JSON it is read as whitespace-separated text
    (ensemble member predictions), one detection per line in the order
    ``class x y w h conf``. Each such line is expanded to
    ``[class, x, 0, y, 0, w, 0, h, 0, conf]`` so that member predictions use
    the same indices as ensemble predictions, with every std set to 0.

    Args:
        predictions_path: Directory with one prediction file per image.
        gt_path: Directory with ``<prediction file stem>.txt`` ground-truth
            files, or a falsy value to skip loading ground truths.

    Returns:
        A ``(predictions, gt)`` tuple of lists ordered by file name. ``gt`` is
        empty when ``gt_path`` is falsy; otherwise ``gt[i]`` holds the rows of
        floats read for ``predictions[i]``.

    Raises:
        OSError: If a prediction or ground-truth file cannot be opened.
        ValueError: If a text line contains a non-numeric token.
    """
    predictions = []
    gt = []

    # os.listdir() order is arbitrary; sorting keeps the image order
    # reproducible between runs and platforms.
    for file in sorted(os.listdir(predictions_path)):
        prediction_file_path = os.path.join(predictions_path, file)
        prediction_name = os.path.splitext(file)[0]

        try:
            # ensemble predictions are stored as JSON
            with open(prediction_file_path, "r") as f:
                data = json.load(f)
        except json.JSONDecodeError:
            # ensemble member predictions are stored as plain text; only a
            # JSON parse failure selects this path, other errors propagate.
            p = []
            with open(prediction_file_path, "r") as f:
                for line in f:
                    pred_line_data = [float(value) for value in line.split()]
                    if not pred_line_data:
                        # blank line (e.g. trailing newline at end of file)
                        continue
                    p.append([int(pred_line_data[0]),
                              pred_line_data[1], 0,
                              pred_line_data[2], 0,
                              pred_line_data[3], 0,
                              pred_line_data[4], 0,
                              pred_line_data[5]])
            predictions.append(p)
        else:
            predictions.append(data)

        # load ground truths
        if gt_path:
            g = []
            with open(os.path.join(gt_path, prediction_name + ".txt"), "r") as f:
                for line in f:
                    gt_line_data = [float(value) for value in line.split()]
                    if gt_line_data:
                        g.append(gt_line_data)
            gt.append(g)

    return predictions, gt

def fuzzy(pred, std):
    """Append a box-inflation uncertainty score to every prediction, in place.

    For each prediction the box ``(x, y, w, h)`` is compared with a second
    box at the same ``x, y`` whose width and height are enlarged by ``std``
    standard deviations of the size (``w_std``, ``h_std``) plus the absolute
    shift of ``std`` standard deviations of the position (``x_std``,
    ``y_std``). The value appended to the prediction is ``1 - IoU`` of the
    two boxes, so identical boxes (all stds 0) give 0.

    Args:
        pred: List of images, each a list of predictions laid out as
            ``[class, x, x_std, y, y_std, w, w_std, h, h_std, conf, ...]``.
            Every prediction list is extended by one element.
        std: Number of standard deviations used to inflate the box.

    Returns:
        None. ``pred`` is modified in place.
    """
    for image_predictions in pred:
        for p in image_predictions:

            # x, y are coordinates
            # w, h are distances
            x, y, w, h = p[1], p[3], p[5], p[7]
            x_std, y_std, w_std, h_std = p[2], p[4], p[6], p[8]

            # add `std` standard deviations to the original w and h
            w_fuzzy = w + w_std * std
            h_fuzzy = h + h_std * std

            # add the distance between the point (x, y) and the point shifted
            # by `std` standard deviations to w and h (arithmetic kept as in
            # the original so results stay bit-identical)
            w_fuzzy += abs(x + x_std * std - x)
            h_fuzzy += abs(y + y_std * std - y)

            box1 = xywh2xyxy(torch.tensor([[x, y, w, h]], dtype=torch.float))
            box2 = xywh2xyxy(torch.tensor([[x, y, w_fuzzy, h_fuzzy]], dtype=torch.float))
            uncertainty = 1 - ops.box_iou(box1, box2).numpy()[0][0]
            p.append(uncertainty)

def match(pred, gt):
    """Append to every prediction its best IoU with a same-class ground truth.

    For each prediction of image ``i`` the IoU with every ground-truth box in
    ``gt[i]`` that has the same class is computed, and the largest value is
    appended to the prediction. ``0`` is appended when no same-class ground
    truth overlaps the prediction.

    Args:
        pred: List of images, each a list of predictions laid out as
            ``[class, x, x_std, y, y_std, w, w_std, h, h_std, conf, ...]``.
            Every prediction list is extended by one element.
        gt: List of images, each a list of ``[class, x, y, w, h]`` rows;
            ``gt[i]`` must belong to ``pred[i]``.

    Returns:
        None. ``pred`` is modified in place.
    """
    for i, image_predictions in enumerate(pred):
        for p in image_predictions:
            # The prediction box does not depend on the ground truth, so it
            # is built once per prediction.
            box1 = xywh2xyxy(torch.tensor([[p[1], p[3], p[5], p[7]]], dtype=torch.float))
            saved_iou = 0

            for g in gt[i]:
                # Only same-class ground truths can match; skip the IoU work
                # for everything else.
                if p[0] != g[0]:
                    continue

                box2 = xywh2xyxy(torch.tensor([g[1:]], dtype=torch.float))
                iou = ops.box_iou(box1, box2).numpy()[0][0]
                if iou > saved_iou:
                    saved_iou = iou

            # saved_iou is still the integer 0 when nothing matched.
            p.append(saved_iou)

def binning(pred):
    """Group predictions into 20 equal-width confidence bins over [0, 1].

    Bin ``j`` covers ``bins[j] <= conf < bins[j + 1]``; the last bin also
    includes ``conf == 1``. Confidences outside ``[0, 1]`` (or NaN) fall in
    no bin and are dropped.

    Args:
        pred: List of images, each a list of predictions with the confidence
            at index 9 and, if present, the IoU at index 11.

    Returns:
        A list of 20 lists, one per bin, holding ``(conf, iou)`` tuples.
        ``iou`` is ``None`` for predictions that have no index 11.
    """
    bins = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    bins_conf = [[] for _ in range(len(bins) - 1)]
    last_bin = len(bins_conf) - 1

    # bin confidence scores
    for p in pred:
        for p2 in p:
            conf = p2[9]
            iou = p2[11] if len(p2) > 11 else None

            # Iterate over bins, not edges, so bins[j + 1] always exists.
            for j in range(len(bins_conf)):
                below_end = conf < bins[j + 1]
                if j == last_bin and conf == bins[-1]:
                    # close the last bin on the right so conf == 1 is kept
                    below_end = True
                if bins[j] <= conf and below_end:
                    bins_conf[j].append((conf, iou))
                    break

    return bins_conf

def calibration(conf_binned, iou_threshold=0.55):
    """Compute reliability-curve points and the expected calibration error.

    A prediction counts as positive when its IoU is strictly greater than
    ``iou_threshold``. Empty bins are skipped, so the returned lists can be
    shorter than ``conf_binned``.

    Args:
        conf_binned: List of bins, each a list of ``(conf, iou)`` tuples as
            returned by ``binning``. Every ``iou`` must be a number.
        iou_threshold: IoU above which a prediction is a positive.

    Returns:
        A ``(conf_means, positives_ratios, ece)`` tuple: the mean confidence
        of each non-empty bin, the fraction of positives in each non-empty
        bin, and the sum over bins of ``|ratio - mean|`` weighted by the
        bin's share of all predictions. ``ece`` is ``0`` when every bin is
        empty.
    """
    ece = 0
    conf_means = []
    positives_ratios = []
    total_length = sum(len(members) for members in conf_binned)

    for members in conf_binned:

        if not members:
            continue

        # mean conf per bin
        conf_mean = np.mean([conf for conf, _ in members])
        conf_means.append(conf_mean)

        # fraction of positives per bin; the bin is non-empty here, so no
        # division guard is needed
        positives = sum(1 for _, iou in members if iou > iou_threshold)
        positives_ratio = positives / len(members)
        positives_ratios.append(positives_ratio)

        ece += 1/total_length * len(members) * abs(positives_ratio - conf_mean)

    return conf_means, positives_ratios, ece

## Load Data and Save Metrics
 - Loads the predictions and ground truths into two lists.
 - Adds a fuzzy uncertainty value at the end of each prediction.
 - Adds an IoU score at the end of each prediction (0 if no ground truth of the same class overlaps it).

In [18]:
# Largest ensemble size; the per-ensemble loops below run over 1..m.
m = 10

# Legend names for the per-class curves, in the same order as the rows of
# the AP tables (presumably also the dataset's class-index order - verify).
class_labels = [
    'Clustered Other',
    'Clear',
    'Discrete Crystal',
    'Precipitate',
    'Clustered Crystal',
    'Discrete Other',
]


# pred, gt = load("YOLOv9c_predictions_0.15\\1\labels", "datasets\crystals\labels\\test")
# fuzzy(pred, 1)
# match(pred, gt)


# pred, gt = load("YOLOv9c_predictions_0.01\ensemble_10", "datasets\crystals\labels\\test")
# fuzzy(pred, 1)
# match(pred, gt)

## LINE GRAPH: ensemble marco AP50

In [19]:
# Line graph of the per-class AP@50 on marco for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# One row per class (same order as class_labels), one column per ensemble size.
ap50 = [
    [0.51466, 0.53805, 0.54445, 0.554, 0.53853, 0.55546, 0.55116, 0.55082, 0.5433, 0.54572],  # CO
    [0.76113, 0.74806, 0.76948, 0.7745, 0.76738, 0.76061, 0.76526, 0.76702, 0.76507, 0.76456],  # C
    [0.63499, 0.64195, 0.67644, 0.66844, 0.68662, 0.69785, 0.69789, 0.70418, 0.69876, 0.70895],  # DC
    [0.7561, 0.75115, 0.75418, 0.74995, 0.76367, 0.76516, 0.75868, 0.75663, 0.7628, 0.76404],  # P
    [0.50789, 0.4748, 0.50451, 0.47635, 0.49702, 0.51582, 0.50735, 0.51933, 0.50977, 0.5166],  # CC
    [0.35416, 0.33228, 0.35894, 0.36279, 0.37465, 0.37599, 0.37182, 0.37523, 0.37087, 0.38107]   # DO
]

ensemble_numbers = list(range(1, len(ap50[0]) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)

for class_label, class_data in zip(class_labels, ap50):
    plt.plot(ensemble_numbers, class_data, marker='o', label=class_label)

plt.xlabel('m')
plt.ylabel('AP@50')
plt.legend()
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.savefig(os.path.join(directory, 'ap50.png'))
plt.close()


<Figure size 3200x2400 with 0 Axes>

## LINE GRAPH: ensemble icebear AP50

In [20]:
# Line graph of the per-class AP@50 on icebear for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_icebear')
os.makedirs(directory, exist_ok=True)

# One row per class (same order as class_labels), one column per ensemble size.
ap50 = [
    [0.2282, 0.26357, 0.27709, 0.31397, 0.31927, 0.32261, 0.30949, 0.31771, 0.33555, 0.34384],
    [0.68463, 0.64879, 0.65379, 0.66647, 0.65973, 0.66488, 0.67457, 0.67712, 0.66917, 0.66451],
    [0.46933, 0.49607, 0.48057, 0.48674, 0.49784, 0.49975, 0.49793, 0.50198, 0.50793, 0.51128],
    [0.61244, 0.63637, 0.6727, 0.65769, 0.66936, 0.65707, 0.66379, 0.65464, 0.65462, 0.66396],
    [0.24729, 0.28702, 0.27674, 0.31108, 0.32734, 0.29223, 0.28456, 0.30206, 0.31927, 0.31974],
    [0.161, 0.19993, 0.25644, 0.23183, 0.26701, 0.2585, 0.27248, 0.27172, 0.29246, 0.31719]
]

ensemble_numbers = list(range(1, len(ap50[0]) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)

for class_label, class_data in zip(class_labels, ap50):
    plt.plot(ensemble_numbers, class_data, marker='o', label=class_label)

plt.xlabel('m')
plt.ylabel('AP@50')
plt.legend()
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.savefig(os.path.join(directory, 'ap50.png'))
plt.close()

<Figure size 3200x2400 with 0 Axes>

## LINE GRAPH: ensemble marco AP50-95

In [21]:
# Line graph of the per-class AP@50-95 on marco for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# One row per class (same order as class_labels), one column per ensemble size.
ap50_95 = [
    [0.37803, 0.39993, 0.41041, 0.42429, 0.41917, 0.43021, 0.43159, 0.43556, 0.43276, 0.43621],
    [0.62568, 0.61972, 0.63969, 0.6446, 0.64719, 0.64731, 0.65138, 0.64869, 0.64919, 0.64463],
    [0.31443, 0.3405, 0.36067, 0.35771, 0.36026, 0.36314, 0.36693, 0.3648, 0.36269, 0.37036],
    [0.54816, 0.56127, 0.57083, 0.56616, 0.57355, 0.57781, 0.57281, 0.57242, 0.57612, 0.57656],
    [0.46865, 0.43697, 0.46434, 0.43985, 0.45447, 0.46889, 0.45759, 0.47323, 0.45896, 0.47645],
    [0.15201, 0.15205, 0.17925, 0.18203, 0.18873, 0.18223, 0.18251, 0.18776, 0.18347, 0.18828]
]

ensemble_numbers = list(range(1, len(ap50_95[0]) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)

for class_label, class_data in zip(class_labels, ap50_95):
    plt.plot(ensemble_numbers, class_data, marker='o', label=class_label)

plt.xlabel('m')
plt.ylabel('AP@50-95')
plt.legend()
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.savefig(os.path.join(directory, 'ap50_95.png'))
plt.close()

<Figure size 3200x2400 with 0 Axes>

## LINE GRAPH: ensemble icebear AP50-95

In [22]:
# Line graph of the per-class AP@50-95 on icebear for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_icebear')
os.makedirs(directory, exist_ok=True)

# One row per class (same order as class_labels), one column per ensemble size.
ap50_95 = [
    [0.16606, 0.19145, 0.21614, 0.23295, 0.23982, 0.24561, 0.23848, 0.24266, 0.25432, 0.2604],
    [0.56287, 0.53081, 0.55072, 0.56661, 0.5624, 0.56705, 0.57147, 0.57978, 0.57168, 0.5694],
    [0.19687, 0.2099, 0.20799, 0.21283, 0.22044, 0.21636, 0.21935, 0.22066, 0.22339, 0.22444],
    [0.42917, 0.44799, 0.48473, 0.48182, 0.49356, 0.48897, 0.48957, 0.485, 0.4861, 0.49675],
    [0.20469, 0.22357, 0.21693, 0.24742, 0.24993, 0.22564, 0.2219, 0.22904, 0.25025, 0.2536],
    [0.080678, 0.11244, 0.14911, 0.12498, 0.1427, 0.14095, 0.14852, 0.14757, 0.15151, 0.16553]
]

ensemble_numbers = list(range(1, len(ap50_95[0]) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)

for class_label, class_data in zip(class_labels, ap50_95):
    plt.plot(ensemble_numbers, class_data, marker='o', label=class_label)

plt.xlabel('m')
plt.ylabel('AP@50-95')
plt.legend()
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.savefig(os.path.join(directory, 'ap50_95.png'))
plt.close()

<Figure size 3200x2400 with 0 Axes>

## LINE GRAPH: ensemble mAP50 and mAP50-95 on marco

In [23]:
# Line graph of mAP@50 and mAP@50-95 on marco for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_marco')
os.makedirs(directory, exist_ok=True)

# One value per ensemble size.
mAP50 = [
    0.5881560695350004,
    0.5810491240762302,
    0.6013339227616193,
    0.5976730390733397,
    0.604644946767772,
    0.6118167600960082,
    0.6086957300745839,
    0.6122024801273492,
    0.6084286872474781,
    0.613490171224863
]

mAP50_95 = [
    0.41449089642074044,
    0.4184056209535339,
    0.4375304800271527,
    0.4357746687133396,
    0.44055931145634075,
    0.4449336908209816,
    0.44380210196494635,
    0.44707498183903294,
    0.44386615864176915,
    0.4487471580725686
]

ensemble_numbers = list(range(1, len(mAP50) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)
plt.plot(ensemble_numbers, mAP50, marker='o', label='mAP@50')
plt.plot(ensemble_numbers, mAP50_95, marker='o', label='mAP@50-95')
plt.xlabel('m')
plt.ylabel('mAP')
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.legend()
plt.savefig(os.path.join(directory, 'map50_map50_95.png'))
plt.close()

<Figure size 3200x2400 with 0 Axes>

## LINE GRAPH: ensemble mAP50 and mAP50-95 on icebear

In [25]:
# Line graph of mAP@50 and mAP@50-95 on icebear for ensemble sizes 1..10.
# The path is built with os.path.join: the backslash literal relied on an
# invalid escape sequence ("\l") and only formed nested folders on Windows.
directory = os.path.join('plots_ensemble', 'line_ensemble_icebear')
os.makedirs(directory, exist_ok=True)

# One value per ensemble size.
mAP50 = [
    0.4004813798860482,
    0.42195743183364015,
    0.4362228563648598,
    0.44463001670238533,
    0.4567603658443213,
    0.4491722498124144,
    0.45046925593029136,
    0.45420437654341206,
    0.4631654043568287,
    0.470086848071936
]

mAP50_95 = [
    0.27338885341525104,
    0.28602736318331884,
    0.30427104374178365,
    0.311103458578635,
    0.3181416885633679,
    0.3140970370144899,
    0.3148821656780199,
    0.3174517339685291,
    0.32287552683437976,
    0.32836944034023025
]

ensemble_numbers = list(range(1, len(mAP50) + 1))

# Plotting
# A single figure carries both settings. Two separate plt.figure() calls
# left an empty 500-dpi figure open and saved the plot at the default dpi.
plt.figure(figsize=(8, 8), dpi=500)
plt.plot(ensemble_numbers, mAP50, marker='o', label='mAP@50')
plt.plot(ensemble_numbers, mAP50_95, marker='o', label='mAP@50-95')
plt.xlabel('m')
plt.ylabel('mAP')
plt.grid(True)
plt.xticks(ensemble_numbers)
plt.ylim(0, 1)
plt.legend()
plt.savefig(os.path.join(directory, 'map50_map50_95.png'))
plt.close()

<Figure size 3200x2400 with 0 Axes>

## CALIBRATION PLOT: model/ensemble error

In [None]:
# Calibration curves of each of the m individual models and of the ensemble
# of all m models, drawn into one figure. ECE values are printed.
# Paths are built with os.path.join: the backslash literals relied on invalid
# escape sequences ("\c", "\{", "\l", "\e") and were Windows-only.
directory = os.path.join('plots_uncertainty', 'calibration_accuracy_conf_model_ensemble')
os.makedirs(directory, exist_ok=True)

predictions_root = 'YOLOv9c_predictions_0.01'
gt_directory = os.path.join('datasets', 'crystals', 'labels', 'test')

plt.figure(figsize=(10, 8))
for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, str(i), 'labels'), gt_directory)
    fuzzy(pred, 1)
    match(pred, gt)

    conf_binned = binning(pred)
    conf_means, positives_ratios, ece = calibration(conf_binned)

    plt.plot(conf_means, positives_ratios, linewidth=1, marker='o', markersize=2, label=f'Model {i}')
    print(f"Model {i}: " + str(ece))


# Ensemble of all m members (previously hard-coded as "ensemble_10").
pred, gt = load(os.path.join(predictions_root, f'ensemble_{m}'), gt_directory)
fuzzy(pred, 1)
match(pred, gt)
conf_binned = binning(pred)
conf_means, positives_ratios, ece = calibration(conf_binned)

plt.plot(conf_means, positives_ratios, linewidth=2, marker='o', markersize=4, label='Ensemble', color='black')
print("Ensemble: " + str(ece))
# diagonal = perfect calibration
plt.plot([0, 1], [0, 1], color='0.7', linestyle='--')
plt.xlabel('Means of Binned Confidences')
plt.ylabel('Fraction of Positives')
# Title and file name use m directly rather than the loop variable that
# happened to be left over from the loop above.
plt.title(f"m = {m}")
plt.grid(True)
plt.legend(loc='best')
plt.savefig(os.path.join(directory, f'{m}.png'))
plt.close()

## CALIBRATION PLOT: ensemble error

In [None]:
# One calibration plot per ensemble size 1..m, with the ECE in the legend.
# Paths are built with os.path.join: the backslash literals relied on invalid
# escape sequences ("\c", "\e", "\l") and were Windows-only.
directory = os.path.join('plots_uncertainty', 'calibration_accuracy_conf_ensemble')
os.makedirs(directory, exist_ok=True)

predictions_root = 'YOLOv9c_predictions_0.01'
gt_directory = os.path.join('datasets', 'crystals', 'labels', 'test')

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f'ensemble_{i}'), gt_directory)
    fuzzy(pred, 1)
    match(pred, gt)

    conf_binned = binning(pred)
    conf_means, positives_ratios, ece = calibration(conf_binned)

    # No plt.clf() here: with no figure open it creates an extra empty
    # figure on every iteration that plt.close() below never closes.
    plt.figure(dpi=500)
    plt.plot(conf_means, positives_ratios, linewidth=2, marker='o', markersize=5, label=f'ECE: {ece:.3f}', color='black')
    # diagonal = perfect calibration
    plt.plot([0, 1], [0, 1], color='0.7', linestyle='--')
    plt.xlabel('Means of Binned Confidences')
    plt.ylabel('Fraction of Positives')
    plt.title(f"m = {i}")
    plt.grid(True)
    plt.legend(loc='best')
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

## SCATTER PLOT: error_conf
Error as a function of confidence, highlighting correct classifications (green), misclassifications (red), and the number of ensemble members (m).
 


In [None]:
# One scatter plot per ensemble size 1..m: uncertainty ("error", index 10)
# against confidence (index 9), coloured by whether the IoU (index 11)
# exceeds 0.55, with a cubic trendline.
# Paths are built with os.path.join: the backslash literals relied on invalid
# escape sequences ("\s", "\e", "\c", "\l") and were Windows-only.
directory = os.path.join('plots_uncertainty', 'scatter_error_conf')
os.makedirs(directory, exist_ok=True)

predictions_root = 'YOLOv9c_predictions_0.01'
gt_directory = os.path.join('datasets', 'crystals', 'labels', 'test')

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f'ensemble_{i}'), gt_directory)
    fuzzy(pred, 1)
    match(pred, gt)

    data = [(subsub[9], subsub[10], subsub[11]) for sublist in pred for subsub in sublist]
    x, y, z = zip(*data)
    colors = ['green' if score > 0.55 else 'red' for score in z]

    # cubic least-squares fit of error against confidence
    coefficients = np.polyfit(x, y, 3)
    trendline = np.poly1d(coefficients)
    x_sorted = np.sort(x)

    # No plt.clf() here: with no figure open it creates an extra empty
    # figure on every iteration that plt.close() below never closes.
    plt.figure(dpi=500)
    plt.scatter(x, y, c=colors, s=5)
    plt.plot(x_sorted, trendline(x_sorted), color='black', label='Trendline')
    plt.xlabel('Confidence')
    plt.ylabel('Error')
    plt.title(f"m = {i}")
    plt.xlim(0, 1)
    plt.ylim(0, 0.45)
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

## SCATTER PLOT: error_iou

In [None]:
# One scatter plot per ensemble size 1..m: uncertainty ("error", index 10)
# against IoU (index 11), coloured by whether the IoU exceeds 0.55, with a
# cubic trendline.
# Paths are built with os.path.join: the backslash literals relied on invalid
# escape sequences ("\s", "\e", "\c", "\l") and were Windows-only.
directory = os.path.join('plots_uncertainty', 'scatter_error_iou')
os.makedirs(directory, exist_ok=True)

predictions_root = 'YOLOv9c_predictions_0.01'
gt_directory = os.path.join('datasets', 'crystals', 'labels', 'test')

for i in range(1, m + 1):
    pred, gt = load(os.path.join(predictions_root, f'ensemble_{i}'), gt_directory)
    fuzzy(pred, 1)
    match(pred, gt)

    data = [(subsub[11], subsub[10]) for sublist in pred for subsub in sublist]
    x, y = zip(*data)
    colors = ['green' if score > 0.55 else 'red' for score in x]

    # cubic least-squares fit of error against IoU
    coefficients = np.polyfit(x, y, 3)
    trendline = np.poly1d(coefficients)
    x_sorted = np.sort(x)

    # No plt.clf() here: with no figure open it creates an extra empty
    # figure on every iteration that plt.close() below never closes.
    plt.figure(dpi=500)
    plt.scatter(x, y, c=colors, s=5)
    plt.plot(x_sorted, trendline(x_sorted), color='black', label='Trendline')
    plt.xlabel('IoU')
    plt.ylabel('Error')
    plt.title(f"m = {i}")
    plt.xlim(0, 1)
    plt.ylim(0, 0.45)
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()

# OOD: marco vs coco

In [None]:
# One histogram per ensemble size 1..m comparing the uncertainty ("error",
# index 10) of in-distribution and out-of-distribution predictions.
# Predictions of class 0 and class 5 are left out.
# Paths are built with os.path.join: the backslash literals relied on invalid
# escape sequences ("\h", "\e") and were Windows-only.
directory = os.path.join('plots_uncertainty', 'histogram_ood_0.15_noother')
os.makedirs(directory, exist_ok=True)

for i in range(1, m + 1):

    pred_id, _ = load(os.path.join('YOLOv9c_predictions_0.15', f'ensemble_{i}'), None)
    pred_ood, _ = load(os.path.join('YOLOv9c_predictions_0.15_coco', f'ensemble_{i}'), None)
    fuzzy(pred_id, 1)
    fuzzy(pred_ood, 1)

    error_id = []
    error_ood = []
    # NOTE(review): zip() stops at the shorter list, so images beyond the
    # size of the smaller prediction set are ignored - confirm this is wanted.
    for image_id, image_ood in zip(pred_id, pred_ood):
        for j in image_id:
            if j[0] != 0 and j[0] != 5:
                error_id.append(j[10])
        for k in image_ood:
            if k[0] != 0 and k[0] != 5:
                error_ood.append(k[10])

    # No plt.clf() here: with no figure open it creates an extra empty
    # figure on every iteration that plt.close() below never closes.
    plt.figure(dpi=500)
    # The first edge (0) is dropped, so errors below 0.02 are not counted
    # (presumably to hide zero-uncertainty predictions - TODO confirm).
    bin_edges = np.linspace(0, 1, 51)[1:]
    plt.hist(error_id, bins=bin_edges, density=True, alpha=0.5, label='In-Distribution', edgecolor='black', color='green')
    plt.hist(error_ood, bins=bin_edges, density=True, alpha=0.5, label='Out-of-Distribution', edgecolor='black', color='red')
    plt.xlabel('Error')
    plt.ylabel('Density')
    plt.title(f"m = {i}")
    plt.xlim(0, 0.45)
    plt.ylim(0, 20)
    plt.legend()
    plt.savefig(os.path.join(directory, f'{i}.png'))
    plt.close()