In [1]:
import torch
def compute_ece(probabilities, labels, num_bins=20, div_factor=None):
    """Compute the Expected Calibration Error (ECE) of a set of predictions.

    Each sample's confidence is its largest class probability and its
    prediction is the index of that probability.  Samples are grouped into
    ``num_bins`` equal-width confidence bins ``(lower, upper]`` spanning
    [0, 1]; every non-empty bin contributes
    ``|bin accuracy - mean bin confidence| * (bin size / denominator)``.

    Args:
        probabilities: 2-D tensor (or array-like accepted by
            ``torch.as_tensor``) laid out as (num_samples, num_classes).
        labels: True class index of every sample (tensor or array-like).
            A column vector of shape (num_samples, 1) is flattened.
        num_bins: Number of equal-width confidence bins; must be >= 1.
        div_factor: Optional denominator for the per-bin weight.  When
            ``None`` the number of samples in ``probabilities`` is used.

    Returns:
        The ECE as a Python ``float``.

    Raises:
        ValueError: If ``num_bins`` is smaller than 1.
    """
    if num_bins < 1:
        raise ValueError(f"num_bins must be >= 1, got {num_bins}")

    probabilities = torch.as_tensor(probabilities)
    # Flatten so an (N, 1) label column cannot silently broadcast against the
    # (N,) predictions, and keep labels on the same device as the predictions.
    labels = torch.as_tensor(labels).reshape(-1).to(probabilities.device)

    # torch.max over a dim yields both the winning probability and its index.
    confidences, predictions = torch.max(probabilities, dim=1)
    accuracies = predictions.eq(labels)

    denom = confidences.shape[0] if div_factor is None else div_factor

    # Build the edges in float64 and only then cast to the confidences' dtype.
    # Float32 edges compared against float64 confidences push values that sit
    # exactly on an edge (e.g. 0.7 or 0.9) into the bin above it.
    if confidences.is_floating_point():
        boundary_dtype = confidences.dtype
    else:
        boundary_dtype = torch.get_default_dtype()
    bin_boundaries = (
        torch.linspace(0, 1, num_bins + 1, dtype=torch.float64)
        .to(boundary_dtype)
        .to(confidences.device)
    )

    ece = 0.0
    for bin_lower, bin_upper in zip(bin_boundaries[:-1], bin_boundaries[1:]):
        # Samples whose confidence falls in the half-open bin (lower, upper].
        in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        bin_size = int(in_bin.sum().item())
        if bin_size == 0:
            continue
        accuracy_in_bin = accuracies[in_bin].float().mean().item()
        confidence_in_bin = confidences[in_bin].mean().item()
        ece += abs(accuracy_in_bin - confidence_in_bin) * (bin_size / denom)
    return ece

In [4]:
import numpy as np

# Binary classification example: one row of class probabilities per sample.
samples = torch.tensor(
    np.array(
        [
            [0.78, 0.22],
            [0.36, 0.64],
            [0.08, 0.92],
            [0.58, 0.42],
            [0.49, 0.51],
            [0.85, 0.15],
            [0.30, 0.70],
            [0.63, 0.37],
            [0.17, 0.83],
        ]
    )
)

# True class index for each row of `samples`.
true_labels = torch.tensor(np.array([0, 1, 0, 0, 0, 0, 1, 1, 1]))

In [11]:
# ECE of the binary example, using 5 confidence bins instead of the default 20.
compute_ece(samples, true_labels, num_bins=5)

0.10444443782170612

In [7]:
# Five-class example: one row of class probabilities per sample.
samples_multi = torch.tensor(
    np.array(
        [
            [0.25, 0.2, 0.22, 0.18, 0.15],
            [0.16, 0.06, 0.5, 0.07, 0.21],
            [0.06, 0.03, 0.8, 0.07, 0.04],
            [0.02, 0.03, 0.01, 0.04, 0.9],
            [0.4, 0.15, 0.16, 0.14, 0.15],
            [0.15, 0.28, 0.18, 0.17, 0.22],
            [0.07, 0.8, 0.03, 0.06, 0.04],
            [0.1, 0.05, 0.03, 0.75, 0.07],
            [0.25, 0.22, 0.05, 0.3, 0.18],
            [0.12, 0.09, 0.02, 0.17, 0.6],
        ]
    )
)

# True class index for each row of `samples_multi`.
true_labels_multi = torch.tensor(np.array([0, 2, 3, 4, 2, 0, 1, 3, 3, 2]))

In [10]:
# ECE of the five-class example, using 5 confidence bins instead of the default 20.
compute_ece(samples_multi, true_labels_multi, num_bins=5)

0.13199999403953555