In [127]:
import pycalib
import iteround
import numpy as np
import pandas as pd
from scipy.stats import dirichlet

In [128]:
def get_scores(alpha, size, random_state):
    """Draw Dirichlet score vectors and round every vector to one decimal.

    The global NumPy random generator is seeded with ``random_state`` before
    sampling, so any ``np.random`` call made afterwards continues from that
    seeded state.

    Parameters
    ----------
    alpha : sequence of float
        Concentration parameters forwarded to ``scipy.stats.dirichlet.rvs``.
    size : int
        Number of score vectors to draw.
    random_state : int
        Seed passed to ``np.random.seed``.

    Returns
    -------
    numpy.ndarray
        One row per draw, each rounded with ``iteround.saferound(row, 1)``
        (presumably so that a row still sums to 1 after rounding -- confirm
        against the iteround documentation).
    """
    np.random.seed(random_state)
    samples = dirichlet.rvs(alpha, size)

    rounded_rows = []
    for sample in samples:
        rounded_rows.append(iteround.saferound(sample, 1))

    return np.array(rounded_rows)

In [129]:
# Number of instances generated per group of scores: it is passed as `size`
# to get_scores and to np.random.choice below. Despite the name, it is not
# the number of classes (the label draws below use 3 classes).
n_class = 10

In [130]:
# Group 1: alpha = [5, 1, 1] puts the largest concentration on the first class.
# get_scores seeds the global NumPy RNG (seed 42), so the label draw on the
# next line continues from that seeded state -- keep the two lines in order.
scores_1 = get_scores([5, 1, 1], n_class, 42)
labels_1 = np.random.choice(3, size=(n_class, 1), p=[0.6, 0.2, 0.2])

In [131]:
# Group 2: same recipe with alpha = [1, 3, 2] and seed 43. The statements rely
# on the global RNG state left by get_scores, so their order matters.
scores_2 = get_scores([1, 3, 2], n_class, 43)
labels_2 = np.random.choice(3, size=(n_class, 1), p=[0.2, 0.5, 0.3])
# Pick one row and overwrite it with the uniform score vector (0.33 each) and
# the 0-based label 0 (which becomes class 1 once labels are shifted by +1).
random_instance = np.random.choice(n_class)
scores_2[random_instance] = np.around(np.ones(3) / 3, 2)
labels_2[random_instance] = 0

In [132]:
# Group 3: same recipe with alpha = [1, 2, 3] and seed 44.
scores_3 = get_scores([1, 2, 3], n_class, 44)
labels_3 = np.random.choice(3, size=(n_class, 1), p=[0.2, 0.3, 0.5])
# Reuses `random_instance` drawn in the previous cell (no new draw here), so
# the same row position gets the uniform score vector and 0-based label 0.
# NOTE(review): this cell only works after the group-2 cell has run.
scores_3[random_instance] = np.around(np.ones(3) / 3, 2)
labels_3[random_instance] = 0

In [133]:
# Stack the three groups row-wise; labels go from 0-based to 1-based ids.
scores = np.vstack((scores_1, scores_2, scores_3))
labels = 1 + np.vstack((labels_1, labels_2, labels_3))

In [134]:
# Assemble the toy data set: one score column per class plus the true label.
# Column names are LaTeX snippets; raw strings keep the backslash of `\hat`
# literal instead of relying on an invalid escape sequence.
toy = pd.DataFrame(
    np.hstack([scores, labels]),
    columns=[r'$\hat{p}_1$', r'$\hat{p}_2$', r'$\hat{p}_3$', '$y$']
).astype(
    {
        r'$\hat{p}_1$': float,
        r'$\hat{p}_2$': float,
        r'$\hat{p}_3$': float,
        # hstack promotes the integer labels to float; cast them back.
        '$y$': int
    }
)

In [176]:
# Order rows by true class, then by descending score of each class.
# Raw strings are used for every LaTeX fragment so that `\hat` and `\hfill`
# are not treated as (invalid) escape sequences.
sorted_toy = toy.sort_values(
    by=['$y$', r'$\hat{p}_1$', r'$\hat{p}_2$', r'$\hat{p}_3$'],
    ascending=[True, False, False, False]
)

# Emit the sorted table as three LaTeX tabulars of 10 rows each, numbered
# 1..30, separated by \hfill.
for i in range(3):
    init = 10 * i
    end = 10 * (i + 1)
    print(
        sorted_toy.iloc[init:end].set_index(
            np.arange(init + 1, end + 1)
        ).to_latex(escape=False),
        r'\hfill'
    )

\begin{tabular}{lrrrr}
\toprule
{} &  $\hat{p}_1$ &  $\hat{p}_2$ &  $\hat{p}_3$ &  $y$ \\
\midrule
1  &         1.00 &         0.00 &         0.00 &    1 \\
2  &         0.90 &         0.10 &         0.00 &    1 \\
3  &         0.80 &         0.10 &         0.10 &    1 \\
4  &         0.70 &         0.10 &         0.20 &    1 \\
5  &         0.60 &         0.30 &         0.10 &    1 \\
6  &         0.40 &         0.10 &         0.50 &    1 \\
7  &         0.33 &         0.33 &         0.33 &    1 \\
8  &         0.33 &         0.33 &         0.33 &    1 \\
9  &         0.20 &         0.40 &         0.40 &    1 \\
10 &         0.10 &         0.50 &         0.40 &    1 \\
\bottomrule
\end{tabular}
 \hfill
\begin{tabular}{lrrrr}
\toprule
{} &  $\hat{p}_1$ &  $\hat{p}_2$ &  $\hat{p}_3$ &  $y$ \\
\midrule
11 &          0.8 &          0.2 &          0.0 &    2 \\
12 &          0.7 &          0.0 &          0.3 &    2 \\
13 &          0.5 &          0.2 &          0.3 &    2 \\
14 &          

In [175]:
# Binary (class 1 vs rest) view of the sorted table, printed as three LaTeX
# tabulars of 10 rows each. Raw strings keep `\hat` / `\hfill` literal
# instead of relying on invalid escape sequences.
for i in range(3):
    init = 10 * i
    end = 10 * (i + 1)
    # set_index returns a new frame, so the assignments below do not touch
    # sorted_toy.
    temp_toy = sorted_toy.iloc[init:end].set_index(np.arange(init + 1, end + 1))
    # Label becomes 1 for class 1 and 0 otherwise.
    temp_toy['$y$'] = (temp_toy['$y$'] == 1).astype(int)
    # Score of the "rest" class is the sum of the class-2 and class-3 scores.
    temp_toy[r'$\hat{p}_0$'] = temp_toy[r'$\hat{p}_2$'] + temp_toy[r'$\hat{p}_3$']
    print(
        temp_toy.to_latex(
            escape=False,
            columns=[r'$\hat{p}_1$', r'$\hat{p}_0$', '$y$']
        ),
        r'\hfill'
    )

\begin{tabular}{lrrr}
\toprule
{} &  $\hat{p}_1$ &  $\hat{p}_0$ &  $y$ \\
\midrule
1  &         1.00 &         0.00 &    1 \\
2  &         0.90 &         0.10 &    1 \\
3  &         0.80 &         0.20 &    1 \\
4  &         0.70 &         0.30 &    1 \\
5  &         0.60 &         0.40 &    1 \\
6  &         0.40 &         0.60 &    1 \\
7  &         0.33 &         0.66 &    1 \\
8  &         0.33 &         0.66 &    1 \\
9  &         0.20 &         0.80 &    1 \\
10 &         0.10 &         0.90 &    1 \\
\bottomrule
\end{tabular}
 \hfill
\begin{tabular}{lrrr}
\toprule
{} &  $\hat{p}_1$ &  $\hat{p}_0$ &  $y$ \\
\midrule
11 &          0.8 &          0.2 &    0 \\
12 &          0.7 &          0.3 &    0 \\
13 &          0.5 &          0.5 &    0 \\
14 &          0.4 &          0.6 &    0 \\
15 &          0.4 &          0.6 &    0 \\
16 &          0.3 &          0.7 &    0 \\
17 &          0.2 &          0.8 &    0 \\
18 &          0.1 &          0.9 &    0 \\
19 &          0.1 &       

In [256]:
def get_bins(df, pos_label, n_bins, bin_edges=None):
    r"""Bin the scores of one class and summarise every bin.

    The scores are read from column ``$\hat{p}_<pos_label>$`` and the binary
    labels are ``1`` where ``df['$y$'] == pos_label`` and ``0`` elsewhere.
    A LaTeX table of the bins (without the two numeric mean columns) is
    printed as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``$\hat{p}_<pos_label>$`` and ``$y$``.
    pos_label : int
        Class treated as positive.
    n_bins : int
        Number of bins in the result.
    bin_edges : array-like, optional
        Bin edges handed to ``np.digitize``. When omitted, ``n_bins``
        equal-width edges over the observed score range are taken from
        ``np.histogram``.

    Returns
    -------
    pandas.DataFrame
        One row per bin with its name, size, sorted scores, sorted labels,
        and the mean score / mean label both as a ``sum/count`` string and
        as a number rounded to two decimals (``NaN`` for an empty bin).

    Notes
    -----
    Scores are compared with the edges exactly. NOTE(review): a score that
    equals an edge only up to floating-point rounding may therefore fall in
    the lower neighbouring bin.
    """
    positive_scores = df[rf'$\hat{{p}}_{pos_label}$'].values
    positive_labels = (df['$y$'] == pos_label).astype(int).values

    if bin_edges is None:
        _, bin_edges = np.histogram(positive_scores, bins=n_bins)

    # np.digitize yields 0 below the first edge and len(bin_edges) at or above
    # the last one. Clipping to [1, n_bins] before shifting to 0-based indices
    # puts such scores into the first / last bin; a lower bound of 0 would
    # leave index -1, which matches no bin and silently drops the score.
    bin_idx = np.clip(
        np.digitize(positive_scores, bin_edges),
        1, n_bins
    ) - 1

    bins_scores, bins_labels = [], []

    for b in range(n_bins):
        in_bin = bin_idx == b
        bins_scores.append(sorted(positive_scores[in_bin]))
        bins_labels.append(sorted(positive_labels[in_bin]))

    def rounded_mean(values):
        # np.mean([]) emits a RuntimeWarning; an empty bin simply has no mean.
        return np.around(np.mean(values), 2) if len(values) else np.nan

    bins_df = pd.DataFrame(
        {
            '$B_i$': [f'$B_{i + 1}$' for i in range(n_bins)],
            '$|B_{i}|$': [
                len(bs) for bs in bins_scores
            ],
            'scores': bins_scores,
            r'$\Bar{p}(B_{i})$': [
                '{}/{}'.format(np.around(np.sum(bs), 1), len(bs)) for bs in bins_scores
            ],
            r'$\Bar{p}(B_{i}2)$': [
                rounded_mean(bs) for bs in bins_scores
            ],
            'labels': bins_labels,
            r'$\Bar{y}(B_{i})$': [
                '{}/{}'.format(np.around(np.sum(bl), 1), len(bl)) for bl in bins_labels
            ],
            r'$\Bar{y}(B_{i}2)$': [
                rounded_mean(bl) for bl in bins_labels
            ]
        }
    )

    # Print the human-readable table: list brackets are stripped and the
    # rounded uniform score 0.33 is shown as the fraction 1/3.
    print(
        bins_df.drop([r'$\Bar{p}(B_{i}2)$', r'$\Bar{y}(B_{i}2)$'], axis=1).to_latex(
            escape=False,
            index=False
        ).replace('[', '').replace(']', '').replace('0.33', '1/3')
    )
    return bins_df

In [257]:
# Bin the class-1 scores into five bins; no edges are supplied, so get_bins
# derives them from the data.
n_bins = 5
bins_positive = get_bins(df=sorted_toy, pos_label=1, n_bins=n_bins)

\begin{tabular}{lrllll}
\toprule
 $B_i$ &  $|B_{i}|$ &                                      scores & $\Bar{p}(B_{i})$ &                    labels & $\Bar{y}(B_{i})$ \\
\midrule
 $B_1$ &          7 &         0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1 &            0.3/7 &     0, 0, 0, 0, 0, 0, 1 &              1/7 \\
 $B_2$ &          8 &  0.2, 0.2, 0.2, 0.2, 0.3, 0.3, 1/3, 1/3 &            2.1/8 &  0, 0, 0, 0, 0, 1, 1, 1 &              3/8 \\
 $B_3$ &          6 &              0.4, 0.4, 0.4, 0.5, 0.6, 0.6 &            2.9/6 &        0, 0, 0, 0, 1, 1 &              2/6 \\
 $B_4$ &          2 &                                  0.7, 0.7 &            1.4/2 &                    0, 1 &              1/2 \\
 $B_5$ &          7 &         0.8, 0.8, 0.8, 0.8, 0.8, 0.9, 1.0 &            5.9/7 &     0, 0, 0, 0, 1, 1, 1 &              3/7 \\
\bottomrule
\end{tabular}



In [201]:
# Compact per-bin summary for class 1. The numeric mean columns carry a "2"
# suffix in their names; it is stripped from the LaTeX header after rendering.
# Raw strings keep `\Bar` literal rather than relying on an invalid escape.
print(
    bins_positive.to_latex(
        columns=['$B_i$', r'$\Bar{p}(B_{i}2)$', r'$\Bar{y}(B_{i}2)$', '$|B_{i}|$'],
        escape=False,
        index=False
    ).replace('{i}2', '{i}')
)

\begin{tabular}{lrrr}
\toprule
 $B_i$ &  $\Bar{p}(B_{i})$ &  $\Bar{y}(B_{i})$ &  $|B_{i}|$ \\
\midrule
 $B_1$ &               0.04 &               0.14 &          7 \\
 $B_2$ &               0.26 &               0.38 &          8 \\
 $B_3$ &               0.48 &               0.33 &          6 \\
 $B_4$ &               0.70 &               0.50 &          2 \\
 $B_5$ &               0.84 &               0.43 &          7 \\
\bottomrule
\end{tabular}



In [217]:
def calculate_binary_ece(bins):
    r"""Size-weighted mean absolute gap between mean label and mean score.

    Parameters
    ----------
    bins : pandas.DataFrame
        Needs the columns ``$|B_{i}|$`` (bin size), ``$\Bar{p}(B_{i}2)$``
        (mean score) and ``$\Bar{y}(B_{i}2)$`` (mean label).

    Returns
    -------
    tuple
        ``(ece, numerator)`` where ``ece`` is rounded to four decimals and
        ``numerator`` is a LaTeX string ``"n_1 \cdot d_1 + n_2 \cdot d_2 ..."``
        listing the size and the rounded gap of every non-empty bin.
        ``ece`` is ``NaN`` when all bins are empty.
    """
    counts = bins['$|B_{i}|$']
    abs_diff = (bins[r'$\Bar{y}(B_{i}2)$'] - bins[r'$\Bar{p}(B_{i}2)$']).abs()

    # Empty bins have undefined (NaN) means and weight zero: leave them out so
    # they neither reach the sum nor show up as "0 \cdot nan" in the formula.
    non_empty = counts > 0
    counts = counts[non_empty]
    abs_diff = abs_diff[non_empty]

    numerator = ' + '.join(
        r'{} \cdot {}'.format(count, np.around(gap, 2))
        for count, gap in zip(counts, abs_diff)
    )

    total = counts.sum()
    if total == 0:
        # Nothing to average; avoid a division by zero.
        return np.nan, numerator

    return np.around((abs_diff * counts).sum() / total, 4), numerator

In [218]:
# (ECE value, LaTeX numerator) for the class-1 bins.
print(calculate_binary_ece(bins=bins_positive))

(0.1943, '7 \\cdot 0.1 + 8 \\cdot 0.12 + 6 \\cdot 0.15 + 2 \\cdot 0.2 + 7 \\cdot 0.41')


In [247]:
def get_confidence_df(df):
    r"""Reduce multiclass scores to (confidence, correct) pairs.

    For every row the confidence is the largest class score and the
    prediction is the class of the column holding it (the first such column
    on ties). The class id is parsed from the column name, which is expected
    to look like ``$\hat{p}_<k>$``.

    Parameters
    ----------
    df : pandas.DataFrame
        One ``$\hat{p}_<k>$`` column per class plus the true label in ``$y$``.

    Returns
    -------
    pandas.DataFrame
        Indexed ``1..len(df)``. The confidence is stored under
        ``$\hat{p}_1$`` and the 0/1 correctness flag under ``$y$``, so the
        frame can be passed to ``get_bins`` with ``pos_label=1``.
    """
    # Number rows 1..n for any input length rather than assuming 30 rows.
    scores = df.drop('$y$', axis=1).set_index(np.arange(1, len(df) + 1))

    conf_values = scores.max(axis=1)
    conf_columns = scores.idxmax(axis=1).values

    # Class id = text between the last underscore and the closing '$'; this
    # also handles ids with more than one digit.
    predictions = np.array(
        [int(col.rstrip('$').rsplit('_', 1)[-1]) for col in conf_columns]
    )
    correct = (df['$y$'].values == predictions).astype(int)

    return pd.DataFrame(
        {
            r'$\hat{p}_1$': conf_values,
            '$y$': correct
        }
    )

In [248]:
# Confidence / correctness view of the sorted toy table.
conf_df = get_confidence_df(df=sorted_toy)

In [249]:
# Print the confidence table as three LaTeX tabulars of 10 rows each.
# The index is not rendered (index=False), so the slices are not re-indexed;
# the raw string keeps `\hfill` literal instead of an invalid escape sequence.
for i in range(3):
    init = 10 * i
    end = 10 * (i + 1)
    print(
        conf_df.iloc[init:end].to_latex(
            header=['confidence', 'correct'], index=False, escape=False
        ),
        r'\hfill'
    )

\begin{tabular}{rr}
\toprule
confidence & correct \\
\midrule
      1.00 &       1 \\
      0.90 &       1 \\
      0.80 &       1 \\
      0.70 &       1 \\
      0.60 &       1 \\
      0.50 &       0 \\
      0.33 &       1 \\
      0.33 &       1 \\
      0.40 &       0 \\
      0.50 &       0 \\
\bottomrule
\end{tabular}
 \hfill
\begin{tabular}{rr}
\toprule
confidence & correct \\
\midrule
       0.8 &       0 \\
       0.7 &       0 \\
       0.5 &       0 \\
       0.4 &       0 \\
       0.4 &       0 \\
       0.4 &       1 \\
       0.5 &       0 \\
       0.6 &       1 \\
       0.6 &       0 \\
       0.8 &       0 \\
\bottomrule
\end{tabular}
 \hfill
\begin{tabular}{rr}
\toprule
confidence & correct \\
\midrule
       0.8 &       0 \\
       0.8 &       0 \\
       0.8 &       0 \\
       0.6 &       0 \\
       0.7 &       1 \\
       0.6 &       0 \\
       0.4 &       0 \\
       0.6 &       1 \\
       0.7 &       1 \\
       0.7 &       1 \\
\bottomrule
\end{tabular}


In [258]:
# Bin the confidences (stored by get_confidence_df in the class-1 score
# column) with six fixed, evenly spaced edges on [0, 1].
fixed_edges = np.linspace(0, 1, 6)
bins_conf = get_bins(df=conf_df, pos_label=1, n_bins=n_bins, bin_edges=fixed_edges)

\begin{tabular}{lrllll}
\toprule
 $B_i$ &  $|B_{i}|$ &                                             scores & $\Bar{p}(B_{i})$ &                                         labels & $\Bar{y}(B_{i})$ \\
\midrule
 $B_1$ &          0 &                                                  &            0.0/0 &                                              &            0.0/0 \\
 $B_2$ &          2 &                                       1/3, 1/3 &            0.7/2 &                                         1, 1 &              2/2 \\
 $B_3$ &         15 &  0.4, 0.4, 0.4, 0.4, 0.4, 0.5, 0.5, 0.5, 0.5, ... &           7.6/15 &  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 &             4/15 \\
 $B_4$ &          5 &                          0.7, 0.7, 0.7, 0.7, 0.7 &            3.5/5 &                                0, 1, 1, 1, 1 &              4/5 \\
 $B_5$ &          8 &           0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.9, 1.0 &            6.7/8 &                       0, 0, 0, 0, 0, 1, 1, 1 &              3/8 \\
\

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [259]:
# Compact per-bin summary for the confidence bins. The numeric mean columns
# carry a "2" suffix in their names; it is stripped from the LaTeX header
# after rendering. Raw strings keep `\Bar` literal rather than relying on an
# invalid escape sequence.
print(
    bins_conf.to_latex(
        columns=['$B_i$', r'$\Bar{p}(B_{i}2)$', r'$\Bar{y}(B_{i}2)$', '$|B_{i}|$'],
        escape=False,
        index=False
    ).replace('{i}2', '{i}')
)

\begin{tabular}{lrrr}
\toprule
 $B_i$ &  $\Bar{p}(B_{i})$ &  $\Bar{y}(B_{i})$ &  $|B_{i}|$ \\
\midrule
 $B_1$ &                NaN &                NaN &          0 \\
 $B_2$ &               0.33 &               1.00 &          2 \\
 $B_3$ &               0.51 &               0.27 &         15 \\
 $B_4$ &               0.70 &               0.80 &          5 \\
 $B_5$ &               0.84 &               0.38 &          8 \\
\bottomrule
\end{tabular}



In [260]:
# (ECE value, LaTeX numerator) for the confidence bins.
print(calculate_binary_ece(bins=bins_conf))

(0.304, '0 \\cdot nan + 2 \\cdot 0.67 + 15 \\cdot 0.24 + 5 \\cdot 0.1 + 8 \\cdot 0.46')


In [261]:
# Bins for class 2 taken as the positive class (data-derived edges).
bins_class_2 = get_bins(df=sorted_toy, pos_label=2, n_bins=n_bins)

\begin{tabular}{lrllll}
\toprule
 $B_i$ &  $|B_{i}|$ &                                             scores & $\Bar{p}(B_{i})$ &                          labels & $\Bar{y}(B_{i})$ \\
\midrule
 $B_1$ &         10 &  0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1, 0.1, ... &           0.5/10 &  0, 0, 0, 0, 0, 0, 0, 0, 0, 1 &             1/10 \\
 $B_2$ &          5 &                          0.2, 0.2, 0.2, 0.2, 0.2 &            1.0/5 &                 0, 1, 1, 1, 1 &              4/5 \\
 $B_3$ &          7 &              0.3, 0.3, 0.3, 0.3, 0.3, 1/3, 1/3 &            2.2/7 &           0, 0, 0, 0, 0, 1, 1 &              2/7 \\
 $B_4$ &          5 &                          0.4, 0.4, 0.4, 0.4, 0.4 &            2.0/5 &                 0, 0, 0, 1, 1 &              2/5 \\
 $B_5$ &          3 &                                    0.5, 0.6, 0.6 &            1.7/3 &                       0, 0, 1 &              1/3 \\
\bottomrule
\end{tabular}



In [262]:
# Bins for class 3 taken as the positive class (data-derived edges).
bins_class_3 = get_bins(df=sorted_toy, pos_label=3, n_bins=n_bins)

\begin{tabular}{lrllll}
\toprule
 $B_i$ &  $|B_{i}|$ &                                    scores & $\Bar{p}(B_{i})$ &                    labels & $\Bar{y}(B_{i})$ \\
\midrule
 $B_1$ &          7 &       0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1 &            0.3/7 &     0, 0, 0, 0, 0, 1, 1 &              2/7 \\
 $B_2$ &          8 &  0.2, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3, 0.3 &            2.0/8 &  0, 0, 0, 0, 0, 0, 1, 1 &              2/8 \\
 $B_3$ &          7 &     1/3, 1/3, 0.4, 0.4, 0.4, 0.4, 0.4 &            2.7/7 &     0, 0, 0, 0, 0, 1, 1 &              2/7 \\
 $B_4$ &          4 &                      0.5, 0.5, 0.6, 0.6 &            2.2/4 &              0, 0, 0, 1 &              1/4 \\
 $B_5$ &          4 &                      0.7, 0.7, 0.7, 0.8 &            2.9/4 &              0, 1, 1, 1 &              3/4 \\
\bottomrule
\end{tabular}



In [263]:
# (ECE value, LaTeX numerator) for the class-2 bins.
print(calculate_binary_ece(bins=bins_class_2))

(0.1453, '10 \\cdot 0.05 + 5 \\cdot 0.6 + 7 \\cdot 0.02 + 5 \\cdot 0.0 + 3 \\cdot 0.24')


In [264]:
# (ECE value, LaTeX numerator) for the class-3 bins.
print(calculate_binary_ece(bins=bins_class_3))

(0.1233, '7 \\cdot 0.25 + 8 \\cdot 0.0 + 7 \\cdot 0.09 + 4 \\cdot 0.3 + 4 \\cdot 0.03')


In [265]:
# Mean of the three per-class ECE values, computed from the bin tables
# instead of from numbers copied by hand out of earlier cell outputs, so the
# result stays correct when the data or the binning changes.
np.mean(
    [
        calculate_binary_ece(class_bins)[0]
        for class_bins in (bins_positive, bins_class_2, bins_class_3)
    ]
)

0.15430000000000002