# 2. Pseudo-real - tables - Supplementary

In [3]:
import numpy as np
import pandas as pd
from scipy import stats

from prepare_cifar5m_data import get_cifar5m

pd.set_option('display.max_rows', 500)
%load_ext autoreload
%autoreload 2

In [4]:
def correlation_finder(grp):
    """Correlate the two distance columns of one group.

    Reads the columns ``c_hat_distance_p_debiased`` and ``p_distance_c`` of
    `grp` and returns a Series with their Spearman and Pearson correlation
    coefficients, indexed ``["spearman", "pearson"]`` (p-values are dropped).
    """
    debiased = grp["c_hat_distance_p_debiased"]
    reference = grp["p_distance_c"]

    estimators = (("spearman", stats.spearmanr), ("pearson", stats.pearsonr))
    # Both scipy functions return (coefficient, p-value); keep the coefficient.
    coefficients = {label: estimate(debiased, reference)[0]
                    for label, estimate in estimators}

    return pd.Series(coefficients, index=["spearman", "pearson"])

# Cifar5M calmap table ($\hat{c}$ distance to $c$, metric `c_hat_distance_c`)

In [8]:
def cifar_table(fname, binnings, metric = "c_hat_distance_c", 
                tag_groups = [["1vsRest1"], ["1vsRest3"],  ["1vsRest5"], ["confidence"]],
               cgt_nr = 3, is_std = False, check_cal = True):
    """Build, display and save a LaTeX table of `metric` per binning method.

    One row is produced for every (model, tag group) pair over the models
    resnet110, densenet40 and wide32. Each cell holds the metric (multiplied by
    1000 and rounded to two decimals) with the rank of that binning within the
    row as a subscript; the cell with rank exactly 1 is set in bold. A final
    "avg rank" row holds each binning's mean rank over all rows.

    Parameters
    ----------
    fname : str
        Name of the file written inside ``tables/``. The part before the first
        "." is used as the table caption and label.
    binnings : list of str
        Values of the ``binning`` column to include; also the column order.
    metric : str
        Column of the frame returned by ``get_cifar5m()`` to tabulate.
    tag_groups : list of list of str
        Each inner list selects the ``tag_name`` values pooled into one row.
        The argument is only read, never mutated.
    cgt_nr : int
        Only rows with this ``cgt_nr`` value are used.
    is_std : bool
        If True, tabulate the standard deviation over seeds of the per-seed
        means instead of the overall mean.
    check_cal : bool
        If True, keep only rows whose ``calibration_function`` is in a fixed
        subset of calibration functions.

    Returns
    -------
    None
        The table is shown with ``display``, written to ``tables/<fname>`` and
        printed as LaTeX.

    Raises
    ------
    KeyError
        If a requested binning has no data in any row.
    ValueError
        If some (row, binning) cell has no value, e.g. an empty selection.
    """
    model_names = ["resnet110", "densenet40", "wide32"]
    # Human-readable row labels for the known tags; other tags keep their name.
    tag_labels = {"1vsRest1": "cars vs rest", "1vsRest3": "cats vs rest",
                  "1vsRest5": "dogs vs rest", "confidence": "confidence"}

    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt','PW6logIOCE', 'ScalingBinning', 'Isotonic']
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # Collect one Series (indexed by binning) per table row, then build the
    # frames once instead of growing them with concat inside the loop.
    row_keys, value_rows, rank_rows = [], [], []
    for model_name in model_names:
        for tag_group in tag_groups:
            df_sel = df_cifar5m[df_cifar5m.tag_name.isin(tag_group)
                                & (df_cifar5m.cgt_nr == cgt_nr)  # original note: "isotonic ground truth" (for the default 3, presumably)
                                & (df_cifar5m.model_name == model_name)]

            # Select the metric BEFORE aggregating: averaging the whole frame
            # fails on its string columns in pandas >= 2.0.
            if is_std:
                per_binning = (df_sel
                               .groupby(["binning", "seed"])[metric].mean()
                               .groupby(level="binning").std())
            else:
                per_binning = df_sel.groupby("binning")[metric].mean()

            row_keys.append((model_name,
                             ", ".join(tag_labels.get(tag, tag) for tag in tag_group)))
            value_rows.append(np.round(per_binning * 1000, 2))
            # Ranks come from the unrounded values (1 = smallest).
            rank_rows.append(per_binning.rank())

    row_index = pd.MultiIndex.from_tuples(row_keys, names=["Model", None])

    values = pd.DataFrame(value_rows)
    values.index = row_index
    values = values[binnings]
    values.columns.name = "binning"

    ranks = pd.DataFrame(rank_rows)
    ranks.index = row_index
    ranks = ranks[binnings]

    # Combine ranks and data into LaTeX math cells. The strings go into a fresh
    # object array rather than being assigned into the float frame.
    n_rows, n_cols = values.shape
    cells = np.empty((n_rows, n_cols), dtype=object)
    for row_idx in range(n_rows):
        for column_idx in range(n_cols):
            item = values.iat[row_idx, column_idx]
            rank = ranks.iat[row_idx, column_idx]

            if pd.isna(item) or pd.isna(rank):
                raise ValueError("no '%s' value for row %r, binning %r"
                                 % (metric, row_keys[row_idx], binnings[column_idx]))

            # str() keeps the shortest representation, so 3.90 prints as "3.9".
            cell = str(np.round(item, 4)) + "_{" + str(int(rank)) + "}"
            if rank == 1:  # tied (fractional) ranks are never bolded
                cell = "\\mathbf{" + cell + "}"
            cells[row_idx, column_idx] = "$" + cell + "$"

    df = pd.DataFrame(cells, index=row_index, columns=values.columns)

    avg_ranks = ["$%0.1f$" % rnk for rnk in ranks.mean().values]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=[("","avg rank")], columns=df.columns)])

    display(df)

    capt_str = fname.split(".")[0]
    # "_" is only valid in math mode, so escape it in the caption text; the
    # label may keep raw underscores.
    caption = capt_str.replace("_", "\\_")

    begin_list = ["\\begin{table}", "\\caption{%s}" % caption, "\\label{table:%s}" % capt_str,
                  "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    splitted = output.split("\n")
    # The last four pieces are: avg-rank row, \bottomrule, \end{tabular}, "".
    # Put an \hline before the avg-rank row and drop the trailing empty piece.
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)
            

In [4]:
# Columns (in display order) of the main comparison tables, grouped by family.
binnings1 = (
    # ES_* variants
    ['$ES_{10}$', '$ES_{15}$', '$ES_{20}$', '$ES_{25}$', '$ES_{30}$', '$ES_{sweep}$', '$ES_{CV}$']
    # PL* variants
    + ['$PL3^{CE}$', '$PL3^{MSE}$', '$PL_{NN}^{CE}$', '$PL_{NN}^{MSE}$', '$PL_{DE}$']
    # remaining methods
    + ['Platt', 'beta', 'isotonic', 'KDE']
)

In [6]:
# cifar_table has no return statement, so `out` is None; the table is
# displayed, printed and saved to tables/chat_dist_c.tex as side effects.
out = cifar_table(
    fname="chat_dist_c.tex",
    binnings=binnings1,
    cgt_nr=3,
    check_cal=True,
)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$4.38_{13}$,$3.78_{6}$,$3.71_{5}$,$3.98_{11}$,$3.82_{7}$,$4.64_{15}$,$3.9_{9}$,$3.52_{3}$,$4.57_{14}$,$3.18_{2}$,$3.64_{4}$,$3.92_{10}$,$4.32_{12}$,$\mathbf{3.07_{1}}$,$3.85_{8}$,$101.32_{16}$
resnet110,cats vs rest,$8.94_{4}$,$10.4_{11}$,$10.37_{9}$,$10.86_{12}$,$10.87_{13}$,$11.1_{14}$,$10.04_{6}$,$7.72_{2}$,$9.7_{5}$,$\mathbf{7.44_{1}}$,$8.3_{3}$,$10.17_{8}$,$11.55_{15}$,$10.4_{10}$,$10.11_{7}$,$37.36_{16}$
resnet110,dogs vs rest,$8.28_{6}$,$9.24_{11}$,$9.24_{12}$,$9.87_{14}$,$11.04_{15}$,$8.53_{7}$,$9.07_{10}$,$7.0_{2}$,$8.93_{8}$,$\mathbf{6.37_{1}}$,$7.73_{3}$,$8.97_{9}$,$8.16_{5}$,$8.05_{4}$,$9.68_{13}$,$27.18_{16}$
resnet110,confidence,$12.69_{9}$,$14.29_{11}$,$15.77_{13}$,$17.61_{14}$,$19.11_{15}$,$11.89_{8}$,$13.86_{10}$,$11.09_{4}$,$11.63_{6}$,$\mathbf{9.79_{1}}$,$10.84_{3}$,$11.76_{7}$,$11.57_{5}$,$10.36_{2}$,$15.01_{12}$,$23.39_{16}$
densenet40,cars vs rest,$7.6_{15}$,$4.95_{11}$,$5.15_{14}$,$5.04_{13}$,$4.68_{10}$,$4.97_{12}$,$3.79_{6}$,$3.32_{4}$,$4.53_{9}$,$\mathbf{2.43_{1}}$,$3.21_{3}$,$4.21_{8}$,$3.69_{5}$,$3.13_{2}$,$3.9_{7}$,$125.76_{16}$
densenet40,cats vs rest,$10.04_{11}$,$11.93_{14}$,$9.61_{10}$,$11.33_{13}$,$12.41_{15}$,$10.08_{12}$,$9.12_{8}$,$7.16_{3}$,$8.49_{6}$,$\mathbf{5.72_{1}}$,$6.61_{2}$,$7.6_{4}$,$8.88_{7}$,$8.31_{5}$,$9.57_{9}$,$74.05_{16}$
densenet40,dogs vs rest,$7.65_{7}$,$8.96_{12}$,$9.73_{13}$,$10.38_{14}$,$10.64_{15}$,$8.27_{10}$,$8.02_{9}$,$6.79_{4}$,$7.66_{8}$,$\mathbf{5.02_{1}}$,$6.17_{2}$,$6.94_{5}$,$7.15_{6}$,$6.66_{3}$,$8.45_{11}$,$63.42_{16}$
densenet40,confidence,$12.38_{9}$,$14.09_{11}$,$17.03_{13}$,$18.1_{14}$,$19.75_{15}$,$11.04_{2}$,$12.55_{10}$,$11.05_{3}$,$12.09_{8}$,$\mathbf{10.75_{1}}$,$11.56_{6}$,$11.37_{5}$,$11.99_{7}$,$11.31_{4}$,$14.67_{12}$,$21.93_{16}$
wide32,cars vs rest,$3.75_{7}$,$3.48_{4}$,$3.65_{5}$,$3.78_{9}$,$3.93_{11}$,$4.22_{13}$,$3.76_{8}$,$3.89_{10}$,$5.06_{15}$,$\mathbf{2.62_{1}}$,$3.22_{3}$,$3.66_{6}$,$4.63_{14}$,$3.2_{2}$,$3.95_{12}$,$170.14_{16}$
wide32,cats vs rest,$9.94_{12}$,$9.01_{7}$,$10.09_{13}$,$10.81_{15}$,$10.51_{14}$,$9.26_{10}$,$8.57_{5}$,$7.43_{3}$,$9.29_{11}$,$\mathbf{6.75_{1}}$,$7.41_{2}$,$8.8_{6}$,$9.03_{8}$,$8.36_{4}$,$9.14_{9}$,$42.39_{16}$


\begin{table}
\caption{chat_dist_c}
\label{table:chat_dist_c}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &     $ES_{10}$ &     $ES_{15}$ &     $ES_{20}$ &     $ES_{25}$ &     $ES_{30}$ &  $ES_{sweep}$ &     $ES_{CV}$ &   $PL3^{CE}$ &  $PL3^{MSE}$ &        $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ &         Platt &                 beta &      isotonic &            KDE \\
Model & {} &               &               &               &               &               &               &               &              &              &                       &                 &              &               &                      &               &                \\
\midrule
resnet110 & cars vs rest &   $4.38_{13}$ &    $3.78_{6}$ &    $3.71_{5}$ &   $3.98_{11}$ &    $3.82_{7}$ &   $4.64_{15}$ &     $3.9_{9}$ &   $3.52_{3}$ &  $4.57_{14}$ &            $3.18_{2}$ &      $3.64_{4}$ &  $3.92_{10}$ &   $4.32_{12}$ &  $\mathbf{3.07_{

In [31]:
# Table of the `c_hat_distance_c_square` metric over the binnings1 columns;
# cifar_table writes it to tables/chat_dist_c_sq.tex.
cifar_table(
    fname="chat_dist_c_sq.tex",
    binnings=binnings1,
    metric="c_hat_distance_c_square",
)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$0.18_{9}$,$0.15_{2}$,$0.16_{5}$,$0.17_{6}$,$0.17_{7}$,$0.22_{13}$,$0.18_{8}$,$\mathbf{0.14_{1}}$,$0.2_{12}$,$0.16_{4}$,$0.16_{3}$,$0.19_{10}$,$0.31_{14}$,$0.2_{11}$,$0.33_{15}$,$90.6_{16}$
resnet110,cats vs rest,$0.37_{5}$,$0.52_{8}$,$0.5_{7}$,$0.58_{13}$,$0.57_{12}$,$0.54_{10}$,$0.5_{6}$,$0.33_{3}$,$0.34_{4}$,$\mathbf{0.29_{1}}$,$0.31_{2}$,$0.52_{9}$,$0.56_{11}$,$0.62_{14}$,$0.64_{15}$,$17.57_{16}$
resnet110,dogs vs rest,$0.32_{3}$,$0.4_{9}$,$0.43_{10}$,$0.51_{12}$,$0.58_{14}$,$0.34_{6}$,$0.47_{11}$,$0.29_{2}$,$0.38_{8}$,$\mathbf{0.21_{1}}$,$0.34_{5}$,$0.52_{13}$,$0.32_{4}$,$0.37_{7}$,$0.66_{15}$,$11.51_{16}$
resnet110,confidence,$0.45_{7}$,$0.61_{10}$,$0.73_{12}$,$0.99_{14}$,$1.13_{15}$,$0.48_{8}$,$0.66_{11}$,$0.38_{5}$,$0.36_{4}$,$\mathbf{0.27_{1}}$,$0.3_{2}$,$0.53_{9}$,$0.38_{6}$,$0.32_{3}$,$0.85_{13}$,$3.27_{16}$
densenet40,cars vs rest,$0.32_{13}$,$0.16_{2}$,$0.19_{7}$,$0.17_{4}$,$0.2_{9}$,$0.31_{12}$,$0.19_{6}$,$0.18_{5}$,$0.25_{10}$,$\mathbf{0.1_{1}}$,$0.16_{3}$,$0.41_{14}$,$0.2_{8}$,$0.25_{11}$,$0.41_{15}$,$115.55_{16}$
densenet40,cats vs rest,$0.34_{5}$,$0.48_{12}$,$0.34_{6}$,$0.49_{13}$,$0.58_{14}$,$0.4_{10}$,$0.34_{7}$,$0.24_{3}$,$0.29_{4}$,$\mathbf{0.17_{1}}$,$0.2_{2}$,$0.37_{9}$,$0.43_{11}$,$0.36_{8}$,$0.59_{15}$,$43.51_{16}$
densenet40,dogs vs rest,$0.23_{3}$,$0.34_{10}$,$0.4_{12}$,$0.48_{13}$,$0.52_{14}$,$0.28_{5}$,$0.36_{11}$,$0.29_{6}$,$0.32_{8}$,$\mathbf{0.15_{1}}$,$0.18_{2}$,$0.33_{9}$,$0.3_{7}$,$0.27_{4}$,$0.53_{15}$,$38.26_{16}$
densenet40,confidence,$0.52_{9}$,$0.64_{11}$,$0.91_{12}$,$1.0_{14}$,$1.19_{15}$,$0.44_{3}$,$0.55_{10}$,$0.49_{6}$,$0.5_{7}$,$0.42_{2}$,$\mathbf{0.42_{1}}$,$0.51_{8}$,$0.47_{5}$,$0.46_{4}$,$0.94_{13}$,$3.1_{16}$
wide32,cars vs rest,$0.14_{4}$,$0.12_{2}$,$0.16_{7}$,$0.15_{6}$,$0.18_{8}$,$0.22_{11}$,$0.15_{5}$,$0.22_{12}$,$0.29_{13}$,$\mathbf{0.1_{1}}$,$0.12_{3}$,$0.2_{9}$,$0.31_{14}$,$0.22_{10}$,$0.38_{15}$,$157.82_{16}$
wide32,cats vs rest,$0.39_{8}$,$0.35_{6}$,$0.41_{11}$,$0.49_{14}$,$0.48_{13}$,$0.39_{10}$,$0.34_{5}$,$0.27_{3}$,$0.33_{4}$,$\mathbf{0.24_{1}}$,$0.25_{2}$,$0.43_{12}$,$0.35_{7}$,$0.39_{9}$,$0.56_{15}$,$21.13_{16}$


\begin{table}
\caption{chat_dist_c_sq}
\label{table:chat_dist_c_sq}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &    $ES_{10}$ &    $ES_{15}$ &    $ES_{20}$ &    $ES_{25}$ &    $ES_{30}$ & $ES_{sweep}$ &    $ES_{CV}$ &           $PL3^{CE}$ &  $PL3^{MSE}$ &       $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &    $PL_{DE}$ &        Platt &         beta &     isotonic &            KDE \\
Model & {} &              &              &              &              &              &              &              &                      &              &                      &                      &              &              &              &              &                \\
\midrule
resnet110 & cars vs rest &   $0.18_{9}$ &   $0.15_{2}$ &   $0.16_{5}$ &   $0.17_{6}$ &   $0.17_{7}$ &  $0.22_{13}$ &   $0.18_{8}$ &  $\mathbf{0.14_{1}}$ &   $0.2_{12}$ &           $0.16_{4}$ &           $0.16_{3}$ &  $0.19_{10}$ &  $0.31_{14}$ &   $0.2_{11}$ &  

In [7]:
# Table of the `ECE_abs` metric over the binnings1 columns;
# cifar_table writes it to tables/ECE_abs.tex.
cifar_table(
    fname="ECE_abs.tex",
    binnings=binnings1,
    metric="ECE_abs",
)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$1.99_{11}$,$1.85_{9}$,$1.85_{8}$,$1.61_{4}$,$1.7_{5}$,$1.94_{10}$,$2.84_{15}$,$\mathbf{1.36_{1}}$,$2.15_{13}$,$2.03_{12}$,$1.71_{6}$,$1.8_{7}$,$2.34_{14}$,$1.4_{2}$,$1.52_{3}$,$98.37_{16}$
resnet110,cats vs rest,$2.32_{2}$,$2.35_{3}$,$2.37_{4}$,$2.41_{5}$,$\mathbf{2.26_{1}}$,$4.01_{13}$,$3.16_{7}$,$2.58_{6}$,$4.08_{14}$,$3.6_{10}$,$3.17_{8}$,$3.29_{9}$,$5.5_{15}$,$3.69_{11}$,$3.97_{12}$,$29.28_{16}$
resnet110,dogs vs rest,$3.29_{5}$,$\mathbf{2.83_{1}}$,$2.88_{2}$,$3.18_{4}$,$3.65_{10}$,$3.29_{6}$,$4.24_{14}$,$2.9_{3}$,$4.0_{13}$,$3.55_{8}$,$3.74_{11}$,$3.96_{12}$,$3.62_{9}$,$3.31_{7}$,$5.0_{15}$,$20.24_{16}$
resnet110,confidence,$4.16_{3}$,$4.45_{8}$,$4.29_{5}$,$4.39_{6}$,$5.18_{13}$,$4.85_{12}$,$6.68_{15}$,$4.02_{2}$,$4.56_{10}$,$4.39_{7}$,$4.72_{11}$,$4.29_{4}$,$4.48_{9}$,$\mathbf{3.03_{1}}$,$6.03_{14}$,$13.91_{16}$
densenet40,cars vs rest,$1.23_{4}$,$1.11_{2}$,$1.29_{6}$,$\mathbf{1.09_{1}}$,$1.11_{3}$,$1.52_{10}$,$2.31_{14}$,$1.38_{7}$,$2.62_{15}$,$1.49_{9}$,$1.45_{8}$,$2.14_{13}$,$1.86_{11}$,$1.25_{5}$,$1.98_{12}$,$123.28_{16}$
densenet40,cats vs rest,$3.28_{4}$,$\mathbf{2.63_{1}}$,$2.71_{2}$,$3.33_{5}$,$3.03_{3}$,$4.27_{10}$,$4.75_{12}$,$3.38_{7}$,$4.82_{13}$,$3.6_{9}$,$3.38_{6}$,$3.45_{8}$,$5.14_{14}$,$4.49_{11}$,$5.27_{15}$,$68.41_{16}$
densenet40,dogs vs rest,$2.96_{8}$,$2.75_{3}$,$2.72_{2}$,$2.84_{5}$,$3.11_{9}$,$3.5_{11}$,$4.01_{13}$,$3.29_{10}$,$4.48_{14}$,$2.83_{4}$,$\mathbf{2.43_{1}}$,$2.92_{6}$,$3.93_{12}$,$2.94_{7}$,$4.57_{15}$,$57.85_{16}$
densenet40,confidence,$\mathbf{4.95_{1}}$,$5.12_{3}$,$6.34_{11}$,$6.18_{10}$,$7.11_{14}$,$5.29_{5}$,$6.7_{13}$,$5.27_{4}$,$5.62_{8}$,$5.97_{9}$,$5.59_{7}$,$5.31_{6}$,$6.6_{12}$,$5.06_{2}$,$9.21_{15}$,$14.97_{16}$
wide32,cars vs rest,$2.07_{12}$,$1.67_{7}$,$1.57_{4}$,$1.41_{2}$,$1.57_{5}$,$2.05_{11}$,$2.85_{15}$,$\mathbf{1.39_{1}}$,$2.56_{14}$,$1.73_{8}$,$1.83_{9}$,$1.62_{6}$,$1.98_{10}$,$1.51_{3}$,$2.33_{13}$,$168.33_{16}$
wide32,cats vs rest,$2.99_{6}$,$2.99_{5}$,$2.55_{2}$,$\mathbf{2.4_{1}}$,$2.82_{3}$,$4.49_{14}$,$4.86_{15}$,$2.95_{4}$,$4.4_{13}$,$3.87_{9}$,$3.26_{7}$,$3.44_{8}$,$4.14_{12}$,$3.91_{10}$,$4.14_{11}$,$35.59_{16}$


\begin{table}
\caption{ECE_abs}
\label{table:ECE_abs}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &            $ES_{10}$ &            $ES_{15}$ &    $ES_{20}$ &            $ES_{25}$ &            $ES_{30}$ & $ES_{sweep}$ &    $ES_{CV}$ &           $PL3^{CE}$ &  $PL3^{MSE}$ & $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &    $PL_{DE}$ &        Platt &                 beta &     isotonic &            KDE \\
Model & {} &                      &                      &              &                      &                      &              &              &                      &              &                &                      &              &              &                      &              &                \\
\midrule
resnet110 & cars vs rest &          $1.99_{11}$ &           $1.85_{9}$ &   $1.85_{8}$ &           $1.61_{4}$ &            $1.7_{5}$ &  $1.94_{10}$ &  $2.84_{15}$ &  $\mathbf{1.36_{1}}$ &  $2.15_{13}$ &    $2.

In [8]:
# Table of the `ECE_square` metric over the binnings1 columns;
# cifar_table writes it to tables/ECE_sq.tex.
cifar_table(
    fname="ECE_sq.tex",
    binnings=binnings1,
    metric="ECE_square",
)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$0.2_{5}$,$0.21_{7}$,$0.22_{11}$,$0.18_{2}$,$0.24_{12}$,$0.28_{14}$,$0.22_{9}$,$0.18_{3}$,$0.22_{10}$,$0.18_{4}$,$\mathbf{0.17_{1}}$,$0.22_{8}$,$0.24_{13}$,$0.21_{6}$,$0.32_{15}$,$90.6_{16}$
resnet110,cats vs rest,$0.25_{3}$,$0.24_{2}$,$0.27_{5}$,$0.26_{4}$,$0.28_{7}$,$0.38_{12}$,$0.28_{6}$,$\mathbf{0.24_{1}}$,$0.3_{9}$,$0.29_{8}$,$0.31_{10}$,$0.41_{13}$,$0.47_{14}$,$0.33_{11}$,$0.59_{15}$,$17.58_{16}$
resnet110,dogs vs rest,$0.21_{4}$,$0.19_{2}$,$0.28_{9}$,$0.29_{11}$,$0.29_{10}$,$0.23_{7}$,$0.29_{12}$,$0.22_{6}$,$0.31_{13}$,$\mathbf{0.17_{1}}$,$0.27_{8}$,$0.46_{14}$,$0.2_{3}$,$0.21_{5}$,$0.63_{15}$,$11.38_{16}$
resnet110,confidence,$0.38_{7}$,$0.41_{8}$,$0.44_{10}$,$0.47_{12}$,$0.52_{14}$,$0.44_{11}$,$0.43_{9}$,$0.36_{6}$,$0.36_{5}$,$0.28_{2}$,$0.32_{3}$,$0.49_{13}$,$0.32_{4}$,$\mathbf{0.24_{1}}$,$0.64_{15}$,$3.08_{16}$
densenet40,cars vs rest,$0.17_{8}$,$0.16_{5}$,$0.16_{6}$,$0.16_{4}$,$0.17_{7}$,$0.26_{13}$,$0.18_{10}$,$0.17_{9}$,$0.24_{12}$,$\mathbf{0.11_{1}}$,$0.15_{3}$,$0.45_{15}$,$0.14_{2}$,$0.22_{11}$,$0.42_{14}$,$115.57_{16}$
densenet40,cats vs rest,$0.21_{5}$,$0.21_{4}$,$0.23_{6}$,$0.25_{9}$,$0.23_{7}$,$0.27_{10}$,$0.2_{2}$,$0.24_{8}$,$0.3_{11}$,$\mathbf{0.16_{1}}$,$0.2_{3}$,$0.36_{13}$,$0.42_{14}$,$0.32_{12}$,$0.54_{15}$,$43.47_{16}$
densenet40,dogs vs rest,$0.16_{3}$,$0.18_{5}$,$0.21_{8}$,$0.25_{11}$,$0.21_{7}$,$0.24_{10}$,$0.2_{6}$,$0.23_{9}$,$0.3_{13}$,$\mathbf{0.13_{1}}$,$0.14_{2}$,$0.32_{14}$,$0.25_{12}$,$0.17_{4}$,$0.51_{15}$,$38.16_{16}$
densenet40,confidence,$\mathbf{0.29_{1}}$,$0.32_{5}$,$0.43_{14}$,$0.34_{8}$,$0.34_{9}$,$0.29_{2}$,$0.33_{7}$,$0.36_{11}$,$0.41_{12}$,$0.33_{6}$,$0.32_{4}$,$0.42_{13}$,$0.31_{3}$,$0.36_{10}$,$0.99_{15}$,$2.96_{16}$
wide32,cars vs rest,$0.15_{9}$,$0.13_{3}$,$0.16_{10}$,$0.13_{4}$,$0.15_{8}$,$0.18_{11}$,$0.14_{5}$,$0.14_{6}$,$0.19_{13}$,$\mathbf{0.1_{1}}$,$0.11_{2}$,$0.19_{14}$,$0.15_{7}$,$0.18_{12}$,$0.37_{15}$,$157.9_{16}$
wide32,cats vs rest,$0.28_{7}$,$0.29_{11}$,$0.29_{9}$,$0.29_{10}$,$0.33_{13}$,$0.33_{12}$,$0.26_{5}$,$0.22_{3}$,$0.28_{6}$,$\mathbf{0.2_{1}}$,$0.21_{2}$,$0.34_{14}$,$0.25_{4}$,$0.29_{8}$,$0.44_{15}$,$20.95_{16}$


\begin{table}
\caption{ECE_sq}
\label{table:ECE_sq}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &            $ES_{10}$ &    $ES_{15}$ &    $ES_{20}$ &    $ES_{25}$ &    $ES_{30}$ & $ES_{sweep}$ &    $ES_{CV}$ &           $PL3^{CE}$ &          $PL3^{MSE}$ &       $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &    $PL_{DE}$ &        Platt &                 beta &     isotonic &            KDE \\
Model & {} &                      &              &              &              &              &              &              &                      &                      &                      &                      &              &              &                      &              &                \\
\midrule
resnet110 & cars vs rest &            $0.2_{5}$ &   $0.21_{7}$ &  $0.22_{11}$ &   $0.18_{2}$ &  $0.24_{12}$ &  $0.28_{14}$ &   $0.22_{9}$ &           $0.18_{3}$ &          $0.22_{10}$ &           $0.18_{4}$ &  $\mathbf{0.17_{1}}$ 

In [9]:
# Columns of the PL-only comparison tables (file names end in "_CE_MSE").
binnings2 = [
    '$PL3^{CE}$',
    '$PL3^{MSE}$',
    '$PL_{NN}^{CE}$',
    '$PL_{NN}^{MSE}$',
    '$PL_{DE}$',
    '$PL_{DE}^{2}$',
]

In [10]:
# Table of the `c_hat_distance_c` metric over the binnings2 columns;
# cifar_table writes it to tables/chat_dist_c_CE_MSE.tex.
cifar_table(
    fname="chat_dist_c_CE_MSE.tex",
    binnings=binnings2,
    metric="c_hat_distance_c",
)

Unnamed: 0_level_0,binning,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,$PL_{DE}^{2}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet110,cars vs rest,$3.52_{2}$,$4.57_{6}$,$\mathbf{3.18_{1}}$,$3.64_{3}$,$3.92_{5}$,$3.77_{4}$
resnet110,cats vs rest,$7.72_{2}$,$9.7_{5}$,$\mathbf{7.44_{1}}$,$8.3_{3}$,$10.17_{6}$,$9.47_{4}$
resnet110,dogs vs rest,$7.0_{2}$,$8.93_{4}$,$\mathbf{6.37_{1}}$,$7.73_{3}$,$8.97_{5}$,$9.57_{6}$
resnet110,confidence,$11.09_{3}$,$11.63_{4}$,$\mathbf{9.79_{1}}$,$10.84_{2}$,$11.76_{6}$,$11.66_{5}$
densenet40,cars vs rest,$3.32_{3}$,$4.53_{6}$,$\mathbf{2.43_{1}}$,$3.21_{2}$,$4.21_{5}$,$4.02_{4}$
densenet40,cats vs rest,$7.16_{3}$,$8.49_{6}$,$\mathbf{5.72_{1}}$,$6.61_{2}$,$7.6_{4}$,$8.25_{5}$
densenet40,dogs vs rest,$6.79_{3}$,$7.66_{6}$,$\mathbf{5.02_{1}}$,$6.17_{2}$,$6.94_{4}$,$6.95_{5}$
densenet40,confidence,$11.05_{2}$,$12.09_{6}$,$\mathbf{10.75_{1}}$,$11.56_{4}$,$11.37_{3}$,$11.99_{5}$
wide32,cars vs rest,$3.89_{4}$,$5.06_{6}$,$\mathbf{2.62_{1}}$,$3.22_{2}$,$3.66_{3}$,$3.91_{5}$
wide32,cats vs rest,$7.43_{3}$,$9.29_{6}$,$\mathbf{6.75_{1}}$,$7.41_{2}$,$8.8_{4}$,$9.01_{5}$


\begin{table}
\caption{chat_dist_c_CE_MSE}
\label{table:chat_dist_c_CE_MSE}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllll}
\toprule
       & binning &   $PL3^{CE}$ &  $PL3^{MSE}$ &        $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ & $PL_{DE}^{2}$ \\
Model & {} &              &              &                       &                 &              &               \\
\midrule
resnet110 & cars vs rest &   $3.52_{2}$ &   $4.57_{6}$ &   $\mathbf{3.18_{1}}$ &      $3.64_{3}$ &   $3.92_{5}$ &    $3.77_{4}$ \\
       & cats vs rest &   $7.72_{2}$ &    $9.7_{5}$ &   $\mathbf{7.44_{1}}$ &       $8.3_{3}$ &  $10.17_{6}$ &    $9.47_{4}$ \\
       & dogs vs rest &    $7.0_{2}$ &   $8.93_{4}$ &   $\mathbf{6.37_{1}}$ &      $7.73_{3}$ &   $8.97_{5}$ &    $9.57_{6}$ \\
       & confidence &  $11.09_{3}$ &  $11.63_{4}$ &   $\mathbf{9.79_{1}}$ &     $10.84_{2}$ &  $11.76_{6}$ &   $11.66_{5}$ \\
densenet40 & cars vs rest &   $3.32_{3}$ &   $4.53_{6}$ &   $\mathbf{2.43_{1}

In [11]:
# Table of the `ECE_abs` metric over the binnings2 columns;
# cifar_table writes it to tables/ECE_abs_CE_MSE.tex.
cifar_table(
    fname="ECE_abs_CE_MSE.tex",
    binnings=binnings2,
    metric="ECE_abs",
)

Unnamed: 0_level_0,binning,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,$PL_{DE}^{2}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet110,cars vs rest,$\mathbf{1.36_{1}}$,$2.15_{6}$,$2.03_{5}$,$1.71_{2}$,$1.8_{3}$,$2.01_{4}$
resnet110,cats vs rest,$\mathbf{2.58_{1}}$,$4.08_{6}$,$3.6_{5}$,$3.17_{2}$,$3.29_{3}$,$3.33_{4}$
resnet110,dogs vs rest,$\mathbf{2.9_{1}}$,$4.0_{6}$,$3.55_{2}$,$3.74_{3}$,$3.96_{4}$,$3.99_{5}$
resnet110,confidence,$\mathbf{4.02_{1}}$,$4.56_{5}$,$4.39_{3}$,$4.72_{6}$,$4.29_{2}$,$4.52_{4}$
densenet40,cars vs rest,$\mathbf{1.38_{1}}$,$2.62_{6}$,$1.49_{3}$,$1.45_{2}$,$2.14_{4}$,$2.19_{5}$
densenet40,cats vs rest,$3.38_{2}$,$4.82_{6}$,$3.6_{4}$,$\mathbf{3.38_{1}}$,$3.45_{3}$,$3.66_{5}$
densenet40,dogs vs rest,$3.29_{5}$,$4.48_{6}$,$2.83_{3}$,$\mathbf{2.43_{1}}$,$2.92_{4}$,$2.47_{2}$
densenet40,confidence,$\mathbf{5.27_{1}}$,$5.62_{4}$,$5.97_{5}$,$5.59_{3}$,$5.31_{2}$,$6.08_{6}$
wide32,cars vs rest,$\mathbf{1.39_{1}}$,$2.56_{6}$,$1.73_{3}$,$1.83_{4}$,$1.62_{2}$,$1.87_{5}$
wide32,cats vs rest,$\mathbf{2.95_{1}}$,$4.4_{6}$,$3.87_{5}$,$3.26_{2}$,$3.44_{3}$,$3.63_{4}$


\begin{table}
\caption{ECE_abs_CE_MSE}
\label{table:ECE_abs_CE_MSE}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllll}
\toprule
       & binning &           $PL3^{CE}$ &          $PL3^{MSE}$ &       $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &   $PL_{DE}$ & $PL_{DE}^{2}$ \\
Model & {} &                      &                      &                      &                      &             &               \\
\midrule
resnet110 & cars vs rest &  $\mathbf{1.36_{1}}$ &           $2.15_{6}$ &           $2.03_{5}$ &           $1.71_{2}$ &   $1.8_{3}$ &    $2.01_{4}$ \\
       & cats vs rest &  $\mathbf{2.58_{1}}$ &           $4.08_{6}$ &            $3.6_{5}$ &           $3.17_{2}$ &  $3.29_{3}$ &    $3.33_{4}$ \\
       & dogs vs rest &   $\mathbf{2.9_{1}}$ &            $4.0_{6}$ &           $3.55_{2}$ &           $3.74_{3}$ &  $3.96_{4}$ &    $3.99_{5}$ \\
       & confidence &  $\mathbf{4.02_{1}}$ &           $4.56_{5}$ &           $4.39_{3}$ &           $4.72_{6}$ &  $

## Equal size vs equal width

In [12]:
# EW/ES columns interleaved so each EW variant sits next to its ES counterpart.
binnings_es_ew = [
    name
    for ew_es_pair in (
        ('$EW_{10}$', '$ES_{10}$'),
        ('$EW_{15}$', '$ES_{15}$'),
        ('$EW_{20}$', '$ES_{20}$'),
        ('$EW_{CV}$', '$ES_{CV}$'),
    )
    for name in ew_es_pair
]

In [13]:
# Table of the `c_hat_distance_c` metric over the binnings_es_ew columns;
# cifar_table writes it to tables/chat_dist_c_es_ew.tex.
cifar_table(
    fname="chat_dist_c_es_ew.tex",
    binnings=binnings_es_ew,
    metric="c_hat_distance_c",
)

Unnamed: 0_level_0,binning,$EW_{10}$,$ES_{10}$,$EW_{15}$,$ES_{15}$,$EW_{20}$,$ES_{20}$,$EW_{CV}$,$ES_{CV}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
resnet110,cars vs rest,$4.77_{5}$,$4.38_{4}$,$5.13_{7}$,$3.78_{2}$,$5.41_{8}$,$\mathbf{3.71_{1}}$,$4.94_{6}$,$3.9_{3}$
resnet110,cats vs rest,$11.28_{5}$,$\mathbf{8.94_{1}}$,$12.03_{7}$,$10.4_{4}$,$12.74_{8}$,$10.37_{3}$,$11.49_{6}$,$10.04_{2}$
resnet110,dogs vs rest,$10.49_{6}$,$\mathbf{8.28_{1}}$,$11.86_{7}$,$9.24_{3}$,$12.96_{8}$,$9.24_{4}$,$10.16_{5}$,$9.07_{2}$
resnet110,confidence,$13.11_{2}$,$\mathbf{12.69_{1}}$,$14.95_{6}$,$14.29_{4}$,$16.7_{8}$,$15.77_{7}$,$14.73_{5}$,$13.86_{3}$
densenet40,cars vs rest,$4.67_{3}$,$7.6_{8}$,$4.99_{5}$,$4.95_{4}$,$5.25_{7}$,$5.15_{6}$,$4.65_{2}$,$\mathbf{3.79_{1}}$
densenet40,cats vs rest,$10.28_{5}$,$10.04_{4}$,$11.43_{6}$,$11.93_{7}$,$12.69_{8}$,$9.61_{3}$,$\mathbf{8.56_{1}}$,$9.12_{2}$
densenet40,dogs vs rest,$9.73_{5}$,$\mathbf{7.65_{1}}$,$10.77_{7}$,$8.96_{4}$,$11.52_{8}$,$9.73_{6}$,$8.45_{3}$,$8.02_{2}$
densenet40,confidence,$14.83_{5}$,$\mathbf{12.38_{1}}$,$15.93_{6}$,$14.09_{4}$,$17.4_{8}$,$17.03_{7}$,$13.8_{3}$,$12.55_{2}$
wide32,cars vs rest,$4.53_{6}$,$3.75_{3}$,$5.09_{7}$,$\mathbf{3.48_{1}}$,$5.61_{8}$,$3.65_{2}$,$3.95_{5}$,$3.76_{4}$
wide32,cats vs rest,$10.42_{6}$,$9.94_{4}$,$11.49_{7}$,$9.01_{2}$,$12.62_{8}$,$10.09_{5}$,$9.58_{3}$,$\mathbf{8.57_{1}}$


\begin{table}
\caption{chat_dist_c_es_ew}
\label{table:chat_dist_c_es_ew}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllll}
\toprule
       & binning &    $EW_{10}$ &             $ES_{10}$ &    $EW_{15}$ &            $ES_{15}$ &    $EW_{20}$ &            $ES_{20}$ &            $EW_{CV}$ &             $ES_{CV}$ \\
Model & {} &              &                       &              &                      &              &                      &                      &                       \\
\midrule
resnet110 & cars vs rest &   $4.77_{5}$ &            $4.38_{4}$ &   $5.13_{7}$ &           $3.78_{2}$ &   $5.41_{8}$ &  $\mathbf{3.71_{1}}$ &           $4.94_{6}$ &             $3.9_{3}$ \\
       & cats vs rest &  $11.28_{5}$ &   $\mathbf{8.94_{1}}$ &  $12.03_{7}$ &           $10.4_{4}$ &  $12.74_{8}$ &          $10.37_{3}$ &          $11.49_{6}$ &           $10.04_{2}$ \\
       & dogs vs rest &  $10.49_{6}$ &   $\mathbf{8.28_{1}}$ &  $11.86_{7}$ &           $9.24_{

In [14]:
# Table of the `ECE_abs` metric over the binnings_es_ew columns;
# cifar_table writes it to tables/ECE_abs_es_ew.tex.
cifar_table(
    fname="ECE_abs_es_ew.tex",
    binnings=binnings_es_ew,
    metric="ECE_abs",
)

Unnamed: 0_level_0,binning,$EW_{10}$,$ES_{10}$,$EW_{15}$,$ES_{15}$,$EW_{20}$,$ES_{20}$,$EW_{CV}$,$ES_{CV}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
resnet110,cars vs rest,$\mathbf{1.46_{1}}$,$1.99_{6}$,$1.46_{2}$,$1.85_{5}$,$1.63_{3}$,$1.85_{4}$,$2.86_{8}$,$2.84_{7}$
resnet110,cats vs rest,$2.81_{6}$,$\mathbf{2.32_{1}}$,$2.75_{5}$,$2.35_{2}$,$2.75_{4}$,$2.37_{3}$,$4.09_{8}$,$3.16_{7}$
resnet110,dogs vs rest,$2.98_{3}$,$3.29_{4}$,$3.49_{6}$,$\mathbf{2.83_{1}}$,$3.47_{5}$,$2.88_{2}$,$4.04_{7}$,$4.24_{8}$
resnet110,confidence,$\mathbf{4.02_{1}}$,$4.16_{3}$,$4.03_{2}$,$4.45_{6}$,$4.26_{4}$,$4.29_{5}$,$7.18_{8}$,$6.68_{7}$
densenet40,cars vs rest,$1.46_{4}$,$1.23_{2}$,$1.56_{5}$,$\mathbf{1.11_{1}}$,$1.72_{6}$,$1.29_{3}$,$2.67_{8}$,$2.31_{7}$
densenet40,cats vs rest,$2.91_{3}$,$3.28_{4}$,$3.28_{5}$,$\mathbf{2.63_{1}}$,$3.8_{6}$,$2.71_{2}$,$4.59_{7}$,$4.75_{8}$
densenet40,dogs vs rest,$2.86_{3}$,$2.96_{4}$,$3.55_{5}$,$2.75_{2}$,$3.55_{6}$,$\mathbf{2.72_{1}}$,$4.07_{8}$,$4.01_{7}$
densenet40,confidence,$5.83_{4}$,$\mathbf{4.95_{1}}$,$5.68_{3}$,$5.12_{2}$,$5.94_{5}$,$6.34_{6}$,$7.13_{8}$,$6.7_{7}$
wide32,cars vs rest,$\mathbf{1.49_{1}}$,$2.07_{5}$,$1.77_{4}$,$1.67_{3}$,$2.11_{6}$,$1.57_{2}$,$3.12_{8}$,$2.85_{7}$
wide32,cats vs rest,$3.09_{4}$,$2.99_{3}$,$3.42_{6}$,$2.99_{2}$,$3.25_{5}$,$\mathbf{2.55_{1}}$,$4.95_{8}$,$4.86_{7}$


\begin{table}
\caption{ECE_abs_es_ew}
\label{table:ECE_abs_es_ew}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllll}
\toprule
       & binning &            $EW_{10}$ &            $ES_{10}$ &   $EW_{15}$ &            $ES_{15}$ &   $EW_{20}$ &            $ES_{20}$ &   $EW_{CV}$ &   $ES_{CV}$ \\
Model & {} &                      &                      &             &                      &             &                      &             &             \\
\midrule
resnet110 & cars vs rest &  $\mathbf{1.46_{1}}$ &           $1.99_{6}$ &  $1.46_{2}$ &           $1.85_{5}$ &  $1.63_{3}$ &           $1.85_{4}$ &  $2.86_{8}$ &  $2.84_{7}$ \\
       & cats vs rest &           $2.81_{6}$ &  $\mathbf{2.32_{1}}$ &  $2.75_{5}$ &           $2.35_{2}$ &  $2.75_{4}$ &           $2.37_{3}$ &  $4.09_{8}$ &  $3.16_{7}$ \\
       & dogs vs rest &           $2.98_{3}$ &           $3.29_{4}$ &  $3.49_{6}$ &  $\mathbf{2.83_{1}}$ &  $3.47_{5}$ &           $2.88_{2}$ &  $4.04_{7}$ &  

## Trick vs notrick

In [9]:
# Each method's "-notrick" column is immediately followed by its counterpart.
binnings_trick = [
    name
    for method_pair in (
        ('$PL_{NN}^{CE}-notrick$', '$PL_{NN}^{CE}-trick$'),
        ('$PL3^{CE}-notrick$', '$PL3^{CE}-trick$'),
        ('$PL_{DE}-notrick$', '$PL_{DE}$'),
        ('$ES_{CV}-notrick$', '$ES_{CV}$'),
    )
    for name in method_pair
]

In [10]:
# Table of the `c_hat_distance_c` metric over the binnings_trick columns;
# cifar_table writes it to tables/chat_dist_c_trick.tex.
cifar_table(
    fname="chat_dist_c_trick.tex",
    binnings=binnings_trick,
    metric="c_hat_distance_c",
)

Unnamed: 0_level_0,binning,$PL_{NN}^{CE}-notrick$,$PL_{NN}^{CE}-trick$,$PL3^{CE}-notrick$,$PL3^{CE}-trick$,$PL_{DE}-notrick$,$PL_{DE}$,$ES_{CV}-notrick$,$ES_{CV}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
resnet110,cars vs rest,$\mathbf{3.2_{1}}$,$3.25_{4}$,$3.23_{2}$,$3.25_{3}$,$3.89_{6}$,$3.92_{8}$,$3.83_{5}$,$3.9_{7}$
resnet110,cats vs rest,$7.59_{2}$,$\mathbf{7.56_{1}}$,$7.73_{3}$,$7.81_{4}$,$10.15_{7}$,$10.17_{8}$,$10.04_{6}$,$10.04_{5}$
resnet110,dogs vs rest,$6.62_{2}$,$\mathbf{6.42_{1}}$,$7.0_{4}$,$6.94_{3}$,$8.87_{5}$,$8.97_{6}$,$9.29_{8}$,$9.07_{7}$
resnet110,confidence,$10.05_{2}$,$\mathbf{10.04_{1}}$,$10.86_{3}$,$11.17_{4}$,$11.53_{5}$,$11.76_{6}$,$14.16_{8}$,$13.86_{7}$
densenet40,cars vs rest,$2.5_{2}$,$\mathbf{2.5_{1}}$,$3.24_{4}$,$3.19_{3}$,$4.22_{8}$,$4.21_{7}$,$3.71_{5}$,$3.79_{6}$
densenet40,cats vs rest,$5.93_{2}$,$\mathbf{5.78_{1}}$,$7.08_{4}$,$7.03_{3}$,$7.7_{6}$,$7.6_{5}$,$9.25_{8}$,$9.12_{7}$
densenet40,dogs vs rest,$5.31_{2}$,$\mathbf{5.1_{1}}$,$6.74_{4}$,$6.69_{3}$,$6.91_{5}$,$6.94_{6}$,$8.22_{8}$,$8.02_{7}$
densenet40,confidence,$\mathbf{10.71_{1}}$,$10.73_{2}$,$11.69_{6}$,$11.11_{3}$,$11.6_{5}$,$11.37_{4}$,$13.08_{8}$,$12.55_{7}$
wide32,cars vs rest,$\mathbf{2.59_{1}}$,$2.61_{2}$,$3.67_{5}$,$3.65_{3}$,$3.68_{6}$,$3.66_{4}$,$3.82_{8}$,$3.76_{7}$
wide32,cats vs rest,$\mathbf{6.58_{1}}$,$6.83_{2}$,$7.35_{3}$,$7.42_{4}$,$8.77_{7}$,$8.8_{8}$,$8.46_{5}$,$8.57_{6}$


\begin{table}
\caption{chat_dist_c_trick}
\label{table:chat_dist_c_trick}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllll}
\toprule
       & binning & $PL_{NN}^{CE}-notrick$ &  $PL_{NN}^{CE}-trick$ & $PL3^{CE}-notrick$ & $PL3^{CE}-trick$ & $PL_{DE}-notrick$ &    $PL_{DE}$ & $ES_{CV}-notrick$ &    $ES_{CV}$ \\
Model & {} &                        &                       &                    &                  &                   &              &                   &              \\
\midrule
resnet110 & cars vs rest &     $\mathbf{3.2_{1}}$ &            $3.25_{4}$ &         $3.23_{2}$ &       $3.25_{3}$ &        $3.89_{6}$ &   $3.92_{8}$ &        $3.83_{5}$ &    $3.9_{7}$ \\
       & cats vs rest &             $7.59_{2}$ &   $\mathbf{7.56_{1}}$ &         $7.73_{3}$ &       $7.81_{4}$ &       $10.15_{7}$ &  $10.17_{8}$ &       $10.04_{6}$ &  $10.04_{5}$ \\
       & dogs vs rest &             $6.62_{2}$ &   $\mathbf{6.42_{1}}$ &          $7.0_{4}$ &       $6.94_{

In [11]:
# ECE_abs table for the binnings listed in binnings_trick
cifar_table(fname="ECE_abs_trick.tex", binnings=binnings_trick, metric="ECE_abs")

Unnamed: 0_level_0,binning,$PL_{NN}^{CE}-notrick$,$PL_{NN}^{CE}-trick$,$PL3^{CE}-notrick$,$PL3^{CE}-trick$,$PL_{DE}-notrick$,$PL_{DE}$,$ES_{CV}-notrick$,$ES_{CV}$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
resnet110,cars vs rest,$1.94_{5}$,$2.07_{6}$,$\mathbf{1.4_{1}}$,$1.46_{2}$,$1.78_{3}$,$1.8_{4}$,$2.81_{7}$,$2.84_{8}$
resnet110,cats vs rest,$3.69_{8}$,$3.65_{7}$,$2.58_{2}$,$\mathbf{2.52_{1}}$,$3.31_{6}$,$3.29_{5}$,$3.09_{3}$,$3.16_{4}$
resnet110,dogs vs rest,$3.21_{3}$,$3.62_{4}$,$\mathbf{2.64_{1}}$,$2.76_{2}$,$3.8_{5}$,$3.96_{6}$,$4.26_{8}$,$4.24_{7}$
resnet110,confidence,$4.29_{5}$,$4.58_{6}$,$\mathbf{3.98_{1}}$,$4.11_{2}$,$4.17_{3}$,$4.29_{4}$,$5.56_{7}$,$6.68_{8}$
densenet40,cars vs rest,$1.44_{3}$,$1.47_{4}$,$1.43_{2}$,$\mathbf{1.41_{1}}$,$2.14_{5}$,$2.14_{6}$,$2.26_{7}$,$2.31_{8}$
densenet40,cats vs rest,$3.31_{3}$,$3.58_{6}$,$3.17_{2}$,$\mathbf{3.11_{1}}$,$3.49_{5}$,$3.45_{4}$,$4.71_{7}$,$4.75_{8}$
densenet40,dogs vs rest,$\mathbf{2.66_{1}}$,$2.93_{4}$,$3.16_{6}$,$3.12_{5}$,$2.91_{2}$,$2.92_{3}$,$3.69_{7}$,$4.01_{8}$
densenet40,confidence,$5.69_{4}$,$6.02_{6}$,$6.0_{5}$,$5.4_{2}$,$5.53_{3}$,$\mathbf{5.31_{1}}$,$6.41_{7}$,$6.7_{8}$
wide32,cars vs rest,$1.68_{5}$,$1.73_{6}$,$1.48_{2}$,$\mathbf{1.46_{1}}$,$1.63_{4}$,$1.62_{3}$,$2.76_{7}$,$2.85_{8}$
wide32,cats vs rest,$3.63_{5}$,$3.93_{6}$,$\mathbf{3.02_{1}}$,$3.11_{2}$,$3.4_{3}$,$3.44_{4}$,$4.35_{7}$,$4.86_{8}$


\begin{table}
\caption{ECE_abs_trick}
\label{table:ECE_abs_trick}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllll}
\toprule
       & binning & $PL_{NN}^{CE}-notrick$ & $PL_{NN}^{CE}-trick$ &   $PL3^{CE}-notrick$ &     $PL3^{CE}-trick$ & $PL_{DE}-notrick$ &            $PL_{DE}$ & $ES_{CV}-notrick$ &   $ES_{CV}$ \\
Model & {} &                        &                      &                      &                      &                   &                      &                   &             \\
\midrule
resnet110 & cars vs rest &             $1.94_{5}$ &           $2.07_{6}$ &   $\mathbf{1.4_{1}}$ &           $1.46_{2}$ &        $1.78_{3}$ &            $1.8_{4}$ &        $2.81_{7}$ &  $2.84_{8}$ \\
       & cats vs rest &             $3.69_{8}$ &           $3.65_{7}$ &           $2.58_{2}$ &  $\mathbf{2.52_{1}}$ &        $3.31_{6}$ &           $3.29_{5}$ &        $3.09_{3}$ &  $3.16_{4}$ \\
       & dogs vs rest &             $3.21_{3}$ &           $3.62_{4

## Standard-deviation tables for ECE and `c_hat_distance_c`

In [18]:
# Std variant (is_std=True) of the default-metric table for binnings1
cifar_table(fname="chat_dist_c_std.tex", binnings=binnings1, is_std=True)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$0.42_{5}$,$0.29_{2}$,$0.38_{3}$,$0.49_{8}$,$0.42_{6}$,$0.62_{11}$,$0.53_{9}$,$0.54_{10}$,$0.64_{12}$,$0.39_{4}$,$0.71_{14}$,$0.66_{13}$,$0.77_{15}$,$0.47_{7}$,$\mathbf{0.16_{1}}$,$4.94_{16}$
resnet110,cats vs rest,$0.83_{10}$,$0.51_{3}$,$0.71_{7}$,$\mathbf{0.39_{1}}$,$0.55_{4}$,$0.98_{14}$,$0.75_{9}$,$0.9_{11}$,$1.13_{15}$,$0.68_{6}$,$0.68_{5}$,$0.94_{13}$,$0.9_{12}$,$0.73_{8}$,$0.41_{2}$,$1.27_{16}$
resnet110,dogs vs rest,$1.39_{10}$,$1.0_{5}$,$1.94_{15}$,$1.61_{13}$,$1.88_{14}$,$0.88_{3}$,$2.22_{16}$,$0.96_{4}$,$1.56_{12}$,$\mathbf{0.69_{1}}$,$1.18_{8}$,$1.22_{9}$,$0.79_{2}$,$1.02_{6}$,$1.15_{7}$,$1.39_{11}$
resnet110,confidence,$0.97_{7}$,$1.52_{11}$,$1.86_{14}$,$1.69_{12}$,$1.76_{13}$,$0.91_{5}$,$3.05_{16}$,$0.57_{3}$,$1.11_{8}$,$0.93_{6}$,$1.14_{9}$,$1.95_{15}$,$\mathbf{0.37_{1}}$,$0.38_{2}$,$1.2_{10}$,$0.74_{4}$
densenet40,cars vs rest,$2.52_{15}$,$0.97_{11}$,$2.09_{13}$,$2.01_{12}$,$2.2_{14}$,$0.83_{9}$,$0.76_{8}$,$0.65_{6}$,$0.47_{4}$,$0.34_{3}$,$0.51_{5}$,$0.9_{10}$,$0.33_{2}$,$0.65_{7}$,$\mathbf{0.29_{1}}$,$3.86_{16}$
densenet40,cats vs rest,$1.37_{11}$,$1.38_{12}$,$0.73_{6}$,$1.67_{14}$,$1.54_{13}$,$1.8_{15}$,$0.77_{8}$,$0.58_{4}$,$1.3_{10}$,$\mathbf{0.3_{1}}$,$0.35_{2}$,$0.56_{3}$,$0.78_{9}$,$0.68_{5}$,$0.74_{7}$,$3.57_{16}$
densenet40,dogs vs rest,$1.17_{9}$,$2.23_{14}$,$1.63_{12}$,$2.71_{15}$,$1.47_{11}$,$0.9_{5}$,$1.88_{13}$,$0.95_{8}$,$1.47_{10}$,$0.91_{7}$,$0.9_{6}$,$0.89_{4}$,$\mathbf{0.61_{1}}$,$0.7_{2}$,$0.74_{3}$,$2.88_{16}$
densenet40,confidence,$1.78_{6}$,$1.68_{4}$,$1.76_{5}$,$\mathbf{1.37_{1}}$,$1.66_{3}$,$2.33_{13}$,$2.74_{15}$,$2.17_{10}$,$2.53_{14}$,$3.13_{16}$,$2.26_{12}$,$2.23_{11}$,$2.08_{9}$,$1.96_{7}$,$1.96_{8}$,$1.57_{2}$
wide32,cars vs rest,$0.7_{6}$,$0.59_{4}$,$0.42_{3}$,$0.92_{13}$,$0.8_{8}$,$\mathbf{0.37_{1}}$,$1.01_{15}$,$0.85_{9}$,$0.99_{14}$,$0.41_{2}$,$0.91_{12}$,$0.85_{10}$,$0.73_{7}$,$0.86_{11}$,$0.69_{5}$,$5.71_{16}$
wide32,cats vs rest,$0.49_{5}$,$0.91_{12}$,$0.74_{8}$,$1.3_{16}$,$0.69_{7}$,$0.8_{10}$,$0.76_{9}$,$0.81_{11}$,$1.02_{13}$,$0.41_{4}$,$\mathbf{0.22_{1}}$,$1.15_{15}$,$0.59_{6}$,$0.28_{2}$,$0.34_{3}$,$1.03_{14}$


\begin{table}
\caption{chat_dist_c_std}
\label{table:chat_dist_c_std}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &    $ES_{10}$ &    $ES_{15}$ &    $ES_{20}$ &            $ES_{25}$ &    $ES_{30}$ &         $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ &  $PL3^{MSE}$ &       $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &    $PL_{DE}$ &                Platt &                 beta &             isotonic &          KDE \\
Model & {} &              &              &              &                      &              &                      &              &              &              &                      &                      &              &                      &                      &                      &              \\
\midrule
resnet110 & cars vs rest &   $0.42_{5}$ &   $0.29_{2}$ &   $0.38_{3}$ &           $0.49_{8}$ &   $0.42_{6}$ &          $0.62_{11}$ &   $0.53_{9}$ &  $0.54_{10}$ &  $0.64_{12}$ &           $0.39_{4}$ &

In [19]:
# Std variant (is_std=True) of the ECE_abs table for binnings1
cifar_table(fname="ECE_abs_std.tex", binnings=binnings1, metric="ECE_abs", is_std=True)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$0.73_{9}$,$0.65_{6}$,$0.66_{7}$,$0.6_{5}$,$0.72_{8}$,$1.13_{14}$,$1.46_{15}$,$0.5_{4}$,$0.81_{12}$,$0.41_{2}$,$\mathbf{0.33_{1}}$,$0.74_{10}$,$0.84_{13}$,$0.78_{11}$,$0.5_{3}$,$4.96_{16}$
resnet110,cats vs rest,$1.14_{12}$,$0.71_{4}$,$0.76_{6}$,$0.92_{10}$,$0.67_{3}$,$1.22_{14}$,$1.69_{16}$,$0.89_{8}$,$0.67_{2}$,$1.12_{11}$,$1.16_{13}$,$1.38_{15}$,$0.9_{9}$,$0.75_{5}$,$\mathbf{0.43_{1}}$,$0.83_{7}$
resnet110,dogs vs rest,$1.35_{13}$,$0.58_{3}$,$1.41_{14}$,$0.89_{8}$,$1.43_{15}$,$1.19_{10}$,$1.26_{11}$,$0.63_{5}$,$1.65_{16}$,$0.39_{2}$,$0.69_{6}$,$1.08_{9}$,$\mathbf{0.29_{1}}$,$0.59_{4}$,$1.29_{12}$,$0.83_{7}$
resnet110,confidence,$0.86_{3}$,$1.28_{7}$,$1.16_{5}$,$1.72_{14}$,$2.9_{16}$,$1.37_{10}$,$1.86_{15}$,$1.51_{11}$,$1.24_{6}$,$1.36_{9}$,$0.78_{2}$,$1.69_{13}$,$1.68_{12}$,$1.04_{4}$,$1.32_{8}$,$\mathbf{0.51_{1}}$
densenet40,cars vs rest,$0.61_{8}$,$0.72_{13}$,$0.63_{11}$,$0.59_{6}$,$0.54_{4}$,$0.76_{14}$,$0.64_{12}$,$0.6_{7}$,$0.63_{10}$,$\mathbf{0.2_{1}}$,$0.3_{3}$,$0.88_{15}$,$0.55_{5}$,$0.63_{9}$,$0.28_{2}$,$3.85_{16}$
densenet40,cats vs rest,$0.69_{5}$,$1.02_{12}$,$\mathbf{0.35_{1}}$,$0.94_{11}$,$0.87_{9}$,$1.21_{13}$,$1.62_{14}$,$0.7_{6}$,$1.71_{15}$,$0.72_{8}$,$0.6_{3}$,$0.71_{7}$,$0.92_{10}$,$0.61_{4}$,$0.53_{2}$,$3.43_{16}$
densenet40,dogs vs rest,$0.83_{6}$,$0.89_{8}$,$1.05_{12}$,$1.14_{14}$,$1.1_{13}$,$0.88_{7}$,$\mathbf{0.36_{1}}$,$1.04_{11}$,$2.04_{15}$,$0.75_{5}$,$0.92_{9}$,$0.72_{4}$,$0.57_{3}$,$0.37_{2}$,$1.03_{10}$,$2.92_{16}$
densenet40,confidence,$2.03_{14}$,$1.66_{8}$,$2.18_{15}$,$\mathbf{1.03_{1}}$,$1.3_{3}$,$1.71_{9}$,$1.98_{13}$,$1.51_{5}$,$2.2_{16}$,$1.58_{6}$,$1.94_{12}$,$1.76_{10}$,$1.76_{11}$,$1.63_{7}$,$1.48_{4}$,$1.06_{2}$
wide32,cars vs rest,$0.76_{13}$,$0.77_{14}$,$0.67_{9}$,$0.61_{4}$,$0.63_{5}$,$0.92_{15}$,$0.46_{3}$,$0.63_{6}$,$0.65_{8}$,$0.3_{2}$,$\mathbf{0.18_{1}}$,$0.72_{10}$,$0.74_{12}$,$0.65_{7}$,$0.73_{11}$,$5.71_{16}$
wide32,cats vs rest,$0.91_{12}$,$0.51_{5}$,$0.45_{3}$,$0.66_{7}$,$0.43_{2}$,$1.65_{15}$,$2.8_{16}$,$0.45_{4}$,$0.82_{10}$,$1.31_{14}$,$1.15_{13}$,$0.87_{11}$,$0.69_{9}$,$\mathbf{0.24_{1}}$,$0.61_{6}$,$0.67_{8}$


\begin{table}
\caption{ECE_abs_std}
\label{table:ECE_abs_std}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &    $ES_{10}$ &    $ES_{15}$ &            $ES_{20}$ &            $ES_{25}$ &    $ES_{30}$ & $ES_{sweep}$ &            $ES_{CV}$ &   $PL3^{CE}$ &  $PL3^{MSE}$ &       $PL_{NN}^{CE}$ &      $PL_{NN}^{MSE}$ &    $PL_{DE}$ &                Platt &                 beta &             isotonic &                  KDE \\
Model & {} &              &              &                      &                      &              &              &                      &              &              &                      &                      &              &                      &                      &                      &                      \\
\midrule
resnet110 & cars vs rest &   $0.73_{9}$ &   $0.65_{6}$ &           $0.66_{7}$ &            $0.6_{5}$ &   $0.72_{8}$ &  $1.13_{14}$ &          $1.46_{15}$ &    $0.5_{4}$ &  $0.8

## Tables for other ground truths (`cgt_nr` = 0 and 1)

In [20]:
# Default-metric table for binnings1, selecting rows with cgt_nr == 0
cifar_table(fname="chat_dist_c_cgt0.tex", binnings=binnings1, cgt_nr=0)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$4.42_{13}$,$3.76_{6}$,$3.67_{4}$,$3.98_{8}$,$3.75_{5}$,$4.63_{14}$,$3.99_{9}$,$3.66_{3}$,$4.7_{15}$,$3.4_{2}$,$3.81_{7}$,$4.08_{10}$,$4.24_{12}$,$\mathbf{3.23_{1}}$,$4.15_{11}$,$101.42_{16}$
resnet110,cats vs rest,$8.78_{4}$,$10.26_{7}$,$10.32_{9}$,$10.74_{12}$,$10.77_{13}$,$11.03_{14}$,$10.13_{6}$,$7.98_{2}$,$9.75_{5}$,$\mathbf{7.75_{1}}$,$8.6_{3}$,$10.3_{8}$,$11.46_{15}$,$10.42_{10}$,$10.52_{11}$,$37.34_{16}$
resnet110,dogs vs rest,$8.13_{6}$,$9.13_{11}$,$9.14_{12}$,$9.7_{13}$,$10.95_{15}$,$8.49_{7}$,$9.02_{10}$,$7.01_{2}$,$8.92_{8}$,$\mathbf{6.39_{1}}$,$7.67_{3}$,$8.95_{9}$,$7.93_{5}$,$7.9_{4}$,$9.98_{14}$,$27.16_{16}$
resnet110,confidence,$12.4_{9}$,$14.02_{11}$,$15.42_{13}$,$17.35_{14}$,$18.9_{15}$,$11.61_{7}$,$13.6_{10}$,$10.84_{4}$,$11.41_{6}$,$\mathbf{9.54_{1}}$,$10.54_{3}$,$11.61_{8}$,$11.24_{5}$,$10.15_{2}$,$15.09_{12}$,$23.08_{16}$
densenet40,cars vs rest,$7.28_{15}$,$4.77_{8}$,$5.0_{13}$,$5.0_{12}$,$4.8_{9}$,$5.16_{14}$,$4.42_{6}$,$3.86_{4}$,$4.89_{11}$,$\mathbf{3.23_{1}}$,$3.82_{3}$,$4.73_{7}$,$3.93_{5}$,$3.78_{2}$,$4.87_{10}$,$126.62_{16}$
densenet40,cats vs rest,$8.73_{6}$,$10.88_{13}$,$9.43_{10}$,$10.36_{12}$,$11.58_{15}$,$9.91_{11}$,$8.92_{7}$,$7.67_{3}$,$9.09_{8}$,$\mathbf{6.69_{1}}$,$7.31_{2}$,$8.38_{4}$,$9.09_{9}$,$8.66_{5}$,$11.04_{14}$,$74.37_{16}$
densenet40,dogs vs rest,$8.51_{3}$,$9.21_{8}$,$9.44_{10}$,$10.09_{11}$,$10.8_{14}$,$9.16_{6}$,$10.14_{12}$,$9.18_{7}$,$10.23_{13}$,$\mathbf{7.56_{1}}$,$8.57_{4}$,$9.33_{9}$,$8.66_{5}$,$8.42_{2}$,$11.45_{15}$,$65.83_{16}$
densenet40,confidence,$12.19_{9}$,$14.14_{11}$,$17.02_{13}$,$18.11_{14}$,$19.9_{15}$,$10.8_{2}$,$12.29_{10}$,$10.93_{3}$,$12.01_{8}$,$\mathbf{10.54_{1}}$,$11.38_{6}$,$11.24_{5}$,$11.69_{7}$,$11.04_{4}$,$14.93_{12}$,$21.8_{16}$
wide32,cars vs rest,$4.32_{9}$,$3.85_{2}$,$3.85_{3}$,$3.91_{5}$,$3.88_{4}$,$4.33_{10}$,$4.12_{7}$,$4.77_{12}$,$5.91_{15}$,$\mathbf{3.43_{1}}$,$4.08_{6}$,$4.61_{11}$,$5.42_{14}$,$4.3_{8}$,$5.12_{13}$,$171.31_{16}$
wide32,cats vs rest,$8.78_{5}$,$8.89_{7}$,$9.23_{9}$,$10.72_{15}$,$10.4_{13}$,$9.13_{8}$,$8.71_{4}$,$7.83_{2}$,$9.64_{12}$,$\mathbf{7.71_{1}}$,$8.31_{3}$,$9.25_{10}$,$9.27_{11}$,$8.79_{6}$,$10.7_{14}$,$43.02_{16}$


\begin{table}
\caption{chat_dist_c_cgt0}
\label{table:chat_dist_c_cgt0}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &    $ES_{10}$ &     $ES_{15}$ &     $ES_{20}$ &     $ES_{25}$ &     $ES_{30}$ &  $ES_{sweep}$ &     $ES_{CV}$ &   $PL3^{CE}$ &   $PL3^{MSE}$ &        $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &     $PL_{DE}$ &         Platt &                 beta &      isotonic &            KDE \\
Model & {} &              &               &               &               &               &               &               &              &               &                       &                 &               &               &                      &               &                \\
\midrule
resnet110 & cars vs rest &  $4.42_{13}$ &    $3.76_{6}$ &    $3.67_{4}$ &    $3.98_{8}$ &    $3.75_{5}$ &   $4.63_{14}$ &    $3.99_{9}$ &   $3.66_{3}$ &    $4.7_{15}$ &             $3.4_{2}$ &      $3.81_{7}$ &   $4.08_{10}$ &   $4.24_{12}$ &  $\

In [21]:
# Default-metric table for binnings1, selecting rows with cgt_nr == 1
cifar_table(fname="chat_dist_c_cgt1.tex", binnings=binnings1, cgt_nr=1)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,cars vs rest,$6.78_{14}$,$6.17_{8}$,$5.95_{4}$,$6.41_{12}$,$5.82_{3}$,$6.78_{13}$,$6.3_{10}$,$5.99_{5}$,$6.96_{15}$,$5.72_{2}$,$6.11_{7}$,$6.33_{11}$,$6.25_{9}$,$\mathbf{5.47_{1}}$,$6.08_{6}$,$103.08_{16}$
resnet110,cats vs rest,$10.46_{4}$,$11.65_{9}$,$11.73_{10}$,$12.31_{14}$,$12.19_{12}$,$12.2_{13}$,$11.43_{7}$,$9.46_{2}$,$11.33_{5}$,$\mathbf{9.2_{1}}$,$9.96_{3}$,$11.45_{8}$,$13.06_{15}$,$11.42_{6}$,$11.81_{11}$,$38.79_{16}$
resnet110,dogs vs rest,$10.19_{6}$,$11.04_{11}$,$11.06_{12}$,$11.57_{14}$,$12.69_{15}$,$10.46_{7}$,$10.97_{10}$,$9.06_{2}$,$10.91_{9}$,$\mathbf{8.6_{1}}$,$9.85_{3}$,$10.88_{8}$,$9.98_{5}$,$9.92_{4}$,$11.37_{13}$,$28.65_{16}$
resnet110,confidence,$13.17_{9}$,$14.79_{11}$,$16.19_{13}$,$18.09_{14}$,$19.57_{15}$,$12.53_{8}$,$14.37_{10}$,$11.59_{4}$,$12.36_{6}$,$\mathbf{10.38_{1}}$,$11.47_{3}$,$12.47_{7}$,$11.91_{5}$,$10.97_{2}$,$15.77_{12}$,$23.49_{16}$
densenet40,cars vs rest,$9.69_{15}$,$7.45_{13}$,$7.39_{12}$,$7.53_{14}$,$7.22_{10}$,$7.1_{8}$,$6.64_{6}$,$6.35_{5}$,$7.26_{11}$,$\mathbf{6.02_{1}}$,$6.34_{4}$,$7.18_{9}$,$6.25_{3}$,$6.15_{2}$,$7.04_{7}$,$127.57_{16}$
densenet40,cats vs rest,$11.21_{6}$,$13.24_{14}$,$11.49_{10}$,$12.3_{11}$,$13.43_{15}$,$12.33_{12}$,$11.25_{7}$,$10.3_{3}$,$11.33_{8}$,$\mathbf{9.53_{1}}$,$10.0_{2}$,$10.88_{5}$,$11.42_{9}$,$10.82_{4}$,$12.97_{13}$,$75.79_{16}$
densenet40,dogs vs rest,$10.69_{4}$,$11.01_{6}$,$11.32_{9}$,$11.82_{11}$,$12.51_{14}$,$11.13_{8}$,$12.09_{13}$,$11.1_{7}$,$11.88_{12}$,$\mathbf{9.69_{1}}$,$10.8_{5}$,$11.38_{10}$,$10.59_{3}$,$10.46_{2}$,$12.7_{15}$,$66.95_{16}$
densenet40,confidence,$13.28_{9}$,$15.05_{11}$,$17.82_{13}$,$18.8_{14}$,$20.54_{15}$,$12.1_{4}$,$13.29_{10}$,$11.86_{2}$,$13.13_{8}$,$\mathbf{11.22_{1}}$,$12.28_{6}$,$12.25_{5}$,$12.8_{7}$,$12.07_{3}$,$15.53_{12}$,$22.23_{16}$
wide32,cars vs rest,$6.55_{5}$,$6.56_{6}$,$6.5_{4}$,$6.8_{11}$,$6.83_{12}$,$6.87_{13}$,$6.66_{7}$,$6.75_{10}$,$7.82_{15}$,$\mathbf{5.78_{1}}$,$6.25_{3}$,$6.67_{8}$,$7.37_{14}$,$6.22_{2}$,$6.74_{9}$,$171.61_{16}$
wide32,cats vs rest,$10.24_{7}$,$10.07_{5}$,$10.55_{9}$,$11.98_{15}$,$11.68_{14}$,$10.41_{8}$,$10.15_{6}$,$9.42_{2}$,$10.93_{12}$,$\mathbf{9.39_{1}}$,$9.8_{3}$,$10.61_{10}$,$10.66_{11}$,$9.82_{4}$,$11.53_{13}$,$44.0_{16}$


\begin{table}
\caption{chat_dist_c_cgt1}
\label{table:chat_dist_c_cgt1}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &    $ES_{10}$ &     $ES_{15}$ &     $ES_{20}$ &     $ES_{25}$ &     $ES_{30}$ &  $ES_{sweep}$ &     $ES_{CV}$ &    $PL3^{CE}$ &   $PL3^{MSE}$ &        $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &     $PL_{DE}$ &         Platt &                 beta &      isotonic &            KDE \\
Model & {} &              &               &               &               &               &               &               &               &               &                       &                 &               &               &                      &               &                \\
\midrule
resnet110 & cars vs rest &  $6.78_{14}$ &    $6.17_{8}$ &    $5.95_{4}$ &   $6.41_{12}$ &    $5.82_{3}$ &   $6.78_{13}$ &    $6.3_{10}$ &    $5.99_{5}$ &   $6.96_{15}$ &            $5.72_{2}$ &      $6.11_{7}$ &   $6.33_{11}$ &    $6.25_{9}$ & 

## Different data sizes

In [22]:
def cifar_table_ndata(fname, binnings, metric = "c_hat_distance_c",
                      n_datas = [1000, 3000, 10000],
                      cgt_nr = 3, is_std = False, check_cal=True):
    """Build, display and save a LaTeX table of `metric` per model and data size.

    One row is produced for every (model, n_data) pair, one column per entry of
    `binnings`. Each cell holds the metric value (multiplied by 1000 and rounded
    to two decimals) with the rank of that binning within the row as a
    subscript; the rank-1 cell of every row is set in bold. A final
    "avg rank" row holds the mean rank of each binning over all rows.

    Parameters
    ----------
    fname : str
        File name written under ``tables/``; the part before the first "." is
        also used as the LaTeX caption and label.
    binnings : list
        Values of the ``binning`` column to keep; also fixes the column order.
    metric : str
        Name of the column that is aggregated.
    n_datas : list
        Values of the ``n_data`` column, one table row per model for each.
        The list is only read, never mutated.
    cgt_nr : int
        Value of the ``cgt_nr`` column to select.
    is_std : bool
        If True, report the standard deviation over seeds of the per-seed
        means instead of the overall mean.
    check_cal : bool
        If True, keep only rows whose ``calibration_function`` is in a fixed
        subset.

    Returns
    -------
    None
        The table is shown with ``display``, written to ``tables/<fname>`` and
        printed.
    """
    model_names = ['resnet110', 'densenet40', 'wide32']
    n_datas = list(n_datas)

    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt','PW6logIOCE', 'ScalingBinning', 'Isotonic']
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # Collect one single-row frame per (model, n_data) and concatenate once at
    # the end instead of growing a DataFrame inside the loop.
    data_rows = []
    rank_rows = []

    for model_name in model_names:
        for n_data in n_datas:

            df_sel = df_cifar5m[(df_cifar5m.cgt_nr == cgt_nr) # isotonic ground truth
                                & (df_cifar5m.model_name == model_name)
                                & (df_cifar5m.n_data == n_data)]

            # Select the metric *before* aggregating so that non-numeric
            # columns never reach mean()/std().
            if is_std:
                per_binning = (df_sel
                               .groupby(["binning", "seed"])[metric].mean()
                               .groupby(level="binning").std())
            else:
                per_binning = df_sel.groupby("binning")[metric].mean()

            # Ranks are computed on the unrounded values.
            data_rows.append(np.round(per_binning * 1000, 2).to_frame().T)
            rank_rows.append(per_binning.rank().to_frame().T)

    df = pd.concat(data_rows)
    ranks = pd.concat(rank_rows)

    # Combine ranks and data into "$value_{rank}$" strings. The strings go into
    # an object-dtype copy so no text is assigned into a float column.
    formatted = df.astype(object)
    for row_idx in range(df.shape[0]):
        for column_idx in range(df.shape[1]):
            item = np.round(df.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            formatted.iloc[row_idx, column_idx] = "$" + cell + "$"
    df = formatted

    # Add row titles on the left: data size per row, grouped by model.
    # Derived from n_datas so that non-default sizes are labelled correctly.
    rows_per_model = len(n_datas)
    df.index = n_datas * len(model_names)
    df = pd.concat([
        pd.concat({model_name: df.iloc[i * rows_per_model:(i + 1) * rows_per_model]},
                  names=['Model'])
        for i, model_name in enumerate(model_names)
    ])

    df = df[binnings]

    avg_ranks = ranks.mean()[binnings].values
    avg_ranks = ["$%0.1f$" % rnk for rnk in avg_ranks]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=[("","avg rank")], columns=df.columns)])

    display(df)

    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    # Render first, open the file afterwards: a failure in to_latex must not
    # leave an empty file behind.
    with pd.option_context("display.max_colwidth", 25):
        output = df.to_latex(escape=False)

    # to_latex ends with a newline, so the last split element is "". The three
    # lines before it are the avg-rank row and the two closing lines; the
    # \hline is inserted right above the avg-rank row.
    splitted = output.split("\n")
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)
            

In [23]:
# Default-metric table per data size for binnings1
cifar_table_ndata(fname="chat_dist_c_ndata.tex", binnings=binnings1)

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,1000,$11.98_{8}$,$13.29_{11}$,$14.16_{13}$,$16.04_{14}$,$17.05_{15}$,$11.93_{7}$,$13.04_{10}$,$9.64_{2}$,$12.13_{9}$,$\mathbf{8.61_{1}}$,$10.12_{3}$,$11.66_{6}$,$10.68_{5}$,$10.18_{4}$,$13.98_{12}$,$51.75_{16}$
resnet110,3000,$7.83_{5}$,$8.49_{11}$,$9.09_{13}$,$9.33_{14}$,$9.89_{15}$,$8.16_{7}$,$8.32_{10}$,$6.89_{3}$,$8.16_{6}$,$\mathbf{6.7_{1}}$,$7.44_{4}$,$8.24_{9}$,$8.24_{8}$,$6.89_{2}$,$8.79_{12}$,$46.42_{16}$
resnet110,10000,$5.91_{5}$,$6.51_{11}$,$6.08_{6}$,$6.37_{10}$,$6.69_{12}$,$7.02_{14}$,$6.3_{9}$,$5.47_{3}$,$5.83_{4}$,$\mathbf{4.78_{1}}$,$5.32_{2}$,$6.21_{7}$,$7.79_{15}$,$6.84_{13}$,$6.22_{8}$,$43.76_{16}$
densenet40,1000,$12.89_{11}$,$13.59_{12}$,$15.4_{13}$,$16.4_{14}$,$16.84_{15}$,$11.57_{8}$,$10.98_{5}$,$10.74_{4}$,$12.17_{9}$,$\mathbf{8.06_{1}}$,$9.64_{3}$,$11.02_{6}$,$11.21_{7}$,$9.53_{2}$,$12.88_{10}$,$77.23_{16}$
densenet40,3000,$8.84_{11}$,$9.22_{12}$,$9.52_{13}$,$10.5_{14}$,$11.44_{15}$,$7.6_{8}$,$8.54_{9}$,$6.21_{2}$,$7.49_{7}$,$\mathbf{5.71_{1}}$,$6.54_{3}$,$6.77_{5}$,$6.82_{6}$,$6.71_{4}$,$8.73_{10}$,$69.87_{16}$
densenet40,10000,$6.52_{11}$,$7.14_{14}$,$6.22_{10}$,$6.75_{13}$,$7.33_{15}$,$6.59_{12}$,$5.6_{6}$,$4.29_{2}$,$4.92_{5}$,$\mathbf{4.17_{1}}$,$4.48_{3}$,$4.8_{4}$,$5.76_{7}$,$5.82_{8}$,$5.83_{9}$,$66.78_{16}$
wide32,1000,$11.91_{9}$,$13.13_{12}$,$14.94_{13}$,$16.72_{14}$,$17.12_{15}$,$10.82_{8}$,$10.45_{5}$,$10.3_{4}$,$12.5_{11}$,$\mathbf{7.38_{1}}$,$8.83_{2}$,$10.58_{6}$,$10.74_{7}$,$9.19_{3}$,$12.11_{10}$,$75.19_{16}$
wide32,3000,$6.87_{5}$,$7.67_{9}$,$7.97_{12}$,$9.59_{15}$,$9.33_{14}$,$7.15_{7}$,$7.71_{11}$,$6.82_{4}$,$7.69_{10}$,$\mathbf{6.23_{1}}$,$6.6_{2}$,$7.03_{6}$,$7.26_{8}$,$6.8_{3}$,$8.44_{13}$,$68.73_{16}$
wide32,10000,$6.4_{9}$,$6.2_{8}$,$6.56_{12}$,$6.44_{10}$,$7.11_{15}$,$6.7_{13}$,$5.74_{6}$,$5.04_{2}$,$5.62_{5}$,$\mathbf{4.81_{1}}$,$5.25_{3}$,$5.45_{4}$,$6.99_{14}$,$6.47_{11}$,$5.86_{7}$,$66.4_{16}$
,avg rank,$8.2$,$11.1$,$11.7$,$13.1$,$14.6$,$9.3$,$7.9$,$2.9$,$7.3$,$1.0$,$2.8$,$5.9$,$8.6$,$5.6$,$10.1$,$16.0$


\begin{table}
\caption{chat_dist_c_ndata}
\label{table:chat_dist_c_ndata}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &     $ES_{10}$ &     $ES_{15}$ &     $ES_{20}$ &     $ES_{25}$ &     $ES_{30}$ & $ES_{sweep}$ &     $ES_{CV}$ &   $PL3^{CE}$ &  $PL3^{MSE}$ &       $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ &        Platt &         beta &      isotonic &           KDE \\
Model & {} &               &               &               &               &               &              &               &              &              &                      &                 &              &              &              &               &               \\
\midrule
resnet110 & 1000 &   $11.98_{8}$ &  $13.29_{11}$ &  $14.16_{13}$ &  $16.04_{14}$ &  $17.05_{15}$ &  $11.93_{7}$ &  $13.04_{10}$ &   $9.64_{2}$ &  $12.13_{9}$ &  $\mathbf{8.61_{1}}$ &     $10.12_{3}$ &  $11.66_{6}$ &  $10.68_{5}$ &  $10.18_{4}$ &  $13.98_{12}$ &  $51.75_

In [24]:
# ECE_abs table per data size for binnings1
cifar_table_ndata(fname="ECE_abs_ndata.tex", binnings=binnings1, metric="ECE_abs")

Unnamed: 0_level_0,binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
resnet110,1000,$4.39_{6}$,$4.09_{3}$,$4.63_{7}$,$4.85_{10}$,$5.5_{12}$,$4.34_{5}$,$6.93_{15}$,$3.79_{2}$,$5.58_{13}$,$5.15_{11}$,$4.84_{9}$,$4.69_{8}$,$4.26_{4}$,$\mathbf{3.12_{1}}$,$6.82_{14}$,$43.48_{16}$
resnet110,3000,$2.46_{3}$,$2.5_{4}$,$\mathbf{2.22_{1}}$,$2.45_{2}$,$2.53_{5}$,$3.7_{12}$,$3.77_{14}$,$2.63_{7}$,$3.51_{11}$,$3.16_{10}$,$3.14_{9}$,$3.03_{8}$,$4.05_{15}$,$2.58_{6}$,$3.71_{13}$,$39.81_{16}$
resnet110,10000,$1.97_{7}$,$2.03_{11}$,$1.7_{3}$,$\mathbf{1.39_{1}}$,$1.57_{2}$,$2.53_{13}$,$1.99_{8}$,$1.73_{4}$,$2.0_{9}$,$1.87_{6}$,$2.02_{10}$,$2.29_{12}$,$3.64_{15}$,$2.87_{14}$,$1.87_{5}$,$38.07_{16}$
densenet40,1000,$5.09_{5}$,$\mathbf{4.62_{1}}$,$5.66_{7}$,$5.91_{10}$,$6.56_{12}$,$4.78_{2}$,$6.98_{13}$,$5.76_{9}$,$7.56_{14}$,$5.24_{6}$,$4.78_{3}$,$5.66_{8}$,$6.2_{11}$,$4.81_{4}$,$8.59_{15}$,$71.22_{16}$
densenet40,3000,$2.55_{2}$,$\mathbf{2.41_{1}}$,$2.55_{3}$,$2.75_{6}$,$2.73_{5}$,$3.4_{11}$,$4.41_{14}$,$2.65_{4}$,$3.62_{12}$,$3.29_{10}$,$3.1_{9}$,$2.97_{8}$,$3.7_{13}$,$2.9_{7}$,$4.66_{15}$,$64.68_{16}$
densenet40,10000,$1.67_{6}$,$1.67_{5}$,$1.58_{4}$,$\mathbf{1.42_{1}}$,$1.47_{2}$,$2.76_{14}$,$1.94_{10}$,$1.58_{3}$,$1.97_{11}$,$1.89_{9}$,$1.75_{8}$,$1.75_{7}$,$3.25_{15}$,$2.6_{13}$,$2.53_{12}$,$62.48_{16}$
wide32,1000,$\mathbf{3.85_{1}}$,$3.88_{2}$,$4.5_{5}$,$4.84_{7}$,$5.42_{11}$,$5.13_{10}$,$6.59_{14}$,$4.89_{8}$,$6.5_{13}$,$4.62_{6}$,$4.29_{3}$,$5.08_{9}$,$5.5_{12}$,$4.32_{4}$,$7.02_{15}$,$68.49_{16}$
wide32,3000,$2.85_{5}$,$2.44_{3}$,$2.44_{2}$,$2.47_{4}$,$\mathbf{2.39_{1}}$,$3.54_{12}$,$4.15_{15}$,$3.04_{6}$,$3.43_{10}$,$3.49_{11}$,$3.35_{8}$,$3.15_{7}$,$3.59_{13}$,$3.39_{9}$,$4.03_{14}$,$63.26_{16}$
wide32,10000,$2.17_{7}$,$2.13_{6}$,$1.91_{3}$,$1.87_{2}$,$\mathbf{1.86_{1}}$,$3.27_{14}$,$2.28_{8}$,$2.02_{4}$,$2.44_{10}$,$2.45_{11}$,$2.42_{9}$,$2.06_{5}$,$3.46_{15}$,$2.88_{13}$,$2.59_{12}$,$62.0_{16}$
,avg rank,$4.7$,$4.0$,$3.9$,$4.8$,$5.7$,$10.3$,$12.3$,$5.2$,$11.4$,$8.9$,$7.6$,$8.0$,$12.6$,$7.9$,$12.8$,$16.0$


\begin{table}
\caption{ECE_abs_ndata}
\label{table:ECE_abs_ndata}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{llllllllllllllllll}
\toprule
       & binning &            $ES_{10}$ &            $ES_{15}$ &            $ES_{20}$ &            $ES_{25}$ &            $ES_{30}$ & $ES_{sweep}$ &    $ES_{CV}$ &  $PL3^{CE}$ &  $PL3^{MSE}$ & $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ &        Platt &                 beta &     isotonic &           KDE \\
Model & {} &                      &                      &                      &                      &                      &              &              &             &              &                &                 &              &              &                      &              &               \\
\midrule
resnet110 & 1000 &           $4.39_{6}$ &           $4.09_{3}$ &           $4.63_{7}$ &          $4.85_{10}$ &           $5.5_{12}$ &   $4.34_{5}$ &  $6.93_{15}$ &  $3.79_{2}$ &  $5.58_{13}$ &    $5.15_{11}$ & 

## Compare calibration functions

In [25]:
def cifar_table_calfn(fname, binnings, metric = "c_hat_distance_c",
               cgt_nr = 3, is_std = False):
    """Build, display and save a LaTeX table comparing binnings per calibration function.

    The CIFAR-5M results returned by ``get_cifar5m()`` are restricted to the
    requested ``binnings``. For every distinct ``calibration_function`` one
    table row is produced: the per-binning value of ``metric`` (scaled by 1000
    and rounded to 2 decimals) together with the rank of that binning within
    the row (rank 1 = smallest value, typeset in bold). A final "avg rank" row
    holds the mean rank of every binning over all rows.

    Parameters
    ----------
    fname : str
        File name of the ``.tex`` file written into ``tables/``. The part
        before the first "." is reused as LaTeX caption and label.
    binnings : list of str
        Values of the ``binning`` column to keep; also the column order of the
        resulting table.
    metric : str
        Name of the column that is aggregated.
    cgt_nr : int
        Value of the ``cgt_nr`` column to select (the original code annotates
        the default 3 as "isotonic ground truth").
    is_std : bool
        If False, report the mean of ``metric`` per binning. If True, first
        average per (binning, seed) and report the standard deviation of those
        per-seed means for each binning.

    Returns
    -------
    None
        The table is shown with ``display``, written to ``tables/<fname>`` and
        printed.
    """
    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    cal_fns = df_cifar5m.calibration_function.unique()

    # Construct the table row by row.
    # For each row collect the data and the rankings (they are merged later).
    data_rows = []
    rank_rows = []
    for cal_fn in cal_fns:

        df_sel = df_cifar5m[(df_cifar5m.cgt_nr == cgt_nr)
                            & (df_cifar5m.calibration_function.isin([cal_fn]))]

        # Select the metric *before* aggregating: averaging every column fails
        # on non-numeric columns with pandas >= 2.0 and is wasted work anyway.
        if is_std:
            per_binning = (df_sel
                           .groupby(["binning", "seed"])[[metric]].mean()
                           .groupby(["binning"]).std())
        else:
            per_binning = df_sel.groupby(["binning"])[[metric]].mean()

        data_rows.append(np.round(per_binning.T * 1000, 2))
        rank_rows.append(per_binning.rank().T)

    # Concatenate once instead of growing the frames inside the loop.
    values = pd.concat(data_rows) if data_rows else pd.DataFrame()
    ranks = pd.concat(rank_rows) if rank_rows else pd.DataFrame()

    # Combine ranks and data. The strings go into an object-dtype copy so that
    # no text is written into float columns.
    df = values.astype(object)
    for row_idx in range(values.shape[0]):
        for column_idx in range(values.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates fractional ranks that rank() assigns to ties.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    # Label the rows with the calibration function they describe.
    df.index = cal_fns

    df = df[binnings]

    avg_ranks = ranks.mean()[binnings].values
    avg_ranks = ["$%0.1f$" % rnk for rnk in avg_ranks]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=["avg rank"], columns=df.columns)])

    display(df)

    # NOTE(review): the caption is the raw file stem; underscores in it may
    # need escaping before the table compiles in LaTeX - confirm.
    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    # Render first, write afterwards, so a rendering error cannot leave a
    # truncated file behind.
    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    # The last four split items are expected to be: avg-rank row, bottom rule,
    # \end{tabular} and the empty string after the final newline. The \hline
    # is inserted right before the avg-rank row.
    splitted = output.split("\n")
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)
            

In [26]:
# Calibration-function table for the default metric ("c_hat_distance_c");
# the LaTeX is written to tables/chat_dist_c_calfn.tex.
cifar_table_calfn("chat_dist_c_calfn.tex", binnings1)

binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
sweep,$20.11_{13}$,$19.77_{12}$,$17.83_{7}$,$18.34_{10}$,$18.96_{11}$,$20.96_{14}$,$17.16_{5}$,$15.57_{3}$,$15.77_{4}$,$17.45_{6}$,$18.0_{8}$,$14.68_{2}$,$27.64_{15}$,$18.28_{9}$,$\mathbf{9.34_{1}}$,$127.88_{16}$
dirODIR,$6.66_{5}$,$7.71_{8}$,$8.23_{11}$,$8.8_{13}$,$9.39_{14}$,$6.94_{6}$,$8.43_{12}$,$6.46_{4}$,$8.16_{10}$,$6.02_{3}$,$6.95_{7}$,$7.9_{9}$,$5.87_{2}$,$\mathbf{5.6_{1}}$,$9.56_{15}$,$33.13_{16}$
beta,$14.22_{12}$,$14.24_{13}$,$13.44_{11}$,$14.8_{15}$,$15.03_{16}$,$13.15_{10}$,$10.3_{6}$,$9.61_{4}$,$10.55_{7}$,$\mathbf{8.03_{1}}$,$8.99_{2}$,$9.65_{5}$,$12.0_{9}$,$11.08_{8}$,$9.32_{3}$,$14.45_{14}$
TempS,$7.92_{6}$,$8.93_{10}$,$9.08_{12}$,$9.71_{14}$,$10.16_{15}$,$8.51_{8}$,$8.72_{9}$,$6.44_{3}$,$7.97_{7}$,$6.81_{4}$,$7.7_{5}$,$8.96_{11}$,$6.15_{2}$,$\mathbf{5.84_{1}}$,$9.56_{13}$,$31.77_{16}$
VecS,$6.99_{5}$,$8.12_{9}$,$8.5_{12}$,$9.35_{13}$,$9.79_{15}$,$7.39_{7}$,$8.35_{11}$,$6.13_{4}$,$8.16_{10}$,$6.01_{3}$,$7.0_{6}$,$7.82_{8}$,$5.83_{2}$,$\mathbf{5.72_{1}}$,$9.64_{14}$,$36.94_{16}$
logplatt,$7.38_{6}$,$8.05_{10}$,$8.92_{12}$,$9.75_{14}$,$10.5_{15}$,$7.32_{5}$,$8.01_{9}$,$7.01_{3}$,$8.19_{11}$,$\mathbf{6.24_{1}}$,$7.05_{4}$,$7.81_{7}$,$7.94_{8}$,$6.68_{2}$,$9.43_{13}$,$17.09_{16}$
PW6logIOCE,$6.94_{6}$,$7.8_{11}$,$8.5_{12}$,$9.46_{13}$,$9.96_{15}$,$6.93_{5}$,$7.65_{9}$,$6.91_{4}$,$7.71_{10}$,$\mathbf{5.06_{1}}$,$5.95_{2}$,$7.01_{8}$,$6.94_{7}$,$6.85_{3}$,$9.51_{14}$,$136.49_{16}$
ScalingBinning,$7.45_{7}$,$8.05_{11}$,$9.14_{13}$,$9.65_{14}$,$10.26_{15}$,$7.29_{6}$,$7.64_{9}$,$6.39_{3}$,$8.05_{10}$,$\mathbf{5.75_{1}}$,$6.51_{4}$,$7.51_{8}$,$6.7_{5}$,$6.07_{2}$,$8.66_{12}$,$12.33_{16}$
Isotonic,$9.79_{10}$,$10.55_{11}$,$11.45_{13}$,$12.41_{14}$,$12.99_{15}$,$9.62_{9}$,$9.17_{7}$,$7.56_{3}$,$8.33_{5}$,$\mathbf{6.55_{1}}$,$7.32_{2}$,$8.05_{4}$,$10.92_{12}$,$9.22_{8}$,$8.67_{6}$,$160.12_{16}$
TempS1vsRest,$8.72_{9}$,$9.42_{12}$,$9.17_{10}$,$10.08_{14}$,$10.35_{15}$,$8.67_{8}$,$9.29_{11}$,$6.05_{2}$,$7.94_{6}$,$6.68_{4}$,$7.67_{5}$,$8.49_{7}$,$6.28_{3}$,$\mathbf{5.73_{1}}$,$9.51_{13}$,$31.63_{16}$


\begin{table}
\caption{chat_dist_c_calfn}
\label{table:chat_dist_c_calfn}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllllllllll}
\toprule
binning &     $ES_{10}$ &     $ES_{15}$ &     $ES_{20}$ &     $ES_{25}$ &     $ES_{30}$ &  $ES_{sweep}$ &    $ES_{CV}$ &    $PL3^{CE}$ &   $PL3^{MSE}$ &       $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ &         Platt &                 beta &             isotonic &            KDE \\
\midrule
sweep          &  $20.11_{13}$ &  $19.77_{12}$ &   $17.83_{7}$ &  $18.34_{10}$ &  $18.96_{11}$ &  $20.96_{14}$ &  $17.16_{5}$ &   $15.57_{3}$ &   $15.77_{4}$ &          $17.45_{6}$ &      $18.0_{8}$ &  $14.68_{2}$ &  $27.64_{15}$ &          $18.28_{9}$ &  $\mathbf{9.34_{1}}$ &  $127.88_{16}$ \\
dirODIR        &    $6.66_{5}$ &    $7.71_{8}$ &   $8.23_{11}$ &    $8.8_{13}$ &   $9.39_{14}$ &    $6.94_{6}$ &  $8.43_{12}$ &    $6.46_{4}$ &   $8.16_{10}$ &           $6.02_{3}$ &      $6.95_{7}$ &    $7.9_{9}$ &    $5.87_{2}$ &   $\ma

In [27]:
# Same table, but aggregating the "ECE_abs" column; written to tables/ECE_abs_calfn.tex.
cifar_table_calfn("ECE_abs_calfn.tex", binnings1, metric="ECE_abs")

binning,$ES_{10}$,$ES_{15}$,$ES_{20}$,$ES_{25}$,$ES_{30}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,$PL_{DE}$,Platt,beta,isotonic,KDE
sweep,$6.95_{10}$,$6.49_{9}$,$3.64_{4}$,$3.8_{5}$,$3.28_{2}$,$11.21_{14}$,$3.59_{3}$,$5.41_{8}$,$5.11_{7}$,$8.42_{13}$,$7.41_{12}$,$4.44_{6}$,$16.62_{15}$,$7.16_{11}$,$\mathbf{2.84_{1}}$,$112.19_{16}$
dirODIR,$2.99_{5}$,$2.82_{3}$,$3.01_{6}$,$2.93_{4}$,$3.24_{8}$,$3.04_{7}$,$4.22_{13}$,$3.53_{10}$,$5.16_{14}$,$3.73_{11}$,$3.29_{9}$,$3.96_{12}$,$2.68_{2}$,$\mathbf{2.48_{1}}$,$5.19_{15}$,$27.62_{16}$
beta,$3.48_{8}$,$3.5_{9}$,$2.97_{2}$,$3.14_{4}$,$2.99_{3}$,$6.74_{15}$,$4.61_{13}$,$3.69_{10}$,$3.48_{7}$,$4.28_{12}$,$3.83_{11}$,$3.48_{6}$,$7.45_{16}$,$5.99_{14}$,$\mathbf{2.94_{1}}$,$3.3_{5}$
TempS,$3.21_{6}$,$3.13_{5}$,$3.11_{4}$,$3.35_{8}$,$3.28_{7}$,$3.69_{10}$,$4.07_{13}$,$2.99_{3}$,$4.18_{14}$,$3.85_{12}$,$3.54_{9}$,$3.84_{11}$,$\mathbf{2.53_{1}}$,$2.74_{2}$,$4.21_{15}$,$23.76_{16}$
VecS,$2.86_{3}$,$2.87_{4}$,$2.88_{5}$,$3.08_{7}$,$3.37_{10}$,$2.9_{6}$,$3.99_{13}$,$3.24_{8}$,$4.99_{14}$,$3.65_{12}$,$3.27_{9}$,$3.62_{11}$,$2.7_{2}$,$\mathbf{2.43_{1}}$,$5.11_{15}$,$31.28_{16}$
logplatt,$2.72_{3}$,$2.47_{2}$,$2.9_{5}$,$2.76_{4}$,$2.98_{8}$,$2.94_{7}$,$4.0_{14}$,$2.9_{6}$,$3.94_{13}$,$3.23_{9}$,$3.55_{12}$,$3.44_{10}$,$3.45_{11}$,$\mathbf{2.36_{1}}$,$5.15_{15}$,$12.44_{16}$
PW6logIOCE,$2.88_{3}$,$\mathbf{2.59_{1}}$,$3.18_{8}$,$3.08_{7}$,$3.56_{12}$,$2.84_{2}$,$4.41_{14}$,$3.41_{11}$,$4.27_{13}$,$3.04_{6}$,$2.94_{5}$,$3.34_{10}$,$3.26_{9}$,$2.91_{4}$,$5.82_{15}$,$133.33_{16}$
ScalingBinning,$2.86_{5}$,$2.8_{4}$,$3.12_{6}$,$3.35_{10}$,$3.68_{12}$,$3.19_{7}$,$4.67_{14}$,$2.74_{3}$,$4.53_{13}$,$3.29_{9}$,$3.19_{8}$,$3.53_{11}$,$2.72_{2}$,$\mathbf{2.25_{1}}$,$4.81_{15}$,$7.4_{16}$
Isotonic,$3.19_{6}$,$2.96_{2}$,$3.07_{5}$,$3.23_{8}$,$3.44_{10}$,$3.7_{11}$,$4.34_{14}$,$\mathbf{2.75_{1}}$,$3.2_{7}$,$3.29_{9}$,$3.01_{3}$,$3.03_{4}$,$5.5_{15}$,$3.72_{12}$,$4.05_{13}$,$154.58_{16}$
TempS1vsRest,$3.22_{7}$,$3.21_{6}$,$3.44_{9}$,$3.15_{4}$,$3.21_{5}$,$3.36_{8}$,$3.74_{13}$,$3.07_{3}$,$3.82_{14}$,$3.49_{10}$,$3.65_{11}$,$3.7_{12}$,$2.97_{2}$,$\mathbf{2.91_{1}}$,$4.14_{15}$,$25.16_{16}$


\begin{table}
\caption{ECE_abs_calfn}
\label{table:ECE_abs_calfn}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllllllllll}
\toprule
binning &    $ES_{10}$ &            $ES_{15}$ &   $ES_{20}$ &    $ES_{25}$ &            $ES_{30}$ &  $ES_{sweep}$ &    $ES_{CV}$ &           $PL3^{CE}$ &  $PL3^{MSE}$ & $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &    $PL_{DE}$ &                Platt &                 beta &             isotonic &            KDE \\
\midrule
sweep          &  $6.95_{10}$ &           $6.49_{9}$ &  $3.64_{4}$ &    $3.8_{5}$ &           $3.28_{2}$ &  $11.21_{14}$ &   $3.59_{3}$ &           $5.41_{8}$ &   $5.11_{7}$ &    $8.42_{13}$ &     $7.41_{12}$ &   $4.44_{6}$ &         $16.62_{15}$ &          $7.16_{11}$ &  $\mathbf{2.84_{1}}$ &  $112.19_{16}$ \\
dirODIR        &   $2.99_{5}$ &           $2.82_{3}$ &  $3.01_{6}$ &   $2.93_{4}$ &           $3.24_{8}$ &    $3.04_{7}$ &  $4.22_{13}$ &          $3.53_{10}$ &  $5.16_{14}$ &    $3.73_{11}$ &      $3.29_{9}$ &  

# Create calibration method table

In [3]:
# Load the pickled calibration-method results and drop every row whose
# calibration function is "PW4CE2".
# NOTE(review): read_pickle executes arbitrary code from the file - only load trusted pickles.
df_cal_method = pd.read_pickle("../ECE_visualization/df_calibration_methods_12_10_1.p") 
df_cal_method = df_cal_method[df_cal_method.cal_fn != "PW4CE2"]

In [6]:
# Build one table row per tag group: mean "p_dist_c" (x1000, 1 decimal) of every
# calibration function, annotated with its rank inside the row (1 = smallest).
tag_groups = [["1vsRest1"],["1vsRest3"], ["1vsRest5"], ["confidence"]]

# Construct the table row by row.
# For each row collect the data and the rankings (they are merged later).
data_rows = []
rank_rows = []
for tag_group in tag_groups:

    df_sel = df_cal_method[(df_cal_method.tag_name.isin(tag_group))
                          & (df_cal_method.cgt == "c_isotonic")]

    # Select the metric before aggregating: averaging all columns fails on the
    # string columns with pandas >= 2.0.
    per_cal_fn = df_sel.groupby(["cal_fn"])[["p_dist_c"]].mean()

    data_rows.append(np.round(per_cal_fn.T * 1000, 1))
    rank_rows.append(per_cal_fn.rank().T)

# Concatenate once instead of growing the frames inside the loop.
df_values = pd.concat(data_rows)
ranks = pd.concat(rank_rows)

# Combine ranks and data. The strings are written into an object-dtype copy so
# that no text is assigned into float columns.
df = df_values.astype(object)
for row_idx in range(df_values.shape[0]):
    for column_idx in range(df_values.shape[1]):

        item = np.round(df_values.iloc[row_idx, column_idx], 4)
        rank = ranks.iloc[row_idx, column_idx]

        # int() truncates fractional ranks that rank() assigns to ties.
        cell = str(item) + "_{" + str(int(rank)) + "}"
        if rank == 1:
            cell = "\\mathbf{" + cell + "}"
        df.iloc[row_idx, column_idx] = "$" + cell + "$"

# Rename cols to their LaTeX display names
df = df.rename({
    "sweep": "$ES_{sweep}$",
    "PW6logIOCE": "$PL3^{CE}$",
    "PW4MSE": "$PL_{NN}^{MSE}$",
    "PW6logIOMSE": "$PL3^{MSE}$",
    "PW4CE": "$PL_{NN}^{CE}$",
    "logplatt": "Platt",
    "Isotonic": "isotonic"
}, axis=1)

# Select cols in new order
df = df[['$ES_{sweep}$', '$PL3^{CE}$', '$PL3^{MSE}$', '$PL_{NN}^{CE}$', '$PL_{NN}^{MSE}$', 'Platt', 'beta', 'isotonic', 'ScalingBinning', 'TempS',
       'TempS1vsRest', 'VecS', 'dirL2', 'dirODIR', 'MSODIR'
       ]]

display(df)

# Render first, write afterwards, so a rendering error cannot leave a
# truncated file behind.
with pd.option_context("max_colwidth", 25):
    output = df.to_latex(escape=False)
with open('tables/test.tex','w') as tf:
    tf.write(output)
print(output)

cal_fn,$ES_{sweep}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,Platt,beta,isotonic,ScalingBinning,TempS,TempS1vsRest,VecS,dirL2,dirODIR,MSODIR
p_dist_c,$7.9_{15}$,$\mathbf{2.9_{1}}$,$4.2_{7}$,$4.9_{11}$,$4.8_{10}$,$3.6_{2}$,$4.3_{8}$,$4.0_{5}$,$4.1_{6}$,$7.3_{14}$,$5.0_{12}$,$3.8_{4}$,$5.6_{13}$,$3.8_{3}$,$4.3_{9}$
p_dist_c,$19.3_{15}$,$\mathbf{5.3_{1}}$,$6.0_{2}$,$12.3_{8}$,$10.5_{7}$,$6.8_{3}$,$15.0_{12}$,$9.0_{4}$,$9.5_{5}$,$14.0_{11}$,$17.1_{14}$,$10.4_{6}$,$15.0_{13}$,$13.3_{10}$,$12.9_{9}$
p_dist_c,$25.7_{15}$,$\mathbf{3.5_{1}}$,$4.0_{2}$,$10.6_{9}$,$10.4_{8}$,$4.5_{3}$,$14.4_{14}$,$6.8_{5}$,$6.7_{4}$,$11.7_{12}$,$6.9_{6}$,$8.6_{7}$,$12.7_{13}$,$11.6_{11}$,$11.5_{10}$
p_dist_c,$23.9_{9}$,$8.5_{2}$,$\mathbf{8.4_{1}}$,$37.9_{13}$,$9.8_{4}$,$13.1_{6}$,$17.4_{7}$,$11.6_{5}$,$8.7_{3}$,$22.7_{8}$,$26.9_{10}$,$34.2_{11}$,$40.8_{15}$,$38.1_{14}$,$37.5_{12}$


\begin{tabular}{llllllllllllllll}
\toprule
cal_fn & $ES_{sweep}$ &          $PL3^{CE}$ &         $PL3^{MSE}$ & $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &       Platt &         beta &    isotonic & ScalingBinning &        TempS & TempS1vsRest &         VecS &        dirL2 &      dirODIR &       MSODIR \\
\midrule
p_dist_c &   $7.9_{15}$ &  $\mathbf{2.9_{1}}$ &           $4.2_{7}$ &     $4.9_{11}$ &      $4.8_{10}$ &   $3.6_{2}$ &    $4.3_{8}$ &   $4.0_{5}$ &      $4.1_{6}$ &   $7.3_{14}$ &   $5.0_{12}$ &    $3.8_{4}$ &   $5.6_{13}$ &    $3.8_{3}$ &    $4.3_{9}$ \\
p_dist_c &  $19.3_{15}$ &  $\mathbf{5.3_{1}}$ &           $6.0_{2}$ &     $12.3_{8}$ &      $10.5_{7}$ &   $6.8_{3}$ &  $15.0_{12}$ &   $9.0_{4}$ &      $9.5_{5}$ &  $14.0_{11}$ &  $17.1_{14}$ &   $10.4_{6}$ &  $15.0_{13}$ &  $13.3_{10}$ &   $12.9_{9}$ \\
p_dist_c &  $25.7_{15}$ &  $\mathbf{3.5_{1}}$ &           $4.0_{2}$ &     $10.6_{9}$ &      $10.4_{8}$ &   $4.5_{3}$ &  $14.4_{14}$ &   $6.8_{5}$ &      $6.7_{4}$ &  $11.7_{12}$ &   

In [19]:
# Inspect the loaded calibration-method results.
df_cal_method

Unnamed: 0,cal_fn,data_name,tag_name,cgt,p_dist_c,p_dist_c_sq
0,beta,densenet40,1vsRest1,c_slope_1,0.005846,0.000335
1,beta,densenet40,1vsRest1,c_flat,0.006309,0.000524
2,beta,densenet40,1vsRest1,c_isotonic,0.004988,0.000208
3,beta,densenet40,1vsRest1,c_sweep,0.007242,0.001082
4,beta,densenet40,1vsRest3,c_slope_1,0.016175,0.000845
...,...,...,...,...,...,...
715,VecS,wide32,1vsRest5,c_sweep,0.021709,0.004149
716,VecS,wide32,confidence,c_slope_1,0.039177,0.005403
717,VecS,wide32,confidence,c_flat,0.039196,0.005420
718,VecS,wide32,confidence,c_isotonic,0.038937,0.005387


In [38]:
def cifar_table_cal_meth(fname, binnings,
                tag_groups = [["1vsRest1"], ["1vsRest3"],  ["1vsRest5"], ["confidence"]],
               cgt_nr = 3, is_std = False, check_cal = True):
    """Build, display, save and return a LaTeX table comparing calibration methods.

    The pickled calibration-method results are loaded (rows with
    ``cal_fn == "PW4CE2"`` are dropped). For every model (resnet110,
    densenet40, wide32) and every tag group one row is produced from the rows
    with ``cgt == "c_isotonic"``: the mean ``p_dist_c`` of every calibration
    function (scaled by 1000, rounded to 2 decimals) together with its rank
    inside the row (rank 1 = smallest value, typeset in bold). A final
    "avg rank" row holds the mean rank of every column over all rows.

    Parameters
    ----------
    fname : str
        File name of the ``.tex`` file written into ``tables/``. The part
        before the first "." is reused as LaTeX caption and label.
    binnings : list of str
        Column names (after renaming to their LaTeX display names) to keep,
        in the order they should appear.
    tag_groups : list of list of str
        Values of ``tag_name`` that form one row each. The row labels and the
        per-model grouping below are hard-coded for exactly four groups.
    cgt_nr, is_std, check_cal
        Not used by this function; kept so the signature is unchanged.

    Returns
    -------
    pandas.DataFrame
        The formatted table including the "avg rank" row, so callers can keep
        working with it (e.g. ``df_t[binning_cal]``).
    """
    # NOTE(review): read_pickle executes arbitrary code from the file - only
    # load trusted pickles.
    df_cal_method = pd.read_pickle("../ECE_visualization/df_calibration_methods_12_10_1.p")
    df_cal_method = df_cal_method[df_cal_method.cal_fn != "PW4CE2"]

    # Construct the table row by row.
    # For each row collect the data and the rankings (they are merged later).
    data_rows = []
    rank_rows = []
    for model_group in [['resnet110'], ['densenet40'], ['wide32']]:

        for tag_group in tag_groups:

            df_sel = df_cal_method[df_cal_method.tag_name.isin(tag_group)
                                   & (df_cal_method.cgt == "c_isotonic")
                                   & (df_cal_method.data_name.isin(model_group))]

            # Select the metric before aggregating: averaging all columns
            # fails on the string columns with pandas >= 2.0.
            per_cal_fn = df_sel.groupby(["cal_fn"])[["p_dist_c"]].mean()

            data_rows.append(np.round(per_cal_fn.T * 1000, 2))
            rank_rows.append(per_cal_fn.rank().T)

    # Concatenate once instead of growing the frames inside the loop.
    values = pd.concat(data_rows) if data_rows else pd.DataFrame()
    ranks = pd.concat(rank_rows) if rank_rows else pd.DataFrame()

    # Combine ranks and data. The strings go into an object-dtype copy so that
    # no text is written into float columns.
    df = values.astype(object)
    for row_idx in range(values.shape[0]):
        for column_idx in range(values.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates fractional ranks that rank() assigns to ties.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    # Add row titles on the left: one label per tag group, repeated per model,
    # then a "Model" outer index level.
    df.index =  ["cars vs rest", "cats vs rest", "dogs vs rest", "confidence"]*3
    df = pd.concat([pd.concat({"resnet110": df[:4]}, names=['Model']),
         pd.concat({"densenet40": df[4:8]}, names=['Model']),
         pd.concat({"wide32": df[8:12]}, names=['Model'])])

    # Raw calibration-function names -> LaTeX display names. The same mapping
    # is applied to data and ranks so both can be indexed with `binnings`.
    display_names = {
        "sweep": "$ES_{sweep}$",
        "PW6logIOCE": "$PL3^{CE}$",
        "PW4MSE": "$PL_{NN}^{MSE}$",
        "PW6logIOMSE": "$PL3^{MSE}$",
        "PW4CE": "$PL_{NN}^{CE}$",
        "logplatt": "Platt",
        "Isotonic": "isotonic"
    }
    df = df.rename(display_names, axis=1)
    ranks = ranks.rename(display_names, axis=1)

    df = df[binnings]

    avg_ranks = ranks.mean()[binnings].values
    avg_ranks = ["$%0.1f$" % rnk for rnk in avg_ranks]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=[("","avg rank")], columns=df.columns)])

    display(df)

    # NOTE(review): the caption is the raw file stem; underscores in it may
    # need escaping before the table compiles in LaTeX - confirm.
    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    # Render first, write afterwards, so a rendering error cannot leave a
    # truncated file behind.
    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    # The last four split items are expected to be: avg-rank row, bottom rule,
    # \end{tabular} and the empty string after the final newline. The \hline
    # is inserted right before the avg-rank row.
    splitted = output.split("\n")
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)

    # Callers assign the result (df_t = cifar_table_cal_meth(...)) and index
    # it afterwards, so the finished table must be returned.
    return df
            

In [39]:
# Column selection and order for the calibration-method table, using the
# LaTeX display names that cifar_table_cal_meth renames the columns to.
binning_cal = ['$ES_{sweep}$', '$PL3^{CE}$', '$PL3^{MSE}$', '$PL_{NN}^{CE}$', '$PL_{NN}^{MSE}$', 'Platt', 'beta', 'isotonic', 'ScalingBinning', 'TempS',
       'TempS1vsRest', 'VecS', 'dirL2', 'dirODIR', 'MSODIR'
       ]

In [40]:
# Build the per-model calibration-method table and write it to tables/cal_methods_all.tex.
df_t = cifar_table_cal_meth("cal_methods_all.tex", binning_cal)

Unnamed: 0_level_0,cal_fn,$ES_{sweep}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,Platt,beta,isotonic,ScalingBinning,TempS,TempS1vsRest,VecS,dirL2,dirODIR,MSODIR
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
resnet110,cars vs rest,$8.56_{14}$,$3.88_{3}$,$4.49_{10}$,$3.73_{2}$,$4.36_{9}$,$3.9_{4}$,$4.27_{7}$,$4.34_{8}$,$4.13_{6}$,$8.75_{15}$,$\mathbf{3.0_{1}}$,$4.84_{12}$,$5.53_{13}$,$3.94_{5}$,$4.53_{11}$
resnet110,cats vs rest,$23.93_{15}$,$6.95_{2}$,$\mathbf{6.92_{1}}$,$12.61_{8}$,$11.31_{5}$,$11.26_{4}$,$13.2_{9}$,$10.92_{3}$,$11.66_{7}$,$17.45_{14}$,$16.17_{13}$,$11.57_{6}$,$13.92_{12}$,$13.46_{11}$,$13.37_{10}$
resnet110,dogs vs rest,$28.96_{15}$,$\mathbf{3.28_{1}}$,$4.39_{2}$,$10.32_{8}$,$10.69_{9}$,$6.87_{4}$,$11.28_{10}$,$6.43_{3}$,$7.74_{5}$,$13.16_{14}$,$9.46_{6}$,$9.64_{7}$,$13.11_{13}$,$12.63_{11}$,$12.88_{12}$
resnet110,confidence,$21.2_{9}$,$\mathbf{10.64_{1}}$,$11.1_{3}$,$12.69_{7}$,$12.62_{5}$,$10.75_{2}$,$12.68_{6}$,$13.61_{8}$,$11.56_{4}$,$23.92_{10}$,$30.13_{11}$,$34.78_{12}$,$37.0_{15}$,$36.66_{14}$,$36.02_{13}$
densenet40,cars vs rest,$7.27_{15}$,$2.77_{2}$,$3.8_{9}$,$5.96_{13}$,$5.44_{12}$,$3.34_{5}$,$4.99_{11}$,$3.57_{6}$,$3.3_{4}$,$3.68_{8}$,$\mathbf{2.27_{1}}$,$3.05_{3}$,$6.28_{14}$,$3.67_{7}$,$3.99_{10}$
densenet40,cats vs rest,$10.43_{8}$,$3.6_{2}$,$5.72_{3}$,$10.52_{9}$,$8.29_{5}$,$\mathbf{3.32_{1}}$,$15.88_{15}$,$7.07_{4}$,$9.19_{7}$,$10.77_{10}$,$12.31_{12}$,$8.59_{6}$,$15.77_{14}$,$12.67_{13}$,$11.42_{11}$
densenet40,dogs vs rest,$21.95_{15}$,$3.55_{2}$,$3.99_{3}$,$10.1_{9}$,$10.37_{10}$,$\mathbf{3.38_{1}}$,$14.55_{14}$,$6.62_{6}$,$6.07_{4}$,$9.59_{8}$,$6.22_{5}$,$7.35_{7}$,$13.4_{13}$,$11.53_{12}$,$10.65_{11}$
densenet40,confidence,$23.5_{9}$,$\mathbf{5.54_{1}}$,$5.9_{2}$,$35.32_{14}$,$7.98_{4}$,$13.93_{6}$,$18.31_{7}$,$9.38_{5}$,$6.1_{3}$,$21.56_{8}$,$24.32_{10}$,$28.76_{11}$,$40.19_{15}$,$34.82_{13}$,$32.84_{12}$
wide32,cars vs rest,$7.8_{13}$,$\mathbf{2.15_{1}}$,$4.42_{8}$,$4.94_{11}$,$4.48_{9}$,$3.53_{3}$,$3.66_{4}$,$3.99_{6}$,$4.88_{10}$,$9.34_{14}$,$9.63_{15}$,$3.42_{2}$,$4.96_{12}$,$3.68_{5}$,$4.42_{7}$
wide32,cats vs rest,$23.65_{15}$,$5.29_{2}$,$\mathbf{5.28_{1}}$,$13.73_{9}$,$11.99_{7}$,$5.68_{3}$,$15.86_{13}$,$9.05_{5}$,$7.61_{4}$,$13.87_{11}$,$22.83_{14}$,$11.07_{6}$,$15.35_{12}$,$13.67_{8}$,$13.85_{10}$


\begin{table}
\caption{cal_methods_all}
\label{table:cal_methods_all}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllllllllll}
\toprule
       & cal_fn &  $ES_{sweep}$ &            $PL3^{CE}$ &          $PL3^{MSE}$ & $PL_{NN}^{CE}$ & $PL_{NN}^{MSE}$ &                Platt &          beta &     isotonic & ScalingBinning &         TempS &         TempS1vsRest &          VecS &         dirL2 &       dirODIR &        MSODIR \\
Model & {} &               &                       &                      &                &                 &                      &               &              &                &               &                      &               &               &               &               \\
\midrule
resnet110 & cars vs rest &   $8.56_{14}$ &            $3.88_{3}$ &          $4.49_{10}$ &     $3.73_{2}$ &      $4.36_{9}$ &            $3.9_{4}$ &    $4.27_{7}$ &   $4.34_{8}$ &     $4.13_{6}$ &   $8.75_{15}$ &   $\mathbf{3.0_{1}}$ &   $4.84_{12}$ &

In [34]:
# Show the binning_cal columns of the table; this only works when
# cifar_table_cal_meth actually returns the DataFrame rather than None.
df_t[binning_cal]

Unnamed: 0_level_0,cal_fn,$ES_{sweep}$,$PL3^{CE}$,$PL3^{MSE}$,$PL_{NN}^{CE}$,$PL_{NN}^{MSE}$,Platt,beta,isotonic,ScalingBinning,TempS,TempS1vsRest,VecS,dirL2,dirODIR,MSODIR
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
resnet110,cars vs rest,$8.56_{14}$,$3.88_{3}$,$4.49_{10}$,$3.73_{2}$,$4.36_{9}$,$3.9_{4}$,$4.27_{7}$,$4.34_{8}$,$4.13_{6}$,$8.75_{15}$,$\mathbf{3.0_{1}}$,$4.84_{12}$,$5.53_{13}$,$3.94_{5}$,$4.53_{11}$
resnet110,cats vs rest,$23.93_{15}$,$6.95_{2}$,$\mathbf{6.92_{1}}$,$12.61_{8}$,$11.31_{5}$,$11.26_{4}$,$13.2_{9}$,$10.92_{3}$,$11.66_{7}$,$17.45_{14}$,$16.17_{13}$,$11.57_{6}$,$13.92_{12}$,$13.46_{11}$,$13.37_{10}$
resnet110,dogs vs rest,$28.96_{15}$,$\mathbf{3.28_{1}}$,$4.39_{2}$,$10.32_{8}$,$10.69_{9}$,$6.87_{4}$,$11.28_{10}$,$6.43_{3}$,$7.74_{5}$,$13.16_{14}$,$9.46_{6}$,$9.64_{7}$,$13.11_{13}$,$12.63_{11}$,$12.88_{12}$
resnet110,confidence,$21.2_{9}$,$\mathbf{10.64_{1}}$,$11.1_{3}$,$12.69_{7}$,$12.62_{5}$,$10.75_{2}$,$12.68_{6}$,$13.61_{8}$,$11.56_{4}$,$23.92_{10}$,$30.13_{11}$,$34.78_{12}$,$37.0_{15}$,$36.66_{14}$,$36.02_{13}$
densenet40,cars vs rest,$7.27_{15}$,$2.77_{2}$,$3.8_{9}$,$5.96_{13}$,$5.44_{12}$,$3.34_{5}$,$4.99_{11}$,$3.57_{6}$,$3.3_{4}$,$3.68_{8}$,$\mathbf{2.27_{1}}$,$3.05_{3}$,$6.28_{14}$,$3.67_{7}$,$3.99_{10}$
densenet40,cats vs rest,$10.43_{8}$,$3.6_{2}$,$5.72_{3}$,$10.52_{9}$,$8.29_{5}$,$\mathbf{3.32_{1}}$,$15.88_{15}$,$7.07_{4}$,$9.19_{7}$,$10.77_{10}$,$12.31_{12}$,$8.59_{6}$,$15.77_{14}$,$12.67_{13}$,$11.42_{11}$
densenet40,dogs vs rest,$21.95_{15}$,$3.55_{2}$,$3.99_{3}$,$10.1_{9}$,$10.37_{10}$,$\mathbf{3.38_{1}}$,$14.55_{14}$,$6.62_{6}$,$6.07_{4}$,$9.59_{8}$,$6.22_{5}$,$7.35_{7}$,$13.4_{13}$,$11.53_{12}$,$10.65_{11}$
densenet40,confidence,$23.5_{9}$,$\mathbf{5.54_{1}}$,$5.9_{2}$,$35.32_{14}$,$7.98_{4}$,$13.93_{6}$,$18.31_{7}$,$9.38_{5}$,$6.1_{3}$,$21.56_{8}$,$24.32_{10}$,$28.76_{11}$,$40.19_{15}$,$34.82_{13}$,$32.84_{12}$
wide32,cars vs rest,$7.8_{13}$,$\mathbf{2.15_{1}}$,$4.42_{8}$,$4.94_{11}$,$4.48_{9}$,$3.53_{3}$,$3.66_{4}$,$3.99_{6}$,$4.88_{10}$,$9.34_{14}$,$9.63_{15}$,$3.42_{2}$,$4.96_{12}$,$3.68_{5}$,$4.42_{7}$
wide32,cats vs rest,$23.65_{15}$,$5.29_{2}$,$\mathbf{5.28_{1}}$,$13.73_{9}$,$11.99_{7}$,$5.68_{3}$,$15.86_{13}$,$9.05_{5}$,$7.61_{4}$,$13.87_{11}$,$22.83_{14}$,$11.07_{6}$,$15.35_{12}$,$13.67_{8}$,$13.85_{10}$
