# 1. Pseudo-real - tables - Main Article

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

from prepare_cifar5m_data import get_cifar5m

pd.set_option('display.max_rows', 500)
%load_ext autoreload
%autoreload 2

In [2]:
def correlation_finder(grp):
    """Correlate the debiased estimate with the reference distance within one group.

    Parameters
    ----------
    grp : pandas.DataFrame
        Must provide the columns ``c_hat_distance_p_debiased`` and ``p_distance_c``.

    Returns
    -------
    pandas.Series
        Two entries, ``spearman`` and ``pearson``, holding the respective
        correlation coefficients between the two columns (p-values are dropped).
    """
    estimated = grp["c_hat_distance_p_debiased"]
    reference = grp["p_distance_c"]

    # Both scipy functions return (coefficient, p-value); only the coefficient is kept.
    rank_corr = stats.spearmanr(estimated, reference)[0]
    linear_corr = stats.pearsonr(estimated, reference)[0]

    return pd.Series({"spearman": rank_corr, "pearson": linear_corr},
                     index=["spearman", "pearson"])

# CIFAR-5M calibration-map table (`c_hat_distance_c`)

In [3]:
df_cifar5m = get_cifar5m()

In [4]:
df_cifar5m.head()

Unnamed: 0,model_name,tag_name,cgt_nr,seed,n_data,binning,n_bins,c_hat_distance_p,c_hat_distance_p_square,c_hat_distance_p_debiased,c_hat_distance_p_square_debiased,c_hat_distance_c,c_hat_distance_c_square,p_distance_c,p_distance_c_square,calibration_function,ECE_abs,ECE_square,batch_size,ECE_diff_abs
0,resnet110,confidence,0,1,1000,KDE,-1,0.027034,0.001242,0.027034,0.001242,0.031597,0.002515,0.021789,0.001688,sweep,0.005244,0.000445,9999.0,0.005244
2,densenet40,1vsRest1,0,1,3000,KDE,-1,0.141806,0.130617,0.141806,0.130617,0.141876,0.130611,0.000838,7e-06,dirODIR,0.140968,0.13061,9999.0,0.140968
4,resnet110,1vsRest1,0,4,1000,KDE,-1,0.007129,0.001602,0.007129,0.001602,0.006972,0.00153,0.003516,0.000174,beta,0.003613,0.001429,9999.0,0.003613
6,densenet40,confidence,0,3,3000,KDE,-1,0.021502,0.003806,0.021502,0.003806,0.021744,0.003773,0.005097,6.6e-05,TempS,0.016405,0.003739,9999.0,0.016405
8,resnet110,1vsRest5,0,0,3000,KDE,-1,0.01627,0.00389,0.01627,0.00389,0.015012,0.003541,0.006625,0.000219,VecS,0.009644,0.003672,9999.0,0.009644


In [5]:
df_cifar5m.shape

(243360, 20)

In [6]:
from matplotlib import pyplot as plt

In [3]:
def cifar_table(fname, binnings, metric = "c_hat_distance_c", 
                tag_groups = [["1vsRest1"], ["1vsRest3"],  ["1vsRest5"], ["confidence"]],
               cgt_nr = 3, is_std = False, check_cal = True):
    """Build the per-model / per-task table of `metric` and write it to ``tables/<fname>`` as LaTeX.

    For each model (resnet110, densenet40, wide32) and each entry of `tag_groups`,
    the rows returned by ``get_cifar5m()`` are filtered and `metric` is aggregated
    per ``binning`` value. Values are multiplied by 1000 and rounded to 2 decimals.
    Every cell is rendered as ``$value_{rank}$``; the rank-1 cell of a row is wrapped
    in LaTeX mathbf. A final "avg rank" row holds the mean rank of each binning.

    Parameters
    ----------
    fname : str
        File name inside ``tables/``; the part before the first "." is used as
        LaTeX caption and label.
    binnings : list of str
        ``binning`` values to keep; also the column order of the table.
    metric : str
        Column of the data frame to aggregate.
    tag_groups : list of list of str
        Each inner list selects the ``tag_name`` values pooled into one table row.
    cgt_nr : int
        Value of the ``cgt_nr`` column to keep.
    is_std : bool
        If True, report the standard deviation across seeds of the per-seed means
        instead of the overall mean.
    check_cal : bool
        If True, keep only the six calibration functions listed in ``cal_sub``.

    Returns
    -------
    pandas.DataFrame
        The formatted (string) table, which is also displayed and printed.
    """
    model_names = ["resnet110", "densenet40", "wide32"]
    # Row labels for the known task tags; unknown tags fall back to the raw tag name.
    tag_labels = {"1vsRest1": "cars vs rest", "1vsRest3": "cats vs rest",
                  "1vsRest5": "dogs vs rest", "confidence": "confidence"}

    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt', 'PW6logIOCE', 'ScalingBinning', 'Isotonic'
                  ] #"dirODIR", "MSODIR", "PW4MSE"] # Six best ranking calibration methods
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # One single-row frame per (model, tag group); concatenated once after the loop.
    value_rows = []
    rank_rows = []

    for model_name in model_names:
        for tag_group in tag_groups:

            df_sel = df_cifar5m[df_cifar5m.tag_name.isin(tag_group)
                                & (df_cifar5m.cgt_nr == cgt_nr) # isotonic ground truth
                                & (df_cifar5m.model_name.isin([model_name]))]

            # Aggregate only `metric`: averaging the whole frame fails on the
            # string columns with pandas >= 2.0 and does needless work otherwise.
            if is_std:
                per_binning = (df_sel.groupby(["binning", "seed"])[metric].mean()
                               .groupby(level="binning").std())
            else:
                per_binning = df_sel.groupby("binning")[metric].mean()

            value_rows.append(np.round(per_binning.to_frame().T * 1000, 2))
            # Ranks come from the unrounded values; the smallest value gets rank 1.
            rank_rows.append(per_binning.rank().to_frame().T)

    values = pd.concat(value_rows)
    ranks = pd.concat(rank_rows)

    # Combine ranks and data. Work on an object-dtype copy so that writing strings
    # does not rely on implicit float -> object upcasting.
    df = values.astype(object)
    for row_idx in range(df.shape[0]):
        for column_idx in range(df.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates the fractional ranks that ties produce.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    # Add row titles on the left: (Model, task) MultiIndex.
    row_labels = [" + ".join(tag_labels.get(tag, tag) for tag in tag_group)
                  for tag_group in tag_groups]
    rows_per_model = len(tag_groups)
    df.index = row_labels * len(model_names)
    df = pd.concat([pd.concat({model_name: df[i * rows_per_model:(i + 1) * rows_per_model]},
                              names=['Model'])
                    for i, model_name in enumerate(model_names)])

    df = df[binnings]

    avg_ranks = ranks.mean()[binnings].values
    avg_ranks = ["$%0.1f$" % rnk for rnk in avg_ranks]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=[("","avg rank")], columns=df.columns)])

    display(df)

    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    # The last four split entries are: avg-rank row, \bottomrule, \end{tabular}, "".
    # The \hline therefore lands directly above the avg-rank row.
    splitted = output.split("\n")
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)

    return df

In [4]:
# Values of the `binning` column to keep, in the column order of the generated
# tables (passed as the `binnings` argument of the table functions).
binnings1 = ['$ES_{15}$','$ES_{sweep}$', '$ES_{CV}$',
             '$PL3^{CE}$', '$PL_{NN}^{CE}$', '$PL_{DE}$',
             'Platt', 'beta', 'isotonic']

In [187]:
df_out = cifar_table("chat_dist_c.tex", binnings1, cgt_nr = 3, check_cal = True)

Unnamed: 0_level_0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,beta,isotonic,Platt
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
resnet110,cars vs rest,$3.78_{4}$,$4.64_{9}$,$3.9_{6}$,$3.52_{3}$,$3.18_{2}$,$3.92_{7}$,$\mathbf{3.07_{1}}$,$3.85_{5}$,$4.32_{8}$
resnet110,cats vs rest,$10.4_{7}$,$11.1_{8}$,$10.04_{3}$,$7.72_{2}$,$\mathbf{7.44_{1}}$,$10.17_{5}$,$10.4_{6}$,$10.11_{4}$,$11.55_{9}$
resnet110,dogs vs rest,$9.24_{8}$,$8.53_{5}$,$9.07_{7}$,$7.0_{2}$,$\mathbf{6.37_{1}}$,$8.97_{6}$,$8.05_{3}$,$9.68_{9}$,$8.16_{4}$
resnet110,confidence,$14.29_{8}$,$11.89_{6}$,$13.86_{7}$,$11.09_{3}$,$\mathbf{9.79_{1}}$,$11.76_{5}$,$10.36_{2}$,$15.01_{9}$,$11.57_{4}$
densenet40,cars vs rest,$4.95_{8}$,$4.97_{9}$,$3.79_{5}$,$3.32_{3}$,$\mathbf{2.43_{1}}$,$4.21_{7}$,$3.13_{2}$,$3.9_{6}$,$3.69_{4}$
densenet40,cats vs rest,$11.93_{9}$,$10.08_{8}$,$9.12_{6}$,$7.16_{2}$,$\mathbf{5.72_{1}}$,$7.6_{3}$,$8.31_{4}$,$9.57_{7}$,$8.88_{5}$
densenet40,dogs vs rest,$8.96_{9}$,$8.27_{7}$,$8.02_{6}$,$6.79_{3}$,$\mathbf{5.02_{1}}$,$6.94_{4}$,$6.66_{2}$,$8.45_{8}$,$7.15_{5}$
densenet40,confidence,$14.09_{8}$,$11.04_{2}$,$12.55_{7}$,$11.05_{3}$,$\mathbf{10.75_{1}}$,$11.37_{5}$,$11.31_{4}$,$14.67_{9}$,$11.99_{6}$
wide32,cars vs rest,$3.48_{3}$,$4.22_{8}$,$3.76_{5}$,$3.89_{6}$,$\mathbf{2.62_{1}}$,$3.66_{4}$,$3.2_{2}$,$3.95_{7}$,$4.63_{9}$
wide32,cats vs rest,$9.01_{6}$,$9.26_{9}$,$8.57_{4}$,$7.43_{2}$,$\mathbf{6.75_{1}}$,$8.8_{5}$,$8.36_{3}$,$9.14_{8}$,$9.03_{7}$


\begin{table}
\caption{chat_dist_c}
\label{table:chat_dist_c}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllll}
\toprule
       & binning &    $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ &        $PL_{NN}^{CE}$ &    $PL_{DE}$ &                 beta &     isotonic &        Platt \\
Model & {} &              &              &              &              &                       &              &                      &              &              \\
\midrule
resnet110 & cars vs rest &   $3.78_{4}$ &   $4.64_{9}$ &    $3.9_{6}$ &   $3.52_{3}$ &            $3.18_{2}$ &   $3.92_{7}$ &  $\mathbf{3.07_{1}}$ &   $3.85_{5}$ &   $4.32_{8}$ \\
       & cats vs rest &   $10.4_{7}$ &   $11.1_{8}$ &  $10.04_{3}$ &   $7.72_{2}$ &   $\mathbf{7.44_{1}}$ &  $10.17_{5}$ &           $10.4_{6}$ &  $10.11_{4}$ &  $11.55_{9}$ \\
       & dogs vs rest &   $9.24_{8}$ &   $8.53_{5}$ &   $9.07_{7}$ &    $7.0_{2}$ &   $\mathbf{6.37_{1}}$ &   $8.97_{6}$ &           $8.05_{3}$ & 

In [125]:
def cifar_table_ndata(fname, binnings, metric = "c_hat_distance_c", 
                n_datas = [1000, 3000, 10000],
               cgt_nr = 3, is_std = False, check_cal = True):
    """Build the per-model / per-data-size table of `metric` and write it to ``tables/<fname>``.

    For each model (resnet110, densenet40, wide32) and each value in `n_datas`,
    the rows returned by ``get_cifar5m()`` are filtered and `metric` is aggregated
    per ``binning`` value. Values are multiplied by 1000 and rounded to 2 decimals.
    Every cell is rendered as ``$value_{rank}$``; the rank-1 cell of a row is wrapped
    in LaTeX mathbf. A final "avg rank" row holds the mean rank of each binning.

    Parameters
    ----------
    fname : str
        File name inside ``tables/``; the part before the first "." is used as
        LaTeX caption and label.
    binnings : list of str
        ``binning`` values to keep; also the column order of the table.
    metric : str
        Column of the data frame to aggregate.
    n_datas : list
        Values of the ``n_data`` column, one table row per value and model.
    cgt_nr : int
        Value of the ``cgt_nr`` column to keep.
    is_std : bool
        If True, report the standard deviation across seeds of the per-seed means
        instead of the overall mean.
    check_cal : bool
        If True, keep only the six calibration functions listed in ``cal_sub``.

    Returns
    -------
    None
        The table is displayed, printed and written to disk.
    """
    model_names = ["resnet110", "densenet40", "wide32"]

    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt', 'PW6logIOCE', 'ScalingBinning', 'Isotonic',
                  ]#"dirODIR", "MSODIR", "PW4MSE"] # Six best ranking calibration methods
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # One single-row frame per (model, n_data); concatenated once after the loop.
    value_rows = []
    rank_rows = []

    for model_name in model_names:
        for n_data in n_datas:

            df_sel = df_cifar5m[(df_cifar5m.cgt_nr == cgt_nr) # isotonic ground truth
                                & (df_cifar5m.model_name.isin([model_name]))
                                & (df_cifar5m.n_data == n_data)]

            # Aggregate only `metric`: averaging the whole frame fails on the
            # string columns with pandas >= 2.0 and does needless work otherwise.
            if is_std:
                per_binning = (df_sel.groupby(["binning", "seed"])[metric].mean()
                               .groupby(level="binning").std())
            else:
                per_binning = df_sel.groupby("binning")[metric].mean()

            value_rows.append(np.round(per_binning.to_frame().T * 1000, 2))
            # Ranks come from the unrounded values; the smallest value gets rank 1.
            rank_rows.append(per_binning.rank().to_frame().T)

    values = pd.concat(value_rows)
    ranks = pd.concat(rank_rows)

    # Combine ranks and data. Work on an object-dtype copy so that writing strings
    # does not rely on implicit float -> object upcasting.
    df = values.astype(object)
    for row_idx in range(df.shape[0]):
        for column_idx in range(df.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates the fractional ranks that ties produce.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    # Add row titles on the left: (Model, n_data) MultiIndex, following `n_datas`
    # instead of a fixed [1000, 3000, 10000] list.
    rows_per_model = len(n_datas)
    df.index = list(n_datas) * len(model_names)
    df = pd.concat([pd.concat({model_name: df[i * rows_per_model:(i + 1) * rows_per_model]},
                              names=['Model'])
                    for i, model_name in enumerate(model_names)])

    df = df[binnings]

    avg_ranks = ranks.mean()[binnings].values
    avg_ranks = ["$%0.1f$" % rnk for rnk in avg_ranks]
    df = pd.concat([df, pd.DataFrame([avg_ranks], index=[("","avg rank")], columns=df.columns)])

    display(df)

    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    # The last four split entries are: avg-rank row, \bottomrule, \end{tabular}, "".
    # The \hline therefore lands directly above the avg-rank row.
    splitted = output.split("\n")
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str] + splitted[-4:-1] + end_list)

    with open(f'tables/{fname}','w') as tf:
        tf.write(output_new)
    print(output_new)
            

In [126]:
cifar_table_ndata("chat_dist_c_ndata.tex", binnings1, check_cal=True)

Unnamed: 0_level_0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,beta,isotonic,Platt
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
resnet110,1000,$13.29_{8}$,$11.93_{6}$,$13.04_{7}$,$9.64_{2}$,$\mathbf{8.61_{1}}$,$11.66_{5}$,$10.18_{3}$,$13.98_{9}$,$10.68_{4}$
resnet110,3000,$8.49_{8}$,$8.16_{4}$,$8.32_{7}$,$6.89_{3}$,$\mathbf{6.7_{1}}$,$8.24_{6}$,$6.89_{2}$,$8.79_{9}$,$8.24_{5}$
resnet110,10000,$6.51_{6}$,$7.02_{8}$,$6.3_{5}$,$5.47_{2}$,$\mathbf{4.78_{1}}$,$6.21_{3}$,$6.84_{7}$,$6.22_{4}$,$7.79_{9}$
densenet40,1000,$13.59_{9}$,$11.57_{7}$,$10.98_{4}$,$10.74_{3}$,$\mathbf{8.06_{1}}$,$11.02_{5}$,$9.53_{2}$,$12.88_{8}$,$11.21_{6}$
densenet40,3000,$9.22_{9}$,$7.6_{6}$,$8.54_{7}$,$6.21_{2}$,$\mathbf{5.71_{1}}$,$6.77_{4}$,$6.71_{3}$,$8.73_{8}$,$6.82_{5}$
densenet40,10000,$7.14_{9}$,$6.59_{8}$,$5.6_{4}$,$4.29_{2}$,$\mathbf{4.17_{1}}$,$4.8_{3}$,$5.82_{6}$,$5.83_{7}$,$5.76_{5}$
wide32,1000,$13.13_{9}$,$10.82_{7}$,$10.45_{4}$,$10.3_{3}$,$\mathbf{7.38_{1}}$,$10.58_{5}$,$9.19_{2}$,$12.11_{8}$,$10.74_{6}$
wide32,3000,$7.67_{7}$,$7.15_{5}$,$7.71_{8}$,$6.82_{3}$,$\mathbf{6.23_{1}}$,$7.03_{4}$,$6.8_{2}$,$8.44_{9}$,$7.26_{6}$
wide32,10000,$6.2_{6}$,$6.7_{8}$,$5.74_{4}$,$5.04_{2}$,$\mathbf{4.81_{1}}$,$5.45_{3}$,$6.47_{7}$,$5.86_{5}$,$6.99_{9}$
,avg rank,$7.9$,$6.6$,$5.6$,$2.4$,$1.0$,$4.2$,$3.8$,$7.4$,$6.1$


\begin{table}
\caption{chat_dist_c_ndata}
\label{table:chat_dist_c_ndata}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllll}
\toprule
       & binning &    $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ &       $PL_{NN}^{CE}$ &    $PL_{DE}$ &         beta &     isotonic &        Platt \\
Model & {} &              &              &              &              &                      &              &              &              &              \\
\midrule
resnet110 & 1000 &  $13.29_{8}$ &  $11.93_{6}$ &  $13.04_{7}$ &   $9.64_{2}$ &  $\mathbf{8.61_{1}}$ &  $11.66_{5}$ &  $10.18_{3}$ &  $13.98_{9}$ &  $10.68_{4}$ \\
       & 3000 &   $8.49_{8}$ &   $8.16_{4}$ &   $8.32_{7}$ &   $6.89_{3}$ &   $\mathbf{6.7_{1}}$ &   $8.24_{6}$ &   $6.89_{2}$ &   $8.79_{9}$ &   $8.24_{5}$ \\
       & 10000 &   $6.51_{6}$ &   $7.02_{8}$ &    $6.3_{5}$ &   $5.47_{2}$ &  $\mathbf{4.78_{1}}$ &   $6.21_{3}$ &   $6.84_{7}$ &   $6.22_{4}$ &   $7.79_{9}$ \\
densenet40 & 1000 &  $13.

# Create calibration method table

In [52]:
# Calibration functions to keep when filtering the `cal_fn` column of the
# calibration-method results.
cal_fns = ['sweep', 'PW6logIOCE', 'PW4MSE', 'logplatt', 'beta', 'Isotonic', 'ScalingBinning', 'TempS',
       'TempS1vsRest', 'VecS', 'dirL2', 'dirODIR', 'MSODIR']

In [53]:
# Load the calibration-method results and keep only the functions listed in `cal_fns`.
# NOTE(review): read_pickle executes arbitrary code from the file -- only load pickles
# produced by this project. The path is relative to the notebook's working directory.
df_cal_method = pd.read_pickle("../ECE_visualization/df_calibration_methods_12_10_1.p") 
df_cal_method = df_cal_method[df_cal_method.cal_fn.isin(cal_fns)]

In [54]:
df_cal_method.cal_fn.unique()

array(['beta', 'dirL2', 'dirODIR', 'Isotonic', 'logplatt', 'MSODIR',
       'PW4MSE', 'PW6logIOCE', 'ScalingBinning', 'sweep', 'TempS1vsRest',
       'TempS', 'VecS'], dtype=object)

In [117]:
# One table row per tag group.
tag_groups = [["1vsRest1"],["1vsRest3"], ["1vsRest5"], ["confidence"]]

# Construct the table row by row: for each tag group collect the scaled mean of
# `p_dist_c` per calibration function and the matching ranks (merged below).
value_rows = []
rank_rows = []

for tag_group in tag_groups:

    df_sel = df_cal_method[(df_cal_method.tag_name.isin(tag_group))
                          & (df_cal_method.cgt == "c_isotonic")]

    # Aggregate only `p_dist_c`: averaging the whole frame fails on the string
    # columns with pandas >= 2.0.
    mean_by_cal_fn = df_sel.groupby("cal_fn")["p_dist_c"].mean()

    value_rows.append(np.round(mean_by_cal_fn.to_frame().T * 1000, 1))
    # Ranks come from the unrounded means; the smallest value gets rank 1.
    rank_rows.append(mean_by_cal_fn.rank().to_frame().T)

values = pd.concat(value_rows)
ranks = pd.concat(rank_rows)

# Combine ranks and data into "$value_{rank}$" strings (rank 1 in bold).
# An object-dtype copy avoids writing strings into float columns.
df = values.astype(object)
for row_idx in range(df.shape[0]):
    for column_idx in range(df.shape[1]):

        item = np.round(values.iloc[row_idx, column_idx], 4)
        rank = ranks.iloc[row_idx, column_idx]

        # int() truncates the fractional ranks that ties produce.
        cell = str(item) + "_{" + str(int(rank)) + "}"
        if rank == 1:
            cell = "\\mathbf{" + cell + "}"
        df.iloc[row_idx, column_idx] = "$" + cell + "$"

In [119]:
ranks.mean(axis=0).sort_values()

cal_fn
PW6logIOCE         1.00
logplatt           2.75
ScalingBinning     3.75
Isotonic           4.00
PW4MSE             6.25
VecS               6.25
dirODIR            8.00
MSODIR             8.50
beta               8.75
TempS1vsRest       9.00
TempS              9.50
dirL2             11.50
sweep             11.75
dtype: float64

In [116]:
ranks

binning,$ES_{15}$,$ES_{CV}$,$ES_{sweep}$,$PL3^{CE}$,$PL_{DE}$,$PL_{NN}^{CE}$,Platt,beta,isotonic
c_hat_distance_c,7.0,4.0,9.0,3.0,6.0,1.0,8.0,2.0,5.0
c_hat_distance_c,9.0,5.0,8.0,2.0,3.0,1.0,7.0,4.0,6.0
c_hat_distance_c,9.0,6.0,7.0,2.0,5.0,1.0,4.0,3.0,8.0
c_hat_distance_c,8.0,7.0,4.0,2.0,5.0,1.0,6.0,3.0,9.0


In [56]:
df

cal_fn,Isotonic,MSODIR,PW4MSE,PW6logIOCE,ScalingBinning,TempS,TempS1vsRest,VecS,beta,dirL2,dirODIR,logplatt,sweep
p_dist_c,$4.0_{5}$,$4.3_{8}$,$4.8_{9}$,$\mathbf{2.9_{1}}$,$4.1_{6}$,$7.3_{12}$,$5.0_{10}$,$3.8_{4}$,$4.3_{7}$,$5.6_{11}$,$3.8_{3}$,$3.6_{2}$,$7.9_{13}$
p_dist_c,$9.0_{3}$,$12.9_{7}$,$10.5_{6}$,$\mathbf{5.3_{1}}$,$9.5_{4}$,$14.0_{9}$,$17.1_{12}$,$10.4_{5}$,$15.0_{10}$,$15.0_{11}$,$13.3_{8}$,$6.8_{2}$,$19.3_{13}$
p_dist_c,$6.8_{4}$,$11.5_{8}$,$10.4_{7}$,$\mathbf{3.5_{1}}$,$6.7_{3}$,$11.7_{10}$,$6.9_{5}$,$8.6_{6}$,$14.4_{12}$,$12.7_{11}$,$11.6_{9}$,$4.5_{2}$,$25.7_{13}$
p_dist_c,$11.6_{4}$,$37.5_{11}$,$9.8_{3}$,$\mathbf{8.5_{1}}$,$8.7_{2}$,$22.7_{7}$,$26.9_{9}$,$34.2_{10}$,$17.4_{6}$,$40.8_{13}$,$38.1_{12}$,$13.1_{5}$,$23.9_{8}$


In [57]:
# Rename cols
# Display labels for the calibration functions whose column name changes.
display_labels = {
    "sweep": "$ES_{sweep}$",
    "PW6logIOCE": "$PL3$",
    "PW4MSE": "$PL_{NN}$",
    "logplatt": "Platt",
    "Isotonic": "isotonic",
}

# Column order of the exported table (uses the new labels).
column_order = ['$ES_{sweep}$', '$PL3$', '$PL_{NN}$', 'Platt', 'beta', 'isotonic', 'ScalingBinning', 'TempS', 'TempS1vsRest', 'VecS', 'dirL2', 'dirODIR', 'MSODIR']

df = df.rename(columns=display_labels)
df = df[column_order]

display(df)

# Render under the same column-width option, write the LaTeX file, then echo it.
with pd.option_context("max_colwidth", 25):
    with open('tables/test.tex', 'w') as tf:
        output = df.to_latex(escape=False)
        tf.write(output)
        print(output)

cal_fn,$ES_{sweep}$,$PL3$,$PL_{NN}$,Platt,beta,isotonic,ScalingBinning,TempS,TempS1vsRest,VecS,dirL2,dirODIR,MSODIR
p_dist_c,$7.9_{13}$,$\mathbf{2.9_{1}}$,$4.8_{9}$,$3.6_{2}$,$4.3_{7}$,$4.0_{5}$,$4.1_{6}$,$7.3_{12}$,$5.0_{10}$,$3.8_{4}$,$5.6_{11}$,$3.8_{3}$,$4.3_{8}$
p_dist_c,$19.3_{13}$,$\mathbf{5.3_{1}}$,$10.5_{6}$,$6.8_{2}$,$15.0_{10}$,$9.0_{3}$,$9.5_{4}$,$14.0_{9}$,$17.1_{12}$,$10.4_{5}$,$15.0_{11}$,$13.3_{8}$,$12.9_{7}$
p_dist_c,$25.7_{13}$,$\mathbf{3.5_{1}}$,$10.4_{7}$,$4.5_{2}$,$14.4_{12}$,$6.8_{4}$,$6.7_{3}$,$11.7_{10}$,$6.9_{5}$,$8.6_{6}$,$12.7_{11}$,$11.6_{9}$,$11.5_{8}$
p_dist_c,$23.9_{8}$,$\mathbf{8.5_{1}}$,$9.8_{3}$,$13.1_{5}$,$17.4_{6}$,$11.6_{4}$,$8.7_{2}$,$22.7_{7}$,$26.9_{9}$,$34.2_{10}$,$40.8_{13}$,$38.1_{12}$,$37.5_{11}$


\begin{tabular}{llllllllllllll}
\toprule
cal_fn & $ES_{sweep}$ &               $PL3$ &   $PL_{NN}$ &       Platt &         beta &    isotonic & ScalingBinning &        TempS & TempS1vsRest &         VecS &        dirL2 &      dirODIR &       MSODIR \\
\midrule
p_dist_c &   $7.9_{13}$ &  $\mathbf{2.9_{1}}$ &   $4.8_{9}$ &   $3.6_{2}$ &    $4.3_{7}$ &   $4.0_{5}$ &      $4.1_{6}$ &   $7.3_{12}$ &   $5.0_{10}$ &    $3.8_{4}$ &   $5.6_{11}$ &    $3.8_{3}$ &    $4.3_{8}$ \\
p_dist_c &  $19.3_{13}$ &  $\mathbf{5.3_{1}}$ &  $10.5_{6}$ &   $6.8_{2}$ &  $15.0_{10}$ &   $9.0_{3}$ &      $9.5_{4}$ &   $14.0_{9}$ &  $17.1_{12}$ &   $10.4_{5}$ &  $15.0_{11}$ &   $13.3_{8}$ &   $12.9_{7}$ \\
p_dist_c &  $25.7_{13}$ &  $\mathbf{3.5_{1}}$ &  $10.4_{7}$ &   $4.5_{2}$ &  $14.4_{12}$ &   $6.8_{4}$ &      $6.7_{3}$ &  $11.7_{10}$ &    $6.9_{5}$ &    $8.6_{6}$ &  $12.7_{11}$ &   $11.6_{9}$ &   $11.5_{8}$ \\
p_dist_c &   $23.9_{8}$ &  $\mathbf{8.5_{1}}$ &   $9.8_{3}$ &  $13.1_{5}$ &   $17.4_{6}$ &  $11.6_{4

In [63]:
df

cal_fn,$ES_{sweep}$,$PL3$,$PL_{NN}$,Platt,beta,isotonic,ScalingBinning,TempS,TempS1vsRest,VecS,dirL2,dirODIR,MSODIR
p_dist_c,$7.9_{13}$,$\mathbf{2.9_{1}}$,$4.8_{9}$,$3.6_{2}$,$4.3_{7}$,$4.0_{5}$,$4.1_{6}$,$7.3_{12}$,$5.0_{10}$,$3.8_{4}$,$5.6_{11}$,$3.8_{3}$,$4.3_{8}$
p_dist_c,$19.3_{13}$,$\mathbf{5.3_{1}}$,$10.5_{6}$,$6.8_{2}$,$15.0_{10}$,$9.0_{3}$,$9.5_{4}$,$14.0_{9}$,$17.1_{12}$,$10.4_{5}$,$15.0_{11}$,$13.3_{8}$,$12.9_{7}$
p_dist_c,$25.7_{13}$,$\mathbf{3.5_{1}}$,$10.4_{7}$,$4.5_{2}$,$14.4_{12}$,$6.8_{4}$,$6.7_{3}$,$11.7_{10}$,$6.9_{5}$,$8.6_{6}$,$12.7_{11}$,$11.6_{9}$,$11.5_{8}$
p_dist_c,$23.9_{8}$,$\mathbf{8.5_{1}}$,$9.8_{3}$,$13.1_{5}$,$17.4_{6}$,$11.6_{4}$,$8.7_{2}$,$22.7_{7}$,$26.9_{9}$,$34.2_{10}$,$40.8_{13}$,$38.1_{12}$,$37.5_{11}$


## Table 3 - ECE and Ranking

In [9]:
def table_ece_ranking(fname, binnings, cgt_nr = 3, check_cal=True,  
                      tag_groups = [["1vsRest1", "1vsRest3", "1vsRest5"], ["confidence"]]):
    """Build the ECE-difference / ranking-correlation table and write ``tables/<fname>.tex``.

    For each entry of `tag_groups` two rows are produced:

    * ``ECE_diff_abs``: mean per ``binning``, multiplied by 1000 and rounded to
      2 decimals; the smallest value gets rank 1.
    * ``spearman``: the coefficient returned by ``correlation_finder`` for every
      (seed, tag_name, n_data, binning, model_name) group, averaged per ``binning``
      and rounded to 3 decimals; the largest value gets rank 1.

    Every cell is rendered as ``$value_{rank}$``; rank-1 cells are wrapped in
    LaTeX mathbf.

    Parameters
    ----------
    fname : str
        File name without extension; the table is written to ``tables/<fname>.tex``.
    binnings : list of str
        ``binning`` values to keep; also the column order of the table.
    cgt_nr : int
        Value of the ``cgt_nr`` column to keep.
    check_cal : bool
        If True, keep only the six calibration functions listed in ``cal_sub``.
    tag_groups : list of list of str
        Each inner list selects the ``tag_name`` values pooled into one pair of rows.
        The row labelling below assumes exactly two groups (one-vs-rest, confidence).

    Returns
    -------
    None
        The table is displayed, printed and written to disk.
    """
    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt','PW6logIOCE', 'ScalingBinning', 'Isotonic'] # Six best ranking calibration methods
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # Single-row frames, concatenated once after the loop.
    value_rows = []
    rank_rows = []

    for tag_group in tag_groups:

        df_sel = df_cifar5m[df_cifar5m.tag_name.isin(tag_group)
                            & (df_cifar5m.cgt_nr == cgt_nr)] # isotonic ground truth

        # ECE_diff rows
        for metric in ["ECE_diff_abs"]: # can also add c_hat_distance_c

            # Aggregate only `metric`: averaging the whole frame fails on the
            # string columns with pandas >= 2.0.
            mean_by_binning = df_sel.groupby("binning")[metric].mean()

            value_rows.append(np.round(mean_by_binning.to_frame().T * 1000, 2))
            rank_rows.append(mean_by_binning.rank().to_frame().T)

        # Model ordering rows (Spearman corrs).
        # Only the two columns read by correlation_finder are passed on, so the
        # grouping columns are not part of the applied frame.
        # NOTE(review): the original comment expected 13 items per group; with
        # check_cal=True presumably only the 6 functions of cal_sub remain -- confirm.
        correlations_data = (df_sel
                             .groupby(["seed", "tag_name", "n_data", "binning", "model_name"])
                             [["c_hat_distance_p_debiased", "p_distance_c"]]
                             .apply(correlation_finder))

        spearman_by_binning = correlations_data.groupby(level="binning")["spearman"].mean()

        value_rows.append(np.round(spearman_by_binning.to_frame().T, 3))
        # Higher correlation is better, hence descending ranks.
        rank_rows.append(spearman_by_binning.rank(ascending=False).to_frame().T)

    values = pd.concat(value_rows)
    ranks = pd.concat(rank_rows)

    # Combine ranks and data. Work on an object-dtype copy so that writing strings
    # does not rely on implicit float -> object upcasting.
    df = values.astype(object)
    for row_idx in range(df.shape[0]):
        for column_idx in range(df.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates the fractional ranks that ties produce.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    # Row titles on the left; rows 0-1 belong to the first tag group, rows 2-3 to the second.
    df = pd.concat([pd.concat({"one-vs-rest": df[:2]}, names=['data']),
                    pd.concat({"confidence": df[2:4]}, names=['data'])]
                  )
    # Reorder columns to follow the requested binnings (was a hard-coded copy of
    # the default list, which broke for any other `binnings` argument).
    df = df[binnings]
    display(df)

    with pd.option_context("max_colwidth", 25):
        output = df.to_latex(escape=False)

    with open('tables/%s.tex' % fname,'w') as tf:
        tf.write(output)
    print(output)

In [10]:
table_ece_ranking("cifar5m_ece_ranking_table", binnings1)

Unnamed: 0_level_0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,Platt,beta,isotonic
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
one-vs-rest,ECE_diff_abs,$\mathbf{2.3_{1}}$,$3.24_{6}$,$3.66_{8}$,$2.59_{2}$,$2.87_{3}$,$2.99_{5}$,$3.63_{7}$,$2.9_{4}$,$3.8_{9}$
one-vs-rest,spearman,$0.457_{3}$,$0.195_{7}$,$0.443_{4}$,$0.36_{6}$,$0.513_{2}$,$0.381_{5}$,$0.01_{9}$,$0.128_{8}$,$\mathbf{0.552_{1}}$
confidence,ECE_diff_abs,$4.54_{2}$,$5.14_{5}$,$6.37_{8}$,$4.73_{4}$,$5.24_{6}$,$4.68_{3}$,$5.83_{7}$,$\mathbf{4.39_{1}}$,$7.2_{9}$
confidence,spearman,$0.624_{2}$,$0.434_{7}$,$0.563_{4}$,$0.406_{8}$,$0.51_{6}$,$0.568_{3}$,$0.018_{9}$,$0.511_{5}$,$\mathbf{0.657_{1}}$


\begin{tabular}{lllllllllll}
\toprule
           & binning &           $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ & $PL_{NN}^{CE}$ &    $PL_{DE}$ &        Platt &                 beta &              isotonic \\
data & {} &                     &              &              &              &                &              &              &                      &                       \\
\midrule
one-vs-rest & ECE_diff_abs &  $\mathbf{2.3_{1}}$ &   $3.24_{6}$ &   $3.66_{8}$ &   $2.59_{2}$ &     $2.87_{3}$ &   $2.99_{5}$ &   $3.63_{7}$ &            $2.9_{4}$ &             $3.8_{9}$ \\
           & spearman &         $0.457_{3}$ &  $0.195_{7}$ &  $0.443_{4}$ &   $0.36_{6}$ &    $0.513_{2}$ &  $0.381_{5}$ &   $0.01_{9}$ &          $0.128_{8}$ &  $\mathbf{0.552_{1}}$ \\
confidence & ECE_diff_abs &          $4.54_{2}$ &   $5.14_{5}$ &   $6.37_{8}$ &   $4.73_{4}$ &     $5.24_{6}$ &   $4.68_{3}$ &   $5.83_{7}$ &  $\mathbf{4.39_{1}}$ &             $7.2_{9}$ \\
           & spearman &    

In [13]:
table_ece_ranking("cifar5m_ece_ranking_table_cgt0", binnings1, cgt_nr = 0)

Unnamed: 0_level_0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,Platt,beta,isotonic
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
one-vs-rest,ECE_diff_abs,$\mathbf{2.79_{1}}$,$3.7_{6}$,$4.05_{8}$,$3.04_{2}$,$3.37_{5}$,$3.35_{4}$,$4.02_{7}$,$3.34_{3}$,$4.05_{9}$
one-vs-rest,spearman,$0.363_{4}$,$0.148_{7}$,$0.382_{3}$,$0.254_{6}$,$\mathbf{0.471_{1}}$,$0.351_{5}$,$0.058_{9}$,$0.124_{8}$,$0.466_{2}$
confidence,ECE_diff_abs,$4.53_{2}$,$4.98_{5}$,$6.35_{8}$,$4.74_{4}$,$5.19_{6}$,$4.69_{3}$,$5.68_{7}$,$\mathbf{4.32_{1}}$,$7.51_{9}$
confidence,spearman,$0.594_{2}$,$0.453_{5}$,$0.57_{3}$,$0.343_{8}$,$0.393_{7}$,$0.502_{4}$,$-0.026_{9}$,$0.432_{6}$,$\mathbf{0.622_{1}}$


\begin{tabular}{lllllllllll}
\toprule
           & binning &            $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ &        $PL_{NN}^{CE}$ &    $PL_{DE}$ &         Platt &                 beta &              isotonic \\
data & {} &                      &              &              &              &                       &              &               &                      &                       \\
\midrule
one-vs-rest & ECE_diff_abs &  $\mathbf{2.79_{1}}$ &    $3.7_{6}$ &   $4.05_{8}$ &   $3.04_{2}$ &            $3.37_{5}$ &   $3.35_{4}$ &    $4.02_{7}$ &           $3.34_{3}$ &            $4.05_{9}$ \\
           & spearman &          $0.363_{4}$ &  $0.148_{7}$ &  $0.382_{3}$ &  $0.254_{6}$ &  $\mathbf{0.471_{1}}$ &  $0.351_{5}$ &   $0.058_{9}$ &          $0.124_{8}$ &           $0.466_{2}$ \\
confidence & ECE_diff_abs &           $4.53_{2}$ &   $4.98_{5}$ &   $6.35_{8}$ &   $4.74_{4}$ &            $5.19_{6}$ &   $4.69_{3}$ &    $5.68_{7}$ &  $\mathbf{4.32_{1}}$ &        

In [12]:
table_ece_ranking("cifar5m_ece_ranking_table_cgt1", binnings1, cgt_nr = 1)

Unnamed: 0_level_0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,Platt,beta,isotonic
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
one-vs-rest,ECE_diff_abs,$3.98_{3}$,$5.0_{8}$,$5.01_{9}$,$\mathbf{3.69_{1}}$,$4.7_{7}$,$4.12_{4}$,$4.69_{6}$,$4.18_{5}$,$3.74_{2}$
one-vs-rest,spearman,$0.194_{4}$,$0.01_{9}$,$0.144_{6}$,$0.126_{8}$,$\mathbf{0.345_{1}}$,$0.2_{3}$,$0.157_{5}$,$0.134_{7}$,$0.226_{2}$
confidence,ECE_diff_abs,$\mathbf{4.83_{1}}$,$6.07_{6}$,$6.9_{9}$,$5.25_{4}$,$5.87_{5}$,$5.18_{3}$,$6.79_{8}$,$4.9_{2}$,$6.46_{7}$
confidence,spearman,$\mathbf{0.478_{1}}$,$0.261_{7}$,$0.396_{4}$,$0.241_{8}$,$0.39_{5}$,$0.453_{3}$,$-0.084_{9}$,$0.308_{6}$,$0.477_{2}$


\begin{tabular}{lllllllllll}
\toprule
           & binning &             $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &           $PL3^{CE}$ &        $PL_{NN}^{CE}$ &    $PL_{DE}$ &         Platt &         beta &     isotonic \\
data & {} &                       &              &              &                      &                       &              &               &              &              \\
\midrule
one-vs-rest & ECE_diff_abs &            $3.98_{3}$ &    $5.0_{8}$ &   $5.01_{9}$ &  $\mathbf{3.69_{1}}$ &             $4.7_{7}$ &   $4.12_{4}$ &    $4.69_{6}$ &   $4.18_{5}$ &   $3.74_{2}$ \\
           & spearman &           $0.194_{4}$ &   $0.01_{9}$ &  $0.144_{6}$ &          $0.126_{8}$ &  $\mathbf{0.345_{1}}$ &    $0.2_{3}$ &   $0.157_{5}$ &  $0.134_{7}$ &  $0.226_{2}$ \\
confidence & ECE_diff_abs &   $\mathbf{4.83_{1}}$ &   $6.07_{6}$ &    $6.9_{9}$ &           $5.25_{4}$ &            $5.87_{5}$ &   $5.18_{3}$ &    $6.79_{8}$ &    $4.9_{2}$ &   $6.46_{7}$ \\
           & spearman 

## 4. Combo of different results

In [9]:
def get_partial_df(binnings, cgt_nr = 3, is_std = False, check_cal = True, metric = "c_hat_distance_c",
                   column_name = "tag_name",
                   column_groups = [["1vsRest1"], ["1vsRest3"], ["1vsRest5"], ["confidence"]]):
    """Return one formatted table row per entry of `column_groups`.

    The rows returned by ``get_cifar5m()`` are filtered to the requested binnings,
    ``cgt_nr`` and (optionally) calibration functions. For each group, the rows whose
    `column_name` value is in the group are selected and `metric` is aggregated per
    ``binning`` value, multiplied by 1000 and rounded to 2 decimals. Every cell is
    rendered as ``$value_{rank}$``; the rank-1 cell of a row is wrapped in LaTeX mathbf.

    Parameters
    ----------
    binnings : list of str
        ``binning`` values to keep.
    cgt_nr : int
        Value of the ``cgt_nr`` column to keep.
    is_std : bool
        If True, report the standard deviation across seeds of the per-seed means
        instead of the overall mean.
    check_cal : bool
        If True, keep only the six calibration functions listed in ``cal_sub``.
    metric : str
        Column of the data frame to aggregate.
    column_name : str
        Column used to split the data into rows.
    column_groups : list of list
        Each inner list holds the `column_name` values pooled into one row. The
        values must have the same type as the column (e.g. integers for ``n_data``).

    Returns
    -------
    pandas.DataFrame
        One row per group (indexed by `metric`), one column per binning, string cells.

    Raises
    ------
    ValueError
        If a group selects no rows at all.
    """
    df_cifar5m = get_cifar5m()
    df_cifar5m = df_cifar5m[df_cifar5m.binning.isin(binnings)]

    if check_cal:
        cal_sub = ['beta', 'VecS', 'logplatt','PW6logIOCE', 'ScalingBinning', 'Isotonic'] # Six best ranking calibration methods
        df_cifar5m = df_cifar5m[df_cifar5m.calibration_function.isin(cal_sub)]

    # Single-row frames, concatenated once after the loop.
    value_rows = []
    rank_rows = []

    for column_group in column_groups:

        df_sel = df_cifar5m[df_cifar5m[column_name].isin(column_group) & (df_cifar5m.cgt_nr == cgt_nr)]

        # An empty selection would otherwise surface later as an opaque
        # "cannot convert float NaN to integer" error while formatting ranks.
        if df_sel.empty:
            raise ValueError(
                "No rows with %s in %r and cgt_nr == %r; check that the group values "
                "have the same dtype as the column" % (column_name, column_group, cgt_nr))

        # Aggregate only `metric`: averaging the whole frame fails on the string
        # columns with pandas >= 2.0 and does needless work otherwise.
        if is_std:
            per_binning = (df_sel.groupby(["binning", "seed"])[metric].mean()
                           .groupby(level="binning").std())
        else:
            per_binning = df_sel.groupby("binning")[metric].mean()

        value_rows.append(np.round(per_binning.to_frame().T * 1000, 2))
        # Ranks come from the unrounded values; the smallest value gets rank 1.
        rank_rows.append(per_binning.rank().to_frame().T)

    values = pd.concat(value_rows)
    ranks = pd.concat(rank_rows)

    # Combine ranks and data. Work on an object-dtype copy so that writing strings
    # does not rely on implicit float -> object upcasting.
    df = values.astype(object)
    for row_idx in range(df.shape[0]):
        for column_idx in range(df.shape[1]):

            item = np.round(values.iloc[row_idx, column_idx], 4)
            rank = ranks.iloc[row_idx, column_idx]

            # int() truncates the fractional ranks that ties produce.
            cell = str(item) + "_{" + str(int(rank)) + "}"
            if rank == 1:
                cell = "\\mathbf{" + cell + "}"
            df.iloc[row_idx, column_idx] = "$" + cell + "$"

    return df

In [10]:
df0 = get_partial_df(binnings1, metric = "c_hat_distance_c", column_name = "model_name",
                column_groups = [["resnet110", "densenet40", "wide32"]])

In [11]:
df0

binning,$ES_{15}$,$ES_{CV}$,$ES_{sweep}$,$PL3^{CE}$,$PL_{DE}$,$PL_{NN}^{CE}$,Platt,beta,isotonic
c_hat_distance_c,$9.47_{9}$,$8.52_{6}$,$8.62_{7}$,$7.27_{2}$,$7.97_{4}$,$\mathbf{6.27_{1}}$,$8.39_{5}$,$7.6_{3}$,$9.2_{8}$


In [12]:
def get_table_combo(fname, binnings, cgt_nr = 3, is_std = False, 
                    check_cal = True, metric = "c_hat_distance_c"):
    """Build the combined results table, display it and export it as LaTeX.

    The table stacks four sections, each produced by ``get_partial_df``:
    one pooled row over all model names, one row per model name, one row per
    ``n_data`` value and one row per ``tag_name`` value.

    Parameters
    ----------
    fname : str
        Name of the output file; the table is written to ``tables/<fname>``.
        The text before the first ``"."`` is used as LaTeX caption and label.
    binnings : list
        Forwarded to ``get_partial_df`` and used to select and order the
        columns of the combined table.
    cgt_nr, is_std, check_cal
        NOTE(review): accepted but not used anywhere in this function, and
        in particular not forwarded to ``get_partial_df``. Confirm whether
        they should be passed through.
    metric : str
        Metric name forwarded to ``get_partial_df``.

    Returns
    -------
    pandas.DataFrame
        The combined table, indexed by (section title, row label).

    Side effects
    ------------
    Displays the table, writes the LaTeX file (creating the output directory
    if it does not exist) and prints the LaTeX source.
    """
    import os  # local import: only needed to make sure the output directory exists

    # (section title, column to filter on, value groups -> one row each, row labels)
    sections = [
        ("", "model_name",
         [["resnet110", "densenet40", "wide32"]],
         ["All"]),
        ("Initial model", "model_name",
         [["resnet110"],  ["densenet40"], ["wide32"]],
         ["ResNet110", "DenseNet40", "WideNet32"]),
        ("Data size", "n_data",
         [["1000"],  ["3000"], ["10000"]],
         ["1000", "3000", "10000"]),
        ("Data", "tag_name",
         [["1vsRest1"], ["1vsRest3"],  ["1vsRest5"], ["confidence"]],
         ["cars vs rest", "cats vs rest", "dogs vs rest", "confidence"]),
    ]

    parts = []
    for title, column_name, column_groups, row_labels in sections:
        part = get_partial_df(binnings, metric = metric, column_name = column_name,
                              column_groups = column_groups)
        # Add row titles on the left
        part.index = row_labels
        # Wrapping in a dict adds the section title as an outer index level
        parts.append(pd.concat({title: part}, names=['']))

    df = pd.concat(parts)

    # Keep only the requested columns, in the requested order
    df = df[binnings] 
    
    display(df)
    
    capt_str = fname.split(".")[0]

    begin_list = ["\\begin{table}","\\caption{%s}" % capt_str,   "\\label{table:%s}" % capt_str, "\\centering", "\\begin{adjustbox}{width=0.8\\textwidth}"]
    hline_str = "\\hline"
    end_list = ["\\end{adjustbox}", "\\end{table}"]

    # Render the LaTeX completely BEFORE touching the output file, so that a
    # failure here cannot leave a truncated/empty file behind.
    with pd.option_context("max_colwidth", 25):
        splitted = df.to_latex(escape=False).split("\n")

    # Insert \hline before the fourth-from-last line of the rendered tabular and
    # drop the final split element (presumably the empty string that follows
    # the trailing newline -- confirm against the pandas version in use).
    output_new = "\n".join(begin_list + splitted[:-4] + [hline_str]
                           + splitted[-4:-1] + end_list)

    out_path = f'tables/{fname}'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path,'w') as tf:
        tf.write(output_new)
    print(output_new)
    
    return df

In [13]:
# Combined table for the default metric; the LaTeX version is written to
# tables/chat_dist_c_combo.tex.
df_final = get_table_combo(
    "chat_dist_c_combo.tex",
    binnings1,
)

Unnamed: 0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,beta,Platt,isotonic
,,,,,,,,,,
,All,$9.47_{9}$,$8.62_{7}$,$8.52_{6}$,$7.27_{2}$,$\mathbf{6.27_{1}}$,$7.97_{4}$,$7.6_{3}$,$8.39_{5}$,$9.2_{8}$
Initial model,ResNet110,$9.43_{8}$,$9.04_{6}$,$9.22_{7}$,$7.33_{2}$,$\mathbf{6.7_{1}}$,$8.7_{4}$,$7.97_{3}$,$8.9_{5}$,$9.66_{9}$
Initial model,DenseNet40,$9.98_{9}$,$8.59_{7}$,$8.37_{6}$,$7.08_{2}$,$\mathbf{5.98_{1}}$,$7.53_{4}$,$7.35_{3}$,$7.93_{5}$,$9.15_{8}$
Initial model,WideNet32,$9.0_{9}$,$8.22_{6}$,$7.97_{5}$,$7.39_{2}$,$\mathbf{6.14_{1}}$,$7.69_{4}$,$7.49_{3}$,$8.33_{7}$,$8.8_{8}$
Data size,1000,$13.34_{9}$,$11.44_{6}$,$11.49_{7}$,$10.23_{3}$,$\mathbf{8.02_{1}}$,$11.09_{5}$,$9.63_{2}$,$10.88_{4}$,$12.99_{8}$
Data size,3000,$8.46_{8}$,$7.64_{6}$,$8.19_{7}$,$6.64_{2}$,$\mathbf{6.22_{1}}$,$7.35_{4}$,$6.8_{3}$,$7.44_{5}$,$8.65_{9}$
Data size,10000,$6.61_{7}$,$6.77_{8}$,$5.88_{4}$,$4.93_{2}$,$\mathbf{4.59_{1}}$,$5.49_{3}$,$6.38_{6}$,$6.85_{9}$,$5.97_{5}$
Data,cars vs rest,$4.07_{7}$,$4.61_{9}$,$3.82_{4}$,$3.58_{3}$,$\mathbf{2.74_{1}}$,$3.93_{6}$,$3.13_{2}$,$4.21_{8}$,$3.9_{5}$
Data,cats vs rest,$10.45_{9}$,$10.15_{8}$,$9.24_{5}$,$7.44_{2}$,$\mathbf{6.64_{1}}$,$8.86_{3}$,$9.02_{4}$,$9.82_{7}$,$9.6_{6}$


\begin{table}
\caption{chat_dist_c_combo}
\label{table:chat_dist_c_combo}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllll}
\toprule
     & binning &    $ES_{15}$ & $ES_{sweep}$ &    $ES_{CV}$ &   $PL3^{CE}$ &       $PL_{NN}^{CE}$ &    $PL_{DE}$ &         beta &        Platt &     isotonic \\
{} & {} &              &              &              &              &                      &              &              &              &              \\
\midrule
     & All &   $9.47_{9}$ &   $8.62_{7}$ &   $8.52_{6}$ &   $7.27_{2}$ &  $\mathbf{6.27_{1}}$ &   $7.97_{4}$ &    $7.6_{3}$ &   $8.39_{5}$ &    $9.2_{8}$ \\
Initial model & ResNet110 &   $9.43_{8}$ &   $9.04_{6}$ &   $9.22_{7}$ &   $7.33_{2}$ &   $\mathbf{6.7_{1}}$ &    $8.7_{4}$ &   $7.97_{3}$ &    $8.9_{5}$ &   $9.66_{9}$ \\
     & DenseNet40 &   $9.98_{9}$ &   $8.59_{7}$ &   $8.37_{6}$ &   $7.08_{2}$ &  $\mathbf{5.98_{1}}$ &   $7.53_{4}$ &   $7.35_{3}$ &   $7.93_{5}$ &   $9.15_{8}$ \\
     & WideNet32 &   

In [74]:
# Same combined table, but for the "ECE_diff_abs" metric; the LaTeX version is
# written to tables/ECE_abs_combo.tex.
# NOTE(review): this rebinds df_final, replacing the table from the previous call.
df_final = get_table_combo(
    "ECE_abs_combo.tex",
    binnings1,
    metric="ECE_diff_abs",
)

Unnamed: 0,binning,$ES_{15}$,$ES_{sweep}$,$ES_{CV}$,$PL3^{CE}$,$PL_{NN}^{CE}$,$PL_{DE}$,beta,isotonic,Platt
,,,,,,,,,,
Initial model,ResNet110,$2.87_{3}$,$3.52_{6}$,$4.23_{9}$,$\mathbf{2.72_{1}}$,$3.39_{5}$,$3.34_{4}$,$2.86_{2}$,$4.13_{8}$,$3.99_{7}$
Initial model,DenseNet40,$\mathbf{2.9_{1}}$,$3.65_{6}$,$4.44_{8}$,$3.33_{2}$,$3.47_{5}$,$3.46_{4}$,$3.44_{3}$,$5.26_{9}$,$4.38_{7}$
Initial model,WideNet32,$\mathbf{2.82_{1}}$,$3.98_{6}$,$4.34_{8}$,$3.32_{2}$,$3.52_{4}$,$3.43_{3}$,$3.53_{5}$,$4.55_{9}$,$4.18_{7}$
Data size,1000,$4.19_{2}$,$4.75_{3}$,$6.83_{8}$,$4.81_{4}$,$5.0_{5}$,$5.14_{6}$,$\mathbf{4.08_{1}}$,$7.48_{9}$,$5.32_{7}$
Data size,3000,$\mathbf{2.45_{1}}$,$3.55_{6}$,$4.11_{8}$,$2.77_{2}$,$3.31_{5}$,$3.05_{4}$,$2.96_{3}$,$4.13_{9}$,$3.78_{7}$
Data size,10000,$1.94_{2}$,$2.85_{8}$,$2.07_{4}$,$\mathbf{1.78_{1}}$,$2.07_{5}$,$2.03_{3}$,$2.79_{7}$,$2.33_{6}$,$3.45_{9}$
Data,cars vs rest,$1.54_{3}$,$1.84_{5}$,$2.66_{9}$,$\mathbf{1.38_{1}}$,$1.75_{4}$,$1.85_{6}$,$1.39_{2}$,$1.94_{7}$,$2.06_{8}$
Data,cats vs rest,$\mathbf{2.66_{1}}$,$4.26_{7}$,$4.26_{6}$,$2.97_{2}$,$3.69_{4}$,$3.4_{3}$,$4.03_{5}$,$4.46_{8}$,$4.93_{9}$
Data,dogs vs rest,$\mathbf{2.71_{1}}$,$3.63_{5}$,$4.06_{8}$,$3.41_{4}$,$3.17_{2}$,$3.71_{6}$,$3.3_{3}$,$4.99_{9}$,$3.91_{7}$


\begin{table}
\caption{ECE_abs_combo}
\label{table:ECE_abs_combo}
\centering
\begin{adjustbox}{width=0.8\textwidth}
\begin{tabular}{lllllllllll}
\toprule
     & binning &            $ES_{15}$ & $ES_{sweep}$ &   $ES_{CV}$ &           $PL3^{CE}$ & $PL_{NN}^{CE}$ &   $PL_{DE}$ &                 beta &    isotonic &       Platt \\
{} & {} &                      &              &             &                      &                &             &                      &             &             \\
\midrule
Initial model & ResNet110 &           $2.87_{3}$ &   $3.52_{6}$ &  $4.23_{9}$ &  $\mathbf{2.72_{1}}$ &     $3.39_{5}$ &  $3.34_{4}$ &           $2.86_{2}$ &  $4.13_{8}$ &  $3.99_{7}$ \\
     & DenseNet40 &   $\mathbf{2.9_{1}}$ &   $3.65_{6}$ &  $4.44_{8}$ &           $3.33_{2}$ &     $3.47_{5}$ &  $3.46_{4}$ &           $3.44_{3}$ &  $5.26_{9}$ &  $4.38_{7}$ \\
     & WideNet32 &  $\mathbf{2.82_{1}}$ &   $3.98_{6}$ &  $4.34_{8}$ &           $3.32_{2}$ &     $3.52_{4}$ &  $3.43_{3}$ &      