In [1]:
from helpers.generate_stats_df_from_results import create_table, df_to_latex, rename_df, add_hline, add_double_column_header

import numpy as np
import pandas as pd
from IPython.display import display

# Temperature scaling versus the proposed methods on DenseNet-40 CIFAR-10

In [2]:
# Summary table for a single model/dataset pair (DenseNet-40 on CIFAR-10):
# one row per metric, one column per calibration method, and a final
# "average rank" row holding each method's rank averaged over all metrics.
methods = ["uncalibrated", "tempS", "rf_cv_ll", "knn_kl_cv_ll", "kernel_DIR_cv_ll"]
summary_metrics = ["conf_ece", "cw_ece", "bs", "ll", "accuracy", "avg_loss_gap2"]

# Rows are collected in lists and combined once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row re-copies it on every iteration.
score_rows = []
rank_rows = []
for metric in summary_metrics:
    df_prev = create_table([("c10", "densenet40")], metric, methods, add_avg_rank=True, rounding=3)
    # With a single model and add_avg_rank=True the table has two rows:
    # row 0 is used as the formatted scores, row 1 as the per-method ranks.
    rank_rows.append(df_prev.iloc[1].to_frame().T)
    score_row = df_prev.iloc[0].to_frame().T
    score_row.index = [metric]  # label the row by metric instead of by model
    score_rows.append(score_row)

df = pd.concat(score_rows)
# The rank row comes out of a mixed string/number table, so its dtype is
# object; cast to float so that mean() and np.round operate on numbers.
ranks = pd.concat(rank_rows, ignore_index=True).astype(float)

df.loc["average rank", :] = np.round(ranks.mean(), 1)
df = rename_df(df[methods])  # fix the column order before renaming for display
latex = df_to_latex(df)
print(latex)
display(df)

densenet40_c10
densenet40_c10
densenet40_c10
densenet40_c10
densenet40_c10
densenet40_c10
\begin{tabular}{llllll}
\toprule
{} &        uncal &                    TS &           RF &   KNN$_{kl}$ & kernel$_{DIR}$ \\
\midrule
confidence ECE &  $5.493_{5}$ &  $\mathbf{0.924_{1}}$ &  $3.201_{2}$ &  $3.342_{3}$ &    $3.381_{4}$ \\
classwise ECE  &  $0.445_{4}$ &  $\mathbf{0.255_{1}}$ &  $0.436_{3}$ &  $0.393_{2}$ &    $0.461_{5}$ \\
Brier score    &  $0.127_{5}$ &   $\mathbf{0.11_{1}}$ &   $0.12_{3}$ &  $0.118_{2}$ &    $0.124_{4}$ \\
log-loss       &  $0.428_{5}$ &  $\mathbf{0.225_{1}}$ &  $0.292_{2}$ &  $0.298_{4}$ &    $0.293_{3}$ \\
accuracy       &  $0.924_{3}$ &           $0.924_{3}$ &  $0.924_{3}$ &  $0.924_{3}$ &    $0.924_{3}$ \\
loss gap       &   $0.03_{5}$ &  $\mathbf{0.006_{1}}$ &  $0.011_{2}$ &  $0.016_{4}$ &    $0.015_{3}$ \\
average rank   &          4.5 &                   1.3 &          2.5 &            3 &            3.7 \\
\bottomrule
\end{tabular}



Unnamed: 0,uncal,TS,RF,KNN$_{kl}$,kernel$_{DIR}$
confidence ECE,$5.493_{5}$,$\mathbf{0.924_{1}}$,$3.201_{2}$,$3.342_{3}$,$3.381_{4}$
classwise ECE,$0.445_{4}$,$\mathbf{0.255_{1}}$,$0.436_{3}$,$0.393_{2}$,$0.461_{5}$
Brier score,$0.127_{5}$,$\mathbf{0.11_{1}}$,$0.12_{3}$,$0.118_{2}$,$0.124_{4}$
log-loss,$0.428_{5}$,$\mathbf{0.225_{1}}$,$0.292_{2}$,$0.298_{4}$,$0.293_{3}$
accuracy,$0.924_{3}$,$0.924_{3}$,$0.924_{3}$,$0.924_{3}$,$0.924_{3}$
loss gap,$0.03_{5}$,$\mathbf{0.006_{1}}$,$0.011_{2}$,$0.016_{4}$,$0.015_{3}$
average rank,4.5,1.3,2.5,3,3.7


# Results of all model-dataset combinations

In [3]:
# Main result tables: for every metric, build one table covering the three
# architectures on CIFAR-10 and on CIFAR-100, print/display it, and write it
# to tables/<metric>.tex wrapped in a table* + adjustbox environment.
methods = [
    "uncalibrated",
    "dir_ODIR_pt",
    "ms_ODIR_pt",
    "vecS",
    "iop_diag",
    "tempS",
    "dec2TS",
    "knnTS_kl_cv_ll",
]

metrics = ["bs", "ll", "conf_ece", "cw_ece", "avg_loss_gap2", "accuracy"]
roundings = [3, 3, 2, 2, 4, 3]  # decimals per metric, aligned with `metrics`

# Total number of LaTeX columns: one per method plus the two index columns
# (dataset, model) seen in the rendered tables.
n_cols = len(methods) + 2

for metric, rounding in zip(metrics, roundings):
    df_c10 = create_table([("c10", "densenet40"),
                           ("c10", "resnet110"),
                           ("c10", "resnet_wide32")],
                          metric, methods, rounding=rounding, add_avg_rank=True)
    df_c100 = create_table([("c100", "densenet40"),
                            ("c100", "resnet110"),
                            ("c100", "resnet_wide32")],
                           metric, methods, rounding=rounding, add_avg_rank=True)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df_combined = rename_df(pd.concat([df_c10, df_c100]))

    latex = df_to_latex(df_combined)
    # Partial rules inside the table body; each (start, end) pair shows up
    # as a \cmidrule{start-end} in the printed output.
    # NOTE(review): the positions [6, 7, 10] appear to be line offsets into
    # the LaTeX string as laid out by df_to_latex - confirm against add_hline.
    latex = add_hline(latex, [6, 7, 10],
                      lengths=[(2, n_cols),
                               (1, n_cols),
                               (2, n_cols)])
    # Group header "TS" spanning the last two method columns.
    latex = add_double_column_header(latex, ["", "TS"], [n_cols - 2, 2])
    # Rule underneath the "TS" group header only.
    latex = add_hline(latex, [2],
                      lengths=[(n_cols - 1, n_cols)])

    print(latex)
    display(df_combined)

    # NOTE(review): the raw metric key is used as caption text; keys that
    # contain "_" (e.g. conf_ece) need escaping or a hand-written caption
    # before the file compiles in LaTeX.
    table_header = ("\\begin{table*}\n"
                    "\\caption{" + metric + "}\n"
                    "\\label{table:" + metric + "}\n"
                    "\\centering\n"
                    "\\begin{adjustbox}{width=\\textwidth}\n\n")
    table_footer = ("\n\n\\end{adjustbox}\n"
                    "\\end{table*}")
    with open(f"tables/{metric}.tex", "w") as f:
        f.write(table_header)
        f.write(latex)
        f.write(table_footer)

densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &  $0.127_{8}$ &   $0.11_{4}$ &            $0.11_{4}$ &   $0.11_{4}$ &   $0.11_{4}$ &   $0.11_{4}$ &           $0.11_{4}$ &   $0.11_{4}$ \\
      & ResNet-110 &   $0.11_{8}$ &  $0.098_{5}$ &           $0.098_{5}$ &  $0.098_{5}$ &  $0.097_{2}$ &  $0.098_{5}$ &          $0.097_{2}$ &  $0.097_{2}$ \\
      & ResNet Wide 32 &  $0.105_{8}$ &  $0.089_{2}$ &           $0.089_{2}$ &  $0.089_{2}$ &  $0.092_{6}$ &  $0.092_{6}$ &          $0.089_{2}$ &   $0.09_{5}$ \\
\cmidrule{2-10}
      & average rank &            8 &        

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$0.127_{8}$,$0.11_{4}$,$0.11_{4}$,$0.11_{4}$,$0.11_{4}$,$0.11_{4}$,$0.11_{4}$,$0.11_{4}$
C-10,ResNet-110,$0.11_{8}$,$0.098_{5}$,$0.098_{5}$,$0.098_{5}$,$0.097_{2}$,$0.098_{5}$,$0.097_{2}$,$0.097_{2}$
C-10,ResNet Wide 32,$0.105_{8}$,$0.089_{2}$,$0.089_{2}$,$0.089_{2}$,$0.092_{6}$,$0.092_{6}$,$0.089_{2}$,$0.09_{5}$
,average rank,8,4,4,4,4.2,5.3,2.8,3.7
C-100,DenseNet-40,$0.491_{8}$,$0.399_{2}$,$\mathbf{0.398_{1}}$,$0.403_{6}$,$0.403_{6}$,$0.401_{3}$,$0.401_{3}$,$0.402_{5}$
C-100,ResNet-110,$0.453_{8}$,$0.391_{3}$,$0.391_{3}$,$0.394_{7}$,$0.391_{3}$,$0.392_{5}$,$\mathbf{0.39_{1}}$,$0.392_{5}$
C-100,ResNet Wide 32,$0.432_{8}$,$0.353_{3}$,$\mathbf{0.351_{1}}$,$0.352_{2}$,$0.355_{6}$,$0.355_{6}$,$0.354_{4}$,$0.355_{6}$
,average rank,8,2.7,1.7,5.2,5.2,5,2.8,5.5


densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &  $0.428_{8}$ &  $0.223_{2}$ &  $\mathbf{0.222_{1}}$ &  $0.223_{2}$ &  $0.225_{4}$ &  $0.225_{4}$ &  $0.264_{7}$ &  $0.229_{6}$ \\
      & ResNet-110 &  $0.358_{8}$ &  $0.205_{2}$ &  $\mathbf{0.204_{1}}$ &  $0.206_{4}$ &  $0.208_{5}$ &  $0.209_{6}$ &  $0.232_{7}$ &  $0.205_{2}$ \\
      & ResNet Wide 32 &  $0.382_{8}$ &  $0.183_{2}$ &  $\mathbf{0.182_{1}}$ &  $0.183_{2}$ &  $0.192_{6}$ &  $0.191_{5}$ &  $0.244_{7}$ &   $0.19_{4}$ \\
\cmidrule{2-10}
      & average rank &            8 &          2.5 &                 

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$0.428_{8}$,$0.223_{2}$,$\mathbf{0.222_{1}}$,$0.223_{2}$,$0.225_{4}$,$0.225_{4}$,$0.264_{7}$,$0.229_{6}$
C-10,ResNet-110,$0.358_{8}$,$0.205_{2}$,$\mathbf{0.204_{1}}$,$0.206_{4}$,$0.208_{5}$,$0.209_{6}$,$0.232_{7}$,$0.205_{2}$
C-10,ResNet Wide 32,$0.382_{8}$,$0.183_{2}$,$\mathbf{0.182_{1}}$,$0.183_{2}$,$0.192_{6}$,$0.191_{5}$,$0.244_{7}$,$0.19_{4}$
,average rank,8,2.5,1,3,5.2,5.2,7,4.2
C-100,DenseNet-40,$2.017_{8}$,$1.057_{2}$,$\mathbf{1.047_{1}}$,$1.062_{4}$,$1.067_{5}$,$1.057_{2}$,$1.422_{7}$,$1.116_{6}$
C-100,ResNet-110,$1.694_{8}$,$1.094_{4}$,$\mathbf{1.073_{1}}$,$1.093_{3}$,$1.106_{5}$,$1.092_{2}$,$1.466_{7}$,$1.15_{6}$
C-100,ResNet Wide 32,$1.802_{8}$,$0.95_{5}$,$\mathbf{0.931_{1}}$,$0.941_{2}$,$0.945_{3}$,$0.945_{3}$,$1.276_{7}$,$0.994_{6}$
,average rank,8,3.8,1,3,4.5,2.7,7,6


densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &   $5.49_{8}$ &  $0.94_{5}$ &  $0.91_{3}$ &   $1.0_{6}$ &            $0.8_{2}$ &           $0.92_{4}$ &   $1.2_{7}$ &  $\mathbf{0.52_{1}}$ \\
      & ResNet-110 &   $4.75_{8}$ &   $1.1_{7}$ &  $0.99_{5}$ &  $1.07_{6}$ &           $0.91_{2}$ &           $0.94_{3}$ &  $0.94_{3}$ &  $\mathbf{0.84_{1}}$ \\
      & ResNet Wide 32 &   $4.48_{8}$ &   $0.7_{4}$ &  $0.75_{5}$ &  $0.78_{6}$ &           $0.69_{2}$ &           $0.69_{2}$ &  $0.89_{7}$ &  $\mathbf{0.54_{1}}$ \\
\cmidrule{2-10}
      & average rank &            8 

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$5.49_{8}$,$0.94_{5}$,$0.91_{3}$,$1.0_{6}$,$0.8_{2}$,$0.92_{4}$,$1.2_{7}$,$\mathbf{0.52_{1}}$
C-10,ResNet-110,$4.75_{8}$,$1.1_{7}$,$0.99_{5}$,$1.07_{6}$,$0.91_{2}$,$0.94_{3}$,$0.94_{3}$,$\mathbf{0.84_{1}}$
C-10,ResNet Wide 32,$4.48_{8}$,$0.7_{4}$,$0.75_{5}$,$0.78_{6}$,$0.69_{2}$,$0.69_{2}$,$0.89_{7}$,$\mathbf{0.54_{1}}$
,average rank,8,5.3,4.3,6,2.2,3.3,5.8,1
C-100,DenseNet-40,$21.16_{8}$,$0.95_{2}$,$1.22_{4}$,$0.96_{3}$,$3.45_{6}$,$\mathbf{0.79_{1}}$,$3.67_{7}$,$3.34_{5}$
C-100,ResNet-110,$18.48_{8}$,$2.27_{3}$,$2.31_{4}$,$3.04_{6}$,$2.79_{5}$,$2.13_{2}$,$3.18_{7}$,$\mathbf{1.9_{1}}$
C-100,ResNet Wide 32,$18.78_{8}$,$1.54_{3}$,$1.85_{5}$,$1.68_{4}$,$\mathbf{1.03_{1}}$,$1.41_{2}$,$3.08_{7}$,$2.46_{6}$
,average rank,8,2.7,4.3,4.3,4,1.7,7,4


densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &  $0.44_{8}$ &  $0.21_{1}$ &  $0.21_{1}$ &  $0.23_{3}$ &  $0.25_{5}$ &           $0.25_{5}$ &   $0.3_{7}$ &           $0.24_{4}$ \\
      & ResNet-110 &  $0.36_{8}$ &   $0.2_{3}$ &  $0.18_{2}$ &   $0.2_{3}$ &  $0.21_{5}$ &           $0.22_{6}$ &  $0.23_{7}$ &  $\mathbf{0.17_{1}}$ \\
      & ResNet Wide 32 &   $0.5_{8}$ &  $0.19_{3}$ &  $0.18_{1}$ &  $0.18_{1}$ &  $0.45_{6}$ &           $0.45_{6}$ &  $0.29_{5}$ &           $0.27_{4}$ \\
\cmidrule{2-10}
      & average rank &           8 &         2.7 &         1.7 &  

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$0.44_{8}$,$0.21_{1}$,$0.21_{1}$,$0.23_{3}$,$0.25_{5}$,$0.25_{5}$,$0.3_{7}$,$0.24_{4}$
C-10,ResNet-110,$0.36_{8}$,$0.2_{3}$,$0.18_{2}$,$0.2_{3}$,$0.21_{5}$,$0.22_{6}$,$0.23_{7}$,$\mathbf{0.17_{1}}$
C-10,ResNet Wide 32,$0.5_{8}$,$0.19_{3}$,$0.18_{1}$,$0.18_{1}$,$0.45_{6}$,$0.45_{6}$,$0.29_{5}$,$0.27_{4}$
,average rank,8,2.7,1.7,2.7,5.7,6,6.3,3
C-100,DenseNet-40,$0.17_{8}$,$0.09_{1}$,$0.1_{3}$,$0.09_{1}$,$0.11_{5}$,$0.1_{3}$,$0.14_{6}$,$0.15_{7}$
C-100,ResNet-110,$0.13_{7}$,$0.1_{2}$,$0.11_{4}$,$0.11_{4}$,$0.1_{2}$,$\mathbf{0.09_{1}}$,$0.13_{7}$,$0.13_{7}$
C-100,ResNet Wide 32,$0.12_{6}$,$0.09_{2}$,$0.09_{2}$,$0.09_{2}$,$0.1_{4}$,$0.1_{4}$,$0.13_{7}$,$0.14_{8}$
,average rank,7,2,3.3,2.7,4,3,6.7,7.3


densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &  $0.0295_{8}$ &           $0.0055_{3}$ &  $0.0058_{4}$ &  $0.0061_{6}$ &            $0.006_{5}$ &  $0.0063_{7}$ &           $0.0051_{2}$ &  $\mathbf{0.0046_{1}}$ \\
      & ResNet-110 &  $0.0259_{8}$ &            $0.006_{6}$ &  $0.0058_{5}$ &  $0.0062_{7}$ &           $0.0045_{3}$ &  $0.0056_{4}$ &           $0.0042_{2}$ &  $\mathbf{0.0035_{1}}$ \\
      & ResNet Wide 32 &  $0.0251_{8}$ &           $0.0048_{3}$ &  $0.0046_{2}$ &  $0.0048_{3}$ &           $0.0098_{7}$ &  $0.0095_{6}$ &  $\mathbf{0.0045_{1}}$ &       

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$0.0295_{8}$,$0.0055_{3}$,$0.0058_{4}$,$0.0061_{6}$,$0.006_{5}$,$0.0063_{7}$,$0.0051_{2}$,$\mathbf{0.0046_{1}}$
C-10,ResNet-110,$0.0259_{8}$,$0.006_{6}$,$0.0058_{5}$,$0.0062_{7}$,$0.0045_{3}$,$0.0056_{4}$,$0.0042_{2}$,$\mathbf{0.0035_{1}}$
C-10,ResNet Wide 32,$0.0251_{8}$,$0.0048_{3}$,$0.0046_{2}$,$0.0048_{3}$,$0.0098_{7}$,$0.0095_{6}$,$\mathbf{0.0045_{1}}$,$0.0049_{5}$
,average rank,8,4.2,3.7,5.5,5,5.7,1.7,2.3
C-100,DenseNet-40,$0.1154_{8}$,$\mathbf{0.0064_{1}}$,$0.0077_{4}$,$0.0068_{2}$,$0.0197_{6}$,$0.0071_{3}$,$0.0177_{5}$,$0.021_{7}$
C-100,ResNet-110,$0.1008_{8}$,$0.0096_{3}$,$0.0109_{5}$,$0.0119_{7}$,$\mathbf{0.0059_{1}}$,$0.0078_{2}$,$0.0117_{6}$,$0.0098_{4}$
C-100,ResNet Wide 32,$0.1027_{8}$,$0.0086_{5}$,$0.0085_{3}$,$0.0085_{3}$,$\mathbf{0.0068_{1}}$,$0.0077_{2}$,$0.0115_{6}$,$0.0141_{7}$
,average rank,8,3,4.2,4.2,2.7,2.3,5.7,6


densenet40_c10
resnet110_c10
resnet_wide32_c10
densenet40_c100
resnet110_c100
resnet_wide32_c100
\begin{tabular}{llllllllll}
\toprule
\multicolumn{8}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{9-10}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{dir_ODIR_pt}&\multicolumn{1}{c}{ms_ODIR_pt}&\multicolumn{1}{c}{VS}&\multicolumn{1}{c}{IOP}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{dec2TS}&\multicolumn{1}{c}{KNN}\\\midrule
C-10 & DenseNet-40 &  $0.924_{7}$ &           $0.925_{3}$ &           $0.925_{3}$ &  $0.925_{3}$ &  $0.924_{7}$ &  $0.924_{7}$ &           $0.925_{3}$ &  $0.925_{3}$ \\
      & ResNet-110 &  $0.936_{3}$ &           $0.935_{7}$ &           $0.936_{3}$ &  $0.935_{7}$ &  $0.936_{3}$ &  $0.936_{3}$ &           $0.936_{3}$ &  $0.936_{3}$ \\
      & ResNet Wide 32 &  $0.939_{7}$ &           $0.942_{2}$ &           $0.942_{2}$ &  $0.942_{2}$ &  $0.939_{7}$ &  $0.939_{7}$ &           $0.942_{2}$ &   $0.94_{5}$ \\
\cmidrule{2-10}
      & average

Unnamed: 0,Unnamed: 1,uncal,dir_ODIR_pt,ms_ODIR_pt,VS,IOP,TS,dec2TS,KNN
C-10,DenseNet-40,$0.924_{7}$,$0.925_{3}$,$0.925_{3}$,$0.925_{3}$,$0.924_{7}$,$0.924_{7}$,$0.925_{3}$,$0.925_{3}$
C-10,ResNet-110,$0.936_{3}$,$0.935_{7}$,$0.936_{3}$,$0.935_{7}$,$0.936_{3}$,$0.936_{3}$,$0.936_{3}$,$0.936_{3}$
C-10,ResNet Wide 32,$0.939_{7}$,$0.942_{2}$,$0.942_{2}$,$0.942_{2}$,$0.939_{7}$,$0.939_{7}$,$0.942_{2}$,$0.94_{5}$
,average rank,5.8,4.3,3,4.3,5.8,5.8,3,3.8
C-100,DenseNet-40,$0.7_{5}$,$0.703_{2}$,$\mathbf{0.704_{1}}$,$0.699_{8}$,$0.7_{5}$,$0.7_{5}$,$0.703_{2}$,$0.7_{5}$
C-100,ResNet-110,$0.715_{6}$,$\mathbf{0.717_{1}}$,$0.715_{6}$,$0.716_{2}$,$0.715_{6}$,$0.715_{6}$,$0.716_{2}$,$0.715_{6}$
C-100,ResNet Wide 32,$0.738_{6}$,$0.74_{2}$,$0.74_{2}$,$0.738_{6}$,$0.738_{6}$,$0.738_{6}$,$\mathbf{0.741_{1}}$,$0.739_{4}$
,average rank,6,2,3.2,5.7,6,6,2,5.2


# Appendix tables

In [4]:
# Appendix tables: temperature scaling against its KNN / random-forest /
# kernel variants on the three CIFAR-10 models. One table per metric is
# printed, displayed and written to tables/supp_<metric>.tex.
methods = [
    "uncalibrated",
    "tempS",
    "knnTS_kl_cv_ll",
    "knnTS_euc_cv_ll",
    "rfTS_cv_ll",
    "kernelTS_RBF_cv_ll",
    "kernelTS_DIR_cv_ll",
]
metrics = ["bs", "ll", "conf_ece", "cw_ece", "avg_loss_gap2", "accuracy"]
roundings = [3, 3, 2, 2, 4, 3]  # decimals per metric, aligned with `metrics`

for metric, rounding in zip(metrics, roundings):
    df_c10 = create_table(
        [("c10", "densenet40"), ("c10", "resnet110"), ("c10", "resnet_wide32")],
        metric,
        methods,
        rounding=rounding,
        add_avg_rank=True,
    )
    df_combined = rename_df(df_c10)

    # Method columns plus the two index columns seen in the rendered output.
    total_cols = len(methods) + 2

    latex = df_to_latex(df_combined)
    # NOTE(review): positions and spans are the same as in the main-table
    # cell, which covers two datasets; confirm that the entries for 7 and 10
    # are still meaningful for this C-10-only table.
    body_rule_spans = [(2, total_cols), (1, total_cols), (2, total_cols)]
    latex = add_hline(latex, [6, 7, 10], lengths=body_rule_spans)
    # NOTE(review): the "TS" group header covers the last two columns only;
    # confirm that is the intended grouping for this method list.
    latex = add_double_column_header(latex, ["", "TS"], [total_cols - 2, 2])
    latex = add_hline(latex, [2], lengths=[(total_cols - 1, total_cols)])

    print(latex)
    display(df_combined)

    # NOTE(review): the label below is identical to the one written by the
    # main-table cell (table:<metric>); if both .tex files are included in
    # the same document the labels will be multiply defined.
    preamble = (
        "\\begin{table*}\n"
        "\\caption{" + metric + "}\n"
        "\\label{table:" + metric + "}\n"
        "\\centering\n"
        "\\begin{adjustbox}{width=\\textwidth}\n\n"
    )
    closing = (
        "\n\n\\end{adjustbox}\n"
        "\\end{table*}"
    )
    with open(f"tables/supp_{metric}.tex", "w") as f:
        f.write(preamble)
        f.write(latex)
        f.write(closing)


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $0.127_{7}$ &   $0.11_{3}$ &   $0.11_{3}$ &      $0.11_{3}$ &            $0.11_{3}$ &         $0.11_{3}$ &         $0.11_{3}$ \\
     & ResNet-110 &   $0.11_{7}$ &  $0.098_{6}$ &  $0.097_{3}$ &     $0.097_{3}$ &           $0.097_{3}$ &        $0.097_{3}$ &        $0.097_{3}$ \\
     & ResNet Wide 32 &  $0.105_{7}$ &  $0.092_{6}$ &   $0.09_{3}$ &      $0.09_{3}$ &  $\mathbf{0.089_{1}}$ &         $0.09_{3}$ &        $0.091_{5}$ \\
\cmidrule{2-9}
     & average rank &            7 &          5.2 &          3.2 &             3.2 &                   2.5 &        

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$0.127_{7}$,$0.11_{3}$,$0.11_{3}$,$0.11_{3}$,$0.11_{3}$,$0.11_{3}$,$0.11_{3}$
C-10,ResNet-110,$0.11_{7}$,$0.098_{6}$,$0.097_{3}$,$0.097_{3}$,$0.097_{3}$,$0.097_{3}$,$0.097_{3}$
C-10,ResNet Wide 32,$0.105_{7}$,$0.092_{6}$,$0.09_{3}$,$0.09_{3}$,$\mathbf{0.089_{1}}$,$0.09_{3}$,$0.091_{5}$
,average rank,7,5.2,3.2,3.2,2.5,3.2,3.8


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $0.428_{7}$ &  $0.225_{1}$ &           $0.229_{5}$ &     $0.228_{4}$ &  $0.227_{3}$ &        $0.229_{5}$ &        $0.225_{1}$ \\
     & ResNet-110 &  $0.358_{7}$ &  $0.209_{6}$ &  $\mathbf{0.205_{1}}$ &     $0.206_{2}$ &  $0.207_{4}$ &        $0.208_{5}$ &        $0.206_{2}$ \\
     & ResNet Wide 32 &  $0.382_{7}$ &  $0.191_{5}$ &            $0.19_{4}$ &     $0.189_{2}$ &  $0.189_{2}$ &        $0.192_{6}$ &        $0.189_{2}$ \\
\cmidrule{2-9}
     & average rank &            7 &          4.2 &                   3.5 &             2.8 &            3 &        

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$0.428_{7}$,$0.225_{1}$,$0.229_{5}$,$0.228_{4}$,$0.227_{3}$,$0.229_{5}$,$0.225_{1}$
C-10,ResNet-110,$0.358_{7}$,$0.209_{6}$,$\mathbf{0.205_{1}}$,$0.206_{2}$,$0.207_{4}$,$0.208_{5}$,$0.206_{2}$
C-10,ResNet Wide 32,$0.382_{7}$,$0.191_{5}$,$0.19_{4}$,$0.189_{2}$,$0.189_{2}$,$0.192_{6}$,$0.189_{2}$
,average rank,7,4.2,3.5,2.8,3,5.5,2


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $5.49_{7}$ &  $0.92_{5}$ &  $\mathbf{0.52_{1}}$ &      $0.72_{2}$ &  $0.92_{5}$ &          $0.8_{3}$ &         $0.82_{4}$ \\
     & ResNet-110 &  $4.75_{7}$ &  $0.94_{5}$ &  $\mathbf{0.84_{1}}$ &      $0.88_{2}$ &   $0.9_{3}$ &         $1.14_{6}$ &         $0.92_{4}$ \\
     & ResNet Wide 32 &  $4.48_{7}$ &  $0.69_{2}$ &  $\mathbf{0.54_{1}}$ &      $0.71_{3}$ &  $0.91_{5}$ &         $0.97_{6}$ &         $0.86_{4}$ \\
\cmidrule{2-9}
     & average rank &           7 &         4.2 &                    1 &             2.3 &         4.5 &                  5 &   

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$5.49_{7}$,$0.92_{5}$,$\mathbf{0.52_{1}}$,$0.72_{2}$,$0.92_{5}$,$0.8_{3}$,$0.82_{4}$
C-10,ResNet-110,$4.75_{7}$,$0.94_{5}$,$\mathbf{0.84_{1}}$,$0.88_{2}$,$0.9_{3}$,$1.14_{6}$,$0.92_{4}$
C-10,ResNet Wide 32,$4.48_{7}$,$0.69_{2}$,$\mathbf{0.54_{1}}$,$0.71_{3}$,$0.91_{5}$,$0.97_{6}$,$0.86_{4}$
,average rank,7,4.2,1,2.3,4.5,5,4


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $0.44_{7}$ &  $0.25_{4}$ &           $0.24_{2}$ &      $0.24_{2}$ &  $0.25_{4}$ &         $0.26_{6}$ &  $\mathbf{0.23_{1}}$ \\
     & ResNet-110 &  $0.36_{7}$ &  $0.22_{5}$ &  $\mathbf{0.17_{1}}$ &      $0.18_{2}$ &  $0.19_{4}$ &         $0.22_{5}$ &           $0.18_{2}$ \\
     & ResNet Wide 32 &   $0.5_{7}$ &  $0.45_{6}$ &  $\mathbf{0.27_{1}}$ &       $0.3_{3}$ &  $0.29_{2}$ &         $0.37_{4}$ &           $0.37_{4}$ \\
\cmidrule{2-9}
     & average rank &           7 &         5.3 &                  1.5 &             2.7 &         3.5 &                5.

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$0.44_{7}$,$0.25_{4}$,$0.24_{2}$,$0.24_{2}$,$0.25_{4}$,$0.26_{6}$,$\mathbf{0.23_{1}}$
C-10,ResNet-110,$0.36_{7}$,$0.22_{5}$,$\mathbf{0.17_{1}}$,$0.18_{2}$,$0.19_{4}$,$0.22_{5}$,$0.18_{2}$
C-10,ResNet Wide 32,$0.5_{7}$,$0.45_{6}$,$\mathbf{0.27_{1}}$,$0.3_{3}$,$0.29_{2}$,$0.37_{4}$,$0.37_{4}$
,average rank,7,5.3,1.5,2.7,3.5,5.3,2.7


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $0.0295_{7}$ &  $0.0063_{6}$ &  $0.0046_{3}$ &    $0.0046_{3}$ &            $0.004_{1}$ &            $0.004_{1}$ &       $0.0054_{5}$ \\
     & ResNet-110 &  $0.0259_{7}$ &  $0.0056_{6}$ &  $0.0035_{4}$ &    $0.0033_{2}$ &           $0.0035_{4}$ &  $\mathbf{0.0032_{1}}$ &       $0.0034_{3}$ \\
     & ResNet Wide 32 &  $0.0251_{7}$ &  $0.0095_{6}$ &  $0.0049_{2}$ &    $0.0056_{3}$ &  $\mathbf{0.0047_{1}}$ &           $0.0058_{4}$ &       $0.0075_{5}$ \\
\cmidrule{2-9}
     & average rank &             7 &             6 &           3.3 &             2.8 &     

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$0.0295_{7}$,$0.0063_{6}$,$0.0046_{3}$,$0.0046_{3}$,$0.004_{1}$,$0.004_{1}$,$0.0054_{5}$
C-10,ResNet-110,$0.0259_{7}$,$0.0056_{6}$,$0.0035_{4}$,$0.0033_{2}$,$0.0035_{4}$,$\mathbf{0.0032_{1}}$,$0.0034_{3}$
C-10,ResNet Wide 32,$0.0251_{7}$,$0.0095_{6}$,$0.0049_{2}$,$0.0056_{3}$,$\mathbf{0.0047_{1}}$,$0.0058_{4}$,$0.0075_{5}$
,average rank,7,6,3.3,2.8,2.3,2.2,4.3


densenet40_c10
resnet110_c10
resnet_wide32_c10
\begin{tabular}{lllllllll}
\toprule
\multicolumn{7}{c}{}&\multicolumn{2}{c}{TS}\\
\cmidrule{8-9}
\multicolumn{1}{c}{}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{uncal}&\multicolumn{1}{c}{TS}&\multicolumn{1}{c}{KNN}&\multicolumn{1}{c}{knnTS_euc_cv_ll}&\multicolumn{1}{c}{rfTS_cv_ll}&\multicolumn{1}{c}{kernelTS_RBF_cv_ll}&\multicolumn{1}{c}{kernelTS_DIR_cv_ll}\\\midrule
C-10 & DenseNet-40 &  $0.924_{6}$ &  $0.924_{6}$ &  $0.925_{3}$ &     $0.925_{3}$ &  $0.924_{6}$ &        $0.925_{3}$ &  $\mathbf{0.926_{1}}$ \\
     & ResNet-110 &  $0.936_{4}$ &  $0.936_{4}$ &  $0.936_{4}$ &     $0.936_{4}$ &  $0.936_{4}$ &        $0.936_{4}$ &           $0.936_{4}$ \\
     & ResNet Wide 32 &  $0.939_{6}$ &  $0.939_{6}$ &   $0.94_{4}$ &      $0.94_{4}$ &  $0.942_{2}$ &        $0.942_{2}$ &           $0.942_{2}$ \\
\cmidrule{2-9}
     & average rank &          5.5 &          5.5 &          3.8 &             3.8 &            4 &                  3 &              

Unnamed: 0,Unnamed: 1,uncal,TS,KNN,knnTS_euc_cv_ll,rfTS_cv_ll,kernelTS_RBF_cv_ll,kernelTS_DIR_cv_ll
C-10,DenseNet-40,$0.924_{6}$,$0.924_{6}$,$0.925_{3}$,$0.925_{3}$,$0.924_{6}$,$0.925_{3}$,$\mathbf{0.926_{1}}$
C-10,ResNet-110,$0.936_{4}$,$0.936_{4}$,$0.936_{4}$,$0.936_{4}$,$0.936_{4}$,$0.936_{4}$,$0.936_{4}$
C-10,ResNet Wide 32,$0.939_{6}$,$0.939_{6}$,$0.94_{4}$,$0.94_{4}$,$0.942_{2}$,$0.942_{2}$,$0.942_{2}$
,average rank,5.5,5.5,3.8,3.8,4,3,2.3
