# Calibration

Testing calibration method on neural networks on image data.

In [2]:
from sklearn.metrics import log_loss, brier_score_loss
from keras.losses import categorical_crossentropy
from scipy.optimize import minimize 
import numpy as np
from utility.unpickle_probs import unpickle_probs
from utility.calibration import ECE, MCE
from os.path import join
import sklearn.metrics as metrics
import pandas as pd
from betacal import BetaCalibration
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
import time

Using TensorFlow backend.


## Load in Data for testing

Some data for testing methods.

In [19]:
PATH = join('..', '..', 'Semester IV', 'NN_image_probs')
files = ('probs_resnet152_SD_SVHN_logits.p', 'probs_resnet110_c100_logits.p', )

FILE_PATH = join(PATH, files[0])
#(y_probs_val, y_val), (y_probs_test, y_test) = unpickle_probs(FILE_PATH, True)
(y_logits_val, y_val), (y_logits_test, y_test) = unpickle_probs(FILE_PATH, True)


y_probs_val: (6000, 10)
y_true_val: (6000, 1)
y_probs_test: (26032, 10)
y_true_test: (26032, 1)


In [20]:
max(y_test)

array([9], dtype=int64)

In [15]:
y_val.flatten().shape

(6000,)

### Check out the Negative Log Likelihood (NLL).

In [21]:
#y_probs_val = softmax(y_logits_val)
y_probs_test = softmax(y_logits_test)

In [22]:
log_loss(y_true=y_val, y_pred=y_probs_val)

NameError: name 'y_probs_val' is not defined

In [19]:
log_loss(y_true=y_test, y_pred=y_probs_test)

0.085422771871456624

The NLL is different from the actual output for some reason. Look more into it.

## Model calibration

In [4]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=1, keepdims=1)

In [5]:
def evaluate(y_probs, y_true, verbose = False, normalize = False, bins = 15):
    
    y_preds = np.argmax(y_probs, axis=1)  # Take maximum confidence as prediction
    
    if normalize:
        y_confs = np.max(y_probs, axis=1)/np.sum(y_probs, axis=1)
        # Check if everything below or equal to 1?
    else:
        y_confs = np.max(y_probs, axis=1)  # Take only maximum confidence
    
    accuracy = metrics.accuracy_score(y_true, y_preds) * 100
    error = 100 - accuracy
    
        # Calculate ECE
    ece = ECE(y_confs, y_preds, y_true, bin_size = 1/bins)
    # Calculate MCE
    mce = MCE(y_confs, y_preds, y_true, bin_size = 1/bins)
    
    loss = log_loss(y_true=y_true, y_pred=y_probs)
    
    y_prob_true = np.array([y_probs[i, idx] for i, idx in enumerate(y_true)])  # Probability of positive class
    brier = brier_score_loss(y_true=y_true, y_prob=y_prob_true)  # Brier Score (MSE)
    
    if verbose:
        print("Accuracy:", accuracy)
        print("Error:", error)
        print("ECE:", ece)
        print("MCE:", mce)
        print("Loss:", loss)
        print("brier:", brier)
    
    return (error, ece, mce, loss, brier)

In [44]:
# Fit all calibration scores together (not 1-vs-all)
def cal_results(fn, path, files, m_kwargs = {}, check_val = False, method = "all"):
    
    df = pd.DataFrame(columns=["Name", "Error", "ECE", "MCE", "Loss", "Brier"])
    
    total_t1 = time.time()
    
    for i, f in enumerate(files):
        
        name = "_".join(f.split("_")[1:-1])
        print(name)
        t1 = time.time()

        FILE_PATH = join(path, f)
        (y_logits_val, y_val), (y_logits_test, y_test) = unpickle_probs(FILE_PATH)
        
        if method == "all":            

            y_val = y_val.flatten()

            model = fn(**m_kwargs)

            model.fit(y_logits_val, y_val)

            y_probs_val = model.predict(y_logits_val) 
            y_probs_test = model.predict(y_logits_test)
            
            error, ece, mce, loss, brier = evaluate(softmax(y_logits_test), y_test, verbose=True)  # Test before scaling
            error2, ece2, mce2, loss2, brier2 = evaluate(y_probs_test, y_test, verbose=False)
            
            print("Error %f; ece %f; mce %f; loss %f, brier %f" % evaluate(y_probs_val, y_val, verbose=False, normalize=True))

            
        else:  # 1-vs-k models
            y_probs_val = softmax(y_logits_val)  # Softmax logits
            y_probs_test = softmax(y_logits_test)
            K = y_probs_test.shape[1]
            
            # Go through all the classes
            for k in range(K):
                # Prep class labels (1 fixed true class, 0 other classes)
                y_cal = np.array(y_val == k, dtype="int")[:, 0]

                # Train model
                model = fn(**m_kwargs)
                model.fit(y_probs_val[:, k], y_cal) # Get only one column with probs for given class "k"

                y_probs_val[:, k] = model.predict(y_probs_val[:, k])  # Predict new values based on the fittting
                y_probs_test[:, k] = model.predict(y_probs_test[:, k])

                # Replace NaN with 0, as it should be close to zero  # TODO is it needed?
                idx_nan = np.where(np.isnan(y_probs_test))
                y_probs_test[idx_nan] = 0

                idx_nan = np.where(np.isnan(y_probs_val))
                y_probs_val[idx_nan] = 0

            # Get results for test set
            error, ece, mce, loss, brier = evaluate(softmax(y_logits_test), y_test, verbose=True, normalize=False)
            error2, ece2, mce2, loss2, brier2 = evaluate(y_probs_test, y_test, verbose=False, normalize=True)
            
            print("Error %f; ece %f; mce %f; loss %f, brier %f" % evaluate(y_probs_val, y_val, verbose=False, normalize=True))
            
        
        df.loc[i*2] = [name, error, ece, mce, loss, brier]
        df.loc[i*2+1] = [(name + "_calib"), error2, ece2, mce2, loss2, brier2]
        
        t2 = time.time()
        print("Time taken:", (t2-t1), "\n")
        
    total_t2 = time.time()
    print("Total time taken:", (total_t2-total_t1))
        
    return df
    

## Temperature Scaling

In [23]:
class TemperatureScaling():
    
    def __init__(self, temp = 0, maxiter = 200, solver = "BFGS"):
        self.temp = temp
        self.maxiter = maxiter
        self.solver = solver
    
    def _loss_fun(self, x, y_probs, y_true):
        scaled_probs = self.predict(y_probs, x)    
        loss = log_loss(y_true=y_true, y_pred=scaled_probs)
        return loss
    
    # Find the temperature
    def fit(self, y_logits_val, y_val):
        opt = minimize(self._loss_fun, x0 = 1, args=(y_logits_val, y_val), options={'maxiter':self.maxiter}, method = self.solver)
        print(opt)
        self.temp = opt.x[0]
        
    # Scales logits based on the temperature
    def predict(self, y_logits, temp = None):
        if not temp:
            return softmax(y_logits/self.temp)
        else:
            return softmax(y_logits/temp)

## Extension of Binning Methods

## Isotonic  Regression

In [43]:
params = {'y_min':0, 'y_max':1}
IsotonicRegression(**params)

IsotonicRegression(increasing=True, out_of_bounds='nan', y_max=1, y_min=0)

## Histogram Binning

In [9]:
class HistogramBinning(): 
    
    def __init__(self, M=15):
        self.bin_size = 1./M  # Calculate bin size
        self.conf = []  # Initiate confidence list
        self.upper_bounds = np.arange(self.bin_size, 1+self.bin_size, self.bin_size)  # Set bin bounds for intervals

    
    def _get_conf(self, conf_thresh_lower, conf_thresh_upper, probs, true):

        filtered = [x[0] for x in zip(true, probs) if x[1] > conf_thresh_lower and x[1] <= conf_thresh_upper]
        nr_elems = len(filtered)

        if nr_elems < 1:
            return 0
        else:
            conf = sum(filtered)/nr_elems
            return conf
    

    def fit(self, probs, true):

        conf = []

        # Got through intervals and add confidence to list
        for conf_thresh in self.upper_bounds:
            temp_conf = self._get_conf((conf_thresh - self.bin_size), conf_thresh, probs = probs, true = true)
            conf.append(temp_conf)

        self.conf = conf
        

    # Fit based on predicted confidence
    def predict(self, probs):

        # Go through all the probs and check what confidence is suitable for it.
        for i, prob in enumerate(probs):
            idx = np.searchsorted(self.upper_bounds, prob)
            probs[i] = self.conf[idx]

        return probs    

## Calibrate predictions.

Paths to files with logits.

In [41]:
PATH = join('..', '..', 'Semester IV', 'NN_image_probs')
files_10 = ('probs_resnet_wide32_c10_logits.p', 'probs_densenet40_c10_logits.p',
            'probs_lenet5_c10_logits.p', 'probs_resnet110_SD_c10_logits.p',
           'probs_resnet110_c10_logits.p', 'probs_resnet152_SD_SVHN_logits.p')
files_100 = ('probs_resnet_wide32_c100_logits.p', 'probs_densenet40_c100_logits.p',
             'probs_lenet5_c100_logits.p', 'probs_resnet110_SD_c100_logits.p')
files_200 = ('probs_resnet50_birds_logits.p',)
files_1k = ('probs_resnet152_imgnet_logits.p', 'probs_densenet161_imgnet_logits.p')

files = ('probs_resnet110_c10_logits.p', 'probs_resnet110_c100_logits.p', 
         'probs_densenet40_c10_logits.p', 'probs_densenet40_c100_logits.p',
        'probs_resnet_wide32_c10_logits.p', 'probs_resnet_wide32_c100_logits.p',
         'probs_resnet50_birds_logits.p', 'probs_resnet110_SD_c10_logits.p',
         'probs_resnet110_SD_c100_logits.p', 'probs_resnet152_SD_SVHN_logits.p',
        'probs_resnet152_imgnet_logits.p', 'probs_densenet161_imgnet_logits.p')


Isotonic Regression

In [39]:
df_iso = cal_results(IsotonicRegression, PATH, files, {'y_min':0, 'y_max':1}, method = "single")

resnet_wide32_c10
Accuracy: 93.93
Error: 6.07
ECE: 0.0450543206543
MCE: 0.372154697776
Loss: 0.381703866403
brier: 0.838771823116
Error 5.040000; ece 0.006339; mce 0.351095; loss 0.150488, brier 0.805553
Time taken: 2.606687068939209 

densenet40_c10
Accuracy: 92.42
Error: 7.58
ECE: 0.055002704373
MCE: 0.333955179269
Loss: 0.42820705658
brier: 0.819165176082
Error 5.840000; ece 0.005655; mce 0.264928; loss 0.165044, brier 0.793517
Time taken: 2.5879671573638916 

lenet5_c10
Accuracy: 72.74
Error: 27.26
ECE: 0.0518010279745
MCE: 0.112806777159
Loss: 0.823260828471
brier: 0.499918505037
Error 25.100000; ece 0.020586; mce 0.167352; loss 0.727377, brier 0.491146
Time taken: 2.5985217094421387 

resnet110_SD_c10
Accuracy: 94.04
Error: 5.96
ECE: 0.0411256498724
MCE: 0.324843168259
Loss: 0.303251782442
brier: 0.83380284845
Error 4.840000; ece 0.006890; mce 0.739708; loss 0.136153, brier 0.809760
Time taken: 2.7662312984466553 

resnet110_c10
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE

Temperature scaling

In [45]:
df_temp_scale = cal_results(TemperatureScaling, PATH, files, method = "all")

resnet110_c10
      fun: 0.18050375822068787
 hess_inv: array([[ 13.33975718]])
      jac: array([ -8.56816769e-08])
  message: 'Optimization terminated successfully.'
     nfev: 24
      nit: 7
     njev: 8
   status: 0
  success: True
        x: array([ 2.39501104])
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE: 0.295798957348
Loss: 0.358274134971
brier: 0.831574102734
Error 5.380000; ece 0.007282; mce 0.816615; loss 0.180504, brier 0.799735
Time taken: 5.4490673542022705 

resnet110_c100
      fun: 1.0636127548279124
 hess_inv: array([[ 2.99245655]])
      jac: array([ -1.49011612e-08])
  message: 'Optimization terminated successfully.'
     nfev: 21
      nit: 6
     njev: 7
   status: 0
  success: True
        x: array([ 2.31021564])
Accuracy: 71.48
Error: 28.52
ECE: 0.184804543945
MCE: 0.398817125148
Loss: 1.69371210955
brier: 0.661139586803
Error 27.900000; ece 0.020857; mce 0.056080; loss 1.063613, brier 0.531456
Time taken: 5.948598861694336 

densenet40_c10
      fun: 0

Beta methods

In [46]:
df_beta = cal_results(BetaCalibration, PATH, files, {'parameters':"abm"}, method = "single")

resnet110_c10
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE: 0.295798957348
Loss: 0.358274134971
brier: 0.831574102734
Error 5.460000; ece 0.009282; mce 0.192622; loss 0.179388, brier 0.797178
Time taken: 5.1304686069488525 

resnet110_c100
Accuracy: 71.48
Error: 28.52
ECE: 0.184804543945
MCE: 0.398817125148
Loss: 1.69371210955
brier: 0.661139586803
Error 27.440000; ece 0.037628; mce 0.131316; loss 1.055094, brier 0.531179
Time taken: 7.415851831436157 

densenet40_c10
Accuracy: 92.42
Error: 7.58
ECE: 0.055002704373
MCE: 0.333955179269
Loss: 0.42820705658
brier: 0.819165176082
Error 6.220000; ece 0.010305; mce 0.089124; loss 0.190338, brier 0.787227
Time taken: 6.1335742473602295 

densenet40_c100
Accuracy: 70.0
Error: 30.0
ECE: 0.211562852757
MCE: 0.45400331452
Loss: 2.0173981798
brier: 0.655000941434
Error 29.000000; ece 0.054667; mce 0.160630; loss 1.134922, brier 0.506427
Time taken: 7.982395648956299 

resnet_wide32_c10
Accuracy: 93.93
Error: 6.07
ECE: 0.0450543206543
MCE: 0

In [115]:
df_beta_am = cal_results(BetaCalibration, PATH, files, {'parameters':"am"}, method = "single")

resnet_wide32_c10
Accuracy: 93.93
Error: 6.07
ECE: 0.0450543206543
MCE: 0.372154697776
Loss: 0.381703866403
brier: 0.838771823116
Time taken: 2.3664910793304443 

densenet40_c10
Accuracy: 92.42
Error: 7.58
ECE: 0.055002704373
MCE: 0.333955179269
Loss: 0.42820705658
brier: 0.819165176082
Time taken: 2.4651689529418945 

lenet5_c10
Accuracy: 72.74
Error: 27.26
ECE: 0.0518010279745
MCE: 0.112806777159
Loss: 0.823260828471
brier: 0.499918505037
Time taken: 2.2047476768493652 

resnet110_SD_c10
Accuracy: 94.04
Error: 5.96
ECE: 0.0411256498724
MCE: 0.324843168259
Loss: 0.303251782442
brier: 0.83380284845
Time taken: 2.333550453186035 

resnet110_c10
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE: 0.295798957348
Loss: 0.358274134971
brier: 0.831574102734
Time taken: 2.3775203227996826 

resnet152_SD_SVHN
Accuracy: 98.1522741242
Error: 1.84772587585
ECE: 0.00862196738389
MCE: 0.250316608697
Loss: 0.0854227718715
brier: 0.907179195332
Time taken: 6.220763206481934 

Total time taken: 17.96

In [116]:
df_beta_ab = cal_results(BetaCalibration, PATH, files, {'parameters':"ab"}, method = "single")

resnet_wide32_c10
Accuracy: 93.93
Error: 6.07
ECE: 0.0450543206543
MCE: 0.372154697776
Loss: 0.381703866403
brier: 0.838771823116
Time taken: 2.6882224082946777 

densenet40_c10
Accuracy: 92.42
Error: 7.58
ECE: 0.055002704373
MCE: 0.333955179269
Loss: 0.42820705658
brier: 0.819165176082
Time taken: 2.4588310718536377 

lenet5_c10
Accuracy: 72.74
Error: 27.26
ECE: 0.0518010279745
MCE: 0.112806777159
Loss: 0.823260828471
brier: 0.499918505037
Time taken: 2.2678403854370117 

resnet110_SD_c10
Accuracy: 94.04
Error: 5.96
ECE: 0.0411256498724
MCE: 0.324843168259
Loss: 0.303251782442
brier: 0.83380284845
Time taken: 3.195789337158203 

resnet110_c10
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE: 0.295798957348
Loss: 0.358274134971
brier: 0.831574102734
Time taken: 2.6454038619995117 

resnet152_SD_SVHN
Accuracy: 98.1522741242
Error: 1.84772587585
ECE: 0.00862196738389
MCE: 0.250316608697
Loss: 0.0854227718715
brier: 0.907179195332
Time taken: 6.608421802520752 

Total time taken: 19.86

Histogram binning

In [117]:
df_hb = cal_results(HistogramBinning, PATH, files, {'M':15}, method = "single")

resnet_wide32_c10
Accuracy: 93.93
Error: 6.07
ECE: 0.0450543206543
MCE: 0.372154697776
Loss: 0.381703866403
brier: 0.838771823116
Time taken: 4.358970880508423 

densenet40_c10
Accuracy: 92.42
Error: 7.58
ECE: 0.055002704373
MCE: 0.333955179269
Loss: 0.42820705658
brier: 0.819165176082
Time taken: 4.470382928848267 

lenet5_c10
Accuracy: 72.74
Error: 27.26
ECE: 0.0518010279745
MCE: 0.112806777159
Loss: 0.823260828471
brier: 0.499918505037
Time taken: 6.034449100494385 

resnet110_SD_c10
Accuracy: 94.04
Error: 5.96
ECE: 0.0411256498724
MCE: 0.324843168259
Loss: 0.303251782442
brier: 0.83380284845
Time taken: 4.650790691375732 

resnet110_c10
Accuracy: 93.56
Error: 6.44
ECE: 0.047503518641
MCE: 0.295798957348
Loss: 0.358274134971
brier: 0.831574102734
Time taken: 3.934823989868164 

resnet152_SD_SVHN
Accuracy: 98.1522741242
Error: 1.84772587585
ECE: 0.00862196738389
MCE: 0.250316608697
Loss: 0.0854227718715
brier: 0.907179195332
Time taken: 9.072253465652466 

Total time taken: 32.522675

In [107]:
df_hb

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,31.32,0.089154,0.283194,6.159072,0.593328
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,30.66,0.087133,0.277368,6.190333,0.597578


In [101]:
df_iso

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,28.156,0.070545,0.110293,2.852046,0.585573
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,26.82,0.069016,0.133854,2.88574,0.597491


In [99]:
df_temp_scale

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,23.796,0.020775,0.067791,0.942067,0.594931
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,22.952,0.019422,0.049666,0.909261,0.604853


In [105]:
df_beta

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,23.716,0.034685,0.088536,0.994656,0.596155
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,22.768,0.030964,0.108879,0.973253,0.606716


In [108]:
df_beta_am

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,23.76,0.033151,0.066534,0.976855,0.603427
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,22.82,0.03024,0.070295,0.94779,0.613146


In [109]:
df_beta_ab

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,resnet152_imgnet,23.796,0.06543,0.142914,0.988479,0.647568
1,resnet152_imgnet_calib,23.84,0.032485,0.108533,0.973891,0.602902
2,densenet161_imgnet,22.952,0.057199,0.130711,0.943955,0.651801
3,densenet161_imgnet_calib,22.904,0.028832,0.071684,0.950764,0.60879


## Dataframe with results 

In [124]:
# df_temp_scale, df_iso, df_beta, df_hb

dfs = [df_hb, df_iso, df_temp_scale, df_beta, df_beta_am, df_beta_ab]
names = ["Name", "Uncalibrated", "Histogram Binning", "Isotonic Regression", "Temperature Scaling", "Beta Calibration",
        "BC am", "BC ab"]


def get_dataframe(dfs, column, names):

    df_res = pd.DataFrame(columns=names)

    for i in range(1, len(df_iso), 2):

        name = dfs[0].iloc[i-1]["Name"] # Get name of method
        uncalibrated = dfs[0].iloc[i-1][column]  # Get uncalibrated score

        row = [name, uncalibrated]  # Add scores to row

        for df in dfs:
            row.append(df.iloc[i][column])

        df_res.loc[(i-1)//2] = row
        #errordf.iloc[i]
    
    df_res.set_index('Name', inplace = True)
        
    return df_res

In [125]:
df_error = get_dataframe(dfs, "Error", names)
df_ece = get_dataframe(dfs, "ECE", names)
df_mce = get_dataframe(dfs, "MCE", names)
df_loss = get_dataframe(dfs, "Loss", names)
df_brier = get_dataframe(dfs, "Brier", names)

## Scores

In [126]:
def highlight_min(s):
    '''
    highlight the maximum in a Series yellow.
    '''
    is_max = s == s.min()
    return ['background-color: yellow' if v else '' for v in is_max]

## Error

In [127]:
df_error.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet_wide32_c10,6.07,6.18,5.98,6.07,5.94,5.85,6.06
densenet40_c10,7.58,7.93,7.65,7.58,7.59,7.59,7.57
lenet5_c10,27.26,25.94,26.15,27.26,25.71,25.73,27.19
resnet110_SD_c10,5.96,6.2,5.91,5.96,5.86,5.91,5.96
resnet110_c10,6.44,6.59,6.36,6.44,6.44,6.4,6.45
resnet152_SD_SVHN,1.84773,2.07437,1.95529,1.84773,1.82468,1.82084,1.84773


## ECE

In [128]:
df_ece.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet_wide32_c10,0.0450543,0.0101492,0.0118988,0.00783782,0.00966474,0.0102104,0.00935039
densenet40_c10,0.0550027,0.021302,0.0168437,0.00946355,0.0170027,0.0155363,0.0156559
lenet5_c10,0.051801,0.0279693,0.0229176,0.0166531,0.0236721,0.0241512,0.0207426
resnet110_SD_c10,0.0411256,0.0118827,0.010278,0.00555243,0.00979727,0.00905688,0.00862628
resnet110_c10,0.0475035,0.0125078,0.0147359,0.0113208,0.0142481,0.0128235,0.0137544
resnet152_SD_SVHN,0.00862197,0.00527611,0.00245605,0.006071,0.00502064,0.00497707,0.00488229


## MCE

In [129]:
df_mce.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet_wide32_c10,0.372155,0.32977,0.267203,0.0705982,0.745817,0.245661,0.23136
densenet40_c10,0.333955,0.451015,0.0849184,0.0992925,0.250324,0.146299,0.105473
lenet5_c10,0.112807,0.194829,0.0946331,0.0915794,0.0975462,0.0463674,0.181709
resnet110_SD_c10,0.324843,0.490926,0.08144,0.0782316,0.100296,0.138632,0.106758
resnet110_c10,0.295799,0.444502,0.245814,0.236393,0.262211,0.25012,0.244292
resnet152_SD_SVHN,0.250317,0.222757,0.247784,0.182437,0.259019,0.216105,0.18475


## Loss

In [130]:
df_loss.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet_wide32_c10,0.381704,0.513157,0.232681,0.191482,0.202013,0.203047,0.204758
densenet40_c10,0.428207,0.572461,0.277319,0.225086,0.239184,0.239227,0.238213
lenet5_c10,0.823261,0.850237,0.819531,0.800311,0.765456,0.765652,0.790838
resnet110_SD_c10,0.303252,0.499434,0.254419,0.177605,0.185573,0.186039,0.185175
resnet110_c10,0.358274,0.547192,0.270758,0.209261,0.21385,0.215255,0.212043
resnet152_SD_SVHN,0.0854228,0.288669,0.109272,0.0786087,0.0796386,0.0798676,0.0794824


## Brier

In [131]:
df_brier.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
resnet_wide32_c10,0.838772,0.781067,0.796844,0.78971,0.794616,0.797888,0.794589
densenet40_c10,0.819165,0.763132,0.778261,0.770248,0.774672,0.778086,0.776613
lenet5_c10,0.499919,0.472359,0.478784,0.455979,0.475399,0.471422,0.474766
resnet110_SD_c10,0.833803,0.785381,0.797051,0.792032,0.795086,0.797334,0.796442
resnet110_c10,0.831574,0.779163,0.790379,0.788059,0.786434,0.789272,0.788312
resnet152_SD_SVHN,0.907179,0.88591,0.888147,0.887131,0.884714,0.884732,0.886552
