# Calibration

Evaluating calibration methods on convolutional neural networks.

In [1]:
import numpy as np
import pandas as pd
from os.path import join
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
from cal_methods import TemperatureScaling, evaluate, softmax, cal_results

Using TensorFlow backend.


Paths to files with logits.

In [2]:
# NOTE(review): machine-specific absolute path -- point this at the directory
# that holds the model sub-folders listed in `files` before re-running.
PATH = '/Users/wildflowerlyi/Desktop/Github/NN_calibration/scripts'

# Logit files to evaluate (presumably resolved against PATH inside
# cal_results -- confirm in cal_methods).
files = (
    'resnet_cifar/probs_resnet110_c10_logits.p',
    'resnet_wide/probs_resnet_wide32_c10_logits.p',
    'resnet_densenet/probs_densenet40_c10_logits.p',
    'resnet_wide/probs_resnet_wide32_c10clip_logits.p',
    'resnet_sd/probs_resnet110_SD_c10clip_logits.p',
    'resnet_densenet/probs_densenet40_c10clip_logits.p',
    # Disabled "c100" variants; uncomment to include them:
    # 'resnet_cifar/probs_resnet110_c100_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100_logits.p',
    # 'resnet_densenet/probs_densenet40_c100_logits.p',
    # 'resnet_wide/probs_resnet_wide32_c100clip_logits.p',
)

### Isotonic Regression

In [3]:
# Calibrate every model in `files` with isotonic regression.
# The dict is presumably forwarded to IsotonicRegression as constructor
# kwargs (y_min / y_max bound its predictions) -- confirm in cal_methods.
df_iso = cal_results(
    IsotonicRegression,
    PATH,
    files,
    {'y_min': 0, 'y_max': 1},
    approach="single",
)

cifar/probs_resnet110_c10
('Accuracy:', 93.390000000000001)
('Error:', 6.6099999999999994)
('ECE:', 0.048270407003164296)
('MCE:', 0.379260828194109)
('Loss:', 0.38292819397349553)
wide/probs_resnet_wide32_c10
('Accuracy:', 93.799999999999997)
('Error:', 6.2000000000000028)
('ECE:', 0.047311796060204525)
('MCE:', 0.3678059713045756)
('Loss:', 0.37100536326453859)
densenet/probs_densenet40_c10
('Accuracy:', 92.900000000000006)
('Error:', 7.0999999999999943)
('ECE:', 0.051972183519601804)
('MCE:', 0.35494045674345875)
('Loss:', 0.41025025195239284)
wide/probs_resnet_wide32_c10clip
('Accuracy:', 94.040000000000006)
('Error:', 5.9599999999999937)
('ECE:', 0.044856612253189064)
('MCE:', 0.31552806931237376)
('Loss:', 0.35969095881889424)
sd/probs_resnet110_SD_c10clip
('Accuracy:', 17.66)
('Error:', 82.340000000000003)
('ECE:', 0.066272482181340464)
('MCE:', 0.81101521884286121)
('Loss:', 8.5405460218011964)
densenet/probs_densenet40_c10clip
('Accuracy:', 93.109999999999999)
('Error:', 6.890

### Temperature Scaling

In [4]:
# Calibrate every model in `files` with temperature scaling.
# Unlike the isotonic run this uses approach="all"; the meaning of
# `approach` is defined in cal_methods.cal_results.
df_temp_scale = cal_results(
    TemperatureScaling,
    PATH,
    files,
    approach="all",
)

cifar/probs_resnet110_c10
('Accuracy:', 93.390000000000001)
('Error:', 6.6099999999999994)
('ECE:', 0.048270407003164296)
('MCE:', 0.379260828194109)
('Loss:', 0.38292819397349553)
wide/probs_resnet_wide32_c10
('Accuracy:', 93.799999999999997)
('Error:', 6.2000000000000028)
('ECE:', 0.047311796060204525)
('MCE:', 0.3678059713045756)
('Loss:', 0.37100536326453859)
densenet/probs_densenet40_c10
('Accuracy:', 92.900000000000006)
('Error:', 7.0999999999999943)
('ECE:', 0.051972183519601804)
('MCE:', 0.35494045674345875)
('Loss:', 0.41025025195239284)
wide/probs_resnet_wide32_c10clip
('Accuracy:', 94.040000000000006)
('Error:', 5.9599999999999937)
('ECE:', 0.044856612253189064)
('MCE:', 0.31552806931237376)
('Loss:', 0.35969095881889424)
sd/probs_resnet110_SD_c10clip
('Accuracy:', 17.66)
('Error:', 82.340000000000003)
('ECE:', 0.066272482181340464)
('MCE:', 0.81101521884286121)
('Loss:', 8.5405460218011964)
densenet/probs_densenet40_c10clip
('Accuracy:', 93.109999999999999)
('Error:', 6.890

#### Calibrated scores for CIFAR datasets.

In [5]:
# Isotonic-regression results: rows alternate between a model's uncalibrated
# scores and the scores of its `_calib` counterpart.
df_iso

Unnamed: 0,Name,Error,ECE,MCE,Loss
0,cifar/probs_resnet110_c10,6.61,0.04827,0.379261,0.382928
1,cifar/probs_resnet110_c10_calib,6.82,0.012087,0.241357,0.282494
2,wide/probs_resnet_wide32_c10,6.2,0.047312,0.367806,0.371005
3,wide/probs_resnet_wide32_c10_calib,6.05,0.010466,0.083494,0.217106
4,densenet/probs_densenet40_c10,7.1,0.051972,0.35494,0.41025
5,densenet/probs_densenet40_c10_calib,7.1,0.012183,0.265471,0.281433
6,wide/probs_resnet_wide32_c10clip,5.96,0.044857,0.315528,0.359691
7,wide/probs_resnet_wide32_c10clip_calib,5.68,0.012156,0.186214,0.232532
8,sd/probs_resnet110_SD_c10clip,82.34,0.066272,0.811015,8.540546
9,sd/probs_resnet110_SD_c10clip_calib,77.3,0.063671,0.758553,2.22269


In [6]:
# Temperature-scaling results: rows alternate between a model's uncalibrated
# scores and the scores of its `_calib` counterpart.
df_temp_scale

Unnamed: 0,Name,Error,ECE,MCE,Loss
0,cifar/probs_resnet110_c10,6.61,0.04827,0.379261,0.382928
1,cifar/probs_resnet110_c10_calib,6.61,0.00719,0.046223,0.22028
2,wide/probs_resnet_wide32_c10,6.2,0.047312,0.367806,0.371005
3,wide/probs_resnet_wide32_c10_calib,6.2,0.008058,0.251238,0.186995
4,densenet/probs_densenet40_c10,7.1,0.051972,0.35494,0.41025
5,densenet/probs_densenet40_c10_calib,7.1,0.009334,0.096582,0.216684
6,wide/probs_resnet_wide32_c10clip,5.96,0.044857,0.315528,0.359691
7,wide/probs_resnet_wide32_c10clip_calib,5.96,0.006065,0.242671,0.18537
8,sd/probs_resnet110_SD_c10clip,82.34,0.066272,0.811015,8.540546
9,sd/probs_resnet110_SD_c10clip_calib,14.34,0.009011,0.18685,0.423135


## DataFrames with results

In [7]:
# Column headers for the summary tables: model name, uncalibrated score,
# then one column per calibration method (same order as `dfs`).
names = [
    "Name",
    "Uncalibrated",
    "Isotonic Regression",
    "Temperature Scaling",
]

# Per-method result tables, ordered to match the method columns in `names`.
dfs = [df_iso, df_temp_scale]


def get_dataframe(dfs, column, names):
    """
    Collect one metric from several calibration result tables into one frame.

    Each table in `dfs` is read as consecutive row pairs: the row at an even
    position holds a model's uncalibrated scores and the following row holds
    the scores after calibration. Model names and uncalibrated scores are
    taken from the first table only; every table contributes its calibrated
    score.

    Params:
        dfs (list of pd.DataFrame): result tables, one per calibration method.
            All tables are indexed by position, so they are expected to list
            the same models in the same order.
        column (str): metric column to extract (e.g. "ECE").
        names (list of str): column headers for the result; must contain
            'Name' and have 2 + len(dfs) entries
            (name, uncalibrated, one per method).

    Returns:
        pd.DataFrame indexed by 'Name' with one row per model and the
        remaining entries of `names` as columns.

    Raises:
        IndexError: if `dfs` is empty or a table has fewer rows than the first.
    """
    # Use the first table passed in, not the notebook-level `df_iso`, so the
    # function depends only on its arguments.
    reference = dfs[0]

    rows = []
    for calib_pos in range(1, len(reference), 2):
        uncal_pos = calib_pos - 1

        # Name and uncalibrated score come from the first row of the pair.
        row = [reference["Name"].iloc[uncal_pos],
               reference[column].iloc[uncal_pos]]

        # One calibrated score per method, from the second row of the pair.
        row.extend(df[column].iloc[calib_pos] for df in dfs)
        rows.append(row)

    # Building the frame in one go (instead of growing it row by row) lets
    # pandas infer numeric dtypes for the score columns.
    return pd.DataFrame(rows, columns=names).set_index('Name')

In [8]:
# One summary table per metric column of the result frames.
df_error, df_ece, df_mce, df_loss = (
    get_dataframe(dfs, metric, names)
    for metric in ("Error", "ECE", "MCE", "Loss")
)

## Scores

In [9]:
def highlight_min(s):
    '''
    Return one CSS string per entry of a Series, marking the minimum yellow.

    Entries equal to ``s.min()`` get ``'background-color: yellow'`` (ties are
    all marked); every other entry gets an empty string.
    '''
    css_for = {True: 'background-color: yellow', False: ''}
    at_minimum = (s == s.min())
    return [css_for[bool(flag)] for flag in at_minimum]

## Error Rate

In [10]:
# axis = 1 runs highlight_min on each row, so the yellow cell(s) mark the
# lowest error rate among the columns for that model.
df_error.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Isotonic Regression,Temperature Scaling
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cifar/probs_resnet110_c10,6.61,6.82,6.61
wide/probs_resnet_wide32_c10,6.2,6.05,6.2
densenet/probs_densenet40_c10,7.1,7.1,7.1
wide/probs_resnet_wide32_c10clip,5.96,5.68,5.96
sd/probs_resnet110_SD_c10clip,82.34,77.3,14.34
densenet/probs_densenet40_c10clip,6.89,6.94,6.89


## ECE

In [11]:
# axis = 1 runs highlight_min on each row, so the yellow cell(s) mark the
# lowest ECE among the columns for that model.
df_ece.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Isotonic Regression,Temperature Scaling
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cifar/probs_resnet110_c10,0.0482704,0.0120875,0.00719009
wide/probs_resnet_wide32_c10,0.0473118,0.0104662,0.00805839
densenet/probs_densenet40_c10,0.0519722,0.0121829,0.00933419
wide/probs_resnet_wide32_c10clip,0.0448566,0.0121559,0.00606529
sd/probs_resnet110_SD_c10clip,0.0662725,0.0636708,0.00901112
densenet/probs_densenet40_c10clip,0.0502588,0.0103795,0.00711893


## MCE

In [12]:
# axis = 1 runs highlight_min on each row, so the yellow cell(s) mark the
# lowest MCE among the columns for that model.
df_mce.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Isotonic Regression,Temperature Scaling
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cifar/probs_resnet110_c10,0.379261,0.241357,0.0462229
wide/probs_resnet_wide32_c10,0.367806,0.0834937,0.251238
densenet/probs_densenet40_c10,0.35494,0.265471,0.096582
wide/probs_resnet_wide32_c10clip,0.315528,0.186214,0.242671
sd/probs_resnet110_SD_c10clip,0.811015,0.758553,0.18685
densenet/probs_densenet40_c10clip,0.294821,0.115042,0.0553514


## Loss

In [13]:
# axis = 1 runs highlight_min on each row, so the yellow cell(s) mark the
# lowest loss among the columns for that model.
df_loss.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Isotonic Regression,Temperature Scaling
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cifar/probs_resnet110_c10,0.382928,0.282494,0.22028
wide/probs_resnet_wide32_c10,0.371005,0.217106,0.186995
densenet/probs_densenet40_c10,0.41025,0.281433,0.216684
wide/probs_resnet_wide32_c10clip,0.359691,0.232532,0.18537
sd/probs_resnet110_SD_c10clip,8.54055,2.22269,0.423135
densenet/probs_densenet40_c10clip,0.405018,0.272771,0.209092
