# Calibration

Evaluating calibration methods on convolutional neural networks.

In [4]:
import numpy as np
import pandas as pd
from betacal import BetaCalibration
from os.path import join
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
from cal_methods import HistogramBinning, TemperatureScaling, evaluate, cal_results

2023-04-16 18:05:28.079336: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-16 18:05:28.226598: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-16 18:05:29.079913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-04-16 18:05:29.079990: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] 

## Calibration approaches

#### 1-vs-K calibration
Histogram binning, isotonic regression and beta calibration are applied in a 1-vs-K fashion: K separate calibration models are trained for K classes, i.e. one model per class.
#### Multiclass calibration
Temperature scaling fits a single calibration model that is shared by all classes.

## Calibration of Predictions.

Paths to files with logits.

In [50]:
# Directory holding the pickled logits, relative to the working directory.
PATH = join('..', '..', 'logits')

# Logit files grouped as in the variable names (10 / 100 / 200 / 1k —
# presumably the number of classes; confirm against the datasets).
files_10 = (
    'probs_resnet_wide32_c10_logits.p',
    'probs_densenet40_c10_logits.p',
    'probs_lenet5_c10_logits.p',
    'probs_resnet110_SD_c10_logits.p',
    'probs_resnet110_c10_logits.p',
    'probs_resnet152_SD_SVHN_logits.p',
)
files_100 = (
    'probs_resnet_wide32_c100_logits.p',
    'probs_densenet40_c100_logits.p',
    'probs_lenet5_c100_logits.p',
    'probs_resnet110_SD_c100_logits.p',
)
files_200 = ('probs_resnet50_birds_logits.p',)
files_1k = (
    'probs_resnet152_imgnet_logits.p',
    'probs_densenet161_imgnet_logits.p',
)

# Full selection of files; NOTE: a later cell rebinds `files`.
files = (
    'probs_resnet110_c10_logits.p',
    'probs_resnet110_c100_logits.p',
    'probs_densenet40_c10_logits.p',
    'probs_densenet40_c100_logits.p',
    'probs_resnet_wide32_c10_logits.p',
    'probs_resnet_wide32_c100_logits.p',
    'probs_resnet50_birds_logits.p',
    'probs_resnet110_SD_c10_logits.p',
    'probs_resnet110_SD_c100_logits.p',
    'probs_resnet152_SD_SVHN_logits.p',
    # ImageNet calibration takes a rather long time.
    'probs_resnet152_imgnet_logits.p',
    'probs_densenet161_imgnet_logits.p',
)


In [34]:
# Logits for the defect-plbart model.
# NOTE(review): machine-specific absolute path; point this at a configurable
# data directory before sharing the notebook.
PATH = join('/root/autodl-tmp/HugCode/custom/calibration', 'logits')

# List file names only. An absolute entry makes os.path.join(PATH, f) discard
# PATH entirely, and the model tag would then be parsed out of the whole path.
files = ('probs_defect-plbart_c10_logits.p',)

In [33]:
# Preview the tag and the resolved path derived from each logits file name.
for i, f in enumerate(files):
    # Tag = file name without its first and last "_"-separated tokens.
    name = "_".join(f.split("_")[1:-1])
    FILE_PATH = join(PATH, f)
    # Emit the tag twice, then the path, one item per line.
    print(name, name, FILE_PATH, sep="\n")

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p


### Isotonic Regression

In [35]:
# Isotonic regression with approach="single".
# presumably the dict is forwarded to the IsotonicRegression constructor — confirm in cal_methods
df_iso = cal_results(
    IsotonicRegression,
    PATH,
    files,
    {'y_min': 0, 'y_max': 1},
    approach="single",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.115666; ece 0.000085; mce 0.003919; loss 0.645115, brier 0.338703
Time taken: 0.8089797496795654 

Total time taken: 0.8090527057647705


### Temperature scaling

In [36]:
# Temperature scaling with approach="all" (no extra calibrator parameters).
df_temp_scale = cal_results(
    TemperatureScaling,
    PATH,
    files,
    approach="all",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.481698; ece 0.005603; mce 0.022184; loss 0.650637, brier 0.338456
Time taken: 0.830618143081665 

Total time taken: 0.8306975364685059


### Beta methods

In [38]:
# Beta calibration, "abm" parameterisation, approach="single".
df_beta = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "abm"},
    approach="single",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.335286; ece 0.007569; mce 0.045946; loss 0.650608, brier 0.336263
Time taken: 0.8321552276611328 

Total time taken: 0.8325567245483398


In [39]:
# Beta calibration, "am" parameterisation, approach="single".
df_beta_am = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "am"},
    approach="single",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.298682; ece 0.009332; mce 0.022767; loss 0.650624, brier 0.336268
Time taken: 0.8198106288909912 

Total time taken: 0.8201992511749268


In [40]:
# Beta calibration, "ab" parameterisation, approach="single".
df_beta_ab = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "ab"},
    approach="single",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.481698; ece 0.006204; mce 0.026876; loss 0.650637, brier 0.338304
Time taken: 0.8262889385223389 

Total time taken: 0.8271839618682861


### Histogram binning

In [41]:
# Histogram binning with approach="single".
# presumably M is the number of bins — confirm in cal_methods.HistogramBinning
df_hb = cal_results(
    HistogramBinning,
    PATH,
    files,
    {'M': 15},
    approach="single",
)

defect-plbart_c10
defect-plbart_c10
/root/autodl-tmp/HugCode/custom/calibration/logits/probs_defect-plbart_c10_logits.p
Accuracy: 61.420204978038065
Error: 38.579795021961935
ECE: 0.09112956188346419
MCE: 0.27074009366333485
Loss: 0.69994050441078
brier: 0.41303374251895664
Error 37.335286; ece 0.000000; mce 0.000000; loss 0.649044, brier 0.337015
Time taken: 1.0744285583496094 

Total time taken: 1.0745718479156494


#### Calibrated scores for Defect plbart.

In [42]:
# Isotonic regression results: the uncalibrated row and its "_calib" counterpart.
df_iso

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,defect-plbart_c10,38.579795,0.09113,0.27074,0.699941,0.413034
1,defect-plbart_c10_calib,38.543192,0.025955,0.2,0.674055,0.338062


In [43]:
# Temperature scaling results: the uncalibrated row and its "_calib" counterpart.
df_temp_scale

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,defect-plbart_c10,38.579795,0.09113,0.27074,0.699941,0.413034
1,defect-plbart_c10_calib,38.579795,0.032573,0.048307,0.66484,0.33932


In [44]:
# Beta calibration ("abm") results: the uncalibrated row and its "_calib" counterpart.
df_beta

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,defect-plbart_c10,38.579795,0.09113,0.27074,0.699941,0.413034
1,defect-plbart_c10_calib,38.616398,0.038413,0.048747,0.664914,0.337125


In [45]:
# Beta calibration ("am") results: the uncalibrated row and its "_calib" counterpart.
df_beta_am

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,defect-plbart_c10,38.579795,0.09113,0.27074,0.699941,0.413034
1,defect-plbart_c10_calib,38.653001,0.035713,0.047697,0.664649,0.337137


In [46]:
# Beta calibration ("ab") results: the uncalibrated row and its "_calib" counterpart.
df_beta_ab

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,defect-plbart_c10,38.579795,0.09113,0.27074,0.699941,0.413034
1,defect-plbart_c10_calib,38.579795,0.032531,0.048185,0.664813,0.339169


## Dataframe with results 

In [48]:
# Result frames, in the same order as the method columns listed in `names`.
dfs = [
    df_hb,
    df_iso,
    df_temp_scale,
    df_beta,
    df_beta_am,
    df_beta_ab,
]

# Column headers: the model name, the uncalibrated score, then one column
# per frame in `dfs`.
names = [
    "Name",
    "Uncalibrated",
    "Histogram Binning",
    "Isotonic Regression",
    "Temperature Scaling",
    "Beta Calibration",
    "BC am",
    "BC ab",
]


def get_dataframe(dfs, column, names):
    '''
    Collect one metric from every calibration result frame into a single table.

    Each frame in `dfs` is read as alternating rows: an uncalibrated row at an
    even position, followed by its calibrated counterpart at the next position.

    Params:
        dfs: list of result DataFrames, each with a "Name" column and a
            `column` column; the frames are expected to list the same models
            in the same order.
        column: name of the metric column to extract (e.g. "ECE").
        names: column labels of the result: "Name", a label for the
            uncalibrated score, then one label per frame in `dfs` (same order).

    Returns:
        DataFrame indexed by "Name" with one row per model.
    '''
    # The model name and the uncalibrated score are taken from the first frame.
    reference = dfs[0]

    rows = []
    # The row count comes from the frames passed in, not from a notebook global.
    for i in range(1, len(reference), 2):
        baseline = reference.iloc[i - 1]
        row = [baseline["Name"], baseline[column]]
        # Calibrated score of every method for this model.
        row.extend(df.iloc[i][column] for df in dfs)
        rows.append(row)

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=names).set_index('Name')

In [49]:
# One comparison table per metric column of the result frames.
df_error, df_ece, df_mce, df_loss, df_brier = (
    get_dataframe(dfs, metric, names)
    for metric in ("Error", "ECE", "MCE", "Loss", "Brier")
)

## Scores

In [50]:
def highlight_min(s):
    '''
    Return per-element CSS that marks the minimum of a Series in yellow.

    Every element equal to the Series minimum gets a yellow background;
    all other elements get an empty style string.
    '''
    lowest = s.min()
    return ['background-color: yellow' if is_lowest else ''
            for is_lowest in (s == lowest)]

## Error Rate

In [51]:
# Mark the lowest error rate in each row (axis=1 applies the function row-wise).
df_error.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defect-plbart_c10,38.579795,40.190337,38.543192,38.579795,38.616398,38.653001,38.579795


## ECE

In [52]:
# Mark the lowest ECE in each row (axis=1 applies the function row-wise).
df_ece.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defect-plbart_c10,0.09113,0.040363,0.025955,0.032573,0.038413,0.035713,0.032531


## MCE

In [53]:
# Mark the lowest MCE in each row (axis=1 applies the function row-wise).
df_mce.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defect-plbart_c10,0.27074,0.070234,0.2,0.048307,0.048747,0.047697,0.048185


## Loss

In [54]:
# Mark the lowest loss in each row (axis=1 applies the function row-wise).
df_loss.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defect-plbart_c10,0.699941,0.666026,0.674055,0.66484,0.664914,0.664649,0.664813


## Brier

In [55]:
# Mark the lowest Brier score in each row (axis=1 applies the function row-wise).
df_brier.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defect-plbart_c10,0.413034,0.336918,0.338062,0.33932,0.337125,0.337137,0.339169
