# Calibration

Evaluating calibration methods on convolutional neural networks.

In [8]:
import os
import sys

# Show the working directory: the relative entry appended to sys.path below
# is resolved against it when modules are imported.
print(os.getcwd())

# Make the local calibration scripts importable (presumably where
# cal_methods, imported in the next cell, lives — confirm).
sys.path.append('custom/calibration/scripts')


/root/autodl-tmp/HugCode


In [9]:
import numpy as np
import pandas as pd
from betacal import BetaCalibration
from os.path import join
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
import cal_methods
from cal_methods import HistogramBinning, TemperatureScaling, evaluate, cal_results

## Calibration approaches

#### 1-vs-K calibration
Histogram binning, isotonic regression and beta calibration are applied in a 1-vs-K fashion: K separate calibration models are trained for K classes, i.e. one model per class.
#### Multiclass calibration
Temperature scaling calibrates one model for all the classes together.

## Calibration of Predictions.

Paths to files with logits.

In [50]:
# Directory holding the pickled logits, relative to the working directory.
# NOTE: PATH and files are both reassigned by the following cell.
PATH = join('..', '..', 'logits')

# Logit files grouped by suffix (c10 / SVHN, c100, birds, imgnet).
files_10 = (
    'probs_resnet_wide32_c10_logits.p',
    'probs_densenet40_c10_logits.p',
    'probs_lenet5_c10_logits.p',
    'probs_resnet110_SD_c10_logits.p',
    'probs_resnet110_c10_logits.p',
    'probs_resnet152_SD_SVHN_logits.p',
)
files_100 = (
    'probs_resnet_wide32_c100_logits.p',
    'probs_densenet40_c100_logits.p',
    'probs_lenet5_c100_logits.p',
    'probs_resnet110_SD_c100_logits.p',
)
files_200 = ('probs_resnet50_birds_logits.p',)
files_1k = (
    'probs_resnet152_imgnet_logits.p',
    'probs_densenet161_imgnet_logits.p',
)

# Files selected for a calibration run.
files = (
    'probs_resnet110_c10_logits.p',
    'probs_resnet110_c100_logits.p',
    'probs_densenet40_c10_logits.p',
    'probs_densenet40_c100_logits.p',
    'probs_resnet_wide32_c10_logits.p',
    'probs_resnet_wide32_c100_logits.p',
    'probs_resnet50_birds_logits.p',
    'probs_resnet110_SD_c10_logits.p',
    'probs_resnet110_SD_c100_logits.p',
    'probs_resnet152_SD_SVHN_logits.p',
    # ImageNet calibration takes rather long time.
    'probs_resnet152_imgnet_logits.p',
    'probs_densenet161_imgnet_logits.p',
)


In [52]:
# Directory holding the pickled logits of the code models.
PATH = join('/root/autodl-tmp/HugCode/custom/calibration/scripts', 'logits')

# Bare file names, resolved against PATH with join(PATH, f).
# Previously each entry repeated the absolute directory, which only worked
# because os.path.join discards PATH when the second argument is absolute,
# and because the name parsing f.split("_")[1:-1] relied on the directory
# containing no underscore.
files = (
    'probs_clone-graphcodebert_c2_logits.p',
    'probs_clone-plbart_c2_logits.p',
    'probs_clone-codet5_c2_logits.p',
    # 'probs_clone-unixcoder_c2_logits.p',  # excluded from this run
    'probs_defect-graphcodebert_c2_logits.p',
    'probs_defect-plbart_c2_logits.p',
    'probs_defect-codet5_c2_logits.p',
    'probs_defect-unixcoder_c2_logits.p',
)

In [53]:
# Sanity check: show the model name parsed from each file name (printed twice,
# as in the original cell) followed by the path the logits are read from.
for i, f in enumerate(files):
    FILE_PATH = join(PATH, f)
    # Drop the leading "probs" chunk and the trailing "logits.p" chunk.
    name = "_".join(f.split("_")[1:-1])
    print(name, name, FILE_PATH, sep="\n")

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
defect-graphcodebert_c2
defect-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_defect-graphcodebert_c2_logits.p
defect-plbart_c2
defect-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_defect-plbart_c2_logits.p
defect-codet5_c2
defect-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_defect-codet5_c2_logits.p
defect-unixcoder_c2
defect-unixcoder_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_defect-unixcoder_c2_logits.p


### Isotonic Regression

In [54]:
# Isotonic regression; the dict is presumably forwarded to the
# IsotonicRegression constructor (y_min / y_max) — confirm in cal_methods.
df_iso = cal_results(
    IsotonicRegression,
    PATH,
    files,
    {'y_min': 0, 'y_max': 1},
    approach="single",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.333333; ece 0.000001; mce 0.000001; loss 0.114233, brier 0.471182
Time taken: 1.075087070465088 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.000000; ece 0.000004; mce 0.000004; loss 0.109275, brier 0.472977
Time taken: 1.0814688205718994 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.672526

### Temperature scaling

In [56]:
# Temperature scaling, run with approach="all" and no extra method parameters.
df_temp_scale = cal_results(
    TemperatureScaling,
    PATH,
    files,
    approach="all",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.666667; ece 0.012104; mce 0.513446; loss 0.140819, brier 0.497357
Time taken: 1.0947630405426025 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.266667; ece 0.007793; mce 0.268419; loss 0.128766, brier 0.495604
Time taken: 1.0808711051940918 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.67252

### Beta methods

In [57]:
# Beta calibration with parameters="abm" (presumably forwarded to the
# BetaCalibration constructor — confirm in cal_methods).
df_beta = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "abm"},
    approach="single",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.400000; ece 0.008232; mce 0.310171; loss 0.123639, brier 0.468987
Time taken: 1.0827643871307373 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.166667; ece 0.002312; mce 0.520693; loss 0.115250, brier 0.471493
Time taken: 1.0743794441223145 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.67252

In [58]:
# Beta calibration, "am" variant.
df_beta_am = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "am"},
    approach="single",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.433333; ece 0.011546; mce 0.513831; loss 0.124268, brier 0.469111
Time taken: 1.0891671180725098 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.133333; ece 0.004416; mce 0.515765; loss 0.115577, brier 0.471720
Time taken: 1.0730836391448975 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.67252

In [59]:
# Beta calibration, "ab" variant.
df_beta_ab = cal_results(
    BetaCalibration,
    PATH,
    files,
    {'parameters': "ab"},
    approach="single",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.666667; ece 0.011326; mce 0.517749; loss 0.125131, brier 0.472373
Time taken: 1.0659291744232178 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.266667; ece 0.003245; mce 0.199963; loss 0.117006, brier 0.475055
Time taken: 1.0557358264923096 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.67252

### Histogram binning

In [60]:
# Histogram binning with M=15 (presumably the number of bins — confirm in
# cal_methods.HistogramBinning).
df_hb = cal_results(
    HistogramBinning,
    PATH,
    files,
    {'M': 15},
    approach="single",
)

clone-graphcodebert_c2
clone-graphcodebert_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-graphcodebert_c2_logits.p
Accuracy: 96.53333333333333
Error: 3.4666666666666686
ECE: 0.03278859847784045
MCE: 0.7661023139953613
Loss: 0.29724898789029236
brier: 0.5251249275356024
Error 3.300000; ece 0.000000; mce 0.000000; loss 0.131579, brier 0.469117
Time taken: 1.362532615661621 

clone-plbart_c2
clone-plbart_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-plbart_c2_logits.p
Accuracy: 97.06666666666666
Error: 2.933333333333337
ECE: 0.02698366065820055
MCE: 0.40200273195902503
Loss: 0.267708375843258
brier: 0.5165806085205252
Error 3.000000; ece 0.000000; mce 0.000000; loss 0.121285, brier 0.471997
Time taken: 1.3659155368804932 

clone-codet5_c2
clone-codet5_c2
/root/autodl-tmp/HugCode/custom/calibration/scripts/logits/probs_clone-codet5_c2_logits.p
Accuracy: 96.93333333333334
Error: 3.066666666666663
ECE: 0.029178895950317387
MCE: 0.672526

#### Uncalibrated and calibrated scores for each calibration method.

In [61]:
df_iso

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,clone-graphcodebert_c2,3.466667,0.032789,0.766102,0.297249,0.525125
1,clone-graphcodebert_c2_calib,3.333333,0.011667,0.32,0.122896,0.475838
2,clone-plbart_c2,2.933333,0.026984,0.402003,0.267708,0.516581
3,clone-plbart_c2_calib,2.833333,0.005321,0.773298,0.111294,0.472353
4,clone-codet5_c2,3.066667,0.029179,0.672527,0.268137,0.521418
5,clone-codet5_c2_calib,2.533333,0.0159,0.47683,0.158161,0.498976
6,defect-graphcodebert_c2,37.115666,0.119405,0.223624,0.667802,0.429918
7,defect-graphcodebert_c2_calib,37.445095,0.02779,0.06486,0.619374,0.353393
8,defect-plbart_c2,38.579795,0.09113,0.27074,0.699941,0.413034
9,defect-plbart_c2_calib,38.543192,0.025955,0.2,0.674055,0.338062


In [62]:
df_temp_scale

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,clone-graphcodebert_c2,3.466667,0.032789,0.766102,0.297249,0.525125
1,clone-graphcodebert_c2_calib,3.466667,0.009902,0.559791,0.137881,0.496431
2,clone-plbart_c2,2.933333,0.026984,0.402003,0.267708,0.516581
3,clone-plbart_c2_calib,2.933333,0.006846,0.22959,0.121612,0.492515
4,clone-codet5_c2,3.066667,0.029179,0.672527,0.268137,0.521418
5,clone-codet5_c2_calib,3.066667,0.015602,0.358715,0.123291,0.508139
6,defect-graphcodebert_c2,37.115666,0.119405,0.223624,0.667802,0.429918
7,defect-graphcodebert_c2_calib,37.115666,0.040837,0.071758,0.609647,0.350394
8,defect-plbart_c2,38.579795,0.09113,0.27074,0.699941,0.413034
9,defect-plbart_c2_calib,38.579795,0.032573,0.048307,0.66484,0.33932


In [63]:
df_beta

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,clone-graphcodebert_c2,3.466667,0.032789,0.766102,0.297249,0.525125
1,clone-graphcodebert_c2_calib,3.266667,0.01372,0.354752,0.124261,0.469573
2,clone-plbart_c2,2.933333,0.026984,0.402003,0.267708,0.516581
3,clone-plbart_c2_calib,2.8,0.008867,0.363058,0.113534,0.470781
4,clone-codet5_c2,3.066667,0.029179,0.672527,0.268137,0.521418
5,clone-codet5_c2_calib,2.666667,0.014381,0.562848,0.115157,0.497394
6,defect-graphcodebert_c2,37.115666,0.119405,0.223624,0.667802,0.429918
7,defect-graphcodebert_c2_calib,37.335286,0.044848,0.072476,0.609969,0.353387
8,defect-plbart_c2,38.579795,0.09113,0.27074,0.699941,0.413034
9,defect-plbart_c2_calib,38.616398,0.038413,0.048747,0.664914,0.337125


In [64]:
df_beta_am

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,clone-graphcodebert_c2,3.466667,0.032789,0.766102,0.297249,0.525125
1,clone-graphcodebert_c2_calib,2.933333,0.013888,0.384889,0.12507,0.469963
2,clone-plbart_c2,2.933333,0.026984,0.402003,0.267708,0.516581
3,clone-plbart_c2_calib,2.766667,0.008957,0.478737,0.114414,0.471531
4,clone-codet5_c2,3.066667,0.029179,0.672527,0.268137,0.521418
5,clone-codet5_c2_calib,2.733333,0.013485,0.3636,0.113783,0.495368
6,defect-graphcodebert_c2,37.115666,0.119405,0.223624,0.667802,0.429918
7,defect-graphcodebert_c2_calib,37.408492,0.045015,0.080539,0.609895,0.353267
8,defect-plbart_c2,38.579795,0.09113,0.27074,0.699941,0.413034
9,defect-plbart_c2_calib,38.653001,0.035713,0.047697,0.664649,0.337137


In [65]:
df_beta_ab

Unnamed: 0,Name,Error,ECE,MCE,Loss,Brier
0,clone-graphcodebert_c2,3.466667,0.032789,0.766102,0.297249,0.525125
1,clone-graphcodebert_c2_calib,3.466667,0.01089,0.636004,0.125044,0.47254
2,clone-plbart_c2,2.933333,0.026984,0.402003,0.267708,0.516581
3,clone-plbart_c2_calib,2.933333,0.004635,0.239886,0.11274,0.472749
4,clone-codet5_c2,3.066667,0.029179,0.672527,0.268137,0.521418
5,clone-codet5_c2_calib,3.066667,0.014682,0.46604,0.116143,0.500104
6,defect-graphcodebert_c2,37.115666,0.119405,0.223624,0.667802,0.429918
7,defect-graphcodebert_c2_calib,37.115666,0.041908,0.077026,0.609587,0.349315
8,defect-plbart_c2,38.579795,0.09113,0.27074,0.699941,0.413034
9,defect-plbart_c2_calib,38.579795,0.032531,0.048185,0.664813,0.339169


## Dataframe with results 

In [66]:
# Result tables, one per calibration method. Order matters: the k-th table
# supplies the column labelled names[k + 2] in the comparison frames.
dfs = [
    df_hb,
    df_iso,
    df_temp_scale,
    df_beta,
    df_beta_am,
    df_beta_ab,
]

# Column labels of the comparison frames: model name, uncalibrated score,
# then one column per entry of dfs.
names = ["Name", "Uncalibrated"] + [
    "Histogram Binning",
    "Isotonic Regression",
    "Temperature Scaling",
    "Beta Calibration",
    "BC am",
    "BC ab",
]


def get_dataframe(dfs, column, names):
    """Build a comparison table of one metric across all calibration methods.

    Every frame in ``dfs`` is read in row pairs: the row at an even position
    holds the uncalibrated scores of a model and the row right after it holds
    the calibrated scores of the same model.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One result frame per calibration method, each with a ``"Name"`` column
        and the requested ``column``. The model name and the uncalibrated
        score are taken from ``dfs[0]``.
    column : str
        Name of the metric column to extract (e.g. ``"ECE"``).
    names : list of str
        Labels of the output columns: ``"Name"``, the uncalibrated label, then
        one label per frame in ``dfs`` (same order).

    Returns
    -------
    pandas.DataFrame
        One row per model, indexed by ``"Name"``, with the uncalibrated score
        followed by the calibrated score of each method.
    """
    if len(dfs) == 0:
        # Nothing to compare: return an empty table with the expected layout.
        return pd.DataFrame(columns=names).set_index('Name')

    reference = dfs[0]
    rows = []

    # Size the loop from the frames that were passed in. The previous version
    # read the notebook global df_iso here, so the function silently depended
    # on kernel state instead of its arguments.
    for i in range(1, len(reference), 2):
        uncalibrated_row = reference.iloc[i - 1]
        row = [uncalibrated_row["Name"], uncalibrated_row[column]]
        # Calibrated score of every method for the same model.
        row.extend(df.iloc[i][column] for df in dfs)
        rows.append(row)

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=names).set_index('Name')

In [67]:
# One comparison table per metric column of the result frames.
df_error, df_ece, df_mce, df_loss, df_brier = (
    get_dataframe(dfs, metric, names)
    for metric in ("Error", "ECE", "MCE", "Loss", "Brier")
)

## Scores

In [68]:
def highlight_min(s):
    '''
    Highlight the minimum of a Series in yellow.

    Returns one CSS string per element of ``s``: a yellow background for every
    element equal to ``s.min()`` (ties are all highlighted) and an empty
    string for the rest.
    '''
    lowest = s.min()
    styles = []
    for is_lowest in (s == lowest):
        styles.append('background-color: yellow' if is_lowest else '')
    return styles

## Error Rate

In [69]:
df_error.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
clone-graphcodebert_c2,3.466667,3.3,3.333333,3.466667,3.266667,2.933333,3.466667
clone-plbart_c2,2.933333,2.833333,2.833333,2.933333,2.8,2.766667,2.933333
clone-codet5_c2,3.066667,2.866667,2.533333,3.066667,2.666667,2.733333,3.066667
defect-graphcodebert_c2,37.115666,37.628111,37.445095,37.115666,37.335286,37.408492,37.115666
defect-plbart_c2,38.579795,40.190337,38.543192,38.579795,38.616398,38.653001,38.579795
defect-codet5_c2,35.688141,36.530015,36.163982,35.688141,35.834553,35.688141,35.688141
defect-unixcoder_c2,36.237189,36.93265,36.896047,36.237189,36.566618,36.493411,36.237189


## ECE

In [70]:
df_ece.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
clone-graphcodebert_c2,0.032789,0.004012,0.011667,0.009902,0.01372,0.013888,0.01089
clone-plbart_c2,0.026984,0.001484,0.005321,0.006846,0.008867,0.008957,0.004635
clone-codet5_c2,0.029179,0.007328,0.0159,0.015602,0.014381,0.013485,0.014682
defect-graphcodebert_c2,0.119405,0.027119,0.02779,0.040837,0.044848,0.045015,0.041908
defect-plbart_c2,0.09113,0.040363,0.025955,0.032573,0.038413,0.035713,0.032531
defect-codet5_c2,0.137634,0.028722,0.023728,0.034865,0.029445,0.03488,0.034678
defect-unixcoder_c2,0.118742,0.010158,0.023115,0.01698,0.022559,0.017062,0.018158


## MCE

In [71]:
df_mce.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
clone-graphcodebert_c2,0.766102,0.75,0.32,0.559791,0.354752,0.384889,0.636004
clone-plbart_c2,0.402003,0.095238,0.773298,0.22959,0.363058,0.478737,0.239886
clone-codet5_c2,0.672527,0.666667,0.47683,0.358715,0.562848,0.3636,0.46604
defect-graphcodebert_c2,0.223624,0.05994,0.06486,0.071758,0.072476,0.080539,0.077026
defect-plbart_c2,0.27074,0.070234,0.2,0.048307,0.048747,0.047697,0.048185
defect-codet5_c2,0.24632,0.082746,0.050712,0.047814,0.040939,0.049272,0.048109
defect-unixcoder_c2,0.209381,0.066373,0.072451,0.064006,0.05903,0.055345,0.042434


## Loss

In [72]:
df_loss.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
clone-graphcodebert_c2,0.297249,0.135954,0.122896,0.137881,0.124261,0.12507,0.125044
clone-plbart_c2,0.267708,0.140985,0.111294,0.121612,0.113534,0.114414,0.11274
clone-codet5_c2,0.268137,0.131294,0.158161,0.123291,0.115157,0.113783,0.116143
defect-graphcodebert_c2,0.667802,0.612253,0.619374,0.609647,0.609969,0.609895,0.609587
defect-plbart_c2,0.699941,0.666026,0.674055,0.66484,0.664914,0.664649,0.664813
defect-codet5_c2,0.713234,0.618844,0.630619,0.608711,0.60861,0.608628,0.60888
defect-unixcoder_c2,0.689692,0.625852,0.618509,0.614699,0.615147,0.615024,0.614416


## Brier

In [73]:
df_brier.style.apply(highlight_min, axis = 1)

Unnamed: 0_level_0,Uncalibrated,Histogram Binning,Isotonic Regression,Temperature Scaling,Beta Calibration,BC am,BC ab
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
clone-graphcodebert_c2,0.525125,0.467465,0.475838,0.496431,0.469573,0.469963,0.47254
clone-plbart_c2,0.516581,0.46801,0.472353,0.492515,0.470781,0.471531,0.472749
clone-codet5_c2,0.521418,0.486289,0.498976,0.508139,0.497394,0.495368,0.500104
defect-graphcodebert_c2,0.429918,0.352535,0.353393,0.350394,0.353387,0.353267,0.349315
defect-plbart_c2,0.413034,0.336918,0.338062,0.33932,0.337125,0.337137,0.339169
defect-codet5_c2,0.470671,0.357523,0.356818,0.356576,0.355418,0.355467,0.358871
defect-unixcoder_c2,0.414839,0.347982,0.34771,0.338987,0.345897,0.344656,0.333009
