In [1]:
# How changing the dataset sample size effects the calibration methods
# Fix training dataset size and change the calib set samples - best method is one that gets max calib with least data

# imports
import sys
import pandas as pd
sys.path.append('../../') # to access the files in higher directories
sys.path.append('../') # to access the files in higher directories
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import core_exp as cx
import core_calib as cal

In [2]:
params = {
    # exp
    "seed": 0,
    "runs": 15,
    "exp_name": "RF_fix_seed",
    "path": "../../",
    "split": "random_split", #CV
    "test_split": 0.3,
    "calib_split": 0.1,
    "cv_folds": 5,
    "plot": True,
    "calib_methods": ["RF_d", "RF_opt",
                      "Platt", "ISO", "Beta", "CRF", "VA",
                      "RF_ens_k", "RF_ens_r", "RF_large",
                      ],
    
    "metrics": ["acc", "brier", "ece", "logloss", "auc"],

    # calib param
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # RF hyper opt
    "hyper_opt": True,
    "opt_cv":5, 
    "opt_n_iter":20,
    "opt_top_K": 5,
    "search_space": {
                    "n_estimators": [100],
                    "max_depth": [2,3,4,5,6,7,8,10,15,20,30,40,50,60,100],
                    "criterion": ["gini", "entropy"],
                    "max_features": ["sqrt", "log2"],
                    "min_samples_split": [2,3,4,5],
                    "min_samples_leaf": [1,2,3],
                    "oob_score": [False]

                    },
    
    "n_estimators": 100,
    "oob": False,
}

exp_key = "data_name"
exp_values = [
              "vertebral", 
              "wilt",
              "parkinsons", 
              "heart",
              "wdbc",
            #   "bank", 
            #   "ionosphere", 
            #   "HRCompetencyScores",
              "spambase", 
            #   "QSAR", 
            #   "diabetes", 
            #   "breast", 
            #   "SPF",
            #   "hillvalley",
            #   "pc4",
            #   "scene",
            #   "Sonar_Mine_Rock_Data",
            #   "Customer_Churn",
            #   "jm1",
            #   "eeg",
              # "madelon",
              # "phoneme",
              # "nomao",
              ]
# exp_values = ["wilt"]

In [3]:
calib_results_dict, data_list = cx.run_exp(exp_key, exp_values, params)

exp_param vertebral done
exp_param wilt done
exp_param parkinsons done
exp_param heart done
exp_param wdbc done
exp_param spambase done


In [4]:
tables = cal.mean_and_ranking_table(calib_results_dict, 
                                    params["metrics"], 
                                    params["calib_methods"], 
                                    data_list, 
                                    mean_and_rank=True, 
                                    std=True)

In [5]:
import matplotlib.pyplot as plt
import numpy as np

def vialin_plot(results_dict, metrics, calib_methods, data_list):

    # save results as txt
    df_dict = {}
    for data in data_list:
        for metric in metrics:
            df = pd.DataFrame(columns=calib_methods)
            for method in calib_methods:
                df[method] = np.array(results_dict[data+ "_" + method + "_"+ metric])
            print("df", df.head())
            fig, ax1 = plt.subplots()
            ax1.violinplot(df, showmeans=True) 
            ax1.set_xticks(np.arange(len(calib_methods)+1), labels=[""]+ calib_methods)
            # Rotate the tick labels by 90 degrees
            plt.xticks(rotation = 90) 
            plt.savefig(f"results/vialin_plot/{data}_{metric}.pdf", format='pdf', transparent=True)
            plt.close()        
    return df_dict

In [6]:
vialin_plot(calib_results_dict, 
                params["metrics"], 
                params["calib_methods"], 
                data_list,)

df        RF_d    RF_opt     Platt       ISO      Beta       CRF        VA  RF_ens_k  RF_ens_r  RF_large
0  0.849462  0.838710  0.849462  0.817204  0.817204  0.827957  0.817204  0.860215  0.849462  0.849462
1  0.827957  0.827957  0.860215  0.860215  0.849462  0.860215  0.860215  0.838710  0.838710  0.817204
2  0.860215  0.838710  0.795699  0.795699  0.795699  0.860215  0.795699  0.849462  0.860215  0.838710
3  0.870968  0.849462  0.838710  0.870968  0.881720  0.860215  0.860215  0.838710  0.870968  0.849462
4  0.795699  0.774194  0.795699  0.784946  0.784946  0.774194  0.795699  0.795699  0.784946  0.784946
df        RF_d    RF_opt     Platt       ISO      Beta       CRF        VA  RF_ens_k  RF_ens_r  RF_large
0  0.102274  0.101916  0.108483  0.113530  0.101194  0.102119  0.110612  0.096642  0.097773  0.097125
1  0.097111  0.103395  0.107535  0.113785  0.098030  0.095099  0.113352  0.100315  0.099360  0.101993
2  0.103094  0.108943  0.124318  0.162207  0.163483  0.107539  0.137228  0.1

{}

In [7]:
for metric in params["metrics"]:
    print(metric, " ", params["calib_methods"][tables[metric].loc["Rank"].argmin()])
    tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")


acc   RF_large
brier   RF_large
ece   Platt
logloss   RF_ens_k
auc   RF_large


  tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")
  tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")
  tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")
  tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")
  tables[metric].round(5).to_latex(f"./results/{params['exp_name']}/{metric}.csv")


In [8]:
for metric in params["metrics"]:
    print(metric, " ", params["calib_methods"][tables[metric].loc["Rank"].argmin()])
    tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")


acc   RF_large
brier   RF_large
ece   Platt
logloss   RF_ens_k
auc   RF_large


  tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")
  tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")
  tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")
  tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")
  tables[metric+ "_std"].round(5).to_latex(f"./results/{params['exp_name']}/{metric}_std.csv")


In [9]:
tables["brier"].round(4)


Unnamed: 0_level_0,RF_d,RF_opt,Platt,ISO,Beta,CRF,VA,RF_ens_k,RF_ens_r,RF_large
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vertebral,0.1062,0.1069,0.1168,0.1405,0.1353,0.1144,0.1198,0.1057,0.106,0.1057
wilt,0.0129,0.0127,0.0132,0.0132,0.0126,0.0131,0.0125,0.0129,0.0126,0.0125
parkinsons,0.0784,0.0789,0.0863,0.0998,0.0983,0.0811,0.0954,0.0784,0.0787,0.0784
heart,0.1318,0.1311,0.1349,0.1437,0.1374,0.1291,0.1398,0.1279,0.1281,0.1285
wdbc,0.0345,0.0339,0.0357,0.0366,0.0358,0.0339,0.0387,0.0336,0.0333,0.0333
spambase,0.0416,0.041,0.0383,0.0397,0.0403,0.04,0.0393,0.0433,0.0402,0.0404
Mean,0.0676,0.0674,0.0708,0.0789,0.0766,0.0686,0.0743,0.067,0.0665,0.0665
Rank,5.5,5.3333,6.3333,8.6667,7.1667,5.5,6.3333,4.0,3.3333,2.8333


In [10]:
tables["brier_std"].round(5)


Unnamed: 0_level_0,RF_d_std,RF_opt_std,Platt_std,ISO_std,Beta_std,CRF_std,VA_std,RF_ens_k_std,RF_ens_r_std,RF_large_std
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vertebral,0.01829,0.01812,0.02279,0.03951,0.04516,0.02341,0.02274,0.01666,0.01728,0.01716
wilt,0.00166,0.00157,0.00228,0.00226,0.00198,0.00165,0.00148,0.00157,0.00151,0.00147
parkinsons,0.01469,0.01603,0.02182,0.03689,0.03824,0.02503,0.02777,0.01613,0.01669,0.01616
heart,0.01753,0.01496,0.01851,0.02626,0.02177,0.01833,0.02146,0.01557,0.01532,0.01537
wdbc,0.00927,0.00835,0.01167,0.01225,0.01294,0.01127,0.01118,0.00871,0.00866,0.00839
spambase,0.00242,0.00227,0.00302,0.00376,0.00652,0.00322,0.00338,0.00224,0.00236,0.00245


In [11]:
tables["logloss"].round(4)

Unnamed: 0_level_0,RF_d,RF_opt,Platt,ISO,Beta,CRF,VA,RF_ens_k,RF_ens_r,RF_large
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vertebral,0.3281,0.3304,0.3723,2.9264,1.2201,0.3726,0.3778,0.3266,0.3286,0.327
wilt,0.0628,0.0655,0.0544,0.2048,0.0684,0.0588,0.0486,0.0517,0.05,0.0524
parkinsons,0.2548,0.2562,0.2989,1.903,1.3812,0.2593,0.3177,0.2554,0.2562,0.2555
heart,0.4148,0.4137,0.432,2.0651,0.9673,0.4115,0.4399,0.4053,0.4065,0.4078
wdbc,0.2111,0.1492,0.1478,0.6671,0.2634,0.1828,0.1563,0.1265,0.1253,0.1247
spambase,0.187,0.1677,0.1411,0.2605,0.166,0.1637,0.1441,0.1653,0.1584,0.1547
Mean,0.2431,0.2304,0.2411,1.3378,0.6777,0.2414,0.2474,0.2218,0.2208,0.2203
Rank,5.6667,5.8333,5.0,10.0,8.6667,5.8333,5.5,2.6667,3.1667,2.6667


In [12]:
tables["ece"].round(4)

Unnamed: 0_level_0,RF_d,RF_opt,Platt,ISO,Beta,CRF,VA,RF_ens_k,RF_ens_r,RF_large
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vertebral,0.0058,0.0074,0.0063,0.0415,0.0179,0.0099,0.0105,0.0097,0.0087,0.0096
wilt,0.0227,0.0235,0.032,0.0206,0.0044,0.0268,0.0069,0.0252,0.0243,0.0241
parkinsons,0.0156,0.0096,0.0075,0.0651,0.052,0.0282,0.0189,0.0087,0.0102,0.0083
heart,0.006,0.0062,0.0065,0.0166,0.0039,0.0047,0.0074,0.006,0.0067,0.0061
wdbc,0.012,0.012,0.008,0.0392,0.0129,0.0061,0.0178,0.0096,0.0105,0.0109
spambase,0.0091,0.0107,0.004,0.0075,0.006,0.0109,0.0056,0.0116,0.0111,0.0112
Mean,0.0119,0.0116,0.0107,0.0318,0.0162,0.0144,0.0112,0.0118,0.0119,0.0117
Rank,4.3333,5.1667,3.8333,7.8333,5.1667,5.6667,6.1667,5.5,6.0,5.3333


In [13]:
tables["acc"].round(4)

Unnamed: 0_level_0,RF_d,RF_opt,Platt,ISO,Beta,CRF,VA,RF_ens_k,RF_ens_r,RF_large
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
vertebral,0.8358,0.8258,0.8251,0.8244,0.8229,0.8344,0.8294,0.8315,0.8323,0.8287
wilt,0.9827,0.9831,0.9835,0.9836,0.9843,0.9827,0.9837,0.9831,0.9831,0.983
parkinsons,0.8847,0.8915,0.8904,0.8633,0.8836,0.8859,0.8475,0.8915,0.8972,0.8938
heart,0.8125,0.8198,0.8242,0.7956,0.8168,0.8271,0.7912,0.8227,0.8205,0.8242
wdbc,0.9598,0.9602,0.9571,0.954,0.9559,0.9579,0.9552,0.9598,0.9595,0.961
spambase,0.9488,0.9517,0.9492,0.9463,0.9474,0.9495,0.9478,0.9474,0.9524,0.952
Mean,0.9041,0.9054,0.9049,0.8945,0.9018,0.9062,0.8924,0.906,0.9075,0.9071
Rank,5.75,4.5833,5.25,8.3333,7.1667,4.75,7.1667,4.75,3.6667,3.5833


In [14]:
import scipy.stats as stats
import numpy as np

ece_ranks = np.array(tables["ece"].loc["Rank"])
brier_ranks = np.array(tables["brier"].loc["Rank"])
logloss_ranks = np.array(tables["logloss"].loc["Rank"])
acc_ranks = np.array(tables["acc"].loc["Rank"])

tau, p_value = stats.kendalltau(brier_ranks, acc_ranks)
print(f"tau {tau} p_value {p_value}")

tau 0.8604651162790699 p_value 0.0007726767343104879
