In [1]:
# How changing the dataset sample size effects the calibration methods
# Fix training dataset size and change the calib set samples - best method is one that gets max calib with least data

In [2]:
# imports
import sys
import pandas as pd
import numpy as np
sys.path.append('../../') # to access the files in higher directories
sys.path.append('../') # to access the files in higher directories
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import Data.data_provider as dp
import core as cal
from estimators.IR_RF_estimator import IR_RF
from sklearn.model_selection import RandomizedSearchCV


In [3]:
# params
calib_methods = ["RF", "CRF", "RF_boot", "bin", "RF_ens"] # cal.calib_methods.copy() +
metrics = cal.metrics.copy()
metrics.remove("tce")

data_list = ["spambase", "climate", "QSAR", "bank", "climate", "parkinsons", "vertebral", "ionosphere", "diabetes", "breast", "blod"]
# data_list = ["spambase", "climate"]

params = {
    "runs": 5,
    "test_split": 0.3,
    "calib_split": 0.05
}

In [4]:
calib_results_dict = {}

for data_name in data_list:

    # Data
    X, y = dp.load_data(data_name, "../../")
    
    data_dict = {} # results for each data set will be saved in here.
    for seed in range(params["runs"]): # running the same dataset multiple times
        # split the data
        data = cal.split_train_calib_test(data_name, X, y, params["test_split"], params["calib_split"], seed)
        # print("train", len(data["x_train"]))
        # print("calib", len(data["x_calib"]))
        # print("test", len(data["x_test"]))
        # print("---------------------------------")

        # train model
        search_space = {
            "n_estimators": [20],
            "max_depth": [5, 10, 15, 20, 25],
            "criterion": ["gini", "entropy"],
            "min_samples_split": [2,3,4,5],
            "min_samples_leaf": [1,2,3],
        }
        rf = IR_RF(random_state=seed)

        RS = RandomizedSearchCV(rf, search_space, scoring=["accuracy"], refit="accuracy", cv=5, n_iter=10, random_state=0)
        RS.fit(data["x_train"], data["y_train"])
        rf_best = RS.best_estimator_
        
        # calibration
        res = cal.calibration(rf_best, data, calib_methods, metrics) # res is a dict with all the metrics results as well as RF probs and every calibration method decision for every test data point
        data_dict = cal.update_runs(data_dict, res) # calib results for every run for the same dataset is aggregated in data_dict (ex. acc of every run as an array)
    calib_results_dict.update(data_dict) # merge results of all datasets together

In [5]:
tables = cal.mean_and_ranking_table(calib_results_dict, metrics, calib_methods, data_list, mean_and_rank=True)

In [6]:
tables["brier"]

Unnamed: 0_level_0,RF,CRF,RF_boot,bin,RF_ens
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
spambase,0.043107,0.041825,0.043079,0.045635,0.040784
climate,0.066759,0.067934,0.066811,0.080556,0.064808
QSAR,0.1046,0.1074,0.104628,0.131798,0.098721
bank,0.009152,0.007121,0.009137,0.008252,0.007924
climate,0.066759,0.067934,0.066811,0.080556,0.064808
parkinsons,0.089967,0.089747,0.089978,0.111864,0.088942
vertebral,0.106286,0.104979,0.106183,0.147563,0.106179
ionosphere,0.062172,0.057677,0.062237,0.07348,0.057981
diabetes,0.166498,0.172206,0.166557,0.200063,0.157527
breast,0.044349,0.04603,0.044377,0.0585,0.04369


In [7]:
tables["logloss"]

Unnamed: 0_level_0,RF,CRF,RF_boot,bin,RF_ens
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
spambase,0.043107,0.041825,0.043079,0.045635,0.040784
climate,0.066759,0.067934,0.066811,0.080556,0.064808
QSAR,0.1046,0.1074,0.104628,0.131798,0.098721
bank,0.009152,0.007121,0.009137,0.008252,0.007924
climate,0.066759,0.067934,0.066811,0.080556,0.064808
parkinsons,0.089967,0.089747,0.089978,0.111864,0.088942
vertebral,0.106286,0.104979,0.106183,0.147563,0.106179
ionosphere,0.062172,0.057677,0.062237,0.07348,0.057981
diabetes,0.166498,0.172206,0.166557,0.200063,0.157527
breast,0.044349,0.04603,0.044377,0.0585,0.04369


In [8]:
tables["acc"]

Unnamed: 0_level_0,RF,CRF,RF_boot,bin,RF_ens
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
spambase,0.947864,0.947574,0.947574,0.947429,0.95076
climate,0.908642,0.906173,0.907407,0.917284,0.901235
QSAR,0.849842,0.852366,0.852997,0.852997,0.859306
bank,0.991748,0.991262,0.991262,0.991748,0.991262
climate,0.908642,0.906173,0.907407,0.917284,0.901235
parkinsons,0.871186,0.864407,0.871186,0.888136,0.877966
vertebral,0.843011,0.843011,0.84086,0.836559,0.825806
ionosphere,0.932075,0.933962,0.930189,0.922642,0.933962
diabetes,0.750649,0.753247,0.752381,0.760173,0.7671
breast,0.938012,0.936842,0.938012,0.938012,0.935673


In [9]:
tables["ece"]

Unnamed: 0_level_0,RF,CRF,RF_boot,bin,RF_ens
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
spambase,0.042407,0.032063,0.042521,0.037565,0.045322
climate,0.072655,0.072293,0.073587,0.070988,0.086868
QSAR,0.078418,0.079323,0.066684,0.121151,0.075461
bank,0.024704,0.014245,0.02414,0.008252,0.023988
climate,0.072655,0.072293,0.073587,0.070988,0.086868
parkinsons,0.173561,0.173096,0.17364,0.050847,0.178193
vertebral,0.138514,0.132365,0.140399,0.140161,0.141572
ionosphere,0.103325,0.089064,0.105309,0.06478,0.104173
diabetes,0.111542,0.123223,0.10683,0.159,0.124712
breast,0.046271,0.04431,0.048347,0.053879,0.047187
