In [1]:
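# How changing the dataset sample size affects the calibration methods
# Fix the training dataset size and vary the number of calibration-set samples - the best method is the one that reaches the best calibration with the least data
# NOTE(review): the configuration in this notebook sets exp_key = "data_name" and keeps "calib_split" fixed, so the runs vary the dataset rather than the calibration sample size - confirm this header is still accurate.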

# Imports and notebook-wide setup.
import sys
import pandas as pd
# Extend the module search path before the project imports at the bottom of this cell.
# Both entries are relative paths, so they depend on the kernel's working directory.
sys.path.append('../../') # two directory levels up
sys.path.append('../') # one directory level up
# Widen the pandas display limits: up to 500 rows, 500 columns, 1000 characters per line.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Project-local modules; presumably resolved through the sys.path entries added above - confirm.
import core_exp as cx
import core_calib as cal

In [2]:
# Experiment configuration; the whole dict is passed to cx.run_exp.
params = {
    # --- experiment ---
    "exp_name": "real",
    "runs": 2,
    "plot": True,
    # Calibration methods compared in this run. Further method names that were listed
    # here but are currently switched off:
    #   "ISO", "Rank", "CRF", "VA", "Beta", "Elkan", "tlr", "Line", "RF_boot",
    #   "RF_ens_r", "RF_large", "RF_ens_line", "RF_large_line"
    "calib_methods": ["RF", "Platt"],
    "metrics": ["acc", "logloss", "brier", "ece", "auc"],

    # --- data splits ---
    "test_split": 0.3,
    "calib_split": 0.1,

    # --- calibration parameters ---
    "ece_bins": 20,
    "boot_size": 5000,
    "boot_count": 40,

    # --- random forest hyper-parameter search ---
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 10,
    "search_space": {
        "n_estimators": [10],
        "max_depth": [2, 3, 4, 5, 6, 7, 8, 10, 20, 50, 100],
        "criterion": ["gini", "entropy"],
        # Currently not searched:
        #   "min_samples_split": [2,3,4,5]
        #   "min_samples_leaf": [1,2,3]
    },
}

# Key/values pair handed to cx.run_exp next to params; presumably the setting that is
# varied from one experiment to the next - confirm against core_exp.
exp_key = "data_name"
# Other dataset names that were listed here but are currently switched off:
#   "QSAR", "bank", "climate", "parkinsons", "vertebral", "ionosphere", "diabetes", "breast", "blod"
exp_values = ["spambase", "climate"]


In [3]:
# Run the experiment for every value in exp_values; run_exp hands back the raw
# calibration results together with the list of datasets it used.
calib_results_dict, data_list = cx.run_exp(exp_key, exp_values, params)

# Collapse the raw results into summary tables. The result is indexed by metric name
# in the cells that follow (e.g. tables["brier"]).
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=True,
    std=True,
)

In [4]:
# Brier score table: one row per dataset plus the Mean and Rank summary rows,
# one column per calibration method.
tables["brier"]

Unnamed: 0_level_0,RF,Platt
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.048872,0.046724
climate,0.072316,0.068514
Mean,0.060594,0.057619
Rank,2.0,1.0


In [5]:
# Log-loss table: one row per dataset plus the Mean and Rank summary rows,
# one column per calibration method.
tables["logloss"]

Unnamed: 0_level_0,RF,Platt
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.312189,0.17359
climate,0.542751,0.244739
Mean,0.42747,0.209164
Rank,2.0,1.0


In [6]:
# Accuracy table: one row per dataset plus the Mean and Rank summary rows,
# one column per calibration method.
tables["acc"]

Unnamed: 0_level_0,RF,Platt
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.940261,0.938088
climate,0.910494,0.916667
Mean,0.925377,0.927378
Rank,1.5,1.5


In [7]:
# ECE table: one row per dataset plus the Mean and Rank summary rows,
# one column per calibration method.
tables["ece"]

Unnamed: 0_level_0,RF,Platt
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.037313,0.02349
climate,0.057129,0.057016
Mean,0.047221,0.040253
Rank,2.0,1.0
