In [1]:
# How changing the dataset sample size affects the calibration methods
# Fix the training dataset size and vary the number of calibration-set samples - the best method is the one that achieves the best calibration with the least data

# imports
import sys
import pandas as pd
# Put the grandparent and parent directories (relative to the working
# directory) on the module search path, in that order.
sys.path.extend(['../../', '../'])

# Raise the pandas display limits used when rendering DataFrames.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

import core_exp as cx
import core_calib as cal

In [2]:
# Experiment configuration; passed to cx.run_exp and read again when the
# result tables are built.
params = {
    # --- experiment ---
    "exp_name": "real",
    "runs": 50,
    "plot": True,
    "calib_methods": [
        "RF", "Platt", "ISO", "Rank", "CRF", "VA", "Beta", "Elkan", "tlr", "Line",
        "RF_boot", "RF_ens_r", "RF_large",
        "RF_ens_line", "RF_ens_CRF", "RF_ens_Platt", "RF_large_line",
    ],
    "metrics": ["acc", "logloss", "brier", "ece", "auc"],

    # --- data ---
    "test_split": 0.3,
    "calib_split": 0.1,

    # --- calibration parameters ---
    "ece_bins": 40,
    "boot_size": 5000,
    "boot_count": 40,

    # --- RF hyper-parameter optimisation ---
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 10,
    "search_space": {
        "n_estimators": [100],
        # Depths 2..8 followed by a few larger values.
        "max_depth": [*range(2, 9), 10, 20, 50, 100],
        "criterion": ["gini", "entropy"],
        # Currently disabled search dimensions:
        # "min_samples_split": [2,3,4,5],
        # "min_samples_leaf": [1,2,3],
    },
}

# exp_key / exp_values are handed to cx.run_exp together with params.
# NOTE(review): presumably exp_key names the setting that is varied and
# exp_values lists the values it takes - confirm against core_exp.
exp_key = "data_name"
# Full list kept for reference:
# exp_values = ["spambase", "climate", "QSAR", "bank", "parkinsons", "vertebral", "ionosphere", "diabetes", "breast", "blod"]
exp_values = ["vertebral"]


In [3]:
# Run the experiment; cx.run_exp returns a pair that is unpacked into the
# calibration results and the list of datasets.
calib_results_dict, data_list = cx.run_exp(
    exp_key,
    exp_values,
    params,
)

# Build the summary tables. The result is indexed by metric name in the
# cells that follow (e.g. tables["brier"]).
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=True,
    std=True,
)

In [4]:
# Show the "brier" table built by cal.mean_and_ranking_table above.
# In the saved output, rank 1 goes to the method with the lowest value.
tables["brier"]

Unnamed: 0_level_0,RF,Platt,ISO,Rank,CRF,VA,Beta,Elkan,tlr,Line,RF_boot,RF_ens_r,RF_large,RF_ens_line,RF_ens_CRF,RF_ens_Platt,RF_large_line
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
vertebral,0.113496,0.123158,0.140053,0.14502,0.11776,0.125763,0.131875,0.215269,0.11492,0.120083,0.113509,0.112153,0.112113,0.112803,0.127214,0.128998,0.113512
Mean,0.113496,0.123158,0.140053,0.14502,0.11776,0.125763,0.131875,0.215269,0.11492,0.120083,0.113509,0.112153,0.112113,0.112803,0.127214,0.128998,0.113512
Rank,4.0,10.0,15.0,16.0,8.0,11.0,14.0,17.0,7.0,9.0,5.0,2.0,1.0,3.0,12.0,13.0,6.0


In [5]:
# Show the "logloss" table built by cal.mean_and_ranking_table above.
# In the saved output, rank 1 goes to the method with the lowest value.
tables["logloss"]

Unnamed: 0_level_0,RF,Platt,ISO,Rank,CRF,VA,Beta,Elkan,tlr,Line,RF_boot,RF_ens_r,RF_large,RF_ens_line,RF_ens_CRF,RF_ens_Platt,RF_large_line
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
vertebral,0.355676,0.388607,2.127159,2.204449,0.363943,0.391415,0.681404,0.66627,0.346855,0.625833,0.355704,0.34597,0.345947,0.454249,0.38423,0.434879,0.451477
Mean,0.355676,0.388607,2.127159,2.204449,0.363943,0.391415,0.681404,0.66627,0.346855,0.625833,0.355704,0.34597,0.345947,0.454249,0.38423,0.434879,0.451477
Rank,4.0,8.0,16.0,17.0,6.0,9.0,15.0,14.0,3.0,13.0,5.0,2.0,1.0,12.0,7.0,10.0,11.0


In [6]:
# Show the "acc" table built by cal.mean_and_ranking_table above.
# In the saved output, the best ranks go to the methods with the highest value
# (fractional ranks such as 1.5 appear where methods tie).
tables["acc"]

Unnamed: 0_level_0,RF,Platt,ISO,Rank,CRF,VA,Beta,Elkan,tlr,Line,RF_boot,RF_ens_r,RF_large,RF_ens_line,RF_ens_CRF,RF_ens_Platt,RF_large_line
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
vertebral,0.823226,0.815054,0.807742,0.790968,0.823226,0.810753,0.816344,0.714194,0.82129,0.815914,0.823656,0.827097,0.826452,0.824516,0.827097,0.827097,0.823441
Mean,0.823226,0.815054,0.807742,0.790968,0.823226,0.810753,0.816344,0.714194,0.82129,0.815914,0.823656,0.827097,0.826452,0.824516,0.827097,0.827097,0.823441
Rank,8.5,13.0,15.0,16.0,8.5,14.0,11.0,17.0,10.0,12.0,6.0,1.5,4.0,5.0,1.5,3.0,7.0


In [7]:
# Show the "ece" table built by cal.mean_and_ranking_table above.
# In the saved output, rank 1 goes to the method with the lowest value.
tables["ece"]

Unnamed: 0_level_0,RF,Platt,ISO,Rank,CRF,VA,Beta,Elkan,tlr,Line,RF_boot,RF_ens_r,RF_large,RF_ens_line,RF_ens_CRF,RF_ens_Platt,RF_large_line
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
vertebral,0.165716,0.181699,0.150731,0.157263,0.167256,0.189226,0.161634,0.260597,0.1533,0.15846,0.163505,0.168261,0.166407,0.146882,0.169338,0.149947,0.143063
Mean,0.165716,0.181699,0.150731,0.157263,0.167256,0.189226,0.161634,0.260597,0.1533,0.15846,0.163505,0.168261,0.166407,0.146882,0.169338,0.149947,0.143063
Rank,10.0,15.0,4.0,6.0,12.0,16.0,8.0,17.0,5.0,7.0,9.0,13.0,11.0,2.0,14.0,3.0,1.0
