In [1]:
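# How changing the dataset sample size affects the calibration methods.
# Fix the training dataset size and vary the number of calibration-set samples - the best method is the one that reaches the best calibration with the least data.
# NOTE(review): the params cell below sweeps "data_name" rather than a sample size - confirm this header is still current.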

# imports
import sys
import numpy as np
import pandas as pd
# Add the two parent directories to the module search path so that files
# living in higher directories can be imported.
sys.path.extend(["../../", "../"])

# Raise pandas' display limits (rows, columns, total width) for result tables.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

import core_exp as cx
import core_calib as cal
import core_tools as ct

In [2]:
# Experiment configuration. The dict is saved via ct.save_params right after
# it is built and is handed to the experiment runner in a later cell.
params = {
    # --- experiment setup ---
    "seed": 0,
    "runs": 5,
    # alternative name: "main_run5_cv5_21data_100trees_40opt_fast"
    "exp_name": ct.generate_readable_short_id("time_test_ML"),
    "path": "../../",
    "split": "CV",  # options: CV, random_split
    "cv_folds": 5,
    "plot": False,

    # Alternative method selection, kept for reference:
    # "calib_methods": ["RF_d", "RF_opt", "RF_large",
    #                   "Platt", "ISO", "Beta", "CRF", "VA",
    #                   "tlr", "Rank", "Elkan",
    #                   ],
    "calib_methods": ["RF_opt", "RF_large", "DT", "LR", "SVM", "NN"],

    "metrics": ["acc", "brier", "ece", "logloss"],

    # --- calibration parameters ---
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # --- RF hyper-parameter optimisation ---
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 50,
    "opt_top_K": 5,
    "search_space": {
        "n_estimators": [10],
        # every integer depth from 2 to 99; alternative: [None, 5, 10, 15, 20, 30, 50]
        "max_depth": list(range(2, 100)),
        "criterion": ["gini", "entropy"],
        "max_features": ["sqrt", "log2", None],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        # "bootstrap": [True, False],  # whether bootstrap samples are used
        "class_weight": [None, "balanced", "balanced_subsample"],  # weight for each class
        "oob_score": [False],
    },

    "oob": False,
    "laplace": 0,  # if set to 1, OOB must be False

    # --- swept parameter: its name and the values to iterate over ---
    "exp_key": "data_name",
    "exp_values": [
        "cm1",
        "datatrieve",
        "kc1_class_level_defectiveornot",
        "kc1",
        "kc2",
        "kc3",
        "pc1",
        "spect",
        "spectf",
        "vertebral",
        # Currently disabled datasets:
        # "wilt",
        # "parkinsons",
        # "heart",
        # "wdbc",
        # "bank",
        # "ionosphere",
        # "HRCompetencyScores",
        # "spambase",
        # "QSAR",
        # "diabetes",
        # "breast",
        # "SPF",
        # "hillvalley",
        # "pc4",
        # "scene",
        # "Sonar_Mine_Rock_Data",
        # "Customer_Churn",
        # "jm1",
        # "eeg",
        # "phoneme",

        # "madelon",
        # "nomao",
    ],
}

# Persist the configuration that is about to be used.
ct.save_params(params)

In [3]:
# Run the experiment over params["exp_values"] and store the raw results
# under the experiment name.
calib_results_dict, data_list = cx.run_exp(
    params["exp_key"],
    params["exp_values"],
    params,
)
ct.save_results(calib_results_dict, params["exp_name"])

exp_param cm1 done
exp_param datatrieve done
exp_param kc1_class_level_defectiveornot done
exp_param kc1 done
exp_param kc2 done
exp_param kc3 done
exp_param pc1 done
exp_param spect done
exp_param spectf done
exp_param vertebral done


In [4]:
# Build the per-metric summary tables from the raw results (with the
# mean_and_rank and std options switched on), then export them to CSV.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=True,
    std=True,
)
ct.save_metrics_to_csv(tables, params["metrics"], params["exp_name"])

In [5]:
# Show the Brier-score table rounded to four decimals.
# NOTE(review): the saved output under this cell lists columns such as RF_d,
# Platt, ISO and Elkan, which are not in the current params["calib_methods"];
# it looks like output from an earlier run - re-run the notebook to refresh it.
tables["brier"].round(decimals=4)


Unnamed: 0_level_0,RF_d,RF_opt,RF_large,Platt,ISO,Beta,CRF,VA,tlr,Rank,Elkan
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
cm1,0.0935,0.0874,0.088,0.0876,0.089,0.0868,0.09,0.0898,0.0974,0.0916,0.1194
datatrieve,0.0866,0.0879,0.0815,0.0802,0.0873,0.0842,0.0839,0.0853,0.0847,0.0814,0.1171
kc1_class_level_defectiveornot,0.1883,0.1828,0.1759,0.1822,0.2008,0.1874,0.1859,0.1852,0.1877,0.2097,0.2135
kc1,0.1103,0.108,0.1041,0.1105,0.1112,0.11,0.1106,0.1105,0.1293,0.1151,0.1414
kc2,0.1281,0.1188,0.1217,0.1203,0.1239,0.1232,0.1223,0.122,0.132,0.1265,0.1465
kc3,0.0792,0.078,0.0752,0.0786,0.0862,0.0787,0.0813,0.083,0.0884,0.0802,0.1049
pc1,0.0543,0.0539,0.0507,0.0556,0.0566,0.056,0.0558,0.0555,0.064,0.0564,0.0735
spect,0.1372,0.1204,0.1289,0.1253,0.1291,0.1271,0.1272,0.1262,0.136,0.1546,0.1388
spectf,0.1397,0.1343,0.1292,0.1392,0.1452,0.1398,0.1368,0.1398,0.1472,0.1537,0.1473
vertebral,0.1172,0.1186,0.1082,0.1248,0.1298,0.1268,0.1231,0.1245,0.1246,0.1412,0.1511


In [6]:
# Run the statistical comparison for every configured metric; the third
# argument is a results path derived from the experiment name.
ct.res_statistics(
    tables,
    params["metrics"],
    f"results/{params['exp_name']}",
)

metric acc
Friedman Test Statistic: 38.59225512528474
P-value: 2.991933505368201e-05
The differences between groups are significant.
metric brier
Friedman Test Statistic: 65.45454545454544
P-value: 3.317550076892051e-10
The differences between groups are significant.
metric ece
Friedman Test Statistic: 42.78181818181815
P-value: 5.440133872027003e-06
The differences between groups are significant.
metric logloss
Friedman Test Statistic: 83.5454545454545
P-value: 1.0098548737375722e-13
The differences between groups are significant.
