In [1]:
# How changing the dataset sample size affects the calibration methods
# Fix the training dataset size and vary the number of calibration-set samples - the best method is the one that reaches the best calibration with the least data

# imports
import sys
import numpy as np
import pandas as pd
# Make the parent and grand-parent directories importable so the project
# modules imported below can be resolved (order of the entries is preserved).
for _parent_dir in ('../../', '../'):
    sys.path.append(_parent_dir)

# Widen the pandas display limits so large result tables are not truncated.
for _option_name, _option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option_name, _option_value)

import core_exp as cx
import core_calib as cal
import core_tools as ct

import warnings
from sklearn.exceptions import DataConversionWarning

# Ignore all warnings from scikit-learn
warnings.filterwarnings("ignore", module="sklearn")
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Add more categories if needed



In [2]:
# Central configuration for this experiment. The whole dict is passed to
# cx.run_exp and individual entries are read by the reporting cells below.
params = {
    # exp
    "seed": 0,
    # Must be a plain integer literal: the previous value `20f` is not valid
    # Python and prevented the whole cell from parsing. 20 matches the "20run"
    # tag used in exp_name.
    "runs": 20,
    "exp_name": ct.generate_readable_short_id("real_30_with_CT_20run"), #"main_run5_cv5_21data_100trees_40opt_fast",
    "path": "../../",
    "split": "CV", #CV, random_split
    "cv_folds": 5,
    "plot": False,

    # Methods compared in the result tables (one column per entry).
    "calib_methods": [
        "RF_d", "RF_opt", "RF_large", "CT",
        "Platt", "ISO", "Beta", "PPA", "VA",
        "Rank",
    ],

    # Colour per method, forwarded to ct.res_statistics as `colors`.
    "calib_method_colors": {
        "RF_d": "blue",
        "RF_opt": "orange",
        "RF_large": "red",
        "CT": "slategray",
        "Platt": "Brown",
        "ISO": "purple",
        "Beta": "magenta",
        "PPA": "olive",
        "VA": "gray",
        "Rank": "silver",
    },

    # "calib_methods": ["RF_opt", "RF_large",
    #                   "DT", "LR", "SVM", "NN", "GNB"
    #                   ],

    # One result table is produced per metric name.
    "metrics": ["acc", "brier", "ece", "logloss", "time"],

    # calib param
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # RF hyper opt
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 50,
    "opt_top_K": 5,
    "search_space": {
        "n_estimators": [10],
        "max_depth": np.arange(2, 100).tolist(), # [None, 5, 10, 15, 20, 30, 50], #
        "criterion": ["gini", "entropy"],
        "max_features": ['sqrt', 'log2', None],
        "min_samples_split": np.arange(2, 11).tolist(),
        "min_samples_leaf": np.arange(1, 200).tolist(),
        'class_weight': [None, 'balanced', 'balanced_subsample'],  # Weight for each class
        "oob_score": [False],
    },

    "oob": False,
    "laplace": 0, # if set to 1, OOB must be false

    # The experiment sweeps over `exp_key`, taking each entry of `exp_values`
    # in turn (here: one run per dataset name, 30 datasets in total).
    "exp_key": "data_name",
    "exp_values": [
        "cm1",
        "datatrieve",
        "kc1_class_level_defectiveornot",
        "kc1",
        "kc2",
        "kc3",
        "pc1",
        "spect",
        "spectf",
        "vertebral",
        "wilt",
        "parkinsons",
        "heart",
        "wdbc",
        "bank",
        "ionosphere",
        "HRCompetencyScores",
        "spambase",
        "QSAR",
        "diabetes",
        "breast",
        "SPF",
        "hillvalley",
        "pc4",
        "scene",
        "Sonar_Mine_Rock_Data",
        "Customer_Churn",
        "jm1",
        "eeg",
        "phoneme",

        # "madelon",
        # "nomao",
    ],
}

# Persist the configuration through the project helper (storage location is
# decided inside ct.save_params).
ct.save_params(params)

In [3]:
# Run the experiment: cx.run_exp receives the swept key, the list of values to
# sweep over, and the full configuration; it returns the per-value results and
# the list of data sets used.
calib_results_dict, data_list = cx.run_exp(
    params["exp_key"],
    params["exp_values"],
    params,
)

# Store the raw results under the experiment name.
ct.save_results(
    calib_results_dict,
    params['exp_name'],
)

exp_param cm1 done
exp_param datatrieve done
exp_param kc1_class_level_defectiveornot done
exp_param kc1 done
exp_param kc2 done
exp_param kc3 done
exp_param pc1 done
exp_param spect done
exp_param spectf done
exp_param vertebral done
exp_param wilt done
exp_param parkinsons done
exp_param heart done
exp_param wdbc done
exp_param bank done
exp_param ionosphere done
exp_param HRCompetencyScores done
exp_param spambase done
exp_param QSAR done
exp_param diabetes done
exp_param breast done
exp_param SPF done
exp_param hillvalley done
exp_param pc4 done
exp_param scene done
exp_param Sonar_Mine_Rock_Data done
exp_param Customer_Churn done
exp_param jm1 done
exp_param eeg done
exp_param phoneme done


In [4]:
# Aggregate the raw results into one table per metric (indexed below by metric
# name), requesting the mean/rank summary and the standard deviation.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=True,
    std=True,
)

# Export the tables as CSV under the experiment name.
ct.save_metrics_to_csv(
    tables,
    params["metrics"],
    params['exp_name'],
)

In [5]:
# Show the "time" table rounded to four decimals.
tables["time"].round(decimals=4)


Unnamed: 0_level_0,RF_d,RF_opt,RF_large,CT,Platt,ISO,Beta,PPA,VA,Rank
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cm1,0.0149,4.9993,0.0762,4.5056,4.4878,4.4872,4.4917,5.3043,4.4866,4.5178
datatrieve,0.0125,3.6154,0.0496,3.4049,3.5316,3.5318,3.5352,4.2745,3.5311,3.5443
kc1_class_level_defectiveornot,0.0131,3.7921,0.0548,3.4889,3.6287,3.6275,3.6305,4.373,3.6272,3.6402
kc1,0.0393,12.4617,0.1782,9.9873,9.8537,9.8526,9.8558,11.0336,9.8525,9.9671
kc2,0.0147,5.0472,0.0752,4.5595,4.5122,4.5115,4.5149,5.3357,4.5107,4.5416
kc3,0.0154,4.893,0.076,4.4151,4.3911,4.3907,4.3957,5.189,4.3904,4.4176
pc1,0.0272,8.096,0.1083,6.6382,6.6585,6.6579,6.6605,7.5817,6.6574,6.7172
spect,0.0117,4.0287,0.0541,3.7732,3.8004,3.7997,3.8017,4.5652,3.7992,3.8197
spectf,0.0132,4.2102,0.0605,3.8483,3.9372,3.9366,3.9403,4.7,3.9361,3.9536
vertebral,0.0129,4.1972,0.0578,3.9104,3.9653,3.9647,3.9666,4.7479,3.9636,3.9855


In [6]:
# Run the statistical comparison for every metric; the third argument is the
# "results/<exp_name>" location handed to the helper, and each method keeps
# its configured colour.
ct.res_statistics(
    tables,
    params["metrics"],
    f"results/{params['exp_name']}",
    colors=params["calib_method_colors"],
)

metric acc
Friedman Test Statistic: 106.69037995149564
P-value: 6.92712140404211e-19
The differences between groups are significant.
metric brier
Friedman Test Statistic: 115.23636363636365
P-value: 1.2582665554337384e-20
The differences between groups are significant.
metric ece
Friedman Test Statistic: 51.85454545454536
P-value: 4.815804590233454e-08
The differences between groups are significant.
metric logloss
Friedman Test Statistic: 177.95636363636368
P-value: 1.3531310691714593e-33
The differences between groups are significant.
metric time
Friedman Test Statistic: 246.60363636363627
P-value: 5.198183839914754e-48
The differences between groups are significant.


In [7]:
# Export the per-metric tables as LaTeX under the experiment name.
ct.save_metrics_to_latex(
    tables,
    params["metrics"],
    params['exp_name'],
)

In [8]:
# Display the "acc" table (one column per calibration method, one row per data set).
tables["acc"]

Unnamed: 0_level_0,RF_d,RF_opt,RF_large,CT,Platt,ISO,Beta,PPA,VA,Rank
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cm1,0.889266,0.901516,0.892381,0.900916,0.900809,0.900407,0.900909,0.901315,0.900306,0.901111
datatrieve,0.901538,0.914615,0.906154,0.914615,0.915385,0.913846,0.912692,0.914231,0.909615,0.915385
kc1_class_level_defectiveornot,0.711724,0.726207,0.73,0.727931,0.723448,0.719655,0.725172,0.728621,0.721034,0.662414
kc1,0.852559,0.845661,0.857846,0.846632,0.845117,0.843764,0.845164,0.846254,0.843717,0.845496
kc2,0.828046,0.837556,0.833926,0.836406,0.83303,0.831028,0.833796,0.832263,0.830935,0.8093
kc3,0.893881,0.903189,0.895738,0.900125,0.903186,0.902972,0.904498,0.904497,0.900782,0.903297
pc1,0.934673,0.93102,0.937335,0.932104,0.930525,0.930209,0.930795,0.930794,0.929939,0.93111
spect,0.810793,0.815049,0.822041,0.828235,0.819231,0.819354,0.821073,0.808169,0.821233,0.74347
spectf,0.796083,0.799647,0.81181,0.802079,0.798529,0.790112,0.795514,0.796461,0.789937,0.759549
vertebral,0.824677,0.819194,0.830806,0.824516,0.811935,0.802903,0.808226,0.807581,0.80629,0.789839
