In [1]:
# How changing the dataset sample size affects the calibration methods.
# Fix the training dataset size and vary the number of calibration-set samples; the best method is the one that achieves the best calibration with the least data.

# imports
import sys
import numpy as np
import pandas as pd
# Put the grandparent and parent directories on the import path (in that order)
# so that files in higher directories can be imported.
sys.path.extend(['../../', '../'])

# Widen the pandas display limits so large result tables are shown in full.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

import core_exp as cx
import core_calib as cal
import core_tools as ct

import warnings
from sklearn.exceptions import DataConversionWarning

# Ignore all warnings from scikit-learn
warnings.filterwarnings("ignore", module="sklearn")
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Add more categories if needed



In [2]:
# Experiment configuration, assembled from named pieces and then stored via
# ct.save_params. Key names, values and key order are what the rest of the
# notebook reads from `params`.

# Calibration methods compared in this run.
method_names = [
    "RF_d", "RF_opt", "RF_opt_CT", "RF_large",
    "Platt", "ISO", "Beta", "VA",
    "CT", "PPA", "Rank",
]
# Alternative method line-up kept for reference:
# method_names = ["RF_opt", "RF_large", "DT", "LR", "SVM", "NN", "GNB"]

# Colour assigned to each calibration method (handed to ct.res_statistics as `colors=`).
method_palette = {
    "RF_d": "blue",
    "RF_opt": "orange",
    "RF_large": "red",
    "CT": "slategray",
    "RF_opt_CT": "black",
    "Platt": "Brown",
    "ISO": "purple",
    "Beta": "magenta",
    "PPA": "olive",
    "VA": "gray",
    "Rank": "silver",
}

# Candidate values for the RF hyper-parameter optimisation.
rf_search_space = {
    "n_estimators": [10],
    "max_depth": list(range(2, 100)),  # alternative: [None, 5, 10, 15, 20, 30, 50]
    "criterion": ["gini", "entropy"],
    "max_features": ["sqrt", "log2", None],
    "min_samples_split": list(range(2, 11)),
    "min_samples_leaf": list(range(1, 200)),
    "class_weight": [None, "balanced", "balanced_subsample"],  # weight for each class
    "oob_score": [False],
    "laplace": [0, 1],
}

# Values the experiment iterates over (one per "data_name").
real_datasets = [
    "cm1",
    "datatrieve",
    "kc1_class_level_defectiveornot",
    "kc1",
    "kc2",
    "kc3",
    "pc1",
    "spect",
    "spectf",
    "vertebral",
    "wilt",
    "parkinsons",
    "heart",
    "wdbc",
    "bank",
    "ionosphere",
    "HRCompetencyScores",
    "spambase",
    "QSAR",
    "diabetes",
    "breast",
    "SPF",
    "hillvalley",
    "pc4",
    "scene",
    "Sonar_Mine_Rock_Data",
    "Customer_Churn",
    "jm1",
    "eeg",
    "phoneme",
    # Currently excluded:
    # "madelon",
    # "nomao",
]

params = {
    # --- experiment ---
    "seed": 0,
    "runs": 5,
    # Earlier name: "main_run5_cv5_21data_100trees_40opt_fast"
    "exp_name": ct.generate_readable_short_id("Real30_CTfix2"),
    "path": "../../",
    "split": "CV",  # options: CV, random_split
    "cv_folds": 5,
    "plot": False,

    "calib_methods": method_names,
    "calib_method_colors": method_palette,

    "metrics": ["acc", "brier", "ece", "logloss", "time"],

    # --- calibration parameters ---
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # --- RF hyper-parameter optimisation ---
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 100,
    "opt_top_K": 5,
    "search_space": rf_search_space,

    "oob": False,
    "laplace": 0,  # if set to 1, OOB must be false
    "curt_v": list(range(1, 200)),

    # --- swept parameter ---
    "exp_key": "data_name",
    "exp_values": real_datasets,
}

ct.save_params(params)

In [3]:
# Run the experiment once per value of the swept parameter, then store the results
# under the experiment name.
swept_key = params["exp_key"]
swept_values = params["exp_values"]
calib_results_dict, data_list = cx.run_exp(swept_key, swept_values, params)
ct.save_results(calib_results_dict, params['exp_name'])

exp_param cm1 done
exp_param datatrieve done
exp_param kc1_class_level_defectiveornot done
exp_param kc1 done
exp_param kc2 done
exp_param kc3 done
exp_param pc1 done
exp_param spect done
exp_param spectf done
exp_param vertebral done
exp_param wilt done
exp_param parkinsons done
exp_param heart done
exp_param wdbc done
exp_param bank done
exp_param ionosphere done
exp_param HRCompetencyScores done
exp_param spambase done
exp_param QSAR done
exp_param diabetes done
exp_param breast done
exp_param SPF done
exp_param hillvalley done
exp_param pc4 done
exp_param scene done
exp_param Sonar_Mine_Rock_Data done
exp_param Customer_Churn done
exp_param jm1 done
exp_param eeg done
exp_param phoneme done


In [4]:
# Build one summary table per metric (with mean/rank and std enabled) from the
# experiment results, then write the tables out as CSV.
metric_names = params["metrics"]
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    metric_names,
    params["calib_methods"],
    data_list,
    mean_and_rank=True,
    std=True,
)
ct.save_metrics_to_csv(tables, metric_names, params['exp_name'])

In [5]:
# Display the "time" metric table, rounded to 4 decimal places.
# NOTE(review): the time unit is not visible from this notebook - confirm in core_calib.
tables["time"].round(4)


Unnamed: 0_level_0,RF_d,RF_opt,RF_large,Platt,ISO,Beta,VA,CT,CT_minleaf,PPA,Rank
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
cm1,0.0084,2.4642,0.038,2.3106,2.3102,2.3123,2.3099,3.6591,2.2543,2.7156,2.3257
datatrieve,0.0049,1.8348,0.0257,1.8255,1.8253,1.8269,1.8251,2.0404,1.6907,2.1937,1.8306
kc1_class_level_defectiveornot,0.0068,1.9059,0.0271,1.8072,1.8067,1.8084,1.8065,2.378,1.7016,2.1685,1.8134
kc1,0.0184,6.1638,0.0882,4.9427,4.9423,4.9436,4.942,11.1846,4.9485,5.5286,4.9961
kc2,0.0077,2.4973,0.0374,2.2493,2.249,2.2502,2.2487,3.9382,2.2698,2.6532,2.2641
kc3,0.0074,2.4447,0.0345,2.2067,2.2064,2.2084,2.2062,3.4057,2.1913,2.6111,2.2196
pc1,0.0127,4.2828,0.0562,3.4183,3.4179,3.4194,3.4177,5.8568,3.3553,3.8888,3.4466
spect,0.0052,2.0136,0.0269,1.8584,1.8579,1.8593,1.8577,2.9141,1.8288,2.2356,1.8671
spectf,0.0069,2.0477,0.0332,1.9244,1.924,1.9259,1.9237,2.8932,1.8791,2.3059,1.9327
vertebral,0.0067,2.08,0.0292,1.979,1.9787,1.9802,1.9784,3.002,1.9596,2.3702,1.9885


In [6]:
# Run the statistical comparison across methods for every metric; the cell output
# reports a Friedman test statistic and p-value per metric.
stats_target = f"results/{params['exp_name']}"
ct.res_statistics(
    tables,
    params["metrics"],
    stats_target,
    colors=params["calib_method_colors"],
)

metric acc
Friedman Test Statistic: 83.3013406459475
P-value: 1.1280070925378698e-13
The differences between groups are significant.
metric brier
Friedman Test Statistic: 108.60606060606051
P-value: 1.0189435909250171e-18
The differences between groups are significant.
metric ece
Friedman Test Statistic: 54.0060606060606
P-value: 4.839666487197811e-08
The differences between groups are significant.
metric logloss
Friedman Test Statistic: 173.8848484848486
P-value: 4.3481985614661754e-32
The differences between groups are significant.
metric time
Friedman Test Statistic: 273.28484848484845
P-value: 6.79025415355128e-53
The differences between groups are significant.


In [7]:
# Export the metric tables for this experiment via ct.save_metrics_to_latex.
# NOTE(review): presumably writes LaTeX files named after exp_name - confirm in core_tools.
ct.save_metrics_to_latex(tables, params["metrics"], params['exp_name'])

In [8]:
# Display the "acc" metric table (one row per dataset, one column per method in the output below).
tables["acc"]

Unnamed: 0_level_0,RF_d,RF_opt,RF_large,Platt,ISO,Beta,VA,CT,CT_minleaf,PPA,Rank
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
cm1,0.891168,0.9,0.894788,0.901616,0.900816,0.901616,0.900412,0.899212,0.901616,0.901616,0.901616
datatrieve,0.906154,0.910769,0.903077,0.915385,0.910769,0.912308,0.895385,0.906154,0.912308,0.912308,0.915385
kc1_class_level_defectiveornot,0.70069,0.736552,0.728276,0.72,0.704828,0.710345,0.713103,0.682759,0.728276,0.726897,0.657931
kc1,0.852916,0.845899,0.857749,0.845707,0.845233,0.845804,0.845233,0.846277,0.844665,0.845424,0.844759
kc2,0.827557,0.834872,0.831059,0.825645,0.824121,0.826795,0.821443,0.822637,0.826447,0.839062,0.803051
kc3,0.894763,0.903082,0.900454,0.905705,0.899618,0.899183,0.898743,0.901763,0.902652,0.905705,0.90614
pc1,0.937964,0.930388,0.939592,0.93111,0.929128,0.930389,0.929669,0.932553,0.932372,0.93111,0.930569
spect,0.814214,0.82239,0.831488,0.821719,0.820922,0.823955,0.820922,0.842683,0.842725,0.808162,0.754298
spectf,0.784249,0.79181,0.800014,0.802977,0.795556,0.802907,0.794801,0.800755,0.799315,0.802362,0.757317
vertebral,0.827097,0.811613,0.834194,0.817419,0.814194,0.816774,0.810968,0.812258,0.830968,0.810968,0.783871
