In [1]:
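# This is the code used for the paper results.
# How changing the dataset sample size affects the calibration methods:
# fix the training dataset size and change the calib set samples - the best method is the one that gets max calibration with the least data.
# NOTE(review): the description above looks carried over from the sample-size experiment;
# this notebook sweeps "n_features" (see params["exp_key"]) - confirm and update the wording.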

# imports
import os
import sys
import numpy as np 
import pandas as pd
# Make modules living one and two directories up importable (order preserved).
sys.path.extend(['../../', '../'])

# Widen the pandas display limits so large result tables are not truncated.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
    'display.width', 1000,
)

import core_exp as cx
import core_calib as cal
import core_tools as ct
import matplotlib.pyplot as plt


In [2]:
# Single configuration dictionary for the whole experiment. It is persisted via
# ct.save_params and later handed to cx.run_exp unchanged.
params = {
    # ---- experiment ----
    "seed": 0,
    "runs": 10,
    "exp_name": ct.generate_readable_short_id("feature_size_noACC"),
    "path": "../../",
    "split": "CV",  # options: CV, random_split
    "cv_folds": 5,

    # Disabled split settings, kept for reference:
    # "test_split": 0.1,
    # "calib_split": 0.11,

    "plot": False,
    "calib_methods": [
        "RF_d", "RF_opt", "RF_large",
        "Platt", "ISO", "Beta", "PPA", "VA",
        "Rank",
        # disabled: "Elkan", "tlr"
    ],

    # One line colour per calibration method (used when plotting the results).
    "calib_method_colors": {
        "RF_d": "blue",
        "RF_opt": "orange",
        "RF_large": "red",
        "Platt": "Brown",
        "ISO": "purple",
        "Beta": "magenta",
        "PPA": "olive",
        "VA": "gray",
        "Rank": "silver",
    },

    "metrics": ["acc", "brier", "ece", "logloss", "tce"],

    # ---- data ----
    "data_name": "synthetic",
    "plot_data": True,
    "data_size": 1000,
    "n_features": 2,  # presumably overridden per sweep value (see "exp_key") - confirm in core_exp

    "bais_accuracy": 0,

    "class1_mean_min": 0,
    "class1_mean_max": 1,
    "class2_mean_min": 0,
    "class2_mean_max": 1,

    "class1_cov_min": 1,
    "class1_cov_max": 2,
    "class2_cov_min": 1,
    "class2_cov_max": 2.5,

    # ---- calibration ----
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # ---- random-forest hyper-parameter optimisation ----
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 50,
    "opt_top_K": 5,
    "search_space": {
        "n_estimators": [10],
        "max_depth": list(range(2, 100)),
        "criterion": ["gini", "entropy"],
        "max_features": ['sqrt', 'log2', None],
        "min_samples_split": list(range(2, 11)),
        "min_samples_leaf": list(range(1, 11)),
        # 'bootstrap': [True, False],  # whether bootstrap samples are used
        'class_weight': [None, 'balanced', 'balanced_subsample'],  # weight for each class
        "oob_score": [False],
    },

    "oob": False,
    "laplace": 0,  # if set to 1, OOB must be false

    # ---- swept parameter: the key to vary and the values it takes ----
    "exp_key": "n_features",
    "exp_values": list(range(2, 50, 2)),
}

ct.save_params(params)

In [3]:
# Run the sweep over params["exp_values"] for the parameter named by
# params["exp_key"], then store the results under the experiment name.
calib_results_dict, data_list = cx.run_exp(
    params["exp_key"],
    params["exp_values"],
    params,
)
ct.save_results(calib_results_dict, params['exp_name'])

2: 0.0 ACC 0.555
exp_param 2 done
4: 0.0 ACC 0.55
exp_param 4 done
6: 0.0 ACC 0.59
exp_param 6 done
8: 0.0 ACC 0.67
exp_param 8 done
10: 0.0 ACC 0.615
exp_param 10 done
12: 0.0 ACC 0.725
exp_param 12 done
14: 0.0 ACC 0.615
exp_param 14 done
16: 0.0 ACC 0.685
exp_param 16 done
18: 0.0 ACC 0.81
exp_param 18 done
20: 0.0 ACC 0.775
exp_param 20 done
22: 0.0 ACC 0.825
exp_param 22 done
24: 0.0 ACC 0.83
exp_param 24 done
26: 0.0 ACC 0.8
exp_param 26 done
28: 0.0 ACC 0.82
exp_param 28 done
30: 0.0 ACC 0.845
exp_param 30 done
32: 0.0 ACC 0.86
exp_param 32 done
34: 0.0 ACC 0.845
exp_param 34 done
36: 0.0 ACC 0.845
exp_param 36 done
38: 0.0 ACC 0.855
exp_param 38 done
40: 0.0 ACC 0.85
exp_param 40 done
42: 0.0 ACC 0.855
exp_param 42 done
44: 0.0 ACC 0.89
exp_param 44 done
46: 0.0 ACC 0.825
exp_param 46 done
48: 0.0 ACC 0.875
exp_param 48 done


In [4]:
# Methods to draw in the figures: start from an independent copy of the
# configured list so removals here never touch params["calib_methods"].
plot_calib_methods = list(params["calib_methods"])

# Uncomment a line below to hide that method from the plots.
# plot_calib_methods.remove("RF_large")
# plot_calib_methods.remove("Elkan")
# plot_calib_methods.remove("Rank")
# plot_calib_methods.remove("ISO")
# plot_calib_methods.remove("Beta")
# plot_calib_methods.remove("PPA")
# plot_calib_methods.remove("Platt")

plot_calib_methods

['RF_d', 'RF_opt', 'RF_large', 'Platt', 'ISO', 'Beta', 'PPA', 'VA', 'Rank']

In [13]:
# Build the per-metric result tables (indexed below as tables[metric]) for all
# configured calibration methods.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=False,
    std=True,
)

# Output folder for the figures; exist_ok avoids the check-then-create race
# and makes the cell safe to re-run.
path = f"results/{params['exp_name']}/features"
os.makedirs(path, exist_ok=True)

# Y-axis limits as (min, max), keyed by metric name so they cannot drift out
# of sync with the order or length of params["metrics"]. None means
# "let matplotlib autoscale that bound"; metrics without an entry are fully
# autoscaled.
y_limits = {
    "acc": (None, None),
    "brier": (None, 0.2),
    "ece": (None, 0.125),
    "logloss": (None, 2),
    "tce": (0.00, 0.3),
}

for metric in params["metrics"]:
    min_v, max_v = y_limits.get(metric, (None, None))

    # Restrict the table to the methods selected for plotting.
    df = tables[metric][plot_calib_methods].copy()

    # # remove noise by normalization
    # base = np.array(df["RF_d"])
    # ref = base[0]
    # for i in range(len(base)):
    #     base[i] = base[i] / ref
    # for col_name in plot_calib_methods:
    #     df[col_name] = df[col_name] / base

    # print(df)
    # ax = tables[metric][plot_calib_methods].plot()
    ax = df.plot(color=params["calib_method_colors"])
    ax.set_xlabel(params["exp_key"])
    ax.set_ylabel(metric)
    ax.set_ylim(min_v, max_v)

    # Save and close through the explicit figure handle rather than the
    # pyplot "current figure", so each iteration only touches its own figure.
    fig = ax.get_figure()
    fig.savefig(f"{path}/f_{metric}.pdf", format='pdf', transparent=True)
    plt.close(fig)
