In [1]:
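# This is the code used for the paper results.
# How changing the dataset sample size affects the calibration methods:
# fix the training dataset size and change the number of calibration-set samples - the best method is the one that achieves the best calibration with the least data.
# NOTE(review): the experiment configured below sweeps "overlap_delta" (see params["exp_key"]), not the sample size - confirm that this description is still current.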

# imports
import os
import sys
import numpy as np 
import pandas as pd
# Put the two parent directories on the import path so that the project
# modules imported below can be found (paths are relative to the working directory).
sys.path.extend(["../../", "../"])

# Raise the pandas display limits so that wide / long result tables are shown in full.
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

import core_exp as cx
import core_calib as cal
import core_tools as ct
import matplotlib.pyplot as plt


In [2]:
# Upper end of the swept range, keyed by the number of features
# (2D -> 6, 5D -> 3.865, 10D -> 2.586, 20D -> 1.863).
D_limits = {
    2: 6,
    5: 3.865,
    10: 2.586,
    20: 1.863,
}

# Number of features used for this run; also selects the entry of D_limits.
DL = 20

In [3]:
# Ten log-spaced points: 10**x for x evenly spaced in [0, 1] runs from 1 to 10,
# so after subtracting 1 the array starts at 0 and ends at 9.
log_array = np.logspace(start=0, stop=1, num=10, base=10)
log_array = log_array - 1

# Rescale so the values span [0, D_limits[DL]]
# (2D: 6 - 5D: 3.865 - 10D: 2.586 - 20D: 1.863).
normalized_log_array = log_array / log_array[-1]
normalized_log_array = normalized_log_array * D_limits[DL]


In [4]:
# Experiment configuration: passed to cx.run_exp and persisted with ct.save_params.
# NOTE(review): the meaning of most keys is defined by the project modules
# (core_exp / core_calib / core_tools) - check there before changing values.
params = {
    # --- experiment ---
    "seed": 0,
    "runs": 5,
    "n_features": DL,
    "exp_name": ct.generate_readable_short_id(f"acc_test{DL}D"),
    "path": "../../",
    "split": "CV",  # alternatives noted by the author: CV, random_split
    "cv_folds": 5,
    # Currently disabled:
    #   "test_split": 0.2,
    #   "calib_split": 0.2,
    "plot": True,

    # Methods that are evaluated. Only "RF_d" is active; names that are
    # currently switched off: "RF_opt", "RF_large", "Platt", "ISO", "Beta",
    # "VA", "CT", "PPA", "Rank".
    "calib_methods": ["RF_d"],

    # Line colour per method in the figures. Switched-off entries:
    # Platt: Brown, ISO: purple, Beta: magenta, VA: gray, CT: slategray,
    # PPA: olive, Rank: silver.
    "calib_method_colors": {
        "RF_d": "blue",
        "RF_opt": "orange",
        "RF_large": "red",
    },

    "metrics": [
        "acc",
        "brier",
        "ece",
        "logloss",
        "tce_mse",
        "unique_prob",
        "prob_ent",
        "IL",
        "CLGL",
    ],

    # --- data ---
    "data_name": "synthetic_o",
    "plot_data": True,
    "data_size": 1000,

    "bais_accuracy": 0,

    "class1_mean_min": 0,
    "class1_mean_max": 1,
    "class2_mean_min": 2.75,
    "class2_mean_max": 3.75,

    "class1_cov_min": 1,
    "class1_cov_max": 2,
    "class2_cov_min": 1,
    "class2_cov_max": 2,

    # Default value; this is the key that the sweep below varies.
    "overlap_delta": 0,

    # --- calibration parameters ---
    "bin_strategy": "uniform",
    "ece_bins": 20,
    "boot_size": 1000,
    "boot_count": 5,

    # --- random-forest hyper-parameter optimisation ---
    "hyper_opt": True,
    "opt_cv": 5,
    "opt_n_iter": 50,
    "opt_top_K": 5,
    "search_space": {
        "n_estimators": [10],
        "max_depth": list(range(2, 100)),
        "criterion": ["gini", "entropy"],
        "max_features": ["sqrt", "log2", None],
        "min_samples_split": list(range(2, 11)),
        "min_samples_leaf": list(range(1, 11)),
        # "bootstrap": [True, False] is switched off (whether bootstrap samples are used).
        "class_weight": [None, "balanced", "balanced_subsample"],  # weight for each class
        "oob_score": [False],
    },

    "oob": False,
    "laplace": 0,  # if set to 1, OOB must be false
    "curt_v": list(range(1, 100)),

    # --- sweep definition: which key is varied and over which values ---
    "exp_key": "overlap_delta",
    # Earlier alternatives for the values: a hand-written list
    # [2.46, 2.21, 1.96, 1.71, 1.46, 1.21, 0.96, 0.71, 0.46, 0.21, 0]
    # or np.linspace(0, D_limits[DL], 20).tolist().
    "exp_values": normalized_log_array.tolist(),
}

ct.save_params(params)

In [5]:
# Run the sweep over params["exp_values"] for the key named in params["exp_key"],
# then store the returned results under the experiment name.
calib_results_dict, data_list = cx.run_exp(
    params["exp_key"],
    params["exp_values"],
    params,
)
ct.save_results(calib_results_dict, params["exp_name"])

exp_param 0.0 done
exp_param 0.060350780658080964 done
exp_param 0.13829681120041218 done
exp_param 0.23896798083659995 done
exp_param 0.3689897962568748 done
exp_param 0.5369194284075579 done
exp_param 0.7538088885578451 done
exp_param 1.0339323981602075 done
exp_param 1.3957258231499328 done
exp_param 1.863 done


In [6]:
# Methods drawn in the figures: every configured method except the names in
# `hidden_methods` (nothing is hidden at the moment). Names that were toggled
# here before: "RF_large", "Elkan", "Rank", "ISO", "Beta", "PPA", "Platt".
hidden_methods = ()
plot_calib_methods = [
    method for method in params["calib_methods"] if method not in hidden_methods
]
plot_calib_methods

['RF_d']

In [7]:
# Build the per-metric result tables; `tables[metric]` is indexed below by
# method name, so it is expected to have one column per calibration method.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    params["metrics"],
    params["calib_methods"],
    data_list,
    mean_and_rank=False,
    std=True,
)

# Output directory for the figures; exist_ok avoids the check-then-create race.
path = f"results/{params['exp_name']}/features"
os.makedirs(path, exist_ok=True)

# Optional y-axis limits per metric as (min, max). Metrics that are not listed
# keep matplotlib's automatic scaling, e.g. {"brier": (None, 0.4)}.
# Keying by metric name (instead of two positional lists consumed through zip)
# means no metric can be silently skipped when the metric list changes.
y_limits = {}

# Axis labels for the metrics whose display name differs from their key;
# every other metric is labelled with its key.
metric_labels = {
    "acc": "ACC",
    "logloss": "LogLoss",
    "ece": "ECE",
    "brier": "Brier",
    "tce_mse": "TCE_MSE",
    "CLGL": "CL+GL",
}

for metric in params["metrics"]:
    df = tables[metric][plot_calib_methods].reset_index(drop=True)
    # One evenly spaced x value per row, running from 100 down to 0. The
    # length follows the table, so a different number of sweep values works.
    # NOTE(review): the spacing is linear regardless of the actual swept
    # values - confirm that this matches the intended "Overlap %" scale.
    df = df.set_index(np.linspace(100, 0, len(df)))

    metric_p = metric_labels.get(metric, metric)
    min_v, max_v = y_limits.get(metric, (None, None))

    ax = df.plot(color=params["calib_method_colors"])
    ax.set_xlabel("Overlap %")  # params["exp_key"]
    ax.set_ylabel(metric_p)
    ax.set_ylim(min_v, max_v)
    ax.figure.savefig(f"{path}/f_{metric}.pdf", format="pdf", transparent=True)
    # Close the figure explicitly so figures do not accumulate across the loop.
    plt.close(ax.figure)
