In [1]:
# How changing the difficulty of a dataset affects the calibration methods
# Change the difficulty of the generated data such that the RF achieves an accuracy (ACC) in the range from 100% down to 50%

In [2]:
# imports
import sys
import pandas as pd
import numpy as np
sys.path.append('../../') # to access the files in higher directories
sys.path.append('../') # to access the files in higher directories
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import Data.data_provider as dp
import core as cal
from estimators.IR_RF_estimator import IR_RF

In [3]:
# Experiment configuration
# Copies of the defaults exposed by `core`, so this notebook works on its own objects.
metrics = cal.metrics.copy()  # e.g. ["acc", "auc", "brier", "logloss", "ece", "tce"]
calib_methods = cal.calib_methods.copy()  # e.g. ["RF", "Platt", "ISO", "Rank", "CRF", "VA", "Beta", "Elkan", "tlr", "Line"]

data_name = "S_difficulty"

params = dict(
    # number of repetitions per dataset; each repetition uses its index as the seed
    runs=50,
    # difficulty levels to sweep
    # NOTE(review): the previous comment described these as a "percentage of the
    # x_calib data", which looks copied from a calibration-size experiment -- confirm units
    data_difficulty=[10, 20, 40, 60, 80, 100],
    # passed to the synthetic data generator
    data_size=1000,
    n_features=40,
    # passed to IR_RF (n_estimators / oob_score)
    n_estimators=100,
    oob=False,
    # passed to cal.split_train_calib_test
    test_split=0.3,
    calib_split=0.1,
)

In [4]:
# Baseline check on a single split (seed 0): fit the forest and display its
# score on the test part of the data.
seed = 0

X, y, tp = dp.make_classification_gaussian_with_true_prob(
    params["data_size"],
    params["n_features"],
    seed,
)
data = cal.split_train_calib_test(
    "diff_test",
    X,
    y,
    params["test_split"],
    params["calib_split"],
    seed,
)

irrf = IR_RF(
    n_estimators=params["n_estimators"],
    oob_score=params["oob"],
    random_state=seed,
)
irrf.fit(data["x_train"], data["y_train"])

# Last expression of the cell, so the value is shown as the cell output.
acc = irrf.score(data["x_test"], data["y_test"])
acc

0.8466666666666667

In [5]:
# Sweep over the configured difficulty levels. For each level: generate a dataset,
# repeat split / fit / calibrate params["runs"] times (seed = run index), and
# collect the per-run results. Finally build the result tables.
data_list = []  # dataset labels, in sweep order
calib_results_dict = {}  # merged results of all datasets

for exp_data_difficulty in params["data_difficulty"]:

    # Data
    # NOTE(review): exp_data_difficulty is only used to build the label below; the
    # generator call receives the same arguments (and fixed seed 0) on every
    # iteration, so every "difficulty" level currently gets an identical dataset.
    # Confirm how the generator is supposed to receive the difficulty.
    exp_data_name = str(exp_data_difficulty) # label only; the data_name + "_" prefix is disabled
    data_list.append(exp_data_name)
    # NOTE(review): tp is unpacked here but never passed on. The recorded run of
    # this cell fails with KeyError: 'tp_test', which suggests something downstream
    # expects the true probabilities in `data` -- TODO confirm whether
    # cal.split_train_calib_test should receive tp.
    X, y, tp = dp.make_classification_gaussian_with_true_prob(params["data_size"], params["n_features"], 0)

    data_dict = {} # results for this dataset, accumulated over all runs
    for seed in range(params["runs"]): # repeat the same dataset with a different seed per run
        # split the data (the seed drives the split)
        data = cal.split_train_calib_test(exp_data_name, X, y, params["test_split"], params["calib_split"], seed)

        # train model on the training part only
        irrf = IR_RF(n_estimators=params["n_estimators"], oob_score=params["oob"], random_state=seed)
        irrf.fit(data["x_train"], data["y_train"])

        # calibration
        res = cal.calibration(irrf, data, calib_methods, metrics) # per the original author: dict with all metric results plus RF probs and each calibration method's decision per test point
        data_dict = cal.update_runs(data_dict, res) # per the original author: aggregates the results of every run of this dataset (e.g. acc of every run as an array)
    calib_results_dict.update(data_dict) # merge results of all datasets together
# Same call as the one in the following cell of this notebook.
tables = cal.mean_and_ranking_table(calib_results_dict, metrics, calib_methods, data_list, mean_and_rank=False)

KeyError: 'tp_test'

In [None]:
# Build the result tables (indexed by metric further down) from the aggregated runs.
# This is the same call that ends the sweep cell; presumably kept separately so the
# tables can be rebuilt without re-running the whole sweep.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    metrics,
    calib_methods,
    data_list,
    mean_and_rank=False,
)

In [None]:
# Methods to show in the plots: everything in `calib_methods` except "VA".
# Build a new list rather than aliasing `calib_methods`: calling .remove() on an
# alias would also drop "VA" from `calib_methods` itself and would raise
# ValueError when this cell is run a second time.
plot_calib_methods = [method for method in calib_methods if method != "VA"]
plot_calib_methods

In [None]:
# One figure per metric, restricted to the methods selected in `plot_calib_methods`.
# Assumes tables[metric] is a DataFrame with one column per calibration method and
# one row per entry of `data_list` -- TODO confirm against cal.mean_and_ranking_table.
for metric in metrics:
    ax = tables[metric][plot_calib_methods].plot()
    # This notebook sweeps the dataset difficulty, not the calibration-set size,
    # so the x-axis is labelled accordingly.
    ax.set_xlabel("Data difficulty")
    ax.set_ylabel(metric)