In [37]:
# imports
# Third-party libraries first; the project-local imports further down depend on
# the sys.path changes made in between, so the statement order in this cell matters.
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Relative entries are resolved against the current working directory, so the
# notebook presumably has to be started from its own folder -- TODO confirm.
sys.path.append('../../') # to access the files in higher directories
sys.path.append('../') # to access the files in higher directories
# Raise the pandas display limits so result tables are not truncated when shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Presumably project-local modules found through the sys.path entries added above.
import Data.data_provider as dp
import core as cal
from estimators.IR_RF_estimator import IR_RF
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


In [38]:
# Experiment configuration -- everything a reader might want to tune lives here.

# Calibration methods and evaluation metrics to use
# (alternatives: cal.calib_methods.copy() / cal.metrics.copy()).
calib_methods = ["RF"]
metrics = ["acc", "tce", "brier", "ece"]

# When True, the experiment loop calls cal.plot_probs for every run.
plot = True

params = dict(
    runs=5,                             # repetitions per setting (seeds 0..runs-1)
    n_tree=[2, 3, 5, 10, 20, 50, 100],  # forest sizes swept by the experiment loop
    depth=6,                            # max_depth given to both forest models
    data_size=10000,                    # forwarded to the synthetic data generator
    n_features=2,                       # forwarded to the synthetic data generator
    oob=False,                          # oob_score given to both forest models
    test_split=0.3,                     # forwarded to cal.split_train_calib_test
    calib_split=0.1,                    # forwarded to cal.split_train_calib_test
)

In [39]:
# Names of the experiment settings; the experiment loop appends to this list.
data_list = []

# Generate the single synthetic dataset that every setting and run reuses.
# The generator returns three values; judging by its name, `tp` presumably
# holds the true class probabilities of the samples -- TODO confirm.
X, y, tp = dp.make_classification_gaussian_with_true_prob(
    params["data_size"],
    params["n_features"],
    class1_mean_min=0,
    class1_mean_max=1,
    class2_mean_min=1,
    class2_mean_max=3,
    seed=0,
)

# Optional visual sanity check of the generated points:
# plt.scatter(X[:,0], X[:,1], c=y)
# plt.show()

In [40]:
# Experiment sweep: for every forest size in params["n_tree"], repeat the
# split / train / calibrate cycle params["runs"] times (one seed per run).
#
# Both accumulators are (re)initialised here so that re-running this cell starts
# from a clean slate; otherwise every re-run would append the same setting names
# to data_list a second time.
calib_results_dict = {}
data_list = []

for exp_param in params["n_tree"]:
    data_dict = {} # results of all runs for this setting are collected here
    exp_data_name = str(exp_param) # the forest size doubles as the setting's name
    data_list.append(exp_data_name)

    # Both forests are built with identical hyper-parameters; only the seed varies per run.
    forest_kwargs = dict(n_estimators=exp_param, max_depth=params["depth"], oob_score=params["oob"])

    for seed in range(params["runs"]): # running the same dataset multiple times
        # split the data (the run's seed is handed to the splitter)
        data = cal.split_train_calib_test(exp_data_name, X, y, params["test_split"], params["calib_split"], seed, tp)

        # train models; further baselines can be registered in this dict, e.g.
        # models["dt"] = DecisionTreeClassifier().fit(data["x_train"], data["y_train"])
        models = {
            "RF_pet": RandomForestClassifier(random_state=seed, **forest_kwargs).fit(data["x_train"], data["y_train"]),
            "RF_ct": IR_RF(random_state=seed, **forest_kwargs).fit(data["x_train"], data["y_train"]),
        }

        # calibration: res is a dict with all the metrics results as well as RF probs
        # and every calibration method decision for every test data point
        res = cal.model_calibration(models, data, metrics)

        # results of every run for the same setting are aggregated in data_dict
        # (ex. acc of every run as an array)
        data_dict = cal.update_runs(data_dict, res)

        if plot:
            cal.plot_probs(exp_data_name, res, data, models, seed, ref_plot_name="RF_pet")

    # Merge this setting's aggregated results into the overall dict once all of
    # its runs are done, instead of repeating the merge after every single run.
    calib_results_dict.update(data_dict)


In [41]:
# Build the per-metric summary tables from the aggregated results.
# `models` is the dict left behind by the last iteration of the experiment loop.
# NOTE(review): the saved outputs list RF_l, LR and SVM columns that the loop
# cell does not register in `models`; they either originate inside `cal` or are
# stale outputs of an earlier version -- confirm with Restart Kernel & Run All.
tables = cal.mean_and_ranking_table(
    calib_results_dict,
    metrics,
    models,
    data_list,
    mean_and_rank=True,
)

In [42]:
# Table for the "acc" metric (presumably accuracy): one row per forest size,
# followed by the "Mean" and "Rank" summary rows.
tables["acc"]

Unnamed: 0_level_0,RF_pet,RF_l,RF_ct,LR,SVM
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.78,0.78,0.774667,0.795,0.793
3,0.791333,0.791333,0.789,0.795,0.793
5,0.794333,0.793,0.792667,0.795,0.793
10,0.791,0.790667,0.791333,0.795,0.793
20,0.791333,0.791667,0.791667,0.795,0.793
50,0.790333,0.790333,0.792333,0.795,0.793
100,0.792333,0.792,0.791667,0.795,0.793
Mean,0.790095,0.789857,0.789048,0.795,0.793
Rank,3.642857,3.928571,4.214286,1.0,2.214286


In [43]:
# Table for the "tce" metric (presumably a calibration error measured against
# the true probabilities -- TODO confirm): one row per forest size, then "Mean" and "Rank".
tables["tce"]

Unnamed: 0_level_0,RF_pet,RF_l,RF_ct,LR,SVM
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.010395,0.009367,0.053255,0.000391,0.007113
3,0.007015,0.006452,0.044673,0.000391,0.007113
5,0.00468,0.00436,0.04023,0.000391,0.007113
10,0.003233,0.003069,0.032748,0.000391,0.007113
20,0.002652,0.00258,0.029809,0.000391,0.007113
50,0.002512,0.00247,0.029343,0.000391,0.007113
100,0.002363,0.002345,0.027922,0.000391,0.007113
Mean,0.004693,0.004377,0.036854,0.000391,0.007113
Rank,3.142857,2.142857,5.0,1.0,3.714286


In [44]:
# Table for the "ece" metric (presumably expected calibration error):
# one row per forest size, then "Mean" and "Rank".
tables["ece"]

Unnamed: 0_level_0,RF_pet,RF_l,RF_ct,LR,SVM
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.032373,0.029776,0.1655,0.01863,0.040923
3,0.033089,0.0306,0.152,0.01863,0.040923
5,0.02771,0.024153,0.142333,0.01863,0.040923
10,0.025013,0.025648,0.135233,0.01863,0.040923
20,0.020951,0.020809,0.128883,0.01863,0.040923
50,0.017936,0.019828,0.131353,0.01863,0.040923
100,0.019516,0.020255,0.127307,0.01863,0.040923
Mean,0.025227,0.024438,0.140373,0.01863,0.040923
Rank,2.428571,2.428571,5.0,1.142857,4.0


In [45]:
# Table for the "brier" metric (presumably the Brier score):
# one row per forest size, then "Mean" and "Rank".
tables["brier"]

Unnamed: 0_level_0,RF_pet,RF_l,RF_ct,LR,SVM
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.153167,0.152317,0.19325,0.144111,0.15067
3,0.150201,0.14976,0.186,0.144111,0.15067
5,0.147769,0.14763,0.180493,0.144111,0.15067
10,0.146953,0.146844,0.174077,0.144111,0.15067
20,0.146588,0.146595,0.172662,0.144111,0.15067
50,0.146399,0.14647,0.172168,0.144111,0.15067
100,0.146189,0.146279,0.170458,0.144111,0.15067
Mean,0.148181,0.147985,0.178444,0.144111,0.15067
Rank,2.714286,2.571429,5.0,1.0,3.714286
