In [1]:
# imports
import sys
import pandas as pd
import numpy as np
sys.path.append('../../') # to access the files in higher directories
sys.path.append('../') # to access the files in higher directories
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import Data.data_provider as dp
import core as cal
from estimators.IR_RF_estimator import IR_RF
from sklearn.ensemble import RandomForestClassifier


In [2]:
# params
calib_methods = cal.calib_methods.copy()
metrics = ["acc", "logloss", "brier", "ece"]

data_list = ["spambase", "climate", "QSAR", "bank", "climate", "parkinsons", "vertebral", "ionosphere", "diabetes", "breast", "blod"]
# data_list = ["spambase", "climate"]

params = {
    "runs": 5,
    "n_estimators": 100,
    "oob": False,
    "test_split": 0.3,
    "calib_split": 0.1
}

In [3]:
calib_results_dict = {}

for data_name in data_list:

    # Data
    X, y = dp.load_data(data_name, "../../")
    
    data_dict = {} # results for each data set will be saved in here.
    for seed in range(params["runs"]): # running the same dataset multiple times
        # split the data
        data = cal.split_train_calib_test(data_name, X, y, params["test_split"], params["calib_split"], seed)
        models = {}
        # train model
        rf = RandomForestClassifier(n_estimators=params["n_estimators"], oob_score=params["oob"], random_state=seed).fit(data["x_train"], data["y_train"])
        models["RF_pet"] = rf

        # irrf_l = IR_RF(n_estimators=params["n_estimators"], oob_score=params["oob"], random_state=seed).fit(data["x_train"], data["y_train"])
        # models["RF_l"] = irrf_l

        irrf_ct = IR_RF(n_estimators=params["n_estimators"], oob_score=params["oob"], random_state=seed).fit(data["x_train"], data["y_train"])
        models["RF_ct"] = irrf_ct

        # calibration
        res = cal.model_calibration(models, data, metrics) # res is a dict with all the metrics results as well as RF probs and every calibration method decision for every test data point
        data_dict = cal.update_runs(data_dict, res) # calib results for every run for the same dataset is aggregated in data_dict (ex. acc of every run as an array)
    calib_results_dict.update(data_dict) # merge results of all datasets together

In [4]:
tables = cal.mean_and_ranking_table(calib_results_dict, metrics, models, data_list, mean_and_rank=True)

In [5]:
tables["brier"]

Unnamed: 0_level_0,RF_pet,RF_ct
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.042282,0.042271
climate,0.070536,0.070536
QSAR,0.100068,0.100068
bank,0.008017,0.008017
climate,0.070536,0.070536
parkinsons,0.079268,0.079268
vertebral,0.105734,0.105734
ionosphere,0.056381,0.056381
diabetes,0.160032,0.160032
breast,0.042604,0.042604


In [6]:
tables["acc"]

Unnamed: 0_level_0,RF_pet,RF_ct
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.949602,0.949747
climate,0.896296,0.896296
QSAR,0.863722,0.863722
bank,0.991748,0.991748
climate,0.896296,0.896296
parkinsons,0.901695,0.901695
vertebral,0.84086,0.84086
ionosphere,0.930189,0.930189
diabetes,0.765368,0.765368
breast,0.940351,0.940351


In [7]:
tables["ece"]

Unnamed: 0_level_0,RF_pet,RF_ct
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
spambase,0.046962,0.047088
climate,0.057975,0.057975
QSAR,0.047836,0.047836
bank,0.020796,0.020796
climate,0.057975,0.057975
parkinsons,0.089695,0.089695
vertebral,0.081484,0.081484
ionosphere,0.066283,0.066283
diabetes,0.054442,0.055775
breast,0.03076,0.03076
