In [3]:
import numpy as np
import pickle
import os
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [4]:
def bin_data(y, n_bins):
    """
    Partitions the data into ordered, equal-width bins based on
    the probabilities. Returns the binned indices.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Probabilities (confidences) to be binned over [0, 1].
    n_bins : int
        Number of equal-width bins spanning [0, 1].

    Returns
    -------
    list of np.ndarray
        ``n_bins`` index arrays. With ``edges = np.linspace(0, 1, n_bins + 1)``,
        entry ``i`` holds the positions of the values lying in
        ``(edges[i], edges[i + 1]]``. The first bin also receives values
        <= 0 and the last bin values > 1, so every sample lands in a bin.
    """
    # n_bins intervals need n_bins + 1 edges. Only the interior edges are
    # handed to digitize so the returned bin ids run over 0 .. n_bins - 1.
    edges = np.linspace(0, 1, n_bins + 1)
    bin_idx = np.digitize(y, edges[1:-1], right=True)
    binned_idx = [np.where(bin_idx == i)[0] for i in range(n_bins)]

    return binned_idx
    
def bin_stats(y_true, y_proba, bin_idx):
    """
    Summarises each bin by its accuracy and its mean prediction confidence.

    Parameters
    ----------
    y_true : np.ndarray
        True labels, compared against the argmax column of ``y_proba``.
    y_proba : np.ndarray
        Per-sample class probabilities; argmax/max are taken along axis 1.
    bin_idx : sequence of index arrays
        Sample positions belonging to each bin.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Per-bin accuracy and per-bin mean of the maximum probability.
        A bin without samples is reported as 0 in both arrays.
    """
    accuracies = []
    confidences = []
    for members in bin_idx:
        if len(members) > 0:
            # fraction of samples in the bin whose argmax matches the label
            predicted = np.argmax(y_proba[members], axis=1)
            accuracies.append(np.equal(predicted, y_true[members]).mean())
            # average top probability of the samples in the bin
            confidences.append(np.mean(np.max(y_proba[members], axis=1)))
        else:
            accuracies.append(0)
            confidences.append(0)

    return np.asarray(accuracies), np.asarray(confidences)

def ece(y_true, y_proba, n_bins):
    """
    Expected calibration error.

    Samples are binned by their maximum predicted probability. The absolute
    gap between accuracy and mean confidence of each bin is weighted by the
    bin size, summed, and divided by ``len(y_true)``.
    """
    top_proba = y_proba.max(axis=1)
    groups = bin_data(top_proba, n_bins)

    accuracy, confidence = bin_stats(y_true, y_proba, groups)
    counts = np.asarray([len(members) for members in groups])
    gaps = np.abs(accuracy - confidence)

    return np.sum(gaps * counts) / len(y_true)

def mce(y_true, y_proba, n_bins):
    """
    Maximum calibration error.

    Samples are binned by their maximum predicted probability; the result is
    the largest absolute gap between the accuracy and the mean confidence of
    any bin.

    Parameters
    ----------
    y_true : np.ndarray
        True labels.
    y_proba : np.ndarray
        Per-sample class probabilities (max is taken along axis 1).
    n_bins : int
        Number of bins forwarded to ``bin_data``.

    Returns
    -------
    float
        ``max_i |acc_i - conf_i|`` over the bins.
    """
    bin_idx = bin_data(y_proba.max(axis=1), n_bins)

    # The per-bin statistics used to be printed here; that debug output was
    # removed so the metric can be called in loops without flooding stdout.
    bin_acc, bin_conf = bin_stats(y_true, y_proba, bin_idx)

    return np.max(np.abs(bin_acc - bin_conf))

def brier_score_mvloss(y_true, y_proba):
    """
    Multi-class Brier score: the squared difference between the predicted
    probabilities and the one-hot targets, averaged over classes and then
    over samples.

    Parameters
    ----------
    y_true : np.ndarray
        Either a 1-D array of integer class labels (used as row indices into
        an identity matrix, i.e. labels are expected in
        ``0 .. y_proba.shape[1] - 1``) or an already one-hot encoded 2-D array.
    y_proba : np.ndarray of shape (n_samples, n_classes)
        Predicted class probabilities.

    Returns
    -------
    float
        Mean over samples of the per-sample mean squared error.
    """
    if y_true.ndim == 1:
        # Size the one-hot encoding by the columns of y_proba, not by the
        # labels that happen to occur in y_true: a fold that lacks a class
        # would otherwise yield a too-narrow encoding (IndexError or a shape
        # mismatch). No squeeze, so the (n_samples, n_classes) shape is kept
        # even for a single sample or a single column.
        n_classes = y_proba.shape[1]
        y_true = np.eye(n_classes)[y_true.reshape(-1)]
    return np.mean(
        np.mean((y_proba - y_true)**2, axis=1)
    )

In [5]:
# Keys of the models to compare inside each results file; the same names
# are reused as the score columns of the summary table.
clfs = [
    'RF',
    'IRF',
    'SigRF',
    'UF',
]

In [77]:
# Build one summary row per results file: dataset metadata followed by the
# "mean (std)" Brier score of every classifier in `clfs` across the folds.
df_rows = []
task_ids = []
results_dir = './results/'
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the exported CSV) reproducible between runs and machines.
for file in sorted(os.listdir(results_dir)):
    path = os.path.join(results_dir, file)
    if not os.path.isfile(path):
        # skip sub-directories (e.g. .ipynb_checkpoints) that cannot be opened
        continue

    # NOTE(review): pickle.load can execute arbitrary code; only point
    # results_dir at result files produced by a trusted run.
    with open(path, 'rb') as f:
        results_dict = pickle.load(f)

    score_means = []
    score_stds = []
    for name in clfs:
        # Brier score of each fold, computed once and reused for mean and std
        fold_scores = [
            brier_score_mvloss(results_dict['y'][idx], y_proba)
            for y_proba, idx in zip(results_dict[name], results_dict['test_indices'])
        ]
        score_means.append(np.mean(fold_scores))
        score_stds.append(np.std(fold_scores))

    row = [f'{m:.3f} ({s:.3f})' for m, s in zip(score_means, score_stds)]
    row = [results_dict['task'], results_dict['n_classes'], results_dict['n_samples'], results_dict['n_features']] + row
    task_ids.append(results_dict['task_id'])
    df_rows.append(row)

In [78]:
# Column names: dataset metadata first, then one score column per classifier
header = ['Dataset', 'n_classes', 'n_samples', 'n_features', *clfs]

In [79]:
# One row per results file, labelled with the columns listed in `header`
score_df = pd.DataFrame(
    data=df_rows,
    columns=header,
)

In [84]:
# Create the output folder first so the export also works on a fresh
# checkout where ./figures does not exist yet.
os.makedirs('./figures', exist_ok=True)
score_df.to_csv('./figures/brier_scores.csv')

In [88]:
# Temporarily lift the row/column display limits so the whole table is
# printed; the previous pandas options are restored when the block exits.
show_everything = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with show_everything:
    print(score_df)

                                   Dataset  n_classes  n_samples  n_features  \
0                                pendigits         10      10992          16   
1                                      jm1          2      10885          21   
2                                     sick          2       3772          29   
3                                  texture         11       5500          40   
4                            mfeat-zernike         10       2000          47   
5                                 spambase          2       4601          57   
6                       steel-plates-fault          7       1941          27   
7                          ozone-level-8hr          2       2534          72   
8                                      dna          3       3186         180   
9        GesturePhaseSegmentationProcessed          5       9873          32   
10                               connect-4          3      67557          42   
11                                     p