In [None]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from sklearn.isotonic import IsotonicRegression
from src.helpers.binnings import EqualWidthBinning
from src.calibrators.temp_scaling import TemperatureScaling
from src.calibrators.dir_scaling import Dirichlet_NN
from src.calibrators.neighborhood_calibrator import NeighborhoodCalibrator, kullback_leibler, minkowski_dist, uniform_weights_for_proportionally_k_closest

from sklearn.metrics import log_loss
from sklearn.preprocessing import normalize
from src.helpers.evaluation import bs

from src.helpers.figures import draw_cf_triangle
from src.helpers.load_save_data import generate_data

import pickle
import numpy as np
import copy

In [None]:
def true_cal_fun(p):
    """Synthetic ground-truth calibration map for 3-class probability vectors.

    Parameters
    ----------
    p : numpy array indexed as ``p[:, k]``; columns 0, 1 and 2 are read.

    Returns
    -------
    Array with the same shape as ``p`` whose rows sum to one. The first three
    columns hold the transformed scores; any further columns start from the
    constant 1 before normalisation. ``p`` itself is not modified.
    """
    first, second, third = p[:, 0], p[:, 1], p[:, 2]

    # Unnormalised scores: each class keeps (a power of) its own probability
    # plus a small interaction term between two of the classes.
    scores = np.ones(p.shape)
    scores[:, 0] = first**0.8 + first * second / 5
    scores[:, 1] = second + first * third / 3
    scores[:, 2] = third + first * second / 10

    # Rescale every row so that it is a probability vector again.
    row_totals = np.sum(scores, axis=1).reshape(-1, 1)
    return scores / row_totals

# Experiment configuration, consumed by the experiment loop further down.
# dir_dist is handed to generate_data as its first argument
# (presumably Dirichlet concentration parameters, one per class -- TODO confirm).
dir_dist = [0.5] * 3
n_train = 5_000    # sample count passed to generate_data for each training set
n_test = 10**5     # sample count passed to generate_data for each test set
n_seeds = 100      # number of repetitions of the experiment (one per seed index)

# Experiment and Figures

In [None]:
# Calibration methods compared in this experiment and the metrics recorded for each.
methods = ["uncal", "true_cal_fun", "hova", "hova_lecd", "dir", "ts", "lece"]
metrics = ["ll", "bs", "acc", "cw_ce", "cf_ce"]

# all_results[method][metric] collects one value per seed.
all_results = {method: {metric: [] for metric in metrics} for method in methods}

# Train sets use seeds 0..n_seeds-1; test sets use seed_idx + offset. The offset
# stays at the original 100 but grows with n_seeds, so the two seed ranges can
# never overlap even if n_seeds is raised above 100.
test_seed_offset = max(100, n_seeds)

# One one-vs-rest binning per entry of dir_dist (3 with the current configuration;
# assumes dir_dist has one entry per class -- TODO confirm against generate_data).
n_classes = len(dir_dist)

for seed_idx in range(n_seeds):
    print(seed_idx)
    data_train = generate_data(dir_dist, n_train, true_cal_fun, random_seed=seed_idx)
    data_test = generate_data(dir_dist, n_test, true_cal_fun,
                              random_seed=seed_idx + test_seed_offset)

    # --- Histogram binning, one-vs-rest: one equal-width binning per class ---
    binnings = [
        EqualWidthBinning(data_train["p"][:, k], data_train["y"][:, k], n_bins=5)
        for k in range(n_classes)
    ]

    def hova(p):
        """Per-class `eval_flat` predictions, stacked and L1-normalised per row."""
        per_class = [binning.eval_flat(p[:, k]) for k, binning in enumerate(binnings)]
        return normalize(np.dstack(per_class)[0], "l1")

    # NOTE(review): this variant is stored under the key "hova_lecd" but is
    # plotted below with the title "... with LECE assumption (new)" -- confirm
    # which name is intended.
    def hova_lecd(p):
        """Per-class `eval_slope_1` predictions, stacked and L1-normalised per row."""
        per_class = [binning.eval_slope_1(p[:, k]) for k, binning in enumerate(binnings)]
        return normalize(np.dstack(per_class)[0], "l1")

    # --- Dirichlet calibration, fitted on probabilities rather than logits ---
    dr = Dirichlet_NN(use_logits=False)
    dr.fit(data_train["p"], data_train["y"])

    def dir_scaler(p):
        """Apply the Dirichlet calibrator fitted on this seed's training data."""
        return dr.predict(p)

    # --- Temperature scaling, fed log-probabilities ---
    # NOTE(review): np.log yields -inf for probabilities that are exactly 0 --
    # TODO confirm generate_data never produces exact zeros.
    ts = TemperatureScaling()
    ts.fit(np.log(data_train["p"]), data_train["y"])

    def temp_scaler(p):
        """Apply the fitted temperature scaler to the log of `p`."""
        return ts.predict(np.log(p))

    # --- Neighbourhood calibrator configured with the LECE assumption ---
    lece = NeighborhoodCalibrator(weights_fun=uniform_weights_for_proportionally_k_closest,
                                  threshold=0.0,
                                  batch_size=500,
                                  use_LECE_assumption=True,
                                  distance_fun=kullback_leibler,
                                  neighborhood_size=0.10)
    lece.fit(data_train["p"], data_train["y"])

    def lece_cal(p):
        """Apply the neighbourhood calibrator fitted on this seed's training data."""
        return lece.predict(p)

    def uncal(p):
        """Identity map: the uncalibrated baseline."""
        return p

    # Keys must match `methods`, because they index into all_results.
    funs = {"uncal": uncal,
            "true_cal_fun": true_cal_fun,
            "hova": hova,
            "hova_lecd": hova_lecd,
            "dir": dir_scaler,
            "ts": temp_scaler,
            "lece": lece_cal,
            }

    for fun_name, fun in funs.items():
        print(fun_name)
        cal_p_test = fun(data_test["p"])
        scores = all_results[fun_name]

        scores["ll"].append(log_loss(data_test["y"], cal_p_test))
        scores["bs"].append(bs(data_test["y"], cal_p_test))
        # Accuracy: the predicted argmax class matches the argmax of the label rows.
        scores["acc"].append(np.mean(data_test["y"].argmax(axis=1) == cal_p_test.argmax(axis=1)))
        # Class-wise CE: mean absolute gap to data_test['c'] over all entries.
        scores["cw_ce"].append(np.mean(np.abs(cal_p_test - data_test['c'])))
        # Confidence CE: the same gap, restricted to the row-wise maxima.
        scores["cf_ce"].append(np.mean(np.abs(cal_p_test.max(axis=1) - data_test['c'].max(axis=1))))

    # Figures are produced once, from the first seed only.
    if seed_idx == 0:
        # (function to plot, panel title, figure name, extra keyword arguments)
        # The true map appears twice, under panel letters (c) and (d).
        panels = [
            (hova, "(a) Histogram binning one-vs-rest\nwith LECD assumption (classical)", "hova", {}),
            (hova_lecd, "(b) Histogram binning one-vs-rest\nwith LECE assumption (new)", "hova_lecd", {}),
            (temp_scaler, "(a) Temperature scaling", "ts", {}),
            (dir_scaler, "(b) Dirichlet calibration", "dir", {}),
            (lece_cal, "(c) LECE calibration (new)", "lece", {}),
            (true_cal_fun, "(c) True calibration map", "true_c", {"draw_axis": True}),
            (true_cal_fun, "(d) True calibration map", "true_d", {"draw_axis": True}),
        ]
        for panel_fun, panel_title, panel_name, panel_kwargs in panels:
            draw_cf_triangle(panel_fun, data_test=data_test,
                             title=panel_title, name=panel_name, **panel_kwargs)

# Table

In [None]:
import pandas as pd

# Columns (methods) and rows (metrics) of the final table, in display order.
method_sel = ["hova", "hova_lecd", "ts", "dir", "lece", "true_cal_fun"]
metrics_sel = ["cf_ce", "cw_ce", "bs", "ll", "acc"]

# Human-readable row labels for the metric keys.
name_map = {"cf_ce": "confidence CE",
            "cw_ce": "classwise CE",
            "bs": "Brier score",
            "ll": "log-loss",
            "acc": "accuracy"
            }


def _summarise_results(reducer):
    """Reduce every per-seed list in `all_results` to a single number.

    `reducer` is called on each list of per-seed values (e.g. np.mean, np.std).
    Returns a DataFrame with metrics_sel as rows and method_sel as columns,
    rounded to 3 decimals. Building the frame from already-reduced scalars
    avoids the deprecated element-wise DataFrame.applymap.
    """
    reduced = {
        method: {metric: reducer(all_results[method][metric]) for metric in metrics_sel}
        for method in method_sel
    }
    # reindex pins the row/column order explicitly instead of relying on dict order.
    return pd.DataFrame(reduced).round(3).reindex(index=metrics_sel, columns=method_sel)


means = _summarise_results(np.mean)
stds = _summarise_results(np.std)

In [None]:
# Print the results as LaTeX table rows: "<metric> & $mean\pm std$ & ... \\".
for row_idx, metric_key in enumerate(means.index):
    # "\\pm" is an escaped backslash; a bare "\p" is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    cells = [
        f"${means.iloc[row_idx, method_idx]}\\pm{stds.iloc[row_idx, method_idx]}$"
        for method_idx in range(len(method_sel))
    ]
    # Join with " & " so the row does not end in a dangling "&" (which would
    # open an extra, empty column) before the LaTeX row terminator.
    print(" & ".join([name_map[metric_key], *cells]), "\\\\")