In [None]:
# Build Dolan-Moré performance profiles; for background see:
# http://abelsiqueira.github.io/blog/introduction-to-performance-profile/

In [None]:
from pathlib import Path
import random

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
%matplotlib inline

In [None]:
# Seed every random number generator used by the notebook with the same value.
SEED = 10
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Select GPU 0 only when CUDA is usable: an unconditional set_device(0)
# raises on a CPU-only machine and would stop the notebook at this cell.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
# cuDNN flags: deterministic mode on, benchmark mode off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [None]:
# Directory holding the regression results, relative to the working directory.
folder = Path('./data/regression')
# Read the results table, then discard rows that are exact duplicates.
results_csv = folder / 'experiment_results.csv'
df = pd.read_csv(results_csv)
df = df.drop_duplicates()

In [None]:
# Split the mask names found in the results into two lists:
# names starting with 'e_' go to `emasks`, everything else except 'eue' to `masks`.
mask_names = df.Mask.unique()
emasks = [name for name in mask_names if name.startswith('e_')]
masks = [
    name for name in mask_names
    if not name.startswith('e_') and name != 'eue'
]
print(masks)
print(emasks)

In [None]:
# The dataset name is the part of each file name before the first underscore.
dsets = [name.split('_')[0] for name in df.fname]
df['dset'] = dsets

In [None]:
# For every experiment file, turn the chosen metric into performance ratios:
# invert each score, then divide by the smallest inverted score of that file,
# so the best-scoring mask in a file ends up with a ratio of exactly 1.
large_data = []
metric = 'Acc'
for fname, df_temp in df.groupby('fname'):
    eue_rows = df_temp[df_temp.Mask == 'eue']
    # Exactly one 'eue' row is expected per file.
    assert len(eue_rows) == 1
    scores = {'eue': eue_rows[metric].values[0]}
    # For every other mask the first matching row is taken.
    scores.update(
        (mask, df_temp[df_temp.Mask == mask][metric].values[0])
        for mask in masks
    )
    inverted = pd.DataFrame(
        [{name: 1./score for name, score in scores.items()}]
    )

    row_minimum = inverted.min(axis = 1).to_numpy()
    df_res = pd.DataFrame(inverted.to_numpy() / row_minimum[:, None],
                          columns = inverted.columns)
    large_data.append(df_res)

In [None]:
# Stack the one-row frames into a single table, one row per experiment file.
# ignore_index gives each row its own label instead of every row keeping label 0.
df_res = pd.concat(large_data, ignore_index=True)

In [None]:
def _plot_profile(df_res, column, taus, npp, plotter, *fmt, **kwargs):
    """Plot one profile curve for `column` and print its value at the first tau."""
    ratios = df_res[column].values
    # Fraction of rows whose ratio is within each threshold.
    pas = [np.count_nonzero(ratios <= tau) / npp for tau in taus]
    print(f'{column}, {pas[0]:.2f}')
    plotter(taus, pas, *fmt, label = column, **kwargs)


def make_dm_plot(df_res, masks, emasks,
                 metric = 'some metric', 
                 exper = 'some exper',
                 taus = np.linspace(1, 3, 101),
                 plotter = plt.plot,
                 **kwargs
                ):
    """Draw Dolan-More performance-profile curves, one per column of `df_res`.

    For each threshold in `taus` a curve shows the fraction of rows of
    `df_res` whose value in that column is less than or equal to the threshold.
    Curves are drawn for `masks` first, then `emasks`, and finally for the
    'eue' column, which is always drawn last with the 'k' (black) format.
    The curve name and its value at the first threshold are printed for each.

    Parameters
    ----------
    df_res : table indexable by column name (e.g. a pandas DataFrame)
        One column per mask; must contain an 'eue' column.
    masks, emasks : iterables of column names
        Columns to plot; any 'eue' entry is skipped here because that
        curve is drawn separately at the end.
    metric, exper : str
        Joined as ``metric + '|' + exper`` to form the plot title.
    taus : sequence of thresholds
        x-values at which each curve is evaluated.
    plotter : callable
        Called as ``plotter(taus, values, [fmt], label=..., **kwargs)``.
    **kwargs
        Forwarded unchanged to every `plotter` call.
    """
    npp = len(df_res)
    for msk in list(masks) + list(emasks):
        if msk == 'eue':
            continue
        _plot_profile(df_res, msk, taus, npp, plotter, **kwargs)
    # The helper prints the 'eue' name itself; the earlier code printed the
    # leftover loop variable here, which named the wrong curve.
    _plot_profile(df_res, 'eue', taus, npp, plotter, 'k', **kwargs)

    plt.legend()
    plt.title(metric + '|' + exper)

In [None]:
# Performance-profile figure for the four single-network masks plus 'eue'.
# NOTE: this rebinds the notebook-level `masks`; the LL box-plot cell further
# down filters on this five-element list.
masks = ['mc_dropout', 'decorrelating_sc', 'dpp', 'k_dpp', 'eue']
# Legend entries follow the order in which make_dm_plot draws the curves
# (the four masks first, 'eue' last).
legend_labels = ['MC dropout', 'decorrelation', 'dpp', 'k_dpp', 'ensemble']

fig = plt.figure(figsize=(8,6))
make_dm_plot(df_res[masks], masks, [], metric, ' ',
             taus = np.linspace(1, 3, 101), lw=3, alpha=.8)
ax = plt.gca()
ax.grid()
ax.set_title('Accuracy for single NN UE')
ax.legend(legend_labels)
ax.set_xlabel(r'$\tau$')
ax.set_ylabel(r'$\rho_a(\tau$)')
ax.set_xlim(1, 3)
fig.savefig('../figures/dolan_acc_single.png', dpi = 600)


In [None]:
import seaborn as sns
# Box plots of 'LL' per mask, one panel per dataset.
# NOTE(review): titles are matched to datasets purely by their position in
# df.dset.unique(), and 'naval' is listed twice - confirm against the paper.
names_as_in_paper = [
    'boston', 'concrete', 'energy', 'kin8nm',
    'naval', 'ccpp', 'naval', 'red wine', 'yacht'
]

dset_names = list(df.dset.unique())
# Use one fixed row order in every panel: the right-hand column hides its
# tick labels, so its boxes are only readable if they line up with the left.
mask_order = list(df.Mask.unique())
n_cols = 2
# Keep the 4x2 layout, but add rows instead of failing when there are
# more than eight datasets.
n_rows = max(4, -(-len(dset_names) // n_cols))

plt.figure(figsize=(10,20))
for cnt, dset in enumerate(dset_names, start=1):
    plt.subplot(n_rows, n_cols, cnt)
    dfx = df[(df.dset == dset)]
    g = sns.boxplot(x = 'LL', y = 'Mask', data = dfx, order = mask_order)
    if cnt % 2 == 0:
        g.set(yticklabels=[])
    plt.grid()
    # Fall back to the raw dataset name if the title list is too short.
    plt.title(names_as_in_paper[cnt-1] if cnt <= len(names_as_in_paper) else dset)
plt.tight_layout()


In [None]:
import seaborn as sns
# Box plots of 'LL' for the masks listed in `masks`, one panel per dataset.
# The tick labels below are written for
# masks == ['mc_dropout', 'decorrelating_sc', 'dpp', 'k_dpp', 'eue'],
# i.e. the list assigned in the performance-profile plotting cell.
# NOTE(review): titles are matched to datasets purely by their position in
# df.dset.unique(), and 'naval' is listed twice - confirm against the paper.
names_as_in_paper = [
    'boston', 'concrete', 'energy', 'kin8nm',
    'naval', 'ccpp', 'naval', 'red wine', 'yacht'
]
mask_labels = ['MC dropout', 'decorrelation', 'dpp', 'k_dpp', 'eue']

dset_names = list(df.dset.unique())
n_cols = 2
# Keep the 4x2 layout, but add rows instead of failing when there are
# more than eight datasets.
n_rows = max(4, -(-len(dset_names) // n_cols))

plt.figure(figsize=(10,12))
for cnt, dset in enumerate(dset_names, start=1):
    plt.subplot(n_rows, n_cols, cnt)
    dfx = df[(df.dset == dset) & (df.Mask.isin(masks))]
    # order=masks pins the box order so the hand-written labels (and the
    # unlabeled right-hand panels) always refer to the same mask per row.
    g = sns.boxplot(x = 'LL', y = 'Mask', data = dfx, order = masks)
    print(dset)
    g.set(ylabel='')
    if cnt % 2 == 0:
        g.set(yticklabels=[])
    else:
        g.set(yticklabels=mask_labels)
    plt.grid()
    # Fall back to the raw dataset name if the title list is too short.
    plt.title(names_as_in_paper[cnt-1] if cnt <= len(names_as_in_paper) else dset)
plt.tight_layout()
plt.savefig('../figures/LL_UCI_single.png', dpi = 600)

In [None]:
# Box plots of 'LL' for the 'e_'-prefixed masks (`emasks`), one panel per dataset.
# NOTE(review): the four tick labels below assume `emasks` holds exactly four
# names in the order MC dropout, decorrelation, dpp, k_dpp - confirm.
# NOTE(review): titles are matched to datasets purely by their position in
# df.dset.unique(), and 'naval' is listed twice - confirm against the paper.
names_as_in_paper = [
    'boston', 'concrete', 'energy', 'kin8nm',
    'naval', 'ccpp', 'naval', 'red wine', 'yacht'
]
emask_labels = ['MC dropout', 'decorrelation', 'dpp', 'k_dpp']

dset_names = list(df.dset.unique())
n_cols = 2
# Keep the 4x2 layout, but add rows instead of failing when there are
# more than eight datasets.
n_rows = max(4, -(-len(dset_names) // n_cols))

plt.figure(figsize=(10,12))

for cnt, dset in enumerate(dset_names, start=1):
    plt.subplot(n_rows, n_cols, cnt)
    dfx = df[(df.dset == dset) & (df.Mask.isin(emasks))]
    # order=emasks gives every panel the same row order; the right-hand
    # panels hide their tick labels and are read against the left-hand ones.
    g = sns.boxplot(x = 'LL', y = 'Mask', data = dfx, order = emasks)
    g.set(ylabel='')
    if cnt % 2 == 0:
        g.set(yticklabels=[])
    else:
        g.set(yticklabels=emask_labels)
    plt.grid()
    # Fall back to the raw dataset name if the title list is too short.
    plt.title(names_as_in_paper[cnt-1] if cnt <= len(names_as_in_paper) else dset)
plt.tight_layout()
plt.savefig('../figures/LL_UCI_ens.png', dpi = 600)