In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.cm as cm
import seaborn as sns
import pickle
from tqdm import tqdm

import sys
# Put the parent directory first on the module search path; presumably this is
# what lets the `utils` package imported below resolve when the notebook is run
# from its own folder — confirm against the repository layout.
sys.path.insert(0, '../')

# Result readers from the project's evaluation helpers.
# NOTE: `read_eval_results` is defined again in a later cell of this notebook;
# once that cell runs, the local definition shadows this import.
from utils.evaluation import read_base_results, read_drp_results, read_eval_results

# Apply seaborn's default theme, then switch the axes style to 'whitegrid'.
sns.set()
sns.set_style('whitegrid')
%matplotlib inline

import warnings
# Suppress every Python warning for the rest of the session. This keeps the
# output clean but also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')
import os
# NOTE(review): presumably quiets TensorFlow's native (C++) logging; the exact
# meaning of level '3' should be confirmed against the TensorFlow docs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

%load_ext autoreload
%autoreload 2

## CIFAR-10

In [None]:
# Where the stored results live and which experiment they belong to.
datasetname = 'cifar10-vgg16'
file_path = '../results/'
nepoch = 7

# Result file names: the dataset prefix followed by an experiment suffix.
base_file = ''.join([datasetname, '-base-', str(nepoch)])
bdrp_train_file = ''.join([datasetname, '-bernoulli-dropout-50-0.008'])
gdrp_train_file = ''.join([datasetname, '-gaussian-dropout-50-0.1'])

In [None]:
# Read the base-model results, then the Bernoulli and Gaussian dropout results
# (in that order) from the results directory.
base_result = read_base_results(''.join([file_path, base_file]))
bdrp_train_result, gdrp_train_result = (
    read_drp_results(file_path + drp_file)
    for drp_file in (bdrp_train_file, gdrp_train_file)
)

In [None]:
def _plot_dropout_panel(axis, drp_result, base, cmap_name, colorbar_label):
    """Draw one "loss difference vs. accuracy" panel for a dropout sweep.

    For each entry of ``drp_result['drp_list']`` a square marker is placed at the
    mean of (``drp_test_loss`` row minus ``base['test_loss']``) and the mean of
    the matching ``drp_test_acc`` row, with error bars of one standard deviation
    in both directions. Markers are coloured by the dropout parameter and a
    matching colorbar is attached. The base model is added as a black star at
    x = 0.

    Parameters
    ----------
    axis : matplotlib Axes to draw on.
    drp_result : mapping with 'drp_list', 'drp_test_loss' and 'drp_test_acc';
        the latter two are indexed as ``[i, :]`` per dropout parameter.
    base : mapping with 'test_loss' and 'test_acc' of the base model.
    cmap_name : name of the colormap used for markers and colorbar.
    colorbar_label : text shown next to the colorbar.
    """
    rates = drp_result['drp_list']
    losses = drp_result['drp_test_loss']
    accs = drp_result['drp_test_acc']

    # Zero-size scatter: nothing is drawn, it only provides the mappable that
    # the colorbar is built from.
    sc = axis.scatter((losses - base['test_loss']).mean(axis=1), accs.mean(axis=1),
                      s=0, c=rates, cmap=cmap_name)
    clb = plt.colorbar(sc, ax=axis)
    clb.set_label(colorbar_label, rotation=270, labelpad=15)

    # Map every dropout parameter to the colour the colorbar shows for it.
    norm = matplotlib.colors.Normalize(vmin=min(rates), vmax=max(rates), clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap=cmap_name)
    colors = np.array([mapper.to_rgba(rate) for rate in rates])

    for i in range(len(rates)):
        loss_gap = losses[i, :] - base['test_loss']
        acc = accs[i, :]
        axis.errorbar(x=loss_gap.mean(), y=acc.mean(), xerr=loss_gap.std(), yerr=acc.std(),
                      fmt='s', capsize=3, capthick=3, c=colors[i])

    # Reference point: the base model has zero loss difference by construction.
    axis.plot(0, base['test_acc'], marker='*', color='black', markersize=6,
              linestyle='', label='Base Model', zorder=10)
    axis.set_ylabel('Accuracy')


fig, ax = plt.subplots(2, 1, figsize=(6, 4))

# Top: Bernoulli dropout sweep. Bottom: Gaussian dropout sweep.
_plot_dropout_panel(ax[0], bdrp_train_result, base_result, 'Blues', r'Bernoulli rate $p$')
_plot_dropout_panel(ax[1], gdrp_train_result, base_result, 'Reds', r'Gaussian variance $\alpha$')

# Only the bottom panel carries the x-label and the legend.
ax[1].set_xlabel(r'Loss difference $\varepsilon$')
ax[1].legend(loc='lower left')

plt.tight_layout()
# plt.savefig(datasetname+'-bern-gaussian-loss-vs-acc-vs-parameter.png', format='png', dpi=300)

#### Metric Comparison

In [None]:
def read_eval_results(file):
    """Load stored evaluation metrics from ``file + '.npz'``.

    Note: this definition shadows the ``read_eval_results`` imported from
    ``utils.evaluation`` at the top of the notebook.

    Parameters
    ----------
    file : str
        Path of the archive *without* the ``.npz`` extension.

    Returns
    -------
    dict
        Keys ``'eps_list'``, ``'vpr'``, ``'score_var'``, ``'rc'``, ``'amb'``,
        ``'disc'`` and ``'disa_hat'``.

        * ``'vpr'``, ``'score_var'`` and ``'disa_hat'`` are sorted in ascending
          order along axis 1.
        * ``'rc'`` is transformed to ``2**rc`` and then sorted along axis 1.
        * ``'eps_list'``, ``'amb'`` and ``'disc'`` are returned as stored.

    Raises
    ------
    KeyError
        If one of the expected arrays is missing from the archive.
    """
    # The archive is opened as a context manager so its file handle is released
    # as soon as the arrays have been read; indexing materialises each array in
    # memory, so the results stay valid after the archive is closed.
    with np.load(file + '.npz') as results:
        eps_list = results['eps_list']
        vpr = results['vpr']
        score_var = results['score_var']
        rc = results['rc']
        amb = results['amb']
        disc = results['disc']
        disa_hat = results['disa_hat']

    # Sorting along axis 1 orders the values within each row.
    output = {
        'eps_list': eps_list,
        'vpr': np.sort(vpr, axis=1),
        'score_var': np.sort(score_var, axis=1),
        'rc': np.sort(2**rc, axis=1),
        'amb': amb,
        'disc': disc,
        'disa_hat': np.sort(disa_hat, axis=1),
    }

    return output

In [None]:
# Experiment prefix and results directory for the metric comparison.
file_path = '../results/'
datasetname = 'cifar10-vgg16'

# Evaluation result file names for re-training (sampling) and both dropout variants.
sampling_filename = ''.join([datasetname, '-sampling-20-5,6,7,8,9-eval'])
bern_filename = ''.join([datasetname, '-bernoulli-dropout-50-0.008-eval'])
gauss_filename = ''.join([datasetname, '-gaussian-dropout-50-0.1-eval'])

In [None]:
# Load the evaluation metrics for re-training, Bernoulli dropout and Gaussian
# dropout, in that order.
sampling_result, bdrp_result, gdrp_result = [
    read_eval_results(file_path + eval_file)
    for eval_file in (sampling_filename, bern_filename, gauss_filename)
]

In [None]:
# Empirical CDFs of the score-based metrics: one row per method, one column per
# metric, one curve per Rashomon parameter. The short top row is kept empty and
# only provides room for the legend.
fig, ax = plt.subplots(4, 4, figsize=(14, 7), gridspec_kw={'height_ratios': [1,2,2,2]})
# NOTE(review): plt.tight_layout() at the end recomputes the subplot spacing, so
# these values probably have no visible effect — confirm before relying on them.
plt.subplots_adjust(wspace=0.3, hspace=0.3)

# (grid row, bold row title, result dict) for each method.
method_rows = [
    (1, r'${\bf Re-training}$', sampling_result),
    (2, r'${\bf Bernoulli}$', bdrp_result),
    (3, r'${\bf Gaussian}$', gdrp_result),
]
# (result key, x-axis label) for each metric column, left to right.
metric_cols = [
    ('vpr', 'Viable Prediction Range'),
    ('score_var', 'Score Variance'),
    ('rc', 'Rashomon Capacity'),
    ('disa_hat', 'Disagreement'),
]

# The Bernoulli eps grid drives the curves and legend labels for every row;
# this assumes all three result sets were evaluated on the same eps_list.
eps_values = bdrp_result['eps_list']
# One extra colour is requested so the final entry of the 'ocean' map is never used.
colors = cm.ocean(np.linspace(0, 1, len(eps_values)+1))

for row, row_title, result in method_rows:
    for col, (key, xlabel) in enumerate(metric_cols):
        values = result[key]
        # ECDF heights are derived from the array actually being plotted, so
        # result sets with different sample counts no longer break the plot.
        n = values.shape[1]
        cum = np.arange(1, n+1)/n

        for i, eps in enumerate(eps_values):
            line_kwargs = {'color': colors[i]}
            if (row, col) == (1, 3):
                # Only this panel carries labels; it feeds the shared legend.
                line_kwargs['label'] = r'$\epsilon$ = {:.3f}'.format(eps)
            # Rows of `values` are already sorted, so plotting them against
            # `cum` draws the empirical CDF.
            ax[row, col].plot(values[i, :], cum, **line_kwargs)

    ax[row, 0].set_ylabel(row_title + '\n' + 'CDF of Samples')

# Metric names go under the bottom row only.
for col, (key, xlabel) in enumerate(metric_cols):
    ax[3, col].set_xlabel(xlabel)

ax[1, 3].legend(bbox_to_anchor=(0.5, 1.8), ncol=6, title='Rashomon Parameter')

# Hide the spacer row.
for spacer in ax[0]:
    spacer.axis('off')

plt.tight_layout()
# plt.savefig(datasetname+'-sampling-bern-gaussian-scores.png', format='png', dpi=300, bbox_inches='tight')

In [None]:
# Decision-based metrics against the Rashomon parameter:
# ambiguity on the left, discrepancy on the right, one line per method.
fig, ax = plt.subplots(1, 2, figsize=(7, 2.5))
colors = ['maroon', 'olive', 'orange']
ms = 3

# (legend name, marker, result dict), in drawing order.
methods = [
    ('Re-training', 'o', sampling_result),
    ('Bernoulli', 's', bdrp_result),
    ('Gaussian', '^', gdrp_result),
]

for (method_name, marker, method_result), color in zip(methods, colors):
    eps_axis = method_result['eps_list']
    ax[0].plot(eps_axis, method_result['amb'], markersize=ms, marker=marker, color=color)
    # Only the right-hand panel is labelled because it alone shows the legend.
    ax[1].plot(eps_axis, method_result['disc'], label=method_name, markersize=ms, marker=marker, color=color)

for panel, ylabel in zip(ax, ('Ambiguity', 'Discrepancy')):
    panel.set_xlabel(r'Rashomon Parameter $\epsilon$')
    panel.set_ylabel(ylabel)

ax[1].legend(loc='center right')

plt.tight_layout()
# plt.savefig(datasetname+'-bern-gaussian-decision-based.png', format='png', dpi=300)