# Imports and pre-definitions

In [1]:
import os
# Root directory of the project on this machine; every other path is derived from it.
MAIN_PATH = r'/home/luis-felipe'
# Single-level sub-directories of the root.
DATA_PATH, PATH_MODELS = (
    os.path.join(MAIN_PATH, subdir) for subdir in ('data', 'torch_models')
)
# Output directory passed to plt.savefig when the final figure is written.
FIGS_PATH = os.path.join(MAIN_PATH, 'results', 'figs')

In [2]:
import torch
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Select the compute device: CUDA (GPU) when one is available, otherwise the CPU.
cuda_available = torch.cuda.is_available()
print(cuda_available)
dev = torch.device('cuda' if cuda_available else 'cpu')

# Use double precision by default and fix the torch / numpy seeds so that
# re-running the notebook from a fresh kernel repeats the same random draws.
torch.set_default_dtype(torch.float64)
torch.manual_seed(42)
np.random.seed(42)

True


In [4]:
import sys
# Add the parent and grandparent directories to the module search path
# (at position 1, i.e. right after the first entry). Presumably this is what
# lets the project-local modules imported below resolve -- confirm against
# the repository layout.
sys.path.insert(1, '..')
sys.path.insert(1, '../..')

# Project-local modules (not visible from this notebook).
import models
from utils import measures,metrics
from data_utils import upload_logits, split
import pNormSoftmax
from collections import defaultdict

# Evaluate logits

In [8]:
# Architecture name and dataset name; both are passed to upload_logits, and the
# architecture name is also embedded in the saved figure's file name.
MODEL_ARC, DATASET = 'wide_resnet50_2', 'ImageNet'

In [9]:
# Fetch the logits and labels of the 'test' split for MODEL_ARC on DATASET,
# requesting them on `dev`. (Presumably these are precomputed and stored under
# PATH_MODELS -- confirm in data_utils.upload_logits.)
logits, labels = upload_logits(
    MODEL_ARC,
    DATASET,
    PATH_MODELS,
    split='test',
    device=dev,
)

# Experiments

In [None]:
VAL_SIZE = 0.1 # Fraction of all samples held out for tuning (per the original note, 0.1 corresponds to 5000 samples)
N_SPLITS_VAL_TEST = 5 # Number of experiments with different validation/test splits
N_SPLIT_SUB_VAL = 5 # Number of experiments for each random subset of the hold-out set
SIZES_SUB = np.arange(0.02,1.0,0.02) # Hold-out subset sizes, as fractions of the hold-out set

METRIC = metrics.AURC # Metric to be optimized when tuning T, p and (p, beta)

In [None]:
# Data-efficiency experiment.
#
# Outer loop: N_SPLITS_VAL_TEST splits of (logits, labels) into a hold-out
# ("val") part and a test part.
# Inner loop: N_SPLIT_SUB_VAL repetitions; in each one, a subset of the
# hold-out part is drawn for every fraction in SIZES_SUB, the parameters
# (T, p, and the (p, beta) pair) are tuned on that subset with METRIC, and the
# resulting AUROC is measured on the test part.
#
# Results:
#   exp_1      -- list (one per split) of lists (one per repetition) of dicts
#                 {'T' | 'p' | 'pT': [AUROC for each size in SIZES_SUB, then 1.0]}
#   exp_1_opt  -- dict {'baseline' | 'T' | 'p' | 'pT': [AUROC per split]} where
#                 the parameters are tuned directly on the test part; used as
#                 reference levels in the plot.
exp_1 = []
exp_1_opt = defaultdict(list)
for split_idx in range(N_SPLITS_VAL_TEST):
    logits_val,labels_val,logits_test,labels_test = split.split_logits(logits,labels,VAL_SIZE)
    # presumably a per-sample misclassification indicator -- confirm in utils.measures
    loss_test = measures.wrong_class(logits_test,labels_test).float()

    # Reference values: parameters optimized on the test part itself.
    T_star = pNormSoftmax.optimize.T(logits_test,loss_test, METRIC)
    p_star = pNormSoftmax.optimize.p(logits_test,loss_test, METRIC)
    pT_star = pNormSoftmax.optimize.p_and_beta(logits_test,loss_test,METRIC)
    exp_1_opt['baseline'].append(metrics.AUROC(loss_test,measures.MSP(logits_test)))
    exp_1_opt['T'].append(metrics.AUROC(loss_test,measures.MSP(logits_test.div(T_star))))
    exp_1_opt['p'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,p_star,None)))
    exp_1_opt['pT'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,pT_star[0],pT_star[1])))

    exp_2 = []
    # The repetition count for the random hold-out subsets is N_SPLIT_SUB_VAL
    # (the original looped over N_SPLITS_VAL_TEST here, leaving N_SPLIT_SUB_VAL
    # unused; the two only coincided because both are currently 5).
    for sub_idx in range(N_SPLIT_SUB_VAL):
        exp_3 = defaultdict(list)
        for val_size_sub in SIZES_SUB:
            # Only the first two outputs (the subset's logits and labels) are needed.
            logits_sub,labels_sub = split.split_logits(logits_val,labels_val,val_size_sub)[:2]
            loss_sub = measures.wrong_class(logits_sub,labels_sub).float()
            T = pNormSoftmax.optimize.T(logits_sub,loss_sub, METRIC)
            p = pNormSoftmax.optimize.p(logits_sub,loss_sub, METRIC)
            pT  = pNormSoftmax.optimize.p_and_beta(logits_sub,loss_sub,METRIC)
            exp_3['T'].append(metrics.AUROC(loss_test,measures.MSP(logits_test.div(T))))
            # NOTE(review): beta is taken from beta_heuristic on the subset here,
            # while the reference and full-hold-out 'p' entries pass None as beta.
            # Confirm that this difference is intended.
            exp_3['p'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,p,pNormSoftmax.beta_heuristic(logits_sub,p))))
            exp_3['pT'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,pT[0],pT[1])))
        exp_2.append(exp_3)

    # When the subset fraction is 1.0 there is only one possible subset (the
    # whole hold-out part), so repeating the experiment is pointless. The
    # result is computed once here and appended to every repetition.
    loss_val = measures.wrong_class(logits_val,labels_val).float()
    T = pNormSoftmax.optimize.T(logits_val,loss_val, METRIC)
    p = pNormSoftmax.optimize.p(logits_val,loss_val, METRIC)
    pT  = pNormSoftmax.optimize.p_and_beta(logits_val,loss_val,METRIC)
    for sub_run in exp_2:
        sub_run['T'].append(metrics.AUROC(loss_test,measures.MSP(logits_test.div(T))))
        sub_run['p'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,p,None)))
        sub_run['pT'].append(metrics.AUROC(loss_test,pNormSoftmax.pNormSoftmax(logits_test,pT[0],pT[1])))
    exp_1.append(exp_2)

In [None]:
# Absolute number of hold-out samples for every evaluated fraction
# (the SIZES_SUB fractions followed by the full hold-out set, 1.0).
sizes_sub = labels.size(0)*VAL_SIZE*np.r_[SIZES_SUB,1.0]

# Flatten the (split, repetition) nesting of exp_1 into one list of runs, then
# stack each method's AUROC curves into an array with one row per run.
flat_runs = [run for split_runs in exp_1 for run in split_runs]
all_t = np.array([run['T'] for run in flat_runs])
all_p = np.array([run['p'] for run in flat_runs])
all_pT = np.array([run['pT'] for run in flat_runs])

In [None]:
# Per-size statistics over all runs (axis 0 indexes the runs).
means_T, means_p, means_pT = (curves.mean(0) for curves in (all_t, all_p, all_pT))
std_T, std_p, std_pT = (curves.std(0) for curves in (all_t, all_p, all_pT))
min_T, min_p, min_pT = (curves.min(0) for curves in (all_t, all_p, all_pT))
max_T, max_p, max_pT = (curves.max(0) for curves in (all_t, all_p, all_pT))

# Scalar statistics of the reference results stored in exp_1_opt
# (one value per validation/test split).
baseline_mean, T_opt_mean, p_opt_mean, pT_opt_mean = (
    np.mean(exp_1_opt[key]) for key in ('baseline', 'T', 'p', 'pT')
)
baseline_min, T_opt_min, p_opt_min, pT_opt_min = (
    np.min(exp_1_opt[key]) for key in ('baseline', 'T', 'p', 'pT')
)
baseline_max, T_opt_max, p_opt_max, pT_opt_max = (
    np.max(exp_1_opt[key]) for key in ('baseline', 'T', 'p', 'pT')
)

In [None]:
# Lower / upper percentiles used for the shaded bands and dotted reference lines.
PERCENTILES = (10,90)

# Percentile curves over all runs, one value per hold-out size.
per_T_0 = np.percentile(all_t,PERCENTILES[0],axis=0)
per_p_0 = np.percentile(all_p,PERCENTILES[0],axis=0)
per_pT_0 = np.percentile(all_pT,PERCENTILES[0],axis=0)

per_T_1 = np.percentile(all_t,PERCENTILES[1],axis=0)
per_p_1 = np.percentile(all_p,PERCENTILES[1],axis=0)
per_pT_1 = np.percentile(all_pT,PERCENTILES[1],axis=0)

# Percentiles of the reference results. These are stored in the dict
# exp_1_opt; exp_1 is a list, so indexing it with a string key (as the
# original did) raises TypeError.
per_T_0_opt = np.percentile(exp_1_opt['T'],PERCENTILES[0],axis=0)
per_p_0_opt = np.percentile(exp_1_opt['p'],PERCENTILES[0],axis=0)
per_pT_0_opt = np.percentile(exp_1_opt['pT'],PERCENTILES[0],axis=0)

per_T_1_opt = np.percentile(exp_1_opt['T'],PERCENTILES[1],axis=0)
per_p_1_opt = np.percentile(exp_1_opt['p'],PERCENTILES[1],axis=0)
per_pT_1_opt = np.percentile(exp_1_opt['pT'],PERCENTILES[1],axis=0)

# Plot

In [None]:
# Data-efficiency figure: mean test AUROC of each method as a function of the
# number of hold-out samples used for tuning.
plt.figure(figsize=(8,6))
T_plot = plt.plot(sizes_sub,means_T, label = 'TS-AUROC', color = 'blue')
p_plot = plt.plot(sizes_sub,means_p, label = 'pNormSoftmax', color = 'red')
pT_plot = plt.plot(sizes_sub,means_pT, label = 'pNormSoftmax*', color = 'green')

# Reuse each curve's colour for its band and reference lines.
T_color = T_plot[0].get_color()
p_color = p_plot[0].get_color()
pT_color = pT_plot[0].get_color()

# Shaded bands between the lower and upper percentile curves.
for band_low, band_high, band_color in (
    (per_p_0, per_p_1, p_color),
    (per_T_0, per_T_1, T_color),
    (per_pT_0, per_pT_1, pT_color),
):
    plt.fill_between(sizes_sub, band_low, band_high, facecolor=band_color, alpha=0.3)

# Dashed horizontal lines: mean AUROC of the baseline and of each method when
# its parameters are tuned directly on the test part.
for ref_level, ref_color in (
    (baseline_mean, 'k'),
    (T_opt_mean, T_color),
    (p_opt_mean, p_color),
    (pT_opt_mean, pT_color),
):
    plt.axhline(ref_level,linestyle = '--', color = ref_color)

# Dotted horizontal lines: lower / upper percentiles of those reference results.
for ref_low, ref_high, ref_color in (
    (per_pT_0_opt, per_pT_1_opt, pT_color),
    (per_T_0_opt, per_T_1_opt, T_color),
    (per_p_0_opt, per_p_1_opt, p_color),
):
    plt.axhline(ref_low,linestyle = ':', color = ref_color,linewidth = 1.0)
    plt.axhline(ref_high,linestyle = ':', color = ref_color,linewidth = 1.0)

# The right limit follows the largest evaluated hold-out size, so the axis stays
# correct if the dataset or VAL_SIZE changes (the original hard-coded 5000).
plt.xlim(0,right=sizes_sub[-1])
plt.xlabel('Hold-out Samples')
plt.ylabel('Test AUROC')
plt.legend()
plt.grid()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(FIGS_PATH, exist_ok=True)
plt.savefig(os.path.join(FIGS_PATH, f'DataEfficiency_{MODEL_ARC}.pdf'), transparent = True, format = 'pdf',bbox_inches = 'tight')
plt.show()