# Libs and pre-definitions

### Standard Python libraries and PyTorch utilities

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, Lambda, Compose, Normalize
from collections import defaultdict
from torch.utils.data import random_split
import copy

In [2]:
# Select the compute device: the CUDA GPU when one is available, otherwise the CPU.
cuda_available = torch.cuda.is_available()
print(cuda_available)
dev = torch.device('cuda' if cuda_available else 'cpu')

True


### Project libraries (developed in-house)

https://github.com/lfpc/Uncertainty_Estimation

In [3]:
import NN_models as models
import uncertainty.comparison as unc_comp
import uncertainty.quantifications as unc
import uncertainty.losses as losses
import uncertainty.train_and_eval_with_g as TE_g
import NN_utils as utils
import NN_utils.train_and_eval as TE

## Data download and transforms

In [4]:
# Per-channel normalisation statistics shared by the training and evaluation pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: tensor conversion, random crop / horizontal flip augmentation,
# then normalisation.
transforms_train = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
)

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
transforms_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
)

In [5]:
# Seed used only for the train/validation partition, so the same samples end up in
# each subset on every run of the notebook.
SPLIT_SEED = 0

# CIFAR-10 training split with the augmenting transform pipeline.
training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms_train)

# CIFAR-10 test split with the plain (non-augmenting) pipeline.
test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms_test)

# Hold out 15% of the training split for validation.
train_size = int(0.85*len(training_data))
val_size = len(training_data) - train_size
training_data, validation_data = random_split(
    training_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))

# Both subsets returned by random_split share one underlying dataset object. The
# validation subset is deep-copied so that switching its transform to the evaluation
# pipeline does not also disable augmentation for the training subset.
validation_data = copy.deepcopy(validation_data)
validation_data.dataset.transform = transforms_test

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# One batch size for all three loaders; only the training loader is shuffled.
batch_size = 100
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Loss

In [8]:
from scipy.optimize import root_scalar,fsolve

def w_fn(g, lamb=1, dim=-1):
    """Return softmax weights of ``lamb * g`` taken along ``dim``.

    Args:
        g: tensor of scores.
        lamb: scalar (number or tensor) multiplying ``g`` before the softmax.
        dim: dimension along which the softmax is normalised.

    Returns:
        Tensor with the same shape as ``g`` whose entries sum to one along ``dim``.
    """
    scaled_scores = torch.as_tensor(lamb) * g
    return torch.nn.functional.softmax(scaled_scores, dim=dim)

def H_fn(g, lamb=1):
    """Return the entropy of the softmax weights ``w_fn(g, lamb)``.

    Args:
        g: tensor of scores.
        lamb: scalar multiplying ``g`` before the softmax.

    Returns:
        ``entropy(w_fn(g, lamb))``, i.e. the entropy summed over the last dimension.
    """
    return entropy(w_fn(g, lamb))
def H_const(lamb, *args):
    """Constraint residual ``exp(H_fn(g, lamb)) / numel(g) - c``.

    The signature (free variable first, remaining data in ``*args``) is the one
    expected by scalar root finders; a root in ``lamb`` makes the first term equal
    to ``c``.

    Args:
        lamb: scalar multiplying ``g`` inside the softmax.
        *args: exactly two values, ``(g, c)`` -- the score tensor and the target.

    Returns:
        The residual described above.
    """
    g, c = args
    ratio = torch.exp(H_fn(g, lamb)) / g.numel()
    return ratio - c
def entropy(y):
    """Return ``sum(-y * log(y))`` over the last dimension of ``y``.

    Uses ``torch.special.entr``, so entries equal to zero contribute zero.
    """
    return torch.special.entr(y).sum(dim=-1)

class selective_net_lambda(torch.nn.Module):
    """Weighted loss whose per-sample weights are ``w_fn(g, lamb)``.

    On every call, ``lamb`` is re-estimated by finding a root of
    ``c_fn(lamb, g, c)`` on the interval [0, 1000]; the per-sample criterion is then
    aggregated with the resulting weights. An optional auxiliary term, computed with
    uniform weights, can be mixed in through ``alpha``.

    Args:
        criterion: per-sample loss; must be built with ``reduction='none'``.
        w_fn: callable ``(g, lamb, dim=...)`` mapping scores to weights.
        c_fn: callable ``(lamb, g, c)`` whose root in ``lamb`` defines the constraint.
        c: target value passed to ``c_fn`` (coverage).
        alpha: mixing factor; the result is ``alpha*loss + (1-alpha)*loss_h``.
        head: ``None`` (no auxiliary term), the string ``'y'`` (auxiliary term uses
            the main predictions), or a zero-argument callable returning the
            auxiliary predictions.
    """

    def __init__(self,criterion,w_fn = w_fn, c_fn = H_const,c = 0.8,alpha = 1.0, head = None):
        super().__init__()

        self.criterion = criterion  # criterion must have reduction set to 'none'
        self.w_fn = w_fn  # transform applied to g
        self.c_fn = c_fn  # constraint residual, solved for its root in lamb
        self.c = c  # coverage
        self.alpha = alpha
        self.head = head
        self.lamb = 1

    def get_loss(self,y_pred,w,y_true):
        """Return the sum over samples of ``w * criterion(y_pred, y_true)``."""
        loss = w*self.criterion(y_pred,y_true)
        # Open question kept from the original author: sum or mean? When w is a
        # normalisation (sums to one) it must be a sum; the general case is undecided.
        loss = torch.sum(loss)
        return loss

    def update_lambda(self,g):
        """Solve ``c_fn(lamb, g, c) = 0`` for ``lamb`` and store it in ``self.lamb``.

        Falls back to ``lamb = 1`` when the root finder raises (for example when the
        residual has the same sign at both ends of the bracket).

        Returns:
            The stored ``lamb`` as a tensor.
        """
        # The solver only needs plain numbers: detach g so that no autograd graph is
        # built for the solver's function evaluations, and hand SciPy a Python float
        # rather than a (possibly CUDA / grad-requiring) tensor.
        g_detached = g.detach()

        def objective(lamb):
            with torch.no_grad():
                return float(self.c_fn(lamb, g_detached, self.c))

        try:
            solver = root_scalar(objective, bracket=[0, 1000])
            self.lamb = torch.as_tensor(solver.root)
            if solver.flag != 'converged':
                print(f'solution not converged, flag = {solver.flag}, lamb = {self.lamb}; c_fn = {self.c_fn(self.lamb,*(g,self.c))}, entropy(w) = {entropy(self.w_fn(g))}')
        except Exception as err:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt / SystemExit still stop a long training run.
            print(f'lambda root-finding failed: {err!r}')
            print(f'solution not converged, entropy(w) = {entropy(self.w_fn(g))}')
            # Same neutral value as before, but as a tensor so the return type does
            # not depend on which branch ran.
            self.lamb = torch.as_tensor(1.0)
        return self.lamb

    def forward(self,output,y_true):
        """Compute the loss for ``output = (y_pred, g)`` against ``y_true``.

        ``g`` is flattened, ``lamb`` is refreshed from it, and the weighted loss is
        mixed with the auxiliary loss (if a head is configured) using ``alpha``.
        """
        y_pred,g = output
        g = g.view(-1)
        self.update_lambda(g)
        w = self.w_fn(g,self.lamb)

        loss = self.get_loss(y_pred,w,y_true)

        if self.head is None:
            loss_h = 0
        else:
            # w_fn applied to a constant vector yields uniform weights for the
            # auxiliary term.
            w = self.w_fn(torch.ones([torch.numel(g)]),dim=-1).to(y_pred.device)
            if self.head == 'y':
                loss_h = self.get_loss(y_pred,w,y_true)
            else:
                h = self.head()
                loss_h = self.get_loss(h,w,y_true)
        loss = self.alpha*loss + (1-self.alpha)*loss_h

        return loss

In [9]:
# Per-sample negative log-likelihood: reduction is disabled so that each sample's
# loss is available individually.
loss_criterion = nn.NLLLoss(reduction='none')

# Selective loss from the project library (uncertainty.losses), configured with
# const_var='g'. NOTE(review): the training cells later rebind `loss_fn`.
loss_fn = losses.selective_net_2(loss_criterion, const_var='g')
from torch.nn.functional import softmax
def SM_fn(g, lamb=1, dim=-1):
    """Return ``softmax(lamb * g)`` along ``dim``.

    Args:
        g: tensor of scores.
        lamb: scalar (number or tensor) multiplying ``g`` before the softmax.
        dim: dimension along which the softmax is normalised.
    """
    return softmax(torch.as_tensor(lamb) * g, dim=dim)

# Named metrics handed to the trainers through `risk_dict=`. Every value is a callable
# taking (x, label). NOTE(review): x is presumably the model output pair (y_pred, g),
# since the entries index x[0] and x[1] -- confirm against the trainer.
#
# The lambdas read the globals `loss_criterion` and `loss_fn` when they are CALLED,
# not when this dict is built. The training cells rebind `loss_fn`, so the 'lamb'
# entry uses whichever loss object is current at evaluation time; with
# selective_net_lambda, update_lambda also overwrites that object's stored `lamb`.
#
# 'empirical_risk'     : project loss selective_net_2 with optim_method=None, head=None, alpha=1
# 'bce_risk'           : mean of loss_criterion over x[0] (loss_criterion is an NLLLoss here)
# 'lamb'               : value returned by loss_fn.update_lambda on the flattened x[1]
# 'entropy_w'          : H_fn of the flattened x[1] (default lamb = 1)
# 'entropy_g'          : entropy() applied directly to the flattened x[1]
# 'selective_risk_g'   : project selective_risk on x with unc_type='g'
# 'selective_risk_mcp' : project selective_risk on x[0] with unc_type=unc.MCP_unc
risk_dict = {'empirical_risk':losses.selective_net_2(loss_criterion,optim_method = None,head = None,alpha = 1),
             'bce_risk':lambda x,label: torch.mean(loss_criterion(x[0],label)),
             'lamb':lambda x,label: torch.as_tensor(loss_fn.update_lambda(x[1].view(-1))),
             'entropy_w':lambda x,label: H_fn(x[1].view(-1)),
             'entropy_g':lambda x,label: entropy(x[1].view(-1)),
             'selective_risk_g':lambda x,label: unc_comp.selective_risk(x,label,unc_type = 'g'),
            'selective_risk_mcp':  lambda x,label: unc_comp.selective_risk(x[0],label,unc_type = unc.MCP_unc)}

# Train classifier

In [9]:
# Model with an auxiliary head 'h' defined in parallel to the classifier.
# Moved to `dev` (selected at the top of the notebook) instead of calling .cuda()
# unconditionally, so the cell also runs on a machine without a GPU.
model_h = models.Model_CNN_with_g_and_h(10).to(dev)
optimizer = torch.optim.SGD(model_h.parameters(), lr=1e-3, momentum=0.9)
loss_criterion = nn.NLLLoss(reduction='none')
# alpha = 0.5: equal mix of the weighted loss and the auxiliary loss computed on the
# predictions returned by model_h.get_h.
loss_fn = selective_net_lambda(loss_criterion, head=model_h.get_h, alpha=0.5)

model_trainer_h = TE_g.Trainer_with_g(model_h, optimizer, loss_fn, train_dataloader, validation_dataloader, c=0.8, risk_dict=risk_dict)
model_trainer_h.fit(train_dataloader, 1000)

# NOTE(review): return_g = False presumably makes the model return only the class
# scores, which is what TE.model_acc needs -- confirm in NN_models.
model_h.return_g = False
acc = TE.model_acc(model_h, train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = TE.model_acc(model_h, test_dataloader)
print('Conjunto de teste: acc = ', acc)

Epoch  1 , loss =  2.119809094036327
Epoch  2 , loss =  1.860163232017966
Epoch  3 , loss =  1.6998071984683767
Epoch  4 , loss =  1.5708331629809211
Epoch  5 , loss =  1.5060793332492604
Epoch  6 , loss =  1.44948775992674
Epoch  7 , loss =  1.4125572796428905
Epoch  8 , loss =  1.3777991034002866
Epoch  9 , loss =  1.346636940170737
Epoch  10 , loss =  1.3235096844504861
Epoch  11 , loss =  1.3008919558805578
Epoch  12 , loss =  1.281439556795008
Epoch  13 , loss =  1.2565149405423333
Epoch  14 , loss =  1.2337450608085183
Epoch  15 , loss =  1.210946527228636
Epoch  16 , loss =  1.1888541802238015
Epoch  17 , loss =  1.1661857024361106
Epoch  18 , loss =  1.1417782476369072
Epoch  19 , loss =  1.1230413464938893
Epoch  20 , loss =  1.102008107550004
Epoch  21 , loss =  1.090003691000097
Epoch  22 , loss =  1.0769697967697591
Epoch  23 , loss =  1.0576861657815821
Epoch  24 , loss =  1.0327976146866293
Epoch  25 , loss =  1.0190827850734487
Epoch  26 , loss =  1.0089263544363134
Epoc

In [10]:
# Model without a separate auxiliary head: head='y' makes the auxiliary loss term
# reuse the main classifier's predictions (with uniform weights).
# Moved to `dev` (selected at the top of the notebook) instead of calling .cuda()
# unconditionally, so the cell also runs on a machine without a GPU.
model_f = models.Model_CNN_with_g(10).to(dev)
optimizer = torch.optim.SGD(model_f.parameters(), lr=1e-3, momentum=0.9)
loss_criterion = nn.NLLLoss(reduction='none')
loss_fn = selective_net_lambda(loss_criterion, head='y', alpha=0.5)

model_trainer_f = TE_g.Trainer_with_g(model_f, optimizer, loss_fn, train_dataloader, validation_dataloader, c=0.8, risk_dict=risk_dict)
model_trainer_f.fit(train_dataloader, 1000)

# NOTE(review): return_g = False presumably makes the model return only the class
# scores, which is what TE.model_acc needs -- confirm in NN_models.
model_f.return_g = False
acc = TE.model_acc(model_f, train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = TE.model_acc(model_f, test_dataloader)
print('Conjunto de teste: acc = ', acc)

Epoch  1 , loss =  2.0774241885016944
Epoch  2 , loss =  1.8379268503189088
Epoch  3 , loss =  1.670777062247781
Epoch  4 , loss =  1.5561661052703857
Epoch  5 , loss =  1.48290640410255
Epoch  6 , loss =  1.4242725972568289
Epoch  7 , loss =  1.3760541957967423
Epoch  8 , loss =  1.3520144762712367
Epoch  9 , loss =  1.3137344542671652
Epoch  10 , loss =  1.2884511456770056
Epoch  11 , loss =  1.2579913879843319
Epoch  12 , loss =  1.2261771589166979
Epoch  13 , loss =  1.1988269561879774
Epoch  14 , loss =  1.1659209517871632
Epoch  15 , loss =  1.1415770464784958
Epoch  16 , loss =  1.1158651609981762
Epoch  17 , loss =  1.0924095967236687
Epoch  18 , loss =  1.0660857687276952
Epoch  19 , loss =  1.039584476106307
Epoch  20 , loss =  1.0241266252012813
Epoch  21 , loss =  1.0067657417409561
Epoch  22 , loss =  0.9972284021097071
Epoch  23 , loss =  0.9702965331077575
Epoch  24 , loss =  0.9585405091678395
Epoch  25 , loss =  0.9463856943915872
Epoch  26 , loss =  0.9341106957547805

In [10]:
# Model trained with the weighted selective loss only: no auxiliary head term
# (head=None and alpha=1.0 are the defaults of selective_net_lambda).
# Unlike the other two runs, this optimizer also applies weight decay (5e-4).
# Moved to `dev` (selected at the top of the notebook) instead of calling .cuda()
# unconditionally, so the cell also runs on a machine without a GPU.
model_nohead = models.Model_CNN_with_g(10).to(dev)
optimizer = torch.optim.SGD(model_nohead.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)
loss_criterion = nn.NLLLoss(reduction='none')
loss_fn = selective_net_lambda(loss_criterion)

model_trainer_nohead = TE_g.Trainer_with_g(model_nohead, optimizer, loss_fn, train_dataloader, validation_dataloader, c=0.8, risk_dict=risk_dict)
model_trainer_nohead.fit(train_dataloader, 1000)

# NOTE(review): return_g = False presumably makes the model return only the class
# scores, which is what TE.model_acc needs -- confirm in NN_models.
model_nohead.return_g = False
acc = TE.model_acc(model_nohead, train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = TE.model_acc(model_nohead, test_dataloader)
print('Conjunto de teste: acc = ', acc)

Epoch  1 , loss =  2.055044204207028
Epoch  2 , loss =  1.748914537149317
Epoch  3 , loss =  1.5792239172318403
Epoch  4 , loss =  1.482293950249167
Epoch  5 , loss =  1.4089802612977869
Epoch  6 , loss =  1.356014035729801
Epoch  7 , loss =  1.3154626585455502
Epoch  8 , loss =  1.2768333153163685
Epoch  9 , loss =  1.2483469971488503
Epoch  10 , loss =  1.215786495068494
Epoch  11 , loss =  1.181398875432856
Epoch  12 , loss =  1.1557859973346485
Epoch  13 , loss =  1.130837792929481
Epoch  14 , loss =  1.0990178405537325
Epoch  15 , loss =  1.0795834042044248
Epoch  16 , loss =  1.050365505218506
Epoch  17 , loss =  1.0273243152394014
Epoch  18 , loss =  1.0051904646088095
Epoch  19 , loss =  0.9840950201539432
Epoch  20 , loss =  0.9709796498803531
Epoch  21 , loss =  0.943778307578143
Epoch  22 , loss =  0.9279851367894341
Epoch  23 , loss =  0.9073156731268939
Epoch  24 , loss =  0.897581466366263
Epoch  25 , loss =  0.8841318733551923
Epoch  26 , loss =  0.8639846654499278
Epoch

# Save models

In [12]:
import pickle

# Output locations. NOTE(review): these are absolute paths on the author's machine;
# point them at a configurable directory before running elsewhere.
PATH = r'/home/luis-felipe/Uncertainty_Estimation/torch_models'
PATH_trainer = r'/home/luis-felipe/Uncertainty_Estimation/torch_models/trainer'
SUFIX = '_lambda'


def save_run(tag, model, trainer):
    """Persist one training run: model weights plus train/validation histories.

    Files written (``<tag>`` is e.g. ``'nohead'``):
        PATH/selective_<tag><SUFIX>                      -- ``model.state_dict()``
        PATH_trainer/hist_train_<tag>_trainer<SUFIX>     -- pickled ``trainer.hist_train``
        PATH_trainer/hist_val_<tag>_trainer<SUFIX>       -- pickled ``trainer.hist_val``

    Side effect: ``loss_criterion`` and ``risk_dict`` are set to ``None`` on both
    history objects before pickling. NOTE(review): presumably because they hold
    lambdas / loss modules that should not (or cannot) be pickled -- confirm.
    """
    for hist in (trainer.hist_train, trainer.hist_val):
        hist.loss_criterion = None
        hist.risk_dict = None

    torch.save(model.state_dict(), PATH + '/selective_' + tag + SUFIX)
    for split, hist in (('train', trainer.hist_train), ('val', trainer.hist_val)):
        with open(PATH_trainer + '/hist_' + split + '_' + tag + '_trainer' + SUFIX, "wb") as output_file:
            pickle.dump(hist, output_file)


# Only the no-head run is saved here, as before. The 'h' and 'f' runs can be saved
# the same way by calling save_run with tags 'h' / 'f' and the matching
# model / trainer pair.
save_run('nohead', model_nohead, model_trainer_nohead)

# Plots

In [None]:
# Validation-history accuracy curves recorded by the 'h' trainer, one line per
# history attribute, drawn on a single Axes.
fig, ax = plt.subplots()
val_hist = model_trainer_h.hist_val
for series, name in (
    (val_hist.acc_c_mcp, 'MCP'),
    (val_hist.acc_list, 'ACC_0'),
    (val_hist.acc_c_entropy, 'Entropy'),
    (val_hist.acc_c_g, 'g'),
):
    ax.plot(series, label=name)
ax.grid()
ax.legend()
plt.show()

In [None]:
# One figure per trainer ('h' first, then 'f'): every metric named in risk_dict,
# read from that trainer's validation history.
for trainer in (model_trainer_h, model_trainer_f):
    fig, ax = plt.subplots()
    for key in risk_dict:
        ax.plot(trainer.hist_val.risk[key], label=key)
    ax.grid()
    ax.legend()
    plt.show()