# Libs and pre-definitions

### Bibliotecas padrões python e utils pytorch

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, Lambda, Compose, Normalize
from collections import defaultdict
from torch.utils.data import random_split
import copy

In [2]:
# Pick the compute device: CUDA (GPU) when one is available, otherwise the CPU.
print(torch.cuda.is_available())
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

True


### Bibliotecas desenvolvidas

https://github.com/lfpc/Uncertainty_Estimation

In [3]:
from NN_utils import *
from NN_utils.train_and_eval import *
from uncertainty import train_NN_with_g
from uncertainty.losses import penalized_uncertainty
from NN_models import Model_CNN
import uncertainty.comparison as unc_comp
import uncertainty.quantifications as unc

ImportError: cannot import name 'correct_total' from partially initialized module 'NN_utils.train_and_eval' (most likely due to a circular import) (/home/luis-felipe/anaconda3/lib/python3.8/site-packages/NN_utils/train_and_eval.py)

## Data download and transforms

In [4]:
# Per-channel mean and standard deviation handed to Normalize in both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: tensor conversion, random crop / horizontal flip, normalisation.
transforms_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: tensor conversion and normalisation only (no augmentation).
transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

In [5]:
# CIFAR-10 training split, loaded with the augmenting training transforms.
training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms_train)

# CIFAR-10 test split, loaded with the evaluation transforms.
test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms_test)

# Hold out 15% of the training split for validation. The generator is seeded
# so that the same images land in each subset on every run of the notebook.
train_size = int(0.85*len(training_data))
val_size = len(training_data) - train_size
training_data, validation_data = random_split(
    training_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(0))

# Both subsets returned by random_split reference the same underlying dataset
# object. The validation subset is deep-copied first so that switching its
# transform to the evaluation pipeline does not also change the training subset.
validation_data = copy.deepcopy(validation_data)
validation_data.dataset.transform = transforms_test

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Mini-batch loaders; only the training loader reshuffles its samples.
batch_size = 100
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [7]:
# Number of mini-batches the training loader yields per epoch.
len(train_dataloader)

425

## NN classes and Trainer class

### Definição da classe da rede neural

In [None]:
# Define model
class Model_CNN(nn.Module):
    """Small CNN classifier: one convolutional block plus a fully connected head.

    ``forward`` returns log-probabilities while the module is in training mode
    and probabilities (the exponential of the log-softmax) in eval mode.
    """

    def __init__(self,n_classes=10):
        """Create the feature extractor and the classification head.

        Args:
            n_classes: number of output classes.
        """
        super().__init__()

        # Convolutional block, flatten and fully connected layers in one Sequential.
        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        self.main_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
        )

        self.classifier_layer = nn.Sequential(
            nn.Linear(512, n_classes),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return log-probabilities (training mode) or probabilities (eval mode)."""
        log_probs = self.classifier_layer(self.main_layer(x)).float()
        if self.training:
            return log_probs
        return torch.exp(log_probs)


In [None]:
# Define model
class Model_CNN_with_g(nn.Module):
    """CNN classifier with an extra head producing a per-sample scalar ``g``.

    ``g`` is the largest softmax output of a dedicated linear head applied to
    the shared 512-dimensional features. The classifier output is always a
    log-softmax, regardless of train/eval mode.
    """

    def __init__(self,n_classes=10):
        """Create the shared feature extractor, the classifier head and the g head.

        Args:
            n_classes: number of output classes (also the width of the g head).
        """
        super().__init__()

        # When True, forward() returns (y, g); otherwise only y.
        self.return_g = True

        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        self.main_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
        )

        self.classifier_layer = nn.Sequential(
            nn.Linear(512, n_classes),
            nn.LogSoftmax(dim=1),
        )

        self.g_layer = nn.Sequential(
            nn.Linear(512, n_classes),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return ``(log_probs, g)`` when ``return_g`` is set, else ``log_probs``.

        The latest ``g`` is also stored on the instance (see ``get_g``).
        """
        features = self.main_layer(x)
        log_probs = self.classifier_layer(features).float()

        # g: maximum of the g head's softmax over the class dimension.
        self.g = torch.max(self.g_layer(features), dim=1).values.float()

        if not self.return_g:
            return log_probs
        return log_probs, self.g

    def get_g(self):
        """Return the ``g`` tensor computed by the most recent forward pass."""
        return self.g

In [None]:
class Model_CNN_with_g_2(nn.Module):
    """CNN classifier whose ``g`` head reads the classifier's own output.

    ``g`` is produced by a small MLP ending in a sigmoid, fed with the
    log-softmax output of the classifier head.
    """

    def __init__(self,n_classes=10):
        """Create the shared feature extractor, the classifier head and the g head.

        Args:
            n_classes: number of output classes (also the input width of the g head).
        """
        super().__init__()

        # When True, forward() returns (y, g); otherwise only y.
        self.return_g = True

        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        self.main_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
        )

        self.classifier_layer = nn.Sequential(
            nn.Linear(512, n_classes),
            nn.LogSoftmax(dim=1),
        )

        self.g_layer = nn.Sequential(
            nn.Linear(n_classes, 64),
            nn.ReLU(inplace=True),  # original author's note: try tanh here
            nn.Dropout(p=0.2),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(log_probs, g)`` when ``return_g`` is set, else ``log_probs``.

        The latest ``g`` is also stored on the instance (see ``get_g``).
        """
        log_probs = self.classifier_layer(self.main_layer(x))

        # The g head consumes the classifier's log-softmax output.
        self.g = self.g_layer(log_probs).float()
        log_probs = log_probs.float()

        if not self.return_g:
            return log_probs
        return log_probs, self.g

    def get_g(self):
        """Return the ``g`` tensor computed by the most recent forward pass."""
        return self.g

In [None]:
class Model_CNN_with_g_selective(nn.Module):
    """CNN classifier with a sigmoid ``g`` head and an auxiliary classifier head.

    All three heads read the same 512-dimensional features:
    ``classifier_layer`` (log-softmax), ``g_layer`` (scalar in (0, 1)) and
    ``auxiliary_layer`` (a second log-softmax, exposed through ``get_h``).
    """

    def __init__(self,n_classes=10):
        """Create the shared feature extractor and the three heads.

        Args:
            n_classes: number of output classes.
        """
        super().__init__()

        # When True, forward() returns (y, g); otherwise only y.
        self.return_g = True
        # Placeholder until the first forward pass stores the auxiliary output.
        self.h = 0

        conv_layer = [

            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=int(16), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(16)),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=int(16), out_channels=int(32), kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

        ]

        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        fc_layer = [
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, int(1024)),
            nn.ReLU(inplace=True),
            nn.Linear(int(1024), int(512)),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2)]

        main_layer = conv_layer+fc_layer

        self.main_layer = nn.Sequential(*main_layer)

        self.classifier_layer = nn.Sequential(
            nn.Linear(int(512), n_classes),
            nn.LogSoftmax(dim=1)
        )

        # The original placed a ReLU between the last Linear and the Sigmoid.
        # A non-negative input to the sigmoid can never produce a value below
        # 0.5, so g was confined to [0.5, 1). The ReLU is removed so g spans
        # the whole (0, 1) range. It held no parameters, so state_dict keys
        # are unchanged.
        self.g_layer = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

        self.auxiliary_layer = nn.Sequential(
            nn.Linear(int(512), n_classes),
            nn.LogSoftmax(dim=1)
        )


    def forward(self, x):
        """Return ``(log_probs, g)`` when ``return_g`` is set, else ``log_probs``.

        The latest ``g`` and auxiliary output ``h`` are also stored on the
        instance (see ``get_g`` and ``get_h``).
        """
        x = self.main_layer(x)

        y = self.classifier_layer(x).float()

        self.g = self.g_layer(x).float()

        self.h = self.auxiliary_layer(x).float()

        if self.return_g:
            return y,self.g
        else:
            return y

    def get_g(self):
        """Return the ``g`` tensor computed by the most recent forward pass."""
        return self.g

    def get_h(self):
        """Return the auxiliary head output of the most recent forward pass."""
        return self.h

In [None]:
class Model_CNN_with_g_3(nn.Module):
    """CNN classifier with a three-layer sigmoid ``g`` head on the shared features."""

    def __init__(self,n_classes=10):
        """Create the shared feature extractor, the classifier head and the g head.

        Args:
            n_classes: number of output classes.
        """
        super().__init__()

        # When True, forward() returns (y, g); otherwise only y.
        self.return_g = True

        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        self.main_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
        )

        self.classifier_layer = nn.Sequential(
            nn.Linear(512, n_classes),
            nn.LogSoftmax(dim=1),
        )

        self.g_layer = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(log_probs, g)`` when ``return_g`` is set, else ``log_probs``.

        The latest ``g`` is also stored on the instance (see ``get_g``).
        """
        features = self.main_layer(x)
        log_probs = self.classifier_layer(features)
        self.g = self.g_layer(features).float()

        if not self.return_g:
            return log_probs
        return log_probs, self.g

    def get_g(self):
        """Return the ``g`` tensor computed by the most recent forward pass."""
        return self.g

In [None]:
class Model_CNN_with_g_4(nn.Module): # g head works on features concatenated with the classifier output
    """CNN classifier whose ``g`` head combines features with the class output.

    ``g_layer_1`` maps the shared 512-dimensional features to ``n_classes``
    values; these are concatenated with the classifier's log-softmax output
    and ``g_layer_2`` reduces the result to a sigmoid scalar ``g``.
    """

    def __init__(self,n_classes=10):
        """Create the shared feature extractor, the classifier head and the g head.

        Args:
            n_classes: number of output classes. The g head widths follow it;
                the original hard-coded 10 and 20, so any other value failed
                inside ``g_layer_2`` with a shape mismatch.
        """
        super().__init__()

        # When True, forward() returns (y, g); otherwise only y.
        self.return_g = True

        conv_layer = [

            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=int(16), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(16)),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=int(16), out_channels=int(32), kernel_size=3, padding=1),
            nn.Dropout(p=0.2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

        ]

        # 8192 = 32 channels x 16 x 16, i.e. a 32x32 input after the single 2x2 pooling.
        fc_layer = [
            nn.Flatten(),
            nn.Dropout(p=0.2),
            nn.Linear(8192, int(1024)),
            nn.ReLU(inplace=True),
            nn.Linear(int(1024), int(512)),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2)]

        main_layer = conv_layer+fc_layer

        self.main_layer = nn.Sequential(*main_layer)

        self.classifier_layer = nn.Sequential(
            nn.Linear(int(512), n_classes),
            nn.LogSoftmax(dim=1)
        )

        self.g_layer_1 = nn.Sequential(
            nn.Linear(512, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, n_classes)
        )
        # Input is g_layer_1's output concatenated with the classifier output.
        self.g_layer_2 = nn.Sequential(
            nn.Linear(2*n_classes, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )


    def forward(self, x):
        """Return ``(log_probs, g)`` when ``return_g`` is set, else ``log_probs``.

        The latest ``g`` is also stored on the instance (see ``get_g``).
        """
        x = self.main_layer(x)

        y = self.classifier_layer(x)

        g_features = torch.cat((self.g_layer_1(x), y), dim=1)
        self.g = self.g_layer_2(g_features).float()
        y = y.float()

        if self.return_g:
            return y,self.g
        else:
            return y

    def get_g(self):
        """Return the ``g`` tensor computed by the most recent forward pass."""
        return self.g

### Definição das classes de treinamento

In [None]:
class hist_train():

    '''Accumulate evaluation results while training.

    Every call to update_hist() evaluates the model over `data` and appends
    the resulting metrics to the lists kept on the instance.'''
    def __init__(self,model,loss_criterion,data, c = 1.0):
        '''Store the model, loss and data and create the empty metric lists.

        model: network to evaluate.
        loss_criterion: callable applied to (y_pred, label); its result must support .item().
        data: iterable of (image, label) batches.
        c: coverage; the coverage-based lists are only filled while c < 1.'''

        self.model = model
        self.loss_criterion = loss_criterion
        self.data = data
        self.c = c #coverage

        self.acc_list = []
        self.loss_list = []
        # Created unconditionally: `c` may be lowered after construction, and
        # update_hist() would then append to lists that were never created.
        self.acc_c_mcp = []
        self.acc_c_entropy = []


    def update_hist(self):
        '''Evaluate the model once and append accuracy and loss to the lists.

        When the coverage c is below 1, also appends the accuracy-at-coverage
        values obtained with the MCP and entropy uncertainty scores.'''

        self.model.eval()

        with torch.no_grad():
            # Outputs and labels are gathered for the whole dataset so that the
            # coverage-based accuracies can be computed over all samples at once.
            y_pred,label = accumulate_results(self.model,self.data)

            loss = self.loss_criterion(y_pred,label).item()
            acc = correct_total(y_pred,label)/label.size(0) #accuracy
            self.acc_list.append(acc)
            self.loss_list.append(loss)

            if self.c<1:
                # Uncertainty scores used to rank the samples.
                # NOTE(review): the exact semantics of unc.MCP_unc and
                # unc_comp.acc_coverage live in the project library - confirm there.
                mcp = unc.MCP_unc(y_pred)
                ent = entropy(y_pred)
                self.acc_c_mcp.append(unc_comp.acc_coverage(y_pred,label,mcp,1-self.c))
                self.acc_c_entropy.append(unc_comp.acc_coverage(y_pred,label,ent,1-self.c))

            
class Trainer():
    '''Helper for training/fitting a PyTorch model.

    Keeps one history object for the training data and, optionally, one for
    the validation data; both are refreshed after every epoch.'''
    def __init__(self,model,optimizer,loss_criterion,training_data,validation_data = None, c=1.0):
        '''Store the training objects and create the history trackers.

        validation_data: when None, no validation history is kept (hist_val is None).
        c: coverage forwarded to the history objects.'''

        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_criterion
        self.epoch = 0

        self.hist_train = hist_train(model,loss_criterion,training_data, c=c)
        self.hist_val = None
        if validation_data is not None:
            self.hist_val = hist_train(model,loss_criterion,validation_data,c=c)

    def _update_val_hist(self):
        '''Refresh the validation history when one exists.

        Replaces the former bare try/except, which was only meant to cover a
        missing validation history but also hid every real evaluation error.'''
        hist_val = getattr(self, 'hist_val', None)
        if hist_val is not None:
            hist_val.update_hist()

    def fit(self,data,n_epochs):
        '''Train for n_epochs epochs over `data`, updating the histories after each one.'''
        for _ in range(n_epochs):
            self.epoch += 1
            # Per the original note, train_NN puts the model in train mode itself.
            loss = train_NN(self.model,self.optimizer,data,self.loss_fn,1, print_loss = False)
            print('Epoch ', self.epoch, ', loss = ', loss)

            self.hist_train.update_hist()
            self._update_val_hist()

    def update_hist(self):
        '''Update the history objects.

        Useful before training, to record the pre-training values.'''
        self.hist_train.update_hist()
        self._update_val_hist()
        
            
            
class hist_train_g(hist_train):
    '''Accumulate evaluation results while training a model that also outputs g.

    Same role as hist_train, but additionally keeps the mean of g and the
    accuracy-at-coverage obtained when samples are ranked by 1 - g.'''
    def __init__(self,model,loss_criterion,data,c = 1.0):
        '''Create the metric lists.

        model: network whose forward pass returns a (y_pred, g) tuple.
        loss_criterion: callable applied to ((y_pred, g), label).
        data: iterable of (image, label) batches.
        c: coverage; the coverage-based lists are only filled while c < 1.'''
        super().__init__(model,loss_criterion,data,c=c)

        self.c = c
        self.g_list = []
        # Created unconditionally so they exist whatever value `c` takes later
        # (the original guarded creation with c>0 but usage with c<1).
        self.acc_c_g = []
        self.acc_c_mcp = []
        self.acc_c_entropy = []

    def update_hist(self):
        '''Evaluate the model once and append accuracy, loss and mean g.

        When the coverage c is below 1, also appends the accuracy-at-coverage
        values obtained with g, MCP and entropy as uncertainty scores.'''
        dev = next(self.model.parameters()).device
        self.model.eval()
        with torch.no_grad():

            # Outputs and labels are gathered for the whole dataset so that the
            # coverage-based accuracies can be computed over all samples at once.
            # accumulate_results returns ((y_pred, g), label) for tuple outputs.
            output,label = accumulate_results(self.model,self.data)
            y_pred,g = output
            g = g.view(-1)

            # The loss receives the (y_pred, g) pair; each tensor is moved to
            # the model's device individually (a tuple has no .to()).
            loss = self.loss_criterion((y_pred.to(dev),g.to(dev)),label.to(dev)).item()
            acc = correct_total(y_pred,label)/label.size(0)
            self.acc_list.append(acc)
            self.loss_list.append(loss)

            self.g_list.append(torch.mean(g).item())

            if self.c<1:
                # The g-models in this notebook return log-softmax outputs even
                # in eval mode, so they are converted to probabilities before
                # computing the uncertainty scores (entropy() expects probabilities).
                probs = torch.exp(y_pred)
                # NOTE(review): the exact semantics of unc.MCP_unc and
                # unc_comp.acc_coverage live in the project library - confirm there.
                mcp = unc.MCP_unc(probs)
                ent = entropy(probs)
                self.acc_c_g.append(unc_comp.acc_coverage(y_pred,label,1-g,1-self.c))
                self.acc_c_mcp.append(unc_comp.acc_coverage(y_pred,label,mcp,1-self.c))
                self.acc_c_entropy.append(unc_comp.acc_coverage(y_pred,label,ent,1-self.c))


class Trainer_with_g(Trainer):
    '''Trainer for models that also output g.

    Same as Trainer, but keeps hist_train_g histories and adds fit_g(), which
    trains only the layers that are not listed as ignored.'''
    def __init__(self,model,optimizer,loss_fn,training_data,validation_data = None, c = 0.8):
        '''Store the training objects and create hist_train_g history trackers.

        validation_data: when None, no validation history is created here.
        c: coverage forwarded to the history objects.'''
        super().__init__(model,optimizer,loss_fn,training_data,validation_data)

        self.hist_train = hist_train_g(model,loss_fn,training_data, c=c)
        if validation_data is not None:
            self.hist_val = hist_train_g(model,loss_fn,validation_data,c=c)

    def fit_g(self,data,n_epochs,ignored_layers = ['main_layer','classifier_layer']):
        '''Train for n_epochs epochs with the layers in ignored_layers excluded.

        The parameters are unfrozen again when training ends, including when
        an epoch raises, so the model is never left partially frozen.'''
        try:
            for _ in range(n_epochs):
                self.epoch += 1
                self.model.train()
                # Re-applied every epoch because update_hist() puts the model in eval mode.
                ignore_layers(self.model,ignored_layers, reset = False)
                train_NN(self.model,self.optimizer,data,self.loss_fn,n_epochs=1, print_loss = True,set_train_mode = False)
                self.hist_train.update_hist()
                # Explicit check instead of a bare try/except, which also hid
                # genuine evaluation errors.
                hist_val = getattr(self, 'hist_val', None)
                if hist_val is not None:
                    hist_val.update_hist()
        finally:
            unfreeze_params(self.model) #unfreeze params to avoid future mistakes
        

# Testes e treinamentos

### Definição da perda

In [None]:
# Per-sample negative log-likelihood (reduction='none' keeps one value per sample).
loss_criterion = nn.NLLLoss(reduction = 'none')
# Wrap it in the project's penalized_uncertainty loss.
# NOTE(review): the role of the second argument (log 2 here) is defined in
# uncertainty.losses - confirm there.
loss_fn = penalized_uncertainty(loss_criterion,np.log(2))

In [None]:
class aux_loss_fs(nn.Module):
    '''Loss that compares g against the per-sample result of correct_class.

    The wrapped criterion is applied to (g, target), where target is the
    float-cast output of correct_class on the exponentiated predictions.'''

    def __init__(self,loss_criterion):
        '''loss_criterion: callable applied to (g, target).'''
        super().__init__()
        self.L0 = 0
        self.criterion = loss_criterion

    def forward(self, output,y_true):
        '''Return the mean of criterion(g, target) for output = (y_pred, g).'''
        log_probs, confidence = output
        # y_pred holds log-probabilities; exponentiate before calling correct_class.
        target = correct_class(torch.exp(log_probs), y_true).float()
        return torch.mean(self.criterion(confidence.view(-1), target))

    def update_L0(self,new_L0):
        '''Overwrite the stored L0 value.'''
        with torch.no_grad():
            self.L0 = new_L0

In [None]:
class aux_loss(nn.Module):
    '''Loss that pulls g towards the value returned by unc.get_MCP.

    The loss is the mean squared difference between g and
    unc.get_MCP(exp(y_pred)). The stored criterion is not used by forward().'''

    def __init__(self,loss_criterion):
        '''loss_criterion: kept on the instance as `criterion`.'''
        super().__init__()
        self.L0 = 0
        self.criterion = loss_criterion

    def forward(self, y_pred,g,y_true=None):
        '''Return mean((g - MCP)^2).

        Accepts both calling conventions used in this notebook:
        forward(y_pred, g, y_true) and forward((y_pred, g), y_true). The
        second is how hist_train_g.update_hist and aux_loss_fs pass their
        arguments; the original three-argument signature rejected it.
        y_true does not enter the computation.'''
        if y_true is None:
            # Two-argument form: the first argument is the (y_pred, g) pair
            # and the second one carries the labels.
            output, y_true = y_pred, g
            y_pred, g = output
        g = g.view(-1)
        # y_pred holds log-probabilities; exponentiate before calling get_MCP.
        MCP = unc.get_MCP(torch.exp(y_pred))
        return torch.mean(torch.square(g-MCP))

    def update_L0(self,new_L0):
        '''Overwrite the stored L0 value.'''
        with torch.no_grad():
            self.L0 = new_L0

In [None]:
def accumulate_results(model,data):
    '''Run the model over a whole dataset and gather outputs and labels on the CPU.

    model: network with at least one parameter (its device is where the
        inputs are sent); it is switched to eval mode.
    data: iterable of (image, label) batches.

    Returns (output, labels). `labels` is a long tensor. `output` is the
    concatenated model output, or a tuple (output, g) when the model returns
    (output, g) tuples, with g flattened to one dimension. An empty `data`
    yields empty tensors instead of raising.'''
    with torch.no_grad():
        model.eval()
        dev = next(model.parameters()).device

        # Batches are collected in lists and concatenated once at the end,
        # instead of growing the tensors with torch.cat on every iteration.
        outputs, labels, gs = [], [], []

        for image,label in data:
            output = model(image.to(dev))

            if isinstance(output, tuple):
                output,g = output
                gs.append(g.view(-1).cpu())

            outputs.append(output.cpu())
            labels.append(label.cpu())

        output_list = torch.cat(outputs) if outputs else torch.Tensor([])
        label_list = torch.cat(labels) if labels else torch.Tensor([])

        if gs:
            output_list = (output_list,torch.cat(gs))

    return output_list,label_list.long()

def entropy(y_pred, reduction = 'none',eps = 1e-10):
    '''Entropy of a tensor of probabilities, taken over the last dimension.

    y_pred: probabilities; the last dimension is summed over.
    reduction: 'mean' or 'sum' collapses the result to a scalar; any other
        value returns one entropy per row.
    eps: added inside the logarithm to avoid log(0).'''
    per_sample = torch.sum(-y_pred*torch.log(y_pred+eps), -1)

    if reduction == 'mean':
        return torch.mean(per_sample)
    if reduction == 'sum':
        return torch.sum(per_sample)
    return per_sample

### Treinamento dos modelos


#### Classificador

In [None]:
# Plain classifier. The model is placed on `dev` (like every other cell)
# instead of calling .cuda(), so the cell also runs on a CPU-only machine.
model = Model_CNN(10).to(dev)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'mean')

model_trainer = Trainer(model,optimizer,loss_criterion, train_dataloader,validation_dataloader)
model_trainer.fit(train_dataloader,2000)
# Kept for the later cells, which load these weights into the g-models with strict=False.
state_dict  = model.state_dict()

acc = model_acc(model,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# NOTE(review): absolute, machine-specific directory - consider a configurable relative path.
PATH = r'/home/luis-felipe/Uncertainty_Estimation/torch_models'
# The separator was missing, so the file was written next to the directory
# (".../torch_modelsmodel_classifier") instead of inside it.
torch.save(model.state_dict(), PATH + '/model_classifier')

#### Perda padrão

In [None]:
# Model_CNN_with_g trained from scratch with the penalized-uncertainty loss.
model_1 = Model_CNN_with_g()
model_1 = model_1.to(dev)
optimizer = torch.optim.SGD(model_1.parameters(), lr=1e-3)

loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_1 = Trainer_with_g(model_1,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Trainer_with_g defines no fit_all(); training every layer is what the inherited fit() does.
model_trainer_1.fit(train_dataloader,200)
acc, g, bce = model_metrics(model_1,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_1,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:

# Model_CNN_with_g initialised from the trained classifier; only the g head is trained.
model_sep = Model_CNN_with_g()
model_sep = model_sep.to(dev)
# strict=False: the classifier's state dict has no entries for the g head.
model_sep.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_sep.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep = Trainer_with_g(model_sep,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Start from copies of the classifier's accuracy/loss history, as the sibling
# cells do. The original replaced the history objects with the classifier's
# own, which evaluate `model` rather than `model_sep`, lack the g lists and
# are shared with model_trainer.
model_trainer_sep.hist_train.acc_list = copy.copy(model_trainer.hist_train.acc_list)
model_trainer_sep.hist_train.loss_list = copy.copy(model_trainer.hist_train.loss_list)
model_trainer_sep.hist_val.acc_list = copy.copy(model_trainer.hist_val.acc_list)
model_trainer_sep.hist_val.loss_list = copy.copy(model_trainer.hist_val.loss_list)

#model_trainer_sep.fit(train_dataloader,40)
#model_trainer_sep.optimizer = torch.optim.SGD(model_sep.parameters(), lr=1e-2) # try other learning rates
model_trainer_sep.fit_g(validation_dataloader,200)

acc, g, bce = model_metrics(model_sep,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_sep,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model_CNN_with_g_2 trained from scratch with the penalized-uncertainty loss.
model_2 = Model_CNN_with_g_2()
model_2 = model_2.to(dev)
optimizer = torch.optim.SGD(model_2.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_2 = Trainer_with_g(model_2,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Trainer_with_g defines no fit_all(); training every layer is what the inherited fit() does.
model_trainer_2.fit(train_dataloader,80)

acc, g, bce = model_metrics(model_2,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_2,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model 2 (TODO: also try tanh): classifier weights loaded, only the g head is trained.
model_sep_2 = Model_CNN_with_g_2().to(dev)
# strict=False: the classifier's state dict has no entries for the g head.
model_sep_2.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_sep_2.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_2 = Trainer_with_g(model_sep_2,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Seed the new histories with copies of the classifier's accuracy/loss curves.
for own_hist, base_hist in ((model_trainer_sep_2.hist_train, model_trainer.hist_train),
                            (model_trainer_sep_2.hist_val, model_trainer.hist_val)):
    own_hist.acc_list = copy.copy(base_hist.acc_list)
    own_hist.loss_list = copy.copy(base_hist.loss_list)
model_trainer_sep_2.fit_g(train_dataloader,200)

# Report the metrics on the training and test sets.
for split_name, loader in (('Conjunto de treinamento', train_dataloader),
                           ('Conjunto de teste', test_dataloader)):
    acc, g, bce = model_metrics(model_sep_2,loss_criterion,loader)
    print(split_name + ': acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model_CNN_with_g_3 trained from scratch with the penalized-uncertainty loss.
model_3 = Model_CNN_with_g_3()
model_3 = model_3.to(dev)
optimizer = torch.optim.SGD(model_3.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_3 = Trainer_with_g(model_3,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Trainer_with_g defines no fit_all(); training every layer is what the inherited fit() does.
model_trainer_3.fit(train_dataloader,80)

acc, g, bce = model_metrics(model_3,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_3,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model 3: classifier weights loaded, only the g head is trained.
model_sep_3 = Model_CNN_with_g_3().to(dev)
# strict=False: the classifier's state dict has no entries for the g head.
model_sep_3.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_sep_3.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_3 = Trainer_with_g(model_sep_3,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Seed the new histories with copies of the classifier's accuracy/loss curves.
for own_hist, base_hist in ((model_trainer_sep_3.hist_train, model_trainer.hist_train),
                            (model_trainer_sep_3.hist_val, model_trainer.hist_val)):
    own_hist.acc_list = copy.copy(base_hist.acc_list)
    own_hist.loss_list = copy.copy(base_hist.loss_list)
model_trainer_sep_3.fit_g(train_dataloader,200)

# Report the metrics on the training and test sets.
for split_name, loader in (('Conjunto de treinamento', train_dataloader),
                           ('Conjunto de teste', test_dataloader)):
    acc, g, bce = model_metrics(model_sep_3,loss_criterion,loader)
    print(split_name + ': acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model_CNN_with_g_4 trained from scratch with the penalized-uncertainty loss.
model_4 = Model_CNN_with_g_4()
model_4 = model_4.to(dev)
optimizer = torch.optim.SGD(model_4.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_4 = Trainer_with_g(model_4,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Trainer_with_g defines no fit_all(); training every layer is what the inherited fit() does.
model_trainer_4.fit(train_dataloader,80)

acc, g, bce = model_metrics(model_4,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_4,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# Model 4: classifier weights loaded, only the g head is trained.
model_sep_4 = Model_CNN_with_g_4()
model_sep_4 = model_sep_4.to(dev)
# strict=False: the classifier's state dict has no entries for the g head.
model_sep_4.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_sep_4.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_4 = Trainer_with_g(model_sep_4,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# Copy the classifier's history lists, as the sibling cells do. Assigning the
# lists themselves made fit_g() append to model_trainer's own history.
model_trainer_sep_4.hist_train.acc_list = copy.copy(model_trainer.hist_train.acc_list)
model_trainer_sep_4.hist_train.loss_list = copy.copy(model_trainer.hist_train.loss_list)
model_trainer_sep_4.hist_val.acc_list = copy.copy(model_trainer.hist_val.acc_list)
model_trainer_sep_4.hist_val.loss_list = copy.copy(model_trainer.hist_val.loss_list)
model_trainer_sep_4.fit_g(train_dataloader,200)

acc, g, bce = model_metrics(model_sep_4,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_sep_4,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

#### Perda adaptada

In [None]:
# Model 2 trained with the adapted loss (aux_loss).
model_sep_mcp = Model_CNN_with_g_2()
model_sep_mcp = model_sep_mcp.to(dev)
optimizer = torch.optim.SGD(model_sep_mcp.parameters(), lr=1e-3)
# Defined here, as in every other cell, instead of relying on the value left
# behind by whichever cell ran before this one.
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = aux_loss(loss_criterion)#penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_mcp = Trainer_with_g(model_sep_mcp,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_sep_mcp.fit(train_dataloader,40)
model_trainer_sep_mcp.fit_g(validation_dataloader,200)

acc, g, bce = model_metrics(model_sep_mcp,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_sep_mcp,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# "fs" model on architecture 2: weights come from `state_dict`, g is fitted with `aux_loss_fs`.
model_sep_fs = Model_CNN_with_g_2().to(dev)
model_sep_fs.load_state_dict(state_dict, strict = False)
optimizer = torch.optim.SGD(model_sep_fs.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = aux_loss_fs(loss_criterion)  # alternative: penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_fs = Trainer_with_g(
    model_sep_fs, optimizer, loss_fn,
    train_dataloader, validation_dataloader,
    c = 0.2,
)
# The full `fit` pass is skipped; the accuracy/loss histories start from shallow copies of the
# base trainer's lists instead.
for _hist_name in ('hist_train', 'hist_val'):
    _source_hist = getattr(model_trainer, _hist_name)
    _target_hist = getattr(model_trainer_sep_fs, _hist_name)
    _target_hist.acc_list = copy.copy(_source_hist.acc_list)
    _target_hist.loss_list = copy.copy(_source_hist.loss_list)

# The criterion wrapped by the trainer's loss is swapped for BCE before fitting g.
model_trainer_sep_fs.loss_fn.criterion = nn.BCELoss()
model_trainer_sep_fs.fit_g(train_dataloader, 900)

# Report accuracy, mean g and mean loss on the training and test sets.
for _header, _loader in (
    ('Conjunto de treinamento: acc = ', train_dataloader),
    ('Conjunto de teste: acc = ', test_dataloader),
):
    acc, g, bce = model_metrics(model_sep_fs, loss_criterion, _loader)
    print(_header, acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# "fs" model on architecture 3: weights come from `state_dict`, g is fitted with `aux_loss_fs`.
model_sep_fs_3 = Model_CNN_with_g_3().to(dev)
model_sep_fs_3.load_state_dict(state_dict, strict = False)
optimizer = torch.optim.SGD(model_sep_fs_3.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = aux_loss_fs(loss_criterion)  # alternative: penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_fs_3 = Trainer_with_g(
    model_sep_fs_3, optimizer, loss_fn,
    train_dataloader, validation_dataloader,
    c = 0.2,
)
# The full `fit` pass is skipped; the accuracy/loss histories start from shallow copies of the
# base trainer's lists instead.
for _hist_name in ('hist_train', 'hist_val'):
    _source_hist = getattr(model_trainer, _hist_name)
    _target_hist = getattr(model_trainer_sep_fs_3, _hist_name)
    _target_hist.acc_list = copy.copy(_source_hist.acc_list)
    _target_hist.loss_list = copy.copy(_source_hist.loss_list)

# The criterion wrapped by the trainer's loss is swapped for BCE before fitting g.
model_trainer_sep_fs_3.loss_fn.criterion = nn.BCELoss()
model_trainer_sep_fs_3.fit_g(train_dataloader, 800)

# Report accuracy, mean g and mean loss on the training and test sets.
for _header, _loader in (
    ('Conjunto de treinamento: acc = ', train_dataloader),
    ('Conjunto de teste: acc = ', test_dataloader),
):
    acc, g, bce = model_metrics(model_sep_fs_3, loss_criterion, _loader)
    print(_header, acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

In [None]:
# "fs" model on architecture 4: weights come from `state_dict`, g is fitted with `aux_loss_fs`.
model_sep_fs_4 = Model_CNN_with_g_4()
model_sep_fs_4 = model_sep_fs_4.to(dev)
# strict=False tolerates keys that are missing from / unexpected in `state_dict`.
model_sep_fs_4.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_sep_fs_4.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = aux_loss_fs(loss_criterion)  # alternative: penalized_uncertainty(loss_criterion,np.log(10))

model_trainer_sep_fs_4 = Trainer_with_g(model_sep_fs_4,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
# The full `fit` pass is skipped. Histories start from *copies* of the base trainer's lists
# (as in the `model_trainer_sep_fs` cell) so that this trainer never mutates `model_trainer`.
model_trainer_sep_fs_4.hist_train.acc_list = copy.copy(model_trainer.hist_train.acc_list)
model_trainer_sep_fs_4.hist_train.loss_list = copy.copy(model_trainer.hist_train.loss_list)
model_trainer_sep_fs_4.hist_val.acc_list = copy.copy(model_trainer.hist_val.acc_list)
model_trainer_sep_fs_4.hist_val.loss_list = copy.copy(model_trainer.hist_val.loss_list)

# The criterion wrapped by the trainer's loss is swapped for BCE before fitting g.
model_trainer_sep_fs_4.loss_fn.criterion = nn.BCELoss()
model_trainer_sep_fs_4.fit_g(train_dataloader,800)

# Evaluate the model trained in THIS cell (the metrics were previously computed on
# `model_sep_fs`, a different model).
acc, g, bce = model_metrics(model_sep_fs_4,loss_criterion,train_dataloader)
print('Conjunto de treinamento: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')
acc, g, bce = model_metrics(model_sep_fs_4,loss_criterion,test_dataloader)
print('Conjunto de teste: acc = ', acc, 'média de g = ', g, 'média de bce = ', bce, '\n')

#### Perda selective

In [None]:
def entropy_const(w):
    """Return exp(entropy(w, reduction='sum')) divided by the size of w's first dimension."""
    exp_entropy = torch.exp(entropy(w, reduction = 'sum'))
    return exp_entropy / w.size(0)
def normalize_tensor(x):
    """L1-normalize `x` along its last dimension."""
    return torch.nn.functional.normalize(x, p=1, dim=-1)

def mean_const(x):
    """Return the mean of `x` taken over its last dimension."""
    return torch.mean(x, dim=-1)

def IPM_selectivenet(r,const,lamb = 32):
    """Turn a constrained objective into an unconstrained one with a quadratic penalty.

    Args:
        r: risk (loss) tensor to be minimised.
        const: constraint tensor; the constraint counts as satisfied when ``const <= 0``.
        lamb: penalty weight applied to the squared violation.

    Returns:
        ``r + lamb * max(0, const) ** 2``. The penalty vanishes wherever ``const <= 0``.
    """
    # Build the zero on const's own device/dtype rather than forcing it onto CUDA, so the
    # function also works on CPU-only hosts and never mixes devices. The 1-element shape is
    # kept so the result broadcasts exactly as before.
    zero = torch.zeros(1, dtype=const.dtype, device=const.device)
    gama = lamb*torch.square(torch.maximum(zero,const))
    objective = r + gama
    return objective
    
# Implement 3 approaches for the SelectiveNet baseline:
# with head H, with F in place of head H, and without an auxiliary head (paper).
class selective_net_2(torch.nn.Module):
    """Selective-prediction loss: a g-weighted risk plus an optional constraint term.

    Args:
        criterion: per-sample loss; must have reduction set to 'none'.
        w_fn: transform applied to g to obtain the per-sample weights w.
        c_fn: transform applied to the constrained variable; its value must be >= c.
        optim_method: callable ``(risk, constraint) -> objective`` returning an equivalent
            unconstrained objective, or None to use the weighted risk alone.
        c: target value (coverage) that ``c_fn`` must reach.
        alpha: mixing factor between the selective loss and the auxiliary loss; the
            auxiliary term is only evaluated when ``alpha != 1.0``.
        head: ``'y'`` to compute the auxiliary loss on ``y_pred``; otherwise a zero-argument
            callable returning the auxiliary-head output.
        const_var: ``'w'`` to constrain the transformed weights, ``'g'`` to constrain raw g.
    """
    def __init__(self,criterion,w_fn = normalize_tensor,c_fn = entropy_const,optim_method = IPM_selectivenet, c = 0.8,
                 alpha = 1.0, head = 'y',const_var = 'w'):
        super().__init__()

        self.criterion = criterion #criterion must have reduction set to 'none'
        self.w_fn = w_fn #transform applied to g
        self.c_fn = c_fn #transform applied to w that goes onto constraint
        self.optim_method = optim_method #transform applied to risk (loss) and constraint and returns a equivalent unconstrained objective
        self.c = c #coverage
        self.alpha = alpha
        self.head = head
        self.const_var = const_var

    def get_loss(self,y_pred,w,y_true):
        """Return the sum over samples of ``w * criterion(y_pred, y_true)``."""
        loss = w*self.criterion(y_pred,y_true)
        loss = torch.sum(loss)

        return loss

    def get_constraint(self,w):
        """Return ``c - c_fn(w)``; the constraint is satisfied when the result is <= 0."""
        H = self.c_fn(w) #must be >= c
        constraint = self.c - H #must be <=0
        return constraint

    def forward(self,output,y_true):
        """Compute the loss for ``output = (y_pred, g)`` against ``y_true``.

        Raises:
            ValueError: if an ``optim_method`` is set and ``const_var`` is neither
                ``'w'`` nor ``'g'``.
        """
        y_pred,g = output
        g = g.view(-1)
        w = self.w_fn(g)

        loss = self.get_loss(y_pred,w,y_true)
        # The constraint only matters when an optim_method consumes it, so it is built (and
        # const_var validated) only in that case. Previously an unknown const_var left
        # `const` unbound and failed with an UnboundLocalError.
        if self.optim_method is not None:
            if self.const_var == 'w':
                const = self.get_constraint(w)
            elif self.const_var == 'g':
                const = self.get_constraint(g)
            else:
                raise ValueError("const_var must be 'w' or 'g', got {!r}".format(self.const_var))
            loss = self.optim_method(loss, const)

        if self.alpha != 1.0:
            # Auxiliary term: same loss with w_fn applied to an all-ones vector instead of g.
            w = self.w_fn(torch.ones([torch.numel(g)])).to(y_pred.device)
            if self.head == 'y':
                loss_h = self.get_loss(y_pred,w,y_true)
            else:
                h = self.head()
                # The auxiliary term is dropped when the head output and the targets
                # disagree on batch size.
                loss_h = self.get_loss(h,w,y_true) if (h.size(0) == y_true.size(0)) else 0
            loss = self.alpha*loss + (1-self.alpha)*loss_h

        return loss

In [None]:
# Selective loss without the constraint term (optim_method=None), trained from scratch.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_noconst = Model_CNN_with_g_3(10).to(dev)
optimizer = torch.optim.SGD(model_selective_noconst.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion,optim_method = None)

model_trainer_selective_noconst = Trainer_with_g(model_selective_noconst,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_noconst.fit(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_noconst.return_g = False
acc = model_acc(model_selective_noconst,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_noconst,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective loss with the default constraint, trained from scratch.
# Original annotation on this run: batch_size = 12.
# NOTE(review): the batch size is not set in this cell; it comes from however
# `train_dataloader` was built - confirm.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective = Model_CNN_with_g_3(10).to(dev)
optimizer = torch.optim.SGD(model_selective.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion)

model_trainer_selective = Trainer_with_g(model_selective,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective.fit(train_dataloader,2000)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective.return_g = False
acc = model_acc(model_selective,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective loss with the default constraint, trained from scratch.
# Original annotation on this run: batch_size = 100.
# NOTE(review): this cell uses the same `train_dataloader` as the `model_selective` cell; the
# batch size is only different if the loader was rebuilt in between - confirm.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_100 = Model_CNN_with_g_3(10).to(dev)
optimizer = torch.optim.SGD(model_selective_100.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion)

model_trainer_selective_100 = Trainer_with_g(model_selective_100,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_100.fit(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_100.return_g = False
acc = model_acc(model_selective_100,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_100,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective loss without the constraint term, starting from `state_dict`; only `fit_g` is run.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_sep_noconst = Model_CNN_with_g_3(10).to(dev)
# strict=False tolerates keys that are missing from / unexpected in `state_dict`.
model_selective_sep_noconst.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_selective_sep_noconst.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion,optim_method = None)

model_trainer_selective_sep_noconst = Trainer_with_g(model_selective_sep_noconst,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_sep_noconst.fit_g(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_sep_noconst.return_g = False
acc = model_acc(model_selective_sep_noconst,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_sep_noconst,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective loss with the default constraint, starting from `state_dict`; only `fit_g` is run.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_sep = Model_CNN_with_g_3(10).to(dev)
# strict=False tolerates keys that are missing from / unexpected in `state_dict`.
model_selective_sep.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_selective_sep.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion)

model_trainer_selective_sep = Trainer_with_g(model_selective_sep,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_sep.fit_g(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_sep.return_g = False
acc = model_acc(model_selective_sep,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_sep,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective loss with the mean-based constraint (`mean_const`), starting from `state_dict`;
# only `fit_g` is run.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_def_sep = Model_CNN_with_g_3(10).to(dev)
# strict=False tolerates keys that are missing from / unexpected in `state_dict`.
model_selective_def_sep.load_state_dict(state_dict,strict = False)
optimizer = torch.optim.SGD(model_selective_def_sep.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion, c_fn = mean_const)

model_trainer_selective_def_sep = Trainer_with_g(model_selective_def_sep,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_def_sep.fit_g(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_def_sep.return_g = False
acc = model_acc(model_selective_def_sep,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_def_sep,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective model with an auxiliary head: mean constraint applied to raw g, and the loss mixed
# 50/50 (alpha = 0.5) with the auxiliary loss computed on the output of `get_h`.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_def = Model_CNN_with_g_selective(10).to(dev)
optimizer = torch.optim.SGD(model_selective_def.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion, c_fn = mean_const,alpha = 0.5,head = model_selective_def.get_h,const_var = 'g')

model_trainer_selective_def = Trainer_with_g(model_selective_def,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_def.fit(train_dataloader,500)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_def.return_g = False
acc = model_acc(model_selective_def,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_def,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective model where the auxiliary loss is computed on the main prediction (head = 'y'):
# mean constraint applied to raw g, loss mixed 50/50 (alpha = 0.5).
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_def_head_f = Model_CNN_with_g_selective(10).to(dev)
optimizer = torch.optim.SGD(model_selective_def_head_f.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion, c_fn = mean_const,alpha = 0.5,head = 'y',const_var = 'g')

model_trainer_selective_def_head_f = Trainer_with_g(model_selective_def_head_f,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_def_head_f.fit(train_dataloader,1000)

# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_def_head_f.return_g = False
acc = model_acc(model_selective_def_head_f,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_def_head_f,test_dataloader)
print('Conjunto de teste: acc = ', acc)

In [None]:
# Selective model without the auxiliary loss term (alpha keeps its default of 1.0): mean
# constraint applied to raw g.
# `.to(dev)` replaces the hard-coded `.cuda()` so the cell follows the notebook's device
# selection and still runs on CPU-only hosts.
model_selective_def_no_head = Model_CNN_with_g_selective(10).to(dev)
optimizer = torch.optim.SGD(model_selective_def_no_head.parameters(), lr=1e-3)
loss_criterion = nn.NLLLoss(reduction = 'none')
loss_fn = selective_net_2(loss_criterion, c_fn = mean_const,const_var = 'g')

model_trainer_selective_def_no_head = Trainer_with_g(model_selective_def_no_head,optimizer,loss_fn, train_dataloader,validation_dataloader,c = 0.2)
model_trainer_selective_def_no_head.fit(train_dataloader,1000)

# The flag is set on the model trained in this cell; the previous name
# (`model_selective_no_head`) is not defined and raised a NameError.
# NOTE(review): presumably makes the forward pass return class outputs only, as `model_acc`
# expects - confirm.
model_selective_def_no_head.return_g = False
acc = model_acc(model_selective_def_no_head,train_dataloader)
print('Conjunto de treinamento: acc = ', acc)
acc = model_acc(model_selective_def_no_head,test_dataloader)
print('Conjunto de teste: acc = ', acc)

## Plots e análises

In [None]:
# Registry used by the plotting cells: each trained network maps to the trainer that holds
# its training/validation history.
models = dict([
    (model, model_trainer),
    (model_selective, model_trainer_selective),
    (model_selective_noconst, model_trainer_selective_noconst),
    (model_selective_sep, model_trainer_selective_sep),
    (model_selective_sep_noconst, model_trainer_selective_sep_noconst),
    (model_selective_def, model_trainer_selective_def),
    (model_selective_def_no_head, model_trainer_selective_def_no_head),
])
# Entries currently disabled:
#   model_sep_2: model_trainer_sep_2
#   model_sep_4: model_trainer_sep_4
#   model_sep_fs: model_trainer_sep_fs
#   model_sep_fs_3: model_trainer_sep_fs_3
#   model_sep_fs_4: model_trainer_sep_fs_4
#   model_sep_3: model_trainer_sep_3

In [None]:
# Inspect entry 1000 of the validation `acc_c_mcp` history of the current `trainer`.
# NOTE(review): no assignment to `trainer` is visible before this cell in the reviewed
# section (the plotting cells below bind it), so this likely relies on leftover kernel state
# and may fail on a fresh Restart & Run All - confirm which trainer was meant.
trainer.hist_val.acc_c_mcp[1000]

In [None]:
# Training curves of a single model: selective accuracies (validation, then training), mean g,
# and loss.
mod = model_selective_def
trainer = models[mod]
_val_hist = trainer.hist_val
_train_hist = trainer.hist_train

# Figure 1 - validation accuracies under each uncertainty measure, plus the plain accuracy.
for _attr, _legend in (
    ('acc_c_mcp', 'mcp - val'),
    ('acc_c_g', 'g - val'),
    ('acc_c_entropy', 'entropy - val'),
    ('acc_list', 'acc_0 - val'),
):
    plt.plot(getattr(_val_hist, _attr), label = _legend)
plt.grid()
plt.legend()
plt.show()

# Figure 2 - same for training; the plain accuracy is drawn as a horizontal line at its
# last recorded value.
for _attr, _legend in (
    ('acc_c_mcp', 'mcp - train'),
    ('acc_c_g', 'g - train'),
    ('acc_c_entropy', 'entropy - train'),
):
    plt.plot(getattr(_train_hist, _attr), label = _legend)
plt.axhline(_train_hist.acc_list[-1], color = 'r', label = 'acc_0 - train')
plt.grid()
plt.legend()
plt.show()

# Figure 3 - evolution of the mean of g.
plt.plot(_val_hist.g_list, label = 'g - val')
plt.plot(_train_hist.g_list, label = 'g - train')
plt.title('variação da média de g')
plt.legend()
plt.grid()
plt.show()

# Figure 4 - loss curves.
plt.plot(_val_hist.loss_list, label = 'Validation')
plt.plot(_train_hist.loss_list, label = 'Training')
plt.grid()
plt.legend()
plt.show()

In [None]:
# Compare the model with the auxiliary head (dashed lines) against the one without it (solid
# lines), first on validation and then on training histories.
_trainer_head = models[model_selective_def]
_trainer_no_head = models[model_selective_def_no_head]
_curve_attrs = ('acc_c_mcp', 'acc_c_g', 'acc_c_entropy', 'acc_list')

# Validation figure.
for _attr, _legend in zip(
    _curve_attrs,
    ('mcp - val', 'g - val', 'entropy - val', 'acc_0 - val'),
):
    plt.plot(getattr(_trainer_head.hist_val, _attr), '--', label = _legend)
for _attr, _legend in zip(
    _curve_attrs,
    ('mcp - val - noHead', 'g - val - NoHead', 'entropy - val - NoHead', 'acc_0 - val - NoHead'),
):
    plt.plot(getattr(_trainer_no_head.hist_val, _attr), label = _legend)
plt.xlim(0, 200)
plt.grid()
plt.legend()
plt.show()

# Training figure.
for _attr, _legend in zip(
    _curve_attrs,
    ('mcp - train', 'g - train', 'entropy - train', 'acc_0 - train'),
):
    plt.plot(getattr(_trainer_head.hist_train, _attr), '--', label = _legend)
for _attr, _legend in zip(
    _curve_attrs,
    ('mcp - train - NoHead', 'g - train - NoHead', 'entropy - train - NoHead', 'acc_0 - train - NoHead'),
):
    plt.plot(getattr(_trainer_no_head.hist_train, _attr), label = _legend)
plt.xlim(0, 200)
plt.grid()
plt.legend()
plt.show()

# Leave the notebook-level selection on the last model plotted.
mod = model_selective_def_no_head
trainer = models[mod]

In [None]:
# Rebind the notebook-level `trainer` to the entry registered for the base `model`.
trainer = models[model]

In [None]:
# Accuracy curves of the base model.
# NOTE(review): the "acc_fg" and "acc_f" curve groups are both read from the same trainer
# (`models[model]`), so they overlap exactly - confirm whether one group was meant to come
# from a different model.
mod = model
trainer = models[mod]
_val_acc = trainer.hist_val.acc_list
_train_acc = trainer.hist_train.acc_list

plt.plot(_val_acc, label = 'acc_fg - val')
plt.plot(_train_acc, label = 'acc_fg - train')
plt.plot(_train_acc, label = 'acc_f - train')
plt.plot(_val_acc, label = 'acc_f - val')

plt.grid()
plt.legend()
plt.show()

In [None]:
# Accuracy and loss histories of the base model, one figure each (training first, then
# validation).
mod = model
trainer = models[mod]

for _attr, _train_legend, _val_legend in (
    ('acc_list', 'acc - train', 'acc - val'),
    ('loss_list', 'loss - Training', 'loss - Validation'),
):
    plt.plot(getattr(trainer.hist_train, _attr), label = _train_legend)
    plt.plot(getattr(trainer.hist_val, _attr), label = _val_legend)
    plt.grid()
    plt.legend()
    plt.show()

In [None]:
# Collect the base model's outputs and labels over the validation loader.
mod = model
output,label = accumulate_results(mod,validation_dataloader)
# Disabled: weights and entropy-based constraint value computed from g.
#w = normalize_tensor(g)
#H = entropy_const(w)

In [None]:
# NOTE(review): the only visible assignment of `H` (in the preceding cell) is commented out,
# so this relies on leftover kernel state - confirm.
H.item()

In [None]:
# Histogram of `g`.
# NOTE(review): in the visible cells the global `g` is only bound by the `model_metrics`
# unpacking (printed as a mean) - confirm it holds per-sample values at this point.
plt.hist(g)

In [None]:
# Sweep `c` over a 0.05-step grid and record the value of `unc_comp.acc_coverage` when ranking
# by g and by MCP, together with the reference curve min(1, acc / (1 - c)).
mod = model_selective
mod.return_g = True
output,label = accumulate_results(mod,test_dataloader)

# NOTE(review): `acc` is not recomputed here (the line below is disabled), so the reference
# curve uses whatever accuracy an earlier cell left in `acc` - confirm it belongs to `mod`.
#acc = correct_total(output,label)/label.size(0)
# NOTE(review): `g` is not assigned in this cell either - confirm it holds the per-sample g
# values matching `output`.
# The accumulators are (re)initialised so the cell works on a fresh kernel and does not keep
# growing stale lists when re-run.
g_list = []
mcp_list = []
ideal = []
# The MCP uncertainty does not depend on `c`, so it is computed once outside the loop.
mcp = unc.MCP_unc(output)
for c in np.arange(0,1,0.05):
    g_list.append(unc_comp.acc_coverage(output,label,1-g,c))
    mcp_list.append(unc_comp.acc_coverage(output,label,mcp,c))
    ideal.append(min(1,acc/(1-c)))


In [None]:
# Value of `unc_comp.acc_coverage` under MCP ranking versus the reference curve
# min(1, acc / (1 - c)), over a 0.05-step grid of `c`.
# NOTE(review): `output`, `label` and `acc` come from earlier cells - confirm they refer to
# the same model.
ideal = []
mcp_list = []
# The grid is built once and reused for both the sweep and the x-axis of the plot.
c_grid = np.arange(0,1,0.05)
# The MCP uncertainty does not depend on `c`, so it is computed once outside the loop.
mcp = unc.MCP_unc(output)
for c in c_grid:
    mcp_list.append(unc_comp.acc_coverage(output,label,mcp,c))
    ideal.append(min(1,acc/(1-c)))


plt.plot(c_grid,mcp_list,label = 'mcp')
plt.plot(c_grid,ideal,label = 'ideal')
plt.grid()
plt.legend()
plt.show()

In [None]:
# Fifth entry of `mcp_list`, i.e. the `unc_comp.acc_coverage` result for c = 0.20 on the
# np.arange(0, 1, 0.05) grid used to build the list.
mcp_list[4]

In [None]:
from scipy.optimize import fsolve
def const_eq(lamb,g):
    w = nn.functional.softmax(lamb*g)
    m = torch.numel(g)
    H = entropy(w)
    const = H - torch.log(0.8*m)