In [12]:
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from DataPreprocess import create_dataloader
from InitializingModule import InitModel
from TrainingAlgorithm import train, reorganize_module, LTS_module, multiclass_weight_tuning
from utils import BinaryFocalLossWithLogits

In [16]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.8, gamma=2, weight=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets, smooth=1):
        
        #comment out if your model contains a sigmoid or equivalent activation layer
        inputs = torch.sigmoid(inputs)       
        
        #flatten label and prediction tensors
        inputs = inputs.view(-1)
        targets = targets.view(-1)
        
        #first compute binary cross-entropy 
        BCE = F.binary_cross_entropy(inputs, targets, reduction='mean')
        BCE_EXP = torch.exp(-BCE)
        focal_loss = self.alpha * (1-BCE_EXP)**self.gamma * BCE
                       
        return focal_loss

In [27]:
y_true = torch.tensor([0.])
y_pred = torch.tensor([1.])
criterion = BinaryFocalLossWithLogits(alpha=0.25)
criterion(y_true, y_pred)

tensor(0.1733)

In [26]:
criterion = FocalLoss(alpha=0.25)
criterion(y_true, y_pred)

tensor(0.0433)

In [7]:
torch.sigmoid(y_pred)

tensor([0.1235])

In [127]:
train_loader, val_loader = create_dataloader(datapath='data/SPECT_data.txt', batch_size=128)

In [133]:
device = torch.device('cuda')
a = InitModel(input_size=22, hidden_size=5, output_size=1, device=device, batch_norm=True)
model = a.init_module_multi_ReLU_AE(train_loader)


In [149]:
sum(p.numel() for p in model.parameters())

131

In [148]:
1 == 1

True

In [146]:
a[:2]

[0, 1]

In [124]:
reg_optimizer = optim.Adam(model.parameters(), lr=5e-4)
weight_optimizer = optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.BCEWithLogitsLoss()
_, index = LTS_module(train_loader, model, criterion, 0.5)
print(index)



model = reorganize_module(model=model,
                            train_loader=train_loader,
                            val_loader=val_loader,
                            criterion=criterion,
                            reg_epochs=5,
                            reg_optimizer=reg_optimizer,
                            reg_loss=None,
                            reg_eta=None,
                            weight_epochs=10,
                            weight_optimizer=weight_optimizer,
                            weight_loss=0.4,
                            weight_eta=None,
                            p=20*3,
                            device=device)

164
[ 1/5 ] | train_loss = 0.42140, train_acc = 0.85901, val_loss = 0.43815, val_acc = 0.70370
[ 2/5 ] | train_loss = 0.42022, train_acc = 0.86880, val_loss = 0.43722, val_acc = 0.70370
[ 3/5 ] | train_loss = 0.41914, train_acc = 0.86880, val_loss = 0.43616, val_acc = 0.70370
[ 4/5 ] | train_loss = 0.41802, train_acc = 0.86880, val_loss = 0.43500, val_acc = 0.70370
[ 5/5 ] | train_loss = 0.41696, train_acc = 0.86880, val_loss = 0.43391, val_acc = 0.70370
Already trained 5 epochs, acceptable
[ 1/10 ] | train_loss = 0.53132, train_acc = 0.75726, val_loss = 0.53903, val_acc = 0.83333
[ 2/10 ] | train_loss = 0.53132, train_acc = 0.75726, val_loss = 0.53903, val_acc = 0.83333
[ 3/10 ] | train_loss = 0.53132, train_acc = 0.75726, val_loss = 0.53903, val_acc = 0.83333
[ 4/10 ] | train_loss = 0.53132, train_acc = 0.75726, val_loss = 0.53903, val_acc = 0.83333
[ 5/10 ] | train_loss = 0.53132, train_acc = 0.75726, val_loss = 0.53903, val_acc = 0.79630
[ 6/10 ] | train_loss = 0.53132, train_acc =

In [134]:
wt_opt = optim.Adam(model.parameters(), lr=5e-4)
s, model = multiclass_weight_tuning(train_loader, val_loader, epochs=10, model=model, optimizer=wt_opt, criterion=criterion, loss_threshold=0.35, eta_threshold=1e-6, device=device)


[ 1/10 ] | train_loss = 0.62045, train_acc = 0.66530, val_loss = 0.61779, val_acc = 0.70370
[ 2/10 ] | train_loss = 0.61557, train_acc = 0.70253, val_loss = 0.61311, val_acc = 0.70370
[ 3/10 ] | train_loss = 0.60996, train_acc = 0.70055, val_loss = 0.60770, val_acc = 0.75926
[ 4/10 ] | train_loss = 0.60338, train_acc = 0.72013, val_loss = 0.60138, val_acc = 0.75926
[ 5/10 ] | train_loss = 0.59589, train_acc = 0.72992, val_loss = 0.59436, val_acc = 0.74074
[ 6/10 ] | train_loss = 0.58755, train_acc = 0.72992, val_loss = 0.58648, val_acc = 0.72222
[ 7/10 ] | train_loss = 0.57822, train_acc = 0.72992, val_loss = 0.57780, val_acc = 0.72222
[ 8/10 ] | train_loss = 0.56786, train_acc = 0.73773, val_loss = 0.56828, val_acc = 0.72222
[ 9/10 ] | train_loss = 0.55735, train_acc = 0.73971, val_loss = 0.55920, val_acc = 0.70370
[ 10/10 ] | train_loss = 0.54651, train_acc = 0.73382, val_loss = 0.54984, val_acc = 0.72222
Already trained 10 epochs, unacceptable
Unacceptable


In [8]:
def predict(model, data_loader, criterion, device):
    
    model.to(device).eval()
    mem = []
    with torch.no_grad():
        for i, data in enumerate(data_loader.dataset):
            x, y = data[0].unsqueeze(0), data[1]
            y_pred = torch.round(torch.sigmoid(model(x))).squeeze()
            loss = criterion(y, y_pred)
            
            if y_pred != y:
                mem.append((i, loss.item()))
                
    return max(mem, key=lambda mem: mem[1])[0]


def binary_acc(y_pred, y_true):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))
    correct_result_sum = (y_pred_tag == y_true).sum().float()
    acc = correct_result_sum / y_true.shape[0]
    
    return acc


def evaluate(train_loader=None, model=None, criterion=None, device=None, binary=True):
    
    model.to(device).eval()
    valid_loss = []
    valid_accs = []

    for i, batch in enumerate(train_loader.dataset):
        x, y = batch[0].unsqueeze(0), batch[1].view(-1)
        
        with torch.no_grad():
            logits = model(x.to(device))
            
            if binary:
                loss = criterion(logits, y.to(device).unsqueeze(1))
                acc = binary_acc(logits, y.to(device).unsqueeze(1))
            else:
                loss = criterion(logits, y.to(device))
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                
            valid_loss.append((loss.item()))
            valid_accs.append(acc)
    
    print(f'loss:{sum(valid_loss) / len(valid_loss)} | acc:{sum(valid_accs) / len(valid_accs)}')

In [9]:
mem = predict(model, train_loader, criterion, device)

In [10]:
mem

5

In [11]:
evaluate(train_loader, model, criterion, device)

loss:0.4477453765594903 | acc:0.8591549396514893


In [12]:
model.add_neuron(train_loader, mem)

In [13]:
evaluate(train_loader, model, criterion, device)

loss:0.4742723027585258 | acc:0.7840375900268555


In [14]:
num = predict(model, train_loader, criterion, device)

In [15]:
num

17

In [20]:
sum([p.numel() for p in model.layer_out.parameters()])

5

4

In [16]:
evaluate(train_loader, model, criterion, device)

loss:0.6724752671841724 | acc:0.6009389758110046


In [17]:
train_loader.dataset[0]

(tensor([-0.9144, -0.5755, -0.7678, -0.6481, -0.8149, -0.5538, -0.6190, -0.8637,
         -0.6922, -0.7756, -0.5755, -0.6262, -0.9586, -0.6774, -0.4808, -0.6701,
          2.4230, -0.3563,  1.6543, -0.6997, -0.7146, -0.7990], device='cuda:0'),
 tensor(1., device='cuda:0'))

In [17]:
model.layer_1.bias

Parameter containing:
tensor([  0.3140,  -0.0873,   0.1117, -21.0000], device='cuda:0',
       requires_grad=True)

tensor(1.2633)

In [8]:
a = torch.randn([1])

In [9]:
b = torch.randn([3])
b = b.view(1, 3)

In [10]:
b.shape

torch.Size([1, 3])

In [11]:
torch.cat((b, a.unsqueeze(1)), dim=1)

tensor([[-1.5384,  1.9720,  0.2452, -0.2344]])

In [12]:
a.unsqueeze(1).shape

torch.Size([1, 1])

In [13]:
b

tensor([[-1.5384,  1.9720,  0.2452]])