In [8]:
%reload_ext autoreload
%autoreload 1
import torch 
import sys
sys.path.append('..')
from torch import nn 
from torch.nn import functional as F
from torch import optim
from utils.loader import load
# from models.vince_models import Net2B

In [9]:
# load the dataset
train_input,train_target, train_classes, test_input, test_target, test_classes = load()
train_target=train_target.float() # necessary ?
test_target=test_target.float()

In [35]:
# weight sharing + aux loss
class Net_shared_aux(nn.Module):

    def __init__(self, name = None):
            
        super(Net_shared_aux, self).__init__()
        self.name = name
        self.conv1 = nn.Conv2d(1, 32, kernel_size = 3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size = 3)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(256, 10)
        self.fc3 = nn.Linear(400, 1)

        
        #self.dropout = nn.Dropout()
        self.sigmoid = nn.Sigmoid()  
        
    def forward(self, x):
        
        x_1 = x [:, 0, :, :].view(-1, 1, 14, 14) # channel 1
        x_2 = x [:, 1, :, :].view(-1, 1, 14, 14) # channel 2
    
        x_1 = F.relu(F.max_pool2d(self.conv1(x_1), kernel_size = 3, stride = 1))
        y_1 = F.relu(F.max_pool2d(self.conv2(x_1), kernel_size = 5, stride = 3))
        y_1 = F.relu(self.fc2(y_1.view(-1,256)))
        x_1 = F.relu(F.max_pool2d(self.conv2(x_1), kernel_size = 5, stride = 3))
        x_1 = F.relu(self.fc1(x_1.view(-1, 256)))
        
        x_2 = F.relu(F.max_pool2d(self.conv1(x_2), kernel_size = 3, stride = 1))
        y_2 = F.relu(F.max_pool2d(self.conv2(x_2), kernel_size = 5, stride = 3))
        y_2 = F.relu(self.fc2(y_2.view(-1,256)))
        x_2 = F.relu(F.max_pool2d(self.conv2(x_2), kernel_size = 5, stride = 3))
        x_2 = F.relu(self.fc1(x_2.view(-1, 256)))
    
        
        
        
        x_c = torch.cat([x_1, x_2], 1)
        x_c = self.fc3(x_c)
        
        
        return self.sigmoid(x_c), y_1, y_2
    

In [54]:
def train_binary(model, epoch, train_input, train_target, batch_size, 
                 criterion = nn.BCELoss(), optimizer = optim.SGD, 
                 eta = 1e-1, penalty = None, lambda_l2 = 1e-3):  
    # train a binary classifier by minimizing the binary cross entropy loss of a sigmoid output
    # not BCELossWithLogits because sigmoid activation of output already applied in model architecture
    model.train()
    optimizer = optimizer(model.parameters(), lr = eta)
    train_loss = 0
    
    for input_, target in (zip(train_input.split(batch_size), train_target.split(batch_size))):
        output = model(input_)
        batch_loss = criterion(output, target.unsqueeze(1))
        
        if penalty is not None:
            for p in model.parameters():
                batch_loss += lambda_l2 * p.pow(2).sum() # L2 penalty term
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        train_loss += batch_loss   
    print('Train Epoch: {}  | Loss {:.6f}'.format(
                epoch, train_loss.item()))
    
def train_aux(model, epoch, train_input, train_target, batch_size, train_classes,
                 criterion_binary = nn.BCELoss(), criterion_aux = nn.CrossEntropyLoss(), optimizer = optim.SGD, 
                 eta = 1e-1, penalty = None, lambda_l2 = 1e-3):  
    # train a binary classifier by minimizing the binary cross entropy loss of a sigmoid output
    # not BCELossWithLogits because sigmoid activation of output already applied in model architecture
    model.train()
    optimizer = optimizer(model.parameters(), lr = eta)
    train_loss = 0
    
    for input_, target, train_classes in (zip(train_input.split(batch_size), train_target.split(batch_size), train_classes.split(batch_size))):
        
        output, output_aux_1, output_aux_2 = model(input_)
        
        binary_loss = criterion_binary(output, target.unsqueeze(1))
        aux_loss_1 = criterion_aux(output_aux_1, train_classes[:,0])
        aux_loss_2 = criterion_aux(output_aux_1, train_classes[:,1])
        
        batch_loss = binary_loss + 0.3 * aux_loss_1 + 0.3 * aux_loss_2
        
        if penalty is not None:
            for p in model.parameters():
                batch_loss += lambda_l2 * p.pow(2).sum() # L2 penalty term
                
                
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        train_loss += batch_loss  
        
    print('Train Epoch: {}  | Loss {:.6f}'.format(
                epoch, train_loss.item()))
        
    

def test_binary(model, test_input, test_target, batch_size, criterion = nn.BCELoss()):
    # test a binary classifier using sigmoid followed by rounding ptobability to 0 or 1
    model.eval()
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        print('Model Name : {}'.format(model.name))
        
        for data, target in zip(test_input.split(batch_size), test_target.split(batch_size)):
            output = model(data)
            
            batch_loss = criterion(output[0], target.unsqueeze(1))
            test_loss += batch_loss
            pred = output 
            correct += torch.round(output[0]).squeeze().eq(target).sum() # only need to round because sigmoid already applied in model 
        print('\nTest set:  Loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
        test_loss.item(), 100 * correct/len(test_target)))

In [55]:
###############################
######  Weight Sharing   ######
###############################

# initialize models and normalize data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

model = Net_shared_aux('net2c_aux')



# train models
nb_epochs = 50

print('Model Name : {}'.format(model.name))

for epoch in range(1, nb_epochs):
    train_aux(model, epoch, train_input, train_target, 100,  train_classes )



    

Model Name : net2c_aux
Train Epoch: 1  | Loss 20.523861
Train Epoch: 2  | Loss 20.185600
Train Epoch: 3  | Loss 19.728657
Train Epoch: 4  | Loss 19.034885
Train Epoch: 5  | Loss 18.271309
Train Epoch: 6  | Loss 17.561296
Train Epoch: 7  | Loss 17.166363
Train Epoch: 8  | Loss 16.735151
Train Epoch: 9  | Loss 16.414862
Train Epoch: 10  | Loss 16.114281
Train Epoch: 11  | Loss 15.833653
Train Epoch: 12  | Loss 15.595093
Train Epoch: 13  | Loss 15.397440
Train Epoch: 14  | Loss 15.172255
Train Epoch: 15  | Loss 14.955202
Train Epoch: 16  | Loss 14.800822
Train Epoch: 17  | Loss 14.608245
Train Epoch: 18  | Loss 14.422678
Train Epoch: 19  | Loss 14.360229
Train Epoch: 20  | Loss 14.171050
Train Epoch: 21  | Loss 14.006302
Train Epoch: 22  | Loss 13.873783
Train Epoch: 23  | Loss 13.957229
Train Epoch: 24  | Loss 13.697421
Train Epoch: 25  | Loss 13.515837
Train Epoch: 26  | Loss 13.393350
Train Epoch: 27  | Loss 13.454119
Train Epoch: 28  | Loss 13.299150
Train Epoch: 29  | Loss 13.067602


In [56]:
test_binary(model, test_input, test_target, 100)

Model Name : net2c_aux

Test set:  Loss: 5.2153, Accuracy: 85%

