In [1]:
%reload_ext autoreload
%autoreload 1
import torch 
import sys
sys.path.append('..')
from torch import nn 
from torch.nn import functional as F
from torch import optim
from utils.loader import load
from utils.loader import PairSetMNIST
from torch.utils.data import Dataset, DataLoader

In [2]:
# load the dataset as a Dataset object
# Each item is later unpacked as (input, target, classes) by train_aux / test_aux.
# NOTE(review): the train split is requested with train=True (plus swap_channel=True) while the
# test split uses test=True; PairSetMNIST lives in utils.loader and is not visible here, so
# confirm that both keywords exist and what swap_channel does to the training pairs.
train_data = PairSetMNIST(train=True,swap_channel = True)
test_data  = PairSetMNIST(test=True)

In [3]:
class conv_block(nn.Module):
    """
    2d convolution immediately followed by batch normalisation.

    No non-linearity is applied inside this block.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of filters, i.e. channels of the result.
        kernel_size: int or (height, width) tuple handed to ``nn.Conv2d``.
        stride: stride handed to ``nn.Conv2d``.
        padding: zero padding handed to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``bn(conv(x))``."""
        convolved = self.conv(x)
        return self.bn(convolved)

In [4]:
class Inception_block(nn.Module):
    """
    Inception module: four parallel branches read the same input and their
    ReLU-activated outputs are concatenated along the channel axis.

    Branches:
        * ``conv1x1``: a single 1x1 convolution.
        * ``conv3x3``: 1x1 reduction, then a 3x3 factorised as 1x3 followed by 3x1.
        * ``conv5x5``: 1x1 reduction, then two consecutive factorised 3x3.
        * ``pool``: 3x3 max pooling (stride 1, padding 1), then a 1x1 convolution.

    Every branch keeps the spatial size of its input, so the result has
    ``channels_1x1 + channels_3x3 + channels_5x5 + pool_channels`` channels.
    """

    def __init__(self, in_channels, channels_1x1, channels_3x3, channels_5x5, pool_channels):
        super().__init__()

        def factorised_3x3(width):
            # a 3x3 receptive field assembled from a 1x3 and then a 3x1 convolution
            return [
                conv_block(width, width, kernel_size=(1, 3), padding=(0, 1)),
                conv_block(width, width, kernel_size=(3, 1), padding=(1, 0)),
            ]

        # branch 1: plain 1x1 convolution
        self.conv1x1 = conv_block(in_channels, channels_1x1, kernel_size=1)

        # branch 2: 1x1 reduction + one factorised 3x3
        layers_3x3 = [conv_block(in_channels, channels_3x3, kernel_size=1)]
        layers_3x3 += factorised_3x3(channels_3x3)
        self.conv3x3 = nn.Sequential(*layers_3x3)

        # branch 3: 1x1 reduction + two factorised 3x3 (5x5 receptive field)
        layers_5x5 = [conv_block(in_channels, channels_5x5, kernel_size=1)]
        layers_5x5 += factorised_3x3(channels_5x5)
        layers_5x5 += factorised_3x3(channels_5x5)
        self.conv5x5 = nn.Sequential(*layers_5x5)

        # branch 4: size-preserving max pooling + 1x1 projection
        self.pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_channels, kernel_size=1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        branches = (self.conv1x1, self.conv3x3, self.conv5x5, self.pool)
        activated = [F.relu(branch(x)) for branch in branches]
        return torch.cat(activated, 1)

In [5]:
class Auxiliary_loss(nn.Module):
    """
    Classification head producing ``nb_classes`` logits from a feature map.

    Pipeline: adaptive average pooling to 4x4 -> 1x1 conv_block with 128
    filters -> flatten (128 * 4 * 4 = 2048) -> Linear(2048, 1024) + ReLU ->
    dropout -> Linear(1024, nb_classes).

    Args:
        in_channels: channels of the incoming feature map.
        drop_prob_aux: dropout probability applied before the last layer.
        nb_classes: number of output logits.
    """

    def __init__(self, in_channels, drop_prob_aux, nb_classes=10):
        super().__init__()

        self.conv = conv_block(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, nb_classes)
        self.dropout = nn.Dropout(drop_prob_aux)

    def forward(self, x):
        """Return the (N, nb_classes) logits for feature map ``x``."""
        # N x in_channels x 4 x 4, whatever the incoming spatial size
        pooled = F.adaptive_avg_pool2d(x, (4, 4))
        # N x 128 x 4 x 4
        reduced = self.conv(pooled)
        # N x 2048
        flat = reduced.view(-1, 2048)
        # N x 1024
        hidden = F.relu(self.fc1(flat), inplace=True)
        hidden = self.dropout(hidden)
        # N x nb_classes
        return self.fc2(hidden)

In [10]:
class Google_Net (nn.Module) :
    """
    Siamese Inception network for pairs of single-channel digit images.

    Each of the two input channels goes through the same Inception block and the
    same auxiliary head (shared weights), giving one vector of digit logits per
    image. The two logit vectors are concatenated and fed to a small MLP that
    outputs the 2-class prediction for the pair.

    Args:
        channels_1x1, channels_3x3, channels_5x5, pool_channels: widths of the
            four Inception branches; their sum is the Inception output width.
        nhidden: size of the first hidden layer of the pair classifier.
        drop_prob: dropout probability before the last pair-classifier layer.
        drop_prob_aux: dropout probability inside the auxiliary head.
        nb_classes: number of digit classes predicted by the auxiliary head.
    """

    def __init__(self,channels_1x1,channels_3x3,channels_5x5,pool_channels,nhidden = 60,
                 drop_prob = 0,drop_prob_aux = 0.7,nb_classes = 10):
        super(Google_Net, self).__init__()

        # Width of the concatenated Inception output. Deriving it (instead of
        # hard-coding 256) lets any combination of branch widths work.
        inception_out = channels_1x1 + channels_3x3 + channels_5x5 + pool_channels

        # NOTE: conv1 and inception2 are registered but not used by forward();
        # they are kept so existing state_dict keys stay loadable.
        self.conv1 = conv_block(1, 32, kernel_size = 3, padding = (3 - 1)//2)
        # inception blocks
        self.inception1 = Inception_block(1,channels_1x1,channels_3x3,channels_5x5,pool_channels)
        self.inception2 = Inception_block(inception_out,channels_1x1,channels_3x3,channels_5x5,pool_channels)
        # auxiliary digit classifier (shared by both images)
        self.auxiliary = Auxiliary_loss(inception_out,drop_prob_aux,nb_classes)

        # pair classifier working on the two concatenated digit-logit vectors
        self.fc1 = nn.Linear(2 * nb_classes, nhidden)
        self.fc2 = nn.Linear(nhidden, 90)
        self.fc3 = nn.Linear(90, 2)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, input_):
        """
        Args:
            input_: tensor of shape (N, 2, H, W), one image per channel.

        Returns:
            (x, y, z): digit logits of the first image (N, nb_classes), digit
            logits of the second image (N, nb_classes), pair logits (N, 2).
        """
        # Split the 2-channel input into two single-channel images. Slicing
        # keeps the channel axis and does not assume a fixed spatial size.
        x = input_[:, 0:1]
        y = input_[:, 1:2]

        # shared inception block
        x = self.inception1(x)
        y = self.inception1(y)

        # shared auxiliary head: digit logits for each image
        x = self.auxiliary(x)
        y = self.auxiliary(y)

        # concatenate the two logit vectors: N x (2 * nb_classes)
        z = torch.cat([x, y], 1)

        z = F.relu(self.fc1(z))
        z = F.relu(self.fc2(z))
        z = self.dropout(z)
        z = self.fc3(z)

        return x,y,z
        

In [7]:
##### train function ######

def train_aux (model, train_data, mini_batch_size=100, optimizer = optim.SGD,
                criterion = nn.CrossEntropyLoss(), n_epochs=50, eta=1e-1, lambda_l2 = 0, alpha=0.5, beta=0.5):
    """
    Train a network that returns ``(class_1, class_2, out)`` with a weighted sum
    of the main loss and the two auxiliary losses.

    Args:
        model: module whose forward returns two auxiliary outputs and the main output.
        train_data: dataset yielding ``(input, target, classes)``; ``classes[:, 0]``
            and ``classes[:, 1]`` are the targets of the two auxiliary outputs.
        mini_batch_size: batch size of the (shuffled) training loader.
        optimizer: optimizer class, instantiated as ``optimizer(model.parameters(), lr=eta)``.
        criterion: loss applied to each of the three outputs.
        n_epochs: number of passes over ``train_data``.
        eta: learning rate.
        lambda_l2: weight of the squared-L2 penalty over all parameters (0 disables it).
        alpha: weight of the main loss.
        beta: weight of the sum of the two auxiliary losses.

    Prints the summed batch loss after every epoch; returns nothing.
    """
    # create data loader
    train_loader = DataLoader(train_data, batch_size=mini_batch_size, shuffle=True)

    model.train()
    opt = optimizer(model.parameters(), lr = eta)

    for e in range(n_epochs):
        # plain float: no autograd graph is kept alive, and an empty loader still prints
        epoch_loss = 0.0

        for input_, target_, classes_ in train_loader:
            class_1, class_2, out = model(input_)
            aux_loss1 = criterion(class_1, classes_[:,0])
            aux_loss2 = criterion(class_2, classes_[:,1])
            out_loss  = criterion(out, target_)
            net_loss = alpha * out_loss + beta * (aux_loss1 + aux_loss2)

            if lambda_l2 != 0:
                # The penalty must be part of the loss that is back-propagated,
                # otherwise it is only logged and never regularises the weights.
                l2_penalty = sum(p.pow(2).sum() for p in model.parameters())
                net_loss = net_loss + lambda_l2 * l2_penalty

            opt.zero_grad()
            net_loss.backward()
            opt.step()

            epoch_loss += net_loss.item()

        print('Train Epoch: {}  | Loss {:.6f}'.format(
                e, epoch_loss))
        
#########################################################################################################################
#########################################################################################################################

### test function  ###

def test_aux(model, test_data, mini_batch_size=100, criterion = nn.CrossEntropyLoss()):
    
    """
    Test function to calculate prediction accuracy of a cnn with auxiliary loss
    
    """
    
    # create test laoder
    test_loader = DataLoader(test_data, batch_size=mini_batch_size, shuffle=True)
    
    model.eval()
    test_loss = 0
    nb_errors=0
    
    with torch.no_grad():
        
        for i, data in enumerate(test_loader, 0):
            input_, target_, classes_ = data
            
            _,_ ,output = model(input_) 
            batch_loss = criterion(output, target_)
            test_loss += batch_loss
            
            _, predicted_classes = output.max(1)
            for k in range(mini_batch_size):
                if target_[k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1
                                   
             
        print('\nTest set | Loss: {:.4f} | Accuracy: {:.0f}% | # misclassified : {}/{}\n'.format(
        test_loss.item(), 100 * (len(test_data)-nb_errors)/len(test_data), nb_errors, len(test_data)))

In [98]:
# Google_Net has no defaults for the four Inception branch widths, so calling it
# without arguments raises a TypeError on a fresh kernel. Pass them explicitly:
# 64 filters per branch, i.e. 4 x 64 = 256 concatenated channels.
model1 = Google_Net(64, 64, 64, 64)
train_aux(model1, train_data)
test_aux(model1, test_data)

Train Epoch: 0  | Loss 29.767370
Train Epoch: 1  | Loss 13.516229
Train Epoch: 2  | Loss 8.503138
Train Epoch: 3  | Loss 6.766137
Train Epoch: 4  | Loss 6.159487
Train Epoch: 5  | Loss 4.913076
Train Epoch: 6  | Loss 4.067536
Train Epoch: 7  | Loss 3.638935
Train Epoch: 8  | Loss 3.654038
Train Epoch: 9  | Loss 3.028535
Train Epoch: 10  | Loss 2.505662
Train Epoch: 11  | Loss 2.409002
Train Epoch: 12  | Loss 2.404171
Train Epoch: 13  | Loss 1.905158
Train Epoch: 14  | Loss 1.695484
Train Epoch: 15  | Loss 1.579560
Train Epoch: 16  | Loss 1.456288
Train Epoch: 17  | Loss 1.149295
Train Epoch: 18  | Loss 1.258273
Train Epoch: 19  | Loss 1.108944
Train Epoch: 20  | Loss 0.973030
Train Epoch: 21  | Loss 0.992214
Train Epoch: 22  | Loss 0.861399
Train Epoch: 23  | Loss 1.920845
Train Epoch: 24  | Loss 1.003562
Train Epoch: 25  | Loss 0.763496
Train Epoch: 26  | Loss 0.588748
Train Epoch: 27  | Loss 0.613650
Train Epoch: 28  | Loss 0.558315
Train Epoch: 29  | Loss 0.466551
Train Epoch: 30  |

In [126]:
# 64 filters in each of the four Inception branches, no dropout in the pair classifier
# (drop_prob defaults to 0); trained with the train_aux defaults (SGD, eta=1e-1, 50 epochs).
model2 = Google_Net(64,64,64,64)
train_aux(model2, train_data)
test_aux(model2,test_data)

Train Epoch: 0  | Loss 32.350616
Train Epoch: 1  | Loss 13.768975
Train Epoch: 2  | Loss 9.004088
Train Epoch: 3  | Loss 6.802941
Train Epoch: 4  | Loss 5.937626
Train Epoch: 5  | Loss 5.061621
Train Epoch: 6  | Loss 4.577953
Train Epoch: 7  | Loss 4.060737
Train Epoch: 8  | Loss 3.481941
Train Epoch: 9  | Loss 3.141851
Train Epoch: 10  | Loss 2.864805
Train Epoch: 11  | Loss 2.238279
Train Epoch: 12  | Loss 2.085346
Train Epoch: 13  | Loss 1.725571
Train Epoch: 14  | Loss 1.535687
Train Epoch: 15  | Loss 1.645390
Train Epoch: 16  | Loss 1.230233
Train Epoch: 17  | Loss 1.921638
Train Epoch: 18  | Loss 1.524699
Train Epoch: 19  | Loss 1.127218
Train Epoch: 20  | Loss 0.960918
Train Epoch: 21  | Loss 0.884457
Train Epoch: 22  | Loss 0.779605
Train Epoch: 23  | Loss 0.568249
Train Epoch: 24  | Loss 0.629181
Train Epoch: 25  | Loss 0.582528
Train Epoch: 26  | Loss 0.581650
Train Epoch: 27  | Loss 0.557396
Train Epoch: 28  | Loss 0.516551
Train Epoch: 29  | Loss 0.561240
Train Epoch: 30  |

In [129]:
# Same configuration as the previous run (64 filters per branch, train_aux defaults),
# trained again on a freshly constructed model.
model3 = Google_Net(64,64,64,64)
train_aux(model3, train_data)
test_aux(model3,test_data)

Train Epoch: 0  | Loss 32.565662
Train Epoch: 1  | Loss 14.515175
Train Epoch: 2  | Loss 8.981249
Train Epoch: 3  | Loss 6.971626
Train Epoch: 4  | Loss 6.269598
Train Epoch: 5  | Loss 5.010170
Train Epoch: 6  | Loss 4.414513
Train Epoch: 7  | Loss 3.765643
Train Epoch: 8  | Loss 3.314440
Train Epoch: 9  | Loss 3.376633
Train Epoch: 10  | Loss 2.951209
Train Epoch: 11  | Loss 2.482831
Train Epoch: 12  | Loss 2.482694
Train Epoch: 13  | Loss 2.026098
Train Epoch: 14  | Loss 1.793649
Train Epoch: 15  | Loss 1.911194
Train Epoch: 16  | Loss 1.520952
Train Epoch: 17  | Loss 1.366843
Train Epoch: 18  | Loss 1.324510
Train Epoch: 19  | Loss 1.021954
Train Epoch: 20  | Loss 1.113924
Train Epoch: 21  | Loss 0.861108
Train Epoch: 22  | Loss 0.904173
Train Epoch: 23  | Loss 0.887597
Train Epoch: 24  | Loss 0.901606
Train Epoch: 25  | Loss 0.819775
Train Epoch: 26  | Loss 0.671401
Train Epoch: 27  | Loss 0.491444
Train Epoch: 28  | Loss 0.651164
Train Epoch: 29  | Loss 0.532023
Train Epoch: 30  |

In [134]:
# 64 filters per branch, now with dropout (p=0.5) before the last layer of the pair
# classifier; training still uses the train_aux defaults.
model4 = Google_Net(64,64,64,64,drop_prob = 0.5)
train_aux(model4, train_data)
test_aux(model4,test_data)

Train Epoch: 0  | Loss 31.853571
Train Epoch: 1  | Loss 13.978946
Train Epoch: 2  | Loss 9.044988
Train Epoch: 3  | Loss 7.276228
Train Epoch: 4  | Loss 5.823573
Train Epoch: 5  | Loss 4.920546
Train Epoch: 6  | Loss 4.541502
Train Epoch: 7  | Loss 3.792336
Train Epoch: 8  | Loss 3.247078
Train Epoch: 9  | Loss 2.945832
Train Epoch: 10  | Loss 2.867447
Train Epoch: 11  | Loss 2.398836
Train Epoch: 12  | Loss 2.897313
Train Epoch: 13  | Loss 1.970138
Train Epoch: 14  | Loss 1.753320
Train Epoch: 15  | Loss 1.540551
Train Epoch: 16  | Loss 1.412518
Train Epoch: 17  | Loss 1.375399
Train Epoch: 18  | Loss 1.086837
Train Epoch: 19  | Loss 1.367363
Train Epoch: 20  | Loss 1.236998
Train Epoch: 21  | Loss 0.909724
Train Epoch: 22  | Loss 0.870065
Train Epoch: 23  | Loss 0.714281
Train Epoch: 24  | Loss 0.648039
Train Epoch: 25  | Loss 0.646123
Train Epoch: 26  | Loss 0.593228
Train Epoch: 27  | Loss 0.461571
Train Epoch: 28  | Loss 0.518458
Train Epoch: 29  | Loss 0.440357
Train Epoch: 30  |

In [137]:
# Repeat of the drop_prob=0.5 configuration on a freshly constructed model.
model5 = Google_Net(64,64,64,64,drop_prob = 0.5)
train_aux(model5, train_data)
test_aux(model5,test_data)

Train Epoch: 0  | Loss 26.392162
Train Epoch: 1  | Loss 10.809406
Train Epoch: 2  | Loss 7.564365
Train Epoch: 3  | Loss 6.040642
Train Epoch: 4  | Loss 4.994551
Train Epoch: 5  | Loss 4.132074
Train Epoch: 6  | Loss 3.559065
Train Epoch: 7  | Loss 3.158071
Train Epoch: 8  | Loss 2.641409
Train Epoch: 9  | Loss 2.734702
Train Epoch: 10  | Loss 2.332850
Train Epoch: 11  | Loss 1.981328
Train Epoch: 12  | Loss 1.947118
Train Epoch: 13  | Loss 1.671224
Train Epoch: 14  | Loss 1.351950
Train Epoch: 15  | Loss 1.461564
Train Epoch: 16  | Loss 1.429318
Train Epoch: 17  | Loss 1.179722
Train Epoch: 18  | Loss 0.950029
Train Epoch: 19  | Loss 1.024328
Train Epoch: 20  | Loss 0.803164
Train Epoch: 21  | Loss 0.799966
Train Epoch: 22  | Loss 0.673487
Train Epoch: 23  | Loss 0.654501
Train Epoch: 24  | Loss 0.514970
Train Epoch: 25  | Loss 0.601667
Train Epoch: 26  | Loss 0.549156
Train Epoch: 27  | Loss 0.524546
Train Epoch: 28  | Loss 0.437109
Train Epoch: 29  | Loss 0.412676
Train Epoch: 30  |

In [150]:
# Another repeat of the drop_prob=0.5 configuration on a freshly constructed model.
model6 = Google_Net(64,64,64,64,drop_prob = 0.5)
train_aux(model6, train_data)
test_aux(model6,test_data)

Train Epoch: 0  | Loss 36.097652
Train Epoch: 1  | Loss 12.594378
Train Epoch: 2  | Loss 7.989007
Train Epoch: 3  | Loss 5.971697
Train Epoch: 4  | Loss 5.103000
Train Epoch: 5  | Loss 4.165145
Train Epoch: 6  | Loss 3.976546
Train Epoch: 7  | Loss 3.224481
Train Epoch: 8  | Loss 2.712321
Train Epoch: 9  | Loss 2.628671
Train Epoch: 10  | Loss 2.293429
Train Epoch: 11  | Loss 1.924949
Train Epoch: 12  | Loss 1.738328
Train Epoch: 13  | Loss 1.711177
Train Epoch: 14  | Loss 1.793992
Train Epoch: 15  | Loss 1.166682
Train Epoch: 16  | Loss 1.112388
Train Epoch: 17  | Loss 1.252998
Train Epoch: 18  | Loss 1.083019
Train Epoch: 19  | Loss 0.888970
Train Epoch: 20  | Loss 0.879558
Train Epoch: 21  | Loss 0.714483
Train Epoch: 22  | Loss 0.639415
Train Epoch: 23  | Loss 1.050229
Train Epoch: 24  | Loss 0.612210
Train Epoch: 25  | Loss 0.574584
Train Epoch: 26  | Loss 0.454151
Train Epoch: 27  | Loss 0.385831
Train Epoch: 28  | Loss 0.484206
Train Epoch: 29  | Loss 0.379391
Train Epoch: 30  |

In [11]:
# Same architecture (64 filters per branch, drop_prob=0.5) but optimised with Adam at
# lr=0.001 instead of the default SGD at lr=1e-1.
# NOTE(review): this rebinds the name model6, which an earlier cell already uses for the
# SGD-trained model; consider a distinct name so both models remain available.
model6 = Google_Net(64,64,64,64,drop_prob = 0.5)
train_aux(model6, train_data, optimizer = optim.Adam, eta = 0.001)
test_aux(model6,test_data)

Train Epoch: 0  | Loss 23.261400
Train Epoch: 1  | Loss 9.118961
Train Epoch: 2  | Loss 6.285275
Train Epoch: 3  | Loss 4.901327
Train Epoch: 4  | Loss 4.493050
Train Epoch: 5  | Loss 3.991235
Train Epoch: 6  | Loss 3.349438
Train Epoch: 7  | Loss 3.045502
Train Epoch: 8  | Loss 2.766802
Train Epoch: 9  | Loss 2.464422
Train Epoch: 10  | Loss 2.135505
Train Epoch: 11  | Loss 1.712572
Train Epoch: 12  | Loss 1.663981
Train Epoch: 13  | Loss 1.683771
Train Epoch: 14  | Loss 1.349212
Train Epoch: 15  | Loss 1.171406
Train Epoch: 16  | Loss 1.174358
Train Epoch: 17  | Loss 1.172775
Train Epoch: 18  | Loss 1.143875
Train Epoch: 19  | Loss 1.106894
Train Epoch: 20  | Loss 1.042744
Train Epoch: 21  | Loss 1.001812
Train Epoch: 22  | Loss 0.845421
Train Epoch: 23  | Loss 0.783974
Train Epoch: 24  | Loss 0.974004
Train Epoch: 25  | Loss 0.941843
Train Epoch: 26  | Loss 0.698549
Train Epoch: 27  | Loss 1.214441
Train Epoch: 28  | Loss 0.815623
Train Epoch: 29  | Loss 0.601879
Train Epoch: 30  | 