In [14]:
import torch 
import sys
sys.path.append('..')
from torch import nn 
from torch.nn import functional as F
from torch import optim
from utils.new_loader import load,PairSetMNIST,Training_set,Test_set, Training_set_split,Validation_set
from torch.utils.data import Dataset, DataLoader

In [15]:
# Build the paired-MNIST dataset once, then wrap it into the train and test views.
data = PairSetMNIST(rotate=True, translate=False, swap_channel=True)
train_data = Training_set(data)
test_data = Test_set(data)
# Sanity check: show the shape of both input tensors, one per line.
print(train_data.train_input.shape, test_data.test_input.shape, sep='\n')

torch.Size([8000, 2, 14, 14])
torch.Size([1000, 2, 14, 14])


In [16]:
class conv_block(nn.Module):
    """
    2d convolution immediately followed by batch normalisation.
    No activation is applied here; callers add their own non-linearity.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: kernel size, an int or an (height, width) tuple.
        stride: stride of the convolution.
        padding: padding of the convolution, an int or an (height, width) tuple.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm2d(num_features=out_channels)

    def forward(self, x):
        """Return the batch-normalised convolution of ``x``."""
        convolved = self.conv(x)
        return self.bn(convolved)

In [17]:
class Inception_block(nn.Module):
    """
    Inception block made of four parallel branches whose outputs are
    concatenated along the channel dimension:

      * a 1x1 convolution,
      * a 1x1 reduction followed by a factorized 3x3 (1x3 then 3x1),
      * a 1x1 reduction followed by two factorized 3x3 (a 5x5 receptive field),
      * a 3x3 max-pooling (stride 1) followed by a 1x1 convolution.

    Every branch keeps the spatial size of its input, so the output has
    channels_1x1 + channels_3x3 + channels_5x5 + pool_channels channels.
    """

    def __init__(self, in_channels, channels_1x1, channels_3x3, channels_5x5, pool_channels):
        super().__init__()

        def factorized_3x3(channels):
            # a 1x3 convolution followed by a 3x1 convolution, both size-preserving
            return [conv_block(channels, channels, kernel_size=(1, 3), padding=(0, 1)),
                    conv_block(channels, channels, kernel_size=(3, 1), padding=(1, 0))]

        # branch 1: plain 1x1 convolution
        self.conv1x1 = conv_block(in_channels, channels_1x1, kernel_size=1)

        # branch 2: 1x1 reduction, then one factorized 3x3
        self.conv3x3 = nn.Sequential(
            conv_block(in_channels, channels_3x3, kernel_size=1),
            *factorized_3x3(channels_3x3),
        )

        # branch 3: 1x1 reduction, then two factorized 3x3 in a row
        self.conv5x5 = nn.Sequential(
            conv_block(in_channels, channels_5x5, kernel_size=1),
            *factorized_3x3(channels_5x5),
            *factorized_3x3(channels_5x5),
        )

        # branch 4: size-preserving max-pooling, then 1x1 projection
        self.pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, pool_channels, kernel_size=1),
        )

    def forward(self, x):
        """Apply the four branches (each followed by a ReLU) and concatenate them on dim 1."""
        branches = (self.conv1x1, self.conv3x3, self.conv5x5, self.pool)
        activated = [F.relu(branch(x)) for branch in branches]
        return torch.cat(activated, 1)

In [18]:
class Auxiliary_loss(nn.Module):
    """
    Auxiliary classification head predicting the digit shown in one image.

    The incoming feature map is average-pooled to 4x4, projected to 128
    channels by a 1x1 conv_block, flattened to 2048 features and passed through
    two fully connected layers (with dropout in between).

    Args:
        in_channels: number of channels of the incoming feature map.
        drop_prob_aux: dropout probability applied before the last layer.
        nb_classes: number of output scores (one per digit class).
    """

    def __init__(self, in_channels, drop_prob_aux, nb_classes=10):
        super().__init__()

        self.conv = conv_block(in_channels, 128, kernel_size=1)

        # 2048 = 128 channels * 4 * 4 pooled positions
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, nb_classes)
        self.dropout = nn.Dropout(drop_prob_aux)

    def forward(self, x):
        """Return the N x nb_classes class scores for the feature map ``x``."""
        # N x in_channels x H x W  ->  N x in_channels x 4 x 4
        pooled = F.adaptive_avg_pool2d(x, output_size=(4, 4))
        # N x 128 x 4 x 4  ->  N x 2048
        features = self.conv(pooled).view(-1, 2048)
        # N x 1024, regularised by dropout
        hidden = self.dropout(F.relu(self.fc1(features)))
        # N x nb_classes
        return self.fc2(hidden)

In [19]:
class Google_Net(nn.Module):
    """
    GoogLeNet-style network for comparing the two digits of an image pair.

    Each of the two input channels is treated as a separate single-channel image
    and goes through the same inception block and the same auxiliary head
    (the weights are shared between the two images). The two digit predictions
    are concatenated and fed to a small fully connected classifier that outputs
    two scores for the pair.

    Args:
        channels_1x1: output channels of the 1x1 inception branch.
        channels_3x3: output channels of the 3x3 inception branch.
        channels_5x5: output channels of the 5x5 inception branch.
        pool_channels: output channels of the pooling inception branch.
        nhidden: size of the first hidden layer of the pair classifier.
        drop_prob: dropout probability applied after each hidden layer of the
            pair classifier (0 disables it).
        drop_prob_aux: dropout probability used inside the auxiliary head.
        nb_classes: number of digit classes predicted by the auxiliary head.
    """

    def __init__(self, channels_1x1, channels_3x3, channels_5x5, pool_channels, nhidden=60,
                 drop_prob=0, drop_prob_aux=0.7, nb_classes=10):
        super().__init__()

        # inception block applied to a single-channel image
        self.inception = Inception_block(1, channels_1x1, channels_3x3, channels_5x5, pool_channels)

        # the inception block concatenates its four branches, so the auxiliary
        # head must accept the sum of the branch widths (256 for 64/64/64/64)
        inception_channels = channels_1x1 + channels_3x3 + channels_5x5 + pool_channels
        self.auxiliary = Auxiliary_loss(inception_channels, drop_prob_aux, nb_classes)

        # pair classifier: takes the two digit predictions side by side
        self.fc1 = nn.Linear(2 * nb_classes, nhidden)
        self.fc2 = nn.Linear(nhidden, 90)
        self.fc3 = nn.Linear(90, 2)
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, input_):
        """
        Args:
            input_: tensor of shape N x 2 x H x W holding one image pair per sample.

        Returns:
            (x, y, z): the N x nb_classes digit scores of the first image, those
            of the second image, and the N x 2 scores of the pair classifier.
        """
        # split the pair into two single-channel images; slicing with a range
        # keeps the channel dimension without hard-coding the image size
        x = input_[:, 0:1, :, :]
        y = input_[:, 1:2, :, :]

        # shared inception block
        x = self.inception(x)
        y = self.inception(y)

        # shared auxiliary digit classifier
        x = self.auxiliary(x)
        y = self.auxiliary(y)

        # pair classifier on the concatenated digit predictions
        z = torch.cat([x, y], 1)
        z = self.dropout(F.relu(self.fc1(z)))
        z = self.dropout(F.relu(self.fc2(z)))
        z = self.fc3(z)

        return x, y, z
        

In [20]:
##### train function ######

def train_aux(model, train_data, mini_batch_size=100, optimizer=optim.SGD,
              criterion=nn.CrossEntropyLoss(), n_epochs=50, eta=1e-1, lambda_l2=0, alpha=0.5, beta=0.5):
    """
    Train a network with an auxiliary loss and weight sharing.

    Args:
        model: module whose forward returns (class_1, class_2, out), i.e. the
            two auxiliary predictions followed by the main prediction.
        train_data: dataset yielding (input, target, classes) triples, where
            classes[:, 0] and classes[:, 1] are the labels for the two
            auxiliary predictions.
        mini_batch_size: number of samples per optimisation step.
        optimizer: optimizer class, instantiated as optimizer(params, lr=eta).
        criterion: loss used for both the main and the auxiliary predictions.
        n_epochs: number of passes over the training data.
        eta: learning rate.
        lambda_l2: weight of the L2 penalty on all parameters (0 disables it).
        alpha: weight of the main loss.
        beta: weight of the sum of the two auxiliary losses.

    Prints the loss summed over the batches (penalty included) after each epoch.
    """
    # create data loader
    train_loader = DataLoader(train_data, batch_size=mini_batch_size, shuffle=True)

    model.train()
    opt = optimizer(model.parameters(), lr=eta)

    for e in range(n_epochs):
        # plain float: accumulating the loss tensor itself would keep the
        # autograd graph of every batch alive until the end of the epoch
        epoch_loss = 0.0

        for input_, target_, classes_ in train_loader:
            class_1, class_2, out = model(input_)
            aux_loss1 = criterion(class_1, classes_[:, 0])
            aux_loss2 = criterion(class_2, classes_[:, 1])
            out_loss = criterion(out, target_)
            net_loss = alpha * out_loss + beta * (aux_loss1 + aux_loss2)

            if lambda_l2 != 0:
                # the penalty has to be part of the loss that is back-propagated,
                # otherwise it is only reported and never regularises the weights
                net_loss = net_loss + lambda_l2 * sum(p.pow(2).sum() for p in model.parameters())

            opt.zero_grad()
            net_loss.backward()
            opt.step()

            epoch_loss += net_loss.item()

        print('Train Epoch: {}  | Loss {:.6f}'.format(
                e, epoch_loss))
        
#########################################################################################################################
#########################################################################################################################

### test function  ###

def test_aux(model, test_data, mini_batch_size=100, criterion = nn.CrossEntropyLoss()):
    
    """
    Test function to calculate prediction accuracy of a cnn with auxiliary loss
    
    """
    
    # create test laoder
    test_loader = DataLoader(test_data, batch_size=mini_batch_size, shuffle=True)
    
    model.eval()
    test_loss = 0
    nb_errors=0
    
    with torch.no_grad():
        
        for i, data in enumerate(test_loader, 0):
            input_, target_, classes_ = data
            
            _,_ ,output = model(input_) 
            batch_loss = criterion(output, target_)
            test_loss += batch_loss
            
            _, predicted_classes = output.max(1)
            for k in range(mini_batch_size):
                if target_[k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1
                                   
             
        print('\nTest set | Loss: {:.4f} | Accuracy: {:.0f}% | # misclassified : {}/{}\n'.format(
        test_loss.item(), 100 * (len(test_data)-nb_errors)/len(test_data), nb_errors, len(test_data)))

In [8]:
# Train a fresh network with 64 channels per inception branch, then evaluate it.
model = Google_Net(channels_1x1=64, channels_3x3=64, channels_5x5=64, pool_channels=64)
train_aux(model=model, train_data=train_data)
test_aux(model=model, test_data=test_data)

Train Epoch: 0  | Loss 31.377213
Train Epoch: 1  | Loss 14.407424
Train Epoch: 2  | Loss 8.786600
Train Epoch: 3  | Loss 7.054515
Train Epoch: 4  | Loss 6.078754
Train Epoch: 5  | Loss 4.966668
Train Epoch: 6  | Loss 4.498782
Train Epoch: 7  | Loss 3.953635
Train Epoch: 8  | Loss 3.586557
Train Epoch: 9  | Loss 3.155195
Train Epoch: 10  | Loss 2.932753
Train Epoch: 11  | Loss 2.517632
Train Epoch: 12  | Loss 2.422868
Train Epoch: 13  | Loss 2.178124
Train Epoch: 14  | Loss 1.695058
Train Epoch: 15  | Loss 1.768368
Train Epoch: 16  | Loss 1.467569
Train Epoch: 17  | Loss 1.291992
Train Epoch: 18  | Loss 1.166596
Train Epoch: 19  | Loss 1.179637
Train Epoch: 20  | Loss 1.044233
Train Epoch: 21  | Loss 0.977379
Train Epoch: 22  | Loss 0.755890
Train Epoch: 23  | Loss 0.773148
Train Epoch: 24  | Loss 0.718674
Train Epoch: 25  | Loss 0.693958
Train Epoch: 26  | Loss 0.651033
Train Epoch: 27  | Loss 0.645568
Train Epoch: 28  | Loss 0.547370
Train Epoch: 29  | Loss 0.650572
Train Epoch: 30  |

In [11]:
# Second independent run with the same configuration (64 channels per branch).
model2 = Google_Net(channels_1x1=64, channels_3x3=64, channels_5x5=64, pool_channels=64)
train_aux(model=model2, train_data=train_data)
test_aux(model=model2, test_data=test_data)

Train Epoch: 0  | Loss 32.618233
Train Epoch: 1  | Loss 13.434915
Train Epoch: 2  | Loss 8.934690
Train Epoch: 3  | Loss 6.789910
Train Epoch: 4  | Loss 5.602638
Train Epoch: 5  | Loss 4.740703
Train Epoch: 6  | Loss 4.155932
Train Epoch: 7  | Loss 3.976521
Train Epoch: 8  | Loss 3.275844
Train Epoch: 9  | Loss 2.913592
Train Epoch: 10  | Loss 2.519236
Train Epoch: 11  | Loss 2.622946
Train Epoch: 12  | Loss 2.283235
Train Epoch: 13  | Loss 1.969821
Train Epoch: 14  | Loss 1.843643
Train Epoch: 15  | Loss 1.550166
Train Epoch: 16  | Loss 1.426786
Train Epoch: 17  | Loss 1.288421
Train Epoch: 18  | Loss 1.332516
Train Epoch: 19  | Loss 1.089396
Train Epoch: 20  | Loss 0.837815
Train Epoch: 21  | Loss 0.964031
Train Epoch: 22  | Loss 0.834835
Train Epoch: 23  | Loss 0.781474
Train Epoch: 24  | Loss 0.603094
Train Epoch: 25  | Loss 0.505649
Train Epoch: 26  | Loss 0.469993
Train Epoch: 27  | Loss 0.526265
Train Epoch: 28  | Loss 0.551350
Train Epoch: 29  | Loss 0.502799
Train Epoch: 30  |

In [11]:
# Third independent run with the same configuration (64 channels per branch).
model3 = Google_Net(channels_1x1=64, channels_3x3=64, channels_5x5=64, pool_channels=64)
train_aux(model=model3, train_data=train_data)
test_aux(model=model3, test_data=test_data)

Train Epoch: 0  | Loss 30.186747
Train Epoch: 1  | Loss 14.004303
Train Epoch: 2  | Loss 8.854354
Train Epoch: 3  | Loss 6.635819
Train Epoch: 4  | Loss 6.167346
Train Epoch: 5  | Loss 4.947651
Train Epoch: 6  | Loss 4.128981
Train Epoch: 7  | Loss 3.859995
Train Epoch: 8  | Loss 3.362609
Train Epoch: 9  | Loss 2.963300
Train Epoch: 10  | Loss 2.969986
Train Epoch: 11  | Loss 2.424399
Train Epoch: 12  | Loss 2.077563
Train Epoch: 13  | Loss 1.753907
Train Epoch: 14  | Loss 1.533150
Train Epoch: 15  | Loss 1.809114
Train Epoch: 16  | Loss 1.479668
Train Epoch: 17  | Loss 1.156908
Train Epoch: 18  | Loss 1.062603
Train Epoch: 19  | Loss 1.073249
Train Epoch: 20  | Loss 0.818271
Train Epoch: 21  | Loss 0.886532
Train Epoch: 22  | Loss 0.813413
Train Epoch: 23  | Loss 0.629606
Train Epoch: 24  | Loss 0.665872
Train Epoch: 25  | Loss 0.675358
Train Epoch: 26  | Loss 0.592510
Train Epoch: 27  | Loss 0.556999
Train Epoch: 28  | Loss 0.626615
Train Epoch: 29  | Loss 0.527285
Train Epoch: 30  |

In [21]:
# Fourth independent run with the same configuration (64 channels per branch).
model4 = Google_Net(channels_1x1=64, channels_3x3=64, channels_5x5=64, pool_channels=64)
train_aux(model=model4, train_data=train_data)
test_aux(model=model4, test_data=test_data)

Train Epoch: 0  | Loss 104.899452
Train Epoch: 1  | Loss 45.052601
Train Epoch: 2  | Loss 31.685064
Train Epoch: 3  | Loss 25.729883
Train Epoch: 4  | Loss 21.673063
Train Epoch: 5  | Loss 18.168068
Train Epoch: 6  | Loss 15.400216
Train Epoch: 7  | Loss 13.026170
Train Epoch: 8  | Loss 11.510374
Train Epoch: 9  | Loss 10.211856
Train Epoch: 10  | Loss 8.992868
Train Epoch: 11  | Loss 8.161642
Train Epoch: 12  | Loss 7.570150
Train Epoch: 13  | Loss 6.382459
Train Epoch: 14  | Loss 5.506278
Train Epoch: 15  | Loss 5.625072
Train Epoch: 16  | Loss 5.564719
Train Epoch: 17  | Loss 5.208803
Train Epoch: 18  | Loss 4.632640
Train Epoch: 19  | Loss 3.732130
Train Epoch: 20  | Loss 3.632525
Train Epoch: 21  | Loss 3.484497
Train Epoch: 22  | Loss 2.767345
Train Epoch: 23  | Loss 2.949233
Train Epoch: 24  | Loss 2.331871
Train Epoch: 25  | Loss 2.440623
Train Epoch: 26  | Loss 2.724271
Train Epoch: 27  | Loss 2.507766
Train Epoch: 28  | Loss 2.288896
Train Epoch: 29  | Loss 2.249889
Train Epo