In [1]:
# PyTorch imports
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
# DLC Practical prologue
import dlc_practical_prologue as prologue

# Load Data

In [2]:
N = 1000
# Seed torch's global RNG before the first stochastic step so that
# Restart Kernel -> Run All reproduces weight initialisation and dropout masks.
# NOTE(review): presumably generate_pair_sets also draws from torch's RNG,
# which would make the sampled pairs repeatable too - confirm in dlc_practical_prologue.
SEED = 0
torch.manual_seed(SEED)
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [3]:
# Data sizes for reference: one line per tensor, train split first, then test split
# (the leading '\n' on the first test label keeps the blank separator line).
for label, tensor in (('Train input:', train_input),
                      ('Train target:', train_target),
                      ('Train classes:', train_classes)):
    print(label, tensor.size())

for label, tensor in (('\nTest input:', test_input),
                      ('Test target:', test_target),
                      ('Test classes:', test_classes)):
    print(label, tensor.size())

Train input: torch.Size([1000, 2, 14, 14])
Train target: torch.Size([1000])
Train classes: torch.Size([1000, 2])

Test input: torch.Size([1000, 2, 14, 14])
Test target: torch.Size([1000])
Test classes: torch.Size([1000, 2])


# Model Architectures

### Baseline Convolutional Model

For Conv2D, MaxPool2D => See sizing rules on PyTorch page.

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=3, p=1 |
| 32x14x14 | MaxP, k=3, s=1 |
| 32x12x12 | Conv, k=3, p=1 |
| 64x12x12 | MaxP, k=2, s=2 |
| 64x6x6   | Flatten        |
| 2304     | Lin, 2304->64  |
| 64       | Lin, 64->10    |
| 10       |                |

Changed Architecture

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=3, p=1 |
| 10x14x14 | MaxP, k=3, s=1 |
| 10x12x12 | Conv, k=3, p=1 |
| 10x12x12 | MaxP, k=2, s=2 |
| 10x6x6   | Flatten        |
| 360      | Lin, 360->10   |
| 10       |                |

In [4]:
class BaseNet(nn.Module):
    '''
    Two-branch convolutional network for a pair of single-channel images.

    Each image (channel 0 and channel 1 of the input) goes through
    conv -> max-pool -> relu -> dropout twice, then a linear layer producing
    10 values. The two 10-value vectors are concatenated and passed through
    two linear layers to give a 2-value output.

    use_weight_sharing_ : when True the second image reuses the x1_* layers,
                          otherwise a separate set of x2_* layers is created.
    use_auxiliary_loss_ : when True forward also returns the two per-image
                          10-value vectors.
    '''
    # Init method
    def __init__(self, use_weight_sharing_, use_auxiliary_loss_):
        super().__init__()
        self.use_weight_sharing = use_weight_sharing_
        self.use_auxiliary_loss = use_auxiliary_loss_
        # Flattened feature size after the second pooling (10 maps of 6x6 for 14x14 images)
        self.conv_out = 10*6*6

        # Layers of the first branch (also used for the second image when sharing)
        self.x1_conv1 = nn.Conv2d(1, 10, kernel_size=3, padding=1)
        self.x1_conv2 = nn.Conv2d(10, 10, kernel_size=3, padding=1)
        self.x1_fc1 = nn.Linear(self.conv_out, 10)

        # Dedicated layers of the second branch
        if not self.use_weight_sharing:
            self.x2_conv1 = nn.Conv2d(1, 10, kernel_size=3, padding=1)
            self.x2_conv2 = nn.Conv2d(10, 10, kernel_size=3, padding=1)
            self.x2_fc1 = nn.Linear(self.conv_out, 10)

        # Head combining both 10-value vectors
        self.comp_fc1 = nn.Linear(20, 20)
        self.comp_fc3 = nn.Linear(20, 2)
        # Dropout
        self.dropout = nn.Dropout()

    def _encode(self, img, conv1, conv2, fc):
        '''
        Runs one image through the given conv/conv/linear stack and returns the 10-value vector.
        '''
        feat = self.dropout(F.relu(F.max_pool2d(conv1(img), kernel_size=3, stride=1)))
        feat = self.dropout(F.relu(F.max_pool2d(conv2(feat), kernel_size=2, stride=2)))
        return fc(feat.reshape(feat.size(0), -1))

    # Forward method
    def forward(self, x):
        # Slicing with 0:1 / 1:2 keeps the channel dimension for Conv2d
        first_img, second_img = x[:, 0:1], x[:, 1:2]

        x1 = self._encode(first_img, self.x1_conv1, self.x1_conv2, self.x1_fc1)
        if self.use_weight_sharing:
            x2 = self._encode(second_img, self.x1_conv1, self.x1_conv2, self.x1_fc1)
        else:
            x2 = self._encode(second_img, self.x2_conv1, self.x2_conv2, self.x2_fc1)

        # Combine
        x = self.comp_fc3(F.relu(self.comp_fc1(torch.cat((x1, x2), 1))))
        if self.use_auxiliary_loss:
            return x, x1, x2
        return x

### AlexNet-like Convolutional Model

For Conv2D, MaxPool2D => See sizing rules on PyTorch page.

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=6, p=1 |
| 32x11x11 | MaxP, k=2, s=2 |
| 32x5x5   | Conv, k=2, p=1 |
| 85x5x5   | MaxP, k=2, s=2 |
| 85x2x2   | Conv, k=3, p=1 | 
| 128x2x2  | Conv, k=3, p=1 | 
| 128x2x2  | Conv, k=3, p=1 | 
| 85x2x2   | MaxP, k=2, s=2 |
| 85x1x1   | Flatten        |
| 85       | Lin, 85->85    |
| 85       | Lin, 85->10    |
| 10       |                |

Changed Architecture

| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=6, p=1 |
| 3x11x11 | MaxP, k=2, s=2 |
| 3x5x5   | Conv, k=2, p=1 |
| 6x5x5   | MaxP, k=2, s=2 |
| 6x2x2   | Conv, k=3, p=1 | 
| 12x2x2  | Conv, k=3, p=1 | 
| 12x2x2  | Conv, k=3, p=1 | 
| 10x2x2   | MaxP, k=2, s=2 |
| 10x1x1   | Flatten        |
| 10       | Lin, 10->10    |

In [5]:
class AlexNet(nn.Module):
    '''
    Two-branch AlexNet-like network for a pair of single-channel images.

    Each image (channel 0 and channel 1 of the input) goes through two
    conv -> max-pool -> relu stages, a stack of three convolutions
    (nest1 -> nest2 -> nest3) followed by max-pool -> relu, and two linear
    layers producing 10 values. The two 10-value vectors are concatenated and
    passed through two linear layers to give a 2-value output.

    use_weight_sharing_ : when True the second image reuses the x1_* layers,
                          otherwise a separate set of x2_* layers is created.
    use_auxiliary_loss_ : when True forward also returns the two per-image
                          10-value vectors.
    '''
    # Init method
    def __init__(self, use_weight_sharing_, use_auxiliary_loss_):
        super().__init__()
        self.use_weight_sharing = use_weight_sharing_
        self.use_auxiliary_loss = use_auxiliary_loss_
        # Flattened feature size after the last pooling (10 maps of 5x5 for 14x14 images)
        self.conv_out = 10*5*5
        
        # X1
        self.x1_conv1 = nn.Conv2d(1, 3, kernel_size=3, padding=2, padding_mode = 'reflect')
        self.x1_conv2 = nn.Conv2d(3, 6, kernel_size=2, padding=2, padding_mode = 'reflect')
        self.x1_nest1 = nn.Conv2d(6, 12, kernel_size=3, padding=2, padding_mode = 'reflect')
        self.x1_nest2 = nn.Conv2d(12, 12, kernel_size=3, padding=1)
        self.x1_nest3 = nn.Conv2d(12, 10, kernel_size=2, padding=1)
        self.x1_fc1 = nn.Linear(self.conv_out, self.conv_out)
        self.x1_fc2 = nn.Linear(self.conv_out, 10)
        
        # X2
        if not self.use_weight_sharing:
            self.x2_conv1 = nn.Conv2d(1, 3, kernel_size=3, padding=2, padding_mode = 'reflect')
            self.x2_conv2 = nn.Conv2d(3, 6, kernel_size=2, padding=2, padding_mode = 'reflect')
            self.x2_nest1 = nn.Conv2d(6, 12, kernel_size=3, padding=2, padding_mode = 'reflect')
            self.x2_nest2 = nn.Conv2d(12, 12, kernel_size=3, padding=1)
            self.x2_nest3 = nn.Conv2d(12, 10, kernel_size=2, padding=1)
            self.x2_fc1 = nn.Linear(self.conv_out, self.conv_out)
            self.x2_fc2 = nn.Linear(self.conv_out, 10)
        
        # Combine
        self.comp_fc1 = nn.Linear(20, 20)
        self.comp_fc3 = nn.Linear(20, 2)
        # Dropout (kept as an attribute; it is not applied in forward)
        self.dropout=nn.Dropout()

    def _encode(self, img, branch):
        '''
        Runs one image through the layers named '<branch>_*' and returns the 10-value vector.
        '''
        def layer(name):
            return getattr(self, '{}_{}'.format(branch, name))

        feat = F.relu(F.max_pool2d(layer('conv1')(img), kernel_size=3, stride=1))
        feat = F.relu(F.max_pool2d(layer('conv2')(feat), kernel_size=2, stride=2))
        # Three stacked convolutions. nest2 (k=3, p=1) keeps the spatial size,
        # so conv_out is unchanged by including it.
        feat = F.relu(layer('nest1')(feat))
        feat = F.relu(layer('nest2')(feat))
        feat = F.relu(F.max_pool2d(layer('nest3')(feat), kernel_size=2, stride=2))
        feat = feat.reshape(feat.size(0), -1)
        feat = F.relu(layer('fc1')(feat))
        return layer('fc2')(feat)
        
    # Forward method    
    def forward(self, x):
        # Slicing with 0:1 / 1:2 keeps the channel dimension for Conv2d
        x1 = self._encode(x[:, 0:1], 'x1')
        # With weight sharing the second image goes through the x1_* layers as well
        x2 = self._encode(x[:, 1:2], 'x1' if self.use_weight_sharing else 'x2')
        
        # Combine
        x = F.relu(self.comp_fc1(torch.cat((x1, x2), 1)))
        x = self.comp_fc3(x)
        if self.use_auxiliary_loss:
            return x, x1, x2 
        else:
            return x

### Residual Network Convolutional Model

For Conv2D, MaxPool2D => See sizing rules on PyTorch page.

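| Datasize | Operation      |
| ---------| ---------------|
| 1x14x14  | Conv, k=1      |
| 32x14x14 | ResBlocks seq  |
| 32x14x14 | AvgP, k=14     |
| 32x1x1   | Flatten        |
| 32       | Lin, 32->10    |
| 10       |                |

Changed Architecture

| Datasize | Operation           |
| ---------| --------------------|
| 1x14x14  | Conv, k=3           |
| 16x12x12 | 6 ResBlocks, k=3    |
| 16x12x12 | AvgP, k=12          |
| 16x1x1   | Flatten             |
| 16       | Lin, 16->10         |
| 10       |                     |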

Shown below is the ResBlock:
![ResBlock](./tmpResBlock.jpg "ResBlock")

**Credits Missing**

In [6]:
# To be used in the Residual Network
class ResBlock(nn.Module):
    '''
    Residual block: conv -> batch-norm -> relu -> conv -> batch-norm,
    plus the block input, followed by a final relu.

    Both convolutions keep the channel count and use padding (kernel_size-1)//2,
    which preserves the spatial size for odd kernel sizes so the input can be added back.
    '''
    def __init__(self, nb_channels, kernel_size):
        super().__init__()

        same_padding = (kernel_size - 1) // 2
        self.conv1 = nn.Conv2d(nb_channels, nb_channels, kernel_size, padding=same_padding)
        self.bn1 = nn.BatchNorm2d(nb_channels)
        self.conv2 = nn.Conv2d(nb_channels, nb_channels, kernel_size, padding=same_padding)
        self.bn2 = nn.BatchNorm2d(nb_channels)

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        # Skip connection: add the untouched input before the last activation
        return F.relu(residual + x)

In [7]:
class ResNet(nn.Module):
    '''
    Two-branch residual network for a pair of single-channel images.

    Each image (channel 0 and channel 1 of the input) goes through a 3x3
    convolution to 16 channels, a sequence of nb_blocks ResBlocks, an average
    pooling with kernel 12 and a linear layer producing 10 values. The two
    10-value vectors are concatenated and passed through two linear layers to
    give a 2-value output.

    use_weight_sharing_ : when True the second image reuses the x1_* layers,
                          otherwise a separate set of x2_* layers is created.
    use_auxiliary_loss_ : when True forward also returns the two per-image
                          10-value vectors.
    '''
    # Init method
    def __init__(self, use_weight_sharing_, use_auxiliary_loss_):
        super().__init__()
        self.use_weight_sharing = use_weight_sharing_
        self.use_auxiliary_loss = use_auxiliary_loss_
        self.rb_kernel_size = 3  # Odd size expected
        self.nb_blocks = 6

        # Layers of the first branch (also used for the second image when sharing)
        self.x1_conv0 = nn.Conv2d(1, 16, kernel_size=3)
        self.x1_resblocks = nn.Sequential(
            *[ResBlock(16, self.rb_kernel_size) for _ in range(self.nb_blocks)]
        )
        self.x1_avg = nn.AvgPool2d(kernel_size=12)
        self.x1_fc = nn.Linear(16, 10)

        # Dedicated layers of the second branch
        if not self.use_weight_sharing:
            self.x2_conv0 = nn.Conv2d(1, 16, kernel_size=3)
            self.x2_resblocks = nn.Sequential(
                *[ResBlock(16, self.rb_kernel_size) for _ in range(self.nb_blocks)]
            )
            self.x2_avg = nn.AvgPool2d(kernel_size=12)
            self.x2_fc = nn.Linear(16, 10)

        # Head combining both 10-value vectors
        self.comp_fc1 = nn.Linear(20, 20)
        self.comp_fc3 = nn.Linear(20, 2)
        # Dropout (kept as an attribute; it is not applied in forward)
        self.dropout = nn.Dropout()

    def _encode(self, img, conv0, resblocks, avg, fc):
        '''
        Runs one image through the given stem/resblocks/pool/linear stack and returns the 10-value vector.
        '''
        feat = resblocks(F.relu(conv0(img)))
        feat = F.relu(avg(feat))
        return fc(feat.view(feat.size(0), -1))

    # Forward method
    def forward(self, x):
        # Slicing with 0:1 / 1:2 keeps the channel dimension for Conv2d
        first_img, second_img = x[:, 0:1], x[:, 1:2]

        x1 = self._encode(first_img, self.x1_conv0, self.x1_resblocks, self.x1_avg, self.x1_fc)
        if self.use_weight_sharing:
            x2 = self._encode(second_img, self.x1_conv0, self.x1_resblocks, self.x1_avg, self.x1_fc)
        else:
            x2 = self._encode(second_img, self.x2_conv0, self.x2_resblocks, self.x2_avg, self.x2_fc)

        # Combine
        x = self.comp_fc3(F.relu(self.comp_fc1(torch.cat((x1, x2), 1))))
        if self.use_auxiliary_loss:
            return x, x1, x2
        return x

# Methods for Training and Testing

## Train Model

In [8]:
def _build_model(model_type, use_weight_sharing, use_auxiliary_loss):
    '''
    Instantiates the architecture selected by model_type.

    Raises ValueError when model_type is not one of the handled names.
    '''
    if model_type == 'baseline':
        return BaseNet(use_weight_sharing, use_auxiliary_loss)
    if model_type == 'alex':
        return AlexNet(use_weight_sharing, use_auxiliary_loss)
    if model_type == 'residual':
        return ResNet(use_weight_sharing, use_auxiliary_loss)
    if model_type == 'deep':
        return DeepNet(use_weight_sharing, use_auxiliary_loss) # Undefined
    if model_type == 'very_deep':
        return VeryDeepNet(use_weight_sharing, use_auxiliary_loss) # Undefined
    raise ValueError('Unknown model type: {!r}'.format(model_type))


def train_model(model_type, train_input, train_target, train_classes, test_input, test_target, test_classes, 
                mini_batch_size, nb_epochs, nb_iterations):
    '''
    Trains the specified model using train_data and tests the model using the compute_nb_errors function.

    For each of the four (weight sharing, auxiliary loss) combinations, a fresh
    model is trained nb_iterations times with plain SGD and cross-entropy, the
    test error of every run is printed, and finally the standard deviation and
    mean of the test error over the runs are printed.

    model_type      : name of the architecture ('baseline', 'alex', 'residual', ...).
    train_input     : training inputs, batched along dimension 0.
    train_target    : targets for the main output.
    train_classes   : per-sample class pairs; column 0 / column 1 are the targets
                      of the two auxiliary outputs.
    test_*          : same as above, forwarded to compute_nb_errors.
    mini_batch_size : number of samples per SGD step (the last batch of an epoch
                      may be smaller).
    nb_epochs       : passes over the training set per run.
    nb_iterations   : independent runs per configuration.

    Returns nothing; results are printed.
    Raises ValueError for an unknown model_type.
    '''
    
    # Resolve the device locally (same rule as compute_nb_errors) instead of
    # relying on a notebook-level global being defined before the call
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Initialize model parameters
    eta = 1e-1    # SGD learning rate
    alpha = 0.1   # weight of the auxiliary losses in the total loss
    net_types = [(False, False), (True, False), (False, True), (True, True)]
    
    # Copy the training set to the device once rather than on every mini-batch
    train_input = train_input.to(device)
    train_target = train_target.to(device)
    train_classes = train_classes.to(device)
    nb_samples = train_input.size(0)
    
    # Iterate for four possible cases
    for (use_weight_sharing, use_auxiliary_loss) in net_types:
        print('\nTraining', '-'*60)
        print('Using Weight Sharing:', use_weight_sharing)
        print('Using Auxiliary Loss:', use_auxiliary_loss)        
        
        all_errors = torch.empty(nb_iterations)
        
        # Iterate for performance estimation
        for k in range(nb_iterations):
            # Define the training model
            model = _build_model(model_type, use_weight_sharing, use_auxiliary_loss)
            model.to(device)
            model.train()
            optimizer = torch.optim.SGD(model.parameters(), lr = eta)
            criterion = nn.CrossEntropyLoss()
            # Iterate over several epochs
            for e in range(nb_epochs):
                # Iterate over mini-batches
                for b in range(0, nb_samples, mini_batch_size):
                    # Slicing clamps at the end of the data, so a dataset size that
                    # is not a multiple of mini_batch_size yields a shorter last batch
                    batch = slice(b, b + mini_batch_size)
                    if use_auxiliary_loss:
                        output, output2, output3 = model(train_input[batch])
                        loss1 = criterion(output, train_target[batch])
                        loss2 = criterion(output2, train_classes[batch, 0])
                        loss3 = criterion(output3, train_classes[batch, 1])
                        loss = (1-alpha)*loss1 + alpha*(loss2 + loss3) # Take weighted average
                    
                    else:
                        output = model(train_input[batch])
                        loss = criterion(output, train_target[batch])
                    
                    # Backprop
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
            
            # Compute number of errors
            if use_auxiliary_loss:
                (nb_errors, nb_errors2, nb_errors3) = compute_nb_errors(model, test_input, test_target, test_classes)
                error = (100 * nb_errors) / test_input.size(0)
                error2 = (100 * nb_errors2) / test_classes.size(0)
                error3 = (100 * nb_errors3) / test_classes.size(0)
                print('test error Net {:0.2f}% {:d}/{:d}'.format(error, nb_errors, test_input.size(0)))
                print('test error X1 {:0.2f}% {:d}/{:d}'.format(error2, nb_errors2, test_classes.size(0)))
                print('test error X2 {:0.2f}% {:d}/{:d}'.format(error3, nb_errors3, test_classes.size(0))) 
            
            else:
                nb_errors = compute_nb_errors(model, test_input, test_target, test_classes)
                error = (100 * nb_errors) / test_input.size(0)
                print('test error Net {:0.2f}% {:d}/{:d}'.format(error, nb_errors, test_input.size(0)))
            # Only the error of the main output enters the statistics
            all_errors[k] = error
        
        # Summary over the nb_iterations runs of this configuration
        print('Standard Deviation: {:0.2f}%'.format(all_errors.std().item()))
        print('Mean Error: {:0.2f}%'.format(all_errors.mean().item()))

In [9]:
# Compute number of errors
def compute_nb_errors(model, test_input, test_target, test_classes):
    '''
    Computes and returns the number of prediction mistakes

    The model is evaluated in eval mode without gradient tracking on the whole
    test_input at once; its previous train/eval mode is restored afterwards.

    model        : network exposing a boolean use_auxiliary_loss attribute. When
                   that flag is set, the model returns (main, aux1, aux2) outputs,
                   otherwise only the main output.
    test_input   : inputs fed to the model.
    test_target  : expected class index of the main output, one per sample.
    test_classes : expected class indices of the auxiliary outputs; column 0
                   belongs to aux1 and column 1 to aux2.

    Returns a 0-dim tensor with the number of main-output mistakes, or a tuple
    (main, aux1, aux2) of such tensors when model.use_auxiliary_loss is set.
    '''
    
    # Evaluate on the device the model actually lives on. Inferring it from CUDA
    # availability fails for a CPU model on a CUDA-capable machine.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else test_input.device
    
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(test_input.to(device))
    finally:
        # Do not leave a model that is still being trained stuck in eval mode
        model.train(was_training)
    
    def count_mistakes(scores, expected):
        # Predicted class = index of the largest score along dimension 1
        return torch.count_nonzero(expected.to(device) != torch.argmax(scores, dim = 1))
    
    # Count number of errors  
    if model.use_auxiliary_loss:
        o1, o2, o3 = outputs
        nb_errors = count_mistakes(o1, test_target)
        nb_errors2 = count_mistakes(o2, test_classes[:,0])
        nb_errors3 = count_mistakes(o3, test_classes[:,1])
        
        return (nb_errors, nb_errors2, nb_errors3)
    
    return count_mistakes(outputs, test_target)

# Deployment

In [10]:
# Report the number of trainable parameters of every architecture, with and
# without weight sharing. The auxiliary loss only changes what forward returns,
# not the layers, so it stays disabled here.
net_types = [(False, False), (True, False)]
model_types = ['baseline', 'alex', 'residual']
model_classes = {'baseline': BaseNet, 'alex': AlexNet, 'residual': ResNet}

for model_type in model_types:
    # Iterate over the two weight-sharing settings
    for (use_weight_sharing, use_auxiliary_loss) in net_types:
        print("\nModel type:", model_type)
        print('Using Weight Sharing:', use_weight_sharing)

        # Define the model
        model = model_classes[model_type](use_weight_sharing, use_auxiliary_loss)

        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print("The number of Trainable parameters: {}".format(trainable_params))


Model type: baseline
Using Weight Sharing: False
The number of Trainable parameters: 9702

Model type: baseline
Using Weight Sharing: True
The number of Trainable parameters: 5082

Model type: alex
Using Weight Sharing: False
The number of Trainable parameters: 136114

Model type: alex
Using Weight Sharing: True
The number of Trainable parameters: 68288

Model type: residual
Using Weight Sharing: False
The number of Trainable parameters: 57570

Model type: residual
Using Weight Sharing: True
The number of Trainable parameters: 29016


In [11]:
# Initialize parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
nb_iterations = 10
nb_epochs = 50
mini_batch_size = 100
model_types = ['baseline', 'alex', 'residual']

# Use normalized inputs: both splits are standardised with the mean and standard
# deviation of the training set. The out-of-place sub/div keep the raw
# train_input / test_input intact, so re-running this cell (or any cell that
# reads the raw tensors) gives the same result.
inp_mean = train_input.mean()
inp_std = train_input.std()
norm_train_input = train_input.sub(inp_mean).div(inp_std)
norm_test_input = test_input.sub(inp_mean).div(inp_std)

print('Using device:', device, '\n')

# Iterate for all model architectures
for model_type in model_types:
    print('Using model:', model_type, '='*90)
    
    train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
            mini_batch_size, nb_epochs, nb_iterations)

Using device: cuda 


Training ------------------------------------------------------------
Using Weight Sharing: False
Using Auxiliary Loss: False
test error Net 21.70% 217/1000
test error Net 21.50% 215/1000
test error Net 17.70% 177/1000
test error Net 19.30% 193/1000
test error Net 19.40% 194/1000
test error Net 19.40% 194/1000
test error Net 18.90% 189/1000
test error Net 21.60% 216/1000
test error Net 18.80% 188/1000
test error Net 18.30% 183/1000
Standard Deviation: 1.44%
Mean Error: 19.66%

Training ------------------------------------------------------------
Using Weight Sharing: True
Using Auxiliary Loss: False
test error Net 14.60% 146/1000
test error Net 16.10% 161/1000
test error Net 17.30% 173/1000
test error Net 14.10% 141/1000
test error Net 15.50% 155/1000
test error Net 15.50% 155/1000
test error Net 15.50% 155/1000
test error Net 16.90% 169/1000
test error Net 13.90% 139/1000
test error Net 15.40% 154/1000
Standard Deviation: 1.10%
Mean Error: 15.48%

Training ------

test error Net 17.00% 170/1000
test error X1 10.60% 106/1000
test error X2 10.50% 105/1000
test error Net 16.50% 165/1000
test error X1 11.80% 118/1000
test error X2 12.70% 127/1000
test error Net 12.00% 120/1000
test error X1 10.20% 102/1000
test error X2 13.00% 130/1000
Standard Deviation: 1.59%
Mean Error: 14.08%

Training ------------------------------------------------------------
Using Weight Sharing: True
Using Auxiliary Loss: True
test error Net 10.60% 106/1000
test error X1 6.20% 62/1000
test error X2 6.70% 67/1000
test error Net 11.40% 114/1000
test error X1 4.80% 48/1000
test error X2 5.80% 58/1000
test error Net 9.50% 95/1000
test error X1 5.80% 58/1000
test error X2 5.10% 51/1000
test error Net 11.10% 111/1000
test error X1 6.10% 61/1000
test error X2 5.90% 59/1000
test error Net 10.30% 103/1000
test error X1 5.10% 51/1000
test error X2 5.90% 59/1000
test error Net 10.50% 105/1000
test error X1 6.20% 62/1000
test error X2 5.80% 58/1000
test error Net 9.60% 96/1000
test err

# Part below not to be submitted

### BaseNet

In [12]:
# # Initialize parameters
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'baseline'
# nb_iterations = 10
# nb_epochs = 50
# mini_batch_size = 100
# # Use normalised inputs
# m1 = train_input.mean()
# s1 = train_input.std()
# norm_train_input = train_input.sub_(m1).div_(s1)
# norm_test_input = test_input.sub_(m1).div_(s1)
# #
# print('Using device:', device)
# train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

### AlexNet

In [13]:
# # Initialize parameters
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'alex'
# nb_iterations = 10
# nb_epochs = 50
# mini_batch_size = 100
# # Use normalised inputs
# m1 = train_input.mean()
# s1 = train_input.std()
# norm_train_input = train_input.sub_(m1).div_(s1)
# norm_test_input = test_input.sub_(m1).div_(s1)
# #
# print('Using device:', device)
# train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

### ResNet

In [14]:
# # Initialize parameters
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'residual'
# nb_iterations = 10
# nb_epochs = 50
# mini_batch_size = 100
# # Use normalised inputs
# m1 = train_input.mean()
# s1 = train_input.std()
# norm_train_input = train_input.sub_(m1).div_(s1)
# norm_test_input = test_input.sub_(m1).div_(s1)
# #
# print('Using device:', device)
# train_model(model_type, norm_train_input, train_target, train_classes, norm_test_input, test_target, test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

In [15]:
# # For development purposes
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'residual'
# nb_iterations = 3
# nb_epochs = 10
# mini_batch_size = 2
# #
# ex_train_input = train_input[0:10]
# ex_train_target = train_target[0:10]
# ex_train_classes = train_classes[0:10]
# #
# ex_test_input = test_input[0:10]
# ex_test_target = test_target[0:10]
# ex_test_classes = test_classes[0:10]
# #
# print('Using device:', device)
# train_model(model_type, ex_train_input, ex_train_target, ex_train_classes, ex_test_input, ex_test_target, ex_test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

In [16]:
# # Without normalisation
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model_type = 'shallow'
# nb_iterations = 10
# nb_epochs = 50
# mini_batch_size = 100
# #
# print('Using device:', device)
# train_model(model_type, train_input, train_target, train_classes, test_input, test_target, test_classes, 
#             mini_batch_size, nb_epochs, nb_iterations)

In [17]:
# m1 = train_input.mean()
# s1 = train_input.std()
# x_train=train_input.sub_(m1).div_(s1)
# x_test=test_input.sub_(m1).div_(s1)

# def execute_norm(model_nm, nb_iterations, nb_epochs=20):
#     model=None
#     nb_epochs = nb_epochs
#     mini_batch_size = 50
#     eta = 1e-1
#     use_gpu=True
#     tot_err=0
#     for k in range(nb_iterations):
#         if(model_nm=="base"):
#             model = Baseline_Net()
#         else:
#             model = WtSharing_Net()
#         optimizer = torch.optim.SGD(model.parameters(), lr = eta)
#         criterion = nn.CrossEntropyLoss()#MSELoss()
#         train_model(model, x_train, train_target, train_classes, mini_batch_size, optimizer, criterion, nb_epochs, use_gpu)
#         nb_test_errors = compute_nb_errors(model, x_test, test_target, test_classes, mini_batch_size,use_gpu)
#         tot_err+=nb_test_errors
#         print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / x_test.size(0),
#                                                       nb_test_errors, x_test.size(0)))
#     print("Avg. Error: ",(tot_err/(10*nb_iterations)), "%")    


In [18]:
# from torchsummary import summary
# print(summary(model_base.to("cuda"),input_size=(2, 14, 14)))
# print(summary(model_ws.to("cuda"),input_size=(2, 14, 14)))

In [19]:
# !pip install torchsummary 