# Assignment 2 - part 2

## Convolutional neural network

Use the training and evaluation pipeline developed in part 1 to train and evaluate two CNN models.
The first shall be composed of standard convolutional layers, non-linaerities, and pooling layers of your choice. The second shall include skip connections. For this you shall develop your own ResidualBlack as a new layer under the nn.Module super-class. 

### Train and apply model

Train the two models. Try different values of the hyper-paramter settings. You shall achieve at least 93% test accuracy with your best model.

Describe briefly your two models and your hyper-parameter setups and comment your results.

**Compare the performance of the two models and the three feed forward models using suitable supportive tables and graphs, and complemented by relevant comments.**


In [3]:
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

# Define a transformation to normalize the dataset
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5), (0.5))])

# Download and load the training data
trainset = datasets.FashionMNIST("~/.pytorch/F_MNIST_data/", download=True, train=True, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST("~/.pytorch/F_MNIST_data/", download=True, train=False, transform = transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

In [43]:
input_size = 1
hidden_sizes = [32, 64, 128, 256]        
output_size = 10                # there are 10 classes
epochs = 17

2 Models:
- 1 Model: conv, relu, linear, pool;
- 2 Model: 

In [39]:
class StandardCNN(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(StandardCNN, self).__init__()
        self.conv1 = nn.Conv2d(input_size, hidden_sizes[0], kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(hidden_sizes[0], hidden_sizes[1], kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(hidden_sizes[1] * 7 * 7, hidden_sizes[2])
        self.fc2 = nn.Linear(hidden_sizes[2], output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, hidden_sizes[1] * 7 * 7)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

# Instantiate the model
standard_cnn = StandardCNN(input_size, hidden_sizes, output_size)


In [33]:
print(standard_cnn)

StandardCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
)


In [52]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(ResNet, self).__init__()
        self.layer1 = nn.Conv2d(input_size, hidden_sizes[0], kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(hidden_sizes[0])
        self.layer2 = self._make_layer(hidden_sizes[0], hidden_sizes[1], 2, stride=2)
        self.layer3 = self._make_layer(hidden_sizes[1], hidden_sizes[2], 2, stride=2)
        self.layer4 = self._make_layer(hidden_sizes[2], hidden_sizes[3], 2, stride=2)
        self.fc = nn.Linear(hidden_sizes[3], output_size)

    def _make_layer(self, in_channels, out_channels, blocks, stride):
        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.layer1(x)))
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Instantiate the model
resnet_cnn = ResNet(input_size, hidden_sizes, output_size)


In [53]:
print(resnet_cnn)

ResNet(
  (layer1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer2): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(2, 2))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, ke

In [50]:
  
def train (model, trainloader, optimizer):
    
    train_losses, test_losses = [], []

    for e in range(epochs):
        tot_train_loss = 0 
        for images, labels in trainloader:
            log_ps = model(images)
            loss = loss_func(log_ps, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            tot_train_loss += loss.item()
        else:
            tot_test_loss = 0
            test_correct = 0
            # Turn off gradients for validation to save memory and speed up computations
            with torch.no_grad():
                model.eval()
                for images, labels in testloader:
                    log_ps = model(images)
                    loss = loss_func(log_ps, labels)
                    tot_test_loss += loss.item()
                    
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    test_correct += equals.sum().item()
                    
            model.train()
            
            # Get mean loss to enable comparison between train and test sets
            train_loss = tot_train_loss / len(trainloader.dataset)
            test_loss = tot_test_loss / len(testloader.dataset)
            
            # At completion of epoch
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            
            if e % 2==0:                                   
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                "Training Loss {:.3f}.. ".format(train_loss),
                "Test Loss {:.3f}.. ".format(test_loss),
                "Test Accuracy {:.3f}".format(test_correct / len(testloader.dataset)))



# # Define loss function and optimizers
loss_func = nn.CrossEntropyLoss()
optimizer1 = optim.Adam(standard_cnn.parameters(), lr=0.001)
optimizer2 = optim.Adam(resnet_cnn.parameters(), lr=0.001)




In [40]:
print("Model 1:")   
train(standard_cnn, trainloader, optimizer1)

Model 1:


KeyboardInterrupt: 

In [54]:
print("Model 2:")   
train(resnet_cnn, trainloader, optimizer2)

Model 2:
Epoch: 1/17..  Training Loss 0.037..  Test Loss 0.037..  Test Accuracy 0.098


KeyboardInterrupt: 