# LIS 640 Applied Deep Learning : Convolutional Neural Networks

# Code Blocks for Problem 4

In [12]:
import torch
from torch import nn

class LeNet(nn.Module):
    """LeNet-style convolutional classifier producing 10 output scores.

    Layer order: (5x5 convolution -> ReLU -> 2x2 max pooling) applied twice,
    then flatten, a 500-unit fully connected layer with ReLU, and a final
    10-unit fully connected layer. No softmax is applied to the output.
    """

    def __init__(self):
        """Create the convolutional, pooling and fully connected layers."""
        super().__init__()
        # Convolutions use kernel 5 / padding 2 / stride 1, which leaves the
        # spatial size unchanged; only the pooling steps halve it.
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=2)
        # One pooling module is shared by both convolutional stages.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # fc1 expects 50 channels of 7x7 features, i.e. what two poolings
        # leave of a 28x28 input (28 -> 14 -> 7).
        self.fc1 = nn.Linear(50 * 7 * 7, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Map a batch of images to a batch of 10 scores.

        ``x`` must have one channel and a spatial size that flattens to
        2450 features after the second pooling (e.g. N x 1 x 28 x 28).
        """
        stage1 = self.maxpool(torch.relu(self.conv1(x)))
        stage2 = self.maxpool(torch.relu(self.conv2(stage1)))
        # Keep the batch dimension and merge channel/height/width.
        flat = stage2.flatten(start_dim=1)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

def train_epoch(inputs, labels, optimizer, loss_function, net=None):
    """Run one optimisation step on a single mini-batch.

    Despite the name, each call processes one batch, not a whole epoch; the
    caller is expected to loop over the data loader.

    Args:
        inputs: Batch of inputs fed to the network.
        labels: Targets passed to ``loss_function`` together with the outputs.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called.
            It should have been built from the parameters of the network
            being trained.
        loss_function: Callable ``(outputs, labels) -> loss`` whose result
            supports ``backward()``.
        net: Network to train. If omitted, the notebook-level ``model``
            variable is used, which keeps existing four-argument calls working.

    Returns:
        Tuple ``(model_outputs, loss)``: the network outputs computed before
        the parameter update, and the loss computed from them.
    """
    if net is None:
        # Legacy path: fall back to the global defined in a later notebook cell.
        net = model
    net.train()
    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()
    model_outputs = net(inputs)
    loss = loss_function(model_outputs, labels)
    loss.backward()
    optimizer.step()
    return model_outputs, loss

# Questions for Problem 4

Let's try some real applications with PyTorch. In this section, we will use PyTorch functions directly to build a LeNet-5 model, and then test the model on the MNIST dataset. Your task is to compute the missing `Output Size` entries and finish building the architecture in **Code Blocks for Problem 4**. We will use PyTorch layers such as Linear, Conv2d, ReLU, and MaxPool2d. Refer to [https://pytorch.org/docs/stable/nn.functional.html](https://pytorch.org/docs/stable/nn.functional.html) for more information.

# LeNet-5

The following Table shows the LeNet-5 model architecture and part of the output sizes.


|Layer|Output Size|
|----|----|
|$Input$|1\*28\*28|
|$Conv(C_{out}=20, K=5, P=2, S=1)$||
|$ReLU$||
|$MaxPool(K=2, S=2)$||
|$Conv(C_{out}=50, K=5, P=2, S=1)$||
|$ReLU$||
|$MaxPool(K=2, S=2)$||
|$Flatten$||
|$Linear$|500|
|$ReLU$|500|
|$Linear$|10|

In [13]:
# Build a fresh LeNet and print its layer summary under a heading line.
model = LeNet()
print("LeNet Model: ", model, sep="\n")

LeNet Model: 
LeNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2450, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


Let's start testing our model on the MNIST dataset.

In [14]:
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch

# hyperparameters
learning_rate = 0.01
num_epochs = 10

# Seed PyTorch's global RNG so weight initialisation and batch shuffling are
# repeatable across runs (GPU kernels may still introduce small differences).
torch.manual_seed(0)

# Train on the GPU when one is available; otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load dataset
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.Compose([transforms.ToTensor()]), download=True)
test_dataset = datasets.MNIST(root='dataset/', train=False,
                              transform=transforms.Compose([transforms.ToTensor()]), download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)
dataset_sizes = {'train':len(train_dataset), 'test':len(test_dataset)}

# load model
model = LeNet().to(device)

# define loss
ce_loss = nn.CrossEntropyLoss()

# define optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# start training
for epoch in range(num_epochs):

    print('Epoch', epoch)
    running_loss = 0.0
    correct = 0
    model.train()

    for inputs, labels in train_loader:
        batch_size = inputs.shape[0]

        inputs = inputs.to(device)
        labels = labels.to(device)

        model_outputs, loss = train_epoch(inputs, labels, optimizer, ce_loss)

        correct += (torch.argmax(model_outputs, dim=1) == labels).sum().item()
        # ce_loss averages over the batch, so weight by the batch size to get a
        # per-sample average over the whole dataset below. .item() yields a
        # plain float, so no tensor or graph reference is kept between steps.
        running_loss += loss.item() * batch_size

    epoch_loss = running_loss / len(train_dataset)
    print('Training loss:', epoch_loss)
    print('Training Acc:', correct/len(train_dataset))

    # evaluation each epoch
    model.eval()
    valid_loss = 0.0
    correct = 0

    # No gradients are needed for evaluation, so nothing has to be zeroed here.
    with torch.no_grad():
        for inputs, labels in test_loader:
            batch_size = inputs.shape[0]

            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = ce_loss(outputs, labels)

            correct += (torch.argmax(outputs, dim=1) == labels).sum().item()
            valid_loss += loss.item() * batch_size

    epoch_valid_loss = valid_loss / len(test_dataset)
    print('Validation loss:', epoch_valid_loss)
    print('Validation Acc:', correct/len(test_dataset))

Epoch 0
Training loss: 0.18237850069999695
Training Acc: 0.9428833333333333
Validation loss: 0.11201713234186172
Validation Acc: 0.9677
Epoch 1
Training loss: 0.09535914659500122
Training Acc: 0.9726333333333333
Validation loss: 0.09736984223127365
Validation Acc: 0.9717
Epoch 2
Training loss: 0.0773555338382721
Training Acc: 0.9776333333333334
Validation loss: 0.09346228837966919
Validation Acc: 0.9747
Epoch 3
Training loss: 0.08146487921476364
Training Acc: 0.9778833333333333
Validation loss: 0.07830006629228592
Validation Acc: 0.9773
Epoch 4
Training loss: 0.08177363872528076
Training Acc: 0.9772166666666666
Validation loss: 0.09612464159727097
Validation Acc: 0.9752
Epoch 5
Training loss: 0.07544002681970596
Training Acc: 0.97905
Validation loss: 0.09381267428398132
Validation Acc: 0.9762
Epoch 6
Training loss: 0.07365214824676514
Training Acc: 0.9802166666666666
Validation loss: 0.07499419152736664
Validation Acc: 0.9795
Epoch 7
Training loss: 0.06126044690608978
Training Acc: 0.9