# Problem C
Build a deep convolutional neural network (CNN) that achieves test accuracy of at least 0.985 on MNIST. 
Use this notebook to write your code for CNN by filling in the sections marked `# TODO` and running all cells. You are allowed to change the other parts of the notebook.

## Load the MNIST Dataset
First we need to install PyTorch and import the dependencies. In PyTorch, you first create a `Dataset` and then use a `DataLoader` to iterate over the data samples. We can download MNIST automatically using `torchvision.datasets`.

In [1]:
import torch
import torchvision

In [2]:
# Mini-batch sizes: how many samples each loader yields per iteration
batch_size_train = 32
batch_size_test = 1024

# Per-image preprocessing: convert to a tensor, then apply
# Normalize(mean, std) with the single-channel constants below
to_tensor = torchvision.transforms.ToTensor()
normalize = torchvision.transforms.Normalize((0.1307,), (0.3081,))
image_transform = torchvision.transforms.Compose([to_tensor, normalize])

# MNIST train / test splits stored under 'dataset/' (download=True)
train_dataset = torchvision.datasets.MNIST(
    'dataset/', train=True, download=True, transform=image_transform)
test_dataset = torchvision.datasets.MNIST(
    'dataset/', train=False, download=True, transform=image_transform)

# Shuffling data loaders over both splits
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size_test, shuffle=True)

## Visualize the Sample 
We can check the data loader by visualizing one sample of the hand-written digit images.

In [3]:
import matplotlib.pyplot as plt
# Fetch one batch from the test loader. next(iter(...)) is used instead of
# next(enumerate(...)) so that nothing is assigned to `_`, which IPython
# reserves for the previous cell's output.
example_datas, labels = next(iter(test_loader))
sample = example_datas[0][0]  # first image in the batch, channel 0
# show the data
plt.imshow(sample, cmap='gray', interpolation='none')
print("Label: "+ str(labels[0]))

Label: tensor(4)


In [4]:
# Inspect the layout of the batch returned by the test loader
example_datas.size()

torch.Size([1024, 1, 28, 28])

# Show another digit from the same batch: image index 1022, channel 0
sample = example_datas[1022, 0]
plt.imshow(sample, cmap='gray', interpolation='none')

## Build CNN model
Now we can start to build our CNN model. You are only allowed to use the following layers:

- Linear Layers
- Conv2D
- MaxPool2D
- BatchNorm2D
- Dropout Layers
- ReLU and Softmax

The model must also have fewer than 1 million parameters.

In [5]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 digit images.

    Layer stack and the resulting per-sample shapes for a 1 x 28 x 28 input:

        conv1 (5x5, 1 -> 16) + ReLU   -> 16 x 24 x 24   ((28 - 5) / 1 + 1 = 24)
        max-pool 2x2                  -> 16 x 12 x 12
        conv2 (5x5, 16 -> 40) + ReLU  -> 40 x 8 x 8
        max-pool 2x2                  -> 40 x 4 x 4
        flatten + fc1 (640 -> 10)     -> 10 class scores (digits 0 to 9)
        log-softmax over the class dimension

    ``forward`` returns log-probabilities, so it is meant to be paired with
    ``F.nll_loss``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)
        # MaxPool2d has no parameters, so one instance serves both stages.
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=40, kernel_size=5)
        # Linear applies y = x @ W^T + b; one output per class.
        self.fc1 = nn.Linear(4 * 4 * 40, 10)

    def forward(self, x):
        """Map a batch of images (N, 1, 28, 28) to log-probabilities (N, 10)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 640),
        # this can never silently change the batch size: a wrongly sized input
        # makes fc1 raise a shape error instead.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        # dim must be given explicitly; the implicit choice is deprecated.
        return F.log_softmax(x, dim=1)

In [6]:
# Build a throwaway model instance and total the element counts of its parameters
cnn = CNN()
sum(map(torch.numel, cnn.parameters()))

22866

## Define train function

In [7]:
from tqdm import tqdm_notebook as tqdm
def train(model, device, train_loader, optimizer, epoch, log_interval=10000):
    """Train ``model`` for one pass over ``train_loader``.

    The progress bar shows the running average per-sample loss for the epoch.

    Args:
        model: module whose forward output is fed to ``F.nll_loss``.
        device: device that each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches with a ``len``.
        optimizer: optimizer that updates the parameters of ``model``.
        epoch: epoch number, used only to label the progress bar.
        log_interval: unused; kept so existing callers keep working.
    """
    model.train()
    iterator = tqdm(train_loader, total=len(train_loader),
                    desc="Epoch {}".format(epoch))
    running_loss = 0.0
    samples_seen = 0
    for data, target in iterator:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # nll_loss returns the batch mean by default, so weight it by the batch
        # size and accumulate; dividing only the latest batch loss by the total
        # sample count would make the displayed value decay like 1/step.
        batch_len = data.size(0)
        running_loss += loss.item() * batch_len
        samples_seen += batch_len
        iterator.set_postfix(loss=running_loss / samples_seen)

## Define test function

In [8]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

## Training deep learning model

In [9]:
# Hyperparameters for the SGD run
learning_rate = 0.01
momentum = 0.5
num_epoch = 10
device = "cpu"

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the loaders' shuffling order repeat on every
# Restart & Run All.
torch.manual_seed(0)

model = CNN().to(device)  # using cpu here
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# For L2 regularization, pass weight_decay=<lambda> to optim.SGD as well.

# One training pass followed by one evaluation pass per epoch
for epoch in range(1, num_epoch + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))





Test set: Average loss: 0.0789, Accuracy: 9774/10000 (98%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0653, Accuracy: 9829/10000 (98%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0533, Accuracy: 9852/10000 (99%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0497, Accuracy: 9866/10000 (99%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0545, Accuracy: 9845/10000 (98%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0473, Accuracy: 9865/10000 (99%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0500, Accuracy: 9853/10000 (99%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0564, Accuracy: 9838/10000 (98%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0555, Accuracy: 9825/10000 (98%)



HBox(children=(IntProgress(value=0, max=1875), HTML(value='')))



Test set: Average loss: 0.0450, Accuracy: 9873/10000 (99%)

