**LeNet for MNIST Handwritten Digit Recognition**


In [0]:
import torch

In [0]:
# Device configuration
# How to choose between CPU and GPU?
# Use torch.cuda.is_available() and torch.device() to assign the device (CPU/GPU) to a variable named device.   

In [0]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
# torch.device() needs an explicit device string; calling it with no arguments raises a TypeError.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

*   cuda.is_available  = [Link](https://pytorch.org/docs/stable/cuda.html#torch.cuda.is_available)
*   device = [Link](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device)



In [2]:
# Quick check: displays True when PyTorch can use a CUDA GPU in this runtime, False otherwise.
torch.cuda.is_available()

True

In [0]:
# Hyper parameters -- play around with the values below
num_classes = 10        # size of the network's output layer (one score per class)
num_epochs = 5          # number of full passes over the training loader
batch_size = 100        # samples per mini-batch handed out by the data loaders
learning_rate = 1e-3    # step size given to the optimizer

In [0]:
# The dataset we are going to work on is MNIST handwritten characters dataset.
# Follow the api in the below link to load the MNIST dataset in torchvision.datasets
# train_loader and test_loader need to use the dataloader api to have a batch wise data loading function
# Try to understand how batch-wise training works by thinking about how training was done in the previous ML experiments

*   torchvision.datasets.MNIST = [Link](https://pytorch.org/docs/stable/torchvision/datasets.html#mnist)
*   torch.utils.data.DataLoader = [Link](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader)


In [0]:
import torchvision
import torchvision.transforms as transforms

# MNIST dataset
# Checkout possible transforms. Data augmentation can help improve accuracy in most cases.
# Explore any of the options available. Try to understand what they all do
# ToTensor() converts each PIL image to a float tensor; it is the same transform
# the test set uses, so both splits are preprocessed identically.
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False, 
                                          transform=transforms.ToTensor())

# Try to add a cell below to see how the batch loader outputs data
# Data loader
# The training loader reshuffles the training set; the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

Let's try to build the network architecture from the given image.


1st layer:

Convolution layer:

>in_channels = 1
>out_channels = 16
>kernel_size = 5
>stride = 1
>padding = 2

Batchnorm features = 16

Maxpool layer:

>kernel size= 2
>stride = 2
 
 
2nd layer:

Convolution layer:

>in_channels = 16
>out_channels = 32
>kernel_size = 5
>stride = 1
>padding = 2

Batchnorm features = 32

Maxpool layer:

>kernel size= 2
>stride = 2
 

![alt text](https://pytorch.org/tutorials/_images/mnist.png)

In [0]:
import torch.nn as nn

# Convolutional neural network (two convolutional layers)
# What does each layer do? Try to understand the significance of each operation
class ConvNet(nn.Module):
    """Two-block convolutional classifier followed by one fully connected layer.

    Each block is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d. The convolutions
    use kernel_size=5, stride=1, padding=2, so they keep the spatial size; each
    2x2 max-pool with stride 2 halves it. For a 1x28x28 input this gives
    16x14x14 after layer1 and 32x7x7 after layer2, which is why the linear
    layer expects 7*7*32 input features.

    Args:
        num_classes: number of output scores produced per input image.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        # Block 1: 1 input channel -> 16 feature maps
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        # Block 2: 16 feature maps -> 32 feature maps
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes) for input batch x."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension before the fully connected layer
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

# Build the network, then move its parameters and buffers onto the selected device
model = ConvNet(num_classes)
model = model.to(device)
print(model)

In [0]:
# Loss and optimizer
# What other losses are available?
# Is cross entropy loss the best option?
# How should one choose the loss function?
# Ask the TA or Professor if you do not know the answers to these. Must know.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=learning_rate)

In [0]:
# Train the model
# Put the model in training mode explicitly: if the evaluation cell (model.eval()) was run
# earlier in this kernel, batchnorm would otherwise keep using its running statistics.
model.train()
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
      
        ## Pytorch has an easy method to convert data format to be compatible between CPU and GPU.
        # Convert the data vectors to the "device" type so they live on the same device as the model
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        # Note how the output is extracted from the network in the line below.
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        # Gradients accumulate across backward() calls, so clear them before each step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Report the current mini-batch loss every 100 steps
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

In [0]:
# Test the model
# eval mode: batchnorm uses its moving mean/variance instead of mini-batch mean/variance
model.eval()
correct = 0
total = 0
# No gradients are needed for evaluation, so skip autograd tracking
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the highest score along the class dimension is the predicted label
        _, predicted = outputs.data.max(1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

In [0]:
# Save the model checkpoint (only the state_dict, i.e. the model's parameters and buffers)
checkpoint_path = 'model.ckpt'
torch.save(model.state_dict(), checkpoint_path)