# 6. A complete example

In [2]:
import sys
import colorama
from collections import OrderedDict
from matplotlib import pyplot as plt 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.set_printoptions(precision=3)
! pip3 -q install colorama

## Building LeNet 

![](https://pytorch.org/tutorials/_images/mnist.png)

**Architecture Details**

+ Convolutional part:


| Layer       | Name | Input channels | Output channels | Kernel | stride |
| ----------- | :--: | :------------: | :-------------: | :----: | :----: |
| Convolution |  C1  |       1        |        6        |  5x5   |   1    |
| ReLU        |      |       6        |        6        |        |        |
| MaxPooling  |  S2  |       6        |        6        |  2x2   |   2    |
| Convolution |  C3  |       6        |       16        |  5x5   |   1    |
| ReLU        |      |       16       |       16        |        |        |
| MaxPooling  |  S4  |       16       |       16        |  2x2   |   2    |
| Convolution |  C5  |       16       |       120       |  5x5   |   1    |
| ReLU        |      |      120       |       120       |        |        |


+ Fully Connected part:

| Layer      | Name | Input size | Output size |
| ---------- | :--: | :--------: | :---------: |
| Linear     |  F6  |    120     |     84      |
| ReLU       |      |            |             |
| Linear     | Output |     84     |     10      |
| LogSoftmax |      |            |             |


In [10]:
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier returning log-probabilities.

    The network is made of two sub-modules:

    * ``conv_net``: three 5x5 convolutions (stride 1, no padding), each
      followed by ReLU; the first two are also followed by 2x2 max-pooling
      with stride 2.
    * ``fully_connected``: ``Linear(120, 84) -> ReLU -> Linear(84, num_classes)
      -> LogSoftmax``.

    With these kernel sizes a 32x32 input shrinks to 28 -> 14 -> 10 -> 5 -> 1,
    so the convolutional part yields exactly 120 features per image, which is
    what the first linear layer expects. Other input sizes produce a different
    number of features and fail in the fully connected part.

    Args:
        in_channels: number of channels of the input images (1 = grayscale).
        num_classes: number of classes, i.e. size of the output vector.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        self.conv_net = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            nn.Conv2d(6, 16, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            nn.Conv2d(16, 120, kernel_size=(5, 5)),
            nn.ReLU(),
        )

        self.fully_connected = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes),
            nn.LogSoftmax(dim=-1),  # normalise over the class dimension
        )

    def forward(self, imgs):
        """Return the class log-probabilities for a batch of images.

        Args:
            imgs: tensor of shape (batch_size, in_channels, 32, 32).

        Returns:
            Tensor of shape (batch_size, num_classes) with log-probabilities.
        """
        output = self.conv_net(imgs)
        # Collapse (channels, height, width) into one feature vector per image;
        # imgs.shape[0] is the batch size.
        output = output.view(imgs.shape[0], -1)
        return self.fully_connected(output)


### Print a network summary

In [11]:
# Build an untrained LeNet5; printing an nn.Module lists its sub-modules
# together with their constructor arguments.
conv_net = LeNet5()
print(conv_net)

LeNet5(
  (conv_net): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
    (7): ReLU()
  )
  (fully_connected): Sequential(
    (0): Linear(in_features=120, out_features=84, bias=True)
    (1): ReLU()
    (2): Linear(in_features=84, out_features=10, bias=True)
    (3): LogSoftmax()
  )
)


### Retrieve trainable parameters

In [12]:
# named_parameters() yields (name, tensor) pairs for every learnable tensor;
# materialise them so they can be counted and then listed with their shapes.
named_params = [*conv_net.named_parameters()]
print("len(params): %s\n" % (len(named_params),))
for name, param in named_params:
    print("%s:\t%s" % (name, param.size()))

len(params): 10

conv_net.0.weight:	torch.Size([6, 1, 5, 5])
conv_net.0.bias:	torch.Size([6])
conv_net.3.weight:	torch.Size([16, 6, 5, 5])
conv_net.3.bias:	torch.Size([16])
conv_net.6.weight:	torch.Size([120, 16, 5, 5])
conv_net.6.bias:	torch.Size([120])
fully_connected.0.weight:	torch.Size([84, 120])
fully_connected.0.bias:	torch.Size([84])
fully_connected.2.weight:	torch.Size([10, 84])
fully_connected.2.bias:	torch.Size([10])


### Feed network with a random input

In [13]:
# A single random one-channel 32x32 "image". The variable is deliberately not
# called `input`, which would shadow Python's builtin input() for the rest of
# the kernel session.
random_input = torch.randn(1, 1, 32, 32)  # batch_size, num_channels, height, width
out = conv_net(random_input)
# The network ends in LogSoftmax, so exponentiating gives probabilities.
print("Log-Probabilities: \n%s\n" % out)
print("Probabilities: \n%s\n" % torch.exp(out))
print("out.shape: \n%s" % (out.shape,))

Log-Probabilities: 
tensor([[-2.176, -2.340, -2.282, -2.345, -2.391, -2.319, -2.382, -2.251, -2.279,
         -2.279]], grad_fn=<LogSoftmaxBackward>)

Probabilities: 
tensor([[0.114, 0.096, 0.102, 0.096, 0.092, 0.098, 0.092, 0.105, 0.102, 0.102]],
       grad_fn=<ExpBackward>)

out.shape: 
torch.Size([1, 10])


## Loading the train and test data

In [15]:
from torchvision import datasets, transforms

# Resize every image to 32x32 (the input size used for LeNet5 in this
# notebook) and convert it to a tensor.
transformations = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Arguments shared by both splits; only the `train` flag differs.
mnist_kwargs = dict(root='./data', download=True, transform=transformations)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

# Shuffle the training data only; evaluation order does not matter.
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=256)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, batch_size=1024)

## Train function 

In [38]:
def train(model, train_loader, test_loader, device, num_epochs=3, lr=0.1):
    """Train ``model`` with Adam and print its accuracy after every epoch.

    Args:
        model: the ``nn.Module`` to optimise; it must already live on ``device``.
        train_loader: iterable of ``(data, labels)`` batches used for training.
        test_loader: iterable of ``(data, labels)`` batches used for evaluation.
        device: device the batches are moved to before the forward pass.
        num_epochs: number of passes over ``train_loader``.
        lr: learning rate given to the Adam optimizer.

    Returns:
        None. Progress and accuracies are printed.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # CrossEntropyLoss applies log-softmax itself. This is harmless for a model
    # that already ends in LogSoftmax, because log-softmax leaves normalised
    # log-probabilities unchanged, and it also works for raw logits.
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        print("=" * 20, "Starting epoch %d" % (epoch + 1), "=" * 20)

        # accuracy() switches the model to eval mode at the end of every epoch,
        # so training mode has to be restored here. It only changes behaviour
        # for models with modules such as nn.Dropout or nn.BatchNorm.
        model.train()

        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)

            # Clear gradients BEFORE the backward pass: anything already stored
            # on the parameters (e.g. left over from an interrupted run) would
            # otherwise be accumulated into the first update.
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            if batch_idx % 40 == 0:
                print("Batch %d/%d, Loss=%.4f" % (batch_idx, len(train_loader), loss.item()))

        train_acc = accuracy(model, train_loader, device)
        test_acc = accuracy(model, test_loader, device)

        print("\nAccuracy on training: %.2f%%" % (100*train_acc))
        print("Accuracy on test: %.2f%%" % (100*test_acc))

In [39]:
def accuracy(model, dataloader, device):
    """Compute the model's accuracy on the data provided by 'dataloader'.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        model: module mapping a batch of inputs to per-class scores of shape
            (batch_size, num_classes).
        dataloader: iterable yielding ``(data, labels)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples among the samples actually
        yielded by ``dataloader``; 0.0 if it yields no samples.
    """
    model.eval()

    num_correct = 0
    # Count what is really evaluated instead of using len(dataloader.dataset):
    # the two differ with drop_last=True or a sampler, and plain iterables of
    # batches have no .dataset attribute at all.
    num_samples = 0
    with torch.no_grad():  # deactivates autograd, reduces memory usage and speeds up computations
        for data, labels in dataloader:
            data, labels = data.to(device), labels.to(device)

            predictions = model(data).argmax(1)
            num_correct += (predictions == labels).sum().item()
            num_samples += labels.numel()

    if num_samples == 0:
        return 0.0  # nothing evaluated; avoid dividing by zero
    return num_correct / num_samples

## Train the model!

In [37]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The model's parameters must be on the same device as the batches.
conv_net = conv_net.to(device)

train(conv_net, train_loader, test_loader, device, lr=2e-3)

Batch 0/235, Loss=0.0427
Batch 40/235, Loss=0.0218
Batch 80/235, Loss=0.0349
Batch 120/235, Loss=0.0369
Batch 160/235, Loss=0.0360


KeyboardInterrupt: 