# LeNet-5 – A Classic CNN Architecture 


Yann LeCun, Léon Bottou, Yoshua Bengio and Patrick Haffner proposed a neural network architecture for handwritten and machine-printed character recognition in the 1990s, which they called LeNet-5.

The LeNet-5 architecture consists of two sets of convolutional and average pooling layers, followed by a flattening convolutional layer, then two fully-connected layers and finally a softmax classifier.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F  

from torch.utils.data import DataLoader

import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [2]:
# MNIST train and test splits, stored under the current working directory
# (root='') with download=True; every image is converted to a tensor.
train_dataset = datasets.MNIST(
    root='',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = datasets.MNIST(
    root='',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Mini-batches of 32 samples, reshuffled on every pass for both splits.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)


In [3]:
class LeNet(nn.Module):
    """LeNet-5-style CNN that maps 1-channel images to 10 class scores.

    The layer sizes fit 28x28 inputs: two convolution + average-pooling
    stages shrink the feature map 28 -> 24 -> 12 -> 8 -> 4, and a third
    4x4 convolution collapses it to 120 features that feed two
    fully-connected layers. ``forward`` returns raw scores; no softmax is
    applied inside the model.
    """

    def __init__(self):
        super().__init__()
        # ReLU is not part of the original LeNet (it had not been invented
        # yet); it is used here as the activation after every hidden layer.
        self.relu = nn.ReLU()
        # One shared 2x2 / stride-2 average pool, reused after conv1 and conv2.
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Convolutions use stride 1 and no padding (the Conv2d defaults).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=4)
        self.linear1 = nn.Linear(in_features=120, out_features=84)
        self.linear2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return class scores of shape ``(batch, 10)`` for the input batch ``x``."""
        features = self.pool(self.relu(self.conv1(x)))
        features = self.pool(self.relu(self.conv2(features)))
        features = self.relu(self.conv3(features))
        # Flatten everything except the batch dimension.
        flat = features.reshape(features.shape[0], -1)
        hidden = self.relu(self.linear1(flat))
        return self.linear2(hidden)
    

Let's specify the device:

In [4]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Build the network, then move its parameters to the selected device.
model = LeNet()
model = model.to(device)

Loss function and optimizer:

In [6]:
# Cross-entropy loss computed on the model's raw class scores.
loss_function = nn.CrossEntropyLoss()
# Adam with a learning rate of 0.001, which looked best after several tries.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## Training

In [7]:
# Seven full passes over the training set.
for epoch in range(7):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the mini-batch to the selected device (CUDA if available).
        data, targets = data.to(device=device), targets.to(device=device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: class scores and their loss against the labels.
        scores = model(data)
        loss = loss_function(scores, targets)

        # Backward pass, then one optimizer (Adam) update.
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch of this epoch.
    print(loss)

tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.0078, grad_fn=<NllLossBackward>)
tensor(0.0028, grad_fn=<NllLossBackward>)
tensor(0.0236, grad_fn=<NllLossBackward>)
tensor(0.0098, grad_fn=<NllLossBackward>)
tensor(0.0022, grad_fn=<NllLossBackward>)


In [8]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over all batches in ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. When
            ``loader.dataset`` exposes a ``train`` attribute, it selects the
            "training data" / "test data" header; otherwise a generic
            ``Accuracy:`` header is printed instead of raising.
        model: ``nn.Module`` whose output has one score per class along
            dimension 1.

    The model is evaluated in eval mode with gradients disabled and is put
    back into whatever mode (train or eval) it was in on entry, even if
    evaluation raises. An empty loader reports ``0 / 0`` with accuracy 0.00
    rather than dividing by zero.
    """
    is_train_split = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train_split is None:
        print("Accuracy:")
    elif is_train_split:
        print("Accuracy on training data:")
    else:
        print("Accuracy on test data:")

    # Run inputs on the device the model actually lives on instead of relying
    # on a module-level global; a parameterless model leaves batches in place.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if model_device is not None:
                    x = x.to(device=model_device)
                    y = y.to(device=model_device)

                scores = model(x)
                _, predictions = scores.max(1)
                # Accumulate as a tensor so there is no per-batch host sync.
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode rather than forcing train mode.
        model.train(was_training)

    num_correct = int(num_correct)
    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")

    

# Report accuracy on the training split first, then on the test split.
for eval_loader in (train_loader, test_loader):
    check_accuracy(eval_loader, model)

Accuracy on training data:
Got 59575 / 60000 with accuracy 99.29
Accuracy on test data:
Got 9872 / 10000 with accuracy 98.72
