In [1]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
batch_size = 32

# Normalization constants applied to the training images. The test images
# must be normalized with the SAME constants: the network only ever learns
# the input scale it sees during training, so using separate per-split
# constants would evaluate it on slightly shifted inputs.
mnist_mean = (0.1307,)
mnist_std = (0.3081,)

# Single preprocessing pipeline shared by both splits: upscale the images to
# 32x32 (the input size the LeNet5 model below is built for), convert them to
# tensors, then normalize.
preprocess = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mnist_mean, std=mnist_std),
])

# Loading the dataset and preprocessing (downloads into ./data if missing)
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=preprocess,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=preprocess,
                                          download=True)

# Shuffle only the training batches; evaluation does not depend on sample
# order, so the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [7]:
# The network consists of 7 layers
# Tensor layout: NCHW feature maps are flattened to NM (M = C * H * W) before the linear layers
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv/average-pool stages, then three linear layers.

    The first linear layer expects 16 * 5 * 5 features, which is what a
    single-channel 32x32 input produces after the two unpadded 5x5
    convolutions and 2x2 poolings (32 -> 28 -> 14 -> 10 -> 5).
    The forward pass returns 10 raw scores per sample (no softmax applied).
    """

    def __init__(self):
        super().__init__()
        # Parameter-free layers, reused after every convolution / hidden layer.
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        # Convolutional feature extractor (NCHW tensors).
        self.c1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
        self.c2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        # Classifier head on flattened features: NM with M = C * H * W.
        self.linear1 = nn.Linear(16 * 5 * 5, 120)
        self.linear2 = nn.Linear(120, 84)
        self.linear3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of NCHW images to a (N, 10) tensor of class scores."""
        # Each conv stage: convolution -> ReLU -> average pooling.
        for conv in (self.c1, self.c2):
            x = self.pool(self.relu(conv(x)))
        # Collapse C, H, W into one axis so the linear layers can consume it.
        x = x.flatten(start_dim=1)
        # Hidden linear layers are followed by ReLU; the output layer is not.
        for hidden in (self.linear1, self.linear2):
            x = self.relu(hidden(x))
        return self.linear3(x)

In [8]:
# Shape check: push one random batch of 64 single-channel 32x32 inputs
# through an untrained network and print the size of the result.
x = torch.randn((64, 1, 32, 32))
model = LeNet5()
print(model(x).size())

torch.Size([64, 10])


In [9]:
# Fresh, untrained network for the training run
model = LeNet5()

# Loss function: cross-entropy on the raw class scores
cost = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the chosen learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.003)

# Number of batches per epoch; shown in the training progress messages
total_step = len(train_loader)

In [10]:
# Train the model: one optimizer step per mini-batch, for num_epochs passes
# over the training loader.
total_step = len(train_loader)  # batches per epoch, for the progress message
num_epochs = 5
log_every = 400  # print the current batch loss every `log_every` steps

for epoch in range(num_epochs):
    # Make sure the model is in training mode at the start of every epoch,
    # even if an evaluation cell switched it to eval mode in between.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = cost(outputs, labels)

        # Backward and optimize: clear stale gradients, backpropagate,
        # then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % log_every == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/5], Step [400/1875], Loss: 0.1207
Epoch [1/5], Step [800/1875], Loss: 0.1158
Epoch [1/5], Step [1200/1875], Loss: 0.2940
Epoch [1/5], Step [1600/1875], Loss: 0.1032
Epoch [2/5], Step [400/1875], Loss: 0.0979
Epoch [2/5], Step [800/1875], Loss: 0.1512
Epoch [2/5], Step [1200/1875], Loss: 0.1885
Epoch [2/5], Step [1600/1875], Loss: 0.1077
Epoch [3/5], Step [400/1875], Loss: 0.0243
Epoch [3/5], Step [800/1875], Loss: 0.0182
Epoch [3/5], Step [1200/1875], Loss: 0.1920
Epoch [3/5], Step [1600/1875], Loss: 0.0915
Epoch [4/5], Step [400/1875], Loss: 0.0117
Epoch [4/5], Step [800/1875], Loss: 0.0012
Epoch [4/5], Step [1200/1875], Loss: 0.0109
Epoch [4/5], Step [1600/1875], Loss: 0.0037
Epoch [5/5], Step [400/1875], Loss: 0.0004
Epoch [5/5], Step [800/1875], Loss: 0.0014
Epoch [5/5], Step [1200/1875], Loss: 0.0054
Epoch [5/5], Step [1600/1875], Loss: 0.0016


In [11]:
# Measure classification accuracy on the test loader.
# Switch to evaluation mode first. LeNet5 as defined above has no dropout or
# batch-norm layers, so this does not change its outputs today, but it keeps
# the evaluation correct if such layers are ever added.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation; disabling them avoids building the
# autograd graph for every batch.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


Accuracy of the network on the 10000 test images: 98.86 %
