In [11]:
# Imports
import torch
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [12]:
# Train and test data
train = datasets.MNIST("", train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("", train=False, download=True,
                      transform=transforms.Compose([transforms.ToTensor()]))

trainset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(test, batch_size=10, shuffle=True)


In [1]:
# Create PyTorch module

# Neural network architecture is defined in the __init__ method
# nn.Linear applies an affine transformation to the input (w.T*x + b)
class Net(nn.Module):

    # Create network layers
    def __init__(self):
        super().__init__()

        # Convolutional layers
        # Inputs 1 & 2 represent dimensions of layer
        # Input 3 represents dimensio  of convolution kernel
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)

        # Create input layer, with 64 neurons and 784 inputs each
        self.fc1 = nn.Linear(64*7*7, 128)

        # Create layer #2, 64 inputs & 64 outputs
        self.fc2 = nn.Linear(128, 10)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))

        return x

    # Computing the output of the network
    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, 64*7*7)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # Compute the softmax functon of a given input
        return F.softmax(x, dim=1)


net = Net()


NameError: name 'nn' is not defined

In [14]:
# Setup the optimiser, setup the error function, compute the gradient with respect to the loss, and train for a number of iterations

# Setup optimisation method, stochastic  gradient descent (SGD)
optimiser = optim.Adam(net.parameters(), lr=0.001)

# Setup number of training epochs
Epochs = 3

# Iterate over training data
for epoch in range(Epochs):
    for data in trainset:
        X, y = data

        # Sets up the gradient stored in each variable of the network to zero
        net.zero_grad()

        # Compute output using Forward() method
        # .view() sets up a 2 dimensional tensor input (a 28 by 28 matrix) to a 1 dimensional one (a 784 vector)
        output = net.forward(X)

        # Setup loss function
        # Network is a classifer thus we use a cross entropy loss function
        loss = F.nll_loss(output, y)

        # Computes the gradient with respect to the loss function over each parameter  of  the  network
        loss.backward()

        # Update network parameters
        optimiser.step()


In [15]:
# Initialise counters
correct = 0
total = 0

# Test the model
# .no_grad() methods prevents gradients stored from being updated
with torch.no_grad():
    for data in testset:
        X, y = data
        output = net.forward(X)
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1

# Print result
print("accuracy:", round(correct/total, 3))


accuracy: 0.894
