<a href="https://colab.research.google.com/github/mariaberardi/SVM_example/blob/main/PyTorch_classification_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Standard exercise: design and train a model to classify handwritten digits from 0 to 9 using the MNIST dataset. 

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sklearn.metrics as metrics

In [7]:
import numpy as np

In [2]:
# load the MNIST dataset using DataLoader
# we will train a model to classify handwritten digits from 0 to 9

# Transforms are common image transformations available in the torchvision.transforms module 
# They can be chained together using Compose
transform = transforms.Compose([transforms.ToTensor()])

# download training dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
# load training dataset
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# download testing dataset
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
# load testing dataset
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# checking what data points look like

print(trainset[8]) # will return a 28x28x1 array, 28x28 corresponding to pixels in each image in the dataset

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000

In [5]:
print(len(trainset))
print(len(testset))

# MNIST database contains 60,000 training images and 10,000 testing images

60000
10000


In [6]:
BATCH_SIZE = 32

# this means that 32 samples from the training dataset will be used to estimate 
# the error gradient before the model weights are updated

In [10]:
# want to  train a model to classify digits based on their pictures

# model will have one convolutional layer followed by two fully-connected layers

class Model(nn.Module): # model in Pytorch is a subclass of nn.Model class
    def __init__(self):
        super(Model, self).__init__()

        # 28x28x1 -> 26x26x32, change shape
        # here input is size of image (28x28) x input channel (1 in this case: black and white image gives us 1dim gray scale) 
        # there a 128 dim inner layer 
        # output is 10 because this is a classification task of digits 0 to 9

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.d1 = nn.Linear(26*26*32, 128) # 32 = batch size 
        self.d2 = nn.Linear(128, 10)

    def forward(self, x): 
    # model includes a forward function which defines what operations get applied to the inputted data
        
        # 32x1x28x28 -> 32x32x26x26
        x = self.conv1(x)
        x = F.relu(x)

        # flatten -> 32 x (32*26*26)
        x = x.flatten(start_dim = 1)

        # 32 x (32*26*26) -> 32x128
        x = self.d1(x)
        x = F.relu(x)

        # logits => 32x10
        logits = self.d2(x)
        out = F.softmax(logits, dim=1)
        return out

In [15]:
# define parameters for training

learning_rate = 0.001
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model()
model = model.to(device)
loss_function = nn.CrossEntropyLoss() # this defines loss function
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #adjust parameters of model to minimize loss

In [16]:
# define training loop

for epoch in range(num_epochs): # number of epochs = number of complete passes through the training dataset
    train_running_loss = 0.0
    train_acc = 0.0

    # training step
    for i, (images, labels) in enumerate(trainloader):
        
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        loss = loss_function(logits, labels)
        optimizer.zero_grad() # call optimizer.zero_grad() to zero out the gradients from previous round of training
        loss.backward() # call loss.backward() to backpropagate the new round of gradients
        optimizer.step() # call optimizer.step() to adjust the model parameters based on these gradients

        train_running_loss += loss.detach().item()
        train_acc += (torch.argmax(logits, 1).flatten() == labels).type(torch.float).mean().item()
    
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \
          %(epoch, train_running_loss / i, train_acc/i))

Epoch: 0 | Loss: 1.6188 | Train Accuracy: 0.85
Epoch: 1 | Loss: 1.4951 | Train Accuracy: 0.97
Epoch: 2 | Loss: 1.4842 | Train Accuracy: 0.98
Epoch: 3 | Loss: 1.4791 | Train Accuracy: 0.98
Epoch: 4 | Loss: 1.4760 | Train Accuracy: 0.99


In [19]:
# Evaluate the model
# Run the forward pass of the model only on the test set to check the accuracy

test_accuracy = 0.0
for i, (images, labels) in enumerate(testloader, 0):
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    test_accuracy += (torch.argmax(outputs, 1).flatten() == labels).type(torch.float).mean().item()
    predictions = torch.argmax(outputs, 1).flatten().cpu().numpy() #convert back to numpy to use metrics

# compare labels with model predictions
    print("Label:", labels)
    print("Prediction:", predictions)
    print("Difference:", labels - predictions) # only useful to see if it's 0 or nonzero, actual nonzero value is of course irrelevant
        
print('Test Accuracy: %.2f'%(test_accuracy/i))

Label: tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1])
Prediction: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4 9 6 6 5 4 0 7 4 0 1 3 1]
Difference: tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        -5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])
Label: tensor([3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2, 4, 4, 6, 3, 5, 5, 6, 0,
        4, 1, 9, 5, 7, 8, 9, 3])
Prediction: [3 4 7 2 7 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3]
Difference: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
Label: tensor([7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3,
        6, 1, 3, 6, 9, 3, 1, 4])
Prediction: [7 4 6 4 3 0 7 0 2 9 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4]
Difference: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])