<a href="https://colab.research.google.com/github/subhashjprasad/machine-learning-projects/blob/main/MNISTClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Loading the Data

In [12]:
# visualization
import matplotlib.pyplot as plt

# actual imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, sampler
import torchvision.datasets as datasets
import torchvision.transforms as T

# loading MNIST data
batch_size = 32
SEED = 0  # seeds torch's global RNG, which the samplers draw from when shuffling
NUM_TRAIN = 50000  # images [0, NUM_TRAIN) are used for training; the rest are held out for validation

# Seed before anything random happens so a fresh top-to-bottom run shuffles the same way.
torch.manual_seed(SEED)

# Both dataset objects wrap the same MNIST *training* split (train=True); the two
# samplers below carve it into disjoint training / validation index ranges.
mnist_train = datasets.MNIST('.', download = True, train = True, transform = T.ToTensor())
loader_train = DataLoader(mnist_train, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

mnist_val = datasets.MNIST('.', download = True, train = True, transform = T.ToTensor())
# Upper bound comes from the dataset itself instead of a hard-coded 60000.
loader_val = DataLoader(mnist_val, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, len(mnist_val))))

# checking out the batches: one (images, labels) pair from the training loader
batch = next(iter(loader_train))
print(batch[0].shape, batch[1].shape)

torch.Size([32, 1, 28, 28]) torch.Size([32])


Checking Device

In [13]:
# Toggle: set to False to force CPU execution even when a GPU is present.
USE_GPU = True

# Floating-point type that input batches are cast to before the forward pass.
dtype = torch.float32

# Use CUDA only when it is both requested and actually available; otherwise CPU.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

print('using device:', device) # should show cuda

using device: cuda


Defining Necessary Functions

In [14]:
def check_accuracy(loader, model):
    """
    Measure the classification accuracy of `model` over every batch in `loader`.

    Inputs:
    - loader: Iterable yielding (x, y) batches. If it exposes `dataset.train`,
      that flag only selects which progress message is printed.
    - model: A PyTorch Module mapping a batch of inputs to per-class scores.

    Returns: Fraction of correctly classified samples as a Python float;
    0.0 when the loader yields no samples.

    Side effects: puts the model in evaluation mode (it is not switched back)
    and prints a one-line summary of the result.
    """
    # The validation split in this notebook is carved out of the MNIST training
    # set, so a dataset flagged train=True is reported as "validation".
    # Loaders/datasets without that flag are reported as a test set rather
    # than raising AttributeError.
    dataset = getattr(loader, 'dataset', None)
    if getattr(dataset, 'train', False):
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)  # index of the highest score = predicted class
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    # The running count is a 0-dim tensor after the first batch; convert it once
    # at the end so formatting and the return value use plain Python numbers.
    num_correct = int(num_correct)
    # Guard against an empty loader instead of dividing by zero.
    acc = num_correct / num_samples if num_samples > 0 else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

def train(model, optimizer, loader_train, loader_val, epochs=1, print_every=100):
    """
    Train a model using the PyTorch Module API, printing the loss and the
    validation accuracy periodically during training.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - loader_train: Dataloader for training
    - loader_val: Dataloader for evaluation
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    - print_every: (Optional) Report the loss and validation accuracy every
      `print_every` training iterations; 0 or None disables these reports.

    Returns: List with one validation accuracy per epoch, measured at the end
    of that epoch.
    """
    loss_fn = nn.CrossEntropyLoss()
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    val_accs = []
    for _ in range(epochs):
        print('-' * 128)
        for t, (x, y) in enumerate(loader_train):
            # check_accuracy() leaves the model in eval mode, so training mode
            # is re-enabled at the start of every iteration.
            model.train()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = loss_fn(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each trainable parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            # A falsy print_every skips reporting (and avoids a modulo-by-zero).
            if print_every and t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy(loader_val, model)
                print()
        # One validation measurement per epoch is what gets returned.
        val_accs.append(check_accuracy(loader_val, model))
    return val_accs


Creating the Model

In [35]:
# flatten function
def flatten(x):
    """
    Collapse every dimension after the first into one, e.g. (N, C, H, W) -> (N, C*H*W).

    Uses `reshape` rather than `view` so that non-contiguous inputs (such as the
    result of a permute/transpose) are accepted; for contiguous inputs the
    result is still a view sharing memory with `x`.
    """
    N = x.shape[0] # batch size; all remaining dimensions are merged
    return x.reshape(N, -1)  # "flatten" the C * H * W values into a single vector per image

# class to flatten the data
class Flatten(nn.Module):
    """Layer form of `flatten`, so the operation can sit inside `nn.Sequential`."""

    def forward(self, x):
        # Delegate to the module-level helper; this layer has no parameters.
        flattened = flatten(x)
        return flattened

hidden_layer_size = 18 # number of nodes in each of the two hidden layers

# stochastic gradient descent (with momentum) hyperparameters
learning_rate = 1e-2 # SGD step size
momentum_factor = 0.9 # momentum factor

# adam hyperparameters
adam_learning_rate = 1e-3 # Adam step size
beta1 = 0.9 # decay rate of the running average of the gradient (momentum-like term)
beta2 = 0.999 # decay rate of the running average of the squared gradient (RMSprop-like term)
adam_weight_decay = 0.0 # weight decay (L2 penalty) factor; 0.0 disables it

# model definition
input_size = 1 * 28 * 28 # 1 channel * 28 * 28 pixels = 784 values per flattened image
num_classes = 10 # one output score per class

# The layer widths are driven by hidden_layer_size, so changing that one
# setting actually resizes the network.
model = nn.Sequential(
    Flatten(),
    nn.Linear(input_size, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, num_classes),
)

model = model.to(device) # making sure the model runs on the selected device (GPU when available)

Testing Input/Output Dimensions

In [32]:
# Smoke test: push a dummy minibatch through the model and verify the output shape.
x = torch.zeros((8, 1, 28, 28), device=device, dtype=dtype)  # minibatch size of 8
scores = model(x)
print(scores.size())  # you should see [8, 10]
# Fail loudly instead of relying on the reader to eyeball the printed size.
assert scores.shape == (8, 10), 'expected one row of 10 class scores per input image'

torch.Size([8, 10])


Checking Size of Network

In [36]:
def count_parameters(model):
    """Return the total number of scalar values across the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False are frozen and therefore not counted.
        if param.requires_grad:
            total += param.numel()
    return total
# Bare last expression, so the notebook displays the returned count as the cell output.
count_parameters(model) # number of trainable parameters

14662

Training Network

In [37]:
# Alternative optimizer: stochastic gradient descent (with momentum). Uncomment to use it instead of Adam.
# optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = momentum_factor)

# adam
optimizer = optim.Adam(
    model.parameters(),
    lr=adam_learning_rate,
    betas=(beta1, beta2),
    weight_decay=adam_weight_decay,
)

# Should get to 95% validation accuracy. Left as the last expression so the
# per-epoch accuracy list returned by train() is shown as the cell output.
train(model, optimizer, loader_train, loader_val, epochs=5, print_every=200)

--------------------------------------------------------------------------------------------------------------------------------
Iteration 0, loss = 2.3193
Checking accuracy on validation set
Got 819 / 10000 correct (8.19)

Iteration 200, loss = 0.6471
Checking accuracy on validation set
Got 8446 / 10000 correct (84.46)

Iteration 400, loss = 0.5561
Checking accuracy on validation set
Got 8810 / 10000 correct (88.10)

Iteration 600, loss = 0.4854
Checking accuracy on validation set
Got 8923 / 10000 correct (89.23)

Iteration 800, loss = 0.2766
Checking accuracy on validation set
Got 9101 / 10000 correct (91.01)

Iteration 1000, loss = 0.1025
Checking accuracy on validation set
Got 9170 / 10000 correct (91.70)

Iteration 1200, loss = 0.2597
Checking accuracy on validation set
Got 9218 / 10000 correct (92.18)

Iteration 1400, loss = 0.4047
Checking accuracy on validation set
Got 9210 / 10000 correct (92.10)

Checking accuracy on validation set
Got 9224 / 10000 correct (92.24)
-----------

[0.9224, 0.9425, 0.9437, 0.9518, 0.9545]

Success! Validation accuracy reaches 95.45% after five epochs, clearing the 95% target.