In [1]:
import os
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from fastai.vision.all import *

In [44]:
# This patch is to display epoch results while using Visual Studio Code
from IPython.display import clear_output, DisplayHandle
def update_patch(self, obj):
    clear_output(wait=True)
    self.display(obj)
DisplayHandle.update = update_patch

# Defining Model
The `nn.Module`, which allows layers to be stacked to form a network, is the most commonly used approach for building a Neural Network in PyTorch. We now have more control over the forward pass.

The Linear layer, also known as a fully connected layer or dense layer, is best represented by $f(wx + b)$, where $x$ represents a tensor containing the input features, $w$ and $b$ are the weight matrix and bias vector, respectively, and $f$ is the activation function. Because each layer in a NN receives input from the previous layer, its dimensionality is fixed. Typically, we only need to consider output dimensionality when designing a NN architecture.

In this case, we'd like to define a model with two hidden layers. The first takes $784$ features as input and projects them to $25$ neurons. Because we have $10$ class labels, the second layer receives the output of the previous layer (which has a size of $25$) and projects it to three $10$ output neurons.

initializing model parameters with random weights is necessary to break the symmetry during backpropagation—otherwise, a multilayer NN would be no more useful than a single-layer NN like logistic regression. When creating a PyTorch tensor, we can also use a random initialization scheme. `nn.init.xavier_normal_` and `nn.init.xavier_uniform_` are such two initialization methods. You can find many other initialization techniques in the `nn.init` module.

In [45]:
# a simple classifier
class NN(nn.Module):
    def __init__(self, in_features, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(in_features, 25)
        nn.init.xavier_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(25, num_classes)
        nn.init.xavier_normal_(self.fc2.weight)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

To accelerate operations in the neural network, we move it to the GPU if available.

In [46]:
# sets device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# initializes and sends the model to appropriate GPU/CPU
model = NN(784, 10).to(device)

# Data Loader & Preprocessing
PyTorch offers domain-specific libraries such as TorchText, TorchVision, and TorchAudio, all of which include datasets. All of these are subclasses of `torch.utils.data.Dataset` and, therefore, can be used in `torch.utils.data.DataLoader` class. Find more about torchvision datasets at [here](https://pytorch.org/vision/0.8/datasets.html).

In [47]:
batch_size = 64

# load data
train_data = datasets.MNIST(
    root='./res/datasets',
    train=True,
    transform=transforms.ToTensor(),
    download=True
)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = datasets.MNIST(
    root='./res/datasets',
    train=False,
    transform=transforms.ToTensor(),
    download=True
)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [48]:
# Checks dimensions of each minibatch
x, y = next(iter(train_loader))
print(x.size())
print(y.size())

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [49]:
# Checks how the target labels are encoded
y[0]

tensor(9)

# Hyperparameters & Optimizers

In [50]:
# sets hyperparameters
in_features = 784
num_classes = 10
learning_rate = 0.01
num_epochs = 5

To train a model, we need a loss function and an optimizer.

In [51]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [52]:
loss_per_epoch = [0] * num_epochs
acc = [0] * num_epochs

# Training

In [53]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Moves the data to the GPU/CPU
        data = data.to(device)
        # Converts the 2D image into a 1-D vector
        data = data.reshape(data.shape[0], -1)
        # Moves the target labels to GPU/CPU
        targets = targets.to(device)

        # Make predictions with the current parameters
        scores = model(data)
        # Calculates loss of the current minibatch
        loss = criterion(scores, targets)

        # Resets the gradients to zero
        optimizer.zero_grad()
        # Compute gradients of loss function with respect to parameters
        loss.backward()
        # Updates parameters
        optimizer.step()

        loss_per_epoch[epoch] += loss.item() * data.size(0)
        correct = (torch.argmax(scores, dim=1) == targets).float()
        acc[epoch] += correct.mean()

    loss_per_epoch[epoch] /= len(train_loader.dataset)
    acc[epoch] /= (len(train_loader.dataset) / batch_size)
    print('Epoch {}| Loss {} | Accuracy {}'.format(epoch, loss_per_epoch[epoch], acc[epoch]))


Epoch 0| Loss 0.28610282474756243 | Accuracy 0.9149500131607056
Epoch 1| Loss 0.19095433277487756 | Accuracy 0.9427833557128906
Epoch 2| Loss 0.1652491458406051 | Accuracy 0.9521166682243347
Epoch 3| Loss 0.15396445863743624 | Accuracy 0.9553833603858948
Epoch 4| Loss 0.13811732213596503 | Accuracy 0.9598667025566101


# Saving & Loading Models
Trained models can be saved to disk and reused in the future. When you call `save(model)`, **you are saving both the model architecture and all of the learned parameters**. As a standard practice, we can save models with the 'pt' or 'pth' file extensions.

In [54]:
if 'models' not in os.listdir():
    os.mkdir('models')
    print('models directory created!')
else:
    print('models directory already exists!')

models directory already exists!


In [55]:
torch.save(model, 'models/ann.pth')

In [56]:
model = torch.load('models/ann.pth')
model.eval()

NN(
  (fc1): Linear(in_features=784, out_features=25, bias=True)
  (fc2): Linear(in_features=25, out_features=10, bias=True)
)

Instead, if you wanted, you could also save just the parameters, not the architecture.

In [57]:
torch.save(model.state_dict(), 'models/ann_state.pth')

In [58]:
model = NN(784, 10)
model.load_state_dict(torch.load('models/ann_state.pth'))
model = model.to(device)

# Model Evaluation

In [59]:
def check_accuracy(loader, model):
    correct = 0
    total = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.reshape(x.shape[0], -1)
            x = x.to(device)
            y = y.to(device)

            scores = model(x)
            _, predictions = scores.max(1)
            correct += (predictions == y).sum()
            total += predictions.size(0)
        model.train()
        print('Accuracy: ', correct/total)

In [60]:
check_accuracy(test_loader, model)

Accuracy:  tensor(0.9522, device='cuda:0')


# Fast.AI
The `MNIST SAMPLE` dataset from fastai is a smaller version of the actual `MNIST Digits` dataset, and it contains images of only $5$ and $7$.

In [61]:
path = untar_data(URLs.MNIST_SAMPLE)
for dir in path.ls():
    print(dir)

C:\Users\musab\.fastai\data\mnist_sample\labels.csv
C:\Users\musab\.fastai\data\mnist_sample\train
C:\Users\musab\.fastai\data\mnist_sample\valid


In [62]:
print((path/'train').ls())
print((path/'valid').ls())

[Path('C:/Users/musab/.fastai/data/mnist_sample/train/3'), Path('C:/Users/musab/.fastai/data/mnist_sample/train/7')]
[Path('C:/Users/musab/.fastai/data/mnist_sample/valid/3'), Path('C:/Users/musab/.fastai/data/mnist_sample/valid/7')]


In [63]:
train_x = torch.cat((
    torch.stack([tensor(Image.open(f)).reshape(-1) for f in (path/'train'/'3').ls().sorted()]) / 255.0,
    torch.stack([tensor(Image.open(f)).reshape(-1) for f in (path/'train'/'7').ls().sorted()]) / 255.0
), dim=0)
print('Training Dataset: ', train_x.shape)

valid_x = torch.cat((
    torch.stack([tensor(Image.open(f)).reshape(-1) for f in (path/'valid'/'3').ls().sorted()]) / 255.0,
    torch.stack([tensor(Image.open(f)).reshape(-1) for f in (path/'valid'/'7').ls().sorted()]) / 255.0
), dim=0)
print('Validation Dataset: ', valid_x.shape)

Training Dataset:  torch.Size([12396, 784])
Validation Dataset:  torch.Size([2038, 784])


In [64]:
train_y = tensor([3] * len((path/'train'/'3').ls()) + [7] * len((path/'train'/'7').ls()))
print('Train Dataset: ', train_y.shape)
valid_y = tensor([3] * len((path/'valid'/'3').ls()) + [7] * len((path/'valid'/'7').ls()))
print('Validation Dataset: ', valid_y.shape)

Train Dataset:  torch.Size([12396])
Validation Dataset:  torch.Size([2038])


In [65]:
train_loader = DataLoader(L(zip(train_x, train_y)), batch_size=32, shuffle=True)
valid_loader = DataLoader(L(zip(valid_x, valid_y)), batch_size=32, shuffle=True)
dls = DataLoaders(train_loader, valid_loader)

In [66]:
learner = Learner(dls, NN(784, 10).to(device), opt_func=Adam, loss_func=nn.functional.cross_entropy, metrics=accuracy)
learner.fit(5, lr=0.01)

epoch,train_loss,valid_loss,accuracy,time
0,0.040441,0.03743,0.986752,00:01
1,0.028806,0.050326,0.98528,00:01
2,0.009235,0.044813,0.989205,00:01
3,0.009766,0.032387,0.991168,00:01
4,0.011788,0.03877,0.991659,00:01
