# PyTorch MNIST Classifier

In this notebook I will implement a deep convolutional neural network to classify handwritten digits from the MNIST dataset.

Import all the necessary packages:

In [49]:
import torch
import torchvision
from torch.utils.data import DataLoader, Subset
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

In [50]:
device = torch.device(
    'cuda' 
    if torch.cuda.is_available() 
    else 'cpu'
)

Define the validation, train, and test subsets:

In [51]:
image_path = './'
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

mnist_dataset = torchvision.datasets.MNIST(
    root=image_path,
    train=True,
    transform=transform,
    download=False
)

mnist_dataset.data.to(device=device)
mnist_dataset.targets.to(device=device)

mnist_valid_dataset = Subset(
    mnist_dataset,
    torch.arange(10_000)
)

mnist_train_dataset = Subset(
    mnist_dataset,
    torch.arange(
        10_000, len(mnist_dataset) 
    )
)

mnist_test_dataset = torchvision.datasets.MNIST(
    root=image_path,
    train=False,
    transform=transform,
    download=False
)

Set up data loaders:

In [52]:
batch_size = 64
torch.manual_seed(1)

train_dl = DataLoader(
    mnist_train_dataset,
    batch_size,
    shuffle=True
)

valid_dl = DataLoader(
    mnist_valid_dataset,
    batch_size,
    shuffle=False
)

In [53]:
my_net = nn.Sequential(
    nn.Conv2d(
        in_channels=1, out_channels=32,
        kernel_size=5, padding=2
    ),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(
        in_channels=32, out_channels=64,
        kernel_size=5, padding=2
    ),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Flatten(),
    nn.Linear(3136, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(1024, 10)
)

In [54]:
my_net.cuda()

Sequential(
  (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=3136, out_features=1024, bias=True)
  (8): ReLU()
  (9): Dropout(p=0.5, inplace=False)
  (10): Linear(in_features=1024, out_features=10, bias=True)
)

In [55]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(my_net.parameters(), lr=1e-3)

In [56]:
print(device)
for x_batch, _ in train_dl:
    print(x_batch.device)
    x_batch.to('cuda')
    print(x_batch.device)

cuda
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu
cpu

In [57]:
def train(model, n_epochs, train_dl, valid_dl):
    loss_history_train = [0] * n_epochs
    loss_history_valid = [0] * n_epochs
    accuracy_history_train = [0] * n_epochs
    accuracy_history_valid = [0] * n_epochs

    for epoch in range(n_epochs):
        model.train()

        for x_batch, y_batch in train_dl:
            x_batch.to(device)
            y_batch.to(device)
            print(x_batch.device, y_batch.device)


            optimizer.zero_grad()
            prediction = model(x_batch)
            loss_value = loss_fn(prediction, y_batch)
            loss_value.backward()
            optimizer.step()

            is_correct = (
                torch.argmax(prediction, dim=1) == y_batch
            ).float()

            accuracy_history_train[epoch] += is_correct.sum()
        
        loss_history_train[epoch] /= len(train_dl.dataset)
        accuracy_history_train[epoch] /= len(train_dl.dataset)

        model.eval()

        with torch.no_grad():
            for x_batch, y_batch in valid_dl:
                prediction = model(x_batch)
                loss_value = loss_fn(prediction, y_batch)

                loss_history_valid[epoch] += loss_value.item() * y_batch.size(0)

                is_correct = (
                    torch.argmax(prediction, dim=1) == y_batch
                ).float()

                accuracy_history_valid[epoch] += is_correct.sum()
        loss_history_valid[epoch] /= len(valid_dl.dataset)
        accuracy_history_valid[epoch] /= len(valid_dl.dataset)

        print(
            f'Epoch {epoch + 1} '
            f'train accuracy: {accuracy_history_train[epoch]:.3f}'
            f'validation accuracy: {accuracy_history_valid[epoch]:.3f}'
        )

    return loss_history_train, loss_history_valid,\
           accuracy_history_train, accuracy_history_valid 

In [58]:
n_epochs = 25
hist = train(my_net, n_epochs, train_dl, valid_dl)

cpu cpu


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor