<a href="https://colab.research.google.com/github/rsfwalters/NEU-OB-MLP/blob/main/Digit_Classification_with_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Dataset

In [2]:
%%capture
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

# download dataset
train_data = MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = MNIST(root='data', train=False, transform=ToTensor(), download=True)

In [None]:
# print stats about datasets
print(train_data[0])
print(test_data)

Model

In [24]:
from torch import nn

NUM_CLASSES = 10

model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28, 256),
    nn.ReLU(True),
    nn.Linear(256, 256),
    nn.ReLU(True),
    nn.Linear(256, 10),
).cuda()

Train

In [25]:
import time
import torch
from torch.nn.modules.container import ModuleList
from torch.utils.data import DataLoader

# list hyperparameters
BATCH_SIZE = 256
NUM_EPOCHS = 50
LEARNING_RATE = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# create dataloaders
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

for epoch in range(NUM_EPOCHS):
  t = time.perf_counter()

  train_losses = []
  for imgs, labels in train_dataloader:
    imgs, labels = imgs.cuda(), labels.cuda()

    optimizer.zero_grad()
    pred_labels = model(imgs)
    loss = nn.CrossEntropyLoss()(pred_labels, labels)
    loss.backward()
    optimizer.step()
    
    train_losses.append(loss.item())
  
  test_losses = []
  for imgs, labels in test_dataloader:
    imgs, labels = imgs.cuda(), labels.cuda()

    pred_labels = model(imgs)
    test_losses.append(loss.item())
  
  elapsed_time = time.perf_counter() - t
  avg_train_loss = sum(train_losses) / len(train_losses)
  avg_test_loss = sum(test_losses) / len(test_losses)
  print('Epoch {}/{}: TRAIN LOSS={:.4f} | TEST LOSS={:.4f} | time={:.1f} s'.format(
       epoch, NUM_EPOCHS, avg_train_loss, avg_test_loss, elapsed_time)
  )

Epoch 0/50: TRAIN LOSS=0.4080 | TEST LOSS=0.1596 | time=5.6 s
Epoch 1/50: TRAIN LOSS=0.1499 | TEST LOSS=0.1842 | time=5.8 s
Epoch 2/50: TRAIN LOSS=0.1000 | TEST LOSS=0.0471 | time=6.4 s
Epoch 3/50: TRAIN LOSS=0.0729 | TEST LOSS=0.1370 | time=6.5 s
Epoch 4/50: TRAIN LOSS=0.0562 | TEST LOSS=0.0944 | time=5.5 s
Epoch 5/50: TRAIN LOSS=0.0448 | TEST LOSS=0.0484 | time=5.5 s
Epoch 6/50: TRAIN LOSS=0.0356 | TEST LOSS=0.0121 | time=5.7 s
Epoch 7/50: TRAIN LOSS=0.0305 | TEST LOSS=0.0873 | time=5.6 s
Epoch 8/50: TRAIN LOSS=0.0239 | TEST LOSS=0.0090 | time=5.7 s
Epoch 9/50: TRAIN LOSS=0.0173 | TEST LOSS=0.0940 | time=6.1 s
Epoch 10/50: TRAIN LOSS=0.0160 | TEST LOSS=0.0130 | time=5.8 s
Epoch 11/50: TRAIN LOSS=0.0110 | TEST LOSS=0.0012 | time=5.5 s
Epoch 12/50: TRAIN LOSS=0.0088 | TEST LOSS=0.0297 | time=5.5 s
Epoch 13/50: TRAIN LOSS=0.0084 | TEST LOSS=0.0003 | time=5.5 s
Epoch 14/50: TRAIN LOSS=0.0084 | TEST LOSS=0.0024 | time=5.5 s
Epoch 15/50: TRAIN LOSS=0.0101 | TEST LOSS=0.0018 | time=5.5 s
Ep

In [None]:
# plot learning curves

Evaluate

In [None]:
# plot confusion matrix
# plot incorrect predictions