In [1]:
import torch
import torchvision as tv

import pandas as pd
import numpy as np
import time

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
BATCH_SIZE = 256
NUM_EPOCHS = 10

In [3]:
transforms = tv.transforms.Compose([
    tv.transforms.Resize(32),
    tv.transforms.ToTensor()
])

train_dataset = tv.datasets.MNIST('.', train=True, transform=transforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transforms, download=True)
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.83MB/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 153kB/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.45MB/s]


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 3.68MB/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw






In [4]:
def model_training(num_epochs):
  for epoch in range(num_epochs):
    train_loss, train_acc, train_n = 0.0, 0.0, 0
    test_acc, test_n = torch.Tensor([0]), 0

    for X, y in train:
      trainer.zero_grad()
      y_pred = model(X)
      l = loss(y_pred, y)
      l.backward()
      trainer.step()
      train_loss += l.item()
      train_acc += (y_pred.argmax(axis=1) == y).sum().item()
      train_n += y.shape[0]

    for X, y in test:
      test_acc += (model(X).argmax(axis=1) == y).sum()
      test_n += y.shape[0]

    train_acc = train_acc / train_n
    test_acc = test_acc.item() / test_n

    print(f'epoch: {epoch}, train_loss: {train_loss}, train_acc: {train_acc}, test_acc: {test_acc}')

In [48]:
# Lenet

model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 6, kernel_size=5, padding=0),
    torch.nn.Tanh(),
    torch.nn.AvgPool2d(2, stride=2),
    torch.nn.Conv2d(6, 16, kernel_size=5),
    torch.nn.Tanh(),
    torch.nn.AvgPool2d(2, stride=2),
    torch.nn.Conv2d(16, 120, kernel_size=5),
    torch.nn.Flatten(),
    torch.nn.Linear(120, 84),
    torch.nn.Tanh(),
    torch.nn.Linear(84, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(model.parameters(), lr=0.1)
model_training(NUM_EPOCHS)

epoch: 0, train_loss: 227.33826522529125, train_acc: 0.7296333333333334, test_acc: 0.8873
epoch: 1, train_loss: 73.7262009754777, train_acc: 0.9081333333333333, test_acc: 0.9214
epoch: 2, train_loss: 54.580240946263075, train_acc: 0.9311, test_acc: 0.9402
epoch: 3, train_loss: 42.099421709775925, train_acc: 0.9467333333333333, test_acc: 0.9547
epoch: 4, train_loss: 33.396080400794744, train_acc: 0.9579833333333333, test_acc: 0.9646
epoch: 5, train_loss: 27.46368094161153, train_acc: 0.9656, test_acc: 0.9694
epoch: 6, train_loss: 23.407922545447946, train_acc: 0.9701666666666666, test_acc: 0.973
epoch: 7, train_loss: 20.528326246887445, train_acc: 0.9736333333333334, test_acc: 0.9753
epoch: 8, train_loss: 18.39118379401043, train_acc: 0.9763666666666667, test_acc: 0.9783
epoch: 9, train_loss: 16.738640897441655, train_acc: 0.97855, test_acc: 0.98


In [28]:
model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 6, kernel_size=5),#, padding=1, stride=1),
    torch.nn.Tanh(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Conv2d(6, 16, kernel_size=5),
    torch.nn.Tanh(),
    torch.nn.MaxPool2d(kernel_size=2),

    torch.nn.Flatten(1),
    torch.nn.Linear(16*5*5, 120),
    torch.nn.Tanh(),
    torch.nn.Linear(120, 84),
    torch.nn.Tanh(),
    torch.nn.Linear(84, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(model.parameters(), lr=0.1)
model_training(NUM_EPOCHS)

epoch: 0, train_loss: 241.24076376855373, train_acc: 0.7063, test_acc: 0.9087
epoch: 1, train_loss: 54.39765515178442, train_acc: 0.93535, test_acc: 0.9516
epoch: 2, train_loss: 33.193366879597306, train_acc: 0.9597833333333333, test_acc: 0.9673
epoch: 3, train_loss: 25.05830300692469, train_acc: 0.96885, test_acc: 0.9742
epoch: 4, train_loss: 20.67414717376232, train_acc: 0.9738833333333333, test_acc: 0.9777
epoch: 5, train_loss: 17.80203195847571, train_acc: 0.9770166666666666, test_acc: 0.9795
epoch: 6, train_loss: 15.721782105974853, train_acc: 0.9796166666666667, test_acc: 0.9814
epoch: 7, train_loss: 14.117115839151666, train_acc: 0.9818666666666667, test_acc: 0.9822
epoch: 8, train_loss: 12.824605531292036, train_acc: 0.98375, test_acc: 0.9828
epoch: 9, train_loss: 11.749177674995735, train_acc: 0.9852333333333333, test_acc: 0.9834


In [30]:
model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 32, kernel_size=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=3),
    torch.nn.Conv2d(32, 64, kernel_size=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Conv2d(64, 128, kernel_size=2),
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d((6, 6)),

    torch.nn.Flatten(1),
    torch.nn.Linear(128*6*6, 1024),
    torch.nn.Dropout(p=0.2),
    torch.nn.ReLU(),
    torch.nn.Linear(1024, 256),
    torch.nn.Dropout(p=0.1),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(model.parameters(), lr=0.1)
model_training(NUM_EPOCHS)

epoch: 0, train_loss: 413.46580162644386, train_acc: 0.42113333333333336, test_acc: 0.8081
epoch: 1, train_loss: 77.39575636386871, train_acc: 0.89505, test_acc: 0.8728
epoch: 2, train_loss: 40.5428414568305, train_acc: 0.9459166666666666, test_acc: 0.9209
epoch: 3, train_loss: 29.198725285939872, train_acc: 0.96105, test_acc: 0.94
epoch: 4, train_loss: 22.802080584689975, train_acc: 0.9697, test_acc: 0.9674
epoch: 5, train_loss: 19.4442764329724, train_acc: 0.9746166666666667, test_acc: 0.9659
epoch: 6, train_loss: 16.576837942702696, train_acc: 0.9787666666666667, test_acc: 0.9512
epoch: 7, train_loss: 14.845557510619983, train_acc: 0.9801666666666666, test_acc: 0.9804
epoch: 8, train_loss: 12.976454479852691, train_acc: 0.9827166666666667, test_acc: 0.9786
epoch: 9, train_loss: 11.62627318536397, train_acc: 0.9850666666666666, test_acc: 0.9802
