In [117]:
from fastai.vision.utils import URLs, untar_data
from pathlib import Path
path = untar_data(URLs.MNIST)

In [118]:
(path / "training" / "0").ls()

(#5923) [Path('/root/.fastai/data/mnist_png/training/0/639.png'),Path('/root/.fastai/data/mnist_png/training/0/39850.png'),Path('/root/.fastai/data/mnist_png/training/0/49171.png'),Path('/root/.fastai/data/mnist_png/training/0/21614.png'),Path('/root/.fastai/data/mnist_png/training/0/40820.png'),Path('/root/.fastai/data/mnist_png/training/0/11792.png'),Path('/root/.fastai/data/mnist_png/training/0/8187.png'),Path('/root/.fastai/data/mnist_png/training/0/38614.png'),Path('/root/.fastai/data/mnist_png/training/0/23250.png'),Path('/root/.fastai/data/mnist_png/training/0/1590.png'),Path('/root/.fastai/data/mnist_png/training/0/12741.png'),Path('/root/.fastai/data/mnist_png/training/0/20225.png'),Path('/root/.fastai/data/mnist_png/training/0/30087.png'),Path('/root/.fastai/data/mnist_png/training/0/4002.png'),Path('/root/.fastai/data/mnist_png/training/0/59313.png'),Path('/root/.fastai/data/mnist_png/training/0/19859.png'),Path('/root/.fastai/data/mnist_png/training/0/41051.png'),Path('/roo

In [173]:
# Let's explore one example
from fastai.vision.data import Image

file = Image.open(Path('/Users/szlendak/.fastai/data/mnist_png/training/9/36655.png'))
file.shape

(28, 28)

In [121]:
import numpy as np
import torch
from typing import Literal
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_tensor_list(i: int, dataset: Literal["training", "testing"]) -> list[torch.Tensor]:
    digit_path = Path(path / dataset / str(i))
    return [torch.tensor(np.array(Image.open(img_path))).view(-1, 28 * 28).squeeze(0)/255 for img_path in digit_path.ls()]

data_dict_train = {i: get_tensor_list(i, "training") for i in range(10)}
data_dict_test = {i: get_tensor_list(i, "testing") for i in range(10)}

In [122]:
train_ds_list = [(tensor, label) for label, category in data_dict_train.items() for tensor in category]
test_ds_list = [(tensor, label) for label, category in data_dict_test.items() for tensor in category]

In [123]:
train_ds_list[0][0].shape

torch.Size([784])

In [124]:
from fastai.data.core import DataLoader
from fastai.data.load import first

dl = DataLoader(train_ds_list, batch_size=1024, device=device)
valid_dl = DataLoader(test_ds_list, batch_size=1024, device=device)
x_batch, yb = first(dl)

x_batch.shape, yb.shape

(torch.Size([1024, 784]), torch.Size([1024]))

In [125]:
weights = torch.randn(784, 10) / 784
weights = weights.to(device)
weights.requires_grad_()
bias = torch.randn(10) / 10
bias = bias.to(device)
bias.requires_grad_()

tensor([ 0.1695,  0.1674,  0.1208, -0.1730,  0.1183, -0.0726,  0.0079, -0.1693,
         0.0017, -0.0921], device='cuda:0', requires_grad=True)

In [126]:
from torch import logsumexp

def loss_func(preds: torch.Tensor, targets: torch.Tensor):
    loss = logsumexp(preds, dim=1, keepdim=True) - preds
    return loss[:, targets].mean()


In [127]:
def calc_grad(x_batch, y_batch, model):
    preds = model(x_batch)
    loss = loss_func(preds, y_batch)
    loss.backward()

In [128]:
def train_one_epoch(model, lr, params: torch.Tensor, dl: DataLoader):
    for batch, label in dl:
        calc_grad(batch, label, model)
        for param in params:
            param.data -= lr * param.grad
            param.grad.data.zero_()

In [129]:
def batch_accuracy(preds_batch: torch.Tensor, yb: torch.Tensor):
    correct = torch.argmax(preds_batch, 1) == yb
    return correct.float().mean()

In [130]:
def forward(x_batch: torch.Tensor):
    return x_batch @ weights + bias

In [131]:
def validate_epoch(model, dl):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in dl]
    return round(torch.stack(accs).mean().item(), 4)

In [132]:
validate_epoch(forward, valid_dl)

0.1163

In [133]:
lr = 0.04
params = [weights, bias]
train_one_epoch(forward, lr, params, dl)
validate_epoch(forward, valid_dl)

0.1221

In [134]:
for i in range(40):
    train_one_epoch(forward, lr, params, dl)
    print(validate_epoch(forward, valid_dl))

0.2669
0.424
0.5147
0.5722
0.6154
0.648
0.6738
0.694
0.7103
0.7246
0.7397
0.7504
0.7611
0.7698
0.7778
0.7846
0.791
0.7964
0.8018
0.8056
0.8094
0.8127
0.8161
0.8187
0.8216
0.824
0.8266
0.8279
0.8295
0.8318
0.8336
0.836
0.8373
0.8379
0.8394
0.8407
0.842
0.8437
0.8449
0.8461


In [135]:
from torch import nn
model = nn.Sequential(
    nn.Linear(28*28, 28*28*3),
    nn.ReLU(),
    nn.Linear(28*28*3, 28*28*3),
    nn.ReLU(),
    nn.Linear(28*28*3, 10),
)
if device.type == "cuda":
    model = model.cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

loss = nn.CrossEntropyLoss()

In [136]:
# Training loop with proper PyTorch pattern
def train_epoch(model: nn.Sequential, optimizer: torch.optim.Optimizer, loss_func: callable, dataloader: DataLoader):
    for batch_x, batch_y in dataloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        preds = model(batch_x)
        loss = loss_func(preds, batch_y)
        loss.backward()
        optimizer.step()
    print(validate_epoch(model, valid_dl))

In [140]:
for _ in range(50):
    train_epoch(model, optimizer, loss, dl)

0.9758
0.976
0.9761
0.9763
0.9767
0.9769
0.9769
0.9769
0.9769
0.9768
0.9771
0.9772
0.9775
0.9774
0.9775
0.9777
0.9779
0.9779
0.9779
0.9779
0.978
0.978
0.9781
0.9781
0.9782
0.9784
0.9784
0.9784
0.9784
0.9784
0.9789
0.9789
0.9789
0.9788
0.9788
0.9787
0.9789
0.9789
0.9787
0.9787
0.9787
0.9787
0.9788
0.9789
0.9789
0.9791
0.9791
0.9791
0.9792
0.9794
