In [47]:
from fastai.vision.utils import URLs, untar_data
from pathlib import Path
path = untar_data(URLs.MNIST)

In [48]:
(path / "training" / "0").ls()

(#5923) [Path('/Users/szlendak/.fastai/data/mnist_png/training/0/16585.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/24537.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/25629.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/20751.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/34730.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/15926.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/22152.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/35348.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/22634.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/6819.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/32427.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/24245.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/33739.png'),Path('/Users/szlendak/.fastai/data/mnist_png/training/0/44599.png'),Path('/Users/szlendak/.fastai/data/mnist

In [49]:
# Let's explore one example
from fastai.vision.data import Image

file = Image.open(Path('/Users/szlendak/.fastai/data/mnist_png/training/9/36655.png'))
file.shape

(28, 28)

In [50]:
import numpy as np
import torch
from typing import Literal

def get_tensor_list(i: int, dataset: Literal["training", "testing"]) -> list[torch.Tensor]:
    digit_path = Path(path / dataset / str(i))
    return [torch.tensor(np.array(Image.open(img_path))).view(-1, 28 * 28).squeeze(0)/255 for img_path in digit_path.ls()]

data_dict_train = {i: get_tensor_list(i, "training") for i in range(10)}
data_dict_test = {i: get_tensor_list(i, "testing") for i in range(10)}

In [51]:
train_ds_list = [(tensor, label) for label, category in data_dict_train.items() for tensor in category]
test_ds_list = [(tensor, label) for label, category in data_dict_test.items() for tensor in category]

In [52]:
train_ds_list[0][0].shape

torch.Size([784])

In [53]:
from fastai.data.core import DataLoader
from fastai.data.load import first

dl = DataLoader(train_ds_list, batch_size=256)
valid_dl = DataLoader(test_ds_list, batch_size=256)
x_batch, yb = first(dl)

x_batch.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256]))

In [54]:
weights = torch.randn(784, 10) / 784
weights.requires_grad_()
bias = torch.randn(10) / 10
bias.requires_grad_()

tensor([ 0.0460,  0.0283, -0.0832,  0.1161, -0.0179, -0.1159, -0.0220,  0.1430,
         0.0541, -0.0737], requires_grad=True)

In [55]:
from torch import logsumexp

def loss_func(preds: torch.Tensor, targets: torch.Tensor):
    loss = logsumexp(preds, dim=1, keepdim=True) - preds
    return loss[:, targets].mean()


In [56]:
def calc_grad(x_batch, y_batch, model):
    preds = model(x_batch)
    loss = loss_func(preds, y_batch)
    loss.backward()

In [57]:
def train_one_epoch(model, lr, params: torch.Tensor, dl: DataLoader):
    for batch, label in dl:
        calc_grad(batch, label, model)
        for param in params:
            param.data -= lr * param.grad
            param.grad.data.zero_()

In [58]:
def batch_accuracy(preds_batch: torch.Tensor, yb: torch.Tensor):
    correct = torch.argmax(preds_batch, 1) == yb
    return correct.float().mean()

In [59]:
def forward(x_batch: torch.Tensor):
    return x_batch @ weights + bias

In [60]:
def validate_epoch(model, dl):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in dl]
    return round(torch.stack(accs).mean().item(), 4)

In [61]:
validate_epoch(forward, valid_dl)

0.1115

In [62]:
lr = 0.04
params = [weights, bias]
train_one_epoch(forward, lr, params, dl)
validate_epoch(forward, valid_dl)

0.122

In [63]:
for i in range(40):
    train_one_epoch(forward, lr, params, dl)
    print(validate_epoch(forward, valid_dl))

0.208
0.3567
0.4359
0.4838
0.5238
0.5494
0.5715
0.5887
0.6024
0.6145
0.6261
0.6359
0.6437
0.6516
0.6587
0.6637
0.6683
0.6732
0.6782
0.6826
0.6865
0.6906
0.6939
0.6978
0.7009
0.7045
0.7076
0.7104
0.7123
0.7141
0.7168
0.7187
0.7204
0.7219
0.7247
0.7266
0.7289
0.7297
0.7312
0.7325


In [168]:
from torch import nn

model = nn.Sequential(
    nn.Linear(28*28, 28*28*3),
    nn.ReLU(),
    nn.Linear(28*28*3, 28*28*3),
    nn.ReLU(),
    nn.Linear(28*28*3, 10),
)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

loss = nn.CrossEntropyLoss()

In [169]:
# Training loop with proper PyTorch pattern
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def train_epoch(model: nn.Sequential, optimizer: torch.optim.Optimizer, loss_func: callable, dataloader: DataLoader):
    for batch_x, batch_y in dataloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        preds = model(batch_x)
        loss = loss_func(preds, batch_y)
        loss.backward()
        optimizer.step()
    print(validate_epoch(model, valid_dl))

In [170]:
for _ in range(50):
    train_epoch(model, optimizer, loss, dl)

0.122
0.122
0.1349
0.1782
0.2086
0.2535
0.2948
0.3043


KeyboardInterrupt: 