In [1]:
from pathlib import Path
import requests
import torch
from torch import nn
import pickle
import gzip
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

In [2]:
# Local cache location for the pickled MNIST archive.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

# Download only when the archive is not cached yet.
if not (PATH / FILENAME).exists():
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(URL + FILENAME, timeout=60)
    # Fail loudly on HTTP errors: otherwise an error page would be saved as
    # mnist.pkl.gz and the exists() guard above would prevent any retry.
    response.raise_for_status()
    content = response.content
    # write_bytes opens, writes and closes the file (no leaked handle).
    (PATH / FILENAME).write_bytes(content)

In [3]:
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so this
# is only appropriate for an archive obtained from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as archive:
    # The archive holds three (inputs, labels) pairs; the third one is discarded.
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(archive, encoding="latin-1")

# Convert each loaded array into a torch tensor.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)

In [4]:
# Display the shapes of the training inputs and the training labels.
x_train.shape, y_train.shape

(torch.Size([50000, 784]), torch.Size([50000]))

In [5]:
# Distinct label values in y_train together with how many samples carry each one.
torch.unique(y_train, return_counts = True)

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([4932, 5678, 4968, 5101, 4859, 4506, 4951, 5175, 4842, 4988]))

# Create model

In [18]:
class MnistModel(nn.Module):
    """Single-layer linear classifier.

    Args:
        n_input: Number of input features per sample.
        n_classes: Number of output scores per sample. Defaults to 10,
            which keeps existing ``MnistModel(n_input)`` calls unchanged.
    """

    def __init__(self, n_input, n_classes=10):
        super().__init__()
        self.n_input = n_input
        self.n_classes = n_classes
        self.linear = nn.Linear(n_input, n_classes)

    def forward(self, x):
        """Return the linear layer's output for ``x``.

        The scores are not normalised (no softmax is applied here).
        """
        return self.linear(x)

In [19]:
# Seed PyTorch's global RNG so the randomly initialised weights are the same
# every time this cell is run.
torch.manual_seed(0)
# The input width is read from the data: one input per column of x_train.
model = MnistModel(x_train.shape[1])

In [20]:
# nn.CrossEntropyLoss applies log-softmax internally, so it is fed the model's
# plain linear outputs together with the integer class labels.
loss_func = nn.CrossEntropyLoss()

# Create data loader

In [21]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs ``x[i]`` with ``y[i]``.

    Args:
        x: Indexable collection of inputs.
        y: Indexable collection of targets, one per entry of ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same length.
    """

    def __init__(self, x, y):
        super().__init__()
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError during iteration, or silently drop trailing targets.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x = x
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.x[idx], self.y[idx]

In [22]:
# Wrap the training and validation splits as (input, label) datasets.
train_ds = CustomDataset(x=x_train, y=y_train)
val_ds = CustomDataset(x=x_valid, y=y_valid)

In [23]:
# Both loaders use the same mini-batch size.
batch_size = 64
# Training batches are shuffled; validation batches keep the dataset order.
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=False)

In [24]:
# Number of mini-batches the training loader yields per epoch.
len(train_dl)

782

# Training the model

In [25]:
# Number of complete passes over train_dl made by the training loop.
epochs = 10

In [26]:
# Adam optimizer over all of the model's parameters, with learning rate 0.001.
opt = optim.Adam(model.parameters(), lr = 0.001)

In [27]:
# Mini-batch training: one optimizer step per batch, for `epochs` passes over train_dl.
for epoch in range(epochs):
    model.train()  # put the module in training mode at the start of every epoch
    for batch_idx, batch in enumerate(train_dl):
        x_batch, y_batch = batch

        # Clear gradients left over from the previous step before the new backward pass.
        opt.zero_grad()

        # Forward pass and loss for this batch.
        y_pred = model(x_batch)
        loss = loss_func(y_pred, y_batch)

        # Backward pass and parameter update.
        loss.backward()
        opt.step()

        # Report the current batch loss every 100 steps.
        if not batch_idx % 100:
            print(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx}], Loss: {loss.item():.4f}")

Epoch [1/10], Step [0], Loss: 2.2693
Epoch [1/10], Step [100], Loss: 0.8869
Epoch [1/10], Step [200], Loss: 0.6337
Epoch [1/10], Step [300], Loss: 0.4262
Epoch [1/10], Step [400], Loss: 0.6604
Epoch [1/10], Step [500], Loss: 0.3494
Epoch [1/10], Step [600], Loss: 0.4586
Epoch [1/10], Step [700], Loss: 0.3766
Epoch [2/10], Step [0], Loss: 0.3575
Epoch [2/10], Step [100], Loss: 0.4947
Epoch [2/10], Step [200], Loss: 0.5417
Epoch [2/10], Step [300], Loss: 0.3679
Epoch [2/10], Step [400], Loss: 0.2961
Epoch [2/10], Step [500], Loss: 0.3609
Epoch [2/10], Step [600], Loss: 0.2279
Epoch [2/10], Step [700], Loss: 0.2842
Epoch [3/10], Step [0], Loss: 0.2507
Epoch [3/10], Step [100], Loss: 0.2179
Epoch [3/10], Step [200], Loss: 0.3120
Epoch [3/10], Step [300], Loss: 0.5109
Epoch [3/10], Step [400], Loss: 0.2243
Epoch [3/10], Step [500], Loss: 0.2775
Epoch [3/10], Step [600], Loss: 0.2882
Epoch [3/10], Step [700], Loss: 0.5316
Epoch [4/10], Step [0], Loss: 0.2537
Epoch [4/10], Step [100], Loss: 0