## Classification of MNIST [pytorch]

In [1]:
import numpy as np

### Data

- Train images: train-images-idx3-ubyte.gz
- Train labels: train-labels-idx1-ubyte.gz
- Test images: t10k-images-idx3-ubyte.gz
- Test labels: t10k-labels-idx1-ubyte.gz

In [48]:
import os
import gzip

def load_mnist_images(data_dir, filename):
    """Load a gzip-compressed image file as a stack of 28x28 uint8 images.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the gzip file inside ``data_dir``.

    Returns:
        A ``uint8`` array of shape ``(-1, 28, 28)`` built from every byte
        that follows the first 16 bytes of the decompressed stream.
    """
    with gzip.open(os.path.join(data_dir, filename), 'rb') as gz_file:
        raw_bytes = gz_file.read()

    # The first 16 bytes are skipped as header; everything after is pixel data.
    pixels = np.frombuffer(raw_bytes, dtype=np.uint8, offset=16)
    return pixels.reshape(-1, 28, 28)

def load_mnist_labels(data_dir, filename):
    """Load a gzip-compressed label file as a flat uint8 array.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the gzip file inside ``data_dir``.

    Returns:
        A 1-D ``uint8`` array holding every byte that follows the first
        8 bytes of the decompressed stream.
    """
    with gzip.open(os.path.join(data_dir, filename), 'rb') as gz_file:
        raw_bytes = gz_file.read()

    # The first 8 bytes are skipped as header; each remaining byte is one label.
    labels = np.frombuffer(raw_bytes, dtype=np.uint8, offset=8)
    return labels

# Directory holding the four gzip files. The MNIST_DATA_DIR environment
# variable, when set, overrides the original hard-coded location so the
# notebook can run on another machine without editing this cell.
data_dir = os.environ.get("MNIST_DATA_DIR", "/mnt/d/datasets/fashion_mnist_29M/")

# The "train-*" files are loaded as the training split and the "t10k-*"
# files as the test split.
x_train = load_mnist_images(data_dir, "train-images-idx3-ubyte.gz")
y_train = load_mnist_labels(data_dir, "train-labels-idx1-ubyte.gz")
x_test = load_mnist_images(data_dir, "t10k-images-idx3-ubyte.gz")
y_test = load_mnist_labels(data_dir, "t10k-labels-idx1-ubyte.gz")

# Report shape and dtype of every array as a quick sanity check.
print(f">> Train images: {x_train.shape}, {x_train.dtype}")
print(f">> Train labels: {y_train.shape}, {y_train.dtype}")
print(f">> Test images:  {x_test.shape}, {x_test.dtype}")
print(f">> Test labels:  {y_test.shape}, {y_test.dtype}")

>> Train images: (60000, 28, 28), uint8
>> Train labels: (60000,), uint8
>> Test images:  (10000, 28, 28), uint8
>> Test labels:  (10000,), uint8


### Preprocessing

In [49]:
# Flatten every 28x28 image into a 784-long row, convert to float32 and
# divide by 255 so the uint8 pixel values end up in [0, 1].
x_train_scaled = x_train.reshape(-1, 28*28).astype(np.float32) / 255
x_test_scaled = x_test.reshape(-1, 28*28).astype(np.float32) / 255

# Labels are widened from uint8 to int64.
y_train, y_test = (labels.astype(np.int64) for labels in (y_train, y_test))

# Report shape and dtype after preprocessing.
print(f">> Train images: {x_train_scaled.shape}, {x_train_scaled.dtype}")
print(f">> Train labels: {y_train.shape}, {y_train.dtype}")
print(f">> Test images:  {x_test_scaled.shape}, {x_test_scaled.dtype}")
print(f">> Test labels:  {y_test.shape}, {y_test.dtype}")

>> Train images: (60000, 784), float32
>> Train labels: (60000,), int64
>> Test images:  (10000, 784), float32
>> Test labels:  (10000,), int64


### Modeling

In [50]:
import torch
import torch.nn as nn
import torch.optim as optim

## Model: 2-layer MLP (784 -> 256 -> 10) with a sigmoid hidden activation
# Seed before the layers are constructed so the initial weights are reproducible.
torch.manual_seed(42)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

input_size = 28*28    # length of one flattened 28x28 image
hidden_size = 256
output_size = 10      # number of values the network emits per sample

mlp = nn.Sequential(
    nn.Linear(in_features=input_size, out_features=hidden_size),
    nn.Sigmoid(),
    nn.Linear(in_features=hidden_size, out_features=output_size),
)

### Training

In [51]:
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose arg-max prediction equals the target.

    Args:
        y_pred: Tensor of per-class scores; the predicted class of each row
            is the index of its largest value along dim 1.
        y_true: Tensor of target class indices, compared element-wise with
            the predicted classes.

    Returns:
        A 0-dim float tensor: the mean of the 0/1 match indicators.
    """
    predicted_labels = torch.argmax(y_pred, dim=1)
    hits = (predicted_labels == y_true).float()
    return hits.mean()

In [52]:
# Hyperparameters
n_epochs = 10
learning_rate = 0.01
batch_size = 32

model = mlp.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Move the whole training set to the target device once, up front.
x_train = torch.tensor(x_train_scaled).float().to(device)
# `y_train` is rebound from a NumPy array to a tensor here. as_tensor accepts
# either, so re-running this cell does not trigger torch.tensor's
# copy-construct warning.
y_train = torch.as_tensor(y_train).long().to(device)

# Only full batches are used; a trailing partial batch is dropped.
n_batches = len(x_train) // batch_size
if n_batches == 0:
    raise ValueError(
        f"batch_size={batch_size} is larger than the training set ({len(x_train)} samples)"
    )

# Log roughly ten times per run. `n_epochs // 10` is 0 when n_epochs < 10,
# which would make the modulo below raise ZeroDivisionError, so clamp to 1.
log_every = max(1, n_epochs // 10)

for epoch in range(1, n_epochs + 1):
    batch_loss = 0
    batch_acc = 0

    # Fresh random sample order for every epoch.
    indices = torch.randperm(len(x_train))
    for i in range(n_batches):
        batch_idx = indices[i*batch_size: (i+1)*batch_size]
        x = x_train[batch_idx]
        y = y_train[batch_idx]

        # Forward propagation
        out = model(x)

        loss = loss_fn(out, y)
        acc = accuracy(out, y)

        # Backward propagation
        loss.backward()

        # Update weights and biases, then clear gradients for the next batch
        optimizer.step()
        optimizer.zero_grad()

        batch_loss += loss.item()
        batch_acc += acc.item()

    if epoch % log_every == 0:
        # Report the per-batch averages accumulated over this epoch.
        print(f"[{epoch}/{n_epochs}] loss: {batch_loss/n_batches:.3f} acc: {batch_acc/n_batches:.3f}")

[1/10] loss: 1.557 acc: 0.571
[2/10] loss: 0.881 acc: 0.723
[3/10] loss: 0.719 acc: 0.749
[4/10] loss: 0.649 acc: 0.768
[5/10] loss: 0.605 acc: 0.784
[6/10] loss: 0.572 acc: 0.797
[7/10] loss: 0.547 acc: 0.807
[8/10] loss: 0.528 acc: 0.814
[9/10] loss: 0.513 acc: 0.820
[10/10] loss: 0.500 acc: 0.825


In [53]:
# Move the test set to the same device as the model.
x_test = torch.tensor(x_test_scaled).float().to(device)
# `y_test` is rebound from a NumPy array to a tensor here. as_tensor accepts
# either, so re-running this cell does not trigger torch.tensor's
# copy-construct warning.
y_test = torch.as_tensor(y_test).long().to(device)

# Score the entire test set in one forward pass; no gradients are needed.
with torch.no_grad():
    out = model(x_test)

    loss = loss_fn(out, y_test)
    acc = accuracy(out, y_test)

print(f"loss: {loss.item():.3f} acc: {acc.item():.3f}")

loss: 0.519 acc: 0.814
