In [18]:
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch

In [21]:
# turn on the gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train and Test data
train = datasets.FashionMNIST(root="data", download=True, train=True,
                              transform=ToTensor())
test = datasets.FashionMNIST(root="data", download=True, train=False,
                             transform=ToTensor())

# Data Loader
train_loader = DataLoader(train, batch_size=32, shuffle=True)
test_loader = DataLoader(test, batch_size=32)

100%|██████████| 26.4M/26.4M [00:02<00:00, 11.2MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 171kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.21MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 24.7MB/s]


In [22]:
image, label = train[0]
print(image.shape, label)

torch.Size([1, 28, 28]) 9


In [23]:
# CNN
class ImageClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # feature extraction layers
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),   # 28 → 14

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),   # 14 → 7
        )
        #classification layers
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

# Model setup
clf = ImageClassifier().to(device)
opt = Adam(clf.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [24]:
for epoch in range(10):
    clf.train()
    train_correct = 0
    train_total = 0

    # Train
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)

        # loss computation
        yhat = clf(X)
        loss = loss_fn(yhat, y)

        # backpropagation
        opt.zero_grad()
        loss.backward()
        opt.step()

        #accuracy computation
        preds = torch.argmax(yhat, dim=1)
        train_correct += (preds == y).sum().item()
        train_total += y.size(0)

    train_acc = train_correct / train_total

    # Test

    clf.eval()
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            yhat = clf(X)
            preds = torch.argmax(yhat, dim=1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)

    test_acc = test_correct / test_total

    print(f"Epoch {epoch+1}")
    print(f"Train Loss: {loss.item():.4f}")
    print(f"Train Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy:  {test_acc:.4f}")
    print("-" * 30)


Epoch 1
Train Loss: 0.5459
Train Accuracy: 0.8440
Test Accuracy:  0.8791
------------------------------
Epoch 2
Train Loss: 0.2715
Train Accuracy: 0.8984
Test Accuracy:  0.8961
------------------------------
Epoch 3
Train Loss: 0.2618
Train Accuracy: 0.9157
Test Accuracy:  0.9018
------------------------------
Epoch 4
Train Loss: 0.3595
Train Accuracy: 0.9280
Test Accuracy:  0.9091
------------------------------
Epoch 5
Train Loss: 0.0908
Train Accuracy: 0.9374
Test Accuracy:  0.9161
------------------------------
Epoch 6
Train Loss: 0.2699
Train Accuracy: 0.9450
Test Accuracy:  0.9209
------------------------------
Epoch 7
Train Loss: 0.1256
Train Accuracy: 0.9520
Test Accuracy:  0.9158
------------------------------
Epoch 8
Train Loss: 0.0525
Train Accuracy: 0.9591
Test Accuracy:  0.9189
------------------------------
Epoch 9
Train Loss: 0.0424
Train Accuracy: 0.9652
Test Accuracy:  0.9181
------------------------------
Epoch 10
Train Loss: 0.0149
Train Accuracy: 0.9711
Test Accuracy

In [25]:
from torch import save, load

with open('model_state.pt', 'wb') as f:
    save(clf.state_dict(), f)