Libraries

In [19]:
# Tiny CNN on scikit-learn Digits (simple, PyTorch, single cell)

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [20]:
np.random.seed(0)
torch.manual_seed(0)
df = pd.read_csv("files/fashion-mnist_train.csv")
test_df = pd.read_csv("files/fashion-mnist_test.csv")

train_labels = df['label'].to_numpy()
train_pixels = df.drop('label',axis=1).to_numpy()
train_pixels = train_pixels.astype(np.float32)/255.0
train_images = train_pixels.reshape(-1,28,28)

test_labels = test_df['label'].to_numpy()
test_pixels = test_df.drop('label',axis=1).to_numpy()
test_pixels = test_pixels.astype(np.float32)/255.0
test_images = test_pixels.reshape(-1,28,28)

In [21]:
Xtr = torch.from_numpy(train_images[:,None,:,:])
ytr = torch.from_numpy(train_labels)

Xte = torch.from_numpy(test_images[:,None,:,:])
yte = torch.from_numpy(test_labels)

In [22]:
train_loader = DataLoader(TensorDataset(Xtr,ytr), batch_size= 32, shuffle=True)
test_loader = DataLoader(TensorDataset(Xte,yte), batch_size= 64, shuffle=True)

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1,8,kernel_size=3,padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(8,16,kernel_size=3,padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784,32),
            nn.ReLU(),
            nn.Linear(32,10),
            # nn.Softmax(dim=1)
        )
    def forward(self,x):
        x = self.conv(x)
        x = self.conv2(x)
        x = self.fc(x)
        return x

In [25]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr = 1e-3)

In [26]:
def accuracy(loader):
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb,yb = xb.to(device), yb.to(device)
            pred = model(xb).argmax(1)
            correct += (pred == yb).sum().item()
            total += yb.size(0)
    return correct / total

In [27]:
for epoch in range(1,10):
    model.train()
    running = 0.0
    for xb,yb in train_loader:
        xb,yb = xb.to(device),yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits,yb)
        loss.backward()
        optimizer.step()
        running+= loss.item()*xb.size(0)
    tr_loss = running / len(train_loader.dataset)
    print(f"epoch# {epoch} ; train loss = {tr_loss:.4f} ; test_acc = {accuracy(test_loader):.3f}")
print(f"Final test accuracy: {accuracy(test_loader):.3f}")

epoch# 1 ; train loss = 0.4310 ; test_acc = 0.890
epoch# 2 ; train loss = 0.3040 ; test_acc = 0.887
epoch# 3 ; train loss = 0.2749 ; test_acc = 0.904
epoch# 4 ; train loss = 0.2559 ; test_acc = 0.904
epoch# 5 ; train loss = 0.2412 ; test_acc = 0.909
epoch# 6 ; train loss = 0.2293 ; test_acc = 0.909
epoch# 7 ; train loss = 0.2200 ; test_acc = 0.912
epoch# 8 ; train loss = 0.2102 ; test_acc = 0.908
epoch# 9 ; train loss = 0.2052 ; test_acc = 0.909
Final test accuracy: 0.909
