In [1]:
import torch
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [2]:
# 1.a Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor

# Build the train split first, then the test split. Both are cached under
# "data", downloaded if missing, and converted with ToTensor; only `train`
# differs between the two calls.
training_data, testing_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor())
    for is_train in (True, False)
)

training_data.data.shape, testing_data.data.shape

(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))

In [3]:
# 1.b DataLoader
from torch.utils.data import DataLoader

# Mini-batches of 64 samples; only the training loader reshuffles.
training_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
testing_dataloader = DataLoader(testing_data, batch_size=64, shuffle=False)

# Pull a single batch to inspect shapes and to feed the model in later cells.
X, y = next(iter(training_dataloader))
X.shape, y.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

In [4]:
# play with conv layer
from torch import nn

# build a conv layer with basic parameter
# ni: input channels, nf: output channels (filters), ks: kernel size, act: append a ReLU activation
def conv(ni, nf, ks=3, stride=1, act=True):
    """Build a Conv2d -> BatchNorm2d (-> ReLU) stack.

    Args:
        ni: number of input channels.
        nf: number of output channels (filters).
        ks: square kernel size; the padding is ``ks // 2``.
        stride: stride of the convolution.
        act: when truthy, a ReLU is appended after the batch norm.

    Returns:
        An ``nn.Sequential`` holding the layers in application order.
    """
    convolution = nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks // 2)
    stack = [convolution, nn.BatchNorm2d(nf)]
    if act:
        stack += [nn.ReLU()]
    return nn.Sequential(*stack)

def block(ni, nf):
    """Baseline stage: one `conv` layer from ni to nf channels.

    Uses `conv`'s defaults (3x3 kernel, stride 1, ReLU), so this stage does
    not reduce the spatial resolution.
    """
    return conv(ni, nf)

# get model from block
def get_model():
    """Assemble the classifier from whatever `block` is bound to at call time.

    Five stages widen the channels 1 -> 16 -> 32 -> 64 -> 128 -> 256, followed
    by global average pooling, flattening and a 10-way linear layer.

    For a 28x28 input the spatial sizes 14x14, 7x7, 4x4, 2x2, 1x1 after each
    stage apply only when `block` halves the resolution (e.g. a stride-2
    block); a stride-1 block keeps the input resolution throughout.
    """
    widths = [1, 16, 32, 64, 128, 256]
    # Stages are created in order, so parameter initialisation order is unchanged.
    stages = [block(ni, nf) for ni, nf in zip(widths[:-1], widths[1:])]
    head = [nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(widths[-1], 10)]
    return nn.Sequential(*stages, *head)

# Instantiate the network and sanity-check it on the sample batch:
# prints the input shape followed by the output shape.
model = get_model()
print(f"{X.shape} {model(X).shape}")

torch.Size([64, 1, 28, 28]) torch.Size([64, 10])


In [5]:
# Forward pass on the sample batch X; y_hat is scored by the loss in the next cell.
y_hat = model(X)

In [6]:
# 3. Loss
# Module-level loss; train() below reads this global `loss_fn`.
loss_fn = nn.CrossEntropyLoss()

# Loss of the sample-batch outputs y_hat against the labels y.
loss = loss_fn(y_hat, y)

# Backpropagate through the model; no optimizer step is taken in this cell.
loss.backward()

In [7]:
# 4. Optimizer SGD
from torch.optim import SGD

In [8]:
# 5. Combine things together:
# train model
def train(model, dataloader, optimizer, epochs=2, criterion=None):
    """Run mini-batch training and print the loss every 300 batches.

    Args:
        model: the ``nn.Module`` to optimise; it is switched to training mode.
        dataloader: yields ``(X, y)`` batches and exposes ``.dataset`` (its
            length is used only for the progress print).
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of full passes over ``dataloader``.
        criterion: callable ``criterion(y_hat, y)`` returning a scalar loss.
            Defaults to the notebook-level ``loss_fn``.
    """
    if criterion is None:
        criterion = loss_fn  # notebook-level default from the "Loss" cell

    size = len(dataloader.dataset)

    # Feed batches on whatever device the model's parameters live on, so the
    # same loop works for CPU and GPU models.
    first_param = next(model.parameters(), None)
    target = None if first_param is None else first_param.device

    # BatchNorm must use batch statistics while training, even if an earlier
    # evaluation left the model in eval mode.
    model.train()

    for _ in range(epochs):
        for batch, (X, y) in enumerate(dataloader):
            if target is not None:
                X, y = X.to(target), y.to(target)

            # Clear gradients BEFORE backward so that gradients left on the
            # parameters by earlier code cannot leak into the first update.
            optimizer.zero_grad()

            y_hat = model(X)
            loss = criterion(y_hat, y)

            # backward
            loss.backward()
            optimizer.step()

            if batch % 300 == 0:
                current = batch * len(X)
                print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")


# optimizer = SGD(model.parameters(), lr=3e-3)
# train(model, training_dataloader, optimizer)

In [9]:
def test(model, testing_dataloader):
    size = len(testing_dataloader.dataset)
    total = 0
    for X, y in testing_dataloader:
        y_hat = model(X)
        total += sum(y_hat.argmax(1) == y).item()
    print(f'Accuracy: {total/size:>2f}')
    
# test(model, testing_dataloader)

In [10]:
# Base-line models:
# model = get_model()
# optimizer = SGD(model.parameters(), lr=3e-3)
# train(model, training_dataloader, optimizer, 10)
# test(model, testing_dataloader)
# 0.887400

In [11]:
from IPython.core.debugger import set_trace

def noop(x):
    """Identity function: return the argument unchanged."""
    return x

# ResnetBlock
class ResnetBlock(nn.Module):
    """Residual block: two `conv` layers plus a shortcut, summed and passed through ReLU.

    Main path: ``conv(ni, nf)`` followed by ``conv(nf, nf, stride=stride)``
    without activation. Shortcut: the input is average-pooled by ``stride``
    (skipped when ``stride == 1``) and passed through a 1x1 `conv` without
    activation (skipped when ``ni == nf``).

    Args:
        ni: number of input channels.
        nf: number of output channels.
        stride: stride of the second conv and of the shortcut pooling.
    """

    def __init__(self, ni, nf, stride=2):
        super().__init__()
        main_path = [conv(ni, nf), conv(nf, nf, stride=stride, act=False)]
        self.convs = nn.Sequential(*main_path)

        # The shortcut only needs pooling when the main path downsamples.
        if stride == 1:
            self.pool = noop
        else:
            self.pool = nn.AvgPool2d(stride, ceil_mode=True)

        # The shortcut only needs a 1x1 conv when the channel count changes.
        if ni == nf:
            self.idconv = noop
        else:
            self.idconv = conv(ni, nf, ks=1, act=False)

    def forward(self, x):
        residual = self.convs(x)
        shortcut = self.idconv(self.pool(x))
        activation = nn.ReLU()
        return activation(residual + shortcut)

In [12]:
# Resnet block
# Rebind the global `block`: get_model() looks the name up when it is called,
# so every stage of the next model is a ResnetBlock (default stride=2).
def block(ni, nf): return ResnetBlock(ni, nf)
# Fresh model built from the new block, with its own SGD optimizer.
model = get_model()
optimizer = SGD(model.parameters(), lr=3e-3)
# train(model, training_dataloader, optimizer, 10)
# test(model, testing_dataloader)
# 0.893100

In [13]:
# stack block
# Each stage is now two residual blocks: the first maps ni -> nf channels with
# ResnetBlock's default stride of 2, the second keeps nf channels at stride 1.
def block(ni, nf): return nn.Sequential(ResnetBlock(ni, nf), ResnetBlock(nf, nf, stride=1))
# get_model() picks up the rebound `block` at call time.
model = get_model()
optimizer = SGD(model.parameters(), lr=3e-3)
# Train for 10 epochs, then print accuracy on the test split.
train(model, training_dataloader, optimizer, 10)
test(model, testing_dataloader)
# 0.893100

loss: 2.428055  [    0/60000]
loss: 0.500507  [19200/60000]
loss: 0.313588  [38400/60000]
loss: 0.367805  [57600/60000]
loss: 0.399477  [    0/60000]
loss: 0.234004  [19200/60000]
loss: 0.232941  [38400/60000]
loss: 0.393569  [57600/60000]
loss: 0.208819  [    0/60000]
loss: 0.369349  [19200/60000]
loss: 0.428407  [38400/60000]
loss: 0.279916  [57600/60000]
loss: 0.207196  [    0/60000]
loss: 0.175709  [19200/60000]
loss: 0.234070  [38400/60000]
loss: 0.387717  [57600/60000]
loss: 0.272289  [    0/60000]
loss: 0.176166  [19200/60000]
loss: 0.205463  [38400/60000]
loss: 0.204345  [57600/60000]
loss: 0.265925  [    0/60000]
loss: 0.273075  [19200/60000]
loss: 0.263643  [38400/60000]
loss: 0.161081  [57600/60000]
loss: 0.082600  [    0/60000]
loss: 0.183584  [19200/60000]
loss: 0.092436  [38400/60000]
loss: 0.250213  [57600/60000]
loss: 0.079052  [    0/60000]
loss: 0.176493  [19200/60000]
loss: 0.146372  [38400/60000]
loss: 0.146396  [57600/60000]
loss: 0.051742  [    0/60000]
loss: 0.13