In [1]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
# 1.a Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor())

testing_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor())

training_data.data.shape, testing_data.data.shape

(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))

In [4]:
# 1.b DataLoader
from torch.utils.data import DataLoader

training_dataloader = DataLoader(training_data, 64, shuffle=True)
testing_dataloader = DataLoader(testing_data, 64, shuffle=False)

X, y = next(iter(training_dataloader))
X.shape, y.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

In [5]:
cuda_X, cuda_y = X.to(device), y.to(device)

In [9]:
# play with conv layer
from torch import nn

# build a conv layer with basic parameter
# n input channel, n output feature, ks:kenel size, act:activate
def conv(ni, nf, ks=3, stride=1, act=True):
    layers = [nn.Conv2d(ni, nf, stride=stride, kernel_size=ks, padding=ks//2)]
    layers.append(nn.BatchNorm2d(nf))
    if act: layers.append(nn.ReLU())
    res = nn.Sequential(*layers)
    return res

def block(ni, nf): return conv(ni, nf)

# get model from block
def get_model():
    return nn.Sequential(
            block(1,16),   #14x14
            block(16,32),  #7x7
            block(32, 64), #4x4
            block(64, 128),#2x2
            block(128,256),#1x1
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(256, 10)).to(device)

model = get_model()
print(X.shape, model(X.to(device)).shape)

torch.Size([64, 1, 28, 28]) torch.Size([64, 10])


In [11]:
y_hat = model(X.to(device))

In [13]:
# 3. Loss
loss_fn = nn.CrossEntropyLoss()

loss = loss_fn(y_hat, y.to(device))

loss.backward()

In [14]:
# 4. Optimizer SGD
from torch.optim import SGD

In [16]:
# 5. Combine things together:
# train model
def train(model, dataloader, optimizer, epochs=2):
    size = len(dataloader.dataset)
    for epoch in range(epochs):
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
            y_hat = model(X)
            loss = loss_fn(y_hat, y)

            # backward
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if batch % 300 == 0:
                loss, current = loss.item(), batch * len(X)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


optimizer = SGD(model.parameters(), lr=3e-3)
train(model, training_dataloader, optimizer)

loss: 2.314800  [    0/60000]
loss: 1.759572  [19200/60000]
loss: 1.341351  [38400/60000]
loss: 1.210514  [57600/60000]
loss: 1.325168  [    0/60000]
loss: 1.192551  [19200/60000]
loss: 1.140095  [38400/60000]
loss: 0.938623  [57600/60000]


In [18]:
def test(model, testing_dataloader):
    size = len(testing_dataloader.dataset)
    total = 0
    for X, y in testing_dataloader:
        X, y = X.to(device), y.to(device)
        y_hat = model(X)
        total += sum(y_hat.argmax(1) == y).item()
    print(f'Accuracy: {total/size:>2f}')
    
test(model, testing_dataloader)

Accuracy: 0.700500


In [21]:
# Base-line models:
model = get_model()
optimizer = SGD(model.parameters(), lr=3e-3)
train(model, training_dataloader, optimizer, 10)
test(model, testing_dataloader)
# 0.887400

loss: 2.326347  [    0/60000]
loss: 1.640328  [19200/60000]
loss: 1.513402  [38400/60000]
loss: 1.327605  [57600/60000]
loss: 1.212009  [    0/60000]
loss: 1.129552  [19200/60000]
loss: 1.040257  [38400/60000]
loss: 1.006928  [57600/60000]
loss: 0.993070  [    0/60000]
loss: 1.009636  [19200/60000]
loss: 1.042453  [38400/60000]
loss: 0.837528  [57600/60000]
loss: 0.920342  [    0/60000]
loss: 0.727526  [19200/60000]
loss: 0.657815  [38400/60000]
loss: 0.926464  [57600/60000]
loss: 0.582225  [    0/60000]
loss: 0.648594  [19200/60000]
loss: 0.658379  [38400/60000]
loss: 0.791733  [57600/60000]
loss: 0.779679  [    0/60000]
loss: 0.471150  [19200/60000]
loss: 0.602264  [38400/60000]
loss: 0.642031  [57600/60000]
loss: 0.605967  [    0/60000]
loss: 0.675904  [19200/60000]
loss: 0.492870  [38400/60000]
loss: 0.608716  [57600/60000]
loss: 0.575342  [    0/60000]
loss: 0.607085  [19200/60000]
loss: 0.658717  [38400/60000]
loss: 0.599891  [57600/60000]
loss: 0.765165  [    0/60000]
loss: 0.55

In [22]:
from IPython.core.debugger import set_trace

def noop(x):
    return x

# ResnetBlock
class ResnetBlock(nn.Module):
    def __init__(self, ni, nf, stride=2):
        super(ResnetBlock, self).__init__()
        self.convs = nn.Sequential(
                        conv(ni, nf),
                        conv(nf, nf, stride=stride, act=False))
        self.pool = noop if stride == 1 else nn.AvgPool2d(stride, ceil_mode=True)
        self.idconv = noop if ni == nf else conv(ni, nf, ks=1, act=False)
    def forward(self, x):
        return nn.ReLU()(self.convs(x) + self.idconv(self.pool(x)))

In [23]:
# Resnet block
def block(ni, nf): return ResnetBlock(ni, nf)
model = get_model()
optimizer = SGD(model.parameters(), lr=3e-3)
train(model, training_dataloader, optimizer, 10)
test(model, testing_dataloader)
# 0.893100

loss: 2.404256  [    0/60000]
loss: 0.633075  [19200/60000]
loss: 0.444788  [38400/60000]
loss: 0.432799  [57600/60000]
loss: 0.431536  [    0/60000]
loss: 0.241054  [19200/60000]
loss: 0.407671  [38400/60000]
loss: 0.241693  [57600/60000]
loss: 0.292393  [    0/60000]
loss: 0.244618  [19200/60000]
loss: 0.327733  [38400/60000]
loss: 0.230948  [57600/60000]
loss: 0.376455  [    0/60000]
loss: 0.131386  [19200/60000]
loss: 0.242885  [38400/60000]
loss: 0.354061  [57600/60000]
loss: 0.231409  [    0/60000]
loss: 0.126834  [19200/60000]
loss: 0.219649  [38400/60000]
loss: 0.184550  [57600/60000]
loss: 0.355030  [    0/60000]
loss: 0.284776  [19200/60000]
loss: 0.293274  [38400/60000]
loss: 0.203125  [57600/60000]
loss: 0.275308  [    0/60000]
loss: 0.141862  [19200/60000]
loss: 0.133716  [38400/60000]
loss: 0.139759  [57600/60000]
loss: 0.165664  [    0/60000]
loss: 0.193569  [19200/60000]
loss: 0.140947  [38400/60000]
loss: 0.327154  [57600/60000]
loss: 0.093778  [    0/60000]
loss: 0.20

In [None]:
# stack block
def block(ni, nf): return nn.Sequential(ResnetBlock(ni, nf), ResnetBlock(nf, nf, stride=1))
model = get_model()
optimizer = SGD(model.parameters(), lr=3e-3)
train(model, training_dataloader, optimizer, 10)
test(model, testing_dataloader)
# 0.893100

loss: 2.527095  [    0/60000]
loss: 0.441763  [19200/60000]
loss: 0.336222  [38400/60000]
loss: 0.284289  [57600/60000]
loss: 0.360004  [    0/60000]
loss: 0.148952  [19200/60000]
loss: 0.212381  [38400/60000]
loss: 0.281431  [57600/60000]
loss: 0.240316  [    0/60000]
loss: 0.386212  [19200/60000]
loss: 0.189799  [38400/60000]
loss: 0.301721  [57600/60000]
loss: 0.170949  [    0/60000]
