In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter

In [2]:
# Record the library versions this notebook was executed with.
print(torch.__version__, torchvision.__version__, sep='\n')

1.7.0+cpu
0.8.1+cpu


In [3]:
# Run on the first CUDA device when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [4]:
def get_num_correct(preds, labels):
    """Count the rows of ``preds`` whose highest-scoring column equals the label.

    Args:
        preds: tensor of per-class scores; the predicted class is taken as the
            argmax along dimension 1.
        labels: tensor of class indices compared element-wise with the
            predicted classes.

    Returns:
        The number of matching positions as a Python int.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes == labels
    return hits.sum().item()

In [5]:
class Network(nn.Module):
    """CNN classifier: two conv stages, two dense stages and a 10-way output layer.

    Each conv stage keeps the spatial size (5x5 kernel, padding 2) and then
    halves it with 2x2 max-pooling, so the 7*7*16 features expected by ``fc1``
    correspond to single-channel 28x28 inputs.

    Attribute names and the position of each module inside its ``Sequential``
    determine the ``state_dict`` keys, so they must stay stable for saved
    checkpoints to load.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Conv(5x5, same size) -> BatchNorm -> ReLU -> MaxPool(2x2)."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 8)
        self.layer2 = self._conv_stage(8, 16)
        self.fc1 = nn.Sequential(
            nn.Linear(7 * 7 * 16, 128),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
        )
        self.out = nn.Linear(64, 10)

    def forward(self, t):
        """Return the raw class scores (no softmax) for a batch of images."""
        features = self.layer2(self.layer1(t))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc2(self.fc1(flat))
        return self.out(hidden)

In [6]:
# Convert images to tensors, then shift/scale with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits share the same storage root, download flag and preprocessing.
train_set = torchvision.datasets.FashionMNIST(
    './data/', train=True, transform=transform, download=True
)
test_set = torchvision.datasets.FashionMNIST(
    './data/', train=False, transform=transform, download=True
)

# Evaluation loader: fixed order, 1000 images per batch.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000)

In [7]:
# Number of examples in the training split.
len(train_set)

60000

In [8]:
# Class names of the training split, in list order.
train_set.classes

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [9]:
# Integer labels of the training examples.
train_set.targets

tensor([9, 0, 0,  ..., 3, 0, 5])

In [10]:
# Examples per class label, to check how balanced the training split is.
torch.bincount(train_set.targets)

tensor([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000])

In [None]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

class RunBuilder():
    """Expands a grid of hyperparameter options into individual run configurations."""

    @staticmethod
    def get_runs(params):
        """Return one named tuple per combination of the option lists in ``params``.

        Args:
            params: mapping from hyperparameter name to an iterable of candidate
                values. Key order determines field order and iteration order
                (the last key varies fastest).

        Returns:
            A list of ``run`` named tuples, one for every element of the
            Cartesian product of the value lists.
        """
        Run = namedtuple('run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

# Hyperparameter grid: every (lr, batch_size) combination becomes one training run.
params = OrderedDict([
    ('lr', [0.01, 0.003]),
    ('batch_size', [256, 512]),
])

In [None]:
criterion = nn.CrossEntropyLoss()

# torch.save does not create missing directories. Create the target up front so
# a run cannot train for 20 epochs and then fail while writing its checkpoint.
os.makedirs('./models', exist_ok=True)

for run in RunBuilder.get_runs(params):
    # Every hyperparameter combination gets a fresh model, loader and optimizer.
    network = Network().to(device)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size, shuffle=True, num_workers=1)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    comment = f'-{run}'
    # The context manager closes the writer when the run ends, even if an
    # epoch raises, so pending events are flushed and file handles released.
    with SummaryWriter(comment=comment) as tb:
        for epoch in range(20):

            train_loss = 0
            train_correct = 0

            network.train()
            for batch in train_loader:
                images, labels = batch[0].to(device), batch[1].to(device)
                preds = network(images)
                loss = criterion(preds, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # criterion returns the batch mean; weight it by the number of
                # samples actually in this batch (the last one may be smaller
                # than run.batch_size) to accumulate the summed loss.
                train_loss += loss.item() * labels.size(0)
                train_correct += get_num_correct(preds, labels)

            # Loss is logged as the sum over the split; accuracy as a fraction.
            tb.add_scalar('Train Loss', train_loss, epoch)
            tb.add_scalar('Train Accuracy', train_correct / len(train_set), epoch)

            network.eval()
            with torch.no_grad():
                test_loss = 0
                test_correct = 0
                for batch in test_loader:
                    images, labels = batch[0].to(device), batch[1].to(device)
                    preds = network(images)
                    loss = criterion(preds, labels)

                    # Use the real batch size here too, not a constant that is
                    # unrelated to how test_loader was configured.
                    test_loss += loss.item() * labels.size(0)
                    test_correct += get_num_correct(preds, labels)

                tb.add_scalar('Test Loss', test_loss, epoch)
                tb.add_scalar('Test Accuracy', test_correct / len(test_set), epoch)

            # Weight histograms, plus the gradients left over from the last
            # training step of this epoch.
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                if weight.grad is not None:
                    tb.add_histogram(f'{name}.grad', weight.grad, epoch)

    torch.save(network.state_dict(), f'./models/model-{run}.ckpt')


In [11]:
# Rebuild the architecture, then restore the weights saved for the
# lr=0.003 / batch_size=256 run, mapping tensors onto the active device.
model = Network().to(device)
checkpoint_path = 'models/model-run(lr=0.003, batch_size=256).ckpt'
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [12]:
def get_all_preds(model, loader):
    """Run ``model`` over every batch of ``loader`` and return all outputs stacked.

    Inputs are moved to the device holding the model's parameters, and each
    batch of outputs is moved to the CPU before being collected, so the result
    is a CPU tensor whatever device the model runs on.

    This function does not disable autograd or change the train/eval mode;
    callers doing pure inference should call ``model.eval()`` and wrap the call
    in ``torch.no_grad()``.

    Args:
        model: an ``nn.Module`` mapping a batch of inputs to a batch of outputs.
        loader: iterable of batches whose first element is the input tensor
            (any further elements, such as labels, are ignored).

    Returns:
        The per-batch outputs concatenated along dimension 0, in loader order.
        An empty loader yields an empty 1-D tensor.
    """
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    batch_preds = []
    for batch in loader:
        images = batch[0].to(model_device)
        # Move results off the accelerator right away so the final concatenation
        # never mixes devices and GPU memory is not held for the whole dataset.
        batch_preds.append(model(images).cpu())

    if not batch_preds:
        return torch.tensor([])

    # One concatenation at the end avoids re-copying the accumulated tensor per batch.
    return torch.cat(batch_preds, dim=0)

In [13]:
# Inference only: eval mode for the model, autograd switched off for the passes.
model.eval()

# Created without shuffling, so prediction rows follow the order of train_set.
pred_loader = torch.utils.data.DataLoader(train_set, batch_size=5000)

with torch.no_grad():
    train_preds = get_all_preds(model, pred_loader)
    test_preds = get_all_preds(model, test_loader)

print(train_preds.shape, test_preds.shape, sep='\n')

torch.Size([60000, 10])
torch.Size([10000, 10])


In [14]:
# Number of correctly classified examples in each split.
train_correct, test_correct = (
    get_num_correct(split_preds, split.targets)
    for split_preds, split in ((train_preds, train_set), (test_preds, test_set))
)

# Report the counts together with the corresponding percentages.
train_report = 'Train Correct: {:5}\tTrain Accuracy: {:5.2f}%'
test_report = 'Test Correct: {:6}\tTest Accuracy: {:6.2f}%'
print(train_report.format(train_correct, 100*train_correct/len(train_set)))
print(test_report.format(test_correct, 100*test_correct/len(test_set)))

Train Correct: 59018	Train Accuracy: 98.36%
Test Correct:   9108	Test Accuracy:  91.08%


In [15]:
# One row per example: column 0 is the true label, column 1 the predicted class.
train_stacked, test_stacked = (
    torch.stack((split.targets, split_preds.argmax(dim=1)), dim=1)
    for split, split_preds in ((train_set, train_preds), (test_set, test_preds))
)

# Empty 10x10 confusion matrices, one per split, to be filled in afterwards.
train_confmat, test_confmat = (
    torch.zeros(10, 10, dtype=torch.int16) for _ in range(2)
)

In [16]:
print(train_set.classes)

# Build the matrix from scratch in one vectorized pass (rows: true label,
# columns: predicted label). Re-assigning instead of incrementing in place
# keeps this cell idempotent: re-running it no longer double-counts.
num_classes = train_confmat.size(0)
flat_index = train_stacked[:, 0] * num_classes + train_stacked[:, 1]
train_confmat = (
    torch.bincount(flat_index, minlength=num_classes * num_classes)
    .reshape(num_classes, num_classes)
    .to(train_confmat.dtype)
)

train_confmat

['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


tensor([[5764,    0,    7,   33,    1,    1,  194,    0,    0,    0],
        [   1, 5989,    0,   10,    0,    0,    0,    0,    0,    0],
        [  28,    0, 5866,    4,   50,    0,   52,    0,    0,    0],
        [   8,    0,    4, 5966,   11,    0,   11,    0,    0,    0],
        [   3,    0,  127,   32, 5803,    0,   35,    0,    0,    0],
        [   0,    0,    0,    0,    0, 5994,    0,    5,    0,    1],
        [ 110,    0,   93,   18,   57,    0, 5721,    0,    1,    0],
        [   0,    0,    0,    0,    0,   11,    0, 5985,    0,    4],
        [   1,    0,    1,    0,    1,    0,    0,    0, 5997,    0],
        [   0,    0,    0,    0,    0,    5,    0,   62,    0, 5933]],
       dtype=torch.int16)

In [17]:
print(test_set.classes)

# Build the matrix from scratch in one vectorized pass (rows: true label,
# columns: predicted label). Re-assigning instead of incrementing in place
# keeps this cell idempotent: re-running it no longer double-counts.
num_classes = test_confmat.size(0)
flat_index = test_stacked[:, 0] * num_classes + test_stacked[:, 1]
test_confmat = (
    torch.bincount(flat_index, minlength=num_classes * num_classes)
    .reshape(num_classes, num_classes)
    .to(test_confmat.dtype)
)

test_confmat

['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


tensor([[841,   0,  12,  15,   3,   0, 124,   0,   5,   0],
        [  1, 978,   0,  14,   1,   0,   3,   0,   3,   0],
        [ 21,   1, 863,   9,  47,   0,  59,   0,   0,   0],
        [ 13,   2,   8, 928,  22,   0,  27,   0,   0,   0],
        [  2,   0,  65,  26, 845,   0,  61,   0,   1,   0],
        [  0,   0,   0,   0,   0, 987,   0,  10,   0,   3],
        [ 90,   0,  65,  26,  67,   0, 747,   0,   5,   0],
        [  0,   0,   0,   0,   0,   6,   0, 982,   1,  11],
        [  2,   1,   2,   3,   3,   2,   3,   2, 982,   0],
        [  1,   0,   0,   0,   1,   4,   0,  39,   0, 955]], dtype=torch.int16)

In [18]:
# Per-class accuracy: diagonal entry over the row total (number of examples
# whose true label is that class), so no per-class count is hard-coded.
for i, class_name in enumerate(train_set.classes):
    # .item() yields Python ints, keeping the arithmetic out of the matrix's
    # narrow integer dtype.
    correct = train_confmat[i, i].item()
    total = train_confmat[i].sum().item()
    accuracy = 100 * correct / total if total else float('nan')
    print('Train accuracy of {:12s}:\t{:.2f}% ({}/{})'.format(
        class_name,
        accuracy,
        correct,
        total))

Train accuracy of T-shirt/top :	96.07% (5764/6000)
Train accuracy of Trouser     :	99.82% (5989/6000)
Train accuracy of Pullover    :	97.77% (5866/6000)
Train accuracy of Dress       :	99.43% (5966/6000)
Train accuracy of Coat        :	96.72% (5803/6000)
Train accuracy of Sandal      :	99.90% (5994/6000)
Train accuracy of Shirt       :	95.35% (5721/6000)
Train accuracy of Sneaker     :	99.75% (5985/6000)
Train accuracy of Bag         :	99.95% (5997/6000)
Train accuracy of Ankle boot  :	98.88% (5933/6000)


In [19]:
# Per-class accuracy: diagonal entry over the row total (number of examples
# whose true label is that class), so no per-class count is hard-coded.
# Class names are read from the test split itself.
for i, class_name in enumerate(test_set.classes):
    # .item() yields Python ints, keeping the arithmetic out of the matrix's
    # narrow integer dtype.
    correct = test_confmat[i, i].item()
    total = test_confmat[i].sum().item()
    accuracy = 100 * correct / total if total else float('nan')
    print('Test accuracy of {:12s}:\t{:.2f}% ({}/{})'.format(
        class_name,
        accuracy,
        correct,
        total))

Test accuracy of T-shirt/top :	84.10% (841/1000)
Test accuracy of Trouser     :	97.80% (978/1000)
Test accuracy of Pullover    :	86.30% (863/1000)
Test accuracy of Dress       :	92.80% (928/1000)
Test accuracy of Coat        :	84.50% (845/1000)
Test accuracy of Sandal      :	98.70% (987/1000)
Test accuracy of Shirt       :	74.70% (747/1000)
Test accuracy of Sneaker     :	98.20% (982/1000)
Test accuracy of Bag         :	98.20% (982/1000)
Test accuracy of Ankle boot  :	95.50% (955/1000)
