In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets
import torchvision.transforms as transforms
import mmcv
from itertools import product

In [9]:
torch.manual_seed(7)
device = 'cuda:0'

In [10]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.out(x)

        return x

In [11]:
train_set = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
val_set = datasets.FashionMNIST(root='./data', train=False,download=True, transform=transforms.Compose([transforms.ToTensor()]))

In [12]:
# enable tensorboard
from torch.utils.tensorboard import SummaryWriter

parameters = dict(
    batch_size_list = [512, 1024, 1024*8],
    lr_list = [.01, .001, .0001, .00001],
    shuffle = [True, False]
)
param_values = [v for v in parameters.values()]
param_values

[[512, 1024, 8192], [0.01, 0.001, 0.0001, 1e-05], [True, False]]

In [13]:
epochs = 10

for batch_size, lr, shuffle in product(*param_values):
    model = Network().to(device)
    
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    comment = f'_batch_size={batch_size}_lr={lr}_shuffle={shuffle}'
    writer = SummaryWriter(comment=comment)
    
    for epoch in mmcv.track_iter_progress(range(epochs)):
        correct_train, loss_train = 0., 0.
        for images, labels in (train_loader):
            images, labels = images.to(device), labels.to(device)
            preds = model(images)
            loss = F.cross_entropy(preds, labels)
            loss_train += loss.item()
            correct_train += (preds.argmax(dim=1) == labels).sum()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        correct_val, loss_val = 0., 0.
        with torch.no_grad():
            for images, labels in (val_loader):
                images, labels = images.to(device), labels.to(device)
                preds = model(images)
                loss = F.cross_entropy(preds, labels)
                loss_val += loss.item()
                correct_val += (preds.argmax(dim=1) == labels).sum()

        acc_train = correct_train/len(train_set)
        acc_val = correct_val/len(val_set)

        writer.add_scalar('Loss/train', loss_train, epoch)
        writer.add_scalar('Loss/test', loss_val, epoch)
        writer.add_scalar('Accuracy/train', acc_train, epoch)
        writer.add_scalar('Accuracy/test', acc_val, epoch)
        
    writer.close()

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 58s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 60s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 59s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 56s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 0.2 task/s, elapsed: 56s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>

In [16]:
val_preds = torch.tensor([], dtype=torch.long).to(device)
val_labels = torch.tensor([], dtype=torch.long).to(device)

with torch.no_grad():
    for images, labels in (val_loader):
        images, labels = images.to(device), labels.to(device)
        preds = model(images).argmax(dim=1)
        val_preds = torch.cat((val_preds, preds.type(torch.long)), dim=0)
        val_labels = torch.cat((val_labels, labels.type(torch.long)), dim=0)

In [17]:
val_preds = val_preds.cpu()
val_labels = val_labels.cpu()

In [18]:
def confusion_matrix(preds, labels):
    stacked = torch.stack((val_labels, val_preds), dim=1)

    cmt = torch.zeros(10, 10, dtype=torch.int64)
    for p in stacked:
        j, k = p.tolist()
        cmt[j, k] += 1
    return cmt

cmt = confusion_matrix(val_preds, val_labels)
print(cmt)

tensor([[  0, 736,   0,   0,   0, 264,   0,   0,   0,   0],
        [  0, 747,   0,   0,   0, 253,   0,   0,   0,   0],
        [  0, 633,   0,   0,   0, 367,   0,   0,   0,   0],
        [  0, 780,   0,   0,   0, 220,   0,   0,   0,   0],
        [  0, 836,   0,   0,   0, 164,   0,   0,   0,   0],
        [  0,  48,   0,   0,   0, 952,   0,   0,   0,   0],
        [  0, 613,   0,   0,   0, 387,   0,   0,   0,   0],
        [  0, 269,   0,   0,   0, 731,   0,   0,   0,   0],
        [  0, 545,   0,   0,   0, 455,   0,   0,   0,   0],
        [  0,  98,   0,   0,   0, 902,   0,   0,   0,   0]])


In [31]:
from plot_confusion_matrix import plot_confusion_matrix

In [None]:
names = ('T-shirt/top' ,'Trouser' ,'Pullover' ,'Dress' ,'Coat' ,'Sandal' ,'Shirt' ,'Sneaker' ,'Bag' ,'Ankle boot')

plot_confusion_matrix2(cmt, names, normalize=True)