# Fashion-MNIST  
---  
## Tensorboard  
https://www.youtube.com/watch?v=ycxulUVoNbk&list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG&index=30

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
# from torchvision import transforms

# Global PyTorch settings for this notebook.
torch.set_printoptions(linewidth=120) # wider line width when tensors are printed in cell output
torch.set_grad_enabled(True) # make sure gradient tracking is on (this is already the default)

from torch.utils.tensorboard import SummaryWriter # pip install future

In [3]:
# Record the library versions this notebook was run with.
for library in (torch, torchvision):
    print(library.__version__)

1.2.0
0.4.0


In [4]:
def get_num_correct(preds, labels):
    """Count the predictions that agree with their labels.

    The predicted class of each row of ``preds`` is the index of its
    largest entry along dim 1; it is compared element-wise with
    ``labels`` and the number of matches is returned as a Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [5]:
class Network(nn.Module):
    """Small CNN: two conv layers followed by three linear layers.

    Layers (in construction order):
      conv1: 1 -> 6 channels, 5x5 kernel
      conv2: 6 -> 12 channels, 5x5 kernel
      fc1:   12*4*4 -> 120
      fc2:   120 -> 60
      out:   60 -> 10 (raw scores; no softmax is applied here)
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected (dense) classifier head.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run the forward pass on ``t`` and return the output-layer scores."""
        # Conv stage 1: convolution -> ReLU -> 2x2 max pooling.
        features = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Conv stage 2: same pattern with the second convolution.
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)

        # Collapse each sample to a flat vector of 12*4*4 values for the linear layers.
        flat = features.reshape(-1, 12 * 4 * 4)

        # Two hidden linear layers, each followed by ReLU.
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))

        # Output layer: returned without an activation.
        return self.out(hidden)

In [6]:
# Fashion-MNIST training split, stored under ./data (downloaded on request
# via download=True). Each image goes through ToTensor before being returned.
train_set = torchvision.datasets.FashionMNIST(
    root='data', train=True, download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

---  
## Training loop  
Tweaking the hyperparameters

In [None]:
# Start TensorBoard on the event files under ./runs.
# NOTE(review): a `!` shell command presumably runs in the foreground, so this
# cell would not return until TensorBoard is stopped; consider launching it
# from a separate terminal instead — confirm.
!tensorboard --logdir=runs

In [8]:
from itertools import product

# Hyperparameter values to sweep over.
lrs = [0.1, 0.01, 0.001]
batch_sizes = [100, 1000, 10000]
shuffles = [True, False]

# Preview every (lr, batch_size, shuffle) combination in the order
# itertools.product yields them (last argument varies fastest).
for lr, batch_size, shuffle in product(lrs, batch_sizes, shuffles):
    print(f'{lr} {batch_size} {shuffle}')

0.1 100 True
0.1 100 False
0.1 1000 True
0.1 1000 False
0.1 10000 True
0.1 10000 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.01 10000 True
0.01 10000 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False
0.001 10000 True
0.001 10000 False


In [9]:
# Grid search: train a fresh network for every (lr, batch_size, shuffle)
# combination and log each run through its own SummaryWriter.
for lr, batch_size, shuffle in product(lrs, batch_sizes, shuffles):

    # New model, loader and optimizer per run so nothing carries over
    # from the previous configuration.
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One sample batch, used only for the image grid and for tracing the graph.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    # The comment string identifies this run's hyperparameters.
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    num_samples = len(train_set)

    # Use the writer as a context manager so it is closed even when
    # training raises part-way through a run.
    with SummaryWriter(comment=comment) as tb:
        tb.add_image('images', grid)
        tb.add_graph(network, images)
        print(comment)

        for epoch in range(5):
            total_loss = 0
            total_correct = 0

            for batch in train_loader:
                images, labels = batch

                preds = network(images)
                loss = F.cross_entropy(preds, labels) # mean loss over the batch

                optimizer.zero_grad() # gradients accumulate in PyTorch, so reset them first
                loss.backward() # compute the gradients
                optimizer.step() # update the weights

                # cross_entropy returns the batch mean; scale by the number of
                # samples actually in this batch (the last batch can be shorter
                # than batch_size) to accumulate a per-sample loss sum.
                total_loss += loss.item() * labels.size(0)
                total_correct += get_num_correct(preds, labels)

            accuracy = 100 * total_correct / num_samples

            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', accuracy, epoch)

            # Histogram of every parameter and of its gradient; the gradient
            # is whatever the last batch of the epoch left in .grad.
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                tb.add_histogram(f'{name}.grad', weight.grad, epoch)

            print(f'epoch: {epoch} total correct: {total_correct} loss: {total_loss:.2f} accuracy {accuracy:.2f}%')

 batch_size=100 lr=0.1 shuffle=True
epoch: 0 total correct: 5997 loss: 146071.41 accuracy 9.99%
epoch: 1 total correct: 6027 loss: 138583.77 accuracy 10.04%
epoch: 2 total correct: 6143 loss: 138567.50 accuracy 10.24%
epoch: 3 total correct: 6069 loss: 138554.44 accuracy 10.12%
epoch: 4 total correct: 6065 loss: 138632.04 accuracy 10.11%
 batch_size=100 lr=0.1 shuffle=False
epoch: 0 total correct: 5985 loss: 156482.29 accuracy 9.97%
epoch: 1 total correct: 5985 loss: 138615.24 accuracy 9.97%
epoch: 2 total correct: 5995 loss: 138621.82 accuracy 9.99%
epoch: 3 total correct: 5987 loss: 138624.36 accuracy 9.98%
epoch: 4 total correct: 5982 loss: 138625.52 accuracy 9.97%
 batch_size=1000 lr=0.1 shuffle=True
epoch: 0 total correct: 6057 loss: 253312.84 accuracy 10.10%
epoch: 1 total correct: 6088 loss: 138300.55 accuracy 10.15%
epoch: 2 total correct: 5902 loss: 138305.82 accuracy 9.84%
epoch: 3 total correct: 5916 loss: 138272.17 accuracy 9.86%
epoch: 4 total correct: 5838 loss: 138268.01