# fashion MNIST  
---  
## RunBuilder  
https://www.youtube.com/watch?v=NSKghk0pcco&list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG&index=31  
https://www.youtube.com/watch?v=ozpv_peZ894&list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG&index=32  
https://www.youtube.com/watch?v=kWVgvsejXsE&list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG&index=33

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
# from torchvision import transforms

# Widen printed tensor output to 120 characters per line.
torch.set_printoptions(linewidth=120)
# Gradient tracking is already on by default; this call only makes it explicit.
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter # pip install future

In [2]:
# Show the torch / torchvision versions in use for this run.
print(torch.__version__)
print(torchvision.__version__)

1.2.0
0.4.0


In [3]:
class Network(nn.Module):
    """Small CNN: two conv/pool stages followed by three linear layers.

    The first convolution takes a single input channel and the final layer
    produces 10 outputs. ``fc1`` expects ``12*4*4`` features per sample,
    which is what two (5x5 conv -> 2x2 max-pool) stages yield for a
    28x28 input.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected (dense) classifier head.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the forward pass and return the un-normalised class scores."""
        # Conv stage 1: convolution -> ReLU -> 2x2 max-pool.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Conv stage 2: convolution -> ReLU -> 2x2 max-pool.
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # Flatten each sample to a vector before the linear layers.
        t = t.reshape(-1, 12 * 4 * 4)

        # Two hidden linear layers with ReLU activations.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        # Output layer: no softmax is applied here, raw scores are returned.
        return self.out(t)

In [4]:
from collections import OrderedDict, namedtuple
from itertools import product

In [5]:
class RunBuilder():
    """Expands a hyperparameter grid into individual run configurations."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` namedtuple per combination of parameter values.

        ``params`` maps each hyperparameter name to a list of candidate
        values. The result is the Cartesian product of those lists, in the
        order ``itertools.product`` generates them; every element exposes
        the hyperparameters as named fields.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [6]:
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

In [7]:
class RunManager():
    """Collects training statistics per epoch and per run.

    Intended call order: ``begin_run`` -> (``begin_epoch`` -> ``track_loss`` /
    ``track_num_correct`` for every batch -> ``end_epoch``)* -> ``end_run``,
    and finally ``save`` to persist everything gathered so far.

    Each finished epoch is written to TensorBoard and appended to
    ``run_data`` as one result row.
    """

    def __init__(self):
        # Per-epoch accumulators (reset in begin_epoch).
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run bookkeeping; run_data accumulates rows across all runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Objects supplied / created in begin_run.
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a new run: store its objects and open a TensorBoard writer.

        Args:
            run: the run's hyperparameters; must provide ``_asdict()``
                (e.g. a namedtuple), which ``end_epoch`` relies on.
            network: the model being trained.
            loader: the data loader the model is trained on.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        # The run's parameters become part of the log directory name.
        self.tb = SummaryWriter(comment=f'-{run}')

        # Log one batch of images and the model graph for this run.
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and reset the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finalize the epoch: log to TensorBoard, record a row, show the table."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # Average over the whole dataset, not over the number of batches.
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = 100 * self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that has not received a gradient has grad None,
            # which cannot be turned into a histogram.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results['run'] = self.run_count
        results['epoch'] = self.epoch_count
        results['loss'] = loss
        results['accuracy'] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        # Append the run's hyperparameters as extra columns.
        for key, val in self.run_params._asdict().items():
            results[key] = val
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # Replace the previously displayed table instead of stacking outputs.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total.

        ``loss`` is expected to be the batch *mean*, so it is scaled back
        up by the number of samples it was averaged over.

        Args:
            loss: scalar loss tensor for the batch.
            batch_size: number of samples in this batch. Defaults to
                ``self.loader.batch_size``; pass the real size for a short
                final batch so it is not over-weighted.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, pred, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(pred, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):  # leading "_" == "private" helper
        """Count rows of ``preds`` whose arg-max along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all collected rows to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame.from_dict(
            self.run_data,
            orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

        

In [8]:
# Fashion-MNIST training split, kept under the local `data` directory
# (download=True allows fetching it). Each sample is passed through ToTensor,
# so the loaders below yield tensors.
train_set = torchvision.datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

---  
## Training loop  
Tweaking the hyperparameters  
*Note: a single epoch is actually enough to compare the `num_workers` settings.*

In [None]:
!tensorboard --logdir=runs

In [9]:
# Hyperparameter grid: each key maps to the list of values to try for it.
params = OrderedDict([
    ('batch_size', [100, 1000, 10000]),
    ('lr', [0.01]),
    ('num_workers', [0, 1, 2, 4]),
    ('shuffle', [True]),
])

In [10]:
# One manager instance collects the results of every run in the grid.
mngr = RunManager()

for run in RunBuilder.get_runs(params):

    # Fresh model, loader and optimizer for every hyperparameter combination.
    network = Network()
    loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=run.batch_size,
        num_workers=run.num_workers,
        shuffle=run.shuffle,
    )
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    # begin_run pulls a sample batch itself for the TensorBoard image/graph,
    # so no batch is pre-fetched here.
    mngr.begin_run(run, network, loader)

    for _ in range(3):
        mngr.begin_epoch()

        for batch in loader:

            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)  # calculate the loss
            optimizer.zero_grad()  # reset grads; PyTorch accumulates them otherwise
            loss.backward()  # calculate the gradients
            optimizer.step()  # update the weights

            mngr.track_loss(loss)
            mngr.track_num_correct(preds, labels)
        mngr.end_epoch()
    mngr.end_run()
# Persist all rows as results.csv / results.json.
mngr.save('results')


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,batch_size,lr,num_workers,shuffle
0,1,1,0.571189,78.261667,20.015237,22.034181,100,0.01,0,True
1,1,2,0.383685,85.776667,20.453636,42.602922,100,0.01,0,True
2,1,3,0.36432,86.526667,20.028046,62.727056,100,0.01,0,True
3,2,1,0.569404,78.666667,14.119495,15.037305,100,0.01,1,True
4,2,2,0.390172,85.771667,14.283015,29.414405,100,0.01,1,True
5,2,3,0.355496,87.003333,14.495126,44.005618,100,0.01,1,True
6,3,1,0.567981,78.551667,14.679277,15.607478,100,0.01,2,True
7,3,2,0.374666,86.256667,14.429941,30.132505,100,0.01,2,True
8,3,3,0.349176,87.086667,14.587203,44.814795,100,0.01,2,True
9,4,1,0.548496,79.148333,14.733175,15.741094,100,0.01,4,True
