In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time 
import pandas as pd
from IPython.display import display
from IPython.display import clear_output
import simplejson as json

# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)
# Explicitly switch autograd gradient tracking on for the whole notebook
# (training below relies on loss.backward()).
torch.set_grad_enabled(True)

from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [2]:
def get_num_correct(preds, labels):
    """Count how many predictions in a batch agree with their labels.

    Args:
        preds: Tensor of per-class scores; the predicted class of each row is
            the index of its largest value along dim 1.
        labels: Tensor of target class indices, compared element-wise with
            the predicted classes.

    Returns:
        The number of matching predictions as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

In [3]:
class Network(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture: two (5x5 conv -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. The flatten step hard-codes 12 * 4 * 4
    features, which is what a 28x28 input shrinks to after both stages
    (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. in_channels=1 means grayscale
        # input; the channel count grows with depth (1 -> 6 -> 12).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5, stride=1)

        # Classifier head: feature count shrinks step by step down to the
        # 10 output classes.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120, bias=True)
        self.fc2 = nn.Linear(in_features=120, out_features=60, bias=True)
        self.out = nn.Linear(in_features=60, out_features=10, bias=True)

    def forward(self, t):
        """Run a batch through the network.

        Args:
            t: Input image batch with one channel; 28x28 images are what the
                hard-coded flatten size below is computed for.

        Returns:
            Tensor with 10 raw (un-normalised) class scores per row.
        """
        # Stage 1: conv -> ReLU -> max-pool. ReLU and pooling are stateless
        # operations (no weights), so they are called functionally.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Stage 2: same pattern on the second convolution.
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # Flatten the 12 feature maps of size 4x4 so the linear layers can
        # consume them.
        t = t.reshape(-1, 12 * 4 * 4)

        # Two hidden linear layers, each followed by ReLU.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        # Output layer: no activation here, the raw scores are returned.
        return self.out(t)

In [4]:
class RunBuilder:
    """Expands a hyper-parameter grid into individual run configurations."""

    @staticmethod
    def get_runs(params):
        """Build one ``Run`` tuple per combination of parameter values.

        Args:
            params: Mapping of parameter name -> iterable of candidate values
                (e.g. an OrderedDict). Key order defines the field order.

        Returns:
            A list of ``Run`` namedtuples, one for every element of the
            Cartesian product of the value lists, in ``itertools.product``
            order.
        """
        Run = namedtuple("Run", params.keys())
        return [Run(*combination) for combination in product(*params.values())]

In [5]:
# Run Manager Class for separating tensorboard code
class RunManager():
    """Collects training metrics per epoch and per run and reports them.

    A "run" is one training session with a fixed hyper-parameter tuple; each
    run consists of several epochs. The manager writes scalars, histograms,
    a sample image grid and the model graph to TensorBoard, keeps one result
    record per epoch in ``run_data``, shows the records as a live DataFrame
    in the notebook, and can dump them to CSV/JSON.

    Expected call order::

        begin_run -> (begin_epoch -> track_loss/track_num_correct* -> end_epoch)* -> end_run
    """

    def __init__(self):
        # Per-epoch accumulators; reset by begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run bookkeeping.
        self.run_params = None
        self.run_count = 0
        self.run_data = []  # one record per finished epoch, across all runs
        self.run_start_time = None

        # Objects belonging to the run currently in progress.
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a new run and log its static artefacts to TensorBoard.

        Args:
            run: Hyper-parameter tuple for this run; it must provide
                ``_asdict()`` (e.g. a namedtuple) because end_epoch() copies
                its fields into the result record.
            network: The model being trained.
            loader: The data loader used for training. One batch is pulled
                from it here to log an image grid and trace the model graph.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f"-{run}")

        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image("images", grid)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and reset the per-epoch accumulators."""
        self.epoch_start_time = time.time()
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finalise the epoch: log metrics, store a record, refresh the display."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # Both metrics are averaged over the full dataset size.
        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar("Loss", loss, self.epoch_count)
        self.tb.add_scalar("Accuracy", accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter has no gradient if it is frozen or if no backward
            # pass has touched it yet; skip it instead of crashing.
            if param.grad is not None:
                self.tb.add_histogram(f"{name}.grad", param.grad, self.epoch_count)

        # Keep a tabular record so results can be analysed outside TensorBoard.
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        # Attach the hyper-parameters so each row shows which settings produced it.
        results.update(self.run_params._asdict())
        self.run_data.append(results)

        # Redraw the results table in place in the notebook.
        clear_output(wait=True)
        display(self._results_frame())

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total.

        Args:
            loss: Scalar loss tensor for the batch. It is multiplied by the
                batch size, so it is expected to be the batch-mean loss.
            batch_size: Number of samples in this batch. Defaults to
                ``self.loader.batch_size``; pass the real size for a short
                final batch so it is not over-weighted.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in a batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Return how many rows of ``preds`` have their argmax equal to ``labels``."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def _results_frame(self):
        """Return all collected epoch records as a DataFrame (one row per epoch)."""
        return pd.DataFrame(self.run_data)

    def save(self, fileName):
        """Write the collected records to ``<fileName>.csv`` and ``<fileName>.json``.

        Args:
            fileName: Output path without extension.
        """
        self._results_frame().to_csv(f"{fileName}.csv")

        # Same records again as JSON.
        with open(f"{fileName}.json", "w", encoding="utf-8") as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [6]:
# FashionMNIST training split, stored under ./data/FashionMNIST (downloaded
# when requested via download=True). Each image is converted to a tensor
# when it is read.
train_set = torchvision.datasets.FashionMNIST(
    root="./data/FashionMNIST",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
# Hyper-parameter grid: every combination of the listed values becomes one run.
params = OrderedDict(
    lr=[0.01],
    batch_size=[1000],
    # DataLoader worker processes; try [0, 1, 2, 4, 8, 16] to compare loading speed.
    num_workers=[0],
    shuffle=[True],
)
NUM_EPOCHS = 5

m = RunManager()

for run in RunBuilder.get_runs(params):
    # Fresh model, loader and optimizer per run so runs do not influence each other.
    network = Network()
    loader = DataLoader(
        train_set,
        batch_size=run.batch_size,
        shuffle=run.shuffle,
        num_workers=run.num_workers,
    )
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(NUM_EPOCHS):
        m.begin_epoch()
        for images, labels in loader:
            preds = network(images)                # forward pass
            loss = F.cross_entropy(preds, labels)  # batch-mean loss

            optimizer.zero_grad()  # clear gradients left from the previous step
            loss.backward()        # back-propagate to compute gradients
            optimizer.step()       # update the weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)

        m.end_epoch()
    m.end_run()

m.save("results")