In [1]:
import copy
import json
import time
from collections import namedtuple
from collections import OrderedDict
from itertools import product

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from IPython.display import display, clear_output
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

In [2]:
class Network(nn.Module):
    """Two-stage convolutional classifier producing 10 raw class scores.

    Layout: (5x5 conv -> ReLU -> 2x2 max-pool) twice, followed by three fully
    connected layers. The first linear layer takes 12*4*4 features, which is
    the flattened size a 1x28x28 input has after both conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected head: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch `t` through the network and return the class scores.

        No softmax is applied; the output of the last linear layer is
        returned as-is.
        """
        # Each conv stage: convolution, ReLU, then 2x2 max-pooling (stride 2).
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten everything except the batch dimension before the dense head.
        t = t.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            t = F.relu(hidden(t))

        return self.out(t)

In [3]:
def get_num_correct(preds, labels):
    """Return, as a Python number, how many rows of `preds` have their
    highest-scoring column (arg-max over dim 1) equal to the matching entry
    of `labels`."""
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [4]:
class RunBuilder():
    """Turns a grid of hyper-parameter value lists into individual runs."""

    @staticmethod
    def get_runs(params):
        """Build one ``Run`` named tuple per combination of parameter values.

        Args:
            params: mapping of parameter name -> iterable of candidate values.
                Its key order defines the field order of ``Run``.

        Returns:
            A list of ``Run`` tuples covering the Cartesian product of all
            value lists, in ``itertools.product`` order.
        """
        Run = namedtuple("Run", params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [5]:
class RunManager():
    """Bookkeeping for a series of training runs and their epochs.

    Typical call order, per run::

        begin_run -> (begin_epoch -> track_loss / track_num_correct per batch
        -> end_epoch) per epoch -> end_run

    After all runs, ``save`` writes the collected per-epoch records to disk.
    Scalars and parameter histograms are logged to TensorBoard through a
    ``SummaryWriter`` that is created per run.
    """

    def __init__(self):
        # Per-epoch accumulators; reset in begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state. run_data keeps one record per finished epoch and is
        # never cleared, so it spans all runs handled by this manager.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Objects of the run currently in progress.
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a new run: remember its parameters/objects and open a writer.

        Args:
            run: the run's parameter tuple; must provide ``_asdict()`` (used
                in ``end_epoch``). Its string form is used as the writer's
                comment suffix.
            network: the model being trained (its parameters are logged).
            loader: the data loader; ``loader.dataset`` and
                ``loader.batch_size`` are read by the tracking methods.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        # No sample batch is pulled from the loader here: nothing is logged
        # from it, so fetching one would only be wasted work.

    def end_run(self):
        """Close the run's TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and zero the loss/correct accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finalize the epoch: compute metrics, log them, record and display.

        Loss and accuracy are the accumulated totals divided by the number of
        samples in ``loader.dataset``. A record holding the metrics, timings
        and the run's hyper-parameters is appended to ``run_data``, and the
        whole table is re-rendered in the notebook output.
        """
        now = time.time()
        epoch_duration = now - self.epoch_start_time
        run_duration = now - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that never received a gradient has grad None,
            # which cannot be turned into a histogram.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration

        # Append the run's hyper-parameters as extra columns.
        for k, v in self.run_params._asdict().items():
            results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # Replace the previous table in the cell output with the updated one.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by batch size.

        Args:
            loss: single-element tensor (``.item()`` is called on it);
                assumed to be a per-sample mean over the batch, so that
                multiplying by the batch size yields a summed loss.
            batch_size: number of samples in this batch. Defaults to
                ``loader.batch_size``; pass the real size for a short final
                batch so it is not over-weighted.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in a batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of `preds` whose arg-max over dim 1 equals `labels`."""
        # Uses the `labels` argument; no lookup of a same-named global.
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all recorded epochs to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame.from_dict(
            self.run_data,
            orient="columns").to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [6]:
# Baseline CNN without batch normalization. Seeding immediately before
# construction fixes the random initial weights.
torch.manual_seed(42)
network1 = nn.Sequential(
    # conv stage 1: 1 -> 6 channels
    nn.Conv2d(1, 6, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # conv stage 2: 6 -> 12 channels
    nn.Conv2d(6, 12, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # flatten each image's features, keeping the batch dimension
    nn.Flatten(start_dim=1),
    # dense head: 192 -> 120 -> 60 -> 10
    nn.Linear(12 * 4 * 4, 120),
    nn.ReLU(),
    nn.Linear(120, 60),
    nn.ReLU(),
    nn.Linear(60, 10),
)

In [7]:
# Same architecture as the baseline, plus two batch-norm layers. Seeding
# immediately before construction fixes the random initial weights.
torch.manual_seed(42)
network2 = nn.Sequential(
    # conv stage 1: 1 -> 6 channels
    nn.Conv2d(1, 6, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # argument = number of channels coming out of the previous conv stage;
    # the layer carries its own scale and shift parameters
    nn.BatchNorm2d(6),
    # conv stage 2: 6 -> 12 channels
    nn.Conv2d(6, 12, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    nn.Flatten(start_dim=1),
    # dense head: 192 -> 120 (batch-normalized) -> 60 -> 10
    nn.Linear(12 * 4 * 4, 120),
    nn.ReLU(),
    nn.BatchNorm1d(120),
    nn.Linear(120, 60),
    nn.ReLU(),
    nn.Linear(60, 10),
)

In [8]:
# FashionMNIST training split with images converted to tensors only
# (no normalization); downloaded into ./data/FashionMNIST if missing.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)


In [9]:
# Load the whole training set as one batch so that mean and standard
# deviation can be taken over every pixel at once.
loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=len(train_set),
    num_workers=1,
    pin_memory=True,
)
data = next(iter(loader))
# data[0] holds the images, data[1] the labels.
mean, std = data[0].mean(), data[0].std()
mean, std

(tensor(0.2860), tensor(0.3530))

In [10]:
# Same training split, but each image is additionally normalized with the
# dataset-wide mean and standard deviation computed above.
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)


In [11]:
# Lookup table so a run can select its training set by name.
trainsets = dict(
    not_normal=train_set,
    normal=train_set_normal,
)

In [12]:
# Lookup table so a run can select its network by name; every entry listed
# here is fed through the testing framework.
networks = dict(
    no_batch_norm=network1,
    batch_norm=network2,
)

In [13]:
# Hyper-parameter grid: every combination of the listed values becomes one
# run, e.g. lr = [.001, .01] would double the number of runs.
params = OrderedDict(
    lr = [.01],
    batch_size = [1000],
    num_workers = [1],
    device = ["cuda"],
    trainset = ["normal"],
    # one run per entry in the `networks` dict
    network = list(networks.keys())
)
# Number of passes over the training set in every run.
NUM_EPOCHS = 20

m = RunManager()
for run in RunBuilder.get_runs(params):

    device = torch.device(run.device)
    # Train a private copy of the selected network. The objects in `networks`
    # are shared across runs, so training them in place would let a later run
    # with the same network name continue from already-trained weights
    # instead of the seeded initial ones. The entries of `networks` therefore
    # stay untrained; the trained model of the last run is `network`.
    network = copy.deepcopy(networks[run.network]).to(device)
    loader = DataLoader(
        trainsets[run.trainset],
        batch_size = run.batch_size,
        num_workers = run.num_workers,
        pin_memory = True,
    )
    optimizer = optim.Adam(network.parameters(), lr = run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(NUM_EPOCHS):
        m.begin_epoch()
        for batch in loader:

            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            # Clear old gradients by dropping them (grad = None) rather than
            # calling optimizer.zero_grad().
            for p in network.parameters():
                p.grad = None
            loss.backward()   # calculate gradients
            optimizer.step()  # update weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
# Writes results.csv and results.json.
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
0,1,1,0.830967,0.683533,10.145446,10.644288,0.01,1000,1,cuda,normal,no_batch_norm
1,1,2,0.455314,0.826933,8.234865,19.074716,0.01,1000,1,cuda,normal,no_batch_norm
2,1,3,0.386308,0.8562,8.262508,27.40081,0.01,1000,1,cuda,normal,no_batch_norm
3,1,4,0.35157,0.86965,8.27803,35.743322,0.01,1000,1,cuda,normal,no_batch_norm
4,1,5,0.325868,0.879217,8.239512,44.045652,0.01,1000,1,cuda,normal,no_batch_norm
5,1,6,0.311092,0.886017,8.292844,52.400483,0.01,1000,1,cuda,normal,no_batch_norm
6,1,7,0.295289,0.890933,8.291497,60.75331,0.01,1000,1,cuda,normal,no_batch_norm
7,1,8,0.278343,0.897567,8.271598,69.090849,0.01,1000,1,cuda,normal,no_batch_norm
8,1,9,0.271311,0.900017,8.250522,77.402763,0.01,1000,1,cuda,normal,no_batch_norm
9,1,10,0.259443,0.904267,8.27864,85.748942,0.01,1000,1,cuda,normal,no_batch_norm


In [14]:
# Rank every recorded epoch (across all runs) from highest to lowest accuracy.
(
    pd.DataFrame
    .from_dict(m.run_data)
    .sort_values(by="accuracy", ascending=False)
)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
39,2,20,0.16708,0.936717,8.387012,168.065338,0.01,1000,1,cuda,normal,batch_norm
38,2,19,0.172256,0.933733,8.27624,159.596072,0.01,1000,1,cuda,normal,batch_norm
36,2,17,0.179296,0.932233,8.354341,142.847733,0.01,1000,1,cuda,normal,batch_norm
37,2,18,0.178035,0.93205,8.299305,151.23194,0.01,1000,1,cuda,normal,batch_norm
35,2,16,0.187565,0.928,8.299871,134.400142,0.01,1000,1,cuda,normal,batch_norm
34,2,15,0.190935,0.92765,8.430041,126.022603,0.01,1000,1,cuda,normal,batch_norm
32,2,13,0.195068,0.92685,8.291974,109.131425,0.01,1000,1,cuda,normal,batch_norm
33,2,14,0.192696,0.9265,8.298658,117.507253,0.01,1000,1,cuda,normal,batch_norm
31,2,12,0.199013,0.925683,8.354903,100.762196,0.01,1000,1,cuda,normal,batch_norm
30,2,11,0.206537,0.9219,8.315146,92.315603,0.01,1000,1,cuda,normal,batch_norm
