# Reference
- namedtuple
https://stackoverflow.com/questions/2970608/what-are-named-tuples-in-python
```python
from collections import namedtuple
Point = namedtuple('Point', 'x y')
pt1 = Point(1.0, 5.0)
pt2 = Point(2.5, 1.5)

from math import sqrt
line_length = sqrt((pt1.x-pt2.x)**2 + (pt1.y-pt2.y)**2)
```

# stack vs cat

In [1]:
import torch

In [2]:
t1 = torch.tensor([1,1,1])
t2 = torch.tensor([2,2,2])
t3 = torch.tensor([3,3,3])

In [3]:
t1.shape

torch.Size([3])

In [4]:
t1.unsqueeze(0).shape

torch.Size([1, 3])

In [5]:
cat1 = torch.cat((t1,t2,t3), dim=0)  

In [6]:
cat1.shape   # same axis

torch.Size([9])

In [7]:
stack1 = torch.stack((t1, t2, t3), dim=0)

In [8]:
stack1.shape   # new axis

torch.Size([3, 3])

In [9]:
torch.cat(
    (
        t1.unsqueeze(0),
        t2.unsqueeze(0),
        t3.unsqueeze(0)
    ),
    dim=0
)

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])

In [10]:
torch.cat(
    (
        t1.unsqueeze(0),
        t2.unsqueeze(0),
        t3.unsqueeze(0)
    ),
    dim=0
).shape

torch.Size([3, 3])

In [11]:
torch.stack(
    (t1, t2, t3),
    dim=0
)

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])

In [12]:
torch.stack(
    (t1, t2, t3),
    dim=1
)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

# Run builder

In [13]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [14]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        Run = namedtuple("testRun", params.keys())
        
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [15]:
params = OrderedDict(
    lr = [.01, .001],
    batch_size = [1000, 10000]
)

In [16]:
print(params)

OrderedDict([('lr', [0.01, 0.001]), ('batch_size', [1000, 10000])])


In [17]:
a = []
c = []
b = namedtuple("test", params.keys())
for each in product(*params.values()):
    print(each)
    print(b(*each))

(0.01, 1000)
test(lr=0.01, batch_size=1000)
(0.01, 10000)
test(lr=0.01, batch_size=10000)
(0.001, 1000)
test(lr=0.001, batch_size=1000)
(0.001, 10000)
test(lr=0.001, batch_size=10000)


In [18]:
runs = RunBuilder.get_runs(params)
runs

[testRun(lr=0.01, batch_size=1000),
 testRun(lr=0.01, batch_size=10000),
 testRun(lr=0.001, batch_size=1000),
 testRun(lr=0.001, batch_size=10000)]

In [19]:
for i in runs:
    for j in i: 
        print(j)

0.01
1000
0.01
10000
0.001
1000
0.001
10000


In [20]:
run = runs[0]
run

testRun(lr=0.01, batch_size=1000)

In [21]:
print(run.lr)

0.01


In [22]:
for each in runs:
    print(run, run.lr, run.batch_size)

testRun(lr=0.01, batch_size=1000) 0.01 1000
testRun(lr=0.01, batch_size=1000) 0.01 1000
testRun(lr=0.01, batch_size=1000) 0.01 1000
testRun(lr=0.01, batch_size=1000) 0.01 1000


In [23]:
# another example
params_gpu_cpu = OrderedDict(
    lr = [.01, .001],
    batch_size = [1000, 10000],
    device=['cuda', 'cpu']
)

In [24]:
runs = RunBuilder.get_runs(params_gpu_cpu)
runs

[testRun(lr=0.01, batch_size=1000, device='cuda'),
 testRun(lr=0.01, batch_size=1000, device='cpu'),
 testRun(lr=0.01, batch_size=10000, device='cuda'),
 testRun(lr=0.01, batch_size=10000, device='cpu'),
 testRun(lr=0.001, batch_size=1000, device='cuda'),
 testRun(lr=0.001, batch_size=1000, device='cpu'),
 testRun(lr=0.001, batch_size=10000, device='cuda'),
 testRun(lr=0.001, batch_size=10000, device='cpu')]

### How to build the RunBuilder

In [25]:
params = OrderedDict(
    lr = [.01, .001],
    batch_size = [1000, 10000]
)

In [26]:
params.keys()

odict_keys(['lr', 'batch_size'])

In [27]:
params.values()

odict_values([[0.01, 0.001], [1000, 10000]])

In [28]:
Run = namedtuple("Run", params.keys())

In [29]:
runs = []
for v in product(*params.values()):
    runs.append(Run(*v))
runs

[Run(lr=0.01, batch_size=1000),
 Run(lr=0.01, batch_size=10000),
 Run(lr=0.001, batch_size=1000),
 Run(lr=0.001, batch_size=10000)]

In [30]:
for run in RunBuilder.get_runs(params):
    comment = f"-{run}"
    print(f"comment: {comment} lr={run.lr}, batch_size={run.batch_size}")

comment: -testRun(lr=0.01, batch_size=1000) lr=0.01, batch_size=1000
comment: -testRun(lr=0.01, batch_size=10000) lr=0.01, batch_size=10000
comment: -testRun(lr=0.001, batch_size=1000) lr=0.001, batch_size=1000
comment: -testRun(lr=0.001, batch_size=10000) lr=0.001, batch_size=10000


# CNN example with paramters tuning

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [32]:
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output

In [33]:
import pandas as pd
import time
import json

In [34]:
from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [35]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, 
                               out_channels=6,
                               kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, 
                               out_channels=12,
                               kernel_size=5)
        self.fc1 = nn.Linear(in_features=12*4*4,
                             out_features=120)
        self.fc2 = nn.Linear(in_features=120,
                             out_features=60)
        self.out = nn.Linear(in_features=60,
                             out_features=10)
        
    def forward(self, t):
        
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        t = self.out(t)
        
        return t

In [36]:
# flatten example

t = torch.tensor([[[1, 2],
                       [3, 4]],
                      [[5, 6],
                       [7, 8]]])

In [37]:
t

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [38]:
t.shape

torch.Size([2, 2, 2])

In [39]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        
        Run = namedtuple("Run", params.keys())
        
        runs = []
        
        for v in product(*params.values()):
            runs.append(Run(*v))
            
        return runs

In [40]:
class RunManager():
    def __init__(self):
        
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        self.network = None
        self.loader = None
        self.tb = None
        
    def begin_run(self, run, network, loader):
        
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(
             self.network
            ,images.to(getattr(run, 'device', 'cpu'))
        )
        
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0   

    def begin_epoch(self):
        self.epoch_start_time = time.time()
        
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time
        
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)
                
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)
        
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
        
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        for k,v in self.run_params._asdict().items(): results[k] = v
        self.run_data.append(results)
        
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')
        
        clear_output(wait=True)
        display(df)
        
    def track_loss(self, loss, batch):
        self.epoch_loss += loss.item() * batch[0].shape[0]
        
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
    
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):
        
        pd.DataFrame.from_dict(
            self.run_data
            ,orient='columns'
        ).to_csv(f'{fileName}.csv')
        
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [41]:
train_set = torchvision.datasets.FashionMNIST(
    root = "./data",
    train = True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [42]:
params = OrderedDict(
    lr = [.01],
    batch_size = [1000],
    shuffle=[True]
)

m = RunManager()

for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            
            images, labels = batch
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights
            
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)  
        m.end_epoch()
    m.end_run()
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,1.014723,0.62065,6.739429,7.378196,0.01,1000,True


In [43]:
# Pytorch dataloader num_workers

params = OrderedDict(
    lr = [.01],
    batch_size =[100], # 1000, 10000
    num_workers = [0], # 1,2,4,8,16
    # shuffle=[True, False]
)

m = RunManager()

for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set, 
                        batch_size=run.batch_size, 
                        num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers
0,1,1,0.570681,0.782467,8.260319,8.358446,0.01,100,0


# Pytorch and GPU

In [44]:

t = torch.ones(1,1,28, 28)
t

tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 

In [45]:
network=Network()
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [46]:
t = t.cuda()
network = network.cuda()

In [47]:
gpu_pred = network(t)
gpu_pred.device

device(type='cuda', index=0)

In [48]:
# moving to cpu
t = t.cpu()
network = network.cpu()

In [49]:
cpu_pred = network(t)
cpu_pred.device

device(type='cpu')

In [50]:
t1 = torch.tensor([
    [1,2],
    [3,4]
])

t2 = torch.tensor([
    [5,6],
    [7,8]
])

In [51]:
t1.device, t2.device

(device(type='cpu'), device(type='cpu'))

In [52]:
t1 = t1.to('cuda')
t1.device

device(type='cuda', index=0)

In [53]:
try: t1+t2
    
except Exception as e: print(e)

Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!


In [54]:
try: t2+t1
except Exception as e: print(e)

Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!


In [55]:
t2 = t2.to("cuda")
t1+t2

tensor([[ 6,  8],
        [10, 12]], device='cuda:0')

In [56]:
torch.cuda.is_available()

True

In [57]:
# Using GPU
params = OrderedDict(
    lr = [.01],
    batch_size = [20000],
    num_workers = [1],
    device = ["cuda", "cpu"]
)


m = RunManager()

for run in RunBuilder.get_runs(params):
    
    device = torch.device(run.device)
    network = Network().to(device)
    loader = DataLoader(train_set, \
                        batch_size=run.batch_size, 
                        num_workers=run.num_workers)
    optimizer=optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device
0,1,1,2.294642,0.139167,4.781247,12.368031,0.01,20000,1,cuda
1,2,1,2.275643,0.160133,5.651601,14.750737,0.01,20000,1,cpu


In [58]:
pd.DataFrame.from_dict(m.run_data).sort_values("epoch duration")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device
0,1,1,2.294642,0.139167,4.781247,12.368031,0.01,20000,1,cuda
1,2,1,2.275643,0.160133,5.651601,14.750737,0.01,20000,1,cpu


# Data normalization

In [59]:
loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
data[0].mean(), data[0].std()

(tensor(0.2860), tensor(0.3530))

In [60]:
loader = DataLoader(train_set, batch_size=1000, num_workers=1)
num_of_pixels = len(train_set) * 28 * 28

total_sum = 0
for batch in loader: 
    total_sum += batch[0].sum()    
mean = total_sum / num_of_pixels

sum_of_squared_error = 0
for batch in loader: sum_of_squared_error += ((batch[0] - mean).pow(2)).sum()
std = torch.sqrt(sum_of_squared_error / num_of_pixels)

mean, std

(tensor(0.2860), tensor(0.3530))

In [61]:
import matplotlib.pyplot as plt

In [62]:
# plt.hist(data[0].flatten())
# plt.axvline(data[0].mean())

In [63]:
# Using mean and std

train_set_normal = torchvision.datasets.FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
)

In [64]:
loader = DataLoader(train_set_normal, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
data[0].mean(), data[0].std()

(tensor(-1.0662e-07), tensor(1.0000))

In [65]:
# plt.hist(data[0].flatten())
# plt.axvline(data[0].mean())

In [66]:
trainsets = {
    "not_normal": train_set,
    "normal": train_set_normal
}

In [67]:
params = OrderedDict(
    lr = [.01],
    batch_size = [1000],
    num_workers = [1],
    device = ["cuda"],
    trainset = ["not_normal", "normal"]
)

In [68]:
m = RunManager()

for run in RunBuilder.get_runs(params):
    
    device = torch.device(run.device)
    network = Network().to(device)
    loader = DataLoader(trainsets[run.trainset], 
                        batch_size=run.batch_size,
                        num_workers=run.num_workers)
    optimizer=optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    
    for epoch in range(20):
        
        m.begin_epoch()
        
        for batch in loader:
            
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images)
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset
0,1,1,0.991937,0.625633,4.572407,5.075577,0.01,1000,1,cuda,not_normal
1,1,2,0.549086,0.785633,4.606768,9.725939,0.01,1000,1,cuda,not_normal
2,1,3,0.466257,0.827467,4.596125,14.365214,0.01,1000,1,cuda,not_normal
3,1,4,0.408242,0.850233,4.547924,18.956721,0.01,1000,1,cuda,not_normal
4,1,5,0.361185,0.867483,4.467439,23.46892,0.01,1000,1,cuda,not_normal
5,1,6,0.342569,0.874217,4.417886,27.930042,0.01,1000,1,cuda,not_normal
6,1,7,0.325741,0.881633,4.440347,32.414897,0.01,1000,1,cuda,not_normal
7,1,8,0.310126,0.886133,4.396535,36.856056,0.01,1000,1,cuda,not_normal
8,1,9,0.298008,0.88985,4.473086,41.373192,0.01,1000,1,cuda,not_normal
9,1,10,0.289104,0.893667,4.545705,45.963057,0.01,1000,1,cuda,not_normal


In [69]:
pd.DataFrame.from_dict(m.run_data).sort_values('accuracy', ascending=False)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset
39,2,20,0.223404,0.91455,6.728842,139.119634,0.01,1000,1,cuda,normal
37,2,18,0.231778,0.912333,6.888417,125.413941,0.01,1000,1,cuda,normal
38,2,19,0.231664,0.911917,6.878509,132.343302,0.01,1000,1,cuda,normal
36,2,17,0.235024,0.91025,6.770137,118.442979,0.01,1000,1,cuda,normal
19,1,20,0.239819,0.910017,4.676514,92.440558,0.01,1000,1,cuda,not_normal
33,2,14,0.238886,0.909567,6.917353,97.983526,0.01,1000,1,cuda,normal
34,2,15,0.240939,0.908917,6.808426,104.838456,0.01,1000,1,cuda,normal
18,1,19,0.245104,0.908533,4.59313,87.710883,0.01,1000,1,cuda,not_normal
32,2,13,0.24296,0.9081,6.962447,91.019799,0.01,1000,1,cuda,normal
16,1,17,0.24882,0.906417,4.655392,78.290975,0.01,1000,1,cuda,not_normal


In [70]:
# torch.manual_seed(50)
# sequential1 = nn.Sequential(
#       nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
#     , nn.ReLU()
#     , nn.MaxPool2d(kernel_size=2, stride=2)
#     , nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
#     , nn.ReLU()
#     , nn.MaxPool2d(kernel_size=2, stride=2)
#     , nn.Flatten(start_dim=1)  
#     , nn.Linear(in_features=12*4*4, out_features=120)
#     , nn.ReLU()
#     , nn.Linear(in_features=120, out_features=60)
#     , nn.ReLU()
#     , nn.Linear(in_features=60, out_features=10)
# )


# torch.manual_seed(50)
# layers = OrderedDict([
#      ('conv1', nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5))
#     ,('relu1', nn.ReLU())
#     ,('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))
    
#     ,('conv2', nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5))
#     ,('relu2', nn.ReLU())
#     ,('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))
    
#     ,('flatten', nn.Flatten(start_dim=1)  )
#     ,('fc1', nn.Linear(in_features=12*4*4, out_features=120))
#     ,('relu3', nn.ReLU())
    
#     ,('fc2', nn.Linear(in_features=120, out_features=60))
#     ,('relu4', nn.ReLU())
#     ,('out', nn.Linear(in_features=60, out_features=10))
# ])

# sequential2 = nn.Sequential(layers)


# torch.manual_seed(50)
# sequential3 = nn.Sequential()
# sequential3.add_module('conv1', nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5))
# sequential3.add_module('relu1', nn.ReLU())
# sequential3.add_module('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))
    
# sequential3.add_module('conv2', nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5))
# sequential3.add_module('relu2', nn.ReLU())
# sequential3.add_module('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))
    
# sequential3.add_module('flatten', nn.Flatten(start_dim=1))
# sequential3.add_module('fc1', nn.Linear(in_features=12*4*4, out_features=120))
# sequential3.add_module('relu3', nn.ReLU())
    
# sequential3.add_module('fc2', nn.Linear(in_features=120, out_features=60))
# sequential3.add_module('relu4', nn.ReLU())
# sequential3.add_module('out', nn.Linear(in_features=60, out_features=10))

# RunManager updated

In [71]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn. functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output

import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [72]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        Run = namedtuple("Run", params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [73]:
class RunManager():
    def __init__(self):
        # Run
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        # model, dataloader, and tensorboard
        self.network = None
        self.loader = None
        self.tb = None
        
        # Epoch
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
    def begin_run(self, run, network, loader):
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
#         self.tb = SummaryWriter(comment=f"-{run}")
        self.tb = SummaryWriter()
    
        # Load the images, and labels
        imgs, lbls = next(iter(self.loader))
        grid = torchvision.utils.make_grid(imgs)
        
#         self.tb.add_image("images", grid)
#         self.tb.add_graph(
#             self.network,
#             imgs.to(getattr(run, "device", "cpu"))
#         )
        
    def begin_epoch():
        self.epoch_start_time = time.time()
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        
    def end_epoch():
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time
        
        # for each epoch, all the losses divided by the len(dataset)
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)
        
        self.tb.add_scalar("Loss", loss, self.epoch_count)
        self.tb.add_scalar("Accuracy", accuracy, self.epoch_count)
        
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f"{name}.grad", param.grad, self.epoch_count)
            
        # The output result collection
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        for k, v in self.run_params._asdict().items():
            results[k] = v
        self.run_data.append(results)
        
        # Define the output result from dict
        df = pd.DataFrame.from_dict(self.run_data, orient="columns")
    
        # Clear the canvas and display teh output result
        clear_output(wait=True)
        display(df)
        
    def end_run(self):
        """
        for the run is ended, tensorboard, and epoch count gets reset
        """
        self.tb.close()
        self.epoch_count = 0
            
    def track_loss(self, loss, batch):
        self.epoch_loss += loss.item() * batch[0].shape[0]
        
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
        
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):
        # CSV
        pd.DataFrame.from_dict(
            self.run_data,
            orient="columns"
        ).to_csv(f"{fileName}.csv")
        
        # JSON 
        with open(f"{filename}.json", "w", encodng="utf-8") as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

# Batch Norm


In [74]:
# No BatchNorm
torch.manual_seed(50)
network_nobatch = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.Flatten(start_dim=1)  
    , nn.Linear(in_features=12*4*4, out_features=120)
    , nn.ReLU()
    , nn.Linear(in_features=120, out_features=60)
    , nn.ReLU()
    , nn.Linear(in_features=60, out_features=10)
)

In [75]:
# BatchNorm
torch.manual_seed(50)
network_batch = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.BatchNorm2d(6)
    , nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.Flatten(start_dim=1)  
    , nn.Linear(in_features=12*4*4, out_features=120)
    , nn.ReLU()
    , nn.BatchNorm1d(120)
    , nn.Linear(in_features=120, out_features=60)
    , nn.ReLU()
    , nn.Linear(in_features=60, out_features=10)
)

In [76]:
# trainset
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [77]:
# get mean, std
loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
mean = data[0].mean()
std = data[0].std()
mean, std

(tensor(0.2860), tensor(0.3530))

In [78]:
# generate trainset with normal distribution
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
        ,transforms.Normalize(mean, std)
    ])
)

In [79]:
# different trainsets
trainsets = {
    'not_normal': train_set
    ,'normal': train_set_normal
}

In [80]:
# different networks
networks = {
    'no_batch_norm': network_nobatch
    ,'batch_norm': network_batch
}

In [81]:
# Training process with the above parameters
params = OrderedDict(
    lr = [.01],
    batch_size = [1000],
    num_workers = [1],
    device = ["cuda"],
    trainset = ["normal"],
    network = list(networks.keys())
)

In [82]:
mgr = RunManager()

In [83]:
for run in RunBuilder.get_runs(params):
    device = torch.device(run.device)
    network = networks[run.network].to(device)
    loader = DataLoader(trainsets[run.trainset], 
                        batch_size=run.batch_size,
                        num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(),
                           lr=run.lr)
    
    mgr.begin_run(run, network, loader)
    for epoch in range(20):
        mgr.begin_epoch()
        for batch in loader:
            imgs = batch[0].to(device)
            lbls = batch[1].to(device)
            preds = network(imgs)
            loss = F.cross_entropy(preds, lbls)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            mgr.track_loss(loss, batch)
            

TypeError: RunManager.begin_epoch() takes 0 positional arguments but 1 was given

In [None]:
nn.CrossEntropyLoss
nn.functional.cross_entropy