In [103]:
import torch.utils.tensorboard
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)
# Explicitly switch gradient tracking on for the session.
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter

In [105]:
# Show the installed torch and torchvision versions.
print(torch.__version__)
print(torchvision.__version__)

1.4.0
0.5.0


In [0]:
def accuracy_nums(preds,labels):
    """Return how many predictions match their labels.

    Args:
        preds: tensor of per-class scores; the predicted class of each row is
            the index of its largest value along dim 1.
        labels: tensor of true class indices, compared element-wise with the
            predicted classes.

    Returns:
        The number of matching positions as a plain Python int.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()


In [0]:
class Network(nn.Module):
    """Small CNN: two conv/pool stages followed by three linear layers.

    The network maps a batch of single-channel images to 10 raw class scores
    (logits). No softmax is applied here because the training code uses the
    cross-entropy loss, which handles that step itself.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected stages. The 12 in 12*4*4 is the channel count
        # produced by conv2; 4*4 is the spatial size left after both
        # conv/pool stages when the input is 28x28.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)

        # Output layer: one score per class.
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the forward pass and return the un-normalised class scores."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        stage1 = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Stage 2: conv -> ReLU -> 2x2 max-pool.
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2, stride=2)

        # Flatten the feature maps before handing them to the linear layers.
        flat = stage2.reshape(-1, 12 * 4 * 4)

        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))

        # Raw logits; softmax is intentionally left out (see class docstring).
        return self.out(hidden2)


network = Network()

In [0]:
# Fashion-MNIST training split, downloaded on first use. Compose can chain
# several transformations; converting to tensors is the only one needed here.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMINIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Shuffled loader that yields batches of 100 samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

## Hyperparameter Tuning & Experimentation auto mode

* The earlier training process was inefficient; now we will make it efficient using runs.
### Objective:

1. We want parameters to be added to the set automatically, just by defining them.
2. We want the program to unpack the parameters by itself.

In [0]:
from collections import  OrderedDict
from collections import namedtuple
from itertools import product

In [110]:
# Hyperparameter search space: each key maps to the list of values to try.
parameters = {
    'lr': [0.01, 0.001, 0.0001, 0.00001],
    'batch_size': [100, 1000, 10000],
    'shuffle': [False],
}
parameters.keys()

dict_keys(['lr', 'batch_size', 'shuffle'])

In [111]:
# The candidate value lists, in the same order as the keys.
parameters.values()

dict_values([[0.01, 0.001, 0.0001, 1e-05], [100, 1000, 10000], [False]])

In [112]:
# A namedtuple lets us access tuple fields by name as well as by index.
# Example:
Point = namedtuple('Point', ('x', 'y'))
p = Point(x=11, y=12)
p  # The generated class is called like a function; the result is still a tuple.

Point(x=11, y=12)

In [113]:
# Fields are read by name rather than by position.
p.x+p.y

23

In [114]:
# Build a tuple type whose field names are the hyperparameter names.
Run=namedtuple('Run',parameters.keys())
Run

__main__.Run

In [115]:
# One Run per combination of hyperparameter values (Cartesian product).
# The star "*" passes each combination's values as separate arguments
# rather than as a single tuple.
runs = [Run(*combo) for combo in product(*parameters.values())]
runs

[Run(lr=0.01, batch_size=100, shuffle=False),
 Run(lr=0.01, batch_size=1000, shuffle=False),
 Run(lr=0.01, batch_size=10000, shuffle=False),
 Run(lr=0.001, batch_size=100, shuffle=False),
 Run(lr=0.001, batch_size=1000, shuffle=False),
 Run(lr=0.001, batch_size=10000, shuffle=False),
 Run(lr=0.0001, batch_size=100, shuffle=False),
 Run(lr=0.0001, batch_size=1000, shuffle=False),
 Run(lr=0.0001, batch_size=10000, shuffle=False),
 Run(lr=1e-05, batch_size=100, shuffle=False),
 Run(lr=1e-05, batch_size=1000, shuffle=False),
 Run(lr=1e-05, batch_size=10000, shuffle=False)]

#### Wrapping the above steps in a class:


* The RunBuilder class encapsulates the get_runs function. 
* This is considered to be better design than simply having the function sitting inside the global scope of the program. 
* This becomes especially true if there are multiple functions that logically belong together.


In [0]:
class RunBuilder():
  """Groups the helper that expands a hyperparameter grid into runs."""

  @staticmethod
  def get_runs(parameters):
    """Return one ``Run`` namedtuple per combination of parameter values.

    Declared as a static method so it can be called on the class itself
    (``RunBuilder.get_runs(...)``) without creating a RunBuilder instance.

    Args:
      parameters: mapping of parameter name -> iterable of candidate values.
        The names become the fields of the ``Run`` namedtuple.

    Returns:
      A list of ``Run`` tuples covering the Cartesian product of all value
      lists, in the order produced by ``itertools.product``.
    """
    Run=namedtuple("Run", parameters.keys())
    return [Run(*combo) for combo in product(*parameters.values())]

### Making a RunManager class to declutter the training loop


In [0]:
import time
import pandas as pd
from IPython.display import clear_output
import json

class RunManager():
  """Collects per-epoch statistics for a series of training runs.

  Typical call order, as used by the training loop in this notebook:
  ``begin_run`` -> (``begin_epoch`` -> ``track_loss`` / ``track_num_correct``
  per batch -> ``end_epoch``) per epoch -> ``end_run`` -> ``save``.

  Results are written to TensorBoard through a ``SummaryWriter`` and are also
  accumulated in ``run_data`` (one dict per finished epoch).
  """

  def __init__(self):
    # Per-epoch accumulators, reset by begin_epoch().
    self.epoch_count=0
    self.epoch_loss=0
    self.epoch_num_correct=0
    self.epoch_start_time=None

    # Per-run state.
    self.run_params=None # Run namedtuple produced by RunBuilder
    self.run_count=0
    self.run_data=[]     # one result dict per finished epoch, across all runs
    self.run_start_time=None

    # Objects supplied by begin_run().
    self.network=None
    self.loader=None
    self.tb=None

  def begin_run(self, run, network, loader):
    """Start a new run: store its objects and open a TensorBoard writer.

    Args:
      run: namedtuple of hyperparameters; its string form is used as the
        SummaryWriter comment and its fields are added to every result row.
      network: the model being trained.
      loader: the data loader used for training; one batch is drawn from it
        here to log an image grid and the network graph.
    """
    self.run_start_time=time.time()

    self.run_params=run
    self.run_count+=1

    self.network=network
    self.loader=loader
    self.tb=SummaryWriter(comment=str(run))

    # Log one sample batch and the model graph for this run.
    images, labels=next(iter(self.loader))
    grid=torchvision.utils.make_grid(images)

    self.tb.add_image('images', grid)
    self.tb.add_graph(self.network, images)

  def end_run(self):
    """Close the TensorBoard writer and reset the epoch counter."""
    self.tb.close()
    self.epoch_count=0

  def begin_epoch(self):
    """Start timing a new epoch and reset the per-epoch accumulators."""
    self.epoch_start_time=time.time()

    self.epoch_count+=1
    self.epoch_loss=0
    self.epoch_num_correct=0

  def end_epoch(self):
    """Finish the epoch: compute metrics, log them and display the table.

    Loss and accuracy are the accumulated totals divided by the size of the
    loader's dataset.
    """
    # `display` is only injected as a builtin inside IPython sessions, so it
    # is imported explicitly to keep this method usable outside a notebook cell.
    from IPython.display import display

    epoch_duration=time.time()-self.epoch_start_time
    run_duration=time.time()-self.run_start_time

    num_samples=len(self.loader.dataset)
    loss=self.epoch_loss/num_samples
    accuracy=self.epoch_num_correct/num_samples

    self.tb.add_scalar("Loss", loss, self.epoch_count)
    self.tb.add_scalar("Accuracy", accuracy, self.epoch_count)

    for name, param in self.network.named_parameters():
      self.tb.add_histogram(name, param, self.epoch_count)
      # A parameter that has not received a gradient has grad set to None,
      # which cannot be logged as a histogram.
      if param.grad is not None:
        self.tb.add_histogram(str(name)+".grad", param.grad, self.epoch_count)

    results=OrderedDict()
    results["run"]=self.run_count
    results["epoch"]=self.epoch_count
    results["loss"]=loss
    results["accuracy"]=accuracy
    results["epoch duration"]=epoch_duration
    results["run duration"]=run_duration

    # Append the run's hyperparameters as extra columns.
    for index, value in self.run_params._asdict().items():
      results[index]=value
    self.run_data.append(results)
    df=pd.DataFrame.from_dict(self.run_data, orient='columns')

    # Replace the previously displayed table with the updated one.
    clear_output(wait=True)
    display(df)

  def track_loss(self, loss, batch_size=None):
    """Add one batch's loss to the epoch total.

    Args:
      loss: scalar loss tensor for the batch. It is scaled by the batch size,
        which assumes it is the mean over the batch -- TODO confirm this
        matches the loss function's reduction.
      batch_size: number of samples in this batch. Defaults to the loader's
        configured ``batch_size``; pass the actual size (for example
        ``len(labels)``) when the last batch of an epoch can be smaller,
        otherwise that batch is over-weighted.
    """
    if batch_size is None:
      batch_size=self.loader.batch_size
    self.epoch_loss+=loss.item()*batch_size

  def track_num_correct(self, preds, labels):
    """Add the number of correct predictions in a batch to the epoch total."""
    self.epoch_num_correct+=self._get_num_correct(preds, labels)

  def _get_num_correct(self, preds,labels): # leading _ marks this as internal
    """Count rows of ``preds`` whose arg-max along dim 1 equals the label."""
    return preds.argmax(dim=1).eq(labels).sum().item()

  def save(self, fileName):
    """Write the collected results to ``<fileName>.csv`` and ``<fileName>.json``."""
    pd.DataFrame.from_dict(
        self.run_data,
        orient='columns'
    ).to_csv(str(fileName)+".csv")

    with open(str(fileName)+".json", 'w', encoding='utf8') as f:
      json.dump(self.run_data, f, ensure_ascii=False, indent=4)



In [0]:
# Hyperparameter grid for this experiment: every combination becomes one run.
parameters=dict(
    lr=[0.01,0.001],
    batch_size=[100,10000],
    shuffle=[False]
)
num_epochs=10 # Number of passes over the training set per run

m=RunManager()

for run in RunBuilder.get_runs(parameters):
  # Unpack this run's hyperparameters (no trailing comma: `lr=run.lr,` would
  # silently create a one-element tuple instead of a float).
  lr=run.lr
  batch_size=run.batch_size
  shuffle=run.shuffle

  # Fresh model, loader and optimizer so runs do not influence each other.
  network=Network()

  train_loader=torch.utils.data.DataLoader(
      train_set,
      batch_size=batch_size,
      shuffle=shuffle,
    )

  optimizer=optim.Adam(network.parameters(), lr=lr)

  m.begin_run(run, network, train_loader)

  for epoch in range(num_epochs):
      m.begin_epoch()

      for batch in train_loader:
          images, labels=batch

          preds=network(images) # Predict from the batch
          loss=F.cross_entropy(preds, labels) # Carries everything needed for back-prop

          optimizer.zero_grad() # Needed because PyTorch accumulates gradients, which we do not want
          loss.backward() # Back-prop to compute gradients
          optimizer.step() # Update the weights

          m.track_loss(loss)
          m.track_num_correct(preds, labels)

      m.end_epoch()
  m.end_run()
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,0.563334,0.788967,15.688773,15.819127,0.01,100,False
1,1,2,0.390641,0.856783,15.78528,31.714363,0.01,100,False
2,1,3,0.355015,0.869167,15.630134,47.457055,0.01,100,False
3,1,4,0.336299,0.876067,15.671265,63.260104,0.01,100,False
4,1,5,0.327058,0.879283,15.596707,78.971243,0.01,100,False
5,1,6,0.320862,0.8825,15.632622,94.723215,0.01,100,False
6,1,7,0.31574,0.884567,15.769847,110.61063,0.01,100,False
7,1,8,0.312779,0.8863,15.9516,126.688384,0.01,100,False
8,1,9,0.307433,0.885633,15.827248,142.636922,0.01,100,False
9,1,10,0.305354,0.887,15.886459,158.647893,0.01,100,False
