In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [26]:
class RunManager():
    def __init__(self):
        
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = 0
        
        self.network = None
        self.loader = None 
        self.tb = None
    
    def begin_run(self ,run ,network ,loader):
        
        self.run_start_time = time.time()
        
        self.run_params = run
        self.run_count += 1
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment = f'-{run}')
        
        images ,labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        
        self.tb.add_image('images' ,grid)
        self.tb.add_graph(self.network ,images)
        
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0
    
    def begin_epoch(self):
        self.epoch_start_time = time.time()
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        
    def end_epoch(self):
        epoch_duration = time.time() -self.epoch_start_time
        run_duration = time.time() -self.run_start_time
        
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct/len(self.loader.dataset)
        
        self.tb.add_scalar('Loss' ,loss ,self.epoch_count)
        self.tb.add_scalar('Accuracy' ,accuracy ,self.epoch_count)
        
        for name ,param in self.network.named_parameters():
            self.tb.add_histogram(name ,param ,self.epoch_count)
            self.tb.add_histogram(f'{name}.grad' ,param.grad ,self.epoch_count)
            
        results = OrderedDict()
        results['run'] = self.run_count 
        results['epoch'] = self.epoch_count
        results['loss'] = loss
        results['accuracy'] = accuracy
        results['epoch_duration'] = epoch_duration
        results['run_duration'] = run_duration
        
        for k ,v in self.run_params._asdict().items():#_asdict is a method of namedtuple which convert it to a dict
            results[k] = v #key-value pair of dict is populated with field-value pair of namedtuple
        self.run_data.append(results)
#         Syntax: dictionary.items()
#         Parameters: This method takes no parameters.
#         Returns: A view object that displays a list of a given dictionary’s (key, value) tuple pair.
        
        df = pd.DataFrame.from_dict(self.run_data ,orient ='columns')
        
        clear_output(wait = True)
        display(df)
    
    def track_loss(self ,loss ,batch):
        self.epoch_loss += loss.item() * batch[0].shape[0] #batch unpacks to images and labels now batch[0] == images of shape 
                                                           #batch[0].shape==[batch_size,channels,height,width]
                                                           #batch[0].shape[0] == batch_size
    
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self.get_num_correct(preds, labels)
    
    def get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):
        pd.DataFrame.from_dict(self.run_data, orient='columns').to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

        

In [31]:
m.run_data

[OrderedDict([('run', 1),
              ('epoch', 1),
              ('loss', 1.0608767877022425),
              ('accuracy', 0.5936166666666667),
              ('epoch_duration', 28.22715973854065),
              ('run_duration', 30.030163526535034),
              ('lr', 0.01),
              ('batch_size', 1000),
              ('shuffle', True)]),
 OrderedDict([('run', 1),
              ('epoch', 2),
              ('loss', 0.5743795787294705),
              ('accuracy', 0.7748333333333334),
              ('epoch_duration', 28.0795316696167),
              ('run_duration', 58.33154487609863),
              ('lr', 0.01),
              ('batch_size', 1000),
              ('shuffle', True)]),
 OrderedDict([('run', 1),
              ('epoch', 3),
              ('loss', 0.4824780349930127),
              ('accuracy', 0.8188666666666666),
              ('epoch_duration', 26.7427818775177),
              ('run_duration', 85.26679873466492),
              ('lr', 0.01),
              ('batch_siz

In [27]:
class run_builder():
    @staticmethod
    def  get_runs(params):
        run = namedtuple('Run' ,params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(run(*v))
        return runs

In [28]:
#we have to perform ETL ie extracting the data from any source,transform the data in tensor or any other required form and then create a loader object.
#torchvision.datasets.FashionMNIST code handels the loading and transform task and torch.utils.data.DataLoader handels tge loader object task.

train_set=torchvision.datasets.FashionMNIST(root='./data/FashionMNIST',train=True,download=True,transform=transforms.Compose([transforms.ToTensor()]))

#We can pass an entire list(here we pass only one transformation i.e. transforms.ToTensor) of transformations to our data(batch of images here) which would be performed one after the other.

In [29]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1=nn.Conv2d(in_channels=1,out_channels=6,kernel_size=5)
        self.conv2=nn.Conv2d(in_channels=6,out_channels=12,kernel_size=5)
        self.fc1=nn.Linear(in_features=12*4*4,out_features=120)
        self.fc2=nn.Linear(in_features=120,out_features=60)
        self.out=nn.Linear(in_features=60,out_features=10)
        
    def forward(self,t):
        t=F.relu(self.conv1(t))
        t=F.max_pool2d(t,kernel_size=2,stride=2)
        
        t=F.relu(self.conv2(t))
        t=F.max_pool2d(t,kernel_size=2,stride=2)
        
        t=F.relu(self.fc1(t.reshape(-1,12*4*4)))
        t=F.relu(self.fc2(t))
        t=self.out(t)
        
        return t
    

In [30]:
params = OrderedDict(lr = [0.01] ,batch_size = [1000 ,2000] ,shuffle = [True ,False])

m = RunManager()

for run in run_builder().get_runs(params):
    
    network = Network()
    loader = DataLoader(train_set ,batch_size = run.batch_size ,shuffle = run.shuffle)
    optimizer = optim.Adam(network.parameters() ,lr = run.lr)
    
    m.begin_run(run ,network ,loader)
    
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader:
            images ,labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds ,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss ,batch)
            m.track_num_correct(preds ,labels)
        m.end_epoch()
    m.end_run()
    
m.save('results')
        
            



# hyper_parameter = dict( batch_size = [10 ,100 ,1000] ,lr = [0.01 ,0.001] ,shuffle = [True ,False])
# params_values = [v for v in hyper_parameter.values()]

# for batch_size ,lr ,shuffle in product(*params_values):

#     network = Network()
#     train_loader= torch.utils.data.DataLoader(train_set,batch_size=batch_size ,shuffle=shuffle)
#     optimizer=optim.Adam(network.parameters(), lr=lr)
    
#     images, labels = next(iter(train_loader))
#     grid = torchvision.utils.make_grid(images)
    
#     comment=f' batch_size={batch_size} lr={lr} shuffle = {shuffle}'
#     tb = SummaryWriter(comment = comment)
#     tb.add_image('images', grid)
#     tb.add_graph(network, images)

    
#     for epoch in range(10):
#         total_loss = 0
#         total_correct = 0


#         for images ,labels in train_loader:

#             preds = network(images) #pass batch
#             loss = F.cross_entropy(preds ,labels) #calculate loss

#             optimizer.zero_grad()
#             loss.backward() #calculate gradient
#             optimizer.step() #update weights

#             total_loss += loss.item() * batch_size #batch size is multiplied to account for the different size batches incase we want to compare the performance with different batch size
#             total_correct += get_correct_preds(preds ,labels)
#         tb.add_scalar('loss',total_loss,epoch)
#         tb.add_scalar('number of correct preds',total_correct,epoch)
#         tb.add_scalar('Accuracy',total_correct/len(train_set),epoch)

#         for name,weight in network.named_parameters():
#             tb.add_histogram(name,weight,epoch)
#             tb.add_histogram(name,weight.grad,epoch)


#         print("epoch :",epoch ,'total_loss :',total_loss,'total_correct :',total_correct)

Unnamed: 0,run,epoch,loss,accuracy,epoch_duration,run_duration,lr,batch_size,shuffle
0,1,1,1.060877,0.593617,28.22716,30.030164,0.01,1000,True
1,1,2,0.57438,0.774833,28.079532,58.331545,0.01,1000,True
2,1,3,0.482478,0.818867,26.742782,85.266799,0.01,1000,True
3,1,4,0.426917,0.84085,28.065805,113.566762,0.01,1000,True
4,1,5,0.380936,0.859517,28.281624,142.065548,0.01,1000,True
5,2,1,0.96609,0.628,27.558884,29.161914,0.01,1000,False
6,2,2,0.548439,0.7862,27.64419,57.023848,0.01,1000,False
7,2,3,0.458827,0.832483,28.215435,85.462027,0.01,1000,False
8,2,4,0.401963,0.851667,27.706347,113.392402,0.01,1000,False
9,2,5,0.362693,0.866867,28.58104,142.184038,0.01,1000,False


In [393]:
@torch.no_grad()
def get_all_pred(loader,model):
    all_preds = torch.tensor([])
    for images ,labels in loader:
        preds = model(images)
        all_preds = torch.cat((all_preds,preds),dim=0)
    return all_preds
        
    

In [394]:
loader = torch.utils.data.DataLoader(train_set ,batch_size = 10000 )
all_preds = get_all_pred(loader ,network )

In [395]:
all_preds.shape

torch.Size([60000, 10])

In [396]:
t1=torch.tensor([[1,2],[3,6]])
t2=torch.tensor([[3,5],[4,8]])  #this is an example to illustrate the concatenation operation in general
t3=torch.cat((t1,t2),dim=0)
print(t3)
t1=torch.tensor([[1,2],[3,6]])
t2=torch.tensor([[3,5],[4,8]])  #this is an example to illustrate the concatenation operation in general
t4=torch.cat((t1,t2),dim=1)
print(t4)

tensor([[1, 2],
        [3, 6],
        [3, 5],
        [4, 8]])
tensor([[1, 2, 3, 5],
        [3, 6, 4, 8]])


In [397]:
total_correct = get_correct_preds(all_preds ,train_set.targets)

print('total correct :',total_correct)
print('prediction accuracy :',total_correct/60000)


total correct : 53018
prediction accuracy : 0.8836333333333334


In [400]:
#creation of the confusion matrix
print(all_preds.argmax(dim=1))
print(train_set.targets)
stacked=torch.stack((all_preds.argmax(dim=1),train_set.targets), dim=1)


tensor([9, 0, 0,  ..., 3, 0, 5])
tensor([9, 0, 0,  ..., 3, 0, 5])


In [409]:
stacked

tensor([[9, 9],
        [0, 0],
        [0, 0],
        ...,
        [3, 3],
        [0, 0],
        [5, 5]])

In [412]:
cmt = torch.zeros((10,10),dtype = torch.int64)
cmt


tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [410]:
for k in stacked:
    (i ,j) = k.tolist()
    cmt[i][j] = cmt[i][j] + 1     #cmt is used to indicate the confusion matrix


In [411]:
cmt

tensor([[10622,    32,   106,   418,    12,     4,  2232,     0,    18,     2],
        [    6, 11558,     0,    20,     8,     2,     8,     0,     2,     0],
        [  224,    12, 10354,   102,  1168,     0,  1290,     0,   108,     0],
        [  230,   292,    58, 10682,   262,     4,   194,     0,    44,     0],
        [   22,    22,   796,   512,  9408,     0,   718,     0,    26,     0],
        [    0,     2,     2,     0,     0, 11352,     2,    86,    22,    30],
        [  788,    56,   614,   242,  1040,     6,  7392,     0,    84,     2],
        [    0,     0,     2,     0,     0,   336,     0, 11236,    24,   188],
        [  106,    26,    68,    22,    98,    44,   164,    12, 11670,    16],
        [    2,     0,     0,     2,     4,   252,     0,   666,     2, 11762]])

In [420]:
cmt=confusion_matrix(train_set.targets,all_preds.argmax(dim=1))
cmt #this is another way of plotting confusion matrix using the sklearn.metrics.confusion_matrix package

array([[5311,    3,  112,  115,   11,    0,  394,    0,   53,    1],
       [  16, 5779,    6,  146,   11,    1,   28,    0,   13,    0],
       [  53,    0, 5177,   29,  398,    1,  307,    1,   34,    0],
       [ 209,   10,   51, 5341,  256,    0,  121,    0,   11,    1],
       [   6,    4,  584,  131, 4704,    0,  520,    0,   49,    2],
       [   2,    1,    0,    2,    0, 5676,    3,  168,   22,  126],
       [1116,    4,  645,   97,  359,    1, 3696,    0,   82,    0],
       [   0,    0,    0,    0,    0,   43,    0, 5618,    6,  333],
       [   9,    1,   54,   22,   13,   11,   42,   12, 5835,    1],
       [   1,    0,    0,    0,    0,   15,    1,   94,    8, 5881]],
      dtype=int64)

In [426]:
hyper_parameter = dict( batch_size = [10 ,100 ,1000] ,lr = [0.01 ,0.001] ,shuffle = [True ,False])
hyper_parameter.values()

dict_values([[10, 100, 1000], [0.01, 0.001], [True, False]])