In [15]:
import torch 
import numpy as np
import pandas  as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)
from torch.utils.tensorboard import SummaryWriter

In [16]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels= 1, out_channels= 6 , kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels= 6, out_channels= 12 , kernel_size=5)
        
        self.fc1 = nn.Linear(in_features= 12*4*4 , out_features= 120)
        self.fc2 = nn.Linear(in_features= 120, out_features= 60)
        self.out = nn.Linear(in_features= 60, out_features= 10)

    def forward(self,t):
        
        #1 inpput layer 
        
        t = t
        
        #2 hidden conv layer
        
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2 , stride = 2)
        
        #3 hidden conv layer 
        
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2 , stride = 2)
        
        #4 Linear layer 
        
        t = t.reshape(-1, 12*4*4)     #flattening is hapening here
        t = self.fc1(t)
        t = F.relu(t)

        
        #5 Linear layer 
        
        t = self.fc2(t)
        t = F.relu(t)
        
        #6 output layer
        
        t = self.out(t)
        # t = F.softmax(t,dim = 0)    but this line is not required because we will predict the output later and softmax will be used explicitly later
        return t
        
#loading data from url of Mnist

train_set = torchvision.datasets.FashionMNIST(
        root = './data'
        ,train = True
        ,download=True
        ,transform = transforms.Compose([
            transforms.ToTensor()
        ])
    
)

        
data_loader = torch.utils.data.DataLoader(train_set,
                    batch_size= 100                  
                    )


In [17]:
from itertools import product
parameters = dict(
             batch_size = [100,1000,10000],
lr = [.01,.001],
shuffle = [True,False]
)
param_values = [v for v in parameters.values()]
param_values

[[100, 1000, 10000], [0.01, 0.001], [True, False]]

In [18]:
def get_num_correct(preds,labels):
    
    return preds.argmax(dim = 1).eq(labels).sum().item()



In [19]:
for batch_size,lr,shuffle in product(*param_values):
    print(batch_size,lr,shuffle)

100 0.01 True
100 0.01 False
100 0.001 True
100 0.001 False
1000 0.01 True
1000 0.01 False
1000 0.001 True
1000 0.001 False
10000 0.01 True
10000 0.01 False
10000 0.001 True
10000 0.001 False


In [25]:
test_net = Network()
for batch_size,lr,shuffle in product(*param_values):
    print(batch_size,lr,shuffle)
    comment = f'batch size = {batch_size},lr = {lr},shuffle = {shuffle}'
    
    
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size ,shuffle=shuffle)
    images,labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)
    

    tb = SummaryWriter(comment = comment)
    tb.add_image('images',grid)
    tb.add_graph(test_net,images)

    optimizer = optim.Adam(test_net.parameters(), lr=lr)


    for epoch in range(3):

        total_loss = 0
        total_correct = 0
        for batch in train_loader:                      # Get Batch
            images, labels = batch

            preds = test_net(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad()
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            total_loss += loss.item()*images.shape[0]               #using it to generalise the loss for different batch sizes
            total_correct += get_num_correct(preds,labels)

        tb.add_scalar("loss",total_loss,epoch)
        tb.add_scalar("total correct", total_correct,epoch)
        tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)

        tb.add_histogram("conv1 bias", test_net.conv1.bias, epoch)
        tb.add_histogram("conv1.weight", test_net.conv1.weight,epoch)
        tb.add_histogram("conv1.weight.grad", test_net.conv1.weight.grad,epoch)

        print("epoch:", epoch  , "total loss:", total_loss, "total correct:" ,total_correct )

tb.close()


100 0.01 True
epoch: 0 total loss: 36440.217581391335 total correct: 46108
epoch: 1 total loss: 23586.867792904377 total correct: 51198
epoch: 2 total loss: 21597.924087941647 total correct: 51903
100 0.01 False
epoch: 0 total loss: 21190.988355875015 total correct: 52044
epoch: 1 total loss: 20417.86715835333 total correct: 52312
epoch: 2 total loss: 19859.13606584072 total correct: 52569
100 0.001 True
epoch: 0 total loss: 16326.094368845224 total correct: 53832
epoch: 1 total loss: 15301.605065912008 total correct: 54114
epoch: 2 total loss: 14822.40756303072 total correct: 54273
100 0.001 False
epoch: 0 total loss: 14477.373493462801 total correct: 54384
epoch: 1 total loss: 14109.046341478825 total correct: 54508
epoch: 2 total loss: 13822.247599065304 total correct: 54589
1000 0.01 True
epoch: 0 total loss: 15180.344849824905 total correct: 54101
epoch: 1 total loss: 14249.944567680359 total correct: 54500
epoch: 2 total loss: 14089.283168315887 total correct: 54559
1000 0.01 Fal