# Using TensorBoard with PyTorch

- TensorBoard, as mentioned earlier, is a visualization toolkit we could use in order to further understand our model
    - It essentially reads data from a file and displays it
        - Track and visualize metrics (loss, accuracy)
        - Network graph
        - Histograms of weights and biases
- In order to use TensorBoard, we need to write the data into a file that TensorBoard can read
    - PyTorch has a utility class which we can use for this called SummaryWriter 
- Run the following
    - **tensorboard --logdir=runs**
    - The above command starts the TensorBoard server, which reads the event files that the SummaryWriter class has written to the `runs` directory (the UI is served at http://localhost:6006 by default)
- One of the key things we could do with TensorBoard is to compare multiple runs
    - This will allow us to compare runs side by side
    - We can use this to experiment and find out which hyperparameters work best
    


In [1]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms 
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# print format
torch.set_printoptions(linewidth=120) 
from torch.utils.tensorboard import SummaryWriter
#! tensorboard --version

#### Define Network

In [2]:
# Implementing the forward method
class Network(nn.Module):
    """Small convolutional classifier: two conv layers followed by three linear layers.

    The flatten step assumes each sample leaves the second pooling stage as a
    12 x 4 x 4 feature map, which is what a 1 x 28 x 28 input produces
    (28 -> 24 -> 12 after conv1/pool, 12 -> 8 -> 4 after conv2/pool).

    ``forward`` returns raw, un-normalised scores for 10 classes; softmax is
    deliberately left out because the cross-entropy loss applies it implicitly.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stage: 1 input channel -> 6 filters -> 12 filters, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Dense stage: 12*4*4 flattened features -> 120 -> 60 -> 10 outputs.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run a batch ``t`` through the network and return the class scores."""
        # Each conv block is: convolution -> ReLU -> 2x2 max pooling with stride 2.
        features = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)

        # Flatten every sample to a vector of 12*4*4 values for the linear layers.
        flat = features.reshape(-1, 12 * 4 * 4)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))

        # No softmax here: the loss function applies it implicitly.
        return self.out(hidden)

In [3]:
# Additional items required
# FashionMNIST training split, stored under ./Documents/data.
train_set = torchvision.datasets.FashionMNIST(
    root='./Documents/data',
    train=True,
    download=True,  # downloads it locally (checks existence beforehand)
    transform=transforms.Compose([
        transforms.ToTensor(),  # built-in tensor transformer
    ]),
)

def get_num_correct(preds, labels):
    """Return how many rows of ``preds`` have their largest score at the labelled index.

    ``preds`` is indexed as (sample, class) because the arg-max is taken over
    ``dim=1``; ``labels`` holds one class index per sample. The count is
    returned as a plain Python number via ``.item()``.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

In [4]:
# Single training run whose metrics are logged for TensorBoard.
network = Network()

batch_size = 100
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
optimizer = optim.Adam(network.parameters(), lr=0.01)

# One sample batch, used only for the image grid and for tracing the graph.
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
# try/finally guarantees the writer is flushed and closed even when training
# is interrupted; otherwise buffered events may never reach the log file.
try:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    num_epochs = 10
    # loop over all epochs
    for epoch in range(num_epochs):

        # per-epoch accumulators
        total_loss = 0
        total_correct = 0

        # loop over all batches in the train loader
        for batch in train_loader:
            images, labels = batch

            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()  # zero grad because pytorch accumulates gradient
            loss.backward()  # calculate gradients
            optimizer.step()  # update weights

            # total_loss is the sum of the per-batch mean losses
            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

        # Add metrics to TensorBoard
        # scalar -> tag, value, epoch
        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct/len(train_set), epoch)
        # histograms (the gradient is the one left by the epoch's last batch)
        tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
        tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
        tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

        # print information for selected epochs
        if (epoch+1) % 2 == 0:
            print("Epoch: ", epoch+1, "\n\tAccuracy (%):", total_correct/len(train_set),
              "\n\tLoss ", total_loss)
finally:
    tb.close()


# Optimizer steps per epoch (dataset size divided by batch size).
print("\nNumber of steps taken towards the loss minimum:", len(train_set)/batch_size)

Epoch:  2 
	Accuracy (%): 0.8564666666666667 
	Loss  232.38755886256695
Epoch:  4 
	Accuracy (%): 0.8715833333333334 
	Loss  205.62885503470898
Epoch:  6 
	Accuracy (%): 0.8790166666666667 
	Loss  195.41866463422775
Epoch:  8 
	Accuracy (%): 0.8842333333333333 
	Loss  187.49598574638367
Epoch:  10 
	Accuracy (%): 0.8878666666666667 
	Loss  181.07690523564816

Number of steps taken towards the loss minimum: 600.0


## Rapid Experimentation -- Hyperparameter Tuning

- We will now modify the code above to show how TensorBoard can be used in order to tune hyperparameters

In [9]:
# Import python tool to elegantly create hyperparameter combinations
from itertools import product

# Hyperparameters: every combination of these values gets its own training run
parameters = dict(
    lr = [0.01, 0.001],
    batch_size = [10, 100, 1000],
    shuffle = [True, False]
)

param_values = list(parameters.values())

# Loop over the Cartesian product of param_values (one TensorBoard run each)
for lr, batch_size, shuffle in product(*param_values):

    network = Network()

    # shuffle must be forwarded to the loader; otherwise the shuffle=True and
    # shuffle=False runs would be configured identically.
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=shuffle
    )
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One sample batch, used only for the image grid and for tracing the graph.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    # The comment is appended to the run's directory name, which makes the
    # runs distinguishable in the TensorBoard UI.
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    tb = SummaryWriter(comment=comment)

    # try/finally guarantees each run's writer is flushed and closed, even if
    # training is interrupted, before the next run opens its own writer.
    try:
        tb.add_image('images', grid)
        tb.add_graph(network, images)

        num_epochs = 5
        # loop over all epochs
        for epoch in range(num_epochs):

            # per-epoch accumulators
            total_loss = 0
            total_correct = 0

            # loop over all batches in the train loader
            for batch in train_loader:
                images, labels = batch

                preds = network(images)
                loss = F.cross_entropy(preds, labels)

                optimizer.zero_grad()  # zero grad because pytorch accumulates gradient
                loss.backward()  # calculate gradients
                optimizer.step()  # update weights

                # cross_entropy returns the batch mean, so re-weight by the
                # actual number of samples in this batch to make the totals
                # comparable across batch sizes (also correct for a partial
                # final batch).
                total_loss += loss.item() * len(labels)
                total_correct += get_num_correct(preds, labels)

            # Add metrics to TensorBoard
            # scalar -> tag, value, epoch
            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct/len(train_set), epoch)
            # histograms (the gradient is the one left by the epoch's last batch)
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

            # print information for selected epochs
            if (epoch+1) % 2 == 0:
                print("Epoch: ", epoch+1, "\n\tAccuracy (%):", total_correct/len(train_set),
                  "\n\tLoss ", total_loss)
    finally:
        tb.close()


    # Optimizer steps per epoch for this run (dataset size divided by batch size).
    print("\nNumber of steps taken towards the loss minimum:", len(train_set)/batch_size)

Epoch:  2 
	Accuracy (%): 0.8102333333333334 
	Loss  31454.31646344252
Epoch:  4 
	Accuracy (%): 0.8166333333333333 
	Loss  31274.454530994408

Number of steps taken towards the loss minimum: 6000.0
Epoch:  2 
	Accuracy (%): 0.7854666666666666 
	Loss  35716.84761739336
Epoch:  4 
	Accuracy (%): 0.8164833333333333 
	Loss  31106.936360271648

Number of steps taken towards the loss minimum: 6000.0
Epoch:  2 
	Accuracy (%): 0.8579333333333333 
	Loss  22807.459658384323
Epoch:  4 
	Accuracy (%): 0.8764166666666666 
	Loss  19858.607479929924

Number of steps taken towards the loss minimum: 600.0
Epoch:  2 
	Accuracy (%): 0.8577333333333333 
	Loss  22955.647145211697
Epoch:  4 
	Accuracy (%): 0.87405 
	Loss  20577.574764192104

Number of steps taken towards the loss minimum: 600.0
Epoch:  2 
	Accuracy (%): 0.8013666666666667 
	Loss  31078.93568277359
Epoch:  4 
	Accuracy (%): 0.8598166666666667 
	Loss  22521.239042282104

Number of steps taken towards the loss minimum: 60.0
Epoch:  2 
	Accura

# Resources
 
- https://deeplizard.com/learn/video/pSexXMdruFM
- https://deeplizard.com/learn/video/ycxulUVoNbk