# TensorBoard with PyTorch - Metrics Analysis

In [3]:
import torch
# Show which PyTorch version this notebook was executed with
print(torch.__version__)

1.3.1


In [5]:
from torch.utils.tensorboard import SummaryWriter

In [6]:
# Shell escape: ask the TensorBoard command-line tool for its version
!tensorboard --version

2.0.0


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transform


class Network(nn.Module):
    """Small CNN classifier: two conv + max-pool stages followed by three linear layers.

    The linear layers are sized for 28x28 inputs (e.g. FashionMNIST) and produce
    10 output scores per image.

    Args:
        channels: number of channels in the input images (default 1, grayscale).
    """

    def __init__(self, channels=1): # default grayscale
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=channels, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Spatial size after both conv/pool stages for a 28x28 input:
        # ((28-5+1)/2 - 5 + 1)/2 = 4, hence 12 feature maps of 4x4.
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Compute class scores for a batch of images.

        Args:
            t: tensor of shape (batch, channels, 28, 28).

        Returns:
            Tensor of shape (batch, 10) with unnormalised scores (no softmax applied).

        Raises:
            RuntimeError: if the spatial size of `t` does not reduce to 4x4, because
                the flattened features then no longer match `fc1`.
        """
        # hidden conv layers, conv w/ relu activation -> max pool
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # hidden fully connected layers
        # Flatten per sample and keep the batch dimension fixed. A reshape(-1, 12*4*4)
        # would silently redistribute elements across rows for inputs of another size
        # and return the wrong number of predictions instead of failing.
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        # output layer
        t = self.out(t)
        return t

In [8]:
def get_num_correct(preds, labels):
    """Count how many rows of `preds` have their largest score at the index given in `labels`.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: tensor of target class indices, one per sample.

    Returns:
        The number of matching predictions as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes == labels
    return hits.sum().item()

## TensorBoard: TensorFlow's Visualization Toolkit
TensorBoard provides the visualization and tooling needed for machine learning experimentation:

- Tracking and visualizing metrics such as loss and accuracy
- Visualizing the model graph (ops and layers)
- Viewing histograms of weights, biases, or other tensors as they change over time
- Projecting embeddings to a lower dimensional space
- Displaying images, text, and audio data
- Profiling TensorFlow programs
- And much more

In [10]:
# FashionMNIST training split stored under ./data (downloaded when missing);
# every image goes through ToTensor before being returned.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform.Compose([transform.ToTensor()]),
)

In [11]:
# Iterate over the training set in shuffled mini-batches of 128 samples
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=128,
    shuffle=True,
)

## Network Graph And Training Set Images

The `SummaryWriter` class comes with a number of methods that we can call to selectively choose which data we want to make available to TensorBoard.

We'll start by passing our network and a batch of images to the writer.

In [14]:
# Writer used to send data to TensorBoard (created with no arguments)
tb = SummaryWriter()


# Fresh network plus one sample batch from the training loader
network = Network()
images, labels = next(iter(train_loader))
# Combine the batch of images into a single grid image for logging
grid = torchvision.utils.make_grid(images)

In [15]:
# Log the image grid under the tag 'images' and the network graph
# (using the sample batch as example input), then close the writer.
tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

In [17]:
# Create a fresh network and an Adam optimizer over its parameters
network = Network()
optimizer = optim.Adam(network.parameters(), lr=0.001)

# Initialize tensorboard: new writer, then log the image grid and the model graph.
# `grid` and `images` are the ones produced by an earlier cell.
tb = SummaryWriter()            # from torch.utils.tensorboard import SummaryWriter
tb.add_image('images', grid)
tb.add_graph(network, images)


# Training: 10 passes over train_loader
for epoch in range(10): 
    # Per-epoch accumulators, reset at the start of every epoch
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader:
        images, labels = batch 
        preds = network(images)
        
        loss = F.cross_entropy(preds, labels) # loss function
        optimizer.zero_grad()                 # clear gradients left from the previous step
        
        loss.backward()         # backpropagate: compute gradients of the loss w.r.t. the parameters
        optimizer.step()        # update the weights using Adam

        # Sum of the per-batch loss values and of the correct predictions
        total_loss += loss.item() 
        total_correct += get_num_correct(preds, labels)
    
    # Scalars logged once per epoch, with the epoch index as the step
    tb.add_scalar('Loss', total_loss, epoch)
    tb.add_scalar('Number Correct', total_correct, epoch)
    tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

    # Histograms of the first conv layer's parameters; the gradient is the one
    # left by the last backward() call of this epoch.
    tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
    tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
    tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

    print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

tb.close()

epoch 0 total_correct: 41126 loss: 390.1810419559479
epoch 1 total_correct: 47761 loss: 254.7536964416504
epoch 2 total_correct: 49847 loss: 219.45858508348465
epoch 3 total_correct: 50799 loss: 198.94785119593143
epoch 4 total_correct: 51440 loss: 185.06912292540073
epoch 5 total_correct: 51822 loss: 175.7590073943138
epoch 6 total_correct: 52123 loss: 167.45953722298145
epoch 7 total_correct: 52458 loss: 160.68897560238838
epoch 8 total_correct: 52626 loss: 156.17585119605064
epoch 9 total_correct: 52874 loss: 151.31312596797943


## Hyperparameter Tuning and Experimenting

In [21]:
from itertools import product

In [22]:
# Hyperparameter search space: each key maps to the candidate values to try
parameters = {
    'lr': [0.01, 0.001],
    'batch_size': [16, 128, 1024],
    'shuffle': [True, False],
}

In [23]:
# Candidate lists in the same order as the keys of `parameters`
param_values = list(parameters.values())
param_values

[[0.01, 0.001], [16, 128, 1024], [True, False]]

In [24]:
# Preview every combination of the hyperparameter values (Cartesian product)
for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

0.01 16 True
0.01 16 False
0.01 128 True
0.01 128 False
0.01 1024 True
0.01 1024 False
0.001 16 True
0.001 16 False
0.001 128 True
0.001 128 False
0.001 1024 True
0.001 1024 False


In [25]:
# Grid search: train a fresh network for every (lr, batch_size, shuffle) combination
# and log each run through its own SummaryWriter so the runs can be compared in TensorBoard.
for lr, batch_size, shuffle in product(*param_values):
    # Suffix added to the run's log directory name; it identifies the run in TensorBoard
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'

    network = Network()
    # `shuffle` must be forwarded to the loader: without it every run iterates in
    # dataset order and the shuffle=True / shuffle=False runs are the same experiment.
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=shuffle
    )

    optimizer = optim.Adam(
        network.parameters(), lr=lr
    )
    tb = SummaryWriter(comment=comment)
    # `grid` and `images` come from earlier cells / the previous run's last batch
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    # Training
    for epoch in range(10):
        # Per-epoch accumulators
        total_loss = 0
        total_correct = 0

        for batch in train_loader:
            images, labels = batch
            preds = network(images)

            loss = F.cross_entropy(preds, labels) # loss function (mean over the batch)
            optimizer.zero_grad()                 # clear gradients left from the previous step

            loss.backward()         # backpropagate: compute gradients of the loss
            optimizer.step()        # update the weights using Adam

            # Convert the batch-mean loss back to a per-sample sum so runs with
            # different batch sizes are comparable. Use the actual number of samples
            # in this batch: the last batch is smaller whenever len(train_set) is
            # not a multiple of batch_size.
            total_loss += loss.item() * images.shape[0]
            total_correct += get_num_correct(preds, labels)

        # Scalars logged once per epoch, with the epoch index as the step
        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        # Histogram of every parameter and of its gradient from the epoch's last batch
        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

    tb.close()

epoch 0 total_correct: 44622 loss: 40696.588181972504
epoch 1 total_correct: 47819 loss: 32704.234094321728
epoch 2 total_correct: 48305 loss: 31529.556753486395
epoch 3 total_correct: 48796 loss: 30624.589919894934
epoch 4 total_correct: 48982 loss: 30358.130439311266
epoch 5 total_correct: 49110 loss: 29695.127415567636
epoch 6 total_correct: 49046 loss: 29879.626960217953
epoch 7 total_correct: 49303 loss: 29182.038885638118
epoch 8 total_correct: 49146 loss: 30156.863276034594
epoch 9 total_correct: 49315 loss: 29328.800338461995
epoch 0 total_correct: 46608 loss: 35205.80108290911
epoch 1 total_correct: 49715 loss: 28250.308654636145
epoch 2 total_correct: 50200 loss: 27314.599574416876
epoch 3 total_correct: 50442 loss: 26323.78204035759
epoch 4 total_correct: 50295 loss: 27272.679124072194
epoch 5 total_correct: 50515 loss: 26456.09761375189
epoch 6 total_correct: 50658 loss: 26297.1187017411
epoch 7 total_correct: 50297 loss: 27639.12085402757
epoch 8 total_correct: 50522 loss: