TensorBoard in PyTorch — With Train vs Validation

In [1]:
# Step 1: Setup

# Install TensorBoard if needed
# pip install tensorboard

# Imports
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.tensorboard import SummaryWriter


In [2]:
# Step 2: Load MNIST dataset (train + validation)

# Seed PyTorch's global RNG before anything random happens, so that weight
# initialisation and DataLoader shuffling repeat on "Restart Kernel -> Run All"
# (on the same machine and library versions).
SEED = 42
torch.manual_seed(SEED)

DATA_ROOT = './data'   # MNIST is downloaded here on first run
BATCH_SIZE = 64        # shared by the train and validation loaders

# Convert each image to a tensor; no further augmentation or normalisation.
transform = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
# The MNIST test split (train=False) serves as the validation set in this notebook.
testset  = torchvision.datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Shuffle only the training data; validation order does not matter.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader  = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

100.0%
100.0%
100.0%
100.0%


In [3]:
# Step 3: Define the model

class SimpleNet(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values in one flattened input sample
            (default ``28*28``).
        hidden_features: Width of the hidden layer (default ``128``).
        num_classes: Number of output scores per sample (default ``10``).
    """

    def __init__(self, in_features=28*28, hidden_features=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and return the raw class scores.

        No softmax is applied; the output of the second linear layer is
        returned as-is.
        """
        # reshape (rather than view) also accepts non-contiguous inputs such as
        # transposed images, where view would raise a RuntimeError.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

model = SimpleNet()

In [4]:
# Step 4: Initialize TensorBoard Writer
# Everything logged through `writer` in this notebook goes under runs/mnist_train_val.
writer = SummaryWriter(log_dir="runs/mnist_train_val")

In [5]:
# Step 5: Define loss and optimizer

# Classification loss applied to the model's raw output scores.
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of `model` with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [6]:
# Step 6: Training and validation loop (with TensorBoard logging)

NUM_EPOCHS = 5  # used for both the loop bound and the progress message


def run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: Network to evaluate or train.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
            It is assumed to return the mean loss over the batch, as
            ``nn.CrossEntropyLoss()`` does with its default reduction.
        optimizer: When given, the pass is a training pass (train mode,
            gradients enabled, one optimizer step per batch). When ``None``,
            the pass is evaluation only (eval mode, gradients disabled).

    Returns:
        A tuple of the per-sample mean loss and the accuracy in percent.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum, correct, seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            outputs = model(images)
            loss = criterion(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight each batch mean by its size so the short final batch does
            # not count as much as a full one.
            loss_sum += loss.item() * batch_size
            correct += outputs.argmax(dim=1).eq(labels).sum().item()
            seen += batch_size

    return loss_sum / seen, 100.0 * correct / seen


for epoch in range(NUM_EPOCHS):
    # -------------------- TRAIN --------------------
    avg_train_loss, train_accuracy = run_epoch(model, trainloader, criterion, optimizer)

    # -------------------- VALIDATION --------------------
    avg_val_loss, val_accuracy = run_epoch(model, testloader, criterion)

    # -------------------- LOG TO TENSORBOARD --------------------
    # add_scalars groups both series under one tag so they share a chart.
    writer.add_scalars("Loss", {"Train": avg_train_loss, "Validation": avg_val_loss}, epoch)
    writer.add_scalars("Accuracy", {"Train": train_accuracy, "Validation": val_accuracy}, epoch)

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | "
          f"Train Acc: {train_accuracy:.2f}% | Val Acc: {val_accuracy:.2f}%")

writer.flush()

Epoch [1/5] | Train Loss: 0.3463 | Val Loss: 0.1874 | Train Acc: 90.75% | Val Acc: 94.38%
Epoch [2/5] | Train Loss: 0.1578 | Val Loss: 0.1288 | Train Acc: 95.44% | Val Acc: 96.28%
Epoch [3/5] | Train Loss: 0.1084 | Val Loss: 0.1094 | Train Acc: 96.88% | Val Acc: 96.62%
Epoch [4/5] | Train Loss: 0.0822 | Val Loss: 0.0889 | Train Acc: 97.61% | Val Acc: 97.20%
Epoch [5/5] | Train Loss: 0.0644 | Val Loss: 0.0829 | Train Acc: 98.12% | Val Acc: 97.37%


In [7]:
# Step 7: Log sample images and the model graph (optional)

# Take one batch from the training loader; `images` and `labels` are reused
# by the embedding step further down.
images, labels = next(iter(trainloader))

# Tile the batch into a single image for the Images tab.
img_grid = vutils.make_grid(images)
writer.add_image('MNIST_Samples', img_grid)

# Trace the model on the sample batch so the Graphs tab has an architecture
# to display; nothing else in the notebook logs a graph.
writer.add_graph(model, images)
writer.flush()

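# Step 8: Launch TensorBoard
#
# Inside Jupyter, run these two magics in a cell:
#   %load_ext tensorboard
#   %tensorboard --logdir runs
#
# Or from a terminal opened in the notebook's working directory:
#   tensorboard --logdir=runs
#
# If you start the terminal somewhere else, point --logdir at the full path, e.g.:
#   tensorboard --logdir=learn/j_notebook/Learn_pytorch_deep_dive/runs
# ...and open a browser tab to http://localhost:6006/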

You’ll now see:

Two loss curves (Train vs Validation)

Two accuracy curves (Train vs Validation)
in the Scalars tab of TensorBoard.

In [8]:
# Step 9: Optional — visualize embeddings

# Use the first layer's 128-dimensional output (before the ReLU) as the
# embedding of each image in the sample batch taken in Step 7.
# This is for logging only, so skip building an autograd graph.
with torch.no_grad():
    features = model.fc1(images.view(-1, 28*28))

writer.add_embedding(
    mat=features,
    # One metadata string per sample: the digit label.
    metadata=[str(label) for label in labels.tolist()],
    label_img=images
)
# Logging is finished; close the writer.
writer.close()

What You’ll See in TensorBoard
Tab	        Visualization
Scalars	    Training vs Validation loss & accuracy curves
Graphs	    Model architecture (not automatic: requires a `writer.add_graph(model, images)` call)
Images	    Grid of MNIST sample images
Projector	2D/3D embedding visualization
HParams	    Compare runs (only if hyperparameters are logged with `writer.add_hparams`)