# Module 3 - Exercise 3: Monitoring & Visualization with TensorBoard

## Learning Objectives
- Understand how to use TensorBoard for training visualization
- Learn to log scalars, histograms, and model graphs
- Practice monitoring training metrics in real time
- Visualize weight distributions and gradient flow
- Compare multiple training runs

## Setup and Installation

First, let's install and import the necessary libraries.

In [None]:
# Install TensorBoard (if not already installed)
!pip install tensorboard -q

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import os

# Seed PyTorch and NumPy with the same value so results are reproducible
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Report the PyTorch version and whether CUDA can be used
cuda_available = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")

## Section 1: Introduction to TensorBoard

TensorBoard is a visualization toolkit that helps you understand, debug, and optimize your neural networks. Let's start with the basics.

In [None]:
# Give every experiment its own timestamped log directory under "runs/"
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
log_dir = f"runs/experiment_{timestamp}"

# Open a SummaryWriter that logs to that directory
writer = SummaryWriter(log_dir)

# Tell the user where the logs go and how to open them
print(
    f"TensorBoard logs will be saved to: {log_dir}",
    "\nTo view in TensorBoard, run:",
    f"tensorboard --logdir={log_dir}",
    sep="\n",
)

### Logging Scalar Values

The most common use case is logging training metrics like loss and accuracy.

In [None]:
# Simulate a training run and log its metrics as scalars.
# The loss values are synthetic: they decay over time with a little noise.
epochs = 100
initial_loss = 2.5

for epoch in range(epochs):
    # Exponentially decaying training loss plus Gaussian noise
    decay = np.exp(-0.05 * epoch)
    loss = initial_loss * decay + np.random.normal(0, 0.01)

    # Validation loss: training loss shifted up by 0.1, with its own noise
    val_loss = loss + 0.1 + np.random.normal(0, 0.02)

    # Accuracy rises towards 1; clip so the noise cannot push it outside [0, 1]
    accuracy = np.clip(
        1 - np.exp(-0.03 * epoch) + np.random.normal(0, 0.005), 0, 1
    )

    # writer.add_scalar(tag, value, step): tags sharing a "Group/" prefix
    # ('Loss/...') are logged under the same prefix
    metrics = {
        'Loss/train': loss,
        'Loss/validation': val_loss,
        'Accuracy/train': accuracy,
    }
    for tag, value in metrics.items():
        writer.add_scalar(tag, value, epoch)

print(f"Logged {epochs} epochs of training metrics")
writer.flush()  # Ensure all data is written

## Section 2: Visualizing Model Architecture

TensorBoard can visualize your model's computational graph, helping you understand the flow of data.

In [None]:
# Define a simple neural network
class SimpleNet(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Dropout -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output values per sample.
        dropout_p: Probability passed to ``nn.Dropout``. Defaults to 0.2,
            the value that was previously hard-coded.
    """

    def __init__(self, input_size=10, hidden_size=20, output_size=2, dropout_p=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through fc1, ReLU, dropout and fc2, and return the result."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Build the network with its default layer sizes
model = SimpleNet()

# add_graph is given an example input alongside the model; a single sample
# with 10 features matches SimpleNet's default input_size
dummy_input = torch.randn(1, 10)
writer.add_graph(model, dummy_input)

print(
    "Model graph added to TensorBoard",
    f"Model architecture:\n{model}",
    sep="\n",
)

## Section 3: Histogram Visualization

Histograms help you understand the distribution of weights and gradients during training.

In [None]:
# Create a slightly more complex model for demonstration
class DeepNet(nn.Module):
    """Fully connected network 10 -> 50 -> 30 -> 10 -> 2 with ReLU on the hidden layers."""

    def __init__(self):
        super().__init__()
        # Three hidden layers followed by a linear output layer
        self.layer1 = nn.Linear(10, 50)
        self.layer2 = nn.Linear(50, 30)
        self.layer3 = nn.Linear(30, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, x):
        """Apply each hidden layer followed by ReLU, then the output layer (no activation)."""
        hidden = x
        for layer in (self.layer1, self.layer2, self.layer3):
            hidden = torch.relu(layer(hidden))
        return self.output(hidden)

# Set up the model together with its optimizer and loss function
deep_model = DeepNet()
optimizer = optim.SGD(deep_model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# Train on freshly sampled random batches, logging histograms every epoch
for epoch in range(50):
    # Random batch: 32 samples with 10 features each, targets in {0, 1}
    inputs = torch.randn(32, 10)
    targets = torch.randint(0, 2, (32,))

    # Forward pass and loss
    outputs = deep_model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass: reset the old gradients, then compute new ones
    optimizer.zero_grad()
    loss.backward()

    # Histograms are logged before optimizer.step(), so the logged weights
    # are the ones that produced the logged gradients.
    for name, param in deep_model.named_parameters():
        grad = param.grad
        # Weight histograms only for weight tensors (biases are skipped)
        if 'weight' in name:
            writer.add_histogram(f'Weights/{name}', param, epoch)
        # Gradient histograms for every parameter that has a gradient
        if grad is not None:
            writer.add_histogram(f'Gradients/{name}', grad, epoch)

    # Apply the parameter update
    optimizer.step()

    # Print progress every 10th epoch
    if not epoch % 10:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

print("\nWeight and gradient histograms logged")

## Section 4: Comparing Multiple Runs

One powerful feature of TensorBoard is comparing different experiments. Let's simulate training with different hyperparameters.

In [None]:
# Close the writer used in the previous sections; each run started by
# train_with_lr() below opens (and closes) its own writer
writer.close()

# Function to simulate training with different learning rates
def train_with_lr(learning_rate, run_name):
    """Train a small regression model with SGD and log the run to TensorBoard.

    Every call logs to its own directory, ``runs/lr_comparison/<run_name>``,
    so runs with different learning rates can be compared side by side.

    Args:
        learning_rate: Learning rate passed to ``optim.SGD``.
        run_name: Name of the sub-directory under ``runs/lr_comparison``.

    Returns:
        The loss of the last training step as a Python float.
    """
    # Using the writer as a context manager guarantees it is closed even if a
    # training step raises, instead of leaking an open writer.
    with SummaryWriter(f'runs/lr_comparison/{run_name}') as writer:
        # Simple model
        model = nn.Sequential(
            nn.Linear(10, 20),
            nn.ReLU(),
            nn.Linear(20, 1)
        )

        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
        criterion = nn.MSELoss()

        # Training loop
        for step in range(100):
            # Generate synthetic data (inputs and targets are independent noise)
            X = torch.randn(32, 10)
            y = torch.randn(32, 1)

            # Forward pass
            predictions = model(X)
            loss = criterion(predictions, y)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Convert to a Python float once and reuse it for logging/return
            loss_value = loss.item()
            writer.add_scalar('Loss', loss_value, step)

            # Log learning rate (constant in this case)
            writer.add_scalar('Learning_Rate', learning_rate, step)

    return loss_value

# Sweep over several learning rates; each value is logged as its own run
learning_rates = [0.001, 0.01, 0.1]

for lr in learning_rates:
    # The run name doubles as the log sub-directory for this learning rate
    run_name = f'lr_{lr}'
    final_loss = train_with_lr(lr, run_name)
    print(f"Learning rate: {lr}, Final loss: {final_loss:.4f}")

print("\nAll runs completed! You can compare them in TensorBoard.")

## Section 5: Custom Visualizations

TensorBoard also supports custom images and figures from matplotlib.

In [None]:
def _plot_training_progress(train_losses, val_losses, epoch):
    """Return a two-panel figure: loss curves (left) and val-minus-train gap (right)."""
    fig, (loss_ax, gap_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: training and validation loss so far
    loss_ax.plot(train_losses, label='Train Loss', color='blue')
    loss_ax.plot(val_losses, label='Val Loss', color='red')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title(f'Training Progress - Epoch {epoch}')
    loss_ax.legend()
    loss_ax.grid(True, alpha=0.3)

    # Right panel: difference between validation and training loss
    gap = np.array(val_losses) - np.array(train_losses)
    gap_ax.plot(gap, color='green')
    gap_ax.set_xlabel('Epoch')
    gap_ax.set_ylabel('Val - Train Loss')
    gap_ax.set_title('Generalization Gap')
    gap_ax.grid(True, alpha=0.3)
    gap_ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)

    return fig


# Dedicated writer for the custom visualizations
writer = SummaryWriter('runs/custom_viz')

# Synthetic loss history that grows by one entry per epoch
epochs = 50
train_losses = []
val_losses = []

for epoch in range(epochs):
    train_loss = 2.0 * np.exp(-0.1 * epoch) + np.random.normal(0, 0.05)
    val_loss = train_loss + 0.2 + np.random.normal(0, 0.05)

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Only every 10th epoch gets a figure
    if epoch % 10 != 0:
        continue

    # Render the history so far and attach it to TensorBoard at this step
    fig = _plot_training_progress(train_losses, val_losses, epoch)
    writer.add_figure('Training_Analysis', fig, epoch)

    plt.close(fig)  # Close to free memory

print("Custom visualizations added to TensorBoard")

## Section 6: Embedding Visualization

TensorBoard can visualize high-dimensional embeddings in 2D or 3D using techniques like PCA or t-SNE.

In [None]:
# Create synthetic embeddings
n_samples = 100
embedding_dim = 128
n_classes = 3

# Per-sample embedding vector, class id and human-readable name
embeddings = []
labels = []
metadata = []

# Spread n_samples over the classes. The first `remainder` classes get one
# extra sample so the total is exactly n_samples; plain floor division
# (100 // 3) would silently produce only 99 embeddings.
base_count, remainder = divmod(n_samples, n_classes)

for class_id in range(n_classes):
    # Create clustered embeddings for each class: a random center plus
    # smaller per-sample noise
    class_center = torch.randn(embedding_dim) * 2
    class_count = base_count + (1 if class_id < remainder else 0)
    for i in range(class_count):
        embedding = class_center + torch.randn(embedding_dim) * 0.5
        embeddings.append(embedding)
        labels.append(class_id)
        metadata.append(f"Class_{class_id}_sample_{i}")

# Stack all embeddings into a single (n_samples, embedding_dim) tensor
embeddings = torch.stack(embeddings)

# Add embeddings to TensorBoard.
# Both the class id and the sample name are attached to every point: with
# `metadata_header`, each metadata row is one tuple with one entry per column.
writer.add_embedding(
    embeddings,
    metadata=list(zip(labels, metadata)),
    metadata_header=['class', 'sample'],
    tag='class_embeddings'
)

print(f"Added {len(embeddings)} embeddings to TensorBoard")
print("You can visualize them in the 'Projector' tab of TensorBoard")

## Starting TensorBoard

To view all the visualizations we've created, you need to start TensorBoard. In Jupyter/Colab, use the magic commands in the cell below; when working locally, you can instead run `tensorboard --logdir=runs` in a terminal:

In [None]:
# Load the TensorBoard notebook extension (for Jupyter/Colab)
%load_ext tensorboard

# Start TensorBoard inline (for Jupyter/Colab), pointed at the parent "runs"
# directory so that every run logged under it in this notebook is listed
%tensorboard --logdir runs

# Note: If running locally, you can also start TensorBoard from a terminal:
# tensorboard --logdir=runs --port=6006

In [None]:
# Clean up - close the writer that is still open
writer.close()

# Closing messages, printed one per line
closing_messages = (
    "Exercise complete! 🎉",
    "\nYou've learned the essential TensorBoard features for monitoring deep learning experiments.",
    "Remember to check the TensorBoard interface to explore all the visualizations you've created!",
)
for message in closing_messages:
    print(message)