In [1]:
%load_ext tensorboard

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import datetime
from torch.utils.tensorboard import SummaryWriter
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import random_split, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator

# =======================
# Initialization Section
# =======================

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

train_batch_size = 64  # Size of batches for training

# Split the training dataset into training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_subset, batch_size=train_batch_size, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=1000, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Define a simple neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)  # First fully connected layer (input: 28x28 pixels, output: 128 units)
        self.fc2 = nn.Linear(128, 10)     # Second fully connected layer (input: 128 units, output: 10 units for 10 classes)

    def forward(self, x):
        x = x.view(-1, 28*28)             # Flatten the input image
        x = torch.relu(self.fc1(x))       # Apply ReLU activation to the output of the first layer
        x = self.fc2(x)                   # Output layer
        return x

# List of optimizers with names
optimizers = [
    ("SGD", lambda params: optim.SGD(params, lr=0.1)),  # Standard SGD
    ("SGD-Momentum", lambda params: optim.SGD(params, lr=0.01, momentum=0.9)),  # Momentum
    ("SGD-Nesterov", lambda params: optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True)),  # Nesterov
    ("Adagrad", lambda params: optim.Adagrad(params, lr=0.01, )),  # Adagrad
    ("RMSprop", lambda params: optim.RMSprop(params, lr=0.001)),  # RMSprop
    ("Adam", lambda params: optim.Adam(params, lr=0.001)),  # Adam
]

# List of schedulers
schedulers = [
    lambda opt: lr_scheduler.ExponentialLR(opt, gamma=0.9),
    lambda opt: lr_scheduler.OneCycleLR(opt, max_lr=0.1, steps_per_epoch=len(train_loader), epochs=20, anneal_strategy='linear'),
    lambda opt: lr_scheduler.StepLR(opt, step_size=5),
    lambda opt: lr_scheduler.ReduceLROnPlateau(opt),
    lambda opt: lr_scheduler.PolynomialLR(opt),
    None  # No scheduler
]

Using TensorFlow backend.


ValueError: numpy.ufunc size changed, may indicate binary incompatibility. Expected 232 from C header, got 216 from PyObject

In [None]:
lr_rates = []
# Loop over each optimizer and scheduler
for optimizer_name, optimizer_fn in optimizers:
    for scheduler_fn in schedulers:
        # Skip OneCycleLR with Adagrad because it doesn't work
        if optimizer_name == "Adagrad" and scheduler_fn == schedulers[1]:
            continue

        # Initialize the network and optimizer
        model = Net()
        optimizer = optimizer_fn(model.parameters())

        # Initialize the learning rate scheduler, if any
        scheduler = scheduler_fn(optimizer) if scheduler_fn else None

        # Log the optimizer and scheduler names
        scheduler_name = type(scheduler).__name__ if scheduler else "NoScheduler"

        # Create a directory for logs
        logdir = os.path.join("runs", f'{optimizer_name}-{scheduler_name}')

        # Initialize the SummaryWriter with the log directory
        writer = SummaryWriter(logdir)

        # Define the loss function
        criterion = nn.CrossEntropyLoss()

        # Training loop
        num_epochs = 20  # Number of epochs to train
        for epoch in range(1, num_epochs + 1):
            total_loss = 0  # Initialize total loss for the epoch
            model.train()

            for batch_idx, (data, target) in enumerate(train_loader):
                optimizer.zero_grad()  # Clear the gradients
                output = model(data)   # Forward pass: compute the model output
                loss = criterion(output, target)  # Compute the loss
                loss.backward()  # Backward pass: compute gradient of the loss w.r.t. model parameters
                optimizer.step()  # Update model parameters
                total_loss += loss.item()  # Accumulate the loss

            avg_loss = total_loss / train_batch_size
            writer.add_scalar("Train Loss", avg_loss, epoch)  # Log the test accuracy

            current_lr = optimizer.param_groups[0]['lr']
            lr_rates.append(current_lr)
            print(f'Epoch {epoch}, Current Learning Rate: {current_lr}')

            if scheduler:
                if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                    scheduler.step(avg_loss)
                else:
                    scheduler.step()

            # Print training status
            print(f"Epoch [{epoch}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")

            model.eval()  # Set the model to evaluation mode
            val_correct = 0
            val_total = 0
            val_loss = 0
            with torch.no_grad():  # Disable gradient calculation for evaluation
                for data, target in val_loader:
                    output = model(data)  # Forward pass: compute the model output
                    val_loss += criterion(output, target).item()  # Compute the loss
                    _, predicted = torch.max(output.data, 1)  # Get the index of the max log-probability (predicted class)
                    val_total += target.size(0)  # Accumulate the total number of samples
                    val_correct += (predicted == target).sum().item()  # Accumulate the number of correct predictions

            val_accuracy = 100 * val_correct / val_total  # Calculate accuracy as a percentage
            writer.add_scalar("Validation Accuracy", val_accuracy, epoch)  # Log the validation accuracy
            writer.add_scalar("Validation Loss", val_loss / len(val_loader), epoch)  # Log the validation loss
            print(f"Epoch [{epoch}/{num_epochs}], Validation Accuracy: {val_accuracy:.2f}%, Validation Loss: {val_loss / len(val_loader):.4f}")

        # Evaluation on test set
        correct = 0
        total = 0
        all_targets = []
        all_predictions = []
        with torch.no_grad():  # Disable gradient calculation for evaluation
            for data, target in test_loader:
                output = model(data)  # Forward pass: compute the model output
                _, predicted = torch.max(output.data, 1)  # Get the index of the max log-probability (predicted class)
                total += target.size(0)  # Accumulate the total number of samples
                correct += (predicted == target).sum().item()  # Accumulate the number of correct predictions
                all_targets.extend(target.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())

        # Calculate different metrics for the test set
        accuracy = 100 * correct / total
        precision = precision_score(all_targets, all_predictions, average='macro')
        recall = recall_score(all_targets, all_predictions, average='macro')
        f1 = f1_score(all_targets, all_predictions, average='macro')

        # Write the metrics to the event file
        writer.add_scalar("Test Accuracy", accuracy)  # Log the test accuracy
        writer.add_scalar("Test Precision", precision)
        writer.add_scalar("Test Recall", recall)
        writer.add_scalar("Test F1 Score", f1)

        # Console output
        print(f'Accuracy on test set: {accuracy:.2f}%')
        print(f'Precision on test set: {precision:.4f}')
        print(f'Recall on test set: {recall:.4f}')
        print(f'F1 Score on test set: {f1:.4f}')

        writer.close()  # Close the TensorBoard writer

        # =======================
        # Save TensorBoard Logs
        # =======================

        # Specify the directory where the TensorBoard logs are saved
        event_file = f'{optimizer_name}-{scheduler_name}'
        log_dir = f'./runs/{event_file}'

        # Get the list of event files in the directory
        event_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir) if 'events.out.tfevents' in f]

        # Initialize an empty list to store data
        data = []

        # Initialize an event accumulator for each event file
        for event_file in event_files:
            ea = event_accumulator.EventAccumulator(event_file)
            ea.Reload()  # Load the event file

            # Extract scalar data
            tags = ea.Tags()['scalars']
            for tag in tags:
                events = ea.Scalars(tag)
                for event in events:
                    step = event.step
                    value = event.value
                    wall_time = event.wall_time
                    data.append([tag, step, value, wall_time])

        # Convert the data into a DataFrame
        df = pd.DataFrame(data, columns=['Tag', 'Step', 'Value', 'Wall time'])

        # Save the DataFrame to an Excel file
        output_file = f'tensorboard_logs_{optimizer_name}_{scheduler_name}.xlsx'
        df.to_excel(output_file, index=False)

        print(f"Data has been written to {output_file}")

Epoch 1, Current Learning Rate: 0.004
Epoch [1/20], Loss: 0.5883
Epoch [1/20], Validation Accuracy: 90.95%, Validation Loss: 0.3074
Epoch 2, Current Learning Rate: 0.004021338075127806
Epoch [2/20], Loss: 0.2737
Epoch [2/20], Validation Accuracy: 93.15%, Validation Loss: 0.2380
Epoch 3, Current Learning Rate: 0.004042676150255612
Epoch [3/20], Loss: 0.2174
Epoch [3/20], Validation Accuracy: 94.46%, Validation Loss: 0.1955
Epoch 4, Current Learning Rate: 0.004064014225383419
Epoch [4/20], Loss: 0.1794
Epoch [4/20], Validation Accuracy: 95.03%, Validation Loss: 0.1725
Epoch 5, Current Learning Rate: 0.004085352300511225
Epoch [5/20], Loss: 0.1516
Epoch [5/20], Validation Accuracy: 95.40%, Validation Loss: 0.1594
Epoch 6, Current Learning Rate: 0.004106690375639031
Epoch [6/20], Loss: 0.1321
Epoch [6/20], Validation Accuracy: 95.86%, Validation Loss: 0.1415
Epoch 7, Current Learning Rate: 0.004128028450766837
Epoch [7/20], Loss: 0.1160
Epoch [7/20], Validation Accuracy: 96.37%, Validation

Logging

In [None]:
import os
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator

# Specify the directory where the TensorBoard logs are saved
'''
!!! Hier anpassen, welches Event ihr auslesen wollt. !!!
'''
event_file = '07-16_12-01-Adam-PolynomialLR'

log_dir = f'./runs/{event_file}'

# Get the list of event files in the directory
event_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir) if 'events.out.tfevents' in f]

# Initialize an empty list to store data
data = []

# Initialize an event accumulator for each event file
for event_file in event_files:
    ea = event_accumulator.EventAccumulator(event_file)
    ea.Reload()  # Load the event file

    # Extract scalar data
    tags = ea.Tags()['scalars']
    for tag in tags:
        events = ea.Scalars(tag)
        for event in events:
            step = event.step
            value = event.value
            wall_time = event.wall_time
            data.append([tag, step, value, wall_time])

# Convert the data into a DataFrame
df = pd.DataFrame(data, columns=['Tag', 'Step', 'Value', 'Wall time'])

# Save the DataFrame to an Excel file
output_file = 'tensorboard_logs.xlsx'
df.to_excel(output_file, index=False)

print(f"Data has been written to {output_file}")

Data has been written to tensorboard_logs.xlsx
