<a href="https://colab.research.google.com/github/kalakhushi18/Deep-Learning/blob/main/Hyperparameter_tuning_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code Source: Opencampus Kiel
https://edu.opencampus.sh/en/course/417

# Batch Size Experiment

In [None]:
# Safety guard: the sweep below takes a very long time to run (how long depends on the size of your sweep).
# Comment out the next line if you really want to run the experiment.
raise Exception("This is an error message. Beware, the following code takes a lot of time to run. Maybe you just want to look at the evaluation")
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

# Define a simple neural network
class Net(nn.Module):
    """Fully connected classifier with one hidden layer for 28x28 inputs.

    Layout: 784 -> 200 -> 10, with a sigmoid after each linear layer, so every
    one of the ten outputs lies in the range 0 to 1.

    NOTE: the tuning script in this notebook feeds these outputs to
    ``nn.CrossEntropyLoss``, which applies log-softmax itself and therefore
    treats the sigmoid outputs as logits. The architecture is kept unchanged
    so results remain comparable with the recorded sweep.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the random generator in the same sequence as the network runs.
        hidden_layer = nn.Linear(28 * 28, 200)
        output_layer = nn.Linear(200, 10)
        self.model = nn.Sequential(
            hidden_layer,
            nn.Sigmoid(),
            output_layer,
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return the 10 per-class outputs."""
        flattened = x.view(-1, 28 * 28)
        return self.model(flattened)


# Function to evaluate accuracy and loss
def evaluate(model, data_loader, device, criterion):
    """Measure the accuracy and mean loss of ``model`` over ``data_loader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        data_loader: Iterable yielding ``(data, target)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss callable invoked as ``criterion(outputs, target)``.

    Returns:
        ``(accuracy, avg_loss)``: accuracy as a percentage and the loss
        averaged over all samples seen.

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.
    """
    model.eval()
    n_seen = 0
    n_correct = 0
    loss_sum = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)
            # Each batch loss is weighted by its batch size before summing;
            # this gives a per-sample mean when the criterion averages over
            # the batch (the default for nn.CrossEntropyLoss).
            batch_loss = criterion(scores, labels).item()
            loss_sum += batch_loss * inputs.size(0)
            # The predicted class is the index of the highest score per row.
            predictions = torch.max(scores, 1).indices
            n_correct += (predictions == labels).sum().item()
            n_seen += labels.size(0)
    mean_loss = loss_sum / n_seen
    return 100 * n_correct / n_seen, mean_loss

# Training function
def train(model, device, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    The model is switched to train mode; for every batch the gradients are
    reset, the loss is back-propagated and the optimizer takes one step.

    Args:
        model: Module being trained.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer holding the parameters of ``model``.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        The loss averaged over the samples actually processed during this
        pass, or ``0.0`` if the loader yielded no batches.
    """
    model.train()
    total_loss = 0.0
    samples_seen = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Weight each batch loss by its size so a smaller final batch does not
        # skew the average.
        batch_size = data.size(0)
        total_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Normalise by the number of samples seen rather than len(loader.dataset):
    # the two differ when the loader drops the last batch or uses a sampler,
    # and counting samples matches how evaluate() averages its loss.
    if samples_seen == 0:
        return 0.0
    return total_loss / samples_seen

# Hyperparameter tuning function
def hyperparameter_tuner(epochs, batch_sizes, learning_rates, normalization, shuffle):
    """Grid-search MNIST training over batch size, learning rate, normalisation and shuffling.

    A fresh ``Net`` is trained with plain SGD and ``nn.CrossEntropyLoss`` for
    every combination of the four option lists. After each epoch the model is
    evaluated on the training and the test set and a progress line is printed.

    Args:
        epochs: Number of epochs to train each configuration.
        batch_sizes: Batch sizes to try (used for both train and test loaders).
        learning_rates: SGD learning rates to try.
        normalization: Flags; a truthy value adds ``transforms.Normalize`` to
            the input pipeline.
        shuffle: Flags passed as ``shuffle`` to the training ``DataLoader``.

    Returns:
        Dict keyed by ``(batch_size, lr, norm, shuf)``. Each value is a dict
        with the per-epoch lists ``'train_accuracies'``, ``'test_accuracies'``,
        ``'train_losses'`` and ``'test_losses'``.
    """
    from itertools import product

    # The device does not depend on the configuration, so pick it once.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The datasets depend only on the normalisation flag. Build each variant
    # lazily once and reuse it, instead of re-creating both MNIST datasets for
    # every (batch size, learning rate, shuffle) combination.
    dataset_cache = {}

    def _datasets_for(norm):
        """Return the cached ``(train_dataset, test_dataset)`` pair for ``norm``."""
        key = bool(norm)
        if key not in dataset_cache:
            transform_list = [transforms.ToTensor()]
            if key:
                # Presumably the MNIST mean / standard deviation - TODO confirm.
                transform_list.append(transforms.Normalize((0.1307,), (0.3081,)))
            transform = transforms.Compose(transform_list)
            dataset_cache[key] = (
                datasets.MNIST('./data', train=True, download=True, transform=transform),
                datasets.MNIST('./data', train=False, transform=transform),
            )
        return dataset_cache[key]

    # Results dictionary
    results = {}

    # product() visits the combinations in the same order as four nested loops
    # (batch size outermost, shuffle innermost).
    for batch_size, lr, norm, shuf in product(batch_sizes, learning_rates, normalization, shuffle):
        train_dataset, test_dataset = _datasets_for(norm)

        # DataLoaders: only the training data is (optionally) shuffled.
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuf)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        # Every configuration starts from a freshly initialised model.
        model = Net().to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()

        # Per-epoch histories for this configuration
        train_accuracies = []
        test_accuracies = []
        train_losses = []
        test_losses = []

        # Training and evaluation
        for epoch in range(1, epochs + 1):
            # The training loss is the running average during the epoch; the
            # training accuracy comes from a separate pass after the epoch.
            train_loss = train(model, device, train_loader, optimizer, criterion)
            train_accuracy, _ = evaluate(model, train_loader, device, criterion)
            test_accuracy, test_loss = evaluate(model, test_loader, device, criterion)

            train_accuracies.append(train_accuracy)
            test_accuracies.append(test_accuracy)
            train_losses.append(train_loss)
            test_losses.append(test_loss)

            print(f'Epoch {epoch}/{epochs}, Batch Size {batch_size}, LR {lr}, Norm: {norm}, Shuffle: {shuf}, '
                  f'Train Acc: {train_accuracy:.2f}%, Train Loss: {train_loss:.4f}, '
                  f'Test Acc: {test_accuracy:.2f}%, Test Loss: {test_loss:.4f}')

        # Save results
        results[(batch_size, lr, norm, shuf)] = {
            'train_accuracies': train_accuracies,
            'test_accuracies': test_accuracies,
            'train_losses': train_losses,
            'test_losses': test_losses
        }

    return results

# Define the hyperparameters to tune
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
learning_rates = [0.2, 0.1, 0.05, 0.01, 0.005, 0.001]
epochs = 50
normalization_options = [True, False]
shuffle_options = [True, False]

# Run hyperparameter tuner (one training run per combination of the options above)
tuning_results = hyperparameter_tuner(epochs, batch_sizes, learning_rates, normalization_options, shuffle_options)

# Process tuning results to find the best hyperparameters based on the
# test accuracy after the final epoch.
best_acc = 0
best_params = None
for params, metrics in tuning_results.items():
    if not metrics['test_accuracies']:
        # Nothing was recorded for this configuration (e.g. epochs == 0).
        continue
    final_test_acc = metrics['test_accuracies'][-1]
    # Accept the first configuration unconditionally so that best_params is
    # set even if no accuracy exceeds the initial 0; ties keep the earlier one.
    if best_params is None or final_test_acc > best_acc:
        best_acc = final_test_acc
        best_params = params

if best_params is None:
    print("No tuning results available, so no best parameters can be reported.")
else:
    print(f"Best parameters: Batch Size = {best_params[0]}, Learning Rate = {best_params[1]}, "
          f"Normalization = {best_params[2]}, Shuffle = {best_params[3]} "
          f"with Test Accuracy = {best_acc:.2f}%")


## Evaluation

I provide the results of the script above as a CSV file (see below). It took nearly one week to run fully on one GPU, so you can see that even with such small models these experiments are quite expensive and time-consuming to run. Nevertheless, it is really educational because you get a lot of results to explore. So please make good use of the extensive results below 😀

In [None]:
import pandas as pd
# Direct-download link (Google Drive) to the CSV file read below.
url = 'https://drive.google.com/uc?export=download&id=1XhxHdG93XXu5vuKGGz5kpEMhIsXd-jMO'
# Read the CSV straight from the URL into a DataFrame (requires network access).
df = pd.read_csv(url)
# Bare expression: the notebook renders the DataFrame as the cell output.
df

Unnamed: 0,batch_size,learning_rate,normalization,shuffle,epoch,train_accuracy,test_accuracy,train_loss,test_loss
0,1,0.200,True,True,1,96.051667,95.44,1.533758,1.504039
1,1,0.200,True,True,2,97.003333,96.41,1.498761,1.495327
2,1,0.200,True,True,3,97.253333,96.71,1.491554,1.494662
3,1,0.200,True,True,4,97.436667,96.56,1.486813,1.493336
4,1,0.200,True,True,5,98.185000,97.23,1.483290,1.488044
...,...,...,...,...,...,...,...,...,...
9595,128,0.001,False,False,46,37.541667,38.94,2.258011,2.256308
9596,128,0.001,False,False,47,38.541667,39.91,2.256688,2.254945
9597,128,0.001,False,False,48,39.486667,40.78,2.255341,2.253557
9598,128,0.001,False,False,49,40.461667,41.78,2.253971,2.252145


In [None]:
# Show the row(s) that reach the maximum test accuracy.
# The maximum is computed from the data instead of being hard-coded
# (98.22 in the recorded output), so the filter stays correct if the CSV
# changes and does not rely on matching a float literal exactly.
df[df['test_accuracy'] == df['test_accuracy'].max()]

Unnamed: 0,batch_size,learning_rate,normalization,shuffle,epoch,train_accuracy,test_accuracy,train_loss,test_loss
188,1,0.2,False,False,39,99.398333,98.22,1.466238,1.482943


Run the following code cell to open an interactive widget playground below this code cell. You don't have to read or understand the code. Just run it. Always click on the "Update Plots" button to render the plots. The rest should be quite self-explanatory.

In [None]:
import ipywidgets as widgets
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, clear_output

selections = []  # One dict of widgets per selector row, in display order
selector_boxes = []  # HBox of each selector row; kept index-aligned with `selections`
output = widgets.Output()  # Output widget the plots are rendered into

def create_selector(index):
    """Create one set of configuration dropdowns plus its delete button.

    Args:
        index: Number shown in the widget descriptions of this row.

    Returns:
        Dict with the keys ``'batch_size'``, ``'learning_rate'``,
        ``'normalization'``, ``'shuffle'`` (dropdowns) and ``'delete_button'``.
    """
    selector = {
        'batch_size': widgets.Dropdown(options=df['batch_size'].unique(), description=f'Batch Size {index}:'),
        'learning_rate': widgets.Dropdown(options=df['learning_rate'].unique(), description=f'Learning Rate {index}:'),
        'normalization': widgets.Dropdown(options=[True, False], description=f'Normalization {index}:'),
        'shuffle': widgets.Dropdown(options=[True, False], description=f'Shuffle {index}:'),
        'delete_button': widgets.Button(description='Delete', button_style='danger')
    }

    def _delete_this_row(_button):
        # Resolve the row's position at click time. The index captured at
        # creation goes stale once an earlier row has been deleted, which
        # would remove the wrong row (or none at all).
        for position, existing in enumerate(selections, start=1):
            if existing is selector:
                delete_selector(position)
                return

    selector['delete_button'].on_click(_delete_this_row)
    return selector

def add_selector(b=None):
    """Append a new selector row and redraw the controls.

    ``b`` is the argument supplied when this runs as a button ``on_click``
    callback; it is not used.
    """
    new_selector = create_selector(len(selections) + 1)
    selections.append(new_selector)

    # Lay the row out left to right: the four dropdowns, then the delete button.
    row_keys = ('batch_size', 'learning_rate', 'normalization', 'shuffle', 'delete_button')
    row = widgets.HBox([new_selector[key] for key in row_keys])
    selector_boxes.append(row)

    update_display()

def delete_selector(index):
    """Delete the selector row at 1-based position ``index`` and update the display.

    Positions outside ``1..len(selections)`` are ignored. The lower bound
    matters: without it, ``index <= 0`` would be turned into a negative list
    index and silently delete a row counted from the end.
    """
    if 1 <= index <= len(selections):
        # Both lists are index-aligned, so remove the same slot from each.
        del selections[index - 1]
        del selector_boxes[index - 1]
        update_display()

def update_display():
    """Redraw the control panel: selector rows, the action buttons, then the plot output."""
    clear_output(wait=True)
    button_row = widgets.HBox([add_button, plot_button])
    # Display order is top to bottom: every selector row, the button row, the plots.
    for item in (*selector_boxes, button_row, output):
        display(item)

def update_plots(b=None):
    """Redraw the accuracy and loss curves for every selector row.

    For each row, the results in ``df`` are filtered to the chosen batch size,
    learning rate, normalization and shuffle values. Train curves are drawn
    solid and test curves dashed, in the row's colour, against the epoch.

    ``b`` is the argument supplied when this runs as a button ``on_click``
    callback; it is not used.
    """
    with output:
        output.clear_output(wait=True)  # Clear the previous plot
        _, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
        acc_ax, loss_ax = axes
        colors = ['blue', 'green', 'red', 'purple', 'orange', 'brown']  # Extend as needed

        for i, selector in enumerate(selections, start=1):
            # i is 1-based for the legend labels; shift by one so the first
            # row uses the first colour and the cycle wraps after the last.
            color = colors[(i - 1) % len(colors)]
            filtered_df = df[(df['batch_size'] == selector['batch_size'].value) &
                             (df['learning_rate'] == selector['learning_rate'].value) &
                             (df['normalization'] == selector['normalization'].value) &
                             (df['shuffle'] == selector['shuffle'].value)]

            acc_ax.plot(filtered_df['epoch'], filtered_df['train_accuracy'], label=f'Train Acc {i}', color=color, linestyle='-', marker='o')
            acc_ax.plot(filtered_df['epoch'], filtered_df['test_accuracy'], label=f'Test Acc {i}', color=color, linestyle='--', marker='x')

            loss_ax.plot(filtered_df['epoch'], filtered_df['train_loss'], label=f'Train Loss {i}', color=color, linestyle='-', marker='o')
            loss_ax.plot(filtered_df['epoch'], filtered_df['test_loss'], label=f'Test Loss {i}', color=color, linestyle='--', marker='x')

        acc_ax.set_title('Accuracy')
        acc_ax.set_xlabel('Epoch')
        acc_ax.set_ylabel('Accuracy (%)')
        loss_ax.set_title('Loss')
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('Loss')

        for ax in axes:
            # Only draw a legend when something was plotted; calling legend()
            # with no labelled artists emits a warning.
            handles, _labels = ax.get_legend_handles_labels()
            if handles:
                ax.legend()
            ax.grid(True)

        plt.tight_layout()
        plt.show()

# Button that appends a new selector row when clicked
add_button = widgets.Button(description='Add Selector')
add_button.on_click(add_selector)

# Button that re-renders the plots for the current selections when clicked
plot_button = widgets.Button(description='Update Plots')
plot_button.on_click(update_plots)

# Initial setup: start with one selector row (add_selector also calls update_display)
add_selector()

# Initial display setup: clears the output and renders the controls once more
update_display()


HBox(children=(Dropdown(description='Batch Size 1:', options=(1, 2, 4, 8, 16, 32, 64, 128), value=1), Dropdown…

HBox(children=(Button(description='Add Selector', style=ButtonStyle()), Button(description='Update Plots', sty…

Output()