In [6]:
# Install the Optuna library for hyperparameter optimization
!pip install optuna

# Import necessary PyTorch modules
import torch
import torch.nn as nn # Neural network modules (e.g., layers)
import torch.optim as optim # Optimization algorithms (e.g., Adam)

# Import Optuna for hyperparameter tuning
import optuna

# Import data utilities from PyTorch
from torch.utils.data import DataLoader # For batching and shuffling data

# Import image datasets and transformations from torchvision
from torchvision import datasets, transforms # For MNIST dataset and data preprocessing



In [7]:
class Net(nn.Module):
    # Constructor for the neural network
    def __init__(self, hidden_size):
        super(Net, self).__init__() # Initialize the base nn.Module class
        # Define the first fully connected (linear) layer
        # Input size: 28*28 (flattened MNIST image), Output size: hidden_size (tunable)
        self.fc1 = nn.Linear(28*28, hidden_size)
        # Define the second fully connected (linear) layer
        # Input size: hidden_size, Output size: 10 (for 10 MNIST digits)
        self.fc2 = nn.Linear(hidden_size, 10)

    # Defines the forward pass of the neural network
    def forward(self, x):
        # Flatten the input tensor (e.g., from (batch_size, 1, 28, 28) to (batch_size, 784))
        x = torch.flatten(x, 1)
        # Apply the first linear layer followed by a ReLU activation function
        x = torch.relu(self.fc1(x))
        # Apply the second linear layer (output layer)
        x = self.fc2(x)
        return x

In [8]:
def objective(trial):
    # Suggest an integer for the hidden layer size between 128 and 512
    hidden_size = trial.suggest_int('hidden_size', 128, 512)
    # Suggest a floating-point number for the learning rate between 1e-4 and 1e-1 (log-uniform distribution)
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-1, log=True)

    # Define image transformations: convert image to PyTorch tensor
    transform = transforms.Compose([transforms.ToTensor()])
    # Load the MNIST training dataset and create a DataLoader
    # Downloads the dataset if not available, shuffles it, and uses a batch size of 32
    train_loader = DataLoader(datasets.MNIST(
        './data', train=True, download=True, transform=transform), batch_size=32, shuffle=True)

    # Initialize the neural network model with the suggested hidden_size
    model = Net(hidden_size)
    # Initialize the Adam optimizer with the model parameters and suggested learning_rate
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Define the loss function: Cross Entropy Loss, suitable for multi-class classification
    criterion = nn.CrossEntropyLoss()

    # Set the model to training mode
    model.train()
    # Iterate for one epoch (training loop)
    for epoch in range(1):
        # Iterate through batches of data from the train_loader
        for batch_idx, (data, target) in enumerate(train_loader):
            # Zero out the gradients from the previous iteration
            optimizer.zero_grad()
            # Perform a forward pass: get predictions from the model
            output = model(data)
            # Calculate the loss between the predictions and the true targets
            loss = criterion(output, target)
            # Perform a backward pass: compute gradients of the loss with respect to model parameters
            loss.backward()
            # Update model parameters using the optimizer
            optimizer.step()

    # Return the loss value for Optuna to minimize
    return loss.item()

In [9]:
# Create an Optuna study object, aiming to minimize the objective function
study = optuna.create_study(direction='minimize')
# Run the optimization process, calling the 'objective' function for 5 trials
study.optimize(objective, n_trials=5)
# Print the best hyperparameters found by Optuna
print("Best Hyperparameters:", study.best_params)

[I 2025-12-16 13:24:23,852] A new study created in memory with name: no-name-e76aaa73-09e0-43e0-a2cd-6ce60133e9ec
[I 2025-12-16 13:24:40,196] Trial 0 finished with value: 0.5686120390892029 and parameters: {'hidden_size': 448, 'lr': 0.006725217873395584}. Best is trial 0 with value: 0.5686120390892029.
[I 2025-12-16 13:24:53,393] Trial 1 finished with value: 0.05871235579252243 and parameters: {'hidden_size': 298, 'lr': 0.0020325105977196797}. Best is trial 1 with value: 0.05871235579252243.
[I 2025-12-16 13:25:04,122] Trial 2 finished with value: 0.28575313091278076 and parameters: {'hidden_size': 165, 'lr': 0.0011340906581675688}. Best is trial 1 with value: 0.05871235579252243.
[I 2025-12-16 13:25:15,634] Trial 3 finished with value: 0.11740577220916748 and parameters: {'hidden_size': 298, 'lr': 0.00346089699178028}. Best is trial 1 with value: 0.05871235579252243.
[I 2025-12-16 13:25:25,646] Trial 4 finished with value: 0.7570409178733826 and parameters: {'hidden_size': 176, 'lr': 

Best Hyperparameters: {'hidden_size': 298, 'lr': 0.0020325105977196797}
