In [1]:
import pandas as pd
import numpy as np

# Importing Dataset

In [2]:
df = pd.read_csv("/content/drive/MyDrive/Datasets/fashion-mnist_test.csv")

# Splitting and Scalling

In [3]:
from sklearn.model_selection import train_test_split as split

train_x, test_x, train_y, test_y = split(df.drop("label", axis=1), df["label"], test_size=0.2, random_state=42)

# Scalling
train_x = train_x/255.0
test_x = test_x/255.0

# GPU availability

In [4]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

# Custom Dataset Class

In [5]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = torch.tensor(features.values, dtype=torch.float32)
    self.labels = torch.tensor(labels.values, dtype=torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self, idx):
    return self.features[idx], self.labels[idx]

In [6]:
# Object of Custom Dataset
train_dataset = CustomDataset(train_x, train_y)
test_dataset = CustomDataset(test_x, test_y)

# Deep Learning Model

In [7]:
import torch.nn as nn

class MyModel(nn.Module):
  def __init__(self, input_dim, output_dim, num_hidden_layers, neurones_per_layers, dropout_rate):
    super(MyModel, self).__init__()

    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(input_dim, neurones_per_layers))
      layers.append(nn.BatchNorm1d(neurones_per_layers))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      input_dim = neurones_per_layers

    layers.append(nn.Linear(input_dim, output_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)

# Optuna Application

In [8]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.4/383.4 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [9]:
import optuna

In [10]:
def objective(trial):

  # all hyper parameter value from search space
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
  neurones_per_layers = trial.suggest_categorical("neurones_per_layers", [8, 16, 32, 64, 128]) # Changed to suggest_categorical
  epochs = trial.suggest_int("epochs", 10, 30, step=5)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])
  optimiser_name = trial.suggest_categorical("optimiser", ["Adam", "SGD", "RMSprop"])
  activation_function = trial.suggest_categorical("activation_function", ["ReLU", "Tanh", "Sigmoid"])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True)




  # Object of DataLoader
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory = True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory = True)

  # Model Initialisation
  input_dim = train_x.shape[1]
  output_dim = 10
  model = MyModel(input_dim, output_dim, num_hidden_layers, neurones_per_layers, dropout_rate)
  model.to(device)

  criterion = nn.CrossEntropyLoss()
  optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

  # Optimiser salecttion
  if optimiser_name == "Adam":
    optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimiser_name == "SGD":
    optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)
  else:
    optimiser = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  # Training loop
  for epoch in range(epochs):

    for batch_features, batch_labels in train_loader:
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      # Forward pass
      outputs = model(batch_features)

      # Calculate loss
      loss = criterion(outputs, batch_labels)

      # Backward pass and optimization
      optimiser.zero_grad()
      loss.backward()

      # Gradient Update
      optimiser.step()

  # Model Evaluation
  model.eval()

  # evaluation on test data
  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      # move data to gpu
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      outputs = model(batch_features)

      _, predicted = torch.max(outputs, 1)

      total = total + batch_labels.shape[0]

      correct = correct + (predicted == batch_labels).sum().item()

    accuracy = correct/total

  return accuracy


In [11]:
# Optimising Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-01-29 08:54:20,000] A new study created in memory with name: no-name-cf17547f-9e93-497d-a56b-de3f05c371ab
[I 2025-01-29 08:54:45,906] Trial 0 finished with value: 0.577 and parameters: {'num_hidden_layers': 4, 'neurones_per_layers': 16, 'epochs': 20, 'learning_rate': 0.023622248782512383, 'dropout_rate': 0.1, 'batch_size': 32, 'optimiser': 'RMSprop', 'activation_function': 'Tanh', 'weight_decay': 0.0038521209119591485}. Best is trial 0 with value: 0.577.
[I 2025-01-29 08:54:57,272] Trial 1 finished with value: 0.8025 and parameters: {'num_hidden_layers': 2, 'neurones_per_layers': 128, 'epochs': 30, 'learning_rate': 0.02672894651601289, 'dropout_rate': 0.1, 'batch_size': 64, 'optimiser': 'RMSprop', 'activation_function': 'Tanh', 'weight_decay': 0.00011559521094646363}. Best is trial 1 with value: 0.8025.
[I 2025-01-29 08:55:01,224] Trial 2 finished with value: 0.7095 and parameters: {'num_hidden_layers': 1, 'neurones_per_layers': 8, 'epochs': 30, 'learning_rate': 0.0030578861481

In [12]:
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 50
Best trial:
  Value: 0.8605
  Params: 
    num_hidden_layers: 4
    neurones_per_layers: 128
    epochs: 20
    learning_rate: 0.00013717878772796023
    dropout_rate: 0.1
    batch_size: 64
    optimiser: RMSprop
    activation_function: ReLU
    weight_decay: 2.3015857458888027e-05


# Visualisation using DataFrame

In [13]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_activation_function,params_batch_size,params_dropout_rate,params_epochs,params_learning_rate,params_neurones_per_layers,params_num_hidden_layers,params_optimiser,params_weight_decay,state
0,0,0.577,2025-01-29 08:54:20.005658,2025-01-29 08:54:45.905655,0 days 00:00:25.899997,Tanh,32,0.1,20,0.023622,16,4,RMSprop,0.003852,COMPLETE
1,1,0.8025,2025-01-29 08:54:45.907764,2025-01-29 08:54:57.271959,0 days 00:00:11.364195,Tanh,64,0.1,30,0.026729,128,2,RMSprop,0.000116,COMPLETE
2,2,0.7095,2025-01-29 08:54:57.273407,2025-01-29 08:55:01.224325,0 days 00:00:03.950918,ReLU,512,0.2,30,0.003058,8,1,RMSprop,0.009962,COMPLETE
3,3,0.1455,2025-01-29 08:55:01.225928,2025-01-29 08:55:06.689748,0 days 00:00:05.463820,Tanh,256,0.5,25,5.2e-05,16,4,SGD,1.6e-05,COMPLETE
4,4,0.8285,2025-01-29 08:55:06.694625,2025-01-29 08:55:09.686235,0 days 00:00:02.991610,Sigmoid,512,0.2,20,0.000989,16,3,RMSprop,0.003325,COMPLETE
5,5,0.6915,2025-01-29 08:55:09.687748,2025-01-29 08:55:23.231475,0 days 00:00:13.543727,Tanh,32,0.4,15,0.000158,128,5,SGD,3.5e-05,COMPLETE
6,6,0.815,2025-01-29 08:55:23.233030,2025-01-29 08:55:25.381401,0 days 00:00:02.148371,Tanh,512,0.3,15,0.000589,32,2,RMSprop,0.001357,COMPLETE
7,7,0.7545,2025-01-29 08:55:25.382956,2025-01-29 08:55:28.521541,0 days 00:00:03.138585,ReLU,256,0.1,20,0.013163,8,1,RMSprop,5.8e-05,COMPLETE
8,8,0.3825,2025-01-29 08:55:28.522922,2025-01-29 08:55:31.765145,0 days 00:00:03.242223,Sigmoid,256,0.5,15,0.001371,16,5,Adam,0.000134,COMPLETE
9,9,0.8175,2025-01-29 08:55:31.767520,2025-01-29 08:55:36.559391,0 days 00:00:04.791871,Tanh,512,0.4,30,0.002203,32,2,SGD,0.005193,COMPLETE


# Visualisation using graph

In [14]:
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_contour, plot_slice, plot_edf

In [15]:
plot_optimization_history(study)

In [16]:
plot_param_importances(study)

# Train with the Best Parameter

In [19]:
# print each value from best parameter of optuna
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

    num_hidden_layers: 4
    neurones_per_layers: 128
    epochs: 20
    learning_rate: 0.00013717878772796023
    dropout_rate: 0.1
    batch_size: 64
    optimiser: RMSprop
    activation_function: ReLU
    weight_decay: 2.3015857458888027e-05
