<a href="https://colab.research.google.com/github/m4vic/neuralnets/blob/main/hyperparameterAnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.optim as optim
import matplotlib.pyplot as plt



# *DataLoading*

In [2]:
#set random seeds for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7f85b721d850>

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"using device{device}")

using devicecuda


In [4]:
df = pd.read_csv('/content/fashion-mnist_train.csv')
df.head()


Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
X = df.iloc[:, 1:].values # all from 1 index to end
y = df.iloc[:, 0].values # all 0 th index ie label

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# SCALING THE FEATURES coz noramalize
x_train = x_train / 255.0
x_test = x_test / 255.0

In [8]:
# create CustomDataset Class

class CustomDataset(Dataset):

  def __init__(self, features, labels): # it will fetch the data from dataset
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long) # declaring the data

  def __len__(self): # it will retrun len
    return len(self.features)

  def __getitem__(self, idx):
    return self.features[idx], self.labels[idx]


In [9]:
# create train_dataset object
train_dataset = CustomDataset(x_train, y_train)
#create test_dataset object
test_dataset = CustomDataset(x_test, y_test)

# **HyperparameterTuning**

In [10]:
# define NN with optimization

class MyNN(nn.Module):
  def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):
    super().__init__()
    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(input_dim, neurons_per_layer)) #(784 , 128)
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU()) # activation function
      layers.append(nn.Dropout(dropout_rate))
      input_dim= neurons_per_layer# (128,128)

    layers.append(nn.Linear(neurons_per_layer, output_dim)) # (128, 10)

    self.model = nn.Sequential(*layers) # unpacked

  def forward(self, x):
    return self.model(x)


In [11]:
#objective function

def objective(trial):

  #next hyperparameter values from the search space
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1,5)
  neurons_per_layer = trial.suggest_int("neurons_per_layer",8,128,step=8)
  epochs = trial.suggest_int("epoch", 10, 50, step=10)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical("batch_size", [16, 32, 64,128])
  optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD","RMSprop"])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)


  #create train and test loader
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory =True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)


  #model init

  input_dim = 784# imput dimension
  output_dim = 10 # output dimension

  model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate).to(device)


  # optimizers

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

  if optimizer_name == 'ADam':
    optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)
  else:
    optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)




  # training loop
  for epoch in range(epochs):
    for batch_features, batch_labels in train_loader:

      # move data to gpu
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      #forward pass
      outputs = model(batch_features)

      #calculate loss
      loss = criterion(outputs, batch_labels)

      #backward pass
      optimizer.zero_grad()
      loss.backward()

      # update grads
      optimizer.step()

#evaluation
  total = 0
  correct = 0
  model.eval()


  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      # move data to gpu
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)


      outputs = model(batch_features)

      _, predicted = torch.max(outputs.data, 1) # give max from the predited value

      total = total + batch_labels.shape[0] #
      correct += (predicted == batch_labels).sum().item() # corrected if pred == batch

    accuracy = correct / total



  return accuracy




In [12]:
pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [13]:
import optuna

study = optuna.create_study(direction='maximize')


[I 2025-08-13 08:00:31,700] A new study created in memory with name: no-name-0eefd461-3358-41cb-bd14-63677f3b0f04


In [14]:
study.optimize(objective, n_trials=10)

[I 2025-08-13 08:02:01,227] Trial 0 finished with value: 0.88275 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 80, 'epoch': 50, 'learning_rate': 0.0007121687597117525, 'dropout_rate': 0.4, 'batch_size': 128, 'optimizer': 'SGD', 'weight_decay': 0.0001212867908908892}. Best is trial 0 with value: 0.88275.
[I 2025-08-13 08:04:54,976] Trial 1 finished with value: 0.8821666666666667 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 112, 'epoch': 50, 'learning_rate': 6.309208649621293e-05, 'dropout_rate': 0.5, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.00012617068120039098}. Best is trial 0 with value: 0.88275.
[I 2025-08-13 08:08:54,545] Trial 2 finished with value: 0.8565 and parameters: {'num_hidden_layers': 4, 'neurons_per_layer': 56, 'epoch': 50, 'learning_rate': 0.0019156297329845262, 'dropout_rate': 0.4, 'batch_size': 32, 'optimizer': 'RMSprop', 'weight_decay': 4.6191977616070134e-05}. Best is trial 0 with value: 0.88275.
[I 2025-08-13 08:09:51

In [15]:
study.best_value

0.8883333333333333

In [16]:
study.best_params

{'num_hidden_layers': 4,
 'neurons_per_layer': 96,
 'epoch': 40,
 'learning_rate': 0.0006824071165141762,
 'dropout_rate': 0.30000000000000004,
 'batch_size': 128,
 'optimizer': 'RMSprop',
 'weight_decay': 8.246493067771315e-05}