# Building a ANN using PyTorch

# Install Optuna

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


# Import Libraries

In [2]:
import pandas as pd
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [5]:
import kagglehub
import os

In [6]:
import optuna

# Import Data

In [7]:
path = kagglehub.dataset_download("zalando-research/fashionmnist")
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'fashionmnist' dataset.
Path to dataset files: /kaggle/input/fashionmnist


In [8]:
df = pd.read_csv(f'{path}/fashion-mnist_train.csv')
df.shape

(60000, 785)

In [9]:
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Check `GPU` availability

In [10]:
device = 'cpu'
x = torch.rand(2, 3)

# Move the tensor to CUDA (if available)
if torch.cuda.is_available():
  device = torch.device("cuda")
  x_cuda = x.to(device)
  print(f"Tensor on: {x_cuda.device}") # Output: Tensor on: cuda:0

device

Tensor on: cuda:0


device(type='cuda')

# Extract Features and Labels

In [11]:
X = df.drop('label', axis=1)
X = X.values
y = df['label'].values

In [12]:
X[0][100:150]

array([136,  61,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  88, 201, 228, 225, 255, 115,  62,
       137, 255, 235, 222, 255, 135,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  47, 252, 234, 238, 224])

# Scale Data

In [13]:
X = X/255.0
X[0][100:150]

array([0.53333333, 0.23921569, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.34509804,
       0.78823529, 0.89411765, 0.88235294, 1.        , 0.45098039,
       0.24313725, 0.5372549 , 1.        , 0.92156863, 0.87058824,
       1.        , 0.52941176, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.18431373, 0.98823529, 0.91764706, 0.93333333, 0.87843137])

# Train Test Split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=17)

X_train.shape, X_test.shape

((48000, 784), (12000, 784))

# `Dataset` Class

In [15]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = torch.from_numpy(features).float()
    self.labels = torch.from_numpy(labels).long()

  def __len__(self):
    return len(self.labels)

  def __getitem__(self, index):

    return self.features[index], self.labels[index]


In [16]:
train_dataset = CustomDataset(features=X_train, labels=y_train)
test_dataset = CustomDataset(features=X_test, labels=y_test)

# Custom Model

In [17]:
class MyANN(nn.Module):
  def __init__(self, input_dim, output_dim, num_of_hidden_layers, neurons_per_layer):

    super(MyANN, self).__init__()

    # layer of hidden layers
    layers = []
    for _ in range(num_of_hidden_layers):

      layers.append(nn.Linear(input_dim, neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.SELU())
      layers.append(nn.Dropout(0.4))

      input_dim = neurons_per_layer
    layers.append(nn.Linear(input_dim, 10))

    self.model = nn.Sequential(*layers)


  def forward(self, features):
    return self.model(features)

# Optuna Hyperparameter

## Objective Function

In [22]:
def objective(trial):

  # Hyperparameter Search Space

  # layers and neurons
  num_of_hidden_layers = trial.suggest_int("num_of_hidden_layers", 2, 5)
  neurons_per_layer = trial.suggest_int('neurons_per_layer', 64, 512, step=32)

  # epochs and learning rate
  epochs = trial.suggest_int("epochs", 5, 25, step=5)
  learning_rate = trial.suggest_float("learning_rate", 1e-3, 1e-1, step=0.5)

  # optimizer, dropout, lambda
  optimx = trial.suggest_categorical("optimizer", ['adam', 'sgd', 'rmsprop'])
  drop_out_rate = trial.suggest_float("dropout_rate", 0.2, 0.7, step=0.1)
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3)

  # batch size
  batch_size = trial.suggest_categorical("Batch_size", [32, 64, 128, 256, 512])


  # model init
  input_dim = X_train.shape[1]
  output_dim = 10

  model = MyANN(
      input_dim = input_dim,
      output_dim = output_dim,
      num_of_hidden_layers = num_of_hidden_layers,
      neurons_per_layer = neurons_per_layer
  )
  # model Device Changes
  model = model.to(device)

  # Params Init
  # learning_rate = 0.01
  # epochs = 5

  # Optimizer selection
  criterion = nn.CrossEntropyLoss()
  # optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=1e-3)
  if optimx == 'sgd':
    optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimx == 'rmsprop':
    optimizer = torch.optim.RMSprop(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  else:
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  # DataLoader
  train_loader = DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle= True,
    pin_memory=True
  )

  test_loader = DataLoader(
    dataset = test_dataset,
    batch_size = batch_size,
    shuffle= True,
    pin_memory=True
  )

  # training pipeline
  for epoch in range(epochs):
    losses = []

    model.train()

    for batch_features, batch_labels in  train_loader:

      # move data to gpu
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)

      # forward pass
      y_pred = model(batch_features)

      # loss calculate
      loss = criterion(y_pred, batch_labels)

      # back pass
      optimizer.zero_grad()
      loss.backward()

      # update params
      optimizer.step()

  # evaluation
  model.eval()

  total_rows = 0
  correct = 0

  with torch.no_grad():
    for features, labels in test_loader:

        # move data to gpu
        features = features.to(device)
        labels = labels.to(device)

        # Output from network: [batch, 10]
        y_pred = model(features)

        # Predicted class index
        _, predict = torch.max(y_pred, 1)

        # Count correct predictions
        correct += (predict == labels).sum().item()
        total_rows += labels.size(0)

    avg_accuracy = correct / total_rows
    # print(f'Average Accuracy: {avg_accuracy:.4f}')
  return avg_accuracy

## Call Optuna

In [23]:
study = optuna.create_study(
    study_name="My-First-Optuna",
    direction='maximize'
  )

study.optimize(objective, 10)

[I 2025-12-04 04:50:06,851] A new study created in memory with name: My-First-Optuna
[I 2025-12-04 04:50:25,058] Trial 0 finished with value: 0.8215833333333333 and parameters: {'num_of_hidden_layers': 4, 'neurons_per_layer': 64, 'epochs': 20, 'learning_rate': 0.001, 'optimizer': 'rmsprop', 'dropout_rate': 0.7, 'weight_decay': 0.0004470139586374249, 'Batch_size': 256}. Best is trial 0 with value: 0.8215833333333333.
[I 2025-12-04 04:50:32,619] Trial 1 finished with value: 0.7955 and parameters: {'num_of_hidden_layers': 2, 'neurons_per_layer': 352, 'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'rmsprop', 'dropout_rate': 0.6000000000000001, 'weight_decay': 0.0005019220297786515, 'Batch_size': 256}. Best is trial 0 with value: 0.8215833333333333.
[I 2025-12-04 04:50:49,770] Trial 2 finished with value: 0.8404166666666667 and parameters: {'num_of_hidden_layers': 2, 'neurons_per_layer': 384, 'epochs': 5, 'learning_rate': 0.001, 'optimizer': 'rmsprop', 'dropout_rate': 0.3000000000000000

In [25]:
study.best_trials

[FrozenTrial(number=6, state=<TrialState.COMPLETE: 1>, values=[0.86275], datetime_start=datetime.datetime(2025, 12, 4, 4, 51, 45, 428489), datetime_complete=datetime.datetime(2025, 12, 4, 4, 53, 35, 653126), params={'num_of_hidden_layers': 4, 'neurons_per_layer': 480, 'epochs': 25, 'learning_rate': 0.001, 'optimizer': 'sgd', 'dropout_rate': 0.2, 'weight_decay': 0.00030355586644340596, 'Batch_size': 32}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'num_of_hidden_layers': IntDistribution(high=5, log=False, low=2, step=1), 'neurons_per_layer': IntDistribution(high=512, log=False, low=64, step=32), 'epochs': IntDistribution(high=25, log=False, low=5, step=5), 'learning_rate': FloatDistribution(high=0.001, log=False, low=0.001, step=0.5), 'optimizer': CategoricalDistribution(choices=('adam', 'sgd', 'rmsprop')), 'dropout_rate': FloatDistribution(high=0.7, log=False, low=0.2, step=0.1), 'weight_decay': FloatDistribution(high=0.001, log=False, low=1e-05, step=None), 

In [24]:
study.best_params

{'num_of_hidden_layers': 4,
 'neurons_per_layer': 480,
 'epochs': 25,
 'learning_rate': 0.001,
 'optimizer': 'sgd',
 'dropout_rate': 0.2,
 'weight_decay': 0.00030355586644340596,
 'Batch_size': 32}

In [26]:
study.best_value

0.86275