In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [5]:
device  = torch.device('cuda' if torch.cuda.is_available() else 'CPU')

In [6]:
print(device)

cuda


In [19]:
df = pd.read_csv('/content/fashion-mnist_train.csv')

In [20]:
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


**Spliting data**

In [21]:
x_train, x_test, y_train, y_test = train_test_split(df.iloc[:, 1:].values, df.iloc[:, 0].values, test_size=0.2)

In [22]:
x_train = x_train/255
x_test = x_test/255

In [23]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((48000, 784), (12000, 784), (48000,), (12000,))

In [24]:
x_train[10].shape

(784,)

**Customizing the dataset**

In [49]:
# Defining a class for custom dataset

class CustDataset(Dataset):

  def __init__(self, features, labels):
    self.features = torch.tensor(features, dtype= torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [50]:
train_dataset = CustDataset(torch.tensor(x_train), torch.tensor(y_train))
test_dataset = CustDataset(torch.tensor(x_test), torch.tensor(y_test))

  self.features = torch.tensor(features, dtype= torch.float32)
  self.labels = torch.tensor(labels, dtype=torch.long)


**Batching**

**model Class**

In [58]:
class Model(nn.Module):

  def __init__(self, input_size, output_size, num_hidden_layers, num_neurons, dropout_rate):
    super().__init__()
    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(input_size, num_neurons))
      layers.append(nn.BatchNorm1d(num_neurons))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      input_size = num_neurons

    layers.append(nn.Linear(input_size, output_size))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)

**Defining the objective function for trails**

In [59]:
def objective(trial):


  num_hidden_layers = trial.suggest_int('num_hidden_layer', 3,5)
  num_neurons = trial.suggest_int('num_neurons', 128, 512, step=36)
  epochs = trial.suggest_int('epochs', 50, 100)
  learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-3, log=True)
  dropouts = trial.suggest_float('dropouts', 0.3, 0.5, step=0.1)
  batch_size = trial.suggest_categorical('batch_size', [32, 64, 16, 48])
  optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop'])
  weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)

  # defining input and output size of the nural network
  input_size = 784
  output_size = 10

  # Defining Batch Size
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,pin_memory=True)

  # model initialization
  model = Model(input_size, output_size, num_hidden_layers, num_neurons, dropouts)

  # Assign the model to GPU
  model.to(device)

  # Defining Optimizer function
  if optimizer_name == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
  else:
    optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
  # Loss function
  loss_func = nn.CrossEntropyLoss()

  # Model Pipeline
  for epoch in range(epochs):

    for features_batch, label_batch in train_loader:

      features_batch, label_batch = features_batch.to(device), label_batch.to(device)

      output = model(features_batch)

      loss = loss_func(output, label_batch)

      optimizer.zero_grad()

      loss.backward()

      optimizer.step()

  # model evaluation
  model.eval()

  with torch.no_grad():
    for features_batch, label_batch in test_loader:

      features_batch, label_batch = features_batch.to(device), label_batch.to(device)

      output = model(features_batch)

      _, predicted = torch.max(output, 1)

      accuracy = (predicted == label_batch).sum().item()/label_batch.size(0)

  return accuracy


In [54]:
study = optuna.create_study(direction='maximize')

[I 2025-10-05 09:07:44,285] A new study created in memory with name: no-name-3850119c-6790-4475-901b-e3d1adc6d238


In [60]:
study.optimize(objective, n_trials=10)

[I 2025-10-05 09:42:16,231] Trial 2 finished with value: 1.0 and parameters: {'num_hidden_layer': 5, 'num_neurons': 308, 'epochs': 100, 'learning_rate': 3.757958068289409e-05, 'dropouts': 0.4, 'batch_size': 32, 'optimizer': 'Adam', 'weight_decay': 1.746886856775662e-05}. Best is trial 2 with value: 1.0.
[I 2025-10-05 09:45:24,236] Trial 3 finished with value: 0.90625 and parameters: {'num_hidden_layer': 3, 'num_neurons': 272, 'epochs': 50, 'learning_rate': 0.0005211005751794748, 'dropouts': 0.5, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.0009370989271373897}. Best is trial 2 with value: 1.0.
[I 2025-10-05 09:49:03,306] Trial 4 finished with value: 0.9375 and parameters: {'num_hidden_layer': 3, 'num_neurons': 164, 'epochs': 53, 'learning_rate': 7.630737319229441e-06, 'dropouts': 0.4, 'batch_size': 32, 'optimizer': 'Adam', 'weight_decay': 3.086556494587183e-05}. Best is trial 2 with value: 1.0.
[I 2025-10-05 09:59:03,928] Trial 5 finished with value: 0.6875 and parameters: {

In [72]:
class New_Model(nn.Module):

  def __init__(self, input_size, output_size, num_hidden_layers, num_neurons, dropout_rate):
    super().__init__()
    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(input_size, num_neurons))
      layers.append(nn.BatchNorm1d(num_neurons))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      input_size = num_neurons

    layers.append(nn.Linear(input_size, output_size))

    self.network = nn.Sequential(*layers)


  def forward(self, x):
    return self.network(x)

In [73]:
new_model = Model(784, 10, 5, 308, 0.4)

new_model.to(device)

optimizer = torch.optim.Adam(new_model.parameters(), lr = 3.757958068289409e-05, weight_decay=1.746886856775662e-05)

loss_func = nn.CrossEntropyLoss()

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False,pin_memory=True)

for epoch in range(100):

  for features_batch, label_batch in train_loader:

    features_batch, label_batch = features_batch.to(device), label_batch.to(device)

    output = new_model(features_batch)

    loss = loss_func(output, label_batch)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()


with torch.no_grad():

  for features_batch, label_batch in train_loader:

    features_batch, label_batch = features_batch.to(device), label_batch.to(device)

    output = new_model(features_batch)

    _, predicted = torch.max(output, 1)

    accuracy = (predicted == label_batch).sum().item()/label_batch.size(0)

print(accuracy)

0.9375
