In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x1aef25569d0>

In [3]:
df = pd.read_csv(r"D:\Datasets\fashion_mnist\fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
#Scaling the features
X_train = X_train/255
X_test = X_test/255

In [8]:
gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
print(torch.version.cuda)
print(torch.cuda.get_device_name(0))

True
12.1
NVIDIA GeForce RTX 3070 Laptop GPU


In [16]:
class customDataset(Dataset):

    def __init__(self, features, labels):

        super().__init__()
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
        #self.features = self.features.to(device=gpu)
        #self.labels = self.labels.to(device=gpu)

    def __len__(self):

        return self.features.shape[0]
    
    def __getitem__(self, index):
        
        return self.features[index].float(), self.labels[index].long()


In [17]:
train_dataset = customDataset(X_train, y_train)
test_dataset = customDataset(X_test, y_test)

In [18]:
class artificialNN(nn.Module):

    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):
        super().__init__()
        layers = []

        for i in range(num_hidden_layers):

            layers.append(nn.Linear(input_dim, neurons_per_layer))
            layers.append(nn.BatchNorm1d(neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))

            input_dim = neurons_per_layer

        layers.append(nn.Linear(neurons_per_layer, output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, features):

        return self.model(features)

In [23]:
def optuna_objective(trial):
    
    # Search Space
    num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 10)
    num_epochs = trial.suggest_int("num_epochs", 50, 500)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)
    dropout_rate = trial.suggest_float("dropout_raye", 0.1, 0.5)
    optimizer_name = trial.suggest_categorical("optimizer", ['SGD', 'Adam', 'RMSprop', 'Adagrad'])
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])
    neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 256, step=8)

    # Static Parameters
    input_dim = 784
    output_dim = 10

    model = artificialNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)
    model = model.to(device=gpu)

    criterion = nn.CrossEntropyLoss()
    if optimizer_name == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)


    # Training loop

    for epoch in range(num_epochs):

        for batch_features, batch_labels in train_loader:

            batch_features = batch_features.to(device=gpu)
            batch_labels = batch_labels.to(device=gpu)
            
            # Forward Pass
            outputs = model(batch_features)
            # Loss calculation
            loss = criterion(outputs, batch_labels)
            # Backward Pass
            optimizer.zero_grad()
            loss.backward()
            # Update grads
            optimizer.step()

    
    # Evaluation steps
    model.eval()

    total = 0
    correct = 0

    with torch.no_grad():

        for batch_features, batch_labels in test_loader:

            batch_features = batch_features.to(device=gpu)
            batch_labels = batch_labels.to(device=gpu)

            output = model(batch_features)
            predicted = torch.argmax(output, dim=1)
            total += batch_labels.shape[0]
            correct = correct + (predicted == batch_labels).sum().item()

        accuracy = correct/total

    return accuracy

In [24]:
study = optuna.create_study(direction='maximize')

[I 2026-02-07 20:58:54,748] A new study created in memory with name: no-name-5bbbf2ea-7153-4f2e-b6f7-bc323953ae66


In [25]:
study.optimize(optuna_objective, n_trials=10)

[I 2026-02-07 21:06:11,423] Trial 0 finished with value: 0.8619166666666667 and parameters: {'num_hidden_layers': 8, 'num_epochs': 479, 'learning_rate': 0.01442945757227185, 'weight_decay': 1.8156641225801864e-05, 'dropout_raye': 0.27083356270565606, 'optimizer': 'RMSprop', 'batch_size': 256, 'neurons_per_layer': 136}. Best is trial 0 with value: 0.8619166666666667.
[I 2026-02-07 21:09:33,702] Trial 1 finished with value: 0.8824166666666666 and parameters: {'num_hidden_layers': 8, 'num_epochs': 138, 'learning_rate': 0.0195723520410793, 'weight_decay': 0.00081927381986094, 'dropout_raye': 0.47611194669668133, 'optimizer': 'SGD', 'batch_size': 128, 'neurons_per_layer': 136}. Best is trial 1 with value: 0.8824166666666666.
[I 2026-02-07 21:18:22,160] Trial 2 finished with value: 0.20741666666666667 and parameters: {'num_hidden_layers': 7, 'num_epochs': 115, 'learning_rate': 0.03542281620858385, 'weight_decay': 0.0008921187887910298, 'dropout_raye': 0.3223063460092655, 'optimizer': 'RMSpro