In [1]:
import torch
import torch.nn as nn
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'using device : {device}')

using device : cuda


In [3]:
df = pd.read_csv('fashion-mnist_train.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
df.shape

(60000, 785)

In [5]:
torch.manual_seed(42)

<torch._C.Generator at 0x1607211f450>

In [6]:
X = df.iloc[:,1:].values
y = df.iloc[:,0].values

In [7]:
X_train , X_test , y_train , y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [8]:
X_train = X_train/255.0
X_test = X_test/255.0

In [9]:
## creating custom dataset
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        self.features = torch.tensor(features,dtype=torch.float32)
        self.labels = torch.tensor(labels,dtype=torch.long)
    def __len__(self):
        return self.features.shape[0]
    def __getitem__(self,idx):
       
        return self.features[idx],self.labels[idx]

In [10]:
## creating dataset objects
train_dataset = CustomDataset(X_train,y_train)
test_dataset = CustomDataset(X_test,y_test)

In [11]:
class MyNN(nn.Module):
    def __init__(self,input_dim,output_dim,num_hidden_layers,neuron_per_layer,dropout_rate):
        super().__init__()
        layers = []
        for i in range(num_hidden_layers):
            layers.append(nn.Linear(input_dim,neuron_per_layer))
            layers.append(nn.BatchNorm1d(neuron_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            input_dim = neuron_per_layer

        layers.append(nn.Linear(neuron_per_layer,output_dim))
        self.model = nn.Sequential(*layers)
    
    def forward(self,x):
        return self.model(x)

In [12]:
def Objective(trial):
    
    num_hidden_layers = trial.suggest_int('num_hidden_layers',1,5)
    neurons_per_layer = trial.suggest_int('neurons_per_layer',8,128,step=8)
    epochs = trial.suggest_int('epochs',10,50,step=10)
    learning_rate = trial.suggest_float('learning_rate',1e-5,1e-1,log=True)
    dropout_rate = trial.suggest_float('dropout_rate',0.1,0.5,step=0.1)
    batch_size = trial.suggest_categorical('batch_size',[16,32,64,128])
    optimizer_name = trial.suggest_categorical('optimizer',['Adam','SGD','RMSprop'])
    weight_decay = trial.suggest_float('weight_decay',1e-5,1e-3,log=True)

    train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,pin_memory=True)
    test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False,pin_memory=True)

    input_dim = 784
    output_dim = 10

    model = MyNN(input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate)
    model.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4)
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    elif optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    else : 
        optimizer = optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

    

    for epoch in range(epochs):
        
        for batch_features,batch_labels in train_loader:
            # move data to gpu
            batch_features,batch_labels = batch_features.to(device) , batch_labels.to(device)
            y_pred = model(batch_features)
            loss = loss_function(y_pred,batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()   
    ## set model to eval mode 
    model.eval()
    ## evaluate the model
    total = 0
    correct = 0

    with torch.no_grad():
        for batch_features,batch_labels in test_loader:
            batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)
            y_pred = model(batch_features)
            _,predicted = torch.max(y_pred.data,1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
        accuracy = correct/total

    return accuracy


In [13]:
import optuna 

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
print(device)

cuda


In [15]:
study = optuna.create_study(direction='maximize')
study.optimize(Objective,n_trials=10)

[I 2025-12-26 10:24:39,077] A new study created in memory with name: no-name-cb1d1eb0-b167-4a8e-81fb-e3f8396a2a7f
[I 2025-12-26 10:26:09,675] Trial 0 finished with value: 0.532 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 8, 'epochs': 40, 'learning_rate': 0.026618433560934497, 'dropout_rate': 0.1, 'batch_size': 64, 'optimizer': 'SGD', 'weight_decay': 0.0006594704075285296}. Best is trial 0 with value: 0.532.
[I 2025-12-26 10:28:28,637] Trial 1 finished with value: 0.71225 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 104, 'epochs': 30, 'learning_rate': 0.032117139787966174, 'dropout_rate': 0.30000000000000004, 'batch_size': 16, 'optimizer': 'SGD', 'weight_decay': 0.00015861409857265195}. Best is trial 1 with value: 0.71225.
[I 2025-12-26 10:30:18,898] Trial 2 finished with value: 0.8291666666666667 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 16, 'epochs': 30, 'learning_rate': 0.0007239223857274892, 'dropout_rate': 0.5, 'batch_size': 16

In [16]:
best_params = study.best_params
best_params

{'num_hidden_layers': 4,
 'neurons_per_layer': 128,
 'epochs': 50,
 'learning_rate': 0.0019583811033671633,
 'dropout_rate': 0.2,
 'batch_size': 128,
 'optimizer': 'SGD',
 'weight_decay': 0.00010527530987830198}

In [17]:
best_accuracy = study.best_value
best_accuracy

0.8851666666666667

In [18]:
model = MyNN(
    input_dim=784,
    output_dim=10,
    num_hidden_layers=best_params['num_hidden_layers'],
    neuron_per_layer=best_params['neurons_per_layer'],
    dropout_rate=best_params['dropout_rate']
).to(device)

optimizer = optim.Adam(
    model.parameters(),
    lr=best_params['learning_rate'],
    weight_decay=best_params['weight_decay']
)

loss_function = nn.CrossEntropyLoss()
train_loader = DataLoader(
    train_dataset,
    batch_size=best_params['batch_size'],
    shuffle=True
)

In [19]:
for epoch in range(best_params['epochs']):
    model.train()
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        loss = loss_function(model(x), y)
        loss.backward()
        optimizer.step()