In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import torch.optim as optim

In [2]:
# Select the compute device: the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
  device=torch.device('cuda')
else:
  device=torch.device('cpu')
print(f'using device:{device}')

using device:cuda


In [3]:
# Load the raw CSV and drop the two columns that are not used for modelling.
data=(
  pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
  .drop(columns=['id','Unnamed: 32'])
)

# After the drop, column 0 is used as the target and every other column as a feature.
X_train,X_test,y_train,y_test=train_test_split(
  data.iloc[:,1:],
  data.iloc[:,0],
  test_size=0.2,
  random_state=32,
)

# Fit the scaler on the training split only, then reuse its statistics on the test split.
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
print(X_train.shape)

# Encode the target labels as integers, fitting on the training labels only.
encoder=LabelEncoder()
y_train=encoder.fit_transform(y_train)
y_test=encoder.transform(y_test)
print(y_train.shape)


(455, 30)
(455,)


In [4]:
# Define the custom Dataset class

class CustomDataset(Dataset):
  """Dataset holding features and labels as in-memory tensors.

  Features are stored as float32 and labels as int64 (`torch.long`).
  """

  def __init__(self,features,labels):
    """Convert `features` and `labels` to tensors and keep them on the instance."""
    feature_tensor=torch.tensor(features,dtype=torch.float32)
    label_tensor=torch.tensor(labels,dtype=torch.long)
    self.features=feature_tensor
    self.labels=label_tensor

  def __len__(self):
    """Return the number of samples, taken from the feature tensor."""
    n_samples=len(self.features)
    return n_samples

  def __getitem__(self,index):
    """Return the `(features, label)` pair stored at `index`."""
    sample=self.features[index]
    target=self.labels[index]
    return sample,target


In [5]:
# Wrap the scaled features and encoded labels of each split in a CustomDataset.
train_dataset=CustomDataset(features=X_train,labels=y_train)
test_dataset=CustomDataset(features=X_test,labels=y_test)


In [32]:
class SimpleNN(nn.Module):
  """Fully connected network with a configurable stack of hidden blocks.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. A final Linear
  layer maps to `output_dim` raw scores; no softmax is applied here.
  """

  def __init__(self,input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate):
    """Build the layer stack.

    Args:
      input_dim: number of input features per sample.
      output_dim: number of output scores per sample.
      num_hidden_layers: number of hidden blocks; 0 yields a single Linear layer.
      neurons_per_layer: width of every hidden block.
      dropout_rate: dropout probability used in every hidden block.
    """
    super().__init__()

    layers=[]
    # Width of the tensor entering the next Linear layer.
    in_features=input_dim

    for _ in range(num_hidden_layers):
      layers.append(nn.Linear(in_features,neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      in_features=neurons_per_layer

    # Use the tracked width rather than `neurons_per_layer`, so the output layer
    # still matches the input when there are no hidden blocks.
    layers.append(nn.Linear(in_features,output_dim))

    self.model=nn.Sequential(*layers)

  def forward(self,X):
    """Run `X` through the sequential stack and return the raw output scores."""
    return self.model(X)

In [35]:
# Optuna trial objective function
def objective(trail):
  """Train a `SimpleNN` with trial-sampled hyperparameters and return its accuracy.

  Relies on the notebook-level names `train_dataset`, `test_dataset`, `X_train`,
  `device` and `SimpleNN`.

  Args:
    trail: Optuna trial object used to sample the hyperparameters (the
      parameter keeps its original spelling so the interface is unchanged).

  Returns:
    float: fraction of `test_dataset` samples classified correctly.
  """
  # hyperparameters
  # NOTE: the string keys are kept verbatim (including 'dropuout_rate') so that
  # they match the parameter names stored in already recorded study results.
  neurons_per_layer=trail.suggest_int('neurons_per_layer',16,256,step=8)
  num_hidden_layers=trail.suggest_int('hidden_layers',1,5)
  dropout_rate=trail.suggest_float('dropuout_rate',0.1,0.5,step=0.1)
  learning_rate=trail.suggest_float('learning_rate',1e-5,1e-1,log=True)
  weight_decay=trail.suggest_float('weight_decay',1e-5,1e-1,log=True)
  # The upper bound is 9 because [5, 10] is not divisible by step=2; Optuna
  # already truncated the range to [5, 9], so the search space is unchanged.
  epochs=trail.suggest_int('epochs',5,9,step=2)
  optimizer_name=trail.suggest_categorical('optimizer',['adam','sgd','rmsprop'])
  batch_size=trail.suggest_categorical('batch_size',[16,32,64,128])

  # create the train and test dataloader objects
  # The sampled batch size must drive the loaders, otherwise the hyperparameter
  # has no effect. Pinned host memory is only requested when running on CUDA.
  use_pinned_memory=device.type=='cuda'
  train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,pin_memory=use_pinned_memory)
  test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,pin_memory=use_pinned_memory)

  # model initialization
  input_dim=X_train.shape[1]
  output_dim=2  # the network emits two scores per sample for CrossEntropyLoss

  model=SimpleNN(input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate)
  model=model.to(device)

  # optimizer selection
  criterion=nn.CrossEntropyLoss()
  if optimizer_name=='adam':
    optimizer=optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
  elif optimizer_name=='sgd':
    optimizer=optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
  else:
    optimizer=optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

  # training loop
  model.train()  # make the training mode (dropout / batch-norm updates) explicit
  for epoch in range(epochs):

    for batch_features,batch_labels in train_loader:
      # move data to the selected device
      batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)
      # forward pass
      y_pred=model(batch_features)
      # loss calculation
      loss=criterion(y_pred,batch_labels)
      # backpropagation
      optimizer.zero_grad()
      loss.backward()
      # weights update
      optimizer.step()

  # evaluation
  model.eval()
  total=0
  correct=0
  with torch.no_grad():
    for batch_features,batch_labels in test_loader:
      batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)
      y_pred=model(batch_features)
      # predicted class = index of the largest output score
      _,y_pred_class=torch.max(y_pred,1)
      total+=batch_labels.shape[0]
      correct+=(y_pred_class==batch_labels).sum().item()
    accuracy=correct/total
  return accuracy


In [29]:
# Use the %pip magic so the package is installed into the running kernel's environment.
%pip install optuna



In [36]:
import optuna
# `objective` returns an accuracy, so the study searches for the maximum value.
study=optuna.create_study(direction='maximize')

[I 2025-12-17 03:35:49,165] A new study created in memory with name: no-name-35ac4c43-8a94-43a3-8101-e93829c06357


In [37]:
# Run 15 trials; each trial calls `objective` once with newly sampled hyperparameters.
study.optimize(objective,n_trials=15)

[I 2025-12-17 03:35:57,433] Trial 0 finished with value: 0.6140350877192983 and parameters: {'neurons_per_layer': 128, 'hidden_layers': 4, 'dropuout_rate': 0.1, 'learning_rate': 0.09095654695423334, 'weight_decay': 3.720267610982598e-05, 'epochs': 5, 'optimizer': 'rmsprop', 'batch_size': 32}. Best is trial 0 with value: 0.6140350877192983.
[I 2025-12-17 03:35:57,846] Trial 1 finished with value: 0.8245614035087719 and parameters: {'neurons_per_layer': 160, 'hidden_layers': 4, 'dropuout_rate': 0.4, 'learning_rate': 9.065015605865055e-05, 'weight_decay': 0.00011545905895644886, 'epochs': 7, 'optimizer': 'adam', 'batch_size': 64}. Best is trial 1 with value: 0.8245614035087719.
[I 2025-12-17 03:35:58,043] Trial 2 finished with value: 0.8859649122807017 and parameters: {'neurons_per_layer': 64, 'hidden_layers': 1, 'dropuout_rate': 0.1, 'learning_rate': 4.671270054847759e-05, 'weight_decay': 1.4762332796310826e-05, 'epochs': 7, 'optimizer': 'adam', 'batch_size': 64}. Best is trial 2 with va

In [38]:
# Highest objective value (test accuracy returned by `objective`) among the trials.
study.best_value

0.9912280701754386

In [39]:
# Hyperparameter values sampled by the best trial, keyed by the names used in `objective`.
study.best_params

{'neurons_per_layer': 200,
 'hidden_layers': 3,
 'dropuout_rate': 0.1,
 'learning_rate': 0.08912635162833195,
 'weight_decay': 0.003639454974085414,
 'epochs': 7,
 'optimizer': 'sgd',
 'batch_size': 64}

In [40]:
# Full record of the best trial(s), displayed as a list of FrozenTrial objects.
study.best_trials

[FrozenTrial(number=7, state=<TrialState.COMPLETE: 1>, values=[0.9912280701754386], datetime_start=datetime.datetime(2025, 12, 17, 3, 35, 59, 71496), datetime_complete=datetime.datetime(2025, 12, 17, 3, 35, 59, 339639), params={'neurons_per_layer': 200, 'hidden_layers': 3, 'dropuout_rate': 0.1, 'learning_rate': 0.08912635162833195, 'weight_decay': 0.003639454974085414, 'epochs': 7, 'optimizer': 'sgd', 'batch_size': 64}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'neurons_per_layer': IntDistribution(high=256, log=False, low=16, step=8), 'hidden_layers': IntDistribution(high=5, log=False, low=1, step=1), 'dropuout_rate': FloatDistribution(high=0.5, log=False, low=0.1, step=0.1), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'weight_decay': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'epochs': IntDistribution(high=9, log=False, low=5, step=2), 'optimizer': CategoricalDistribution(choices=('adam', 'sgd', 'rmsprop'