<a href="https://colab.research.google.com/github/sumithkumar07/Rice_type_classifiers_by_optuna/blob/main/VAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
import optuna

In [40]:
import optuna

study = optuna.create_study(direction='maximize')

[I 2025-04-12 08:12:21,128] A new study created in memory with name: no-name-e3963c8d-1f38-4851-a8e8-5bc8cee1a6d8


In [41]:
pip install optuna



In [42]:
data_df= pd.read_csv('/content/riceClassification.csv')

In [43]:
data_df.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,1,4537,92.229316,64.012769,0.719916,4677,76.004525,0.657536,273.085,0.76451,1.440796,1
1,2,2872,74.691881,51.400454,0.725553,3015,60.471018,0.713009,208.317,0.831658,1.453137,1
2,3,3048,76.293164,52.043491,0.731211,3132,62.296341,0.759153,210.012,0.868434,1.46595,1
3,4,3073,77.033628,51.928487,0.738639,3157,62.5513,0.783529,210.657,0.870203,1.483456,1
4,5,3693,85.124785,56.374021,0.749282,3802,68.571668,0.769375,230.332,0.874743,1.51,1


In [44]:
original_df = data_df.copy() # Creating a copy of the original Dataframe to use to normalize inference

for column in data_df.columns:
    data_df[column] = data_df[column]/data_df[column].abs().max() # Divide by the maximum of the column which will make max value of each column is 1
data_df.head()

Unnamed: 0,id,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,EquivDiameter,Extent,Perimeter,Roundness,AspectRation,Class
0,5.5e-05,0.444368,0.503404,0.775435,0.744658,0.424873,0.66661,0.741661,0.537029,0.844997,0.368316,1.0
1,0.00011,0.281293,0.407681,0.622653,0.750489,0.273892,0.53037,0.80423,0.409661,0.919215,0.371471,1.0
2,0.000165,0.298531,0.416421,0.630442,0.756341,0.28452,0.54638,0.856278,0.412994,0.959862,0.374747,1.0
3,0.00022,0.300979,0.420463,0.629049,0.764024,0.286791,0.548616,0.883772,0.414262,0.961818,0.379222,1.0
4,0.000275,0.361704,0.464626,0.682901,0.775033,0.345385,0.601418,0.867808,0.452954,0.966836,0.386007,1.0


In [45]:
X = np.array(data_df.iloc[:,:-1]) # Get the inputs, all rows and all columns except last column (output)
Y = np.array(data_df.iloc[:, -1]) # Get the ouputs, all rows and last column only (output column)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3) # Create the training split



In [56]:
class dataset(Dataset):
    def __init__(self, X, Y):
        self.features = torch.tensor(X, dtype = torch.float32) # Changed 'featurs' to 'features'
        self.labels = torch.tensor(Y, dtype = torch.float32)

    def __len__(self):
        return len(self.features)


    def __getitem__(self, index):
        return self.features[index], self.labels[index]


In [57]:
test_dataset = dataset(X_test, y_test)

In [58]:
train_dataset = dataset(X_train, y_train)

In [59]:
class MyModel(nn.Module):
    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):
        super(MyModel, self).__init__()
        layers = []
        current_dim = input_dim

        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(current_dim, neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            current_dim = neurons_per_layer

        layers.append(nn.Linear(current_dim, output_dim))
        # Use Sigmoid for binary classification
        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        x = self.model(x)
        return x

In [60]:
def objective(trial):
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
  neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
  epochs = trial.suggest_int("epochs", 10, 50, step=10)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
  optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

  input_dim = X_train.shape[1]
  output_dim = 1 # Output dimension is 1 for binary classification
  model = MyModel(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)

  # Use Binary Cross Entropy Loss for binary classification
  criterion = nn.BCELoss()

  # The optimizer should be defined based on the optimizer_name
  if optimizer_name == 'Adam':
    optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # Use torch.optim.SGD
  else:
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # Use torch.optim.RMSprop

  # Training loop
  for epoch in range(epochs):
    for batch_features, batch_labels in train_loader:
      # Forward pass
      outputs = model(batch_features)

      # Calculate loss
      loss = criterion(outputs.squeeze(), batch_labels) # Squeeze output for BCELoss

      # Backpropagation
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

  # Evaluation
  model.eval()
  total = 0
  correct = 0

  with torch.no_grad():
    for batch_features, batch_labels in test_loader:
      outputs = model(batch_features)

      # Predictions for binary classification
      predicted = (outputs > 0.5).float()

      total += batch_labels.size(0)
      correct += (predicted.squeeze() == batch_labels).sum().item()

  accuracy = correct / total
  return accuracy

In [61]:
study = optuna.create_study(direction='maximize')

[I 2025-04-12 08:24:36,572] A new study created in memory with name: no-name-e9975083-7ac9-466c-9d35-9b45bf47936d


In [62]:
study.optimize(objective, n_trials=10)

[I 2025-04-12 08:24:47,934] Trial 0 finished with value: 0.5852272727272727 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 40, 'epochs': 20, 'learning_rate': 0.00022377544108844168, 'dropout_rate': 0.2, 'batch_size': 32, 'optimizer': 'SGD', 'weight_decay': 0.0005827274463679046}. Best is trial 0 with value: 0.5852272727272727.
[I 2025-04-12 08:24:58,435] Trial 1 finished with value: 0.9998167155425219 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 120, 'epochs': 30, 'learning_rate': 0.017396135906236215, 'dropout_rate': 0.5, 'batch_size': 64, 'optimizer': 'SGD', 'weight_decay': 0.00016911710652196043}. Best is trial 1 with value: 0.9998167155425219.
[I 2025-04-12 08:27:26,675] Trial 2 finished with value: 0.9998167155425219 and parameters: {'num_hidden_layers': 4, 'neurons_per_layer': 128, 'epochs': 50, 'learning_rate': 5.112272367468492e-05, 'dropout_rate': 0.5, 'batch_size': 16, 'optimizer': 'RMSprop', 'weight_decay': 0.00019435095078231278}. Best is tri

In [63]:
study.best_value

1.0

In [64]:
study.best_params

{'num_hidden_layers': 1,
 'neurons_per_layer': 16,
 'epochs': 50,
 'learning_rate': 0.0002260373785589777,
 'dropout_rate': 0.30000000000000004,
 'batch_size': 128,
 'optimizer': 'Adam',
 'weight_decay': 3.0496107342543217e-05}