In [6]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import numpy as np

# Step 1: Load and preprocess the data
def load_and_preprocess_data():
    # Load the Iris dataset
    iris = load_iris()
    X = iris.data  # Features
    y = iris.target  # Labels (Classes)

    # One-hot encoding of the labels
    encoder = OneHotEncoder(sparse_output=False)
    y_onehot = encoder.fit_transform(y.reshape(-1, 1))

    # Splitting data into train and test sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

    # Standardizing the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Converting to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    return X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor

# Step 2: Define the neural network model
class IrisNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(IrisNet, self).__init__()
        # First hidden layer
        self.fc1 = nn.Linear(input_size, hidden_size1)
        # Second hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        # Output layer
        self.fc3 = nn.Linear(hidden_size2, output_size)
    
    def forward(self, x):
        # Apply ReLU activations after each layer except the last
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Apply softmax for multi-class classification at the output layer
        x = torch.softmax(self.fc3(x), dim=1)
        return x

# Step 3: Train the model
def train_model(model, X_train, y_train, epochs=50, learning_rate=0.01):
    # Loss function (cross-entropy for classification)
    criterion = nn.CrossEntropyLoss()
    # Optimizer (Adam)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)
        # Calculate the loss
        loss = criterion(outputs, torch.max(y_train, 1)[1])

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Step 4: Evaluate the model
def evaluate_model(model, X_test, y_test):
    with torch.no_grad():  # Disable gradient calculation
        outputs = model(X_test)
        predicted = torch.argmax(outputs, 1)
        correct = torch.sum(predicted == torch.argmax(y_test, 1)).item()
        accuracy = correct / y_test.shape[0]
        print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Step 5: Putting it all together
if __name__ == "__main__":
    # Load data
    X_train, X_test, y_train, y_test = load_and_preprocess_data()

    # Define model
    input_size = X_train.shape[1]
    hidden_size1 = 10
    hidden_size2 = 8
    output_size = y_train.shape[1]
    model = IrisNet(input_size, hidden_size1, hidden_size2, output_size)

    # Train model
    train_model(model, X_train, y_train, epochs=1000, learning_rate=0.01)

    # Evaluate model
    evaluate_model(model, X_test, y_test)


Epoch [10/1000], Loss: 1.0268
Epoch [20/1000], Loss: 0.8336
Epoch [30/1000], Loss: 0.7381
Epoch [40/1000], Loss: 0.6810
Epoch [50/1000], Loss: 0.6338
Epoch [60/1000], Loss: 0.6047
Epoch [70/1000], Loss: 0.5919
Epoch [80/1000], Loss: 0.5854
Epoch [90/1000], Loss: 0.5816
Epoch [100/1000], Loss: 0.5792
Epoch [110/1000], Loss: 0.5775
Epoch [120/1000], Loss: 0.5760
Epoch [130/1000], Loss: 0.5746
Epoch [140/1000], Loss: 0.5731
Epoch [150/1000], Loss: 0.5716
Epoch [160/1000], Loss: 0.5700
Epoch [170/1000], Loss: 0.5685
Epoch [180/1000], Loss: 0.5672
Epoch [190/1000], Loss: 0.5661
Epoch [200/1000], Loss: 0.5652
Epoch [210/1000], Loss: 0.5645
Epoch [220/1000], Loss: 0.5639
Epoch [230/1000], Loss: 0.5634
Epoch [240/1000], Loss: 0.5629
Epoch [250/1000], Loss: 0.5625
Epoch [260/1000], Loss: 0.5622
Epoch [270/1000], Loss: 0.5619
Epoch [280/1000], Loss: 0.5617
Epoch [290/1000], Loss: 0.5615
Epoch [300/1000], Loss: 0.5613
Epoch [310/1000], Loss: 0.5612
Epoch [320/1000], Loss: 0.5610
Epoch [330/1000],

In [None]:
# define optuna trial
def otimiza(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    model = IrisNet(input_size, hidden_size1, hidden_size2, output_size)
    train_model(model, X_train, y_train, epochs=50, learning_rate=learning_rate)
    with torch.no_grad():
        outputs = model(X_test)
        predicted = torch.argmax(outputs, 1)
        correct = torch.sum(predicted == torch.argmax(y_test, 1)).item()
        accuracy = correct / y_test.shape[0]

# define optuna study
study = optuna.create_study(direction='minimize')
study.optimize(otimiza, n_trials=100)