In [35]:
#!pip install scikit-learn
#! pip install skorch
#! pip install scikit-optimize 

In [22]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [23]:
#load the iris dataset and extract features and target
iris = load_iris()
X = iris.data
y = iris.target
#split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
import pandas as pd
# Create a DataFrame for the iris dataset
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Add the target variable to the DataFrame
df['species_id'] = iris.target
df['species_name'] = df['species_id'].map(lambda x: iris.target_names[x])
# Display the first few rows of the DataFrame
print(df.head())


   sepal length (cm)  sepal width (cm)  ...  species_id  species_name
0                5.1               3.5  ...           0        setosa
1                4.9               3.0  ...           0        setosa
2                4.7               3.2  ...           0        setosa
3                4.6               3.1  ...           0        setosa
4                5.0               3.6  ...           0        setosa

[5 rows x 6 columns]


In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Define a CNN  model for iris dataset, which has 4 features and 3 classes and dropout as 0.5
# The model should have 2 convolutional layers, 2 fully connected layers, and relu activation and softmax activation at the end.
class IrisCNN(nn.Module):
    def __init__(self):
        super(IrisCNN, self).__init__()
        self.layer1 = nn.Linear(4, 16)  # Input layer to first hidden layer
        self.layer2 = nn.Linear(16, 32)  # First hidden layer to second hidden layer
        self.layer3 = nn.Linear(32, 3)   # Second hidden layer to output layer
        self.dropout = nn.Dropout(0.5)   # Dropout layer to prevent overfitting

    def forward(self, x):
        x = F.relu(self.layer1(x))  # Apply ReLU activation to first layer
        x = self.dropout(x)          # Apply dropout
        x = F.relu(self.layer2(x))  # Apply ReLU activation to second layer
        x = self.dropout(x)          # Apply dropout
        x = self.layer3(x)          # Output layer
        return F.log_softmax(x, dim=1)  # Apply softmax activation for multi-class classification

# Convert the features to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# Convert the target to PyTorch tensors
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)



# create a model with skorch
from skorch import NeuralNetClassifier
model = NeuralNetClassifier(
    IrisCNN,
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    verbose=1,
)
# define grid search parameters
from sklearn.model_selection import GridSearchCV

param_grid = {
    'lr': (0.001, 1.0),
    'max_epochs': (10, 50),
    'batch_size': (8, 32)
}
grid = GridSearchCV(
    model,
    param_grid,
    cv=3,
    verbose=1
)
# fit the model with grid search
grid.fit(X_train_tensor, y_train_tensor)
# Print the best parameters and score
print("Best parameters found: ", grid.best_params_)
print("Best score: ", grid.best_score_)
# Evaluate the best model on the test set
best_model = grid.best_estimator_
test_score = best_model.score(X_test_tensor, y_test_tensor)
print("Test score: ", test_score)
# Save the best model
import joblib
joblib.dump(best_model, 'best_iris_cnn_model.pkl')

Fitting 3 folds for each of 8 candidates, totalling 24 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.2236[0m       [32m0.3125[0m        [35m1.1511[0m  0.0107
      2        1.2396       0.3125        [35m1.1338[0m  0.0059
      3        [36m1.1766[0m       0.3125        [35m1.1173[0m  0.0057
      4        [36m1.1343[0m       0.3125        [35m1.1017[0m  0.0059
      5        [36m1.1150[0m       0.3125        [35m1.0892[0m  0.0051
      6        1.1278       0.3125        [35m1.0754[0m  0.0054
      7        [36m1.0504[0m       0.3125        [35m1.0586[0m  0.0044
      8        [36m1.0376[0m       [32m0.3750[0m        [35m1.0373[0m  0.0043
      9        1.0506       [32m0.6250[0m        [35m1.0153[0m  0.0048
     10        [36m1.0146[0m       [32m0.6875[0m        [35m0.9903[0m  0.0048
  epoch    train_loss    valid_acc    valid_loss     dur
-------  -

['best_iris_cnn_model.pkl']

In [40]:
## Why Baysian optimization?
# Bayesian optimization is a powerful technique for hyperparameter tuning that can find optimal parameters more efficiently than grid search or random search.
# It uses a probabilistic model to explore the hyperparameter space and can converge to good solutions with fewer evaluations.
# This is particularly useful when training deep learning models, where each evaluation can be computationally expensive.


# Initialize Bayesian optimization
from skopt import BayesSearchCV

opt = BayesSearchCV(
    model,
    {
        'lr': (0.01, 1.0, 'log-uniform'),
        'max_epochs': (10, 50),
        'batch_size': (8, 32)
    },
    n_iter=10,
    cv=3,
    verbose=1
)
# Fit the model with Bayesian optimization
opt.fit(X_train_tensor, y_train_tensor)
# Print the best parameters and score
print("Best parameters found: ", opt.best_params_)
print("Best score: ", opt.best_score_)
# Evaluate the best model on the test set
test_score_opt = opt.score(X_test_tensor, y_test_tensor)
print("Test score with Bayesian optimization: ", test_score_opt)
# Save the best model from Bayesian optimization

Fitting 3 folds for each of 1 candidates, totalling 3 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m4.2235[0m       [32m0.6250[0m        [35m1.3347[0m  0.0041
      2        [36m1.8845[0m       0.3750        [35m1.2442[0m  0.0026
      3        [36m1.1399[0m       [32m0.6875[0m        [35m0.8499[0m  0.0025
      4        1.5075       0.4375        0.9600  0.0024
      5        1.1781       0.3125        0.8755  0.0039
      6        [36m1.0868[0m       0.3125        0.9516  0.0037
      7        1.1489       0.3125        1.0991  0.0023
      8        1.1542       0.3125        1.1089  0.0023
      9        1.1023       0.3125        1.1004  0.0030
     10        1.1024       0.3125        1.0989  0.0025
     11        1.0991       0.3125        1.1009  0.0022
     12        1.0981       0.3125        1.1032  0.0022
  epoch    train_loss    valid_acc    valid_loss     dur
-----