Step 1: Hyperparameter Tuning and Model Training

In [1]:
# !pip install optuna
# !pip install scikit-learn
# !pip install flask

In [2]:
import optuna
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=11)

# Define the objective function for Optuna
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 10, 200)
    max_depth = trial.suggest_int('max_depth', 2, 32)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=42
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Run Optuna hyperparameter optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Train the best model
best_params = study.best_params
print("Best parameters:", best_params)

best_model = RandomForestClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

# Evaluate the model
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test accuracy:", accuracy)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-01-25 12:20:19,165] A new study created in memory with name: no-name-65aaadc0-9235-4272-b99f-1eb6eeb507b1
[I 2025-01-25 12:20:19,216] Trial 0 finished with value: 0.9210526315789473 and parameters: {'n_estimators': 107, 'max_depth': 4, 'min_samples_split': 5}. Best is trial 0 with value: 0.9210526315789473.
[I 2025-01-25 12:20:19,270] Trial 1 finished with value: 0.9210526315789473 and parameters: {'n_estimators': 123, 'max_depth': 12, 'min_samples_split': 7}. Best is trial 0 with value: 0.9210526315789473.
[I 2025-01-25 12:20:19,343] Trial 2 finished with value: 0.9210526315789473 and parameters: {'n_estimators': 168, 'max_depth': 26, 'min_samples_split': 6}. Best is trial 0 with value: 0.9210526315789473.
[I 2025-01-25 12:20:19,418] Trial 3 finished with value: 0.9210526315789473 and parameters: {'n_estimators': 171, 'max_depth': 8, 'min_samples_split': 10}. Best is trial 0 with value: 0.9210526315789473.
[I 2025-01-25 12:20:1

Best parameters: {'n_estimators': 107, 'max_depth': 4, 'min_samples_split': 5}
Test accuracy: 0.9210526315789473


Step 2: Save the Model

In [3]:
import joblib
joblib.dump(best_model, 'iris_model.pkl')

['iris_model.pkl']