# Hyperparameter Tuning with Optuna and MLflow

This is a minimal notebook demonstrating hyperparameter tuning with Optuna, using MLflow to track each trial as a nested run under a single parent run.

In [None]:
# Import common libraries
import optuna
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

import mlflow

In [None]:
# Load the iris dataset and hold out 20% of the samples for evaluation
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible across runs
)

In [None]:
# Define the objective function for Optuna
def objective(trial):
    """Train and score one random-forest configuration for an Optuna trial.

    Each call opens its own MLflow run (started with ``nested=True`` so it
    can sit under a parent run), samples the hyperparameters from ``trial``,
    and logs both the sampled values and the resulting accuracy to that run.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        The score of the fitted model on ``X_test``/``y_test``; this is the
        value the study optimizes.
    """
    with mlflow.start_run(nested=True):
        # Sample every hyperparameter once into a single dict so that the
        # values logged to MLflow and the values passed to the model cannot
        # drift apart. Keys match RandomForestClassifier's keyword names.
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 100),
            "max_depth": trial.suggest_int("max_depth", 5, 30),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        }

        # Log all parameters in one batched call instead of one call each
        mlflow.log_params(params)

        # Train model (fixed seed keeps trials comparable)
        model = RandomForestClassifier(**params, random_state=42)
        model.fit(X_train, y_train)

        # Evaluate and log metrics
        accuracy = model.score(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)

        return accuracy

In [None]:
# Run the Optuna study inside a parent MLflow run; the objective opens its
# per-trial runs with nested=True so they are grouped under this one.
with mlflow.start_run(run_name="optuna_parent_run"):
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)

    # Record the best configuration on the parent run in one batched call,
    # prefixing each key with "best_" to distinguish it from per-trial params.
    best_params = study.best_params
    mlflow.log_params({f"best_{name}": value for name, value in best_params.items()})

    mlflow.log_metric("best_accuracy", study.best_value)