In [1]:
import mlflow
import mlflow.sklearn
import optuna
from optuna.integration.mlflow import MLflowCallback
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# Initialize MLflow
mlflow.set_experiment("Iris SVM Classification")

def objective(trial):
    # Define the hyperparameters search space
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
    C = trial.suggest_loguniform('C', 1e-5, 1e2)
    
    # Train an SVM model with the suggested hyperparameters
    svm_model = SVC(kernel=kernel, C=C, random_state=42)
    svm_model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = svm_model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    
    # Log accuracy as the optimization target
    return accuracy

# Create an MLflow callback for Optuna
mlflc = MLflowCallback(
    tracking_uri=mlflow.get_tracking_uri(),
    metric_name="accuracy"
)

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, callbacks=[mlflc])

# Get the best trial
best_trial = study.best_trial
best_params = best_trial.params

# Train the best model using the best parameters
with mlflow.start_run():
    best_svm_model = SVC(**best_params, random_state=42)
    best_svm_model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = best_svm_model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, target_names=iris.target_names)
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Log metrics to MLflow
    mlflow.log_param("kernel", best_params['kernel'])
    mlflow.log_param("C", best_params['C'])
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_text(class_report, "classification_report.txt")
    mlflow.log_artifact("classification_report.txt")
    
    # Log the model
    mlflow.sklearn.log_model(best_svm_model, "svm_model")
    mlflow.sklearn.save_model(best_svm_model, path="svm_model")

    # Print the classification report and confusion matrix
    print(f"Best Kernel: {best_params['kernel']}")
    print(f"Best C: {best_params['C']}")
    print(f"Accuracy: {accuracy}")
    print(f"Classification Report:\n{class_report}")
    print(f"Confusion Matrix:\n{conf_matrix}")


  mlflc = MLflowCallback(
[I 2024-08-13 08:50:21,866] A new study created in memory with name: no-name-4eea528c-79c5-4129-887d-000cac176c85
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
[I 2024-08-13 08:50:21,872] Trial 0 finished with value: 0.5333333333333333 and parameters: {'kernel': 'linear', 'C': 1.2350035029047487e-05}. Best is trial 0 with value: 0.5333333333333333.
2024/08/13 08:50:21 INFO mlflow.tracking.fluent: Experiment with name 'no-name-4eea528c-79c5-4129-887d-000cac176c85' does not exist. Creating a new experiment.
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
[I 2024-08-13 08:50:21,894] Trial 1 finished with value: 1.0 and parameters: {'kernel': 'poly', 'C': 0.15580216291777882}. Best is trial 1 with value: 1.0.
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
[I 2024-08-13 08:50:21,908] Trial 2 finished with value: 1.0 and parameters: {'kernel': 'poly', 'C': 0.01613627866824646}. Best is trial 1 with value: 1.0.
  C = trial.suggest_loguniform('C', 1e-5, 1e2)
[I 2024-0

Best Kernel: poly
Best C: 0.15580216291777882
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
