# Example: Iris classifier

In [None]:
#Load packages
import os
import mlflow
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn import tree
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from mlflow_for_ml_dev.experiments.exp_utils import get_or_create_experiment

## Loading and Exploring Dataset

In [None]:
## Load data
# as_frame=True is passed so the dataset is requested as pandas objects; later
# cells read data.data, data.target, data.feature_names and data.target_names
# from this result.
data = load_iris(as_frame=True)


In [None]:
# Preview the feature table; .head() keeps the cell output short instead of
# echoing the whole frame.
data.data.head()

In [None]:
# Summarise the labels as per-class sample counts rather than printing every
# value; sort_index() lists the classes in label order.
data.target.value_counts().sort_index()

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across notebook runs.
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

## Modelling

In [None]:
# Tags passed along when the "iris" experiment is fetched or created.
experiment_tags = {
    "mlflow.note.content": "iris dataset classification experiment",
    "project_name": "iris_classifier",
    "task": "classification",
    "topic": "run_management",
}

experiment = get_or_create_experiment(experiment_name="iris", tags=experiment_tags)

In [None]:
# Train a random forest inside an MLflow run and log its evaluation report,
# metric, confusion-matrix figure, hyper-parameters and the fitted model.
with mlflow.start_run(run_name="iris_classifier", experiment_id=experiment.experiment_id) as run:

    print("Run ID:", run.info.run_id)

    # Seed the forest so that Restart & Run All reproduces the same trees,
    # metrics and logged parameters (same seed as the train/test split).
    rfc = RandomForestClassifier(random_state=42)

    # train model
    rfc.fit(x_train, y_train)

    # predict on the held-out data and evaluate
    y_pred = rfc.predict(x_test)
    classification_report_ = classification_report(y_test, y_pred, output_dict=True)
    # NOTE: this value is the *balanced* accuracy, even though it is logged
    # under the metric name "accuracy" below.
    accuracy_score = balanced_accuracy_score(y_test, y_pred)
    confusion_matrix = ConfusionMatrixDisplay.from_estimator(rfc, x_test, y_test)

    # log the full classification report as a json file
    mlflow.log_dict(classification_report_, "files/classification_report.json")

    # log metrics
    mlflow.log_metric("accuracy", accuracy_score)

    # log figure
    mlflow.log_figure(confusion_matrix.figure_, "confusion_matrix.png")

    # log params (includes the random_state set above)
    mlflow.log_params(rfc.get_params())

    # log model
    mlflow.sklearn.log_model(sk_model=rfc, artifact_path="model", metadata={"some_key": "some_value"})

**Plot the first five estimators of the forest**

In [None]:
# code taken from this post https://stackoverflow.com/questions/40155128/plot-trees-for-a-random-forest-in-python-with-scikit-learn
# Pass plain lists to plot_tree: some scikit-learn releases validate
# feature_names/class_names strictly and reject non-list inputs such as the
# array that load_iris is expected to return for target_names.
fn = list(data.feature_names)
cn = list(data.target_names)

n_trees = 5  # number of estimators to draw, one subplot each
fig, axes = plt.subplots(nrows=1, ncols=n_trees, figsize=(10, 2), dpi=900)

# zip() pairs each subplot with one fitted tree and stops at the shorter
# sequence, so a forest with fewer than n_trees estimators does not raise.
for index, (tree_ax, estimator) in enumerate(zip(axes, rfc.estimators_)):
    tree.plot_tree(
        estimator,
        feature_names=fn,
        class_names=cn,
        filled=True,
        ax=tree_ax,
    )
    tree_ax.set_title('Estimator: ' + str(index), fontsize=11)


# create directory for artifacts
os.makedirs('./artifacts_example', exist_ok=True)

# save figure
fig.savefig('./artifacts_example/rf_5trees.png')

**Plot the feature importances**

In [None]:
# Forest-level importances, with the spread across the individual trees used
# as error bars.
importances = rfc.feature_importances_
per_tree_importances = [estimator.feature_importances_ for estimator in rfc.estimators_]
std = np.std(per_tree_importances, axis=0)
forest_importances = pd.Series(importances, index=fn)

fig, ax = plt.subplots()
forest_importances.plot.bar(yerr=std, ax=ax)
ax.set(
    title="Feature importances using MDI",
    ylabel="Mean decrease in impurity",
)
fig.tight_layout()

# write the chart next to the other artifacts
fig.savefig("./artifacts_example/feature_importance.png")

In [None]:
# Build the predictions table as a NEW frame instead of adding columns to
# x_test. Mutating x_test in place would make the notebook non-idempotent
# (re-running the training cell would call predict on a frame with two extra
# columns) and can trigger pandas' SettingWithCopyWarning.
predictions = x_test.assign(target=y_test, prediction=y_pred)

# save predictions
predictions.to_csv("./artifacts_example/predictions.csv", index=False)

In [None]:
# Re-open the run started earlier (by its run id) and attach the files that
# were written to ./artifacts_example.
with mlflow.start_run(run_id=run.info.run_id, experiment_id=experiment.experiment_id):

    # Log the files one by one under "artifacts"
    for artifact_file in (
        "./artifacts_example/rf_5trees.png",
        "./artifacts_example/feature_importance.png",
        "./artifacts_example/predictions.csv",
    ):
        mlflow.log_artifact(local_path=artifact_file, artifact_path="artifacts")

    # Log the whole directory in one call under "all_artifacts"
    mlflow.log_artifacts(local_dir="./artifacts_example", artifact_path="all_artifacts")

: 