In [35]:
import warnings

import mlflow
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

warnings.filterwarnings('ignore')

In [36]:
# Load the iris data as a (features, labels) pair of arrays.
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### In a `models` list, define several models with their respective parameter values, together with the train and test datasets used to measure their performance

In [37]:
# One row per candidate: (run name, estimator class, parameter overrides).
# NOTE(review): recent scikit-learn releases appear to deprecate the
# liblinear solver for multiclass targets, and warnings are silenced in
# this notebook - confirm against the installed version.
model_specs = [
    ("Logistic Regression_liblinear", LogisticRegression, {"C": 1, "solver": 'liblinear'}),
    ("Random Forest_30", RandomForestClassifier, {"n_estimators": 30, "max_depth": 3}),
    ("Logistic Regression_lbfgs", LogisticRegression, {"max_iter": 100, "solver": 'lbfgs'}),
    ("Random Forest_100", RandomForestClassifier, {"n_estimators": 100, "max_depth": 7}),
]

# Expand each spec into the 5-tuple consumed downstream:
# (name, params, fresh default-constructed estimator, train set, test set).
# The parameter overrides are applied later via set_params.
models = [
    (name, overrides, estimator_cls(), (X_train, y_train), (X_test, y_test))
    for name, estimator_cls, overrides in model_specs
]

#### Loop through the models list and compute the classification report for each model. The classification report also gives detailed precision and recall for the individual classes.

In [38]:
# Fit every candidate and collect its classification report as a dict.
# reports[i] corresponds to models[i].
reports = []
for model_name, params, model, train_set, test_set in models:
    X_train, y_train = train_set
    X_test, y_test = test_set

    # Apply the per-model overrides, then train (set_params returns the estimator).
    model.set_params(**params).fit(X_train, y_train)

    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)

#### Set a new experiment in MLflow and then log the relevant things: the model name, its parameters, and metrics such as accuracy, recall for the individual classes, and the macro F1-score.

In [39]:
# Create (or select) the experiment that holds one run per trained model.
mlflow.set_experiment("Model Management Quickstart")

# `models` and `reports` are index-aligned (reports were appended in model
# order), so they can be walked in lockstep.
for element, report in zip(models, reports):
    model_name, params, model = element[:3]

    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)

        # Log the model BEFORE its metrics and pass its model_id explicitly,
        # so the metrics are attached to the logged model itself.
        # mlflow.search_logged_models filters and sorts on logged-model
        # metrics, so metrics recorded only on the run would not be found.
        model_info = mlflow.sklearn.log_model(model, model_name)

        # classification_report(output_dict=True) keys the per-class entries
        # by the string form of the label; iris has labels 0, 1 and 2.
        mlflow.log_metrics(
            {
                'accuracy': report['accuracy'],
                'recall_class_0': report['0']['recall'],
                'recall_class_1': report['1']['recall'],
                'recall_class_2': report['2']['recall'],
                'f1_score_macro': report['macro avg']['f1-score'],
            },
            model_id=model_info.model_id,
        )

#### Once all the models are logged, programmatically load the best-performing model

In [40]:
# Resolve the experiment ID from its name instead of hard-coding it:
# numeric IDs are assigned by the tracking store and differ between
# machines and tracking servers.
experiment = mlflow.get_experiment_by_name("Model Management Quickstart")
if experiment is None:
    raise RuntimeError(
        "Experiment 'Model Management Quickstart' not found; "
        "run the logging cell first."
    )

# Up to three logged models with accuracy above 0.97, best first.
# The default output format is a pandas DataFrame (one row per model).
top_models = mlflow.search_logged_models(
    experiment_ids=[experiment.experiment_id],
    filter_string="metrics.accuracy> 0.97",
    order_by=[{"field_name": "metrics.accuracy", "ascending": False}],
    max_results=3,
)

# Fail with a clear message rather than an IndexError from .iloc[0].
if top_models.empty:
    raise RuntimeError(
        "No logged model matched 'metrics.accuracy> 0.97'; "
        "check that metrics were logged against the models."
    )

best_model = top_models.iloc[0]
print(best_model)

#### Register the best-performing model under the name `best_iris_model`. Once that is done, you should be able to see it listed as a registered model under Models in the MLflow UI.

In [41]:
# Address the winning logged model by its model ID and register it
# in the model registry under a stable name.
best_model_uri = f"models:/{best_model.model_id}"
registered = mlflow.register_model(model_uri=best_model_uri, name="best_iris_model")

print("registered version:", registered.version)

Add tags and aliases to this registered model. Champion means this model is the one deployed. Challenger means a model that is a candidate to replace the champion. Once these aliases are defined and assigned, loading the corresponding model gets even easier.

In [42]:
client = mlflow.MlflowClient()

# Record how this version was chosen.
champion_description = (
    "Champion model selected via mlflow.search_logged_models, "
    f"run_id={best_model.source_run_id}, metric=accuracy."
)
client.update_model_version(
    name="best_iris_model",
    version=registered.version,
    description=champion_description,
)

# Tags attached to this model version, applied in insertion order:
# its role plus the experiment and run it originated from.
version_tags = {
    "role": "champion",
    "source_experiment_id": best_model.experiment_id,
    "run_id": best_model.source_run_id,
}
for tag_key, tag_value in version_tags.items():
    client.set_model_version_tag(
        name="best_iris_model",
        version=registered.version,
        key=tag_key,
        value=tag_value,
    )

# Point the "champion" alias at this version so it can be loaded by alias.
client.set_registered_model_alias(
    name="best_iris_model",
    alias="champion",
    version=registered.version,
)

#### You can now simply load the champion version of `best_iris_model` by its alias and test it

In [43]:
# champion_model = mlflow.sklearn.load_model(
#     model_uri="models:/best_iris_model@champion"
# )
# print(champion_model.get_params())

#### Sanity check: confirm that the loaded champion model is indeed the top model we found earlier by comparing the corresponding run IDs.

In [44]:
# champion_model = client.get_model_version_by_alias(
#     name="best_iris_model",
#     alias="champion"
# )
# print(champion_model.run_id)