In [1]:
import mlflow

In [2]:
experiment_name = "evaluation"

In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

In [8]:
X, y = make_classification(n_samples=1000, n_features=4, random_state = 3, scale = 10, n_informative = 3, n_redundant = 1)
X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])
y = pd.Series(y, name="target")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
feature_names = X.columns.tolist()

eval_data = X_test.copy()
eval_data["target"] = y_test

In [None]:
rfc = RandomForestClassifier()

rfc.fit(X_train, y_train)

artifact_path = "random-forest-classifier"

with mlflow.start_run(run_name = "evaluating_rfc") as run:
    mlflow.sklearn.log_model(rfc, artifact_path, registered_model_name = "random-forest-classifier")


    result = mlflow.evaluate(
        model = f"runs:/{run.info.run_id}/{artifact_path}",
        data = eval_data,
        model_type = "classifier",
        targets = "target",
        feature_names = feature_names,
    )


Registered model 'random-forest-classifier' already exists. Creating a new version of this model...
Created version '2' of model 'random-forest-classifier'.
2025/09/13 18:37:21 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2025/09/13 18:37:21 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2025/09/13 18:37:28 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


In [12]:
eval_data.head()

Unnamed: 0,f_0,f_1,f_2,f_3,target
642,13.390612,16.589707,-22.432819,13.999437,1
762,13.403873,-0.407266,14.633971,-3.26862,0
909,16.805758,13.792503,-21.213979,-8.215804,0
199,6.071158,-2.771887,10.972801,-6.992161,0
586,12.273573,8.164306,-12.881873,-11.386556,0


Evaluating Multiple Models

In [14]:
estimators = [2,5,10,20,30,50]
rfc_models = {}

for n_estimator in estimators:
    rfc = RandomForestClassifier(n_estimators=n_estimator)
    rfc.fit(X_train, y_train)
    rfc_models[str(n_estimator)] = rfc
    print(f"{rfc_models}")

{'2': RandomForestClassifier(n_estimators=2)}
{'2': RandomForestClassifier(n_estimators=2), '5': RandomForestClassifier(n_estimators=5)}
{'2': RandomForestClassifier(n_estimators=2), '5': RandomForestClassifier(n_estimators=5), '10': RandomForestClassifier(n_estimators=10)}
{'2': RandomForestClassifier(n_estimators=2), '5': RandomForestClassifier(n_estimators=5), '10': RandomForestClassifier(n_estimators=10), '20': RandomForestClassifier(n_estimators=20)}
{'2': RandomForestClassifier(n_estimators=2), '5': RandomForestClassifier(n_estimators=5), '10': RandomForestClassifier(n_estimators=10), '20': RandomForestClassifier(n_estimators=20), '30': RandomForestClassifier(n_estimators=30)}
{'2': RandomForestClassifier(n_estimators=2), '5': RandomForestClassifier(n_estimators=5), '10': RandomForestClassifier(n_estimators=10), '20': RandomForestClassifier(n_estimators=20), '30': RandomForestClassifier(n_estimators=30), '50': RandomForestClassifier(n_estimators=50)}


In [None]:
estimators = [2, 5, 10, 20, 30, 50]
rfc_models = {}

for n_estimators in estimators:
    rfc = RandomForestClassifier(n_estimators=n_estimators)
    rfc.fit(X_train, y_train)
    rfc_models[f"{n_estimators}"] = rfc
    print(f"Trained {n_estimators} trees.")

with mlflow.start_run(run_name="evaluating-multiple-models") as parent_run:

    for n_estimators, model in rfc_models.items():
        with mlflow.start_run(
            run_name=f"rfc_{n_estimators}", 
            parent_run_id=parent_run.info.run_id, 
            nested=True
        ) as child_run:
            artifact_path = "random-forest-model"
            mlflow.sklearn.log_model(model, artifact_path)
            mlflow.log_param("n_estimators", int(n_estimators))

            result = mlflow.evaluate(
                model=f"runs:/{child_run.info.run_id}/{artifact_path}",
                data=eval_data,
                model_type="classifier",
                targets="target",
                feature_names=feature_names,
            )

Trained 2 trees.
Trained 5 trees.
Trained 10 trees.
Trained 20 trees.
Trained 30 trees.
Trained 50 trees.


2025/09/13 21:13:11 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2025/09/13 21:13:11 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2025/09/13 21:13:14 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.
2025/09/13 21:13:26 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2025/09/13 21:13:26 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2025/09/13 21:13:29 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.
2025/09/13 21:13:43 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2025/09/13 21:13:43 INFO mlflow.models.evaluation.default_evaluator: Testing metr