# RandomForestRegressor Scikit-learn Model Example
This example trains a RandomForestRegressor on the diabetes dataset and logs the hyperparameters, metrics, and the trained model.

Like the other RandomForestRegressor example, we enable autologging for RandomForestRegressor scikit-learn models, here via mlflow.autolog() (the scikit-learn-specific entry point is mlflow.sklearn.autolog()). Saving / loading models also supports RandomForestRegressor scikit-learn models.


# Get Started 

# Reference at https://github.com/mlflow/mlflow/tree/master/examples
# Quickstart : https://mlflow.org/docs/latest/quickstart.html


In [1]:
pip install mlflow

Collecting urllib3>=1.26.0
  Downloading urllib3-2.0.3-py3-none-any.whl (123 kB)
     |████████████████████████████████| 123 kB 1.0 MB/s eta 0:00:01
Installing collected packages: urllib3
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.25.11
    Uninstalling urllib3-1.25.11:
      Successfully uninstalled urllib3-1.25.11
ERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

requests 2.24.0 requires urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1, but you'll have urllib3 2.0.3 which is incompatible.
docker 6.1.3 requires requests>=2.26.0, but you'll have requests 2.24.0 which is incompatible.
databricks-cli 0.17.7 requires urllib3<2.0.0,>=1.26.7, but you'll have urllib3 2.0.3 which is incompatible.
botocore 1.27.63 requires

In [19]:
import mlflow
from mlflow.models.signature import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    recall_score,
    precision_score,
    ConfusionMatrixDisplay,
    confusion_matrix,
    roc_auc_score,
    RocCurveDisplay,
    classification_report,
)
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

mlflow.autolog()


def eval_metrics(actual, pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted target values to compare against ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

def accuracy_score(model, X_train, y_train):
    """Return the model's own ``score`` on the given data.

    NOTE(review): this helper shadows the ``accuracy_score`` imported from
    ``sklearn.metrics`` earlier in this cell. For scikit-learn regressors
    ``score`` is the coefficient of determination (R^2), not a
    classification accuracy.

    Args:
        model: Fitted estimator exposing ``score(X, y)``.
        X_train: Feature matrix passed through to ``model.score``.
        y_train: Target values passed through to ``model.score``.

    Returns:
        Whatever ``model.score(X_train, y_train)`` returns.
    """
    return model.score(X_train, y_train)


with mlflow.start_run() as run:
    # Load the diabetes regression dataset and hold out a test split.
    # NOTE(review): no random_state is passed to the split or the forest, so
    # the metrics below change from run to run.
    db = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

    # Create and train the model.
    rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
    rf.fit(X_train, y_train)

    # Use the model to make predictions on the test dataset and evaluate them.
    predictions = rf.predict(X_test)
    rmse, mae, r2 = eval_metrics(y_test, predictions)

    # Score on the *training* data. The result is bound to its own name so
    # the ``accuracy_score`` helper is not overwritten with a float, which
    # would make this cell fail with a TypeError when it is re-run.
    train_score = accuracy_score(rf, X_train, y_train)

    print("Run ID: {}".format(run.info.run_id))

    # Print out metrics
    print("  Accuracy Score: %s" % train_score)
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # Log metrics to MLflow
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("accuracy_score", train_score)

    # Infer the model signature from the test inputs/outputs and log the
    # model exactly once (it was previously logged twice with identical
    # arguments to the same artifact path).
    signature = infer_signature(X_test, predictions)
    mlflow.sklearn.log_model(rf, "model", signature=signature)

2023/07/02 00:35:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/07/02 00:35:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


Run ID: 12bd191f0a5c4b1cbe2aefc5b1401a8b
  Accuracy Score: 0.7814019153641414
  RMSE: 56.47667079630999
  MAE: 45.18754060232396
  R2: 0.44017027450939816


## Train.py

In [26]:
import mlflow.sklearn
# from utils import fetch_logged_data
from sklearn.metrics import f1_score

from mlflow.tracking import MlflowClient


def yield_artifacts(run_id, path=None):
    """Walk a run's artifact tree and yield the path of every file in it.

    Directory entries are descended into recursively rather than yielded.

    Args:
        run_id: ID of the MLflow run whose artifacts are listed.
        path: Artifact sub-path to list; ``None`` is passed through to
            ``MlflowClient.list_artifacts`` unchanged.

    Yields:
        The ``path`` attribute of each non-directory artifact entry.
    """
    for entry in MlflowClient().list_artifacts(run_id, path):
        if not entry.is_dir:
            yield entry.path
            continue
        yield from yield_artifacts(run_id, entry.path)


def fetch_logged_data(run_id):
    """Collect what was logged to a run: params, metrics, tags, artifacts.

    Args:
        run_id: ID of the MLflow run to inspect.

    Returns:
        A dict with keys ``"params"``, ``"metrics"``, ``"tags"`` (tags whose
        key does not start with ``"mlflow."``) and ``"artifacts"`` (list of
        artifact file paths from ``yield_artifacts``).
    """
    run_data = MlflowClient().get_run(run_id).data

    # Exclude system tags: https://www.mlflow.org/docs/latest/tracking.html#system-tags
    user_tags = {}
    for tag_key, tag_value in run_data.tags.items():
        if tag_key.startswith("mlflow."):
            continue
        user_tags[tag_key] = tag_value

    artifact_paths = list(yield_artifacts(run_id))

    return dict(
        params=run_data.params,
        metrics=run_data.metrics,
        tags=user_tags,
        artifacts=artifact_paths,
    )

if __name__ == "__main__":
    # ``rf``, ``X_train`` and ``X_test`` are the model and data splits created
    # in the training cell above; this cell must run after it.
    score = rf.score(X_train, y_train)
    print("Score: %s" % score)
    # With no run active, the fluent API starts a new run on this call.
    mlflow.log_metric("score", score)

    # Infer the model signature from the test inputs/outputs and log the model.
    predictions = rf.predict(X_test)
    signature = infer_signature(X_test, predictions)
    mlflow.sklearn.log_model(rf, "model", signature=signature)
    # ``run_id`` replaces the deprecated ``RunInfo.run_uuid`` attribute.
    print("Model saved in run %s" % mlflow.active_run().info.run_id)

    run_id = mlflow.last_active_run().info.run_id
    print("Logged data and model in run {}".format(run_id))

    # Show everything that was logged to the run.
    for key, data in fetch_logged_data(run_id).items():
        print("\n---------- logged {} ----------".format(key))
        print(data)



Score: 0.7814019153641414
Model saved in run b1032f2a8e424903bb7df7f522962a26
Logged data and model in run b1032f2a8e424903bb7df7f522962a26

---------- logged params ----------
{}

---------- logged metrics ----------
{'score': 0.7814019153641414}

---------- logged tags ----------
{}

---------- logged artifacts ----------
['model/MLmodel', 'model/conda.yaml', 'model/model.pkl', 'model/python_env.yaml', 'model/requirements.txt']


In [27]:
# End the currently active MLflow run, if there is one.
mlflow.end_run()

In [28]:
mlflow ui

SyntaxError: invalid syntax (3574192917.py, line 1)