# MLflow AutoLogging

Enables (or disables) and configures autologging for scikit-learn estimators.

>```python
>mlflow.sklearn.autolog(
>    log_input_examples=False,
>    log_model_signatures=True,
>    log_models=True,
>    log_datasets=True,
>    disable=False,
>    exclusive=False,
>    disable_for_unsupported_versions=False,
>    silent=False,
>    max_tuning_runs=5,
>    log_post_training_metrics=True,
>    serialization_format='cloudpickle',
>    registered_model_name=None,
>    pos_label=None,
>    extra_tags=None,
>)
>```


In [None]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from pprint import pprint
import mlflow 

from mlflow_for_ml_dev.experiments.exp_utils import get_or_create_experiment

In [None]:
# Obtain the experiment used by this notebook via the project helper,
# passing descriptive tags along with the experiment name.
# NOTE(review): `experiment` is not passed to `mlflow.start_run` in the cells
# below — presumably the helper also activates the experiment; confirm in
# exp_utils, otherwise the runs are recorded under the default experiment.
experiment_name = "sklearn_autologging"
experiment = get_or_create_experiment(
    experiment_name=experiment_name,
    tags={
        # The key used to be misspelled "proejct_name", so the tag could not
        # be found under the expected name.
        "project_name": "UNDEFINED",
        "topic": "run_management",
        "mlflow.note.content": "This experiment is used to test the autologging feature of mlflow.",
    },
)

In [None]:
# Switch on scikit-learn autologging with its default settings.
mlflow.sklearn.autolog()

# Load the iris dataset (as_frame=True) and separate features from target.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# Fit a default random forest inside an explicitly named run; the run id is
# printed so the run can be looked up afterwards.
rf = RandomForestClassifier()
with mlflow.start_run(run_name="autologged_run") as run:
    print(f"MLflow run_id: {run.info.run_id}")
    rf.fit(X, y)

In [None]:
# Look the run up again by its id and pretty-print the parameters stored on it.
run_id = run.info.run_id
run = mlflow.get_run(run_id=run_id)
pprint(run.data.params)


In [None]:
# Metrics stored on the run fetched with `mlflow.get_run` (bare expression:
# the notebook displays its value as the cell output).
run.data.metrics

In [None]:
# Tags stored on the run fetched with `mlflow.get_run` (bare expression:
# the notebook displays its value as the cell output).
run.data.tags

In [None]:
# Collect the artifact paths of the run: first the entries under "model",
# then the entries at the artifact root.
client = mlflow.MlflowClient()
model_artifacts = [entry.path for entry in client.list_artifacts(run_id, "model")]
run_artifacts = [entry.path for entry in client.list_artifacts(run_id)]

print("Model artifacts:")
pprint(model_artifacts)
print("All artifacts:")
pprint(run_artifacts)

## Logging Input examples

In [None]:
# Switch on scikit-learn autologging, this time asking for input examples.
mlflow.sklearn.autolog(log_input_examples=True)

# Load the iris dataset (as_frame=True) and separate features from target.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# Fit a default random forest inside an explicitly named run.
rf = RandomForestClassifier()
with mlflow.start_run(run_name="autologged_run") as run:
    print(f"MLflow run_id: {run.info.run_id}")
    rf.fit(X, y)

In [None]:
# Switch on scikit-learn autologging (with input examples) BEFORE the metric
# functions are imported below.
mlflow.sklearn.autolog(log_input_examples=True)

# Autologging must be enabled before scikit-learn metric APIs are imported
# from sklearn.metrics. Metric APIs imported before autologging is enabled
# do not log metrics to MLflow runs — hence this import sits here rather than
# at the top of the notebook.
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    recall_score,
)

# Load the iris dataset (as_frame=True) and separate features from target.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit on the training split, then score the held-out split inside the run.
rf = RandomForestClassifier()
with mlflow.start_run(run_name="autologged_run") as run:
    print(f"MLflow run_id: {run.info.run_id}")
    rf.fit(x_train, y_train)

    # Predictions on the held-out split feed every metric call below.
    y_pred = rf.predict(x_test)

    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    print(f"Balanced accuracy: {balanced_accuracy}")

    # Remaining metrics; recall and F1 use weighted averaging over classes.
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average="weighted")
    f1 = f1_score(y_test, y_pred, average="weighted")

: 