# MLflow Tracking

MLflow Tracking is an API and UI for logging parameters, code versions, metrics, and output files when running your machine learning code, and for visualizing the results later.

## Concepts

![Taken from MLflow Docs](https://mlflow.org/docs/latest/_images/tracking-basics.png)

**Runs**

MLflow Tracking is organized around the concept of runs, which are executions of some piece of data science code, for example, a single `python train.py` execution.


**Experiments** 

An experiment groups together runs for a specific task. 




In [None]:
import mlflow
# Log in to the MLflow tracking backend before any tracking call is made;
# the experiments below are addressed by Databricks Workspace paths.
mlflow.login()

In [None]:
# Display the tracking URI that is currently in effect for this session.
mlflow.get_tracking_uri()

# Create an experiment

To create an experiment in Databricks, the name must be a path in the Workspace, for example: `/Shared/Users/...`

In [None]:
# Workspace path that is used as the experiment name in the following cells.
experiment_name = "/Shared/Experiments/01 - Introduction to MLflow - 1"

The parent folder of this path (here `/Shared/Experiments`) must already exist in the Workspace before the experiment is created.

In [None]:
from mlflow_for_ml_dev.experiments.exp_utils import print_experiment_info

In [None]:
# Create the experiment and print the id that create_experiment returns.
# NOTE(review): re-running this cell once the experiment exists is expected
# to raise, because the name is already taken — confirm.
experiment_id = mlflow.create_experiment(name=experiment_name)
print(experiment_id)

In [None]:
# Set the experiment "/Shared/Experiments/01 - Introduction to MLflow - 1" as the active experiment
experiment = mlflow.set_experiment(experiment_name)

In [None]:
# Display where this experiment stores its artifacts. No artifact_location
# was passed to create_experiment, so this is whatever the backend assigned.
experiment.artifact_location

In [None]:
# Demo run: record one parameter and one model under the active experiment.

from sklearn.ensemble import RandomForestClassifier

# The classifier is never fitted in this cell; it only serves as an object to log.
rfc = RandomForestClassifier(n_estimators=10)

with mlflow.start_run(run_name="first-run") as run:
    mlflow.log_param(key="param1", value=5)
    mlflow.sklearn.log_model(
        sk_model=rfc,
        artifact_path="sklearn-model",
    )

## Specifying Artifact Location

In [None]:
# Second experiment: this time the artifact store is chosen explicitly
# instead of being left to the backend.
experiment_name = "/Shared/Experiments/01 - Introduction to MLflow - 2"
experiment_id = mlflow.create_experiment(
    name=experiment_name,
    artifact_location="dbfs:/FileStore/mlflow-experiments",
)

In [None]:
# Set the experiment "/Shared/Experiments/01 - Introduction to MLflow - 2" as the active experiment
experiment = mlflow.set_experiment(experiment_name)

In [None]:
# Display the artifact location of this experiment; it is expected to reflect
# the artifact_location passed to create_experiment above.
experiment.artifact_location

In [None]:
# Demo run: record one parameter and one model under the active experiment.
from sklearn.ensemble import RandomForestClassifier

# The classifier is never fitted in this cell; it only serves as an object to log.
rfc = RandomForestClassifier(n_estimators=10)

with mlflow.start_run(run_name="first-run") as run:
    mlflow.log_param(key="param1", value=5)
    mlflow.sklearn.log_model(
        sk_model=rfc,
        artifact_path="sklearn-model",
    )

## Adding tags

In [None]:
# Third experiment: tags are attached at creation time so the experiment can
# be found later with a tag filter.
experiment_name = "/Shared/Experiments/01 - Introduction to MLflow - 3"
experiment_id = mlflow.create_experiment(
    name=experiment_name,
    tags={
        "topic": "experiment_management",
        "project_name": "UNKNOWN",
    },
)

In [None]:
# Set the experiment "/Shared/Experiments/01 - Introduction to MLflow - 3" as the active experiment
experiment = mlflow.set_experiment(experiment_name)

In [None]:
# Display where this experiment stores its artifacts. No artifact_location
# was passed when it was created, so this is whatever the backend assigned.
experiment.artifact_location

In [None]:
# Demo run: record one parameter and one model under the active experiment.
from sklearn.ensemble import RandomForestClassifier

# The classifier is never fitted in this cell; it only serves as an object to log.
rfc = RandomForestClassifier(n_estimators=10)

with mlflow.start_run(run_name="first-run") as run:
    mlflow.log_param(key="param1", value=5)
    mlflow.sklearn.log_model(
        sk_model=rfc,
        artifact_path="sklearn-model",
    )

In [None]:
# Tags double as search keys: fetch every experiment whose "topic" tag
# equals "experiment_management".
experiments = mlflow.search_experiments(
    filter_string="tags.topic = 'experiment_management'",
)

In [None]:
# Print a summary of each matching experiment. A dedicated loop variable is
# used so that the notebook-level `experiment` (the active experiment returned
# by set_experiment) is not rebound to a search result.
for matched_experiment in experiments:
    print_experiment_info(matched_experiment)

## Adding a description

In [None]:
# Fourth experiment: the description is supplied through the
# "mlflow.note.content" tag, next to the regular tags.
experiment_name = "/Shared/Experiments/01 - Introduction to MLflow - 4"
experiment_id = mlflow.create_experiment(
    name=experiment_name,
    tags={
        "topic": "experiment_management",
        "project_name": "UNKNOWN",
        "mlflow.note.content": "This is a test experiment",
    },
)

In [None]:
# Set the experiment "/Shared/Experiments/01 - Introduction to MLflow - 4" as the active experiment
experiment = mlflow.set_experiment(experiment_name)

## Update Tags

In [None]:
# Add several tags in one call. No experiment id is passed, so the call
# applies to the currently active experiment.
tags = dict(tag1="value1", tag2="value2")
mlflow.set_experiment_tags(tags=tags)


In [None]:
# Look up experiments by the tag that was just added and print each match.
# The loop uses its own variable: later cells read `experiment.experiment_id`
# and must keep seeing the active experiment, not the last search result.
experiments = mlflow.search_experiments(filter_string="tags.tag1 = 'value1'")
for matched_experiment in experiments:
    print_experiment_info(matched_experiment)

In [None]:
# Update the value of tag1: setting an existing key again replaces its value.
mlflow.set_experiment_tag(key="tag1", value="new_value1")

In [None]:
# The search now matches only when filtering on the new value "new_value1".
# The loop uses its own variable: later cells read `experiment.experiment_id`
# and must keep seeing the active experiment, not the last search result.
experiments = mlflow.search_experiments(filter_string="tags.tag1 = 'new_value1'")
for matched_experiment in experiments:
    print_experiment_info(matched_experiment)

## Using the client to set a tag

In [None]:
# Create a client object; the following cells call its methods with an
# explicit experiment id instead of relying on the active experiment.
client = mlflow.MlflowClient()

In [None]:
# Tag the experiment through the client, addressing it by id.
client.set_experiment_tag(
    experiment_id=experiment.experiment_id,
    key="tag3",
    value="value3",
)

In [None]:
# Check that the tag was set: the experiment should now match a filter on tag3.
# The loop uses its own variable so that `experiment`, which the rename cell
# below reads for its experiment_id, is not rebound to a search result.
experiments = mlflow.search_experiments(filter_string="tags.tag3 = 'value3'")
for matched_experiment in experiments:
    print_experiment_info(matched_experiment)

## Rename Experiment

In [None]:
# Rename the experiment referenced by `experiment`; it is identified by id,
# and only its name changes.
new_name = "/Shared/Experiments/01 - Introduction to MLflow - 4 - Renamed"
client.rename_experiment(
    experiment_id=experiment.experiment_id,
    new_name=new_name,
)

## Clean Up

In [None]:
# List every experiment whose name starts with "/Shared/Experiments". The
# filter is a prefix match, so it is not limited to the experiments created
# in this notebook.
experiments = mlflow.search_experiments(filter_string="name LIKE '/Shared/Experiments%'")
# Alternative: consider all experiments instead of only the prefix matches.
# experiments = mlflow.search_experiments()
for experiment in experiments:
    print(f"Deleting: {experiment.name}")
    # The delete call is commented out, so this loop only prints the names;
    # uncomment the next line to actually delete the experiments.
    # mlflow.delete_experiment(experiment.experiment_id)
