In [None]:
# ! pip install flowcept[mlflow]

### Set the env var pointing to the conf file where the ports, hostnames, and other conf variables are read from.

An example configuration file is available in the `resources` directory of the FlowCept repository. You can use it as is if you are running this notebook on your local laptop.

In [None]:
# SQLite file used as the MLflow tracking store (relative path).
SQLITE_PATH = "mlflow.db"

In [None]:
## This cell resets the MLflow database
import contextlib
import shutil
from pathlib import Path

import mlflow

# Remove the previous SQLite file and the local `mlruns` directory with portable
# Python calls instead of shelling out to `rm`; a missing file/directory is not an error.
with contextlib.suppress(FileNotFoundError):
    Path(SQLITE_PATH).unlink()
shutil.rmtree("mlruns", ignore_errors=True)

mlflow.set_tracking_uri(f"sqlite:///{SQLITE_PATH}")
# Create an experiment and delete it right away.
# NOTE(review): presumably this forces MLflow to create the database file up front — confirm.
mlflow.delete_experiment(mlflow.create_experiment('starter'))

In [None]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

## Define a simple MLFlow workflow

In [None]:
def run_mlflow_workflow(sqlite_path=SQLITE_PATH, batch_size=64, epochs=10):
    """Record one mock training run in MLflow and return its run id.

    The tracking URI is set to ``sqlite:///<sqlite_path>``, a new experiment
    named ``LinearRegression<uuid4>`` is created, and a single run logs the
    ``epochs`` and ``batch_size`` params plus a constant ``loss`` metric (0.04).
    No real training happens.

    Args:
        sqlite_path: Path of the SQLite file used as the MLflow tracking store.
        batch_size: Value logged as the ``batch_size`` param.
        epochs: Value logged as the ``epochs`` param.

    Returns:
        The run id of the MLflow run that was created.
    """
    import uuid
    import mlflow

    tracking_uri = f"sqlite:///{sqlite_path}"
    mlflow.set_tracking_uri(tracking_uri)

    # A random UUID suffix gives every call its own experiment name.
    unique_experiment_name = "LinearRegression" + str(uuid.uuid4())
    exp_id = mlflow.create_experiment(unique_experiment_name)

    with mlflow.start_run(experiment_id=exp_id) as active_run:
        mlflow.log_params({"epochs": epochs})
        mlflow.log_params({"batch_size": batch_size})
        # Actual training code would go here
        print("Generated training metadata.")
        mlflow.log_metric("loss", 0.04)
        run_id = active_run.info.run_id
    return run_id

## Initialize consumer API

In [None]:
# Instantiate Flowcept with the "mlflow" argument and start it; the instance is
# stopped in the last cell of this notebook.
# NOTE(review): presumably starting it begins capturing MLflow runs — confirm.
from flowcept import Flowcept
flowcept = Flowcept("mlflow")
flowcept.start()

## Run MLFlow workflow

In [None]:
# Run the workflow once against SQLITE_PATH with batch_size=18 (epochs keeps its
# default) and keep the returned MLflow run id for the query further down.
mlflow_run_id = run_mlflow_workflow(SQLITE_PATH, batch_size=18)
print(f"MLflow task id={mlflow_run_id}")

In [None]:
# Pause for 10 seconds before querying.
# NOTE(review): presumably gives the consumer time to persist the task — confirm.
sleep(10)

## Initialize Query API

In [None]:
# Create the query client used by the query cells below.
from flowcept import TaskQueryAPI
query_api = TaskQueryAPI()

## Query the task executed

In [None]:
# Look up the task whose task_id equals the MLflow run id returned by the first
# workflow run; the query result is this cell's displayed output.
_filter = {"task_id": mlflow_run_id}
query_api.query(_filter)

# Inspect more tasks through the query api

In [None]:
from flowcept.commons.utils import get_utc_minutes_ago

### Get the tasks executed in my experiment in the last 60 minutes

This example assumes that you have run the Dask notebook example before. If you haven't run it, just ignore these queries.

In [None]:
# Tasks of campaign "super_campaign" whose utc_timestamp is >= the value
# returned by get_utc_minutes_ago(60).
window_start = get_utc_minutes_ago(60)
_filter = {
    "utc_timestamp": {"$gte": window_start},
    "campaign_id": "super_campaign",
}
query_api.query(_filter)

### Get the tasks executed in my experiment in the last 60 minutes that were executed using either mlflow or dask

In [None]:
# Same campaign/time window as before, restricted to tasks whose adapter_id is
# one of the listed adapters ("$in" matches any of them).
adapter_ids = ['mlflow', 'dask']
window_start = get_utc_minutes_ago(60)
_filter = {
    "utc_timestamp": {"$gte": window_start},
    "campaign_id": "super_campaign",
    "adapter_id": {"$in": adapter_ids},
}
docs = query_api.query(_filter)
docs

### Get the Dask tasks executed in my experiment in the last 120 minutes that generated a batch_size >= 0

In [None]:
# Dask tasks of "super_campaign" with a timestamp >= get_utc_minutes_ago(120)
# that generated a batch_size >= 0.
window_start = get_utc_minutes_ago(120)
_filter = {
    "utc_timestamp": {"$gte": window_start},
    "campaign_id": "super_campaign",
    "adapter_id": 'dask',
    "generated.batch_size": {"$gte": 0},
}
docs = query_api.query(filter=_filter)

# Print each matching task and collect its generated batch_size, in query order.
batch_sizes = []
for doc in docs:
    task_id = doc['task_id']
    generated_batch_size = doc['generated']['batch_size']
    print(f"task={task_id}, generated batch_size={generated_batch_size}")
    batch_sizes.append(generated_batch_size)
batch_sizes

### Now run a new MLFlow task using the batch_sizes generated by the Dask workflow

In [None]:
# Fall back to a single batch size of 32 when the previous query produced none
# (e.g. the Dask workflow was not executed first).
if not len(batch_sizes):
    batch_sizes = [32]

In [None]:
# Run one MLflow workflow per batch size (sqlite_path and epochs keep their defaults).
# NOTE: mlflow_task is overwritten on every iteration, so after the loop it holds
# only the run id of the last workflow.
for batch_size in batch_sizes:
    mlflow_task = run_mlflow_workflow(batch_size=batch_size)
    print(mlflow_task)

In [None]:
# Pause for 15 seconds before querying.
# NOTE(review): presumably gives the consumer time to persist the new tasks — confirm.
sleep(15)

### Get the last of these tasks

In [None]:
# Query the task whose id is stored in mlflow_task, i.e. the last run started
# by the preceding loop.
_filter = {"task_id": mlflow_task}
docs = query_api.query(filter=_filter)
docs

In [None]:
# Fail with a diagnostic message when the previous query returned no task document.
assert len(docs) > 0, f"No task found for task_id={mlflow_task!r}"

## Stop consumers

In [None]:
# Stop the Flowcept instance that was started earlier in this notebook.
flowcept.stop()