# Run workflow using MLflow

The workflow consists of an ETL step, a training step, and a scoring step.

In [1]:
# Make the repository root importable so that project packages living one
# directory above this notebook can be imported by the cells below.
import sys

REPO_ROOT = "../"

# Prepend the repo root rather than overwriting sys.path[0]: assignment would
# discard whatever entry was first on the import path (and fail on an empty
# path). The guard keeps the cell idempotent when it is re-run.
if sys.path[:1] != [REPO_ROOT]:
    sys.path.insert(0, REPO_ROOT)

In [2]:
import os
import mlflow
import core.mlflow_helper as mhelper
from core.util import load_config
from core.pandas_helper import load_pandas_df

In [3]:
# Configure the remote tracking server.
# Read the "general" section of the workflow configuration, then hand its
# experiment name to the project helper.
# NOTE(review): set_remote_tracking is defined in core.mlflow_helper; presumably
# it sets the tracking URI and the active experiment - confirm there.
general_conf = load_config("../workflow_conf.yaml", "general")
experiment_name = general_conf["experiment_name"]
mhelper.set_remote_tracking(experiment_name)

### Run ETL

In [5]:
# Launch the "etl" entry point of the MLflow project located at the repo root.
# The returned run handle is kept in `etl_run` so the run ID can be read
# programmatically (`etl_run.run_id`) instead of being copied from the logs.
etl_run = mlflow.run(
    "../",
    entry_point="etl",
    env_manager="local",
    parameters={},  # no parameter overrides for the ETL entry point
)
# Last expression of the cell: display the submitted-run handle.
etl_run

2022/08/04 06:14:20 INFO mlflow.projects.docker: === Building docker image bolig_market ===
2022/08/04 06:14:31 INFO mlflow.projects.utils: === Created directory /tmp/tmpor7_5viz for downloading remote URIs passed to arguments of type 'path' ===
2022/08/04 06:14:31 INFO mlflow.projects.backend.local: === Running command 'docker run --rm -v /home/subu/.aws:/.aws -e MLFLOW_RUN_ID=dafdd3618c81490d8a55d027a219aa26 -e MLFLOW_TRACKING_URI=http://10.100.55.198:5000 -e MLFLOW_EXPERIMENT_ID=5 docker.io/library/bolig_market:latest python workflow_runner.py etl --no-run-on-delta --config-grid-index -1' in run with ID 'dafdd3618c81490d8a55d027a219aa26' === 


Getting all URLs
Loading URL content


100%|██████████| 116/116 [00:00<00:00, 347279.99it/s]
100%|██████████| 116/116 [01:33<00:00,  1.24it/s]
[2022-08-04 06:16:10,533] [INFO] [botocore.credentials]: Found credentials in shared credentials file: ~/.aws/credentials


Error occured in URL https://www.boligsiden.dk/postnummer/1500/solgte/ejerlejlighed?sortAscending=true&registrationTypes=normal&areaMin=20&areaMax=30&yearSoldFrom=1994&yearSoldTo=1995
Error occured in URL https://www.boligsiden.dk/postnummer/1500/solgte/ejerlejlighed?sortAscending=true&registrationTypes=normal&areaMin=30&areaMax=40&yearSoldFrom=2003&yearSoldTo=2004


2022/08/04 06:16:11 INFO mlflow.projects: === Run (ID 'dafdd3618c81490d8a55d027a219aa26') succeeded ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x7f9cc29aad70>

### Run training

In [6]:
# Launch the "train" entry point of the MLflow project located at the repo root.
# ID of the run that supplies the input data; it is hard-coded, so update it
# whenever the upstream run is re-executed.
input_data_run_id = "dafdd3618c81490d8a55d027a219aa26"

# Keep the returned run handle so the training run ID is available as
# `train_run.run_id` rather than only in the log output.
train_run = mlflow.run(
    "../",
    entry_point="train",
    parameters={"input_data_run_id_arg": input_data_run_id},
)
# Last expression of the cell: display the submitted-run handle.
train_run

2022/08/04 06:21:38 INFO mlflow.projects.docker: === Building docker image bolig_market ===
2022/08/04 06:21:49 INFO mlflow.projects.utils: === Created directory /tmp/tmpdvebfq6n for downloading remote URIs passed to arguments of type 'path' ===
2022/08/04 06:21:49 INFO mlflow.projects.backend.local: === Running command 'docker run --rm -v /home/subu/.aws:/.aws -e MLFLOW_RUN_ID=02c1d441233e4a449a470c27707daa75 -e MLFLOW_TRACKING_URI=http://10.100.55.198:5000 -e MLFLOW_EXPERIMENT_ID=5 docker.io/library/bolig_market:latest python workflow_runner.py train dafdd3618c81490d8a55d027a219aa26 --config-grid-index -1' in run with ID '02c1d441233e4a449a470c27707daa75' === 
[2022-08-04 06:21:53,043] [INFO] [core.worker]: Starting MLFlow experiment for training
[2022-08-04 06:21:53,845] [INFO] [botocore.credentials]: Found credentials in shared credentials file: ~/.aws/credentials
2022/08/04 06:22:01 INFO mlflow.projects: === Run (ID '02c1d441233e4a449a470c27707daa75') succeeded ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x7f9cc29aae60>

### Scoring

In [19]:
# Launch the "predict" entry point of the MLflow project located at the repo root.
# Both IDs are hard-coded, so update them whenever the corresponding upstream
# runs are re-executed.
input_data_run_id = "dafdd3618c81490d8a55d027a219aa26"  # run supplying the input data
model_run_id = "02c1d441233e4a449a470c27707daa75"  # run supplying the model

# Keep the returned run handle so the scoring run ID is available as
# `predict_run.run_id` rather than only in the log output.
predict_run = mlflow.run(
    "../",
    entry_point="predict",
    parameters={
        "input_data_run_id_arg": input_data_run_id,
        "model_run_id_arg": model_run_id,
    },
)
# Last expression of the cell: display the submitted-run handle.
predict_run

2022/08/04 07:23:06 INFO mlflow.projects.docker: === Building docker image bolig_market ===
2022/08/04 07:23:17 INFO mlflow.projects.utils: === Created directory /tmp/tmpy3popnz_ for downloading remote URIs passed to arguments of type 'path' ===
2022/08/04 07:23:17 INFO mlflow.projects.backend.local: === Running command 'docker run --rm -v /home/subu/.aws:/.aws -e MLFLOW_RUN_ID=390d1371c689478e9c7f68d43c323b45 -e MLFLOW_TRACKING_URI=http://10.100.55.198:5000 -e MLFLOW_EXPERIMENT_ID=5 docker.io/library/bolig_market:latest python workflow_runner.py predict dafdd3618c81490d8a55d027a219aa26 02c1d441233e4a449a470c27707daa75' in run with ID '390d1371c689478e9c7f68d43c323b45' === 
2022/08/04 07:23:22 INFO mlflow.projects: === Run (ID '390d1371c689478e9c7f68d43c323b45') succeeded ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x7f3fdb9ddb10>

In [20]:
# Load the DataFrame associated with the given run ID and display it.
# NOTE(review): load_pandas_df is defined in core.pandas_helper; presumably it
# reads a DataFrame artifact logged by that run - confirm there.
scoring_run_id = "390d1371c689478e9c7f68d43c323b45"
predictions = load_pandas_df(scoring_run_id)
predictions

Unnamed: 0,prediction
0,19273.071187
1,19273.071187
2,19273.071187
3,26151.157935
4,26151.157935
...,...
921,21081.307287
922,21081.307287
923,26421.855311
924,26421.855311


### Get model object

In [21]:
# Load the scikit-learn model logged under the "estimator" artifact path of the
# given run, then display it as the cell output.
run_id = "02c1d441233e4a449a470c27707daa75"
logged_model = "runs:/{}/estimator".format(run_id)
model = mlflow.sklearn.load_model(model_uri=logged_model)
model