In [1]:
import mlflow
import os
import datetime
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

In [2]:
# Client for the MLflow tracking and model-registry APIs used throughout this notebook.
# NOTE(review): no tracking URI is passed here, so it presumably comes from the
# environment (e.g. MLFLOW_TRACKING_URI) — confirm.
client = MlflowClient()

## Working with the tracking API

In [3]:
# List the experiments known to the tracking server.
# `search_experiments()` without arguments replaces `list_experiments()`, which was
# deprecated and then removed from MlflowClient in MLflow 2.0.
client.search_experiments()

[<Experiment: artifact_location='mlflow-artifacts:/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/1', experiment_id='1', lifecycle_stage='active', name='Manual_logging', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/2', experiment_id='2', lifecycle_stage='active', name='Auto_logging', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/6', experiment_id='6', lifecycle_stage='active', name='driver_accident', tags={}>]

In [4]:
# Look up the experiment by its name; the bare `exp` on the last line displays it.
exp = client.get_experiment_by_name('driver_accident')
exp

<Experiment: artifact_location='mlflow-artifacts:/6', experiment_id='6', lifecycle_stage='active', name='driver_accident', tags={}>

In [5]:
# Run metadata (RunInfo) for the runs of the selected experiment.
# `list_run_infos` was removed in MLflow 2.0; `search_runs` returns full Run objects
# (newest first by default), so keep only their `.info` to get the same RunInfo list.
[run.info for run in client.search_runs([exp.experiment_id])]

[<RunInfo: artifact_uri='mlflow-artifacts:/6/e82ad3d17e1240d8993f253246a36c7d/artifacts', end_time=1669496564592, experiment_id='6', lifecycle_stage='active', run_id='e82ad3d17e1240d8993f253246a36c7d', run_uuid='e82ad3d17e1240d8993f253246a36c7d', start_time=1669496561387, status='FINISHED', user_id='jupyter-serart'>,
 <RunInfo: artifact_uri='mlflow-artifacts:/6/b1fe9f6262ce457e8c9af3dc9a293d4d/artifacts', end_time=1669496560748, experiment_id='6', lifecycle_stage='active', run_id='b1fe9f6262ce457e8c9af3dc9a293d4d', run_uuid='b1fe9f6262ce457e8c9af3dc9a293d4d', start_time=1669496544797, status='FINISHED', user_id='jupyter-serart'>,
 <RunInfo: artifact_uri='mlflow-artifacts:/6/f88c03239f0e49a19d33b6a0c69fc3df/artifacts', end_time=1669496543008, experiment_id='6', lifecycle_stage='active', run_id='f88c03239f0e49a19d33b6a0c69fc3df', run_uuid='f88c03239f0e49a19d33b6a0c69fc3df', start_time=1669496520251, status='FINISHED', user_id='jupyter-serart'>,
 <RunInfo: artifact_uri='mlflow-artifacts:/

In [6]:
# Runs are returned newest first (compare the start_time values in the outputs), so
# [-1] selects the LAST element of the list, i.e. the oldest run of the experiment.
# `search_runs` replaces `list_run_infos`, which was removed in MLflow 2.0.

run_info = client.search_runs([exp.experiment_id])[-1].info
run_info

<RunInfo: artifact_uri='mlflow-artifacts:/6/ab9b6ed175964ec38889da670b9f4fd5/artifacts', end_time=1669496496210, experiment_id='6', lifecycle_stage='active', run_id='ab9b6ed175964ec38889da670b9f4fd5', run_uuid='ab9b6ed175964ec38889da670b9f4fd5', start_time=1669496419775, status='FINISHED', user_id='jupyter-serart'>

In [7]:
# Keep only the run ID; it is the key used to fetch the full run in the next cell.
run_id = run_info.run_id

In [8]:
# Fetch the complete run; its data holds the metrics, params and tags shown in the output.
run = client.get_run(run_id)
run

<Run: data=<RunData: metrics={'f1': 0.9026566874250129,
 'precision': 0.9081670192219259,
 'recall': 0.8972128205710651,
 'roc_auc': 0.9028009447899076}, params={'best_params': "{'depth': 6, 'l2_leaf_reg': 5, 'learning_rate': 0.1}",
 'categorial features': "['sex', 'car_class']",
 'features': "['age', 'sex', 'car_class', 'driving_experience', "
             "'speeding_penalties', 'parking_penalties', 'total_car_accident']",
 'model_type': "<class 'catboost.core.CatBoostClassifier'>",
 'param_grid': "{'learning_rate': [0.03, 0.1, 0.05], 'depth': [2, 4, 6], "
               "'l2_leaf_reg': [1, 3, 5, 7, 9, 20]}",
 'target': 'has_car_accident'}, tags={'auto_tracking': 'false',
 'framework': 'Catboost',
 'mlflow.log-model.history': '[{"run_id": "ab9b6ed175964ec38889da670b9f4fd5", '
                             '"artifact_path": "driver_accident", '
                             '"utc_time_created": "2022-11-26 '
                             '21:00:35.491172", "flavors": {"python_function": '

In [9]:
# Metrics logged for the run, as a dict of metric name -> value.
run.data.metrics

{'roc_auc': 0.9028009447899076,
 'precision': 0.9081670192219259,
 'recall': 0.8972128205710651,
 'f1': 0.9026566874250129}

In [10]:
# Parameters logged for the run; note that every value is stored as a string.
run.data.params

{'features': "['age', 'sex', 'car_class', 'driving_experience', 'speeding_penalties', 'parking_penalties', 'total_car_accident']",
 'categorial features': "['sex', 'car_class']",
 'target': 'has_car_accident',
 'model_type': "<class 'catboost.core.CatBoostClassifier'>",
 'param_grid': "{'learning_rate': [0.03, 0.1, 0.05], 'depth': [2, 4, 6], 'l2_leaf_reg': [1, 3, 5, 7, 9, 20]}",
 'best_params': "{'depth': 6, 'l2_leaf_reg': 5, 'learning_rate': 0.1}"}

## Model Registry API

In [11]:
# Registered models in the model registry; inspect the first one returned.
# `search_registered_models()` without arguments replaces `list_registered_models()`,
# which was removed in MLflow 2.0.
last_models = client.search_registered_models()
reg_model = last_models[0]
reg_model

<RegisteredModel: creation_timestamp=1669294919484, description='', last_updated_timestamp=1669489703953, latest_versions=[<ModelVersion: creation_timestamp=1669294919510, current_stage='Production', description='', last_updated_timestamp=1669295533288, name='driver-accident', run_id='4b208b5457824871bcb055a3634379b3', run_link='', source='mlflow-artifacts:/3/4b208b5457824871bcb055a3634379b3/artifacts/driver-accident', status='READY', status_message='', tags={}, user_id='', version='1'>,
 <ModelVersion: creation_timestamp=1669489703953, current_stage='None', description='', last_updated_timestamp=1669489703953, name='driver-accident', run_id='3ad2b1294f4342a1be929c5c34735815', run_link='', source='mlflow-artifacts:/6/3ad2b1294f4342a1be929c5c34735815/artifacts/driver-accident', status='READY', status_message='', tags={}, user_id='', version='4'>], name='driver-accident', tags={}>

In [12]:
# Model versions attached to the registered model; in the recorded output there is
# one entry in stage 'Production' and one in stage 'None'.
reg_model.latest_versions

[<ModelVersion: creation_timestamp=1669294919510, current_stage='Production', description='', last_updated_timestamp=1669295533288, name='driver-accident', run_id='4b208b5457824871bcb055a3634379b3', run_link='', source='mlflow-artifacts:/3/4b208b5457824871bcb055a3634379b3/artifacts/driver-accident', status='READY', status_message='', tags={}, user_id='', version='1'>,
 <ModelVersion: creation_timestamp=1669489703953, current_stage='None', description='', last_updated_timestamp=1669489703953, name='driver-accident', run_id='3ad2b1294f4342a1be929c5c34735815', run_link='', source='mlflow-artifacts:/6/3ad2b1294f4342a1be929c5c34735815/artifacts/driver-accident', status='READY', status_message='', tags={}, user_id='', version='4'>]

In [13]:
def get_last_prod_model(name):
    """Return the 'Production' version of a registered model, if there is one.

    Args:
        name: Name of the registered model, looked up through the module-level
            ``client``.

    Returns:
        The first entry of the model's ``latest_versions`` whose ``current_stage``
        equals ``'Production'``, or ``None`` when no entry is in that stage.
    """
    latest_versions = client.get_registered_model(name).latest_versions
    for version in latest_versions:
        if version.current_stage == 'Production':
            return version
    return None

In [14]:
# Production version of the 'driver_accident' registered model (None if there is none).
model_version = get_last_prod_model('driver_accident')
model_version

<ModelVersion: creation_timestamp=1669494057325, current_stage='Production', description='', last_updated_timestamp=1669495020427, name='driver_accident', run_id='64fe89bcfe3549769ab9b9f456608914', run_link='', source='mlflow-artifacts:/6/64fe89bcfe3549769ab9b9f456608914/artifacts/driver_accident', status='READY', status_message='', tags={}, user_id='', version='2'>

In [15]:
# Version identifier of the selected model version (a string, '2' in the recorded output).
# NOTE(review): this raises AttributeError if get_last_prod_model returned None.
m_version = model_version.version
m_version

'2'

## Data loading

In [16]:
# Load the driver statistics dataset; the path is relative to the working directory.
df = pd.read_parquet('driver-stat.parquet')

In [17]:
# Model input columns; the same list as the 'features' param logged with the run above.
FEATURES = ['age', 'sex', 'car_class', 'driving_experience', 'speeding_penalties', 'parking_penalties', 'total_car_accident']

In [18]:
# Feature columns passed to the model; the target (has_car_accident) is read from df later.
test_df = df[FEATURES]

### Loading and applying the model

In [19]:
# Load the registered model version selected above through the generic pyfunc flavor.
# A 'models:/<name>/<version>' URI is resolved through the model registry; a model
# logged by a specific run could be loaded with a 'runs:/<run_id>/driver_accident' URI instead.
logged_model = f'models:/driver_accident/{m_version}'
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on the feature columns (pandas is already imported in the first cell,
# so no import is needed here).
predict = loaded_model.predict(test_df)

In [20]:
# Score the predictions against the true labels stored in the has_car_accident column.
# NOTE(review): roc_auc_score receives `predict` directly; if these are hard class
# labels rather than probabilities, the AUC is computed on thresholded output — confirm.
y_true = df.has_car_accident
roc_auc, precision, recall, f1 = (
    metric(y_true, predict)
    for metric in (roc_auc_score, precision_score, recall_score, f1_score)
)

# One metric per line, in the same order as they were computed.
print(
    f'ROC AUC: {roc_auc}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1: {f1}',
    sep='\n',
)

ROC AUC: 0.8941573609910689
Precision: 0.9090475374029982
Recall: 0.9134531835205992
F1: 0.9112450354384692


### Serving the model with MLflow

In [None]:
# Serve the registered model over a local REST endpoint on port 10201.
# NOTE(review): this serves the hard-coded version 1, while the evaluation above loaded
# version `m_version` ('2' in the recorded output) — confirm which version is intended.
# The server appears to run in the foreground, so the cell keeps running until interrupted.
!mlflow models serve -m models:/driver_accident/1 --port 10201

2022/11/27 12:24:05 INFO mlflow.models.cli: Selected backend for flavor 'python_function'
2022/11/27 12:24:06 INFO mlflow.utils.conda: Conda environment mlflow-ee4e907f4890e4c573af555778236fa7b5d89cea already exists.
2022/11/27 12:24:06 INFO mlflow.pyfunc.backend: === Running command 'source activate mlflow-ee4e907f4890e4c573af555778236fa7b5d89cea 1>&2 && exec gunicorn --timeout=60 -b 127.0.0.1:10201 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'
[2022-11-27 12:24:07 +0000] [165863] [INFO] Starting gunicorn 20.1.0
[2022-11-27 12:24:07 +0000] [165863] [INFO] Listening at: http://127.0.0.1:10201 (165863)
[2022-11-27 12:24:07 +0000] [165863] [INFO] Using worker: sync
[2022-11-27 12:24:07 +0000] [165871] [INFO] Booting worker with pid: 165871


While the server is running, send a scoring request from a terminal:

```bash
curl http://127.0.0.1:10201/invocations -H 'Content-Type: application/json' -d '{
    "dataframe_records": [{"age":18, "sex":"male", "car_class":"A", "driving_experience":5, "speeding_penalties":5, "parking_penalties":1, "total_car_accident":0}]
}'
```