In [109]:
import os

import mlflow
from catboost import CatBoostClassifier

##### 1. Определим глобальные переменные

In [7]:
# Host and port of the MLflow server; combined below into the tracking and registry URIs.
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

# Name of the experiment whose runs are inspected, and of the registered model whose
# versions are downloaded at the end of the notebook.
EXPERIMENT_NAME = "churn_nikolaistepanov"
REGISTRY_MODEL_NAME = "churn_model_nikolaistepanov_prepared"

##### 2. Подключимся к MLflow

In [8]:
# S3 endpoint and credentials for the artifact store (the experiment's artifacts live under s3://).
# setdefault keeps any value already exported in the environment, so real credentials supplied
# by the shell are never overwritten by the placeholders below and do not have to be
# committed together with the notebook.
os.environ.setdefault("MLFLOW_S3_ENDPOINT_URL", "...")
os.environ.setdefault("AWS_ACCESS_KEY_ID", "...")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "...")

In [9]:
# Tracking and the model registry are addressed through the same host/port,
# so the URL is assembled once and passed to both setters.
tracking_server_url = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"
mlflow.set_tracking_uri(tracking_server_url)
mlflow.set_registry_uri(tracking_server_url)

##### 3. Научимся доставать информацию из MLflow
- информацию о запусках внутри эксперимента
- метрики
- артефакты
- модель

##### 3.1. Получим всю необходимую информацию

In [10]:
# Low-level MLflow client (no explicit URIs passed); used below to query experiments,
# runs, metrics and registered model versions.
client = mlflow.MlflowClient()

In [63]:
# get_experiment_by_name returns None for an unknown name; fail with a clear message
# instead of an AttributeError on the attribute access below.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise ValueError(f"Эксперимент '{EXPERIMENT_NAME}' не найден в MLflow")
experiment_id = experiment.experiment_id

# All runs of the experiment as a DataFrame, newest first.
experiment_runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
).sort_values(by="start_time", ascending=False)

In [97]:
# Root location under which the artifacts of every run of this experiment are stored.
print(f"Путь до всех артефактов эксперимента: '{experiment.artifact_location}'")

Путь до всех артефактов эксперимента: 's3://s3-student-mle-case/7'


##### 3.2. Посмотрим на все наши запуски

In [45]:
# Bare last expression: the notebook renders the runs DataFrame as a table (one row per run).
experiment_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.err1,metrics.err2,metrics.logloss,metrics.recall,...,metrics.paperless_billing_No,metrics.streaming_movies_Yes,metrics.type_Two year,metrics.gender_Male,tags.mlflow.log-model.history,tags.mlflow.source.git.commit,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.type
0,1bf428a8ab16490e83dbd2283b92c512,7,FINISHED,s3://s3-student-mle-case/7/1bf428a8ab16490e83d...,2023-10-19 14:34:00.443000+00:00,2023-10-19 14:34:03.060000+00:00,0.058211,0.117842,7.505541,0.439929,...,,,,,"[{""run_id"": ""1bf428a8ab16490e83dbd2283b92c512""...",a57e0b5019c1d5d08d3311306bde3cf6c58997a2,/Users/nikolaistepanov/.pyenv/versions/3.11.3/...,nikolaistepanov,model_0_versioning,LOCAL
1,209f98b6c1ac445c9cd5e602a45c835c,7,FINISHED,s3://s3-student-mle-case/7/209f98b6c1ac445c9cd...,2023-10-19 14:19:29.233000+00:00,2023-10-19 14:19:32.013000+00:00,0.054294,0.124202,7.239428,0.458716,...,,,,,"[{""run_id"": ""209f98b6c1ac445c9cd5e602a45c835c""...",a57e0b5019c1d5d08d3311306bde3cf6c58997a2,/Users/nikolaistepanov/.pyenv/versions/3.11.3/...,nikolaistepanov,model_0_registry,LOCAL
2,0c2a8e91d1e04a9caaa647beb6820ec4,7,FINISHED,s3://s3-student-mle-case/7/0c2a8e91d1e04a9caaa...,2023-10-19 13:31:00.151000+00:00,2023-10-19 13:31:02.745000+00:00,0.057488,0.124202,7.341752,0.459318,...,,,,,"[{""run_id"": ""0c2a8e91d1e04a9caaa647beb6820ec4""...",a57e0b5019c1d5d08d3311306bde3cf6c58997a2,/Users/nikolaistepanov/.pyenv/versions/3.11.3/...,nikolaistepanov,model_0_custom_model,LOCAL
3,e5d9484a38d04a35ac457e4854f9912d,7,FINISHED,s3://s3-student-mle-case/7/e5d9484a38d04a35ac4...,2023-10-19 13:30:37.464000+00:00,2023-10-19 13:30:40.876000+00:00,0.057488,0.124202,7.341752,0.459318,...,,,,,"[{""run_id"": ""e5d9484a38d04a35ac457e4854f9912d""...",a57e0b5019c1d5d08d3311306bde3cf6c58997a2,/Users/nikolaistepanov/.pyenv/versions/3.11.3/...,nikolaistepanov,model_0,LOCAL
4,d9f8d580f14040408e8d4645e2bcaaf2,7,FINISHED,s3://s3-student-mle-case/7/d9f8d580f14040408e8...,2023-10-18 10:25:56.800000+00:00,2023-10-18 10:25:59.736000+00:00,,,,,...,2872.0,2732.0,1695.0,3555.0,,a57e0b5019c1d5d08d3311306bde3cf6c58997a2,/Users/nikolaistepanov/.pyenv/versions/3.11.3/...,nikolaistepanov,data_check,LOCAL


##### 3.2. Посмотрим все наши метрики. Для примера зафиксируем наш run

In [117]:
# Run ID of the "model_0_custom_model" run from the runs table above; used for the metric lookups below.
run = "0c2a8e91d1e04a9caaa647beb6820ec4"

In [57]:
# Identification columns followed by every model-quality metric column.
metric_columns = [
    "run_id",
    "start_time",
    "metrics.err1",
    "metrics.err2",
    "metrics.logloss",
    "metrics.recall",
    "metrics.auc",
    "metrics.f1",
    "metrics.precision",
]

# dropna() removes runs that are missing any of these metrics
# (e.g. the data_check run, which logged no model metrics).
experiment_runs[metric_columns].dropna()

Unnamed: 0,run_id,start_time,metrics.err1,metrics.err2,metrics.logloss,metrics.recall,metrics.auc,metrics.f1,metrics.precision
0,1bf428a8ab16490e83dbd2283b92c512,2023-10-19 14:34:00.443000+00:00,0.058211,0.117842,7.505541,0.439929,0.817497,0.530917,0.669355
1,209f98b6c1ac445c9cd5e602a45c835c,2023-10-19 14:19:29.233000+00:00,0.054294,0.124202,7.239428,0.458716,0.822151,0.552923,0.695825
2,0c2a8e91d1e04a9caaa647beb6820ec4,2023-10-19 13:31:00.151000+00:00,0.057488,0.124202,7.341752,0.459318,0.825564,0.549451,0.683594
3,e5d9484a38d04a35ac457e4854f9912d,2023-10-19 13:30:37.464000+00:00,0.057488,0.124202,7.341752,0.459318,0.825564,0.549451,0.683594


##### 3.3. Можем посмотреть как на конкретную метрику, так и на все

In [114]:
# Logged history of a single metric for the selected run: a list of Metric records
# (key, step, timestamp, value).
client.get_metric_history(run, "logloss")

[<Metric: key='logloss', step=0, timestamp=1697722260419, value=7.3417519678329475>]

In [115]:
# All metrics recorded for the selected run, as a {metric name: value} dict.
client.get_run(run).data.metrics

{'err1': 0.057487579843860895,
 'err2': 0.1242015613910575,
 'auc': 0.8255639980800066,
 'precision': 0.68359375,
 'recall': 0.45931758530183725,
 'f1': 0.5494505494505495,
 'logloss': 7.3417519678329475}

##### 3.4. Достанем `dataframe` для обучения, который мы логировали в самом начале

In [116]:
# Run ID of the "data_check" run from the runs table above; per the section header,
# this is the run that logged the training dataframe.
run = "d9f8d580f14040408e8d4645e2bcaaf2"

In [86]:
# Fetch the run's metadata under its own name: rebinding `run` from the ID string to a Run
# object would make this cell fail when re-executed (get_run would then receive a Run
# instead of an ID) and would reuse one name for two different kinds of value.
dataset_run = client.get_run(run)
artifact_uri = dataset_run.info.artifact_uri

print(f"Путь артефакта в S3: '{artifact_uri}'")

Путь артефакта в S3: 's3://s3-student-mle-case/7/d9f8d580f14040408e8d4645e2bcaaf2/artifacts'


In [78]:
# Download everything stored under the run's artifact URI into the local "./dataset" directory.
# The call returns the local path, which the notebook displays as the cell output.
mlflow.artifacts.download_artifacts(
    artifact_uri=artifact_uri,
    dst_path="./dataset",
)

'/Users/nikolaistepanov/YandexPracticum/mle-sprint2/mlflow/dataset/artifacts'

##### 3.5. Достанем модели

In [112]:
# Resolve the storage location of registered model versions "1" and "2" (in that order).
model_1_uri, model_2_uri = (
    client.get_model_version_download_uri(REGISTRY_MODEL_NAME, version)
    for version in ("1", "2")
)

print(f"Путь до 1-ой модели в S3: '{model_1_uri}'")
print(f"Путь до 2-ой модели в S3: '{model_2_uri}'")

Путь до 1-ой модели в S3: 's3://s3-student-mle-case/7/209f98b6c1ac445c9cd5e602a45c835c/artifacts/models'
Путь до 2-ой модели в S3: 's3://s3-student-mle-case/7/1bf428a8ab16490e83dbd2283b92c512/artifacts/models'


In [106]:
# Load both registered versions through the CatBoost flavor, first version 1, then version 2.
model_1, model_2 = (
    mlflow.catboost.load_model(model_uri)
    for model_uri in (model_1_uri, model_2_uri)
)

In [111]:
# Sanity check: both loaded objects must be CatBoost classifiers.
assert all(
    isinstance(loaded_model, CatBoostClassifier)
    for loaded_model in (model_1, model_2)
)