In [1]:
import json
import os
from typing import Tuple

import joblib
import mlflow
import pandas as pd

# --- MLflow experiment / registry configuration ---------------------------
EXPERIMENT_NAME = "ozerge_PROJECT_SPRINT_2"
RUN_NAME = "logging_baseline_model"
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000
REGISTRY_MODEL_NAME = "baseline_model"

# Endpoint of the S3-compatible storage that MLflow uploads artifacts to.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://storage.yandexcloud.net"

# The S3 credentials are expected to be present in the environment already.
# Re-assigning os.environ[X] = os.getenv(X) does nothing when X is set and
# fails with an opaque "TypeError: str expected, not NoneType" when it is not,
# so check for presence explicitly and fail with an actionable message.
_missing_credentials = [
    name
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if name not in os.environ
]
if _missing_credentials:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_credentials)
    )

# The tracking server and the model registry live on the same host and port.
TRACKING_URI = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_registry_uri(TRACKING_URI)

In [2]:
def log_model() -> Tuple[str, str]:
    """Log the baseline model to MLflow and register it in the Model Registry.

    Reads the evaluation metrics from ``metrics/eval_results_cbr_l.json`` and
    the fitted model from ``model/fitted_model_cbr_l.pkl``. Then, inside one
    MLflow run of the ``EXPERIMENT_NAME`` experiment (created if it does not
    exist yet), logs the tags, the training data file, the metrics, the
    estimator parameters and the model itself, registering the model under
    ``REGISTRY_MODEL_NAME``.

    Returns:
        A ``(run_id, experiment_id)`` tuple identifying the created run and
        the experiment it belongs to.
    """
    # Evaluation metrics; assumed to be a flat {name: number} mapping, which
    # is what mlflow.log_metrics accepts -- TODO confirm against the producer.
    with open("metrics/eval_results_cbr_l.json", encoding="utf-8") as json_file:
        metrics = json.load(json_file)

    # Fitted model. NOTE(security): joblib.load unpickles the file and can
    # execute arbitrary code, so only load model files from a trusted source.
    with open("model/fitted_model_cbr_l.pkl", "rb") as fd:
        model = joblib.load(fd)

    # Parameters of the "model" step (per the original note, the CatBoost
    # estimator; presumably inside an sklearn Pipeline -- verify).
    model_params = model["model"].get_params()

    # The initial dataset is not read into memory here: it is only attached
    # to the run as a file artifact below, so parsing it would be wasted work.

    # Reuse the MLflow experiment if it already exists, otherwise create it.
    experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
    if experiment is None:
        experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
    else:
        experiment_id = experiment.experiment_id

    # Requirements file describing the environment needed to load the model.
    pip_requirements = "./requirements_bm.txt"

    # Everything below is recorded within a single MLflow run.
    with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
        run_id = run.info.run_id

        mlflow.set_tags(
            {
                "model_type": "CatBoostRegressor",
                "data_version": "2024-07-14",
                "preprocessing": "v2",
            }
        )

        # Training data, stored as a file artifact under "dataframe/".
        mlflow.log_artifact("data/initial_data.csv", "dataframe")

        # Metrics from the evaluation results.
        mlflow.log_metrics(metrics)

        # Estimator parameters.
        mlflow.log_params(model_params)

        # Log the model and register it in the MLflow Model Registry, waiting
        # up to 60 seconds for the new model version to finish creation.
        mlflow.sklearn.log_model(
            registered_model_name=REGISTRY_MODEL_NAME,
            sk_model=model,
            pip_requirements=pip_requirements,
            await_registration_for=60,
            artifact_path="models",
        )

    return run_id, experiment_id
# Entry point: run the logging procedure and bind its results at module level
# so that the next cell can read `run_id` and `experiment_id`.
if __name__ == "__main__":
    run_id, experiment_id = log_model()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Successfully registered model 'baseline_model'.
2025/07/14 10:58:50 INFO mlflow.tracking._model_registry.client: Waiting up to 60 seconds for model version to finish creation. Model name: baseline_model, version 1
Created version '1' of model 'baseline_model'.


In [3]:
# Sanity check: fetch the run that was just logged from the tracking server
# by its ID, then print the identifiers needed to locate it.
run = mlflow.get_run(run_id) # look up the logged run by its ID

print("EXPERIMENT_NAME: ", EXPERIMENT_NAME)
print("experiment_id: ", experiment_id)
print("run_id: ", run_id)

EXPERIMENT_NAME:  ozerge_PROJECT_SPRINT_2
experiment_id:  31
run_id:  6c9b3f1eddaa4ec4ace99d1f56903966
