In [4]:
import os
import numpy as np
import pandas as pd

import mlflow
from mlflow.models import infer_signature

from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, median_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

In [2]:
os.getenv("MLFLOW_TRACKING_URI")

'http://mlflow-service:5000'

In [5]:
# Получим датасет California housing
housing = datasets.fetch_california_housing(as_frame=True)
# Объединим фичи и таргет в один np.array
data = pd.concat([housing["data"], pd.DataFrame(housing["target"])], axis=1)

In [7]:
FEATURES = [
    "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup",
    "Latitude", "Longitude"
]
TARGET = "MedHouseVal"

In [8]:
# Сделать препроцессинг
# Разделить на фичи и таргет
X, y = data[FEATURES], data[TARGET]

# Разделить данные на обучение и тест
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Обучить стандартизатор на train
scaler = StandardScaler()
X_train_fitted = scaler.fit_transform(X_train)
X_test_fitted = scaler.transform(X_test)

# Обучить стандартизатор на train
scaler = StandardScaler()
X_train_fitted = scaler.fit_transform(X_train)
X_test_fitted = scaler.transform(X_test)

In [12]:
name = "MedHouseExp"
experiment_id = mlflow.create_experiment(name)
mlflow.set_experiment(experiment_id)

2025/04/16 10:41:37 INFO mlflow.tracking.fluent: Experiment with name '135293466297753618' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlops-webinar-example/mlflow/274632898515108876', creation_time=1744800097028, experiment_id='274632898515108876', last_update_time=1744800097028, lifecycle_stage='active', name='135293466297753618', tags={}>

In [13]:
with mlflow.start_run(run_name="my_first_run", experiment_id = experiment_id):
    # Обучить модель
    model = LinearRegression()
    model.fit(X_train_fitted, y_train)
    y_pred = model.predict(X_test_fitted)

    # Получить описание данных
    signature = infer_signature(X_test_fitted, y_pred)
    # Сохранить модель в артифактори
    model_info = mlflow.sklearn.log_model(model, name, signature=signature)
    # Сохранить метрики модели
    mlflow.evaluate(
        model_info.model_uri,
        data=X_test_fitted,
        targets=y_test.values,
        model_type="regressor",
        evaluators=["default"],
    )

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2025/04/16 10:45:35 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2025/04/16 10:45:35 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2025/04/16 10:45:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run my_first_run at: http://mlflow-service:5000/#/experiments/135293466297753618/runs/44800a57471a445e93469df93c7eef8a.
2025/04/16 10:45:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/135293466297753618.
