In [None]:
import pandas as pd
import random
import mlflow
from mlflow.sklearn import log_model
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score
import joblib
import os

# Random-search training of a GradientBoostingRegressor on the red-wine
# quality dataset. Every trial is logged to MLflow; the model with the
# highest R2 on the held-out split is pickled to disk at the end.

# Load the dataset and separate the features from the "quality" target.
data = pd.read_csv("../data/winequality-red.csv")
X = data.drop("quality", axis=1)
y = data["quality"]

# Fixed 80/20 split so every trial is scored on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

mlflow.set_tracking_uri("../mlruns")
mlflow.set_experiment("wine-quality_gbr_auto")

# Running record of the best trial seen so far (ranked by R2).
best_model = None
best_score = -float("inf")
best_params = None
best_rmse = None

for _ in range(10):
    # Draw one hyper-parameter combination at random. The `random` module is
    # not seeded here, so the sampled combinations differ between executions;
    # only the estimator itself is seeded via "random_state".
    params = {
        "n_estimators": random.choice([50, 100, 150]),
        "learning_rate": random.choice([0.01, 0.05, 0.1]),
        "max_depth": random.choice([3, 5, 7]),
        "min_samples_split": random.choice([2, 3, 4]),
        "min_samples_leaf": random.choice([1, 2]),
        "random_state": 42
    }

    # One MLflow run per trial: parameters, metrics and the fitted model.
    with mlflow.start_run():
        model = GradientBoostingRegressor(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        rmse = root_mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mlflow.log_params(params)
        # Cast to plain float before handing the metric values to MLflow.
        mlflow.log_metric("rmse", float(rmse))
        mlflow.log_metric("r2_score", float(r2))
        log_model(model, "model")

        print(f"✅ {params} -> RMSE: {rmse:.4f}, R2: {r2:.4f}")
        # Strict ">" keeps the earliest trial when two trials tie on R2.
        if r2 > best_score:
            best_model = model
            best_params = params
            best_score = r2
            best_rmse = rmse

# Create the directory the pickle is actually written to. The previous code
# created "./model" while dumping into "../model", so the dump failed with
# FileNotFoundError whenever "../model" did not already exist.
best_model_path = "../model/best_gbr.pkl"
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
joblib.dump(best_model, best_model_path)

print("\n🏆 Лучшая модель GBR:")
print(best_params)
print(f"RMSE: {best_rmse:.4f}, R2: {best_score:.4f}")




✅ {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 2, 'random_state': 42} -> RMSE: 0.6064, R2: 0.4373




✅ {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.5876, R2: 0.4716




✅ {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.6071, R2: 0.4360




✅ {'n_estimators': 150, 'learning_rate': 0.05, 'max_depth': 7, 'min_samples_split': 2, 'min_samples_leaf': 2, 'random_state': 42} -> RMSE: 0.5939, R2: 0.4603




✅ {'n_estimators': 150, 'learning_rate': 0.05, 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.5933, R2: 0.4614




✅ {'n_estimators': 150, 'learning_rate': 0.01, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.6258, R2: 0.4007




✅ {'n_estimators': 50, 'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.5942, R2: 0.4597




✅ {'n_estimators': 150, 'learning_rate': 0.05, 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.5933, R2: 0.4614




✅ {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 3, 'min_samples_split': 3, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.6069, R2: 0.4363




✅ {'n_estimators': 50, 'learning_rate': 0.1, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1, 'random_state': 42} -> RMSE: 0.6072, R2: 0.4359

🏆 Лучшая модель GBR:
{'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 1, 'random_state': 42}
RMSE: 0.5876, R2: 0.4716
