## MLflow 

In [2]:
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 데이터 로드
data = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# MLflow 실험 설정
mlflow.set_experiment("RandomForest test")

# 여러 하이퍼파라미터를 변경하며 실험
n_estimators_list = [10, 50, 100, 200]
max_depth_list = [3, 5, 10, None]

for n_estimators in n_estimators_list:
    for max_depth in max_depth_list:
        with mlflow.start_run():
            # 모델 학습
            model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
            model.fit(X_train, y_train)
            
            # 예측 및 평가
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # MLflow에 로그 기록
            mlflow.log_param("n_estimators", n_estimators)
            mlflow.log_param("max_depth", max_depth)
            mlflow.log_metric("mse", mse)
            mlflow.log_metric("r2_score", r2)

            # 모델 저장
            mlflow.sklearn.log_model(model, "random_forest_model")

            # 그래프 생성 및 저장
            plt.figure(figsize=(6, 4))
            plt.scatter(y_test, y_pred, alpha=0.6, color="blue")
            plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], '--r', linewidth=2)
            plt.xlabel("Actual")
            plt.ylabel("Predicted")
            plt.title(f"Prediction Scatter Plot (n={n_estimators}, depth={max_depth})")
            plt.savefig("scatter_plot.png")
            plt.close()

            mlflow.log_artifact("scatter_plot.png")  # MLflow에 그래프 저장

            print(f"Logged: n_estimators={n_estimators}, max_depth={max_depth}, MSE={mse:.4f}, R2={r2:.4f}")



Logged: n_estimators=10, max_depth=3, MSE=3036.8709, R2=0.4268




Logged: n_estimators=10, max_depth=5, MSE=3207.3641, R2=0.3946




Logged: n_estimators=10, max_depth=10, MSE=3074.7926, R2=0.4196




Logged: n_estimators=10, max_depth=None, MSE=3135.2893, R2=0.4082




Logged: n_estimators=50, max_depth=3, MSE=2863.2609, R2=0.4596




Logged: n_estimators=50, max_depth=5, MSE=2960.8654, R2=0.4412




Logged: n_estimators=50, max_depth=10, MSE=3064.4879, R2=0.4216




Logged: n_estimators=50, max_depth=None, MSE=3044.1991, R2=0.4254




Logged: n_estimators=100, max_depth=3, MSE=2812.7691, R2=0.4691




Logged: n_estimators=100, max_depth=5, MSE=2881.5791, R2=0.4561




Logged: n_estimators=100, max_depth=10, MSE=2974.1962, R2=0.4386




Logged: n_estimators=100, max_depth=None, MSE=2952.0106, R2=0.4428




Logged: n_estimators=200, max_depth=3, MSE=2774.2933, R2=0.4764




Logged: n_estimators=200, max_depth=5, MSE=2860.3928, R2=0.4601




Logged: n_estimators=200, max_depth=10, MSE=3004.9652, R2=0.4328




Logged: n_estimators=200, max_depth=None, MSE=2966.0242, R2=0.4402


In [None]:
import subprocess

process = subprocess.Popen(["mlflow", "ui", "--port", "5000"])

print("✅ MLflow UI is running at http://127.0.0.1:5000")

^C


## Best Run

In [8]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("RandomForest test")
runs = client.search_runs(experiment.experiment_id, order_by=["metrics.r2_score DESC"], max_results=1)

best_run = runs[0]
print("Best Run ID:", best_run.info.run_id)
print("Best Params:", best_run.data.params)
print("Best Metrics:", best_run.data.metrics)

Best Run ID: 3b353c5f9828464b92c009482c63cd68
Best Params: {'max_depth': '3', 'n_estimators': '200'}
Best Metrics: {'mse': 2774.293307657063, 'r2_score': 0.4763658273031117}


## 모델 로드 및 재사용

In [9]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("RandomForest test")
runs = client.search_runs(experiment.experiment_id, order_by=["metrics.r2_score DESC"], max_results=1)

best_run = runs[0]
print("Best Run ID:", best_run.info.run_id)
print("Best Params:", best_run.data.params)
print("Best Metrics:", best_run.data.metrics)

Best Run ID: 3b353c5f9828464b92c009482c63cd68
Best Params: {'max_depth': '3', 'n_estimators': '200'}
Best Metrics: {'mse': 2774.293307657063, 'r2_score': 0.4763658273031117}


## 모델 레지스트리에 등록

In [10]:
result = mlflow.register_model(
    f"runs:/{best_run.info.run_id}/random_forest_model",
    "RandomForestRegressor"
)
run_id = best_run.info.run_id

Successfully registered model 'RandomForestRegressor'.
Created version '1' of model 'RandomForestRegressor'.


In [16]:
import subprocess

run_id = "3b353c5f9828464b92c009482c63cd68"

cmd = [
    "mlflow", "models", "serve",
    "-m", f"runs:/{run_id}/random_forest_model",
    "-p", "1234",
    "--env-manager=local"
]

# 백그라운드 실행
process = subprocess.Popen(cmd)
print(f"MLflow model serving started at http://127.0.0.1:1234")

MLflow model serving started at http://127.0.0.1:1234


In [18]:
import mlflow
import mlflow.sklearn

# 모델 학습 후 등록
mlflow.sklearn.log_model(
    sk_model=model,
    artifact_path="random_forest_model",
    registered_model_name="RandomForestRegressor"
)

Registered model 'RandomForestRegressor' already exists. Creating a new version of this model...
Created version '2' of model 'RandomForestRegressor'.


<mlflow.models.model.ModelInfo at 0x1c60b0093c0>

In [None]:
import mlflow
from mlflow import MlflowClient
from sklearn.metrics import mean_squared_error
import numpy as np

client = MlflowClient()
experiment = client.get_experiment_by_name("RandomForest test")
experiment_id = experiment.experiment_id

runs = client.search_runs(
    experiment_ids=[experiment_id],
    order_by=["metrics.rmse ASC"],
    max_results=1
)

best_run = runs[0]
best_run_id = best_run.info.run_id
print("Best run_id:", best_run_id)
print("Best RMSE:", best_run.data.metrics["mse"])

model_name = "RandomForestRegressor"
model_uri = f"runs:/{best_run_id}/random_forest_model"

mv = mlflow.register_model(model_uri=model_uri, name=model_name)
print(f"Registered model version: {mv.version}")

client.set_registered_model_alias(model_name, "champion", mv.version)
print(f"✅ Champion model updated to version {mv.version}")

Registered model 'RandomForestRegressor' already exists. Creating a new version of this model...


Best run_id: d361a6c7651a4ec7a57c735e951323ce
Best RMSE: 2966.0241980337078
Registered model version: 3
✅ Champion model updated to version 3


Created version '3' of model 'RandomForestRegressor'.


In [22]:
import mlflow

# champion alias로 불러오기
champion_model = mlflow.sklearn.load_model("models:/RandomForestRegressor@champion")

# 예측
sample = [[0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235,
           -0.03482076, -0.04340085, -0.00259226, 0.01990749, -0.01764613]]
pred = champion_model.predict(sample)

print("Champion model prediction:", pred)

Champion model prediction: [228.705]
