In [10]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

# Build a synthetic binary-classification dataset and hold out 20% of it for testing
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Single source of truth for the baseline hyperparameters: the same dict feeds both
# the estimator and the MLflow logging call, so the logged value cannot drift from
# the value the model was actually trained with.
baseline_params = {"n_estimators": 10}

# Open an MLflow run; everything logged inside the block is attached to that run
with mlflow.start_run():
    # Train a small baseline random forest
    clf = RandomForestClassifier(**baseline_params, random_state=42)
    clf.fit(X_train, y_train)

    # Predict on the held-out split and compute accuracy
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Log the hyperparameters and the resulting metric
    mlflow.log_params(baseline_params)
    mlflow.log_metric("accuracy", acc)

    # Log the fitted model as a run artifact under "model"
    mlflow.sklearn.log_model(clf, "model")




In [11]:
# Hyperparameter combinations to try; each one is trained and tracked as its own MLflow run
hyperparameter_sets = [
    dict(n_estimators=50, max_depth=5),
    dict(n_estimators=100, max_depth=10),
    dict(n_estimators=150, max_depth=15),
]

for params in hyperparameter_sets:
    with mlflow.start_run():
        # Fit a forest with this combination (fixed seed for repeatable results)
        clf = RandomForestClassifier(random_state=42, **params).fit(X_train, y_train)

        # Score on the held-out split
        y_pred = clf.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        # Record the configuration, its accuracy, and the fitted model on the run
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(clf, "model")




In [12]:
import mlflow.pyfunc

# Ask the tracking store for just the single highest-accuracy run rather than
# downloading every run and discarding all but the first row.
top_runs = mlflow.search_runs(order_by=["metrics.accuracy DESC"], max_results=1)
if top_runs.empty:
    # Without this guard, .iloc[0] fails with an opaque "indexer is out-of-bounds" error
    raise RuntimeError(
        "No MLflow runs found in the active experiment; run the training cells first."
    )
best_run = top_runs.iloc[0]
best_run_id = best_run.run_id

# Register the best run's "model" artifact as a new version of the registered model
model_uri = f"runs:/{best_run_id}/model"
mlflow.register_model(model_uri, "Best_Classifier_Model")


Registered model 'Best_Classifier_Model' already exists. Creating a new version of this model...
Created version '4' of model 'Best_Classifier_Model'.


<ModelVersion: aliases=[], creation_timestamp=1732351954589, current_stage='None', description=None, last_updated_timestamp=1732351954589, name='Best_Classifier_Model', run_id='832d596fb0c5424d9d082ea7c57e0c0f', run_link=None, source='file:///c:/FPT%20edu/MLOps/Final%20Project/mlruns/0/832d596fb0c5424d9d082ea7c57e0c0f/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [13]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Look up every registered version of the model and pick the highest version number
model_versions = client.search_model_versions("name='Best_Classifier_Model'")
if not model_versions:
    # max() on an empty sequence would otherwise raise an uninformative ValueError
    raise RuntimeError(
        "No versions of 'Best_Classifier_Model' are registered; run the registration cell first."
    )
latest_version = max(int(model.version) for model in model_versions)

# Promote the newest version to the "Production" stage.
# archive_existing_versions=True moves any version already in Production to
# "Archived", so re-running the notebook does not pile up several versions in
# Production at once.
# NOTE(review): this call emits a warning (see the cell output); model stages appear
# to be deprecated in recent MLflow releases in favour of aliases. Migrating would
# also require changing the "models:/.../Production" URI used when loading the model.
client.transition_model_version_stage(
    name="Best_Classifier_Model",
    version=str(latest_version),
    stage="Production",
    archive_existing_versions=True,
)


  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1732351954589, current_stage='Production', description=None, last_updated_timestamp=1732351954635, name='Best_Classifier_Model', run_id='832d596fb0c5424d9d082ea7c57e0c0f', run_link=None, source='file:///c:/FPT%20edu/MLOps/Final%20Project/mlruns/0/832d596fb0c5424d9d082ea7c57e0c0f/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [14]:
# Load the registered model through the generic pyfunc flavor, addressing it by the
# "Production" stage of the model registry.
production_model_uri = "models:/Best_Classifier_Model/Production"
model = mlflow.pyfunc.load_model(production_model_uri)
model

  latest = client.get_latest_versions(name, None if stage is None else [stage])


mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: 832d596fb0c5424d9d082ea7c57e0c0f