In [None]:
# TRACKING 
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import mltable
from sklearn.model_selection import train_test_split

# 1. Connect to the Azure ML workspace
# The three values below are placeholders: replace each one with the
# identifiers of your own subscription, resource group and workspace.
workspace_settings = {
    "subscription_id": "your azure subscription_id",
    "resource_group_name": "your azure resource_group_name",
    "workspace_name": "your azure workspace_name",
}
ml_client = MLClient(credential=DefaultAzureCredential(), **workspace_settings)

# 2. Access the data
# Fetch version "1" of the "training-data" data asset, load it through
# mltable using the asset id, and convert the table to a pandas DataFrame.
data_asset = ml_client.data.get("training-data", version="1")
asset_uri = f"azureml:/{data_asset.id}"
tbl = mltable.load(asset_uri)
df = tbl.to_pandas_dataframe()

# 3. Split the data
# Feature columns, in the order they appear in the X matrix.
feature_columns = [
    'Pregnancies',
    'PlasmaGlucose',
    'DiastolicBloodPressure',
    'TricepsThickness',
    'SerumInsulin',
    'BMI',
    'DiabetesPedigree',
    'Age',
]
label_column = 'Diabetic'

X = df[feature_columns].values
y = df[label_column].values

# Hold out 30% of the rows for testing; random_state=0 keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

In [None]:
# Create an MLflow experiment
import mlflow
# Name of the experiment that the runs started below are recorded under.
experiment_name = "mlflow-experiment-diabetes"
mlflow.set_experiment(experiment_name=experiment_name)

In [None]:
# Use MLflow tracking and log data
from sklearn.linear_model import LogisticRegression
import numpy as np

with mlflow.start_run():
    # 1. Enable autologging for scikit-learn
    mlflow.sklearn.autolog()

    # With autolog enabled, the training below is logged automatically.
    model = LogisticRegression(C=1/0.1, solver="liblinear").fit(X_train, y_train)
    y_hat = model.predict(X_test)

    # 2. Disable autolog after training so it does not interfere with the
    #    manual logging done below.
    mlflow.sklearn.autolog(disable=True)

    # 3. Train a second model and log its parameter and metric by hand.
    # A single variable drives both the fit and the logged parameter, so the
    # logged "regularization_rate" always describes the model whose accuracy
    # is logged (previously 0.1 was logged while the model used 0.01).
    reg_rate = 0.01
    model = LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train)

    y_hat = model.predict(X_test)
    # Fraction of test rows where the prediction equals the true label.
    acc = np.average(y_hat == y_test)

    mlflow.log_param("regularization_rate", reg_rate)
    mlflow.log_metric("Accuracy", acc)


In [None]:
# log_artifact

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np

with mlflow.start_run():
    # Seed the estimator (matching the random_state=0 used for the split) so
    # the logged accuracy and ROC curve are the same on every re-run.
    model = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)

    y_hat = model.predict(X_test)
    # Fraction of test rows where the prediction equals the true label.
    acc = np.average(y_hat == y_test)

    # Plot the ROC curve from the predicted probabilities; column 1 is passed
    # to roc_curve as the score.
    y_scores = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_scores[:, 1])

    fig, ax = plt.subplots(figsize=(6, 4))
    # Plot the diagonal 50% line
    ax.plot([0, 1], [0, 1], 'k--')
    # Plot the FPR and TPR achieved by our model
    ax.plot(fpr, tpr)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    # Save to disk first: log_artifact uploads a file from a local path.
    fig.savefig("ROC-Curve.png")

    mlflow.log_param("estimator", "DecisionTreeClassifier")
    mlflow.log_metric("Accuracy", acc)
    mlflow.log_artifact("ROC-Curve.png")