# Tracking del entrenamiento de un modelo con MLflow

In [None]:
# mostrar info de azure-ai-ml

! pip show azure-ai-ml

## Conectar a workspace

In [None]:
# Connect to the Azure ML workspace
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Authenticate with the default Azure credential and build the client
# from the workspace configuration available to this notebook.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

print(f"Conectado al Workspace: {ml_client.workspace_name}")

## Configurar mlflow

In [None]:
# ! pip install mlflow
! pip show mlflow

In [26]:
# Point MLflow at the tracking URI exposed by the connected Azure ML workspace
import mlflow

# Look up the workspace object by name to read its MLflow tracking URI
workspace = ml_client.workspaces.get(ml_client.workspace_name)
track_uri = workspace.mlflow_tracking_uri

mlflow.set_tracking_uri(track_uri)

## Preparar los datos

In [28]:
import pandas as pd

# Location of the diabetes dataset.
# NOTE(review): this is an absolute path on a specific machine/user share —
# adjust it if the notebook is run elsewhere.
DIABETES_CSV = '/home/azureuser/cloudfiles/code/Users/formacion/azure-machine-learning/2.Ciclo_Vida_Creación_Entrenamiento_Modelos/labs/diabetes-data/diabetes.csv'

print("Reading data...")
df = pd.read_csv(DIABETES_CSV)

# Preview the first rows as the cell output
df.head()

Reading data...


Unnamed: 0,PatientID,Pregnancies,PlasmaGlucose,DiastolicBloodPressure,TricepsThickness,SerumInsulin,BMI,DiabetesPedigree,Age,Diabetic
0,1354778,0,171,80,34,23,43.509726,1.213191,21,0
1,1147438,8,92,93,47,36,21.240576,0.158365,23,0
2,1640031,7,115,47,52,35,41.511523,0.079019,23,0
3,1883350,9,103,78,25,304,29.582192,1.28287,43,1
4,1424119,1,85,59,27,35,42.604536,0.549542,22,0


Dividir los datos en características y etiqueta (diabetes):

In [29]:
print("Splitting data...")

# Columns used as model inputs; 'Diabetic' is the label to predict
feature_columns = [
    'Pregnancies',
    'PlasmaGlucose',
    'DiastolicBloodPressure',
    'TricepsThickness',
    'SerumInsulin',
    'BMI',
    'DiabetesPedigree',
    'Age',
]

# Extract plain NumPy arrays for features (X) and label (y)
X = df[feature_columns].values
y = df['Diabetic'].values

Splitting data...


In [30]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

## Crear un experimento mlflow

In [31]:
# Select the MLflow experiment under which the following runs are logged.
# (A previous, immediately-overwritten assignment of experiment_name was removed:
# only this name was ever passed to set_experiment.)
experiment_name = 'mlflow_exp_diabetes_ric'
mlflow.set_experiment(experiment_name)

2025/05/13 09:51:31 INFO mlflow.tracking.fluent: Experiment with name 'mlflow_exp_diabetes_ric' does not exist. Creating a new experiment.


<Experiment: artifact_location='', creation_time=1747129891033, experiment_id='2d4ca20b-da61-4911-8443-b1ec8f5e4425', last_update_time=None, lifecycle_stage='active', name='mlflow_exp_diabetes_ric', tags={}>

## Entrenar y hacer tracking de modelos

In [32]:
from sklearn.linear_model import LogisticRegression

# Train a logistic regression inside an MLflow run with scikit-learn autologging enabled
with mlflow.start_run():
    mlflow.sklearn.autolog()

    # C is passed as the inverse of the regularization rate
    reg_rate = 0.1
    model = LogisticRegression(C=1/reg_rate, solver="liblinear")
    model.fit(X_train, y_train)

Matplotlib is building the font cache; this may take a moment.


🏃 View run upbeat_turnip_5fnmx7lg at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425/runs/8454bd5d-52f6-4006-962e-87c54c297333
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425


Entrenar y hacer tracking de modelos usando un registro personalizado.

In [36]:
from sklearn.linear_model import LogisticRegression
import numpy as np

# Turn scikit-learn autologging off so only the values logged explicitly below are recorded
mlflow.sklearn.autolog(disable=True)

reg_rate = 0.1

with mlflow.start_run():
    # C is passed as the inverse of the regularization rate
    model = LogisticRegression(C=1/reg_rate, solver="liblinear")
    model.fit(X_train, y_train)

    # Accuracy = fraction of test rows whose prediction equals the label
    y_hat = model.predict(X_test)
    matches = y_hat == y_test
    acc = np.average(matches)

    # Record the hyperparameter and the resulting metric on the active run
    mlflow.log_param("regularization_rate", reg_rate)
    mlflow.log_metric("accuracy", acc)

🏃 View run shy_sponge_0pz5jq35 at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425/runs/0d349e58-2d8a-4e96-ae4e-6d0ad54b8b0a
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425


Repetir el entrenamiento con otra tasa de regularización y registrar `regularization_rate` y `accuracy` para poder compararlos:

In [None]:
from sklearn.linear_model import LogisticRegression
import numpy as np

# Regularization rate for this run. It is defined once so that the value used
# for training and the value logged to MLflow cannot drift apart.
reg_rate = 0.01

with mlflow.start_run():
    # C is passed as the inverse of the regularization rate
    model = LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train)

    y_hat = model.predict(X_test)
    # Accuracy = fraction of test rows whose prediction equals the label
    acc = np.average(y_hat == y_test)

    # Log the rate actually used for training
    # (previously 0.1 was logged while the model was trained with 0.01).
    mlflow.log_param("regularization_rate", reg_rate)
    mlflow.log_metric("accuracy", acc)

🏃 View run clever_sheep_v33bjxpj at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425/runs/b47806fe-81c9-49d9-8a49-cf4e55534cc2
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425


Ejecutar otro modelo para comparar:

In [37]:
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Train a decision tree in its own run so it can be compared with the previous models
with mlflow.start_run():
    # NOTE(review): no random_state is passed, so results may differ between executions — confirm if intended
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    # Accuracy = fraction of test rows whose prediction equals the label
    y_hat = model.predict(X_test)
    matches = y_hat == y_test
    acc = np.average(matches)

    # Record which estimator was used and the resulting metric on the active run
    mlflow.log_param("estimador", "DecisionTreeClassifier")
    mlflow.log_metric("accuracy", acc)

🏃 View run icy_lamp_p3f84x4r at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425/runs/138b2fce-c324-450e-b1db-3555b8efe30c
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425


Logging de un artefacto.

Por ejemplo, se puede trazar la curva ROC y almacenar el gráfico como una imagen. La imagen se puede registrar como un artefacto.

In [39]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np

# Train a decision tree, plot its ROC curve, and log the saved image as a run artifact
with mlflow.start_run():
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    # Accuracy = fraction of test rows whose prediction equals the label
    y_hat = model.predict(X_test)
    acc = np.average(y_hat == y_test)

    # Column 1 of predict_proba is used as the score for the ROC curve
    y_scores = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])

    fig, ax = plt.subplots(figsize=(6, 4))
    # Diagonal 50% reference line
    ax.plot([0, 1], [0, 1], 'k--')
    # FPR / TPR achieved by the model
    ax.plot(fpr, tpr)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    # The image is written to the working directory so it can be logged below
    fig.savefig("ROC-Curve.png")

    # Record the estimator name, the metric, and the ROC image on the active run
    mlflow.log_param("estimador", "DecisionTreeClassifier")
    mlflow.log_metric("accuracy", acc)
    mlflow.log_artifact('ROC-Curve.png')
    

🏃 View run happy_beard_cd9wgb4v at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425/runs/12e2df7c-c27a-42b0-9567-642cf02b2556
🧪 View experiment at: https://northcentralus.api.azureml.ms/mlflow/v2.0/subscriptions/7decb7a4-f615-4cc3-9d7d-5de10998373f/resourceGroups/naturgy/providers/Microsoft.MachineLearningServices/workspaces/naturgyml4/#/experiments/2d4ca20b-da61-4911-8443-b1ec8f5e4425


Revisar los resultados del modelo en la página de trabajos del Azure Machine Learning Studio.
- Parámetros en **Params** en la pestaña **Overview**.
- Métricas en **Metrics** en la pestaña **Overview**, y en la pestaña **Metrics**.
- Artefactos en la pestaña **Outputs + logs**.