In [1]:
import os
from dotenv import load_dotenv
import mlflow
from mlflow_utils import create_mlflow_experiment

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor

load_dotenv()

True

In [2]:
# conectar con mlflow y minio (por http)
mlflow.set_tracking_uri("http://127.0.0.1:5000")

os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://127.0.0.1:9000"
os.environ['AWS_ACCESS_KEY_ID'] = os.getenv('KEY_ID')
os.environ['AWS_SECRET_ACCESS_KEY'] = os.getenv('ACCESS_KEY')

# Multiples

In [3]:
experiment_id = create_mlflow_experiment(
    experiment_name= "Nested Runs",
    artifact_location= "nested_run_artifacts",
    tags={"purpose":"learning"}
)

Experiment Nested Runs already exists.


In [4]:
with mlflow.start_run(run_name="parent") as parent:
    print("RUN ID parent:", parent.info.run_id)

    mlflow.log_param("parent_param", "parent_value")

    with mlflow.start_run(run_name="child1",nested=True) as child1:
        print("RUN ID child1:", child1.info.run_id)
        mlflow.log_param("child1_param", "child1_value")

        with mlflow.start_run(run_name="child_11", nested=True) as child_11:
            print("RUN ID child_11:", child_11.info.run_id )
            mlflow.log_param("child_11_param", "child_11_value")

        with mlflow.start_run(run_name="child_12", nested=True) as child_12:
            print("RUN ID child_12:", child_12.info.run_id)
            mlflow.log_param("child_12_param", "child_12_value")

    with mlflow.start_run(run_name="child2", nested=True) as child2:
        print("RUN ID child2:", child2.info.run_id)
        mlflow.log_param("child2_param", "child2_value")

RUN ID parent: a17a399e25ef4dfc83be3a0a64962d61
RUN ID child1: 1140133bd91c496d9c6f53c0206686be
RUN ID child_11: cdffeb12422646bbaa21402d817f4eca
RUN ID child_12: 897cd90c4cd14d2d9675ab595bf2c891
RUN ID child2: 5a86295af253402e8c772a92815a4ba2


In [5]:
print('tracking uri:', mlflow.get_tracking_uri())
print('artifact uri:', mlflow.get_artifact_uri())

tracking uri: http://127.0.0.1:5000
artifact uri: /mlflow/nested_run_artifacts/b81ebdce236142669e83b5668d8b947c/artifacts


In [6]:
mlflow.end_run()

# Otro ejemplo

In [7]:
def train_model(X, y, max_depth):
    # Crear el modelo de regresión logística
    model = DecisionTreeRegressor(max_depth=max_depth)

    # Entrenar el modelo
    model.fit(X, y)

    # Evaluar el modelo
    y_pred = model.predict(X)
    mae = np.mean(np.abs(y - y_pred))

    # Registrar métricas en MLflow
    mlflow.log_metric("MAE", mae)
    return model

In [8]:
# Cargar el conjunto de datos de Boston Housing
data = pd.read_csv("BostonHousing.csv")

# Separar las características y la variable objetivo
X = data.drop("medv", axis=1)
y = data["medv"]

In [9]:
mlflow.set_experiment("mlflow_tracking")

with mlflow.start_run(run_name="logging_artifacts16"):
    # Bucle sobre las penalty
    for max_depth in [2,3,4,5]:
        mlflow.autolog()
        # Experimento anidado para una configuración específica
        with mlflow.start_run(run_name=f"tree_{max_depth}", nested=True) as child:
            print("RUN ID:", child.info.run_id)
            # Entrenar el modelo
            model = train_model(X, y, max_depth)

2024/06/07 16:14:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 3c975b52b25a4e3e9440fe0fab20390e


2024/06/07 16:14:58 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 8d53f4cf39e54ae6bb315e8736729128


2024/06/07 16:15:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 1f49155f049b4173af2852090e1d3773


2024/06/07 16:15:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 4ec55c97d8484c27a404be4d93384820




In [10]:
print('tracking uri:', mlflow.get_tracking_uri())
print('artifact uri:', mlflow.get_artifact_uri())

tracking uri: http://127.0.0.1:5000
artifact uri: s3://mlflow/3/d93745e9bc564d779e15b3c04293860b/artifacts


In [11]:
mlflow.end_run()

In [12]:
print('ok_')

ok_
