In [1]:
import os
from dotenv import load_dotenv
import mlflow
from mlflow_utils import create_mlflow_experiment

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor

load_dotenv()

True

In [2]:
# conectar con mlflow y minio (por http)
mlflow.set_tracking_uri("http://127.0.0.1:5000")

os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://127.0.0.1:9000"
os.environ['AWS_ACCESS_KEY_ID'] = os.getenv('KEY_ID')
os.environ['AWS_SECRET_ACCESS_KEY'] = os.getenv('ACCESS_KEY')

# Multiples

In [3]:
experiment_id = create_mlflow_experiment(
    experiment_name= "Nested Runs",
    artifact_location= "nested_run_artifacts",
    tags={"purpose":"learning"}
)

In [4]:
with mlflow.start_run(run_name="parent") as parent:
    print("RUN ID parent:", parent.info.run_id)

    mlflow.log_param("parent_param", "parent_value")

    with mlflow.start_run(run_name="child1",nested=True) as child1:
        print("RUN ID child1:", child1.info.run_id)
        mlflow.log_param("child1_param", "child1_value")

        with mlflow.start_run(run_name="child_11", nested=True) as child_11:
            print("RUN ID child_11:", child_11.info.run_id )
            mlflow.log_param("child_11_param", "child_11_value")

        with mlflow.start_run(run_name="child_12", nested=True) as child_12:
            print("RUN ID child_12:", child_12.info.run_id)
            mlflow.log_param("child_12_param", "child_12_value")

    with mlflow.start_run(run_name="child2", nested=True) as child2:
        print("RUN ID child2:", child2.info.run_id)
        mlflow.log_param("child2_param", "child2_value")

RUN ID parent: a814cb01346240d9af3a8c9c13e6a17e
RUN ID child1: a346a67e64e4485788c3d77b1a2bae75
RUN ID child_11: bc1734a332654a28a7aac37554f220cb
RUN ID child_12: 4ca965e3f37d49a4a274fde71c25bac3
RUN ID child2: 1e9da62d03ab49edb6185167cf8d9cb3


In [5]:
print('tracking uri:', mlflow.get_tracking_uri())
print('artifact uri:', mlflow.get_artifact_uri())

tracking uri: http://127.0.0.1:5000
artifact uri: /mlflow/nested_run_artifacts/a9e15fd54c5a4d2ba68a0b0e3531b24c/artifacts


In [6]:
mlflow.end_run()

# Otro ejemplo

In [7]:
def train_model(X, y, max_depth):
    # Crear el modelo de regresión logística
    model = DecisionTreeRegressor(max_depth=max_depth)

    # Entrenar el modelo
    model.fit(X, y)

    # Evaluar el modelo
    y_pred = model.predict(X)
    mae = np.mean(np.abs(y - y_pred))

    # Registrar métricas en MLflow
    mlflow.log_metric("MAE", mae)
    return model

In [8]:
# Cargar el conjunto de datos de Boston Housing
data = pd.read_csv("BostonHousing.csv")

# Separar las características y la variable objetivo
X = data.drop("medv", axis=1)
y = data["medv"]

In [9]:
mlflow.set_experiment("mlflow_tracking")

with mlflow.start_run(run_name="logging_artifacts16"):
    # Bucle sobre las penalty
    for max_depth in [2,3,4,5]:
        mlflow.autolog()
        # Experimento anidado para una configuración específica
        with mlflow.start_run(run_name=f"tree_{max_depth}", nested=True) as child:
            print("RUN ID:", child.info.run_id)
            # Entrenar el modelo
            model = train_model(X, y, max_depth)

2024/06/19 09:39:13 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 5fdea689b67d46ceb8075dd46b09fb50


2024/06/19 09:39:27 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 86000142a2574ba59eee4ef774aabf34


2024/06/19 09:40:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 9f0bc25be1044aef954e41218004bdd1


2024/06/19 09:40:30 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


RUN ID: 6fa3038b5e68421a8947e0531aa64620




In [10]:
print('tracking uri:', mlflow.get_tracking_uri())
print('artifact uri:', mlflow.get_artifact_uri())

tracking uri: http://127.0.0.1:5000
artifact uri: s3://mlflow/3/746b1925e4474c9793099f0615ce3d06/artifacts


In [11]:
mlflow.end_run()

In [12]:
print('ok_')

ok_
