In [0]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt

# Cargar datos
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

input_example = pd.DataFrame(X_test[:5], columns=load_diabetes().feature_names)
signature = mlflow.models.infer_signature(
    pd.DataFrame(X_train, columns=load_diabetes().feature_names),
    pd.Series(y_train)
)

with mlflow.start_run() as run:
    # Parámetros
    params = {"fit_intercept": True}
    #params = {"fit_intercept": False}
    mlflow.log_params(params)
    
    # Entrenamiento
    model = LinearRegression(**params)
    model.fit(X_train, y_train)
    
    # Predicción
    y_pred = model.predict(X_test)
    
    # Métricas
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)
    
    # Artefacto: gráfico de predicción vs real
    plt.figure()
    plt.scatter(y_test, y_pred)
    plt.xlabel("Real")
    plt.ylabel("Predicción")
    plt.title("Predicción vs Real")
    plt.savefig("/tmp/pred_vs_real.png")
    mlflow.log_artifact("/tmp/pred_vs_real.png")
    plt.close()
    
    # Guardar modelo con signature
    mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=input_example
    )
    
    # Guardar tabla de predicción vs real como csv y loggear como artifact
    pred_vs_real_df = pd.DataFrame({
        "Real": y_test,
        "Predicción": y_pred
    })
    pred_vs_real_df.to_csv("/tmp/pred_vs_real.csv", index=False)
    mlflow.log_artifact("/tmp/pred_vs_real.csv")
    
    # Evaluación con mlflow
    model_uri = f"runs:/{run.info.run_id}/model"
    eval_results = mlflow.evaluate(
        model=model_uri,
        data=pd.DataFrame(X_test, columns=load_diabetes().feature_names).assign(target=y_test),
        targets="target",
        model_type="regressor",
        evaluators=["default"]
    )

2025/11/07 16:15:43 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


In [0]:
import mlflow
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd

# Use absolute path for experiment name
experiment_name = "hyperparameter_tuning_example"

#id=mlflow.get_experiment_by_name(experiment_name)
mlflow.set_experiment(experiment_name="/Users/guillermo.henrion@gmail.com/hyperparameter_tuning_example")

# Load data
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

param_grid = [
    {"fit_intercept": True},
    {"fit_intercept": False}
]

with mlflow.start_run() as parent_run:
    best_mse = float("inf")
    best_params = None
    results = []
    for i, params in enumerate(param_grid):
        with mlflow.start_run(run_name=f"child_run_{i}", nested=True):
            model = LinearRegression(**params)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            mlflow.log_params(params)
            mlflow.log_metric("mse", mse)
            mlflow.log_metric("r2", r2)
            results.append({"params": params, "mse": mse, "r2": r2})
            if mse < best_mse:
                best_mse = mse
                best_params = params
    mlflow.log_metric("best_mse", best_mse)
    mlflow.log_params({"best_fit_intercept": best_params["fit_intercept"]})
    display(pd.DataFrame(results))

params,mse,r2
List(true),2900.1936284934827,0.4526027629719192
List(false),27961.75691676191,-4.277643647068198
