# Tarea 5: Challenger experiments

Samantha Sánchez Tinoco

In [24]:
import os, mlflow
from dotenv import load_dotenv

load_dotenv(override=True)  # Load the variables defined in the .env file

# The experiment path can be overridden through the environment (for example
# from .env) so the notebook is not tied to one workspace user; the default
# keeps the original location.
EXPERIMENT_NAME = os.getenv(
    "MLFLOW_EXPERIMENT_NAME",
    "/Users/samantha.sancheztin@gmail.com/nyc-taxi-experiments",
)

mlflow.set_tracking_uri("databricks")
# Use the same registry as the registration cell further down, which targets
# "databricks-uc" for the three-level model name.
mlflow.set_registry_uri("databricks-uc")
experiment = mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

## 1. Lectura de datos

In [2]:
import pickle
import pandas as pd
from sklearn.metrics import  root_mean_squared_error
from sklearn.feature_extraction import  DictVectorizer

In [3]:
def read_dataframe(filename):
    """Load a green-taxi trip parquet file and prepare it for modelling.

    Adds a ``duration`` column (dropoff minus pickup, in minutes), keeps only
    trips whose duration is between 1 and 60 minutes inclusive, and casts the
    pickup/dropoff location IDs to strings.

    Args:
        filename: Location of the parquet file, passed to ``pd.read_parquet``.

    Returns:
        A new ``pd.DataFrame`` holding the filtered trips.
    """
    df = pd.read_parquet(filename)

    # Vectorised timedelta -> minutes conversion instead of a per-row lambda.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # .copy() detaches the filtered rows from the full frame, so the column
    # assignment below writes to an independent DataFrame rather than a slice
    # (no SettingWithCopyWarning, no ambiguity about what gets modified).
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [4]:
# The 2025-01 file becomes the training frame and the 2025-02 file the validation frame.
df_train = read_dataframe('../data/green_tripdata_2025-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2025-02.parquet')

## 2. Pre-procesamiento

In [5]:
# Feature engineering
def create_features(df):
    """Build the per-trip feature dictionaries used by the models.

    Each record holds ``PU_DO`` (pickup and dropoff location IDs joined with
    an underscore) and ``trip_distance``.

    The input frame is not modified: the combined column is built on the side
    instead of being written into ``df``, which may be a filtered slice of
    another frame.

    Args:
        df: Frame with ``PULocationID``, ``DOLocationID`` and
            ``trip_distance`` columns.

    Returns:
        A list with one ``{"PU_DO": ..., "trip_distance": ...}`` dict per row.
    """
    # Cast defensively so non-string location IDs also concatenate correctly.
    pu_do = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)
    features = pd.DataFrame({'PU_DO': pu_do, 'trip_distance': df['trip_distance']})
    return features.to_dict(orient='records')

In [6]:
def fit_transform_dict_vectorizer(train_dicts):
    """Fit a fresh ``DictVectorizer`` on ``train_dicts`` and encode them.

    Args:
        train_dicts: Feature dictionaries to fit the vectorizer on.

    Returns:
        A ``(vectorizer, matrix)`` tuple: the fitted ``DictVectorizer`` and
        the result of its ``fit_transform`` on ``train_dicts``.
    """
    vectorizer = DictVectorizer()
    encoded = vectorizer.fit_transform(train_dicts)
    return vectorizer, encoded

def transform_dict_vectorizer(df, dv):
    """Encode ``df`` with an already-fitted vectorizer.

    Args:
        df: Trip frame accepted by ``create_features``.
        dv: Fitted vectorizer exposing ``transform``.

    Returns:
        Whatever ``dv.transform`` returns for the feature dicts of ``df``.
    """
    return dv.transform(create_features(df))

In [7]:
# Preprocessing without data leakage: the vectorizer is fitted on the training dicts only
train_dicts = create_features(df_train)
dv, X_train = fit_transform_dict_vectorizer(train_dicts)

# Transform the validation data with the vectorizer fitted above
X_val = transform_dict_vectorizer(df_val, dv)

In [8]:
# Extract the target variable (the `duration` column) as arrays for train and validation
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [9]:
# Create MLflow datasets describing the train/validation splits.
# They are built from the source DataFrames (raw model inputs plus the target)
# rather than from `X_train.data`: on the sparse matrix that DictVectorizer
# returns by default, `.data` is only the flat buffer of stored non-zero
# values, so it has no row structure and does not line up with the targets.
dataset_columns = ['PULocationID', 'DOLocationID', 'trip_distance', target]
training_dataset = mlflow.data.from_pandas(
    df_train[dataset_columns], targets=target, name="green_tripdata_2025-01"
)
validation_dataset = mlflow.data.from_pandas(
    df_val[dataset_columns], targets=target, name="green_tripdata_2025-02"
)

## 3. Modelos

### Gradient Boost

In [10]:
import math
import optuna
from sklearn.ensemble import GradientBoostingRegressor
from optuna.samplers import TPESampler
from mlflow.models.signature import infer_signature

In [11]:
def objective_gb(trial: optuna.trial.Trial):
    """Optuna objective for ``GradientBoostingRegressor``.

    Samples one hyperparameter set from ``trial``, fits the model on
    ``X_train``/``y_train`` and scores it on ``X_val``/``y_val``. The trial is
    tracked as a nested MLflow run carrying the ``model_family`` tag, the
    parameters, the ``rmse`` metric and the fitted model (artifact "model").

    Args:
        trial: The Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The validation RMSE of the fitted model.
    """
    # Suggestions are requested in a fixed order; keyword arguments are
    # evaluated left to right.
    hyperparams = dict(
        n_estimators=trial.suggest_int("n_estimators", 50, 500),
        learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
        max_depth=trial.suggest_int("max_depth", 3, 12),
        min_samples_split=trial.suggest_int("min_samples_split", 2, 20),
        min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 10),
        subsample=trial.suggest_float("subsample", 0.5, 1.0),
        random_state=42,
    )

    with mlflow.start_run(nested=True):
        mlflow.set_tag("model_family", "gradient_boosting")
        mlflow.log_params(hyperparams)

        model = GradientBoostingRegressor(**hyperparams)
        model.fit(X_train, y_train)

        val_predictions = model.predict(X_val)
        val_rmse = root_mean_squared_error(y_val, val_predictions)
        mlflow.log_metric("rmse", val_rmse)

        # The signature is inferred from the validation inputs and predictions.
        model_signature = infer_signature(X_val, val_predictions)
        mlflow.sklearn.log_model(model, "model", signature=model_signature)

    return val_rmse

In [12]:
# Parent run for Gradient Boosting: the trial runs started inside objective_gb
# use nested=True and are opened while this run is active.
with mlflow.start_run(run_name="Gradient Boosting Parent Experiment"):
    mlflow.set_tag("model_type", "gradient_boosting")
    
    # Seeded TPE sampler; the study minimises the RMSE returned by the objective
    sampler = TPESampler(seed=42)
    study_gb = optuna.create_study(direction="minimize", sampler=sampler)
    study_gb.optimize(objective_gb, n_trials=10)
    
    # Record the study's best value and parameters on the parent run
    mlflow.log_metric("best_rmse", study_gb.best_value)
    mlflow.log_params(study_gb.best_params)

    print("Best parameters:", study_gb.best_params)
    print("Best RMSE:", study_gb.best_value)

[I 2025-10-27 18:06:29,476] A new study created in memory with name: no-name-c358cf97-b770-4a14-a018-dcafe038a22e
[I 2025-10-27 18:08:24,966] Trial 0 finished with value: 5.366910042903661 and parameters: {'n_estimators': 218, 'learning_rate': 0.28570714885887566, 'max_depth': 10, 'min_samples_split': 13, 'min_samples_leaf': 2, 'subsample': 0.5779972601681014}. Best is trial 0 with value: 5.366910042903661.


🏃 View run stately-flea-395 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/3aaac098ed9745ccb1f38c2c0fa71a5c
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:08:47,390] Trial 1 finished with value: 5.4508625983409305 and parameters: {'n_estimators': 76, 'learning_rate': 0.2611910822747312, 'max_depth': 9, 'min_samples_split': 15, 'min_samples_leaf': 1, 'subsample': 0.9849549260809971}. Best is trial 0 with value: 5.366910042903661.


🏃 View run ambitious-koi-629 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/77467358bdb047ada53475a9ee86a2cf
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:09:25,336] Trial 2 finished with value: 5.437186040925057 and parameters: {'n_estimators': 425, 'learning_rate': 0.07157834209670008, 'max_depth': 4, 'min_samples_split': 5, 'min_samples_leaf': 4, 'subsample': 0.762378215816119}. Best is trial 0 with value: 5.366910042903661.


🏃 View run marvelous-snake-942 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/6cec105ab27d44518f3951bf60c91670
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:10:06,311] Trial 3 finished with value: 5.363054425677847 and parameters: {'n_estimators': 244, 'learning_rate': 0.09445645065743215, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 3, 'subsample': 0.6831809216468459}. Best is trial 3 with value: 5.363054425677847.


🏃 View run big-skink-927 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/c15098854ac94b6a9ec5b9579480b3ad
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:10:30,942] Trial 4 finished with value: 5.406105509343811 and parameters: {'n_estimators': 255, 'learning_rate': 0.23770102880397392, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 6, 'subsample': 0.5232252063599989}. Best is trial 3 with value: 5.363054425677847.


🏃 View run funny-perch-867 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/e24071efe9ff482cb94f073be10da2c9
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:11:04,455] Trial 5 finished with value: 5.53903019202587 and parameters: {'n_estimators': 324, 'learning_rate': 0.059451995869314544, 'max_depth': 3, 'min_samples_split': 20, 'min_samples_leaf': 10, 'subsample': 0.9041986740582306}. Best is trial 3 with value: 5.363054425677847.


🏃 View run colorful-bass-214 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/86e33efb38a9408987d0f3e67946760e
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:11:56,716] Trial 6 finished with value: 5.450772998600317 and parameters: {'n_estimators': 187, 'learning_rate': 0.03832491306185132, 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 2, 'subsample': 0.7475884550556351}. Best is trial 3 with value: 5.363054425677847.


🏃 View run victorious-gnu-547 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/6946d9823f3b4f8c95c95474c8409498
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:12:22,418] Trial 7 finished with value: 5.457099498654661 and parameters: {'n_estimators': 65, 'learning_rate': 0.2737029166028468, 'max_depth': 5, 'min_samples_split': 14, 'min_samples_leaf': 4, 'subsample': 0.7600340105889054}. Best is trial 3 with value: 5.363054425677847.


🏃 View run bittersweet-hare-638 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/f4076f5c74d04d039260a87da95c0ebc
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:14:05,866] Trial 8 finished with value: 5.414639986899689 and parameters: {'n_estimators': 296, 'learning_rate': 0.06360779210240283, 'max_depth': 12, 'min_samples_split': 16, 'min_samples_leaf': 10, 'subsample': 0.9474136752138245}. Best is trial 3 with value: 5.363054425677847.


🏃 View run mercurial-shoat-454 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/bccb2953291d4fb3aa6db95a17a64c9d
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:14:33,945] Trial 9 finished with value: 5.477046205025457 and parameters: {'n_estimators': 319, 'learning_rate': 0.2773435281567039, 'max_depth': 3, 'min_samples_split': 5, 'min_samples_leaf': 1, 'subsample': 0.6626651653816322}. Best is trial 3 with value: 5.363054425677847.


🏃 View run crawling-gnat-841 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/20e25a035901424ba786ec562973be78
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878
Best parameters: {'n_estimators': 244, 'learning_rate': 0.09445645065743215, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 3, 'subsample': 0.6831809216468459}
Best RMSE: 5.363054425677847
🏃 View run Gradient Boosting Parent Experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/4acbce7a71064144b563f824c680bdff
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


### Random Forest

In [13]:
from sklearn.ensemble import RandomForestRegressor

def objective_rf(trial: optuna.trial.Trial):
    """Optuna objective for ``RandomForestRegressor``.

    Samples one hyperparameter set from ``trial``, fits the model on
    ``X_train``/``y_train`` and scores it on ``X_val``/``y_val``. The trial is
    tracked as a nested MLflow run carrying the ``model_family`` tag, the
    parameters, the ``rmse`` metric and the fitted model (artifact "model").

    Args:
        trial: The Optuna trial supplying the hyperparameter suggestions.

    Returns:
        The validation RMSE of the fitted model.
    """
    # Suggestions are requested in a fixed order; keyword arguments are
    # evaluated left to right.
    hyperparams = dict(
        n_estimators=trial.suggest_int("n_estimators", 100, 500),
        max_depth=trial.suggest_int("max_depth", 5, 50),
        min_samples_split=trial.suggest_int("min_samples_split", 2, 20),
        min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 10),
        max_features=trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        random_state=42,
    )

    with mlflow.start_run(nested=True):
        mlflow.set_tag("model_family", "random_forest")
        mlflow.log_params(hyperparams)

        model = RandomForestRegressor(**hyperparams)
        model.fit(X_train, y_train)

        val_predictions = model.predict(X_val)
        val_rmse = root_mean_squared_error(y_val, val_predictions)
        mlflow.log_metric("rmse", val_rmse)

        # The signature is inferred from the validation inputs and predictions.
        model_signature = infer_signature(X_val, val_predictions)
        mlflow.sklearn.log_model(model, "model", signature=model_signature)

    return val_rmse

In [15]:
# Parent run for Random Forest: the trial runs started inside objective_rf
# use nested=True and are opened while this run is active.
with mlflow.start_run(run_name="Random Forest Parent Experiment"):
    mlflow.set_tag("model_type", "random_forest")
    
    # Seeded TPE sampler; the study minimises the RMSE returned by the objective
    sampler = TPESampler(seed=42)
    study_rf = optuna.create_study(direction="minimize", sampler=sampler)
    study_rf.optimize(objective_rf, n_trials=10)
    
    # Record the study's best value and parameters on the parent run
    mlflow.log_metric("best_rmse", study_rf.best_value)
    mlflow.log_params(study_rf.best_params)

    print("RF - Best parameters:", study_rf.best_params)
    print("RF - Best RMSE:", study_rf.best_value)


[I 2025-10-27 18:14:58,584] A new study created in memory with name: no-name-377efe12-b331-4496-a7a7-7799cb5dbdce
[I 2025-10-27 18:16:13,111] Trial 0 finished with value: 6.882101920264759 and parameters: {'n_estimators': 250, 'max_depth': 48, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 0 with value: 6.882101920264759.


🏃 View run sneaky-calf-437 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/54860bc2afd0461e8b32a11b54adaa6e
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:17:38,159] Trial 1 finished with value: 7.395500849046491 and parameters: {'n_estimators': 447, 'max_depth': 32, 'min_samples_split': 15, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 0 with value: 6.882101920264759.


🏃 View run blushing-dog-42 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/e69f4de84979466eacb76f5ff02c97af
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878




🏃 View run incongruous-shad-828 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/798f7183ad534c9b813eceb62f9974ec
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:20:25,928] Trial 2 finished with value: 5.532191644130856 and parameters: {'n_estimators': 172, 'max_depth': 13, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': None}. Best is trial 2 with value: 5.532191644130856.


🏃 View run nebulous-robin-209 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/f9e5980d00d8409fb1b6d903a240bc9c
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:21:30,720] Trial 3 finished with value: 7.877918297156907 and parameters: {'n_estimators': 155, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 2 with value: 5.532191644130856.


🏃 View run serious-sponge-836 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/8e57f3253b9a4cc8818a9fceb123fcf9
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:23:52,876] Trial 4 finished with value: 5.600834486465797 and parameters: {'n_estimators': 337, 'max_depth': 7, 'min_samples_split': 13, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 2 with value: 5.532191644130856.


🏃 View run chill-snipe-633 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/c126598eca3c4e33a6da843756ffc085
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:34:56,632] Trial 5 finished with value: 5.543915375321774 and parameters: {'n_estimators': 424, 'max_depth': 19, 'min_samples_split': 3, 'min_samples_leaf': 7, 'max_features': None}. Best is trial 2 with value: 5.532191644130856.
[I 2025-10-27 18:41:11,846] Trial 6 finished with value: 5.5738298474509635 and parameters: {'n_estimators': 113, 'max_depth': 46, 'min_samples_split': 6, 'min_samples_leaf': 7, 'max_features': None}. Best is trial 2 with value: 5.532191644130856.


🏃 View run mysterious-pig-650 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/4dffdbf80a7f411d90c86d57477be70c
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:44:55,206] Trial 7 finished with value: 5.613139342322132 and parameters: {'n_estimators': 174, 'max_depth': 49, 'min_samples_split': 16, 'min_samples_leaf': 10, 'max_features': None}. Best is trial 2 with value: 5.532191644130856.


🏃 View run persistent-owl-962 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/6fe1ed77fd174b379b11cd9b5ac3e514
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878




🏃 View run monumental-loon-389 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/92a39c05471b461ab54097e04c85a203
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


[I 2025-10-27 18:46:52,597] Trial 8 finished with value: 5.509241145526334 and parameters: {'n_estimators': 135, 'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': None}. Best is trial 8 with value: 5.509241145526334.
[I 2025-10-27 18:51:13,077] Trial 9 finished with value: 5.481313294776017 and parameters: {'n_estimators': 243, 'max_depth': 17, 'min_samples_split': 12, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 9 with value: 5.481313294776017.


🏃 View run delicate-deer-446 at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/5566b145f1d74e97823c0a27b41c104f
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878
RF - Best parameters: {'n_estimators': 243, 'max_depth': 17, 'min_samples_split': 12, 'min_samples_leaf': 2, 'max_features': None}
RF - Best RMSE: 5.481313294776017
🏃 View run Random Forest Parent Experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878/runs/cd72374a48114ec8b968c209a11a943f
🧪 View experiment at: https://dbc-c669aeb4-7965.cloud.databricks.com/ml/experiments/873274240097878


## 4. Comparar ambos modelos y registrar challenger:

In [16]:
# Determine the best model: the lower best validation RMSE wins, and a tie
# goes to Gradient Boosting (note the <=).
best_gb = study_gb.best_value
best_rf = study_rf.best_value

if best_gb <= best_rf:
    print("Challenger será Gradient Boosting")
    best_model_type = "gradient_boosting"
else:
    print("Challenger será Random Forest")
    best_model_type = "random_forest"

Challenger será Gradient Boosting


In [33]:
# Name of the registered model used for registration and loading below
# (presumably a Unity Catalog catalog.schema.model name — confirm).
model_name = "workspace.default.nyc-taxi-model"

In [34]:
# Find the best trial run of the winning model family.
# Filtering on the `model_family` tag (set on every Optuna trial run) ties the
# lookup to the comparison result and keeps unrelated runs that share this
# experiment from being selected.
runs = mlflow.search_runs(
    experiment_names=[EXPERIMENT_NAME],
    filter_string=f"tags.model_family = '{best_model_type}'",
    order_by=["metrics.rmse ASC"],
    max_results=1,
    output_format="list"
)

# Stop here when there is nothing to register: continuing would leave
# `best_run` undefined and the registration cell would fail with a NameError.
if not runs or "rmse" not in runs[0].data.metrics:
    raise RuntimeError("No se encontraron runs con métrica RMSE.")

# Take the best run; it is the challenger candidate, not the current champion.
best_run = runs[0]
print("Challenger Run encontrado:")
print(f"Run ID: {best_run.info.run_id}")
print(f"RMSE: {best_run.data.metrics['rmse']}")
print(f"Params: {best_run.data.params}")

Champion Run encontrado:
Run ID: c15098854ac94b6a9ec5b9579480b3ad
RMSE: 5.363054425677847
Params: {'learning_rate': '0.09445645065743215', 'max_depth': '9', 'min_samples_leaf': '3', 'min_samples_split': '4', 'n_estimators': '244', 'random_state': '42', 'subsample': '0.6831809216468459'}


In [36]:
mlflow.set_registry_uri("databricks-uc")


# Register the model logged by the best run as a new version of `model_name`.
result = mlflow.register_model(
    model_uri=f"runs:/{best_run.info.run_id}/model",
    name=model_name
)

# Label the new version as the challenger. The evaluation section looks models
# up by the labels "champion" and "challenger"; without this step the freshly
# registered version carries no label at all.
mlflow.MlflowClient().set_registered_model_alias(
    name=model_name, alias="challenger", version=result.version
)


Registered model 'workspace.default.nyc-taxi-model' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Created version '3' of model 'workspace.default.nyc-taxi-model'.


## 5. Evaluar Champion vs Challenger con los datos de marzo 2025

In [None]:
# Download March 2025 (if it is not already on disk)
import urllib.request
os.makedirs("../data", exist_ok=True)
# TLC trip records are served from the CloudFront distribution linked on the
# official TLC Trip Record Data page; the former `nyc-tlc` S3 bucket URL is no
# longer publicly readable.
url = "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2025-03.parquet"
file_path = "../data/green_tripdata_2025-03.parquet"

if not os.path.exists(file_path):
    # Download under a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that the check above would accept.
    tmp_path = file_path + ".part"
    urllib.request.urlretrieve(url, tmp_path)
    os.replace(tmp_path, file_path)

# Same preparation and encoding as train/validation, using the already-fitted vectorizer
df_test = read_dataframe(file_path)
X_test = transform_dict_vectorizer(df_test, dv)
y_test = df_test[target].values


In [None]:
# Cargar modelos desde registry
import mlflow.pyfunc

def load_by_alias(alias):
    """Load the version of ``model_name`` labelled ``alias`` as a pyfunc model.

    The registry alias is resolved first. If no such alias exists, the
    function falls back to scanning the model's versions for a version tag
    named ``"alias"`` whose value equals ``alias``.

    Args:
        alias: Label to look up, e.g. ``"champion"`` or ``"challenger"``.

    Returns:
        The loaded ``mlflow.pyfunc`` model, or ``None`` when no version
        carries the label.
    """
    client = mlflow.MlflowClient()
    try:
        version = client.get_model_version_by_alias(model_name, alias).version
    except mlflow.exceptions.MlflowException:
        # The alias is not defined for this model; try the tag fallback below.
        version = None

    if version is None:
        # `filter_string` must be passed by keyword: the first positional
        # parameter of the fluent search_model_versions is `max_results`.
        for candidate in mlflow.search_model_versions(filter_string=f"name='{model_name}'"):
            if candidate.tags.get("alias") == alias:
                version = candidate.version
                break

    if version is None:
        return None
    return mlflow.pyfunc.load_model(f"models:/{model_name}/{version}")

model_champion = load_by_alias("champion")
model_challenger = load_by_alias("challenger")

# load_by_alias returns None when no version carries the label; stop with a
# clear message instead of failing later with an AttributeError on `.predict`.
missing = [
    label
    for label, model in (("champion", model_champion), ("challenger", model_challenger))
    if model is None
]
if missing:
    raise RuntimeError(
        f"No registered version of '{model_name}' found for: {', '.join(missing)}"
    )

# NOTE(review): both models are scored on features encoded with this notebook's
# `dv`; confirm the champion was trained with a compatible vectorizer.
y_pred_champion = model_champion.predict(X_test)
y_pred_challenger = model_challenger.predict(X_test)

rmse_champion = root_mean_squared_error(y_test, y_pred_champion)
rmse_challenger = root_mean_squared_error(y_test, y_pred_challenger)

print(f"Champion RMSE: {rmse_champion:.3f}")
print(f"Challenger RMSE: {rmse_challenger:.3f}")


### ¿Se debe cambiar el champion?