# Modelos supervisados avanzados

In [24]:
import pandas as pd
import numpy as np
import mlflow

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score

In [3]:
df = pd.read_csv('../data/raw/hotel_bookings.csv')
FEATURES = ["lead_time", "stays_in_week_nights", "children", "adr", "booking_changes" ]
TARGET = 'is_canceled'
X = df[FEATURES]
y = df[TARGET]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
import dagshub
dagshub.init(repo_owner='kerasPro', repo_name='ML2_Clase', mlflow=True)
import mlflow

In [None]:
# Este solo crea el experimento si no existe
# mlflow.create_experiment("ML2 - Advanced_Supervised_Models")

In [None]:
# No es necesario si se usa dagshub.init
# mlflow.set_tracking_uri("https://dagshub.com/kerasPro/ML2_Clase.mlflow")

In [5]:
# Este crea y apunta al experimento si existe
mlflow.set_experiment("ML2 - Advanced_Supervised_Models")

<Experiment: artifact_location='mlflow-artifacts:/759c95e8db454838a96e8cc6ca031a93', creation_time=1760745689163, experiment_id='0', last_update_time=1760745689163, lifecycle_stage='active', name='ML2 - Advanced_Supervised_Models', tags={}>

In [6]:
mlflow.autolog()

2025/10/17 20:56:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/10/17 20:56:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/10/17 20:56:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


## Baseline

In [15]:
#mlflow.autolog()
with mlflow.start_run(run_name= "Baseline - Dummy Classifier") as run:
  algorithm = DummyClassifier(strategy='most_frequent')
  algorithm.fit(X_train, y_train)
  predictions = algorithm.predict(X_test)
  _accuracy_score = accuracy_score(y_test, predictions)
  _f1_score = f1_score(y_test, predictions)

  mlflow.log_metrics({
      "accuracy": _accuracy_score,
      "f1": _f1_score
  })


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


🏃 View run Baseline - Dummy Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/b36ef37938bb4993824736fd3d0729be
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Logistic Regressor

In [27]:
with mlflow.start_run(run_name="Logistic Regression") as run:
    algorithm = LogisticRegression(max_iter=1000)

    pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('regresion_logistica', algorithm)
    ])

    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Logistic Regression at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/779c98406b0c4b26934ae266552f206c
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 1: Baggin

In [25]:
with mlflow.start_run(run_name="Bagging Classifier") as run:

    algorithm = BaggingClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("bagging", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Bagging Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/86d027a9f042403aba5e19ddc15f02d4
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 2: Random Forest

In [26]:
with mlflow.start_run(run_name="Random Forest Classifier") as run:

    algorithm = RandomForestClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("random_forest", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Random Forest Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/d134f3e030bd4931a518b9b6505cb19c
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 3: XGBosst

In [28]:
with mlflow.start_run(run_name="XGBoost Classifier") as run:

    algorithm = XGBClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("xgboost", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run XGBoost Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/b80e45978f6a4a42b1296a407336279f
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 4: LightGBM

In [29]:
with mlflow.start_run(run_name="LightGBM Classifier") as run:

    algorithm = LGBMClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("lightgbm", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



[LightGBM] [Info] Number of positive: 35253, number of negative: 60259
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 561
[LightGBM] [Info] Number of data points in the train set: 95512, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369095 -> initscore=-0.536101
[LightGBM] [Info] Start training from score -0.536101




🏃 View run LightGBM Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/28367589978347d5aa15bb1f2de80bb7
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 5: CatBoost

In [30]:
with mlflow.start_run(run_name="CatBoost Classifier") as run:

    algorithm = CatBoostClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("catboost", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



Learning rate set to 0.07218
0:	learn: 0.6740801	total: 141ms	remaining: 2m 21s
1:	learn: 0.6580425	total: 161ms	remaining: 1m 20s
2:	learn: 0.6426875	total: 180ms	remaining: 59.8s
3:	learn: 0.6306332	total: 196ms	remaining: 48.8s
4:	learn: 0.6207402	total: 224ms	remaining: 44.7s
5:	learn: 0.6124347	total: 250ms	remaining: 41.5s
6:	learn: 0.6050446	total: 271ms	remaining: 38.4s
7:	learn: 0.5984623	total: 295ms	remaining: 36.6s
8:	learn: 0.5936365	total: 316ms	remaining: 34.8s
9:	learn: 0.5884607	total: 334ms	remaining: 33.1s
10:	learn: 0.5848249	total: 351ms	remaining: 31.5s
11:	learn: 0.5814009	total: 367ms	remaining: 30.2s
12:	learn: 0.5785191	total: 381ms	remaining: 29s
13:	learn: 0.5760991	total: 398ms	remaining: 28s
14:	learn: 0.5734632	total: 417ms	remaining: 27.4s
15:	learn: 0.5717640	total: 433ms	remaining: 26.6s
16:	learn: 0.5697388	total: 451ms	remaining: 26.1s
17:	learn: 0.5682169	total: 462ms	remaining: 25.2s
18:	learn: 0.5666814	total: 476ms	remaining: 24.6s
19:	learn: 0.5



🏃 View run CatBoost Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/0d07cb251cc74b1da787d89264c06ecc
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 6: Voting

In [None]:
with mlflow.start_run(run_name="voting Classifier ") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    voting_classifier = VotingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        voting='hard' #mayoría de votos, 'soft' para probabilidades
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("voting", voting_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

In [None]:
with mlflow.start_run(run_name="voting Classifier _ Probabilidades") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    voting_classifier = VotingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        voting='soft' 
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("voting", voting_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

## Ensamble 7: Stacking

In [33]:
with mlflow.start_run(run_name="Stacking Classifier") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    stacking_classifier = StackingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        final_estimator=XGBClassifier()
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("stacking", stacking_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)

    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }

    )



Learning rate set to 0.07218
0:	learn: 0.6740801	total: 13.2ms	remaining: 13.2s
1:	learn: 0.6580425	total: 22.3ms	remaining: 11.1s
2:	learn: 0.6426875	total: 30.9ms	remaining: 10.3s
3:	learn: 0.6306332	total: 40.8ms	remaining: 10.2s
4:	learn: 0.6207402	total: 50.1ms	remaining: 9.97s
5:	learn: 0.6124347	total: 57.9ms	remaining: 9.59s
6:	learn: 0.6050446	total: 66.6ms	remaining: 9.45s
7:	learn: 0.5984623	total: 73.4ms	remaining: 9.1s
8:	learn: 0.5936365	total: 83.3ms	remaining: 9.17s
9:	learn: 0.5884607	total: 90.3ms	remaining: 8.94s
10:	learn: 0.5848249	total: 99.6ms	remaining: 8.95s
11:	learn: 0.5814009	total: 107ms	remaining: 8.78s
12:	learn: 0.5785191	total: 117ms	remaining: 8.92s
13:	learn: 0.5760991	total: 127ms	remaining: 8.93s
14:	learn: 0.5734632	total: 135ms	remaining: 8.89s
15:	learn: 0.5717640	total: 142ms	remaining: 8.73s
16:	learn: 0.5697388	total: 152ms	remaining: 8.81s
17:	learn: 0.5682169	total: 163ms	remaining: 8.87s
18:	learn: 0.5666814	total: 171ms	remaining: 8.83s
19



🏃 View run Stacking Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/9ce508566ee0449cba4036eb361bb553
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0
