# Modelos supervisados avanzados

In [24]:
import pandas as pd
import numpy as np
import mlflow

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score

In [3]:
df = pd.read_csv('../data/raw/hotel_bookings.csv')
FEATURES = ["lead_time", "stays_in_week_nights", "children", "adr", "booking_changes" ]
TARGET = 'is_canceled'
X = df[FEATURES]
y = df[TARGET]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
import dagshub
dagshub.init(repo_owner='kerasPro', repo_name='ML2_Clase', mlflow=True)
import mlflow

In [None]:
# Este solo crea el experimento si no existe
# mlflow.create_experiment("ML2 - Advanced_Supervised_Models")

In [None]:
# No es necesario si se usa dagshub.init
# mlflow.set_tracking_uri("https://dagshub.com/kerasPro/ML2_Clase.mlflow")

In [5]:
# Este crea y apunta al experimento si existe
mlflow.set_experiment("ML2 - Advanced_Supervised_Models")

<Experiment: artifact_location='mlflow-artifacts:/759c95e8db454838a96e8cc6ca031a93', creation_time=1760745689163, experiment_id='0', last_update_time=1760745689163, lifecycle_stage='active', name='ML2 - Advanced_Supervised_Models', tags={}>

In [6]:
mlflow.autolog()

2025/10/17 20:56:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/10/17 20:56:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/10/17 20:56:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


## Baseline

In [15]:
#mlflow.autolog()
with mlflow.start_run(run_name= "Baseline - Dummy Classifier") as run:
  algorithm = DummyClassifier(strategy='most_frequent')
  algorithm.fit(X_train, y_train)
  predictions = algorithm.predict(X_test)
  _accuracy_score = accuracy_score(y_test, predictions)
  _f1_score = f1_score(y_test, predictions)

  mlflow.log_metrics({
      "accuracy": _accuracy_score,
      "f1": _f1_score
  })


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


🏃 View run Baseline - Dummy Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/b36ef37938bb4993824736fd3d0729be
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Logistic Regressor

In [27]:
with mlflow.start_run(run_name="Logistic Regression") as run:
    algorithm = LogisticRegression(max_iter=1000)

    pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('regresion_logistica', algorithm)
    ])

    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Logistic Regression at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/779c98406b0c4b26934ae266552f206c
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 1: Baggin

In [25]:
with mlflow.start_run(run_name="Bagging Classifier") as run:

    algorithm = BaggingClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("bagging", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Bagging Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/86d027a9f042403aba5e19ddc15f02d4
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 2: Random Forest

In [26]:
with mlflow.start_run(run_name="Random Forest Classifier") as run:

    algorithm = RandomForestClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("random_forest", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run Random Forest Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/d134f3e030bd4931a518b9b6505cb19c
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 3: XGBosst

In [28]:
with mlflow.start_run(run_name="XGBoost Classifier") as run:

    algorithm = XGBClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("xgboost", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )



🏃 View run XGBoost Classifier at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0/runs/b80e45978f6a4a42b1296a407336279f
🧪 View experiment at: https://dagshub.com/kerasPro/ML2_Clase.mlflow/#/experiments/0


## Ensamble 4: LightGBM

In [None]:
with mlflow.start_run(run_name="LightGBM Classifier") as run:

    algorithm = LGBMClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("lightgbm", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

## Ensamble 5: CatBoost

In [None]:
with mlflow.start_run(run_name="CatBoost Classifier") as run:

    algorithm = CatBoostClassifier()
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("catboost", algorithm)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

## Ensamble 6: Voting

In [None]:
with mlflow.start_run(run_name="voting Classifier ") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    voting_classifier = VotingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        voting='hard' #mayoría de votos, 'soft' para probabilidades
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("voting", voting_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

In [None]:
with mlflow.start_run(run_name="voting Classifier _ Probabilidades") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    voting_classifier = VotingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        voting='soft' 
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("voting", voting_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)
    
    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }
        
    )

## Ensamble 7: Stacking

In [None]:
with mlflow.start_run(run_name="Stacking Classifier") as run:

    algorithm1 = RandomForestClassifier()
    algorithm2 = BaggingClassifier()
    algorithm3 = CatBoostClassifier()
    stacking_classifier = StackingClassifier(
        estimators=[
            ('random_forest', algorithm1),
            ('bagging', algorithm2),
            ('catboost', algorithm3)
        ],
        final_estimator=XGBClassifier()
    )
    pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="mean")),
            ("stacking", stacking_classifier)
        ]
    )
    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(X_test)

    _accuracy_score = accuracy_score(y_test, predictions)
    _f1_score = f1_score(y_test, predictions)

    mlflow.log_metrics(
        {
            "accuracy": _accuracy_score,
            "f1": _f1_score
        }

    )