In [6]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from datetime import timedelta
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.platypus import Image as RLImage
import os
import matplotlib.pyplot as plt

# 1. Cargar y preparar datos
df = pd.read_csv("data/ocupacion_hotelera.csv")
df['fecha'] = pd.to_datetime(df['fecha'])
df = df.sort_values(['hotel_nombre', 'fecha'])

# 2. Crear y unir predicciones de Prophet por hotel
df['prophet_yhat'] = np.nan

for hotel in df['hotel_nombre'].unique():
    df_hotel = df[df['hotel_nombre'] == hotel][['fecha', 'reservas_confirmadas']].rename(columns={'fecha': 'ds', 'reservas_confirmadas': 'y'})
    if len(df_hotel) < 30:  # evitar errores con pocos datos
        continue

    modelo = Prophet(daily_seasonality=True, weekly_seasonality=True, yearly_seasonality=True)
    modelo.fit(df_hotel)

    future = modelo.make_future_dataframe(periods=0)
    forecast = modelo.predict(future)

    df.loc[df['hotel_nombre'] == hotel, 'prophet_yhat'] = forecast['yhat'].values

# 3. Eliminar filas sin predicción Prophet
df = df.dropna(subset=['prophet_yhat'])

# 4. Features adicionales
df['anio'] = df['fecha'].dt.year
df['mes'] = df['fecha'].dt.month
df['dia'] = df['fecha'].dt.day
df['dia_semana'] = df['fecha'].dt.dayofweek

le = LabelEncoder()
df['hotel_id'] = le.fit_transform(df['hotel_nombre'])

# Lags
df['reservas_lag1'] = df.groupby('hotel_id')['reservas_confirmadas'].shift(1)
df['cancelaciones_lag1'] = df.groupby('hotel_id')['cancelaciones'].shift(1)

# Festivos simulados
festivos = pd.to_datetime(['2019-01-01', '2019-12-25', '2020-01-01', '2020-12-25'])
df['es_festivo'] = df['fecha'].isin(festivos).astype(int)
df['es_fin_de_semana'] = df['dia_semana'].isin([5, 6]).astype(int)
df['hay_evento_local'] = df.groupby('hotel_id')['fecha'].transform(lambda x: (np.random.randint(0, 10, size=len(x)) == 0).astype(int))

# Feature interacción
df['precio_por_ocupacion'] = df['precio_promedio_noche'] / (df['tasa_ocupacion'] + 1)

# Eliminar NaNs
df = df.dropna()

# 5. Selección de features
features = [
    'anio', 'mes', 'dia', 'dia_semana', 'hotel_id',
    'tasa_ocupacion', 'precio_promedio_noche',
    'reservas_lag1', 'cancelaciones_lag1',
    'es_festivo', 'es_fin_de_semana', 'hay_evento_local',
    'precio_por_ocupacion', 'prophet_yhat'  # <-- aquí está la magia
]
X = df[features]
y = df[['reservas_confirmadas', 'cancelaciones']]

# 6. Entrenamiento
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

modelo = MultiOutputRegressor(
    lgb.LGBMRegressor(
        objective='regression',
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        random_state=42
    )
)

modelo.fit(X_train, y_train)

# 7. Evaluación
y_pred = modelo.predict(X_test)
rmse_reservas = np.sqrt(mean_squared_error(y_test['reservas_confirmadas'], y_pred[:, 0]))
rmse_cancelaciones = np.sqrt(mean_squared_error(y_test['cancelaciones'], y_pred[:, 1]))

print(f"Prophet + LightGBM RMSE Reservas: {rmse_reservas:.2f}")
print(f"Prophet + LightGBM RMSE Cancelaciones: {rmse_cancelaciones:.2f}")


08:53:25 - cmdstanpy - INFO - Chain [1] start processing
08:53:25 - cmdstanpy - INFO - Chain [1] done processing
08:53:25 - cmdstanpy - INFO - Chain [1] start processing
08:53:25 - cmdstanpy - INFO - Chain [1] done processing
08:53:26 - cmdstanpy - INFO - Chain [1] start processing
08:53:26 - cmdstanpy - INFO - Chain [1] done processing
08:53:26 - cmdstanpy - INFO - Chain [1] start processing
08:53:26 - cmdstanpy - INFO - Chain [1] done processing
08:53:26 - cmdstanpy - INFO - Chain [1] start processing
08:53:27 - cmdstanpy - INFO - Chain [1] done processing
08:53:27 - cmdstanpy - INFO - Chain [1] start processing
08:53:27 - cmdstanpy - INFO - Chain [1] done processing
08:53:28 - cmdstanpy - INFO - Chain [1] start processing
08:53:28 - cmdstanpy - INFO - Chain [1] done processing
08:53:28 - cmdstanpy - INFO - Chain [1] start processing
08:53:28 - cmdstanpy - INFO - Chain [1] done processing
08:53:29 - cmdstanpy - INFO - Chain [1] start processing
08:53:29 - cmdstanpy - INFO - Chain [1]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002144 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1273
[LightGBM] [Info] Number of data points in the train set: 43820, number of used features: 14
[LightGBM] [Info] Start training from score 223.392948
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002176 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1273
[LightGBM] [Info] Number of data points in the train set: 43820, number of used features: 14
[LightGBM] [Info] Start training from score 27.448631
Prophet + LightGBM RMSE Reservas: 57.67
Prophet + LightGBM RMSE Cancelaciones: 12.62


In [None]:
# Crear carpeta para los resultados
os.makedirs("predicciones_png", exist_ok=True)

# Cargar datos históricos
df_hist = df
df_hist['fecha'] = pd.to_datetime(df_hist['fecha'])
df_hist = df_hist.sort_values(['hotel_nombre', 'fecha'])

# Bucle por cada hotel
for hotel in df_hist['hotel_nombre'].unique():
    print(f"Procesando: {hotel}")

    df_hotel = df_hist[df_hist['hotel_nombre'] == hotel]
    if len(df_hotel) < 60:
        print(f"Saltado: {hotel} (datos insuficientes)")
        continue

    df_prophet = df_hotel[['fecha', 'reservas_confirmadas']].rename(columns={'fecha': 'ds', 'reservas_confirmadas': 'y'})
    modelo_prophet = Prophet(daily_seasonality=True)
    modelo_prophet.fit(df_prophet)

    future = modelo_prophet.make_future_dataframe(periods=30)
    forecast = modelo_prophet.predict(future)

    # Crear dataframe para LightGBM
    df_future = forecast[['ds', 'yhat']].rename(columns={'ds': 'fecha', 'yhat': 'prophet_yhat'})
    df_future = df_future[df_future['fecha'] > df_hotel['fecha'].max()].copy()
    df_future['hotel_nombre'] = hotel
    df_future['hotel_id'] = le.transform([hotel])[0]

    # Simular features
    df_future['anio'] = df_future['fecha'].dt.year
    df_future['mes'] = df_future['fecha'].dt.month
    df_future['dia'] = df_future['fecha'].dt.day
    df_future['dia_semana'] = df_future['fecha'].dt.dayofweek
    df_future['es_festivo'] = df_future['fecha'].dt.strftime("%m-%d").isin(["01-01", "12-25"]).astype(int)
    df_future['es_fin_de_semana'] = df_future['dia_semana'].isin([5, 6]).astype(int)
    df_future['hay_evento_local'] = np.random.choice([0, 1], size=len(df_future))

    # Agregar últimos valores
    last = df_hotel.iloc[-1]
    df_future['reservas_lag1'] = last['reservas_confirmadas']
    df_future['cancelaciones_lag1'] = last['cancelaciones']
    df_future['tasa_ocupacion'] = last['tasa_ocupacion']
    df_future['precio_promedio_noche'] = last['precio_promedio_noche']
    df_future['precio_por_ocupacion'] = df_future['precio_promedio_noche'] / (df_future['tasa_ocupacion'] + 1)

    # Features para LightGBM
    features = [
        'anio', 'mes', 'dia', 'dia_semana', 'hotel_id',
        'tasa_ocupacion', 'precio_promedio_noche',
        'reservas_lag1', 'cancelaciones_lag1',
        'es_festivo', 'es_fin_de_semana', 'hay_evento_local',
        'precio_por_ocupacion', 'prophet_yhat'
    ]

    # Predicción
    X_future = df_future[features]
    pred = modelo.predict(X_future)
    df_future['reservas_lightgbm'] = pred[:, 0]
    df_future['cancelaciones_lightgbm'] = pred[:, 1]

    # Guardar gráfico
    plt.figure(figsize=(10, 4))
    plt.plot(df_future['fecha'], df_future['prophet_yhat'], '--', label='Prophet (baseline)')
    plt.plot(df_future['fecha'], df_future['reservas_lightgbm'], label='LightGBM (reservas)', linewidth=2)
    plt.plot(df_future['fecha'], df_future['cancelaciones_lightgbm'], label='LightGBM (cancelaciones)', linewidth=2)
    plt.title(f"Predicción - {hotel}")
    plt.xlabel("Fecha")
    plt.ylabel("Cantidad")
    plt.legend()
    plt.tight_layout()

    plot_path = f"predicciones_png/{hotel}_plot.png"
    plt.savefig(plot_path)
    plt.close()

    # Crear PNG
    png_path = f"predicciones_png/hotel_{df_future['hotel_id'].iloc[0]}.pdf"
    c = canvas.Canvas(png_path, pagesize=A4)
    width, height = A4

    c.setFont("Helvetica-Bold", 16)
    c.drawString(50, height - 50, f"Predicción de demanda hotelera")
    c.setFont("Helvetica", 12)
    c.drawString(50, height - 75, f"Hotel: {hotel}")
    c.drawString(50, height - 95, f"Periodo: {df_future['fecha'].min().date()} a {df_future['fecha'].max().date()}")

    # Insertar imagen
    c.drawImage(plot_path, 50, height - 400, width=500, preserveAspectRatio=True)

    # Insertar tabla (resumen primeras 5 filas)
    y_start = height - 420
    c.setFont("Helvetica", 10)
    for i, row in df_future.head(5).iterrows():
        texto = f"{row['fecha'].date()} - Reservas: {int(row['reservas_lightgbm'])} - Cancelaciones: {int(row['cancelaciones_lightgbm'])}"
        c.drawString(50, y_start - (i * 15), texto)

    c.save()

print("PNGs generados en la carpeta: predicciones_pdfs")


09:00:04 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Alletra Boutique Hotel


09:00:04 - cmdstanpy - INFO - Chain [1] done processing
09:00:05 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Alletra Diamond Grand Hotel


09:00:05 - cmdstanpy - INFO - Chain [1] done processing
09:00:06 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Alletra Haven


09:00:06 - cmdstanpy - INFO - Chain [1] done processing


Procesando: Alletra Resort


09:00:06 - cmdstanpy - INFO - Chain [1] start processing
09:00:06 - cmdstanpy - INFO - Chain [1] done processing
09:00:07 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Apollo Diamond Suites


09:00:07 - cmdstanpy - INFO - Chain [1] done processing
09:00:08 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Apollo Executive Beach Resort


09:00:08 - cmdstanpy - INFO - Chain [1] done processing
09:00:08 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Apollo Resort & Spa


09:00:08 - cmdstanpy - INFO - Chain [1] done processing
09:00:09 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Apollo Towers


09:00:09 - cmdstanpy - INFO - Chain [1] done processing
09:00:10 - cmdstanpy - INFO - Chain [1] start processing
09:00:10 - cmdstanpy - INFO - Chain [1] done processing


Procesando: Aruba Lodge


09:00:10 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Aruba Luxury Lodge


09:00:10 - cmdstanpy - INFO - Chain [1] done processing
09:00:11 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Cray Villas


09:00:11 - cmdstanpy - INFO - Chain [1] done processing
09:00:12 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Ezmeral Grand Hotel


09:00:12 - cmdstanpy - INFO - Chain [1] done processing
09:00:12 - cmdstanpy - INFO - Chain [1] start processing


Procesando: GreenLake Digital Business Suites


09:00:13 - cmdstanpy - INFO - Chain [1] done processing
09:00:13 - cmdstanpy - INFO - Chain [1] start processing


Procesando: GreenLake Platinum Heritage Inn


09:00:13 - cmdstanpy - INFO - Chain [1] done processing
09:00:14 - cmdstanpy - INFO - Chain [1] start processing


Procesando: InfoSight Boutique Hotel


09:00:14 - cmdstanpy - INFO - Chain [1] done processing
09:00:15 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Nimble Inn


09:00:15 - cmdstanpy - INFO - Chain [1] done processing
09:00:15 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Pointnext Signature Residences & Suites


09:00:15 - cmdstanpy - INFO - Chain [1] done processing
09:00:16 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Primera Grand


09:00:16 - cmdstanpy - INFO - Chain [1] done processing
09:00:17 - cmdstanpy - INFO - Chain [1] start processing


Procesando: ProLiant Haven


09:00:17 - cmdstanpy - INFO - Chain [1] done processing


Procesando: ProLiant Place


09:00:18 - cmdstanpy - INFO - Chain [1] start processing
09:00:18 - cmdstanpy - INFO - Chain [1] done processing
09:00:18 - cmdstanpy - INFO - Chain [1] start processing


Procesando: ProLiant Towers


09:00:18 - cmdstanpy - INFO - Chain [1] done processing
09:00:19 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Simplivity Golden Plaza Hotel


09:00:19 - cmdstanpy - INFO - Chain [1] done processing
09:00:20 - cmdstanpy - INFO - Chain [1] start processing


Procesando: Synergy Golden Grand Hotel


09:00:20 - cmdstanpy - INFO - Chain [1] done processing
09:00:20 - cmdstanpy - INFO - Chain [1] start processing


Procesando: dHCI Executive Boutique Hotel


09:00:20 - cmdstanpy - INFO - Chain [1] done processing
09:00:21 - cmdstanpy - INFO - Chain [1] start processing


Procesando: dHCI Platinum Beach Resort


09:00:21 - cmdstanpy - INFO - Chain [1] done processing


PNGs generados en la carpeta: predicciones_pdfs
