# In [None]:
# -*- coding: utf-8 -*-
# =============================================================================
# PRUEBA DEL MODELO PROPHET A NIVEL DE PAÍS
# =============================================================================
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from prophet import Prophet
import warnings

warnings.filterwarnings('ignore')

# ASUMIMOS QUE 'df_featured' y 'df_prophet_base' YA EXISTEN DEL SCRIPT ANTERIOR

# Funciones de métricas y optimización de Prophet
def mape(y_true, y_pred, eps=1e-8):
    """Return the mean absolute percentage error, expressed in percent.

    Positions where the true value is exactly zero are excluded, since the
    relative error is undefined there. When no non-zero true value remains,
    ``0.0`` is returned.

    Args:
        y_true: Observed values (any array-like accepted by ``np.array``).
        y_pred: Predicted values, same length as ``y_true``.
        eps: Unused; kept so the signature mirrors ``smape``.

    Returns:
        The MAPE over the non-zero targets, multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    nonzero = actual != 0
    if not nonzero.any():
        return 0.0

    relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
    return np.abs(relative_error).mean() * 100

def smape(y_true, y_pred, eps=1e-8):
    """Return the symmetric mean absolute percentage error, in percent.

    Each term is ``2 * |pred - true| / (|true| + |pred| + eps)``; the mean of
    those terms is multiplied by 100.

    Args:
        y_true: Observed values (any array-like).
        y_pred: Predicted values, same length as ``y_true``.
        eps: Small constant added to the denominator so that a pair of zeros
            contributes 0 instead of dividing by zero.

    Returns:
        The SMAPE as a float percentage.
    """
    # Coerce to arrays, as `mape` does: plain Python lists do not support the
    # element-wise subtraction below and used to raise TypeError.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    denominator = np.abs(y_true) + np.abs(y_pred) + eps
    return np.mean(2.0 * np.abs(y_pred - y_true) / denominator) * 100

def compute_metrics(y_true, y_pred):
    """Score predictions after undoing the log transform of the target.

    Both inputs are passed through ``np.expm1`` before scoring, so every
    metric is reported in the original units.
    NOTE(review): this assumes the target was ``log1p``-transformed upstream;
    confirm against the script that builds ``df_prophet_base``.

    Args:
        y_true: Observed target values on the transformed scale.
        y_pred: Predicted target values on the transformed scale.

    Returns:
        A dict with the keys ``MAE``, ``RMSE``, ``R2``, ``MAPE`` and ``SMAPE``.
    """
    actual = np.expm1(y_true)
    predicted = np.expm1(y_pred)

    scores = {}
    scores["MAE"] = mean_absolute_error(actual, predicted)
    # RMSE is the square root of the mean squared error.
    scores["RMSE"] = np.sqrt(mean_squared_error(actual, predicted))
    scores["R2"] = r2_score(actual, predicted)
    scores["MAPE"] = mape(actual, predicted)
    scores["SMAPE"] = smape(actual, predicted)
    return scores

def optimize_prophet_country(train_df, regressors):
    """Fit a Prophet model with a fixed configuration on one country's data.

    Despite the name, no hyper-parameter search is performed: a per-country
    search is expensive, so one standard configuration (yearly seasonality
    only) is used. A hyper-parameter grid can be added here if needed.

    Args:
        train_df: Training frame passed unchanged to ``Prophet.fit``.
        regressors: Names of extra regressor columns to register on the
            model; may be empty or ``None``.

    Returns:
        The fitted ``Prophet`` instance.
    """
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    # An empty or missing regressor list simply registers nothing.
    for regressor_name in regressors or ():
        model.add_regressor(regressor_name)
    model.fit(train_df)
    return model

# =============================================================================
# PIPELINE DE PROPHET POR PAÍS CON VALIDACIÓN CRUZADA
# =============================================================================
print("üöÄ Iniciando pipeline de Prophet a nivel de pa√≠s...")

# Number of walk-forward folds evaluated for every country.
n_splits = 5
unique_countries = df_featured['country'].unique()
# One metrics dict per (country, fold) that was evaluated successfully.
all_country_results = []
# Every column other than 'ds' and 'y' is handed to Prophet as an extra regressor.
regressors_prophet = [r for r in df_prophet_base.columns if r not in ['ds', 'y']]


# Outer loop: one independent evaluation per country.
for country in tqdm(unique_countries, desc="Procesando pa√≠ses"):

    # NOTE(review): the mask comes from df_featured but is applied to
    # df_prophet_base, so both frames must share the same index; confirm
    # against the script that creates them.
    df_country = df_prophet_base[df_featured['country'] == country].copy()

    # TimeSeriesSplit splits by row position and the forecast is compared to
    # the test target row by row, so the rows must be in chronological order.
    # The sort is stable and therefore a no-op on data that is already ordered.
    df_country = df_country.sort_values(
        'ds', key=pd.to_datetime, kind='mergesort'
    ).reset_index(drop=True)

    # A minimum number of rows is needed for an n_splits-fold time-series split.
    if len(df_country) < n_splits + 1:
        print(f"‚ö†Ô∏è  Saltando {country}: datos insuficientes ({len(df_country)} registros).")
        continue

    # Inner loop: time-series cross-validation for the current country.
    tscv_country = TimeSeriesSplit(n_splits=n_splits)
    for fold, (train_idx, test_idx) in enumerate(tscv_country.split(df_country)):

        # 1. Split the data for this fold. Explicit copies are taken because
        #    the regressor columns are overwritten below; assigning into a
        #    slice of df_country is chained assignment.
        prophet_train = df_country.iloc[train_idx].copy()
        prophet_test = df_country.iloc[test_idx].copy()

        if prophet_train.empty or prophet_test.empty:
            continue

        # 2. Scale the regressors INSIDE the fold: the scaler is fitted on the
        #    training rows only, so no test statistics leak into training.
        if regressors_prophet:
            scaler = StandardScaler()
            prophet_train[regressors_prophet] = scaler.fit_transform(prophet_train[regressors_prophet])
            prophet_test[regressors_prophet] = scaler.transform(prophet_test[regressors_prophet])

        # 3. Train the model. A failure in one fold is reported and must not
        #    abort the remaining folds or countries.
        try:
            model = optimize_prophet_country(prophet_train, regressors_prophet)

            # 4. Predict on the held-out rows (dates plus scaled regressors,
            #    target column removed).
            future_df = prophet_test.drop(columns='y')
            forecast = model.predict(future_df)

            # 5. Evaluate and record the fold.
            metrics = compute_metrics(prophet_test["y"].values, forecast["yhat"].values)
            metrics['country'] = country
            metrics['fold'] = fold + 1
            all_country_results.append(metrics)

        except Exception as e:
            print(f"‚ùå Error procesando {country} en fold {fold+1}: {e}")


# =============================================================================
# 8. RESUMEN DE RESULTADOS DE PROPHET POR PAÍS
# =============================================================================
if not all_country_results:
    # Every country was skipped or every fold failed.
    print("\nNo se generaron resultados para analizar.")
else:
    # One row per (country, fold) that was evaluated.
    df_results_country = pd.DataFrame(all_country_results)

    print("\n\nüìä Resumen de Rendimiento de Prophet por Pa√≠s (media de m√©tricas):")
    # Average the folds of each country, then rank countries by R2 (best first).
    summary_country = (
        df_results_country
        .groupby("country")[["MAE", "RMSE", "R2"]]
        .mean()
        .sort_values("R2", ascending=False)
    )
    print(summary_country)

    print("\n\nüìà Rendimiento Promedio General de Prophet (a trav√©s de todos los pa√≠ses):")
    # Unweighted mean of the per-country averages.
    print(summary_country.mean())

    # Box plot of the R2 values; the plotted series holds one value per
    # (country, fold) row of df_results_country.
    plt.figure(figsize=(12, 8))
    sns.boxplot(x=df_results_country["R2"])
    plt.title('Distribuci√≥n del R¬≤ de Prophet entre todos los pa√≠ses')
    plt.xlabel('R¬≤ Score')
    plt.show()



# Cell output: SyntaxError: unmatched ']' (3113550249.py, line 136)