In [2]:
# Importación librerías
import pandas as pd
import numpy as np
from pycaret.regression import *
from pycaret.time_series import *
from prophet import Prophet
from scipy.stats import uniform
from mango import Tuner
import json
import tensorflow
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tfkeras.models import Sequential
from keras.layers import GRU, Dense, Dropout,BatchNormalization
import keras_tuner as kt
from keras.optimizers import Adam
from keras import backend as K
from keras.callbacks import EarlyStopping

In [None]:
import logging
# Silence the 'cmdstanpy' logger: attach a no-op handler, stop records from
# propagating to the root logger and only let CRITICAL records through.
# NOTE(review): presumably this is meant to hide the output produced while
# fitting Prophet models - confirm.
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

In [None]:
# Load the data: read the cleaned daily campaign report and drop the
# "Unnamed: 0" column (presumably an index column left over from a previous
# export - confirm).
df = pd.read_excel("reporte_diario_campaña_limpio.xlsx").drop(columns="Unnamed: 0")

In [None]:
# Error metric definition
def wape(y, y_pred, **kwargs):
    """Weighted absolute percentage error: sum(|y - y_pred|) / sum(|y|).

    Inputs are compared position by position. They are converted to flat
    NumPy arrays first so that two pandas Series carrying different indexes
    are not re-aligned by label (which would turn the difference into NaNs
    that pandas then silently skips).

    Args:
        y: Actual values (array-like or pandas Series).
        y_pred: Predicted values, same length as ``y``.
        **kwargs: Ignored; accepted so the function can be plugged in where a
            metric callable receives extra keyword arguments.

    Returns:
        The WAPE as a NumPy float. Pairs in which either value is NaN are
        excluded from both sums. If the sum of |y| is zero the result is
        ``inf`` or ``nan`` (plain NumPy division).
    """
    y_true = np.asarray(y, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()
    # Keep only the positions where both values are available.
    valid = ~(np.isnan(y_true) | np.isnan(y_hat))
    abs_error = np.sum(np.abs(y_true[valid] - y_hat[valid]))
    # abs() in the denominator keeps the metric consistent with the Keras
    # `wape_metric` used for the GRU model.
    return abs_error / np.sum(np.abs(y_true[valid]))

In [None]:
# PyCaret regression training
def entrenamiento_pycaret_reg(datos_entrenamiento, datos_prueba,target):
    """Select, tune and apply the best PyCaret regression model.

    Models are compared and tuned with 3 time-series folds using the custom
    WAPE metric (lower is better).

    Args:
        datos_entrenamiento: Training DataFrame containing the feature
            columns and the ``target`` column.
        datos_prueba: DataFrame to predict on; it must contain every feature
            column present in ``datos_entrenamiento``.
        target: Name of the column to predict.

    Returns:
        Tuple ``(pred_reg, best_reg, params)``: the output of
        ``predict_model`` for the tuned model, the best model returned by
        ``compare_models`` (before tuning) and the tuned model's
        ``get_params()`` dictionary.
    """
    # Predict with exactly the features used for training. A fixed column
    # list would leave out features that only exist for some targets
    # (e.g. 'interpolado_real_calls' when the target is the AHT column).
    columnas_modelo = [col for col in datos_entrenamiento.columns if col != target]

    reg = RegressionExperiment()
    reg.setup(datos_entrenamiento, target=target, session_id=47, preprocess=False, verbose=False,
              fold=3, system_log=False, memory=False, data_split_shuffle=False, fold_strategy="timeseries")
    reg.add_metric('wape', 'WAPE', wape, greater_is_better=False)
    best_reg = reg.compare_models(sort="WAPE", verbose=False, fold=3)
    best_model_reg = reg.create_model(best_reg, fold=3, verbose=False)
    tuned_reg = reg.tune_model(best_model_reg, verbose=False, fold=3, optimize="WAPE")
    pred_reg = reg.predict_model(tuned_reg, data=datos_prueba[columnas_modelo])
    return pred_reg, best_reg, tuned_reg.get_params()

In [None]:
# PyCaret time-series training
def entrenamiento_pycaret_ts(datos_entrenamiento,datos_prueba,n_periodos,target,fold):
    """Select, tune and apply the best PyCaret time-series forecaster.

    Models are compared and tuned on MAPE with rolling-window folds.

    Args:
        datos_entrenamiento: Training DataFrame with the ``target`` column;
            every other column is passed to PyCaret alongside it.
        datos_prueba: DataFrame covering the forecast horizon; it must
            contain every non-target column of ``datos_entrenamiento``.
        n_periodos: Forecast horizon (number of periods to predict).
        target: Name of the column to forecast.
        fold: Number of cross-validation folds.

    Returns:
        Tuple ``(pred_ts, best_ts, params)``: the output of ``predict_model``
        for the tuned model, the best model returned by ``compare_models``
        (before tuning) and the tuned model's ``get_params()`` dictionary.
    """
    # Use the same non-target columns that were available during training.
    # A fixed column list would leave out columns that only exist for some
    # targets (e.g. 'interpolado_real_calls' when the target is the AHT column).
    columnas_exogenas = [col for col in datos_entrenamiento.columns if col != target]

    ts = TSForecastingExperiment()
    ts.setup(datos_entrenamiento, target=target, session_id=47, fh=n_periodos, verbose=False,
             fold_strategy="rolling", fold=fold)
    best_ts = ts.compare_models(sort="MAPE", verbose=False)
    best_model_ts = ts.create_model(best_ts, verbose=False)
    tuned_ts = ts.tune_model(best_model_ts, verbose=False, optimize="MAPE")
    pred_ts = ts.predict_model(tuned_ts, X=datos_prueba[columnas_exogenas], fh=n_periodos, verbose=False)

    return pred_ts, best_ts, tuned_ts.get_params()

In [None]:
# Prophet training
def entrenamiento_prophet(datos, holidays, n_periodos):
    """Tune a Prophet model and forecast the hold-out period.

    The last ``n_periodos`` rows of ``datos`` are the hold-out. The rows
    before it are split 80/20 (in order) into a training and a validation
    slice; hyperparameters are searched with ``mango.Tuner`` by minimising
    the WAPE on the validation slice. The hold-out is never used for tuning.
    The final model is refit on training + validation and predicts every
    date in ``datos``.

    Args:
        datos: DataFrame with columns ``ds`` (date) and ``y`` (value),
            sorted by date.
        holidays: Holidays DataFrame passed to ``Prophet(holidays=...)``.
        n_periodos: Number of trailing rows of ``datos`` kept as hold-out.

    Returns:
        Tuple ``(yhat, best_params)``: ``yhat`` is the Series of predictions
        for all dates in ``datos`` (its last ``n_periodos`` values are the
        hold-out forecast) and ``best_params`` is the best hyperparameter
        dictionary found by the tuner.
    """
    historico = datos[:-n_periodos]  # everything before the hold-out
    n_train = int(len(historico) * 0.8)
    train_df = historico[:n_train]
    val_df = historico[n_train:]

    param_space = dict(
        growth=['linear', 'flat'],
        n_changepoints=range(0, 55, 5),
        changepoint_range=uniform(0.5, 0.5),
        yearly_seasonality=[True, False],
        weekly_seasonality=[True, False],
        daily_seasonality=[True, False],
        seasonality_mode=['additive', 'multiplicative'],
        seasonality_prior_scale=uniform(5.0, 15.0),
        changepoint_prior_scale=uniform(0.0, 0.1),
        interval_width=uniform(0.2, 0.8),
        uncertainty_samples=[500, 1000, 1500, 2000]
    )

    def objective_function(args_list, train_df, val_df):
        """Return the validation WAPE of one Prophet fit per parameter set."""
        # Score only rows with a known value, and predict exactly their dates
        # so predictions and actuals refer to the same days.
        val_eval = val_df.dropna(subset=['y'])
        y_val = val_eval['y'].to_numpy()
        results = []

        for params in args_list:
            model = Prophet(holidays=holidays, **params)
            model.fit(train_df)
            forecast = model.predict(val_eval[['ds']])
            # Plain arrays: compare by position, not by DataFrame index.
            error = wape(y_val, forecast['yhat'].to_numpy())
            results.append(error)

        return results

    conf_Dict = dict()
    conf_Dict['initial_random'] = 10
    conf_Dict['num_iteration'] = 50

    # Tune against the validation slice; the hold-out stays untouched.
    tuner = Tuner(param_space, lambda x: objective_function(x, train_df, val_df), conf_Dict)
    results = tuner.minimize()

    # Refit on all pre-hold-out data so the forecast starts right where the
    # hold-out begins, then predict every date (history + hold-out).
    model = Prophet(holidays=holidays, **results['best_params'])
    model.fit(historico)
    forecast = model.predict(datos[['ds']])

    return forecast['yhat'], results['best_params']


In [None]:
# GRU training
def entrenamiento_gru(data,target,linea):
    """Tune and train a GRU network and predict the 2024 period.

    Rows dated up to 2023-12-31 are used for training. The test slice starts
    14 days (the window length) before 2024-01-01 so that the first window
    can produce a prediction for the first test date. Features and target
    are standardised with scalers fitted on the training rows only.
    Hyperparameters are chosen with a Keras Tuner random search that
    minimises the validation WAPE; the best model is then trained further
    and used to predict the test windows.

    Args:
        data: DataFrame with a ``fecha`` column, the feature columns and the
            ``target`` column. It is not modified.
        target: Name of the column to predict. When it differs from
            ``'interpolado_real_calls'`` that column is added as a feature.
        linea: Line identifier; only used to name the tuner directory and
            project.

    Returns:
        The predictions for the test windows after undoing the target
        scaling, as returned by ``scaler_y.inverse_transform``.
    """
    if target == 'interpolado_real_calls':
        features = ['año', 'mes', "dia", 'dia_semana', 'state_holiday']
    else:
        features = ['año', 'mes', "dia", 'dia_semana', 'state_holiday','interpolado_real_calls']

    # Window length, shared by the date split and the sequence builder.
    time_steps = 14
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    def train_test_split_by_date(data, date_column, train_end_date, test_start_date, time_step):
        """Split by date; the test slice starts `time_step` days early."""
        # Work on a copy so the caller's DataFrame is left untouched.
        data = data.copy()
        data[date_column] = pd.to_datetime(data[date_column])

        adjusted_test_start_date = pd.to_datetime(test_start_date) - pd.Timedelta(days=time_step)

        train_data = data[data[date_column] <= train_end_date]
        test_data = data[data[date_column] >= adjusted_test_start_date]

        return train_data, test_data

    train_data, test_data = train_test_split_by_date(data, 'fecha', '2023-12-31', '2024-01-01', time_steps)

    X_train, y_train = train_data[features], train_data[target]
    X_test, y_test = test_data[features], test_data[target]

    # Fit the scalers on the training rows only and reuse them for the test rows.
    X_train_scaled = scaler_X.fit_transform(X_train)
    y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

    X_test_scaled = scaler_X.transform(X_test)
    y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

    def create_sequences(X, y, time_steps=10):
        """Build windows of `time_steps` rows, each paired with the next y."""
        Xs, ys = [], []
        for i in range(len(X) - time_steps):
            Xs.append(X[i:(i + time_steps)])
            ys.append(y[i + time_steps])
        return np.array(Xs), np.array(ys)

    X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, time_steps)
    X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, time_steps)

    def wape_metric(y_true, y_pred):
        """WAPE computed with Keras backend ops so it can be used as a metric."""
        return K.sum(K.abs(y_true - y_pred)) / K.sum(K.abs(y_true))

    def build_robust_gru_model(hp):
        """Build a GRU model from the hyperparameters sampled in `hp`."""
        model = Sequential()

        # GRU layers; every layer except the last one returns full sequences.
        n_gru_layers = hp.Int('gru_layers', 1, 3)
        for i in range(n_gru_layers):
            model.add(GRU(units=hp.Int(f'gru_units_{i}', min_value=64, max_value=512, step=64),
                        return_sequences=(i < n_gru_layers - 1),
                        recurrent_dropout=hp.Float(f'recurrent_dropout_{i}', min_value=0.0, max_value=0.3, step=0.1)))
            model.add(Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))

        # Batch normalisation layer
        model.add(BatchNormalization())

        # Final dense layers
        for i in range(hp.Int('dense_layers', 1, 3)):
            model.add(Dense(units=hp.Int(f'dense_units_{i}', min_value=32, max_value=256, step=32), activation='relu'))
            model.add(Dropout(rate=hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))

        model.add(Dense(1))

        # Model compilation: 'mse' is the training loss, WAPE is tracked as a
        # metric and is the quantity the tuner optimises.
        model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG')),
                    loss='mse',
                    metrics=[wape_metric])

        return model

    tuner = kt.RandomSearch(
        build_robust_gru_model,
        objective=kt.Objective('val_wape_metric', direction='min'),
        max_trials=15,
        executions_per_trial=2,
        seed = 47,
        directory=f'gru_tuning_{linea}_{target}',
        project_name=f'gru_tuning_timeseries_{linea}_{target}')

    early_stopping = EarlyStopping(
        monitor='val_wape_metric',
        patience=7,
        restore_best_weights=True,
        mode = "min"
    )
    # Hyperparameter search
    tuner.search(
        X_train_seq,
        y_train_seq,
        epochs=30,
        validation_split=0.2,
        batch_size=32,
        callbacks=[early_stopping] )
    best_model = tuner.get_best_models(num_models=1)[0]

    # NOTE(review): this extra training runs all 100 epochs without the
    # early-stopping callback used during the search - confirm that is intended.
    best_model.fit(X_train_seq, y_train_seq, epochs=100, validation_split=0.2, batch_size=32)

    # Predict the test windows and map the output back to the target's scale.
    y_pred_gru_tuned = best_model.predict(X_test_seq)
    y_pred_gru_tuned_rescaled = scaler_y.inverse_transform(y_pred_gru_tuned)

    return y_pred_gru_tuned_rescaled


In [None]:
def validacion_multiple(dfs, target, linea):

    """
    Validate several forecasting approaches on the 2024 data of one line.

    The rows of ``linea`` are split into history (everything before the last
    ``n_periodos`` rows) and a hold-out of ``n_periodos`` rows, where
    ``n_periodos`` is the number of rows dated 2024. Predictions are produced
    with PyCaret regression, PyCaret time series, the average of those two,
    Prophet and a GRU network. Negative predictions are replaced by the
    minimum training value of the target for the same day of the week.

    Args:
        dfs (pd.DataFrame): Historical data for all lines; ``fecha`` must be
            a datetime column.
        target (str): Name of the column to predict.
        linea (str): Line to analyse.

    Returns:
        tuple: ``(news, dic_final)`` where ``news`` is a DataFrame with the
        hold-out rows, one ``pred_*`` column per approach, ``fecha`` and
        ``linea``; and ``dic_final`` is a dictionary with the best PyCaret
        models, their tuned parameters and the best Prophet parameters.
    """

    datos = dfs[dfs["linea"] == linea]
    datos = datos.sort_values(by=["linea", "fecha", "año", "mes", "dia_semana"])
    datos = datos.reset_index(drop=True)

    n_periodos=datos[datos['fecha'].dt.year == 2024].shape[0] # Rows dated 2024

    # Keep the full per-line frame (with 'fecha') for the GRU model and for
    # recovering the dates at the end.
    datos_red = datos.copy()
    datos_prophet = datos[["fecha", target]].rename(columns={"fecha": 'ds', target: 'y'})
    state_holidays = datos[datos['state_holiday'] == 1]["fecha"].unique()
    state_holidays = pd.DataFrame({'ds': pd.to_datetime(state_holidays), 'holiday': 'state_holiday'})

    if target == "interpolado_real_calls":
        datos = datos[['año', 'mes', "dia", 'dia_semana', target, 'state_holiday']].dropna()
    else:
        datos = datos[['año', 'mes', "dia", 'dia_semana', target, 'state_holiday',"interpolado_real_calls"]].dropna()
    datos_entrenamiento = datos[:-n_periodos]
    datos_prueba = datos[-n_periodos:]
    news = datos_prueba.copy()

    if linea == "linea_4":
        fold =2
    else:
        fold = 3
    # Regression training
    pred_reg, mejor_modelo_reg, parametros_reg = entrenamiento_pycaret_reg(datos_entrenamiento, datos_prueba,target)
    news["pred_regr"] = pred_reg["prediction_label"]

    # Time-series training
    pred_ts, mejor_modelo_ts, parametros_ts = entrenamiento_pycaret_ts(datos_entrenamiento,datos_prueba,n_periodos,target,fold)
    news["pred_ts"] = pred_ts['y_pred']

    # Average of regression and time series
    news["pred_promedio"] = (news["pred_regr"] + news["pred_ts"]) / 2
    news = news.reset_index(drop=True)

    # Prophet training
    pronostico, mejores_parametros_prophet= entrenamiento_prophet(datos_prophet, state_holidays, n_periodos)
    news["pred_fb"] = pronostico[-n_periodos:].reset_index(drop=True)

    # GRU training
    pred_gru = entrenamiento_gru(datos_red,target,linea)
    news["pred_gru"] = pred_gru

    # Recover 'fecha' from this line's own rows. Merging against the frame
    # with every line could match rows of other lines that share the same
    # date and target value and duplicate records.
    news = pd.merge(news, datos_red[['año', 'mes', "dia", "fecha", target]], how='left').dropna()
    news = news.sort_values(['año', 'mes', "dia", "fecha", target])

    # Remove negative values: replace them with the minimum training value of
    # the target for the same day of the week. Only weekdays that actually
    # occur in the training data are visited, so no missing label is looked up.
    min_por_dia = datos_entrenamiento.groupby("dia_semana")[target].min()
    columnas_pred = ["pred_ts", "pred_fb", "pred_regr", "pred_promedio", "pred_gru"]
    for dia, min_target in min_por_dia.items():
        mismo_dia = news["dia_semana"] == dia
        for columna in columnas_pred:
            news.loc[(news[columna] < 0) & mismo_dia, columna] = min_target


    news["linea"] = linea

    # Final dictionary
    dic_final = {
        "pycaret_reg": {"modelo": mejor_modelo_reg,"parametros":parametros_reg},
        "pycaret_ts": {"modelo": mejor_modelo_ts,"parametros":parametros_ts},
        "prophet": mejores_parametros_prophet
    }
    return news, dic_final



# CALLS

In [None]:
# Run the multi-model validation for the call-volume target on every line.
lineas = df["linea"].unique()
validacion_calls = []   # one predictions DataFrame per line
diccionario_calls = {}  # line -> best models / parameters

for linea in lineas:
    print(f"Procesando línea: {linea}")
    validacion, diccionario = validacion_multiple(df, "interpolado_real_calls", linea)
    validacion_calls.append(validacion)
    diccionario_calls[linea] = diccionario


In [None]:
# Stack the per-line prediction frames into a single DataFrame.
validacion_diaria_calls = pd.concat(validacion_calls, ignore_index=True)


In [None]:
# Save the daily call predictions to an Excel file.
validacion_diaria_calls.to_excel("validacion_diaria_calls.xlsx")


In [None]:
def convertir_a_serializable(obj):
    """Convert ``obj`` into a structure that ``json.dump`` can write.

    The conversion is recursive, so nested dictionaries keep their structure
    instead of being turned into one long string:

    * ``None``, ``bool``, ``int``, ``float`` and ``str`` are returned as is.
    * NumPy scalars become Python scalars and NumPy arrays become lists.
    * Dictionaries are converted value by value (keys become strings).
    * Lists, tuples and sets become lists of converted items.
    * Objects exposing ``get_params()`` are replaced by their converted
      parameter dictionary.
    * Anything else falls back to ``str(obj)``.

    Args:
        obj: Any Python object.

    Returns:
        A JSON-serializable representation of ``obj``.
    """
    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return convertir_a_serializable(obj.tolist())
    if isinstance(obj, dict):
        return {str(clave): convertir_a_serializable(valor) for clave, valor in obj.items()}
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convertir_a_serializable(elemento) for elemento in obj]
    # Classes also expose `get_params` as an attribute but cannot be called
    # without an instance, so they fall through to the string fallback.
    if hasattr(obj, 'get_params') and not isinstance(obj, type):
        return convertir_a_serializable(obj.get_params())
    return str(obj)

In [None]:
# Convert each line's results dictionary before writing it as JSON.
diccionario_serializable = {k: convertir_a_serializable(v) for k, v in diccionario_calls.items()}

In [None]:
# Write the converted call results as indented JSON into a .txt file.
with open('diccionario_calls.txt', 'w') as file:
    json.dump(diccionario_serializable, file, indent=4)

## AHT

In [None]:
# Run the multi-model validation for the AHT target on every line.
lineas = df["linea"].unique()
validaciones_aht = []  # one predictions DataFrame per line
diccionarios_aht = {}  # line -> best models / parameters

for linea in lineas:
    print(f"Procesando línea: {linea}")
    validacion_aht, diccionario_aht = validacion_multiple(df, "interpolado_real_aht", linea)
    validaciones_aht.append(validacion_aht)
    diccionarios_aht[linea] = diccionario_aht

In [None]:
# Stack the per-line AHT prediction frames into a single DataFrame.
validacion_diaria_aht = pd.concat(validaciones_aht, ignore_index=True)


In [None]:
# Convert each line's AHT results dictionary before writing it as JSON.
diccionario_serializable_aht = {k: convertir_a_serializable(v) for k, v in diccionarios_aht.items()}

In [None]:
# Save the daily AHT predictions to Excel and the converted results as
# indented JSON into a .txt file.
validacion_diaria_aht.to_excel("validacion_diaria_aht.xlsx")
with open('diccionario_aht.txt', 'w') as file:
    json.dump(diccionario_serializable_aht, file, indent=4)