In [1]:
# Importación librerías
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn import *
from sklearn.ensemble import *
import json
from pycaret.containers.models.time_series import BaseCdsDtForecaster
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.forecasting.croston import Croston
from lightgbm import LGBMRegressor
from sktime.forecasting.arima import ARIMA, AutoARIMA
from scipy.stats import uniform
from mango import Tuner
from sktime.forecasting.base import ForecastingHorizon
from sklearn.linear_model import OrthogonalMatchingPursuit,LinearRegression
from catboost import CatBoostRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout,BatchNormalization
import keras_tuner as kt
from tensorflow. keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler


In [2]:
# Data loading
def _cargar_diccionario(ruta):
    """Load a model dictionary stored as JSON whose values are Python expressions.

    The file at ``ruta`` is parsed with ``json.loads``; every top-level value is
    then passed through ``eval`` and the evaluated objects are returned in a new
    dict under the same keys.

    SECURITY: ``eval`` executes arbitrary code found in the file. Only load files
    produced by a trusted step of this pipeline.
    # NOTE(review): consider persisting these objects with a safer format.
    """
    with open(ruta, 'r') as file:
        contenido = file.read()
    crudo = json.loads(contenido)
    return {key: eval(expresion) for key, expresion in crudo.items()}


df = pd.read_excel("reporte_diario_campaña_limpio.xlsx").drop(columns='Unnamed: 0')
festivos = pd.read_excel("prophet_festivos.xlsx")
entrenamiento_calls = pd.read_excel("validacion_diaria_calls.xlsx").drop(columns='Unnamed: 0')
entrenamiento_aht = pd.read_excel("validacion_diaria_aht.xlsx").drop(columns='Unnamed: 0')

# Per-line model configuration, one dictionary per target.
diccionario_calls = _cargar_diccionario('diccionario_calls.txt')
diccionario_aht = _cargar_diccionario('diccionario_aht.txt')


In [3]:
# Definición error 
def wape(y, y_pred, **kwargs):
    """Weighted absolute percentage error: sum(|y - y_pred|) / sum(y).

    Extra keyword arguments are accepted and ignored.
    """
    total_abs_error = np.sum(np.abs(y - y_pred))
    total_actual = np.sum(y)
    return total_abs_error / total_actual

In [4]:
# Entrenamiento GRU
def entrenamiento_gru(data, target, linea, nuevos_periodos, train_end_date='2023-12-31', time_steps=14):
    """Tune and train a GRU on one line's history and forecast ``nuevos_periodos``.

    Parameters
    ----------
    data : DataFrame
        History of a single line with a ``fecha`` column, the calendar feature
        columns and ``target``. Rows are used in their given order to build the
        sliding windows (assumed chronological -- TODO confirm with the caller).
    target : str
        Column to forecast. For any target other than
        ``'interpolado_real_calls'`` that column is added as an extra feature.
    linea : str
        Line identifier; only used to name the Keras Tuner directory/project.
    nuevos_periodos : DataFrame
        Future rows containing at least the feature columns.
    train_end_date : str or Timestamp, default '2023-12-31'
        Last date (inclusive) of the rows used for tuning and training.
    time_steps : int, default 14
        Number of consecutive rows in each input window.

    Returns
    -------
    numpy.ndarray of shape (len(nuevos_periodos), 1)
        Predictions mapped back to the original target scale.

    Raises
    ------
    ValueError
        If there are not more than ``time_steps`` complete training rows.
    """
    base_features = ['año', 'mes', "dia", 'dia_semana', 'state_holiday']
    if target == 'interpolado_real_calls':
        features = base_features
    else:
        features = base_features + ['interpolado_real_calls']

    # Nothing to forecast: skip the expensive search altogether.
    if len(nuevos_periodos) == 0:
        return np.empty((0, 1))

    # Compare on a converted copy of the dates so the caller's frame is not mutated.
    fechas = pd.to_datetime(data['fecha'])
    train_data = data.loc[fechas <= pd.Timestamp(train_end_date), features + [target]].dropna()
    if len(train_data) <= time_steps:
        raise ValueError(
            f"Need more than {time_steps} complete training rows, got {len(train_data)}"
        )

    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(train_data[features])
    y_train_scaled = scaler_y.fit_transform(train_data[target].values.reshape(-1, 1))

    # Window i holds rows [i, i + time_steps) and is paired with the target of
    # row i + time_steps (the row right after the window).
    n_windows = len(X_train_scaled) - time_steps
    X_train_seq = np.array([X_train_scaled[i:i + time_steps] for i in range(n_windows)])
    y_train_seq = y_train_scaled[time_steps:]

    def wape_metric(y_true, y_pred):
        return K.sum(K.abs(y_true - y_pred)) / K.sum(K.abs(y_true))

    def build_robust_gru_model(hp):
        model = Sequential()

        # Stacked GRU layers; every layer but the last returns full sequences.
        n_gru_layers = hp.Int('gru_layers', 1, 3)
        for i in range(n_gru_layers):
            model.add(GRU(units=hp.Int(f'gru_units_{i}', min_value=64, max_value=512, step=64),
                        return_sequences=(i < n_gru_layers - 1),
                        recurrent_dropout=hp.Float(f'recurrent_dropout_{i}', min_value=0.0, max_value=0.3, step=0.1)))
            model.add(Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))

        # Batch normalization layer
        model.add(BatchNormalization())

        # Final dense layers
        for i in range(hp.Int('dense_layers', 1, 3)):
            model.add(Dense(units=hp.Int(f'dense_units_{i}', min_value=32, max_value=256, step=32), activation='relu'))
            model.add(Dropout(rate=hp.Float(f'dense_dropout_{i}', min_value=0.1, max_value=0.5, step=0.1)))

        model.add(Dense(1))

        # Training minimizes MSE; WAPE is tracked as the metric the tuner and
        # early stopping select on.
        model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG')),
                    loss='mse',
                    metrics=[wape_metric])

        return model

    # NOTE(review): with Keras Tuner's default overwrite=False an existing
    # directory from a previous run is presumably resumed instead of
    # re-searched -- confirm this is intended.
    tuner = kt.RandomSearch(
        build_robust_gru_model,
        objective=kt.Objective('val_wape_metric', direction='min'),
        max_trials=10,
        executions_per_trial=1,
        seed = 47,
        directory=f'gru_tuning_{linea}_{target}',
        project_name=f'gru_tuning_timeseries_{linea}_{target}')

    early_stopping = EarlyStopping(
        monitor='val_wape_metric',
        patience=7,
        restore_best_weights=True,
        mode = "min"
    )
    # Hyperparameter search
    tuner.search(
        X_train_seq,
        y_train_seq,
        epochs=20,
        validation_split=0.2,
        batch_size=32,
        callbacks=[early_stopping] )
    best_model = tuner.get_best_models(num_models=1)[0]

    best_model.fit(X_train_seq, y_train_seq, epochs=80, validation_split=0.2, batch_size=32)

    # The network consumes scaled windows of shape (time_steps, n_features), so
    # the raw future frame cannot be passed to predict() directly. Prepend the
    # last `time_steps` history rows as context, scale with the fitted scaler and
    # build one window per future row, using the same "rows before the target
    # row" convention as in training.
    historia = data[features].dropna().tail(time_steps)
    contexto = pd.concat([historia, nuevos_periodos[features]], ignore_index=True)
    contexto_scaled = scaler_X.transform(contexto)
    X_future_seq = np.array(
        [contexto_scaled[i:i + time_steps] for i in range(len(nuevos_periodos))]
    )

    y_pred_gru_tuned = best_model.predict(X_future_seq)
    y_pred_gru_tuned_rescaled = scaler_y.inverse_transform(y_pred_gru_tuned)

    return y_pred_gru_tuned_rescaled


In [44]:
def _datos_entrenamiento(datos, target):
    """Return (X, Y) for ``target`` using only rows without missing values."""
    columnas = ['año', 'mes', "dia", 'dia_semana', 'state_holiday']
    if target != "interpolado_real_calls":
        # Any other target also uses the call volume as a regressor.
        columnas = columnas + ["interpolado_real_calls"]
    completos = datos[columnas + [target]].dropna()
    return completos[columnas], completos[target]


def _regresor_configurado(modelo_dic):
    """Build the regression estimator described by ``modelo_dic`` with its parameters set."""
    if 'catboost' in str(modelo_dic["modelo"]):
        # CatBoost models are re-created from scratch instead of reusing the stored object.
        modelo = CatBoostRegressor()
    else:
        modelo = modelo_dic["modelo"]
    return modelo.set_params(**modelo_dic["parametros"])


def nuevas_predicciones(mejor_modelo, df, linea, target, diccionario, festivos, llamadas_futuras,
                        fecha_fin='2024-12-31'):
    """Refit the selected model on one line's history and forecast up to ``fecha_fin``.

    Parameters
    ----------
    mejor_modelo : str
        One of ``"pred_regr"``, ``"pred_ts"``, ``"pred_promedio"``, ``"pred_fb"``
        or ``"pred_gru"``.
    df : DataFrame
        History of all lines (columns ``linea``, ``fecha``, calendar features and
        the target columns).
    linea : str
        Line to forecast.
    target : str
        Column to forecast.
    diccionario : dict
        Per-line model configuration, indexed as ``diccionario[linea][<model key>]``.
    festivos : DataFrame
        Holiday calendar with ``fecha`` and ``state_holiday`` columns.
    llamadas_futuras : DataFrame or None
        Forecast call volume per line (``linea``, ``interpolado_real_calls``);
        only read when ``target == "interpolado_real_aht"``.
    fecha_fin : str or Timestamp, default '2024-12-31'
        Last date of the forecast horizon.

    Returns
    -------
    DataFrame
        Future feature rows plus ``prediccion`` and ``linea`` columns, indexed
        right after the line's history.

    Raises
    ------
    ValueError
        If ``mejor_modelo`` is not one of the supported names.
    """
    datos = df[df["linea"] == linea].reset_index(drop=True)

    # NOTE(review): the frame starts ON the last observed date of `df` (all
    # lines), not the day after, so its first row overlaps the history -- confirm
    # this is intended.
    ultima_fecha = pd.to_datetime(df["fecha"]).max()
    n_fecha = (pd.Timestamp(fecha_fin) - ultima_fecha).days
    nuevos_periodos = pd.DataFrame({
        "fecha": pd.date_range(start=ultima_fecha, periods=n_fecha + 1, freq='D')
    })

    nuevos_periodos["año"] = nuevos_periodos["fecha"].dt.year
    nuevos_periodos["mes"] = nuevos_periodos["fecha"].dt.month
    nuevos_periodos["dia"] = nuevos_periodos["fecha"].dt.day
    nuevos_periodos["dia_semana"] = nuevos_periodos["fecha"].dt.weekday

    fes_filtrado = festivos[["fecha", "state_holiday"]]
    nuevos_periodos = pd.merge(nuevos_periodos, fes_filtrado, how="left", on="fecha")
    # Assign the result back: a chained fillna(inplace=True) is not guaranteed to
    # update the parent frame.
    nuevos_periodos["state_holiday"] = nuevos_periodos["state_holiday"].fillna(0)
    # Holidays are encoded as an eighth "weekday".
    nuevos_periodos.loc[nuevos_periodos['state_holiday'] == 1, 'dia_semana'] = 7

    # Continue the positional index of the history and keep only model features.
    inicio = datos.index[-1] + 1
    nuevos_periodos = nuevos_periodos.set_index(
        pd.RangeIndex(start=inicio, stop=inicio + len(nuevos_periodos), step=1)
    )[["año", "mes", "dia", "dia_semana", 'state_holiday']]

    if target == "interpolado_real_aht":
        llamadas_linea = llamadas_futuras[llamadas_futuras["linea"] == linea].reset_index(drop=True)
        nuevos_periodos["interpolado_real_calls"] = llamadas_linea["interpolado_real_calls"].values

    nuevos_periodos = nuevos_periodos.sort_values(by=["año", "mes", "dia", "dia_semana"])

    if mejor_modelo == "pred_regr":
        modelo_reg = _regresor_configurado(diccionario[linea]["pycaret_reg"])
        X, Y = _datos_entrenamiento(datos, target)
        nuevos_periodos["prediccion"] = modelo_reg.fit(X, Y).predict(nuevos_periodos)

    elif mejor_modelo == "pred_ts":
        modelo_dic = diccionario[linea]["pycaret_ts"]
        modelo = modelo_dic["modelo"].set_params(**modelo_dic["parametros"])
        X, Y = _datos_entrenamiento(datos, target)

        fh = ForecastingHorizon(range(1, len(nuevos_periodos) + 1))
        pred = modelo.fit(X=X, y=Y).predict(X=nuevos_periodos, fh=fh)
        nuevos_periodos["prediccion"] = pred

    elif mejor_modelo == "pred_promedio":
        modelo_reg = _regresor_configurado(diccionario[linea]["pycaret_reg"])
        X, Y = _datos_entrenamiento(datos, target)
        pred_reg = modelo_reg.fit(X, Y).predict(nuevos_periodos)

        # Use this line's time-series model from the dictionary passed in (it was
        # previously hard-coded to the calls dictionary of "linea_1").
        modelo_dic_ts = diccionario[linea]["pycaret_ts"]
        modelo_ts = modelo_dic_ts["modelo"].set_params(**modelo_dic_ts["parametros"])
        fh = ForecastingHorizon(range(1, len(nuevos_periodos) + 1))
        pred_ts = modelo_ts.fit(X=X, y=Y).predict(X=nuevos_periodos, fh=fh)

        nuevos_periodos["prediccion"] = (pred_reg + pred_ts) / 2

    elif mejor_modelo == "pred_fb":
        datos_prophet = datos[["fecha", target]].rename(columns={"fecha": 'ds', target: 'y'})
        fechas_festivas = datos[datos['state_holiday'] == 1]["fecha"].unique()
        state_holidays = pd.DataFrame({'ds': pd.to_datetime(fechas_festivas), 'holiday': 'state_holiday'})

        prophet_params = diccionario[linea]["prophet"]
        model = Prophet(holidays=state_holidays, **prophet_params)
        model.fit(datos_prophet)
        future = model.make_future_dataframe(periods=len(nuevos_periodos), freq='D')
        forecast = model.predict(future)
        # Assign by position: the forecast index is not guaranteed to line up
        # with the index of nuevos_periodos.
        nuevos_periodos["prediccion"] = forecast["yhat"].iloc[-len(nuevos_periodos):].to_numpy()

    elif mejor_modelo == "pred_gru":
        nuevos_periodos["prediccion"] = np.ravel(
            entrenamiento_gru(datos, target, linea, nuevos_periodos)
        )

    else:
        raise ValueError(f"Unknown model name: {mejor_modelo!r}")

    # Line 3 gets a zero forecast on Sundays (weekday 6). The condition used to
    # compare against "lines_3", which never matched the "linea_N" identifiers.
    if linea == "linea_3":
        nuevos_periodos.loc[nuevos_periodos['dia_semana'] == 6, 'prediccion'] = 0

    nuevos_periodos["linea"] = linea

    return nuevos_periodos
    

## Calls

In [40]:
# For every line, pick the validation model with the lowest WAPE (rounded to two
# decimals; ties go to the first column listed) and forecast call volume with it.
pred_columns = ['pred_ts', 'pred_regr', 'pred_promedio', 'pred_fb', 'pred_gru']
lineas = entrenamiento_calls['linea'].unique()
wape_metrics_calls = {}
predicciones_calls_por_linea = []

for linea in lineas:
    print(linea)
    df_linea = entrenamiento_calls[entrenamiento_calls['linea'] == linea]
    y_true = df_linea['interpolado_real_calls']

    wape_metrics_calls[linea] = {col: round(wape(y_true, df_linea[col]),2) for col in pred_columns}
    mejor_modelo = min(wape_metrics_calls[linea], key=wape_metrics_calls[linea].get)
    print(mejor_modelo)
    predicciones_calls = nuevas_predicciones(mejor_modelo, df, linea, "interpolado_real_calls", diccionario_calls, festivos, None)
    predicciones_calls_por_linea.append(predicciones_calls)

# Concatenate once instead of growing a DataFrame inside the loop.
pred_calls = pd.concat(predicciones_calls_por_linea) if predicciones_calls_por_linea else pd.DataFrame()

linea_1
pred_ts
linea_2
pred_ts
linea_3
pred_regr
linea_4
pred_promedio
linea_5
pred_regr


In [41]:
# Save the call-volume forecasts of all lines to Excel (no index=False is passed, so the index is written too).
pred_calls.to_excel("prediccion_calls.xlsx")

In [45]:
# The forecast call volume becomes the 'interpolado_real_calls' regressor of the AHT models.
llamadas_futuras = (
    pred_calls[["prediccion", "linea"]]
    .reset_index(drop=True)
    .rename(columns={'prediccion': 'interpolado_real_calls'})
)

# For every line, pick the validation model with the lowest WAPE (rounded to two
# decimals; ties go to the first column listed) and forecast AHT with it.
pred_columns = ['pred_ts', 'pred_regr', 'pred_promedio', 'pred_fb', 'pred_gru']
lineas = entrenamiento_aht['linea'].unique()
wape_metrics_aht = {}
predicciones_aht_por_linea = []

for linea in lineas:
    print(linea)
    df_linea = entrenamiento_aht[entrenamiento_aht['linea'] == linea]
    y_true = df_linea['interpolado_real_aht']

    wape_metrics_aht[linea] = {col: round(wape(y_true, df_linea[col]),2) for col in pred_columns}
    mejor_modelo = min(wape_metrics_aht[linea], key=wape_metrics_aht[linea].get)
    print(mejor_modelo)
    predicciones_aht = nuevas_predicciones(mejor_modelo, df, linea, "interpolado_real_aht", diccionario_aht, festivos, llamadas_futuras)
    predicciones_aht_por_linea.append(predicciones_aht)

# Concatenate once instead of growing a DataFrame inside the loop.
pred_aht = pd.concat(predicciones_aht_por_linea) if predicciones_aht_por_linea else pd.DataFrame()

linea_1
pred_promedio
linea_2
pred_regr
linea_3
pred_ts
linea_4
pred_fb


01:14:53 - cmdstanpy - INFO - Chain [1] start processing
01:14:53 - cmdstanpy - INFO - Chain [1] done processing


linea_5
pred_regr


In [46]:
# Save the AHT forecasts of all lines to Excel (no index=False is passed, so the index is written too).
pred_aht.to_excel("prediccion_aht.xlsx")