In [6]:


import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_datetime_CAPI' (most likely due to a circular import)

In [None]:
# 1. Load the case data and parse the date column
df = pd.read_csv('/mnt/data/case_data_full.csv')
df['date'] = pd.to_datetime(df['date'])

# 2. Windowing: LOOK_BACK lagged inputs and a target HORIZON rows ahead
LOOK_BACK = 4
HORIZON = 4

# Each unique combination of these columns is treated as one series; rows are
# ordered by date inside every series so that shifting moves through time.
group_cols = ['name', 'level', 'disease', 'classification']
df = df.sort_values(by=[*group_cols, 'date'])

# Shifts are positional (row-based) within each series.
# NOTE(review): a shift of k rows equals "k weeks" only if every series has
# one row per week with no gaps — confirm against the data.
cases_by_series = df.groupby(group_cols)['i_cases']
lag_names = [f'lag_{k}' for k in range(1, LOOK_BACK + 1)]
shifted = {
    lag_name: cases_by_series.shift(steps_back)
    for steps_back, lag_name in enumerate(lag_names, start=1)
}

# Target: i_cases HORIZON rows later in the same series
shifted['target'] = cases_by_series.shift(-HORIZON)
df = df.assign(**shifted)

# Keep only the rows whose full lag window and target are available
cols_to_dropna = [*lag_names, 'target']
df = df.dropna(subset=cols_to_dropna)

# 3. Hold-out: the last month of the windowed data is kept for validation
max_date = df['date'].max()
cutoff = max_date - pd.DateOffset(months=1)

# NOTE(review): every row's target is read HORIZON rows after the row itself,
# so training rows dated just before `cutoff` carry targets observed after
# `cutoff`. Leave a HORIZON-sized gap between the two sets if a strict
# temporal separation is required.
train_df = df[df['date'] <= cutoff]
val_df   = df[df['date'] >  cutoff]

# 4. Feature and label preparation
# Categorical variables are one-hot encoded. The encoder is fitted on the
# training rows only; categories that appear only in validation are encoded
# as all-zeros (handle_unknown='ignore').
cat_cols = ['name','level','disease','classification']
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`;
    # the old `sparse` keyword was removed in 1.4.
    ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    # Older scikit-learn releases only know the `sparse` keyword.
    ohe = OneHotEncoder(sparse=False, handle_unknown='ignore')
ohe.fit(train_df[cat_cols])

def build_X(df_subset):
    """Assemble the feature matrix for a slice of the windowed frame.

    Column layout of the result, left to right: the LOOK_BACK lag columns
    (lag_1 first), then 'Population' and 'incidence', then the one-hot
    encoding of `cat_cols` produced by the module-level encoder `ohe`.

    Args:
        df_subset: DataFrame holding the lag columns, 'Population',
            'incidence' and every column listed in `cat_cols`.

    Returns:
        Array with one row per row of `df_subset`.
    """
    lag_names = [f'lag_{k}' for k in range(1, LOOK_BACK + 1)]
    feature_parts = (
        df_subset[lag_names].values,                     # lagged case counts
        df_subset[['Population', 'incidence']].values,   # static numeric columns
        ohe.transform(df_subset[cat_cols]),              # encoded categoricals
    )
    # Join the pieces side by side.
    return np.concatenate(feature_parts, axis=1)

# Feature matrices and label vectors for the training and validation splits
X_train, X_val = (build_X(split) for split in (train_df, val_df))
y_train, y_val = (split['target'].values for split in (train_df, val_df))

print("Shapes → X_train:", X_train.shape, "X_val:", X_val.shape)

# 5. Model-building function for Keras Tuner
def model_builder(hp):
    """Build and compile a dense regression network from sampled hyperparameters.

    Hyperparameters requested from `hp`:
      - "num_layers": number of hidden layers, 1 to 3.
      - "units_{i}": width of hidden layer i, 16 to 128 in steps of 16.
      - "dropout_{i}": dropout rate after hidden layer i, 0.0 to 0.5 in
        steps of 0.1; a Dropout layer is added only when the rate is > 0.
      - "learning_rate": Adam learning rate, 1e-4 to 1e-2, log sampling.

    Args:
        hp: hyperparameter container supplied by the tuner.

    Returns:
        A compiled keras.Sequential model with ReLU hidden layers, a single
        linear output unit, MSE loss and a MAPE metric named "mape".
    """
    # The input layer is implicit: none is added here.
    net = keras.Sequential()

    depth = hp.Int("num_layers", 1, 3)
    for layer_idx in range(depth):
        width = hp.Int(f"units_{layer_idx}", min_value=16, max_value=128, step=16)
        net.add(keras.layers.Dense(width, activation="relu"))
        drop_rate = hp.Float(f"dropout_{layer_idx}", 0.0, 0.5, step=0.1)
        if drop_rate > 0:
            net.add(keras.layers.Dropout(drop_rate))

    # Output layer: one prediction per sample
    net.add(keras.layers.Dense(1, activation="linear"))

    # Compilation
    step_size = hp.Float("learning_rate", 1e-4, 1e-2, sampling="log")
    adam = keras.optimizers.Adam(step_size)
    mape_metric = keras.metrics.MeanAbsolutePercentageError(name="mape")
    net.compile(optimizer=adam, loss="mse", metrics=[mape_metric])
    return net

# 6. Configure Keras Tuner (random search)
# The objective direction is stated explicitly instead of being inferred from
# the metric name, and the sampler is seeded so that the same configurations
# are drawn on every fresh run.
# NOTE(review): with the default overwrite=False an existing
# tuner_dir/ts_forecast is reloaded, so earlier trials are reused even if the
# data or features have changed — delete the directory or pass overwrite=True
# when a clean search is wanted.
TUNER_SEED = 42
tuner = kt.RandomSearch(
    model_builder,
    objective=kt.Objective("val_mape", direction="min"),
    max_trials=20,
    executions_per_trial=1,
    seed=TUNER_SEED,
    directory="tuner_dir",
    project_name="ts_forecast"
)

# 7. Search for the best architecture
# Each trial stops once val_loss has not improved for 5 consecutive epochs.
stop_early = keras.callbacks.EarlyStopping(patience=5, monitor="val_loss")
search_options = dict(
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[stop_early],
    verbose=1,
)
tuner.search(X_train, y_train, **search_options)

# 8. Results: show the winning hyperparameters, then score the best model
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Mejores hiperparámetros encontrados:")
for hp_name, hp_value in best_hp.values.items():
    print(f"  {hp_name}: {hp_value}")

# Rebuild the top-ranked model from the search and evaluate it on the
# validation split.
best_model = tuner.get_best_models(num_models=1)[0]
print("\nEvaluación en validación:")
best_model.evaluate(X_val, y_val, verbose=2)
