### Imports for Model

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pylab as plt
from IPython import display
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


import sys
FUENTES_DIR = "/lib"
sys.path.append(FUENTES_DIR)

DATOS_DIR = "data/"
import os
import requests
from datetime import date, datetime, timedelta

# Asegurar carpeta de datos
os.makedirs(DATOS_DIR, exist_ok=True)

# Parámetros de periodo: primer día de marzo 2025 hasta hoy
START_DATE = "2025-03-10"
END_DATE = date.today().isoformat()

# Ruta local para guardar el CSV crudo
raw_csv_path = os.path.join(DATOS_DIR, 'training.csv')

# URL remota a consultar
remote_url = f"http://localhost:8080/?start={START_DATE}&end={END_DATE}"

# Flags: forzar descarga o re-procesado (poner True para forzar)
FORCE_DOWNLOAD = False
FORCE_REPROCESS = False

if FORCE_DOWNLOAD and os.path.exists(raw_csv_path):
    os.remove(raw_csv_path)

print("remote_url: ", remote_url)

if not os.path.exists(raw_csv_path):
    print(f"Descargando datos de {remote_url} ...")
    try:
        resp = requests.get(remote_url, timeout=30)
        resp.raise_for_status()
        with open(raw_csv_path, 'wb') as f:
            f.write(resp.content)
        print(f"Guardado CSV en {raw_csv_path}")
    except Exception as e:
        print("Error descargando datos desde localhost:", e)
        raise
else:
    print(f"Archivo CSV crudo ya existe en {raw_csv_path}")

data = pd.read_csv(raw_csv_path)

2025-11-01 20:25:27.983067: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-01 20:25:28.007154: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


remote_url:  http://localhost:8080/?start=2025-03-10&end=2025-11-01
Archivo CSV crudo ya existe en data/training.csv


2025-11-01 20:25:28.499925: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Preprocessing 

In [2]:

from lib.preprocessing import preprocess_data

data = preprocess_data(data)

# Separar features y labels
X = data.drop(columns=["consumption"])
T = data["consumption"]

data['daylight'].value_counts()

0   2025-03-10 03:00:00+00:00
1   2025-03-10 03:05:00+00:00
2   2025-03-10 03:10:00+00:00
3   2025-03-10 03:15:00+00:00
4   2025-03-10 03:20:00+00:00
Name: date, dtype: datetime64[ns, UTC]
0   2025-03-10 09:45:51+00:00
1   2025-03-10 09:45:51+00:00
2   2025-03-10 09:45:51+00:00
3   2025-03-10 09:45:51+00:00
4   2025-03-10 09:45:51+00:00
Name: sunrise_dt, dtype: datetime64[ns, UTC]
0   2025-03-10 22:18:04+00:00
1   2025-03-10 22:18:04+00:00
2   2025-03-10 22:18:04+00:00
3   2025-03-10 22:18:04+00:00
4   2025-03-10 22:18:04+00:00
Name: sunset_dt, dtype: datetime64[ns, UTC]


daylight
1    31727
0    27912
Name: count, dtype: int64

In [3]:
from lib.normalization import normalize_data

X_train, X_test, T_train, T_test = normalize_data(X, T)

### Model

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras import optimizers

# Modelo MLP con LeakyReLU y He initialization para capas ocultas
model = Sequential([
    Dense(64, kernel_initializer='he_normal', input_dim=X_train.shape[1]),
    LeakyReLU(alpha=0.1),
    Dense(32, kernel_initializer='he_normal'),
    LeakyReLU(alpha=0.1),
    Dense(16, kernel_initializer='he_normal'),
    LeakyReLU(alpha=0.1),
    Dense(1, activation='linear')  # salida lineal para regresión
])

optimizer = optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1762039528.873290  682746 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7249 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6


### Training

In [5]:
import tensorflow as tf

es = EarlyStopping(monitor='val_loss', patience=75, min_delta=0.0001)

if tf.config.list_physical_devices('GPU'):
  print("Entrenando en GPU.")
else:
  print("Advertencia: No se detectó GPU, se entrenará en CPU.")

history = model.fit(X_train, T_train,batch_size=32,
                    epochs=1000,
                    verbose=1,
                    validation_data=(X_test, T_test),callbacks=[es])

# Guardar modelo
model.save('models/energy_consumption_model.keras')

Entrenando en GPU.
Epoch 1/1000


2025-11-01 20:25:29.512617: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f230800bf90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-01 20:25:29.512628: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 3080, Compute Capability 8.6
2025-11-01 20:25:29.522408: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-01 20:25:29.573679: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91400


[1m 304/1491[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 497us/step - loss: 34278510.3289 - mae: 5679.9061

I0000 00:00:1762039530.056657  682878 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 6529594.0000 - mae: 1669.1589 - val_loss: 739359.2500 - val_mae: 687.7680
Epoch 2/1000
[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 6529594.0000 - mae: 1669.1589 - val_loss: 739359.2500 - val_mae: 687.7680
Epoch 2/1000
[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610us/step - loss: 619249.7500 - mae: 630.0375 - val_loss: 540706.8125 - val_mae: 586.8127
Epoch 3/1000
[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610us/step - loss: 619249.7500 - mae: 630.0375 - val_loss: 540706.8125 - val_mae: 586.8127
Epoch 3/1000
[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610us/step - loss: 485608.5312 - mae: 555.4780 - val_loss: 452278.4688 - val_mae: 533.0674
Epoch 4/1000
[1m1491/1491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610us/step - loss: 485608.5312 - mae: 555.4780 - val_loss: 452278.4688 -

### Evaluation

In [7]:
# Baseline tabular model: Gradient Boosting (sklearn)
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Notar: X_train/X_test ya están escalados; HGBR no lo requiere, pero no afecta
hgb = HistGradientBoostingRegressor(
    learning_rate=0.05,
    max_depth=None,
    max_iter=500,
    l2_regularization=0.0,
    early_stopping=True,
    validation_fraction=0.2,
    random_state=42,
)

hgb.fit(X_train, T_train)

pred_hgb = hgb.predict(X_test)
mae_hgb = mean_absolute_error(T_test, pred_hgb)
rmse_hgb = mean_squared_error(T_test, pred_hgb, squared=False)
r2_hgb = r2_score(T_test, pred_hgb)

print(f"HGBR -> MAE: {mae_hgb:.3f} | RMSE: {rmse_hgb:.3f} | R2: {r2_hgb:.3f}")

# Comparación rápida con el modelo Keras (si existe en memoria)
try:
    pred_nn = model.predict(X_test, verbose=0).reshape(-1)
    mae_nn = mean_absolute_error(T_test, pred_nn)
    rmse_nn = mean_squared_error(T_test, pred_nn, squared=False)
    r2_nn = r2_score(T_test, pred_nn)
    print(f"Keras -> MAE: {mae_nn:.3f} | RMSE: {rmse_nn:.3f} | R2: {r2_nn:.3f}")
except Exception as e:
    print("No se pudo evaluar el modelo Keras en esta celda:", e)

# Plot comparativo
import matplotlib.pyplot as plt
plt.figure(figsize=(6,6))
plt.scatter(T_test, pred_hgb, alpha=0.5, label='HGBR')
try:
    plt.scatter(T_test, pred_nn, alpha=0.5, label='Keras', s=16)
except:
    pass
plt.plot([min(T_test), max(T_test)], [min(T_test), max(T_test)], 'k--', lw=1)
plt.xlabel('Valor real')
plt.ylabel('Predicción')
plt.title('Comparación de modelos')
plt.legend()
plt.tight_layout()
plt.show()


TypeError: got an unexpected keyword argument 'squared'

In [8]:
pred_scaled = model.predict(X_test)
pred = pred_scaled
T_real = T_test

# Evaluación del modelo
loss, metric = model.evaluate(X_test, T_test, verbose=0)
print(f"Loss de test: {loss:.4f}, Métrica: {metric:.4f}")
# TODO: modificar esto
# Predicciones
y_pred = model.predict(X_test)

plt.scatter(T_real, pred, alpha=0.6)
plt.xlabel("Valor real")
plt.ylabel("Predicción")
plt.title("Predicciones vs Valores reales")
plt.show()

[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step


[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step


NameError: name 'y_scaler' is not defined

In [None]:
# Hardcoded inference example
import numpy as np
import pandas as pd
import os
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

# Payload hardcodeado (proveído por el usuario)
payload = {
    "date": "2025-11-01T16:50Z",
    "temperature": 26.11,
    "humidity": 49,
    "rain": 0.0,
    "snow": 0.0,
    "pressure": 1013.0,
    "wind_speed": 2.47,
    "wind_direction": 358,
    "clouds": 100,
    "sunrise": 1761986859,
    "sunset": 1762035786,
    "working_day": False,
    "holiday": False,
}

# Construir DataFrame de una fila
row = {
    "date": [payload.get("date")],
    "temperature": [payload.get("temperature")],
    "humidity": [payload.get("humidity")],
    "rain": [payload.get("rain")],
    "snow": [payload.get("snow")],
    "pressure": [payload.get("pressure")],
    "wind_speed": [payload.get("wind_speed")],
    "wind_direction": [payload.get("wind_direction")],
    "clouds": [payload.get("clouds")],
    "sunrise": [int(payload.get("sunrise"))],
    "sunset": [int(payload.get("sunset"))],
    "working_day": [payload.get("working_day")],
    "holiday": [payload.get("holiday")],
}

df = pd.DataFrame(row)
# Aplicar preprocesamiento (usa la función definida en lib/preprocessing.py)
try:
    df_proc = preprocess_data(df)
except Exception as e:
    print("Error en preprocesamiento:", e)
    raise

# Preparar X para la inferencia
X_inf = df_proc.values

# Cargar modelo
model_path = os.path.join('models', 'energy_consumption_model.keras')
if not os.path.exists(model_path):
    print(f"Modelo no encontrado en {model_path}. Ejecuta la celda de training antes para crearlo.")
else:
    model = load_model(model_path)
    print("Modelo cargado.")

    # Escalado: si existe X_train en el kernel (ejecutaste entrenamiento), lo usamos para ajustar el scaler,
    # si no, escalamos localmente (advertencia: escalar con una sola muestra es poco fiable).
    try:
        scaler = StandardScaler()
        if 'X_train' in globals():
            print("Ajustando scaler sobre X_train existente en el notebook...")
            scaler.fit(X_train)
            X_scaled = scaler.transform(X_inf)
        else:
            print("No se encontró X_train en el entorno. Haciendo fit_transform sobre la muestra (precario).")
            X_scaled = scaler.fit_transform(X_inf)
    except Exception as e:
        print("Error al escalar features:", e)
        X_scaled = X_inf

    # Predicción (sin ninguna desnormalización)
    pred = model.predict(X_scaled)
    # Mostrar la predicción tal cual (sin inverse_transform)
    try:
        print(f"Predicción de consumo energético (sin desnormalizar): {pred.reshape(-1)[0]:.2f} unidades.")
    except Exception:
        print("Predicción de consumo energético (sin desnormalizar):", pred)


0   2025-11-01 16:50:00+00:00
Name: date, dtype: datetime64[ns, UTC]
0   2025-11-01 08:47:39+00:00
Name: sunrise_dt, dtype: datetime64[ns, UTC]
0   2025-11-01 22:23:06+00:00
Name: sunset_dt, dtype: datetime64[ns, UTC]
Modelo cargado.
Ajustando scaler sobre X_train existente en el notebook...
Error al escalar features: name 'X_inf' is not defined


NameError: name 'X_inf' is not defined