# Correlación Carga ↔ Temperatura ↔ Cortes (AMBA)

**Este notebook corre end-to-end con datos de ejemplo en `data/raw/`.**

In [None]:

# %pip install pandas numpy matplotlib scipy statsmodels requests
import os, warnings
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm

# Session-wide setup: warning filter, time zone, folder layout, date window
# and the sample input files used by the rest of the notebook.

# NOTE(review): this silences *every* warning category for the session,
# including numerical/convergence warnings from the model fit further down.
warnings.filterwarnings("ignore")

# IANA time zone used to localize/convert all timestamps.
TZ = "America/Argentina/Buenos_Aires"

# Folder layout, anchored at the current working directory.
BASE = Path(".").resolve()
DATA_RAW = BASE.joinpath("data", "raw")
DATA_INTERIM = BASE.joinpath("data", "interim")
DATA_PROCESSED = BASE.joinpath("data", "processed")
OUTPUTS = BASE.joinpath("outputs")

# Create any missing folder up front so later reads/writes do not fail on a
# missing directory.
for folder in (DATA_RAW, DATA_INTERIM, DATA_PROCESSED, OUTPUTS):
    folder.mkdir(parents=True, exist_ok=True)

# Analysis window (ISO dates, kept as strings).
DATE_START = "2025-01-01"
DATE_END = "2025-03-31"

# Sample input files shipped under data/raw/.
PATH_DEMANDA_HORARIA = DATA_RAW.joinpath("sample_demand_gba_hourly.csv")
PATH_TEMPERATURA_D = DATA_RAW.joinpath("sample_temperature_daily.csv")
PATH_SAIDI_SAIFI = DATA_RAW.joinpath("sample_enre_saidi_saifi.csv")
PATH_CORTES_D = DATA_RAW.joinpath("sample_cortes_daily.csv")


In [None]:

# Data loading (demo): read the four sample CSVs and align their time columns
# to the notebook time zone (TZ).

# Hourly demand. Expects a "datetime" column.
dem_h = pd.read_csv(PATH_DEMANDA_HORARIA, parse_dates=["datetime"])
if dem_h["datetime"].dt.tz is None:
    # NOTE(review): naive timestamps are interpreted as UTC and then converted
    # to local time — confirm this matches how the source file was exported.
    dem_h["datetime"] = dem_h["datetime"].dt.tz_localize("UTC").dt.tz_convert(TZ)

# Keep [DATE_START 00:00, DATE_END + 1 day 00:00). The upper bound is exclusive:
# an inclusive bound would also keep the 00:00 sample of the day *after*
# DATE_END and produce a spurious one-hour "day" in the daily aggregates.
window_start = pd.Timestamp(DATE_START, tz=TZ)
window_end = pd.Timestamp(DATE_END, tz=TZ) + pd.Timedelta(days=1)
in_window = (dem_h["datetime"] >= window_start) & (dem_h["datetime"] < window_end)
dem_h = dem_h[in_window]

# Daily temperature: dates are stamped directly as local midnights.
temp_d = pd.read_csv(PATH_TEMPERATURA_D, parse_dates=["date"])
temp_d["date"] = temp_d["date"].dt.tz_localize(TZ).dt.normalize()

# Semester-level reliability table; loaded and previewed only in this cell.
saifi = pd.read_csv(PATH_SAIDI_SAIFI, parse_dates=["semester_start", "semester_end"])

# Daily outage records, dated the same way as the temperature table so the
# two can be merged on "date".
cortes_d = pd.read_csv(PATH_CORTES_D, parse_dates=["date"])
cortes_d["date"] = cortes_d["date"].dt.tz_localize(TZ).dt.normalize()

# Quick visual sanity check of every input.
display(dem_h.head()); display(temp_d.head()); display(saifi.head()); display(cortes_d.head())


In [None]:

# Feature engineering: daily demand per region joined with daily temperature,
# plus degree-day style features and 1- to 3-row lags of the temperature columns.

# Local calendar day of each hourly sample (midnight of the tz-aware timestamp).
dem_h["date"] = dem_h["datetime"].dt.normalize()

# One row per (date, region) with the mean and peak hourly demand.
agg = dem_h.groupby(["date","region"])["demand_mw"].agg(demand_mean="mean", demand_max="max").reset_index()

# One temperature row per date (averages rows that share a date).
temp_daily = temp_d.groupby("date")[["tmean","tmax","tmin"]].mean().reset_index()

# Left join keeps every demand day even when temperature is missing.
df = agg.merge(temp_daily, on="date", how="left")

# Degrees of tmean above 22 / below 18, floored at zero.
df["CDD_22"] = (df["tmean"] - 22).clip(lower=0)
df["HDD_18"] = (18 - df["tmean"]).clip(lower=0)

# Lags must be taken within each region: df holds one row per (date, region),
# so a plain df[col].shift(k) would pull the value from another region's row
# whenever more than one region is present. Rows come out of the groupby above
# ordered by date within each region, so the grouped shift walks back in time.
# NOTE: the shift is by rows, not calendar days — a missing date in a region
# makes lag k reach further back than k days.
for k in range(1, 4):
    for col in ["tmean","tmax","tmin","CDD_22","HDD_18"]:
        df[f"{col}_lag{k}"] = df.groupby("region")[col].shift(k)
display(df.head())


In [None]:

# Correlaciones
def corr_table(data: pd.DataFrame, y: str, xs: list) -> pd.DataFrame:
    """Tabulate Pearson and Spearman correlations of ``y`` against each column in ``xs``.

    Rows with a missing value in ``y`` or in *any* of the ``xs`` columns are
    dropped once, up front, so every correlation is computed on the same
    rows and ``n`` is identical across the table.

    Args:
        data: Frame containing ``y`` and every column named in ``xs``.
        y: Name of the target column.
        xs: Names of the columns to correlate against ``y``.

    Returns:
        One row per entry of ``xs`` with columns ``y``, ``x``, ``pearson_r``,
        ``pearson_p``, ``spearman_r``, ``spearman_p`` and ``n``, ordered by
        ``pearson_r`` from highest to lowest.
    """
    complete = data.dropna(subset=[y, *xs])
    target = complete[y]
    n_obs = len(complete)

    def _row(x: str) -> dict:
        # Both scipy results unpack as (statistic, p-value).
        pearson_r, pearson_p = stats.pearsonr(complete[x], target)
        spearman_r, spearman_p = stats.spearmanr(complete[x], target)
        return {
            "y": y,
            "x": x,
            "pearson_r": pearson_r,
            "pearson_p": pearson_p,
            "spearman_r": spearman_r,
            "spearman_p": spearman_p,
            "n": n_obs,
        }

    records = [_row(x) for x in xs]
    return pd.DataFrame(records).sort_values(by="pearson_r", ascending=False)

# Correlate daily mean demand with the same-day temperature features and the
# three tmean lags, export the table to outputs/, and show it as the cell result.
X_COLS = ["tmean", "tmax", "tmin", "CDD_22", "HDD_18"] + [
    f"tmean_lag{k}" for k in (1, 2, 3)
]
ct = corr_table(data=df, y="demand_mean", xs=X_COLS)
ct.to_csv(OUTPUTS.joinpath("correlaciones_demanda_vs_temp.csv"), index=False)
ct


In [None]:

# Scatter plot of daily mean demand against daily mean temperature.
# Only rows that have both values can be drawn.
sub = df.dropna(subset=["demand_mean","tmean"])

fig, ax = plt.subplots(figsize=(6,4))
ax.scatter(sub["tmean"], sub["demand_mean"])
ax.set_xlabel("Temperatura media diaria (°C)")
ax.set_ylabel("Demanda media diaria (MW)")
ax.set_title("Relación Demanda vs. Temperatura (GBA, diario)")
ax.grid(True, alpha=0.3)  # faint grid so the points stay dominant
plt.show()


In [None]:

# Simple Poisson model for outage counts:
#   cortes_count ~ const + tmean + tmean_lag1   (Poisson GLM, default link).

# One temperature row per date (averages rows that share a date). Recomputed
# here so this cell does not depend on the feature-engineering cell.
temp_daily = temp_d.groupby("date")[["tmean","tmax","tmin"]].mean().reset_index()

# Left join keeps every outage record; rows without temperature get NaN.
cortes = cortes_d.merge(temp_daily, on="date", how="left")

# Put each zone in chronological order before lagging: shift(k) only means
# "k records earlier in time" if the rows are date-sorted, and the CSV row
# order is not guaranteed to be.
cortes = cortes.sort_values(["zona", "date"]).reset_index(drop=True)

# Lags are built BEFORE incomplete rows are removed. Dropping rows first would
# let a lag silently reach across the removed day and mislabel an older
# temperature as "lag 1".
# NOTE: the shift is still by rows — a date that is absent from cortes_d for a
# zone is not detected here.
for k in range(1, 4):
    cortes[f"tmean_lag{k}"] = cortes.groupby("zona")["tmean"].shift(k)

# Keep only rows where the response and both regressors are present.
model_data = cortes.dropna(subset=["tmean","tmean_lag1","cortes_count"]).copy()
X = sm.add_constant(model_data[["tmean","tmean_lag1"]])
y = model_data["cortes_count"]

poisson_model = sm.GLM(y, X, family=sm.families.Poisson())
poisson_res = poisson_model.fit()
print(poisson_res.summary())


In [None]:

# Persist the processed daily table and report where each artifact lives.
# The correlations CSV was already written by the correlations cell; it is only
# announced here.
processed_csv = DATA_PROCESSED / "gba_daily_demand_temp.csv"
df.to_csv(processed_csv, index=False)
for saved_path in (processed_csv, OUTPUTS / "correlaciones_demanda_vs_temp.csv"):
    print("Guardado:", saved_path)
