In [2]:
# === Celda 1: Imports y configuración ===
from pathlib import Path
import os, re, warnings
from datetime import datetime, timedelta
from io import BytesIO

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.backends.backend_pdf import PdfPages

import seaborn as sns

# scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# plotly
import plotly.express as px
import plotly.io as pio

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/convergence warnings raised by
# pandas, seaborn and scikit-learn in the cells below.
warnings.filterwarnings("ignore")
# Default resolution (dots per inch) for every matplotlib figure.
matplotlib.rcParams["figure.dpi"] = 120


In [3]:
# === Celda 2: Rutas, mapeo y utilidades ===

# Project base directory: taken from the DATAAQUA_BASE environment variable,
# defaulting to the cluster checkout path when the variable is not set.
RUTA_BASE = Path(
    os.getenv("DATAAQUA_BASE", "/lustre/home/mvalenzuela/Workspace/DataAqua-dashboard")
)
if not RUTA_BASE.exists():
    # Fallback when that directory does not exist (e.g. running on a local
    # machine): use the current working directory instead.
    RUTA_BASE = Path(".").resolve()

# Input data folder, relative to the base directory.
RUTA_SALIDA_UNISON = RUTA_BASE / "data" / "Salidas_ETo12_con_uac_y_hh" / "Periodo de Cultivo ETo"

# Output folder for the PDF reports; created (with parents) as a side effect
# of running this cell.
RUTA_REPORTES = RUTA_BASE / "reports" / "modelos"
RUTA_REPORTES.mkdir(parents=True, exist_ok=True)

# Column-name mapping: raw CSV header -> short canonical name used by this
# notebook (same mapping as the dashboard, plus the UAC/HH columns).
# The accented headers appear twice: once correctly spelled and once as the
# mojibake produced when UTF-8 text is decoded as Latin-1 ("Ã±" for "ñ"),
# so the rename works whichever encoding was used to read the file.
# Already-canonical names map to themselves.
MAP_UNISON = {
    "Año_ (YEAR)": "Year", "AÃ±o_ (YEAR)": "Year",
    "Día (DOY)": "DOY",   "DÃ­a (DOY)": "DOY",
    "Tmax (T2M_MAX)": "Tmax", "Tmin (T2M_MIN)": "Tmin",
    "HR (RH2M)": "HR", "Ux (WS2M)": "Ux",
    "Rs (ALLSKY_SFC_SW_DWN)": "Rs",
    "Rl_ (ALLSKY_SFC_LW_DWN)": "Rl",
    "Ptot_ (PRECTOTCORR)": "Ptot",
    "Pef_": "Pef", "Tmean_": "Tmean", "es_": "es", "ea_": "ea",
    "delta_": "delta", "P_": "P", "gamma_": "gamma",
    "Rns_": "Rns", "Rnl_": "Rnl", "Rn_": "Rn", "Rso_": "Rso",
    "Kc_": "Kc", "decada_": "decada",
    "ET0": "ET0", "ETc": "ETc", "ETverde": "ETverde", "ETazul": "ETazul",
    "Year": "Year", "DOY": "DOY", "Dia": "Dia",
    "UACverde_m3_ha": "UACverde_m3_ha",
    "UACazul_m3_ha": "UACazul_m3_ha",
    "HHverde_m3_ton": "HHverde_m3_ton",
    "HHazul_m3_ton": "HHazul_m3_ton",
}

# Canonical column names that leer_y_normalizar coerces to numeric
# (non-parsable values become NaN). Only the columns actually present in a
# given file are converted.
COLUMNAS_NUM = [
    "Year","DOY","ET0","ETc","ETverde","ETazul","Pef","decada",
    "Rns","Rnl","Rs","Tmean","HR","Ux","Kc","Tmax","Tmin",
    "UACverde_m3_ha","UACazul_m3_ha","HHverde_m3_ton","HHazul_m3_ton"
]

def _year_doy_to_date(y, doy):
    try:
        base = datetime(int(y), 1, 1)
        return base + timedelta(days=int(doy) - 1)
    except Exception:
        return pd.NaT

def leer_y_normalizar(path: str) -> pd.DataFrame:
    """
    Read one UNISON output CSV and normalize it for the analyses below.

    Steps:
      1. Read the file as UTF-8, retrying as Latin-1 on a decode error.
      2. Strip header whitespace and rename columns through ``MAP_UNISON``.
      3. Add a ``Día`` column (numeric copy of ``DOY``) when it is missing.
      4. Coerce every column listed in ``COLUMNAS_NUM`` to numeric (bad -> NaN).
      5. Add ``Fecha`` (from ``Year`` + ``DOY``) and ``Dia_ciclo`` (days elapsed
         since the first valid ``Fecha``, nullable ``Int64``).

    Parameters
    ----------
    path : str
        Path to the CSV file.

    Returns
    -------
    pd.DataFrame
        The normalized frame. An empty frame is returned when the file does not
        exist or contains no data at all, so callers can skip it via ``df.empty``.
    """
    p = Path(path)
    if not p.exists():
        return pd.DataFrame()

    try:
        try:
            df = pd.read_csv(p, encoding="utf-8")
        except UnicodeDecodeError:
            # Latin-1 maps every byte to a character, so this retry cannot fail
            # with a decode error; no further fallback is needed.
            df = pd.read_csv(p, encoding="latin-1")
    except pd.errors.EmptyDataError:
        # Zero-byte / header-less file: treat it like a missing file instead of
        # aborting the whole batch.
        return pd.DataFrame()

    df.columns = [str(c).strip() for c in df.columns]
    df = df.rename(columns=lambda c: MAP_UNISON.get(c, c))

    # Keep a 'Día' column for compatibility with the original notebook.
    if "Día" not in df.columns and "DOY" in df.columns:
        df["Día"] = pd.to_numeric(df["DOY"], errors="coerce")

    for c in set(COLUMNAS_NUM).intersection(df.columns):
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Calendar date and day-of-cycle (both optional: need Year and DOY).
    dia_ciclo_vacio = pd.Series(pd.NA, index=df.index, dtype="Int64")
    if {"Year", "DOY"}.issubset(df.columns):
        fechas = [_year_doy_to_date(y, d) for y, d in zip(df["Year"], df["DOY"])]
        df["Fecha"] = pd.to_datetime(fechas)
        if df["Fecha"].notna().any():
            # The first valid date in file order is day 0 of the cycle.
            f0 = df["Fecha"].dropna().iloc[0]
            df["Dia_ciclo"] = (df["Fecha"] - f0).dt.days.astype("Int64")
        else:
            df["Dia_ciclo"] = dia_ciclo_vacio
    else:
        df["Fecha"] = pd.NaT
        df["Dia_ciclo"] = dia_ciclo_vacio

    return df

def parse_unison_filename(filename: str):
    """
    Extract ``(region, ciclo)`` from a file name shaped like
    ``<Region>-FAO56-<YYYY>[-<YYYY>]-SALIDA.csv`` (matched case-insensitively).

    Two known spellings of the region are rewritten to their display form.
    ``ciclo`` is ``"YYYY"`` or ``"YYYY-YYYY"``. Returns ``(None, None)`` when
    the name does not match the pattern.
    """
    patron = r"([A-Za-zÁÉÍÓÚáéíóúñÑ\s]+)-FAO56-(\d{4})(?:-(\d{4}))?-SALIDA\.csv$"
    hallado = re.match(patron, filename, re.I)
    if hallado is None:
        return None, None

    region, inicio, fin = hallado.groups()
    # Exact-match aliases, applied before the surrounding whitespace is trimmed.
    alias = {"VillaAllende": "Villa de Allende", "Etchhojoa": "Etchojoa"}
    region = alias.get(region, region)
    ciclo = f"{inicio}-{fin}" if fin else inicio
    return region.strip(), ciclo

def construir_catalogo(base_dir: Path) -> pd.DataFrame:
    """
    Scan ``base_dir/<region folder>/*.csv`` and list every file whose name is
    recognized by ``parse_unison_filename``.

    Parameters
    ----------
    base_dir : Path
        Directory holding one sub-folder per region.

    Returns
    -------
    pd.DataFrame
        Columns ``Region``, ``Ciclo``, ``Ruta`` (file path as ``str``), sorted
        by region and cycle with a fresh 0..n-1 index. The frame is empty, but
        still has those three columns, when ``base_dir`` is not a directory or
        no file matches.
    """
    columnas = ["Region", "Ciclo", "Ruta"]
    rows = []
    if base_dir.is_dir():
        for reg_folder in sorted(os.listdir(base_dir)):
            d = base_dir / reg_folder
            if not d.is_dir():
                continue
            for f in sorted(os.listdir(d)):
                if not f.lower().endswith(".csv"):
                    continue
                reg, ciclo = parse_unison_filename(f)
                if reg and ciclo:
                    rows.append({"Region": reg, "Ciclo": ciclo, "Ruta": str(d / f)})
    # Passing `columns` keeps the schema even with zero rows; without it,
    # sort_values raises KeyError on an empty catalog.
    catalogo = pd.DataFrame(rows, columns=columnas)
    return catalogo.sort_values(["Region", "Ciclo"]).reset_index(drop=True)

# Build the file catalog from the data folder and preview its first rows.
CAT = construir_catalogo(RUTA_SALIDA_UNISON)
display(CAT.head())


Unnamed: 0,Region,Ciclo,Ruta
0,Cajeme,2010-2011,/lustre/home/mvalenzuela/Workspace/DataAqua-da...
1,Cajeme,2011-2012,/lustre/home/mvalenzuela/Workspace/DataAqua-da...
2,Cajeme,2012-2013,/lustre/home/mvalenzuela/Workspace/DataAqua-da...
3,Cajeme,2013-2014,/lustre/home/mvalenzuela/Workspace/DataAqua-da...
4,Cajeme,2014-2015,/lustre/home/mvalenzuela/Workspace/DataAqua-da...


In [4]:
# === Celda 3: Capturador de figuras y helpers PDF ===

class FiguraCapture:
    """
    Context manager that collects every matplotlib figure opened inside the
    ``with`` body and still open when the body ends.

    On exit, ``self.figs`` holds those figures ordered by figure number. The
    attribute does not exist before the block has exited. Exceptions raised in
    the body are not suppressed.
    """

    def __enter__(self):
        # Snapshot of the figure numbers that already exist.
        self._before = set(plt.get_fignums())
        return self

    def __exit__(self, exc_type, exc, tb):
        nuevos = sorted(set(plt.get_fignums()) - self._before)
        self.figs = []
        # plt.figure(num) returns the existing figure with that number
        # (and makes it the current one).
        self.figs.extend(plt.figure(num) for num in nuevos)

def add_plotly_fig_as_matplotlib(fig):
    """
    Rasterize a Plotly figure to PNG and embed it in a new matplotlib Figure
    so it can be saved into the PDF.

    The PNG export goes through ``plotly.io.to_image`` (needs the `kaleido`
    package). Returns the new 12x6 matplotlib figure, left open, showing the
    image with the axes hidden.
    """
    png_bytes = pio.to_image(fig, format="png", scale=2)  # requires `kaleido`
    pixeles = mpimg.imread(BytesIO(png_bytes), format='png')

    lienzo = plt.figure(figsize=(12,6))
    eje = lienzo.add_subplot(111)
    eje.imshow(pixeles)
    eje.axis('off')
    return lienzo

def pagina_portada(pdf: PdfPages, region: str, ciclos: list):
    """
    Append the cover page to ``pdf``: report title with the region name, the
    comma-separated list of cycles, and the generation timestamp.

    ``ciclos`` must contain strings. The figure is closed after being saved.
    """
    portada = plt.figure(figsize=(12,7))
    eje = portada.gca()
    eje.axis('off')

    titulo = f"Reporte de Modelos — {region}"
    subt = "Ciclos incluidos: " + ", ".join(ciclos)
    sello = f"Generado: {datetime.now().strftime('%Y-%m-%d %H:%M')}"

    eje.text(0.5, 0.65, titulo, ha='center', va='center', fontsize=24, weight='bold')
    eje.text(0.5, 0.45, subt, ha='center', va='center', fontsize=12)
    eje.text(0.5, 0.15, sello, ha='center', va='center', fontsize=10, alpha=0.7)

    pdf.savefig(portada)
    plt.close(portada)

def kpis_basicos(df: pd.DataFrame) -> dict:
    """
    Compute the headline indicators of one cycle.

    Rows count towards the totals only where ``ETc`` is not null (no rows at
    all when the column is absent).

    Returns a dict with:
      - ``dias``: number of rows with a valid ``ETc``
      - ``etc_total`` / ``etv_total`` / ``eta_total``: sums of ``ETc``,
        ``ETverde`` and ``ETazul`` over those rows
      - ``tmax`` / ``tmin``: maximum of ``Tmax`` and minimum of ``Tmin`` over
        the whole frame
    Any value whose source column is missing is ``np.nan``.
    """
    if "ETc" in df:
        con_etc = df["ETc"].notna()
    else:
        con_etc = pd.Series(False, index=df.index)

    def _suma(col):
        # Sum restricted to the rows that have a valid ETc.
        return float(df.loc[con_etc, col].sum()) if col in df else np.nan

    resumen = {"dias": int(con_etc.sum())}
    for clave, col in (("etc_total", "ETc"), ("etv_total", "ETverde"), ("eta_total", "ETazul")):
        resumen[clave] = _suma(col)
    resumen["tmax"] = float(df["Tmax"].max()) if "Tmax" in df else np.nan
    resumen["tmin"] = float(df["Tmin"].min()) if "Tmin" in df else np.nan
    return resumen

def pagina_kpis(pdf: PdfPages, region: str, ciclo: str, df: pd.DataFrame):
    """
    Append one text page to ``pdf`` with the basic indicators of a cycle
    (as computed by ``kpis_basicos``): number of days, ETc / ETverde / ETazul
    totals and the Tmax / Tmin extremes.

    Indicators that are not available (NaN) are rendered as an em dash. The
    figure is closed after being saved.
    """
    k = kpis_basicos(df)

    def _fmt(valor):
        # One-decimal number, or a dash when the indicator is missing.
        return "—" if np.isnan(valor) else f"{valor:.1f}"

    fig, ax = plt.subplots(figsize=(12,6))
    ax.axis('off')
    ax.text(0.02, 0.95, f"{region} — {ciclo}", fontsize=16, weight='bold')

    lines = [
        f"Días del ciclo: {k['dias']}",
        f"ETc total [mm]: {k['etc_total']:.1f}" if not np.isnan(k['etc_total']) else "ETc total: —",
        f"ETverde total [mm]: {k['etv_total']:.1f}" if not np.isnan(k['etv_total']) else "ETverde total: —",
        f"ETazul total [mm]: {k['eta_total']:.1f}" if not np.isnan(k['eta_total']) else "ETazul total: —",
    ]
    # Tmax and Tmin are checked independently: previously only Tmax was
    # tested, so a missing Tmin was printed as "nan" and an available Tmin
    # was hidden whenever Tmax was missing.
    if np.isnan(k['tmax']) and np.isnan(k['tmin']):
        lines.append("Tmax/Tmin: —")
    else:
        lines.append(f"Tmax / Tmin [°C]: {_fmt(k['tmax'])} / {_fmt(k['tmin'])}")

    for i, linea in enumerate(lines):
        ax.text(0.05, 0.8 - 0.1*i, linea, fontsize=12)
    pdf.savefig(fig)
    plt.close(fig)

def anexar_figuras(pdf: PdfPages, figs: list, titulo_encabezado: str = None):
    """
    Save every figure in ``figs`` as a page of ``pdf``, closing each one after
    it is written.

    When ``titulo_encabezado`` is given (non-empty), a thin banner page with
    that text is written first.
    """
    if titulo_encabezado:
        banda = plt.figure(figsize=(12,1.2))
        eje = banda.gca()
        eje.axis('off')
        eje.text(0.02, 0.5, titulo_encabezado, va='center', fontsize=14, weight='bold')
        pdf.savefig(banda)
        plt.close(banda)

    for figura in figs:
        pdf.savefig(figura)
        plt.close(figura)


In [5]:
# === Celda 4: BLOQUE DEL PROFESOR (integrado) ===
def correr_bloques_profesor(df: pd.DataFrame, region: str, ciclo: str, n_clusters: int = 5):
    """
    Reproduce the analyses of the professor's notebook on an already loaded and
    normalized frame.

    Sections, each skipped when its input columns are missing:
      1. Correlation heatmap of the meteorological variables.
      2. Scatter grid (Tmax, Rs, HR, Ux vs ET0; ET0 vs ETc).
      3. Linear regression and Random Forest predicting ET0.
      4. KMeans on the meteorological variables: elbow plot, Tmax/Rs scatter,
         day-by-group scatter and Plotly boxplots per `decada`.
      5. KMeans including ET0/ETc: per-cluster means, boxplots and scatter.

    Every matplotlib figure created here is intentionally left OPEN so that a
    surrounding ``FiguraCapture`` can collect it; the caller is responsible for
    saving and closing them. Plotly figures are rasterized into matplotlib
    figures through ``add_plotly_fig_as_matplotlib``.

    Parameters
    ----------
    df : pd.DataFrame
        Normalized data for one region/cycle. It is NOT modified.
    region, ciclo : str
        Only used in figure titles and log lines.
    n_clusters : int, default 5
        Number of KMeans clusters used in sections 4 and 5.

    Returns
    -------
    list[str]
        Human-readable log lines: model metrics, feature importances,
        per-cluster means, and a note for every section that had to be skipped.
    """
    logs = []

    # Work on a private copy with a clean 0..n-1 index: the cluster labels are
    # written back by index alignment below, and the caller's frame must not
    # gain the helper columns ('Grupo', 'grupo_index').
    df = df.reset_index(drop=True)

    # ---------- Correlation ----------
    variables = ['Tmax', 'Tmin', 'Tmean', 'HR', 'Ux', 'Rs', 'ET0', 'ETc']
    df_selected = df[[v for v in variables if v in df.columns]].dropna()
    if not df_selected.empty and df_selected.shape[1] >= 2:
        corr_matrix = df_selected.corr()
        plt.figure(figsize=(8, 6))
        # Hide the upper triangle (diagonal included): it mirrors the lower one.
        mask = np.triu(np.ones_like(corr_matrix))
        sns.heatmap(corr_matrix, annot=True, cmap='viridis', fmt=".2f", square=True, mask=mask)
        plt.title("Matriz de Correlación entre Variables Meteorológicas")
        plt.tight_layout()

    # ---------- Scatter: (Tmax, Rs, HR, Ux) vs ET0; ET0 vs ETc ----------
    # Every panel involves ET0, so without it the grid would be an empty page.
    tiene_meteo = all(c in df.columns for c in ['Tmax', 'HR', 'Ux', 'Rs', 'ET0'])
    if 'ET0' in df.columns and (tiene_meteo or 'ETc' in df.columns):
        fig, axes = plt.subplots(2, 3, figsize=(18, 10))
        axes = np.array(axes)
        # (x column, y column, grid position, color)
        paneles = [
            ('Tmax', 'ET0', (0, 0), 'blue'),
            ('Rs',   'ET0', (0, 1), 'green'),
            ('HR',   'ET0', (0, 2), 'red'),
            ('Ux',   'ET0', (1, 0), 'purple'),
            ('ET0',  'ETc', (1, 1), 'orange'),
        ]
        try:
            for col_x, col_y, (fila, col), color in paneles:
                if col_x in df and col_y in df:
                    sns.scatterplot(x=col_x, y=col_y, data=df, ax=axes[fila, col], color=color)
                    axes[fila, col].set_title(f'{col_x} vs {col_y}')
            # Five panels on a 2x3 grid: drop the unused sixth axes.
            fig.delaxes(axes[1, 2])
            fig.suptitle(f"Dispersión — {region} ({ciclo})")
            fig.tight_layout()
        except Exception as err:
            # Best effort: discard the half-built figure but leave a trace.
            plt.close(fig)
            logs.append(f"[{region} {ciclo}] Dispersión omitida: {type(err).__name__}: {err}")

    # ---------- Linear regression and Random Forest for ET0 ----------
    features = [c for c in ['Tmax', 'Tmin', 'HR', 'Ux', 'Rs'] if c in df.columns]
    target = 'ET0'
    if target in df.columns and len(features) >= 2:
        df_model = df[features + [target]].dropna()
        if not df_model.empty:
            X = df_model[features]
            y = df_model[target]
            # Both models share one split (same data and seed as before, when
            # each model repeated it on its own).
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # --- Linear regression ---
            model = LinearRegression()
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            r2 = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            logs.append(f"[{region} {ciclo}] Regresión Lineal ET0 — R²: {r2:.4f}  MSE: {mse:.4f}")

            plt.figure(figsize=(8, 6))
            sns.scatterplot(x=y_test, y=y_pred)
            lim_min = min(y_test.min(), y_pred.min())
            lim_max = max(y_test.max(), y_pred.max())
            # Identity line: perfect predictions would fall on it.
            plt.plot([lim_min, lim_max], [lim_min, lim_max], 'r--')
            plt.xlabel('ET0 Real')
            plt.ylabel('ET0 Predicho')
            plt.title(f"Comparación entre ET0 Real y Predicho — {region} ({ciclo})")
            plt.tight_layout()

            # --- Random Forest ---
            rf = RandomForestRegressor(n_estimators=100, random_state=42)
            rf.fit(X_train, y_train)
            y_pred_rf = rf.predict(X_test)

            r2_rf = r2_score(y_test, y_pred_rf)
            mse_rf = mean_squared_error(y_test, y_pred_rf)
            logs.append(f"[{region} {ciclo}] Random Forest ET0 — R²: {r2_rf:.4f}  MSE: {mse_rf:.4f}")
            logs.append(f"Importancias: {dict(zip(features, rf.feature_importances_))}")

            # Feature importances (bar chart)
            imp_series = pd.Series(rf.feature_importances_, index=features).sort_values(ascending=False)
            plt.figure(figsize=(8, 4))
            imp_series.plot(kind='bar')
            plt.title(f"Importancia de variables (RF) — {region} ({ciclo})")
            plt.ylabel("Importancia")
            plt.tight_layout()

    # ---------- Clustering (KMeans) on the meteorological variables ----------
    meteo_cols = [c for c in ['Tmax', 'Tmin', 'HR', 'Ux', 'Rs'] if c in df.columns]
    Xmet = df[meteo_cols].dropna() if meteo_cols else pd.DataFrame()
    hay_meteo = (not Xmet.empty) and Xmet.shape[1] >= 2
    if hay_meteo and len(Xmet) < n_clusters:
        # KMeans cannot build more clusters than there are samples.
        logs.append(
            f"[{region} {ciclo}] KMeans (meteo) omitido: "
            f"{len(Xmet)} filas completas para {n_clusters} clústeres"
        )
    elif hay_meteo:
        X_scaled = StandardScaler().fit_transform(Xmet)

        # Elbow method (k limited to the number of available samples)
        ks = [k for k in range(2, 10) if k <= len(Xmet)]
        inertia = [KMeans(n_clusters=k, random_state=42).fit(X_scaled).inertia_ for k in ks]
        if ks:
            plt.figure(figsize=(7, 4))
            plt.plot(ks, inertia, marker='o')
            plt.title("Método del codo")
            plt.xlabel("Número de clústeres")
            plt.ylabel("Inercia")
            plt.tight_layout()

        km = KMeans(n_clusters=n_clusters, random_state=42)
        # Labels exist only for the rows that survived dropna(); assigning a
        # Series aligns them by index and leaves NaN on the dropped rows
        # (a plain array assignment fails when any row was dropped).
        df['Grupo'] = pd.Series(km.fit_predict(X_scaled), index=Xmet.index)

        if 'Tmax' in df and 'Rs' in df and 'Grupo' in df:
            plt.figure(figsize=(7, 5))
            sns.scatterplot(data=df, x='Tmax', y='Rs', hue='Grupo', palette='Set2')
            plt.title(f"Clasificación de días climáticos — {region} ({ciclo})")
            plt.tight_layout()

        # Days per group
        if 'Día' in df and 'Grupo' in df:
            # (group, day) pairs, kept as in the original notebook: they are
            # merged back so the day column shows up as 'Día_y'.
            dias_por_grupo = {
                g: df[df['Grupo'] == g]['Día'].dropna().astype(int).tolist()
                for g in sorted(df['Grupo'].dropna().unique())
            }
            filas = [(g, d) for g, dias in dias_por_grupo.items() for d in dias]
            df_dias = pd.DataFrame(filas, columns=['Grupo', 'Día'])
            df['grupo_index'] = df.groupby('Grupo').cumcount()
            df_dias['grupo_index'] = df_dias.groupby('Grupo').cumcount()
            df_grupos = df.merge(df_dias, on=['Grupo', 'grupo_index'], how='left').drop(columns=['grupo_index'])

            # Scatter: day vs group
            plot_df_grupos = df_grupos[['Día_y', 'Grupo']].dropna()
            if not plot_df_grupos.empty:
                plt.figure(figsize=(10, 5))
                sns.scatterplot(data=plot_df_grupos, x='Día_y', y='Grupo', hue='Grupo', palette='Set2')
                plt.title(f'Distribución de días por grupo — {region} ({ciclo})')
                plt.xlabel('Día del año')
                plt.ylabel('Grupo')
                plt.tight_layout()

            # ---------- Plotly boxplots per decada / variable / group ----------
            if 'decada' in df_grupos.columns:
                for var in meteo_cols:
                    try:
                        figpx = px.box(
                            df_grupos.dropna(subset=['decada', var, 'Grupo']),
                            x='decada', y=var, color='Grupo',
                            title=f'{var} por grupo y década — {region} ({ciclo})',
                            labels={'decada': 'Década', var: var, 'Grupo': 'Grupo'},
                            color_discrete_sequence=px.colors.qualitative.Set2,
                            points='all'
                        )
                        figpx.update_layout(boxmode='group', legend_title='Grupo', template='plotly_white')
                        # Rasterize once and leave the matplotlib figure open
                        # for the capture. Do NOT call plt.show() here: under
                        # the Jupyter inline backend it closes every open
                        # figure, so nothing would be left to capture.
                        add_plotly_fig_as_matplotlib(figpx)
                    except Exception as err:
                        # Typically a missing/broken `kaleido` install: skip
                        # this boxplot but report why.
                        logs.append(
                            f"[{region} {ciclo}] Boxplot Plotly de {var} omitido: "
                            f"{type(err).__name__}: {err}"
                        )

    # ---------- Clusters including ET0 and ETc ----------
    vars2 = [c for c in ['Tmax', 'Tmin', 'HR', 'Ux', 'Rs', 'ET0', 'ETc'] if c in df.columns]
    if len(vars2) >= 3:
        df1 = df.copy()
        data = df1[vars2].dropna()
        if not data.empty and len(data) < n_clusters:
            logs.append(
                f"[{region} {ciclo}] KMeans (ET0/ETc) omitido: "
                f"{len(data)} filas completas para {n_clusters} clústeres"
            )
        elif not data.empty:
            data_scaled = StandardScaler().fit_transform(data)

            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            # Index-aligned assignment, same reason as for 'Grupo' above.
            etiquetas = pd.Series(kmeans.fit_predict(data_scaled), index=data.index)
            df1['cluster'] = etiquetas

            # Per-cluster means (reported as text only)
            stats = df1.groupby('cluster')[vars2].agg(['mean'])
            logs.append("Estadísticas descriptivas por grupo (medias):")
            logs.append(stats.to_string())

            # Seaborn boxplots, one panel per variable on a 2-column grid
            n = len(vars2)
            ncols = 2
            nrows = int(np.ceil(n / ncols))
            fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(16, 4*nrows))
            for idx, ax in enumerate(np.array(axes).reshape(-1)):
                if idx < n:
                    var = vars2[idx]
                    sns.boxplot(x='cluster', y=var, data=df1, ax=ax, palette='Set1')
                    ax.set_title(f'Distribución de {var} por grupo', fontsize=12)
                    ax.set_xlabel('Grupo')
                    ax.set_ylabel(var)
                else:
                    # Odd number of variables: hide the spare panel.
                    ax.set_visible(False)
            fig.suptitle(f"Boxplots por grupo — {region} ({ciclo})")
            fig.tight_layout()

            # The original notebook refitted KMeans here with the same data,
            # cluster count and seed; the labels are therefore reused.
            df1['Grupo'] = etiquetas

            if 'Tmax' in df1 and 'Rs' in df1:
                plt.figure(figsize=(7, 5))
                sns.scatterplot(data=df1, x='Tmax', y='Rs', hue='Grupo', palette='Set1')
                plt.title(f"Clasificación de días climáticos (ET0/ETc incluidos) — {region} ({ciclo})")
                plt.tight_layout()
    return logs

In [6]:
# === Celda 5: Proceso por lote (PDF por región con TODAS las figuras del profesor) ===

# Rebuild the catalog so this cell does not depend on a stale CAT.
CAT = construir_catalogo(RUTA_SALIDA_UNISON)
if CAT.empty:
    raise SystemExit("No se encontraron archivos en la ruta de datos.")

# (region, ciclo, error) for every cycle that could not be processed.
ciclos_fallidos = []

regiones = sorted(CAT["Region"].unique())
for region in regiones:
    cat_reg = CAT[CAT["Region"] == region].sort_values("Ciclo")
    ciclos = list(cat_reg["Ciclo"].unique())
    salida_pdf = RUTA_REPORTES / f"{region.replace(' ', '_')}_model_report.pdf"
    print(f"🧾 Generando PDF para {region}: {salida_pdf}")

    # One PDF per region: cover page, then the pages of every cycle.
    with PdfPages(salida_pdf) as pdf:
        pagina_portada(pdf, region, ciclos)

        for ciclo, ruta in cat_reg[["Ciclo","Ruta"]].itertuples(index=False):
            print(f"   ▸ Procesando {region} — {ciclo}")
            cap = FiguraCapture()
            try:
                df = leer_y_normalizar(ruta)
                if df.empty:
                    print("     (sin datos, se omite)"); continue

                # Basic KPI page (always written)
                pagina_kpis(pdf, region, ciclo, df)

                # Collect every matplotlib figure the analysis block creates.
                with cap:
                    logs = correr_bloques_profesor(df, region, ciclo)

                # 1) Text page with metrics / logs (when there are any)
                if logs:
                    texto = "\n".join(logs)
                    fig_txt, ax_txt = plt.subplots(figsize=(12, 7))
                    ax_txt.axis('off')
                    ax_txt.text(0.02, 0.98, f"{region} — {ciclo}  ·  Resultados y métricas", fontsize=14, weight='bold', va='top')
                    ax_txt.text(0.02, 0.92, texto, fontsize=10, va='top', family='monospace', wrap=True)
                    pdf.savefig(fig_txt); plt.close(fig_txt)

                # 2) Every captured figure
                if getattr(cap, "figs", None):
                    anexar_figuras(pdf, cap.figs, titulo_encabezado=f"{region} — {ciclo}")
            except Exception as err:
                # One bad cycle must not abort the remaining cycles/regions:
                # release the figures it left open, report it and move on.
                for fig_abierta in getattr(cap, "figs", None) or []:
                    plt.close(fig_abierta)
                ciclos_fallidos.append((region, ciclo, repr(err)))
                print(f"     ⚠ Error en {region} — {ciclo}: {err!r} (se omite el resto del ciclo)")

    print(f"✅ PDF guardado: {salida_pdf}\n")

if ciclos_fallidos:
    print(f"⚠ Ciclos con errores: {len(ciclos_fallidos)}")
    for region_f, ciclo_f, error_f in ciclos_fallidos:
        print(f"   - {region_f} — {ciclo_f}: {error_f}")


🧾 Generando PDF para Cajeme: /lustre/home/mvalenzuela/Workspace/DataAqua-dashboard/reports/modelos/Cajeme_model_report.pdf
   ▸ Procesando Cajeme — 2010-2011
[Cajeme 2010-2011] Regresión Lineal ET0 — R²: 0.9819  MSE: 0.0449
[Cajeme 2010-2011] Random Forest ET0 — R²: 0.9626  MSE: 0.0927
Importancias: {'Tmax': 0.20208644906173734, 'Tmin': 0.014226840370550393, 'HR': 0.08866895971177362, 'Ux': 0.10640749371139885, 'Rs': 0.5886102571445399}
Estadísticas descriptivas por grupo:
              Tmax       Tmin         HR        Ux         Rs       ET0  \
              mean       mean       mean      mean       mean      mean   
cluster                                                                   
0        25.649655   9.604483  32.723103  2.868966  18.367586  5.250256   
1        28.909773  14.040909  37.941136  1.967727  16.805227  4.563267   
2        32.000000  16.340513  35.322821  2.766667  25.240513  7.280296   
3        31.530833  14.322917  27.826250  2.505833  23.517083  6.885244 