In [2]:
from google.colab import files

# This will prompt you to upload the files from your local computer
uploaded = files.upload()

Saving datos_unidos_merida_2024_sem35_38_FINAL_v2.csv to datos_unidos_merida_2024_sem35_38_FINAL_v2.csv


Código 1 -> Este código funcionaría si tuvieramos más datos historicos, de uno o dos años

In [10]:
# =========================================================
# Predicción de huevos con XGBoost + lags climáticos + z-score por estación
# Train = semanas 35–37 | Test = semana 38
# Búsqueda honesta con validación temporal interna (sem 36 y 37)
# =========================================================
!pip -q install scikit-learn pandas numpy xgboost

import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor

# -------------------------
# 0) Reproducibilidad
# -------------------------
SEED = 42
np.random.seed(SEED)

# -------------------------
# 1) Cargar base
# -------------------------
CSV = "datos_unidos_merida_2024_sem35_38_FINAL_v2.csv"
df = pd.read_csv(CSV)
df.columns = df.columns.str.strip().str.lower()

# Normaliza nombres clave
rename_map = {"semana":"week","huevos":"eggs","tmax":"tmax","tmin":"tmin","precip":"precip","evap":"evap"}
df.rename(columns={k:v for k,v in rename_map.items() if k in df.columns}, inplace=True)

req = {"x","y","station_id","week","eggs"}
faltan = req - set(df.columns)
if faltan:
    raise ValueError(f"Faltan columnas mínimas: {faltan}")

df["station_id"] = df["station_id"].astype(str)
for v in ["tmax","tmin","precip","evap"]:
    if v not in df.columns:
        df[v] = np.nan

# Solo 35–38, por si acaso
df = df[df["week"].isin([35,36,37,38])].copy()

# -------------------------
# 2) Orden y FEATURES CAUSALES base (sin fuga)
# -------------------------
df = df.sort_values(["station_id","x","y","week"]).reset_index(drop=True)

# Lags de huevos (por trampa)
df["lag_1_week"] = df.groupby(["x","y"])["eggs"].shift(1)
df["rolling_mean_4"] = df.groupby(["x","y"])["eggs"].transform(lambda s: s.shift(1).rolling(4, min_periods=1).mean())
df["prom_hist"] = df.groupby(["x","y"])["eggs"].transform(lambda s: s.shift(1).expanding(min_periods=1).mean())

# Lags climáticos por estación (causal)
for v in ["tmax","tmin","precip","evap"]:
    df[f"{v}_lag1"] = df.groupby("station_id")[v].shift(1)

# -------------------------
# 3) Split temporal: train=35–37, test=38
# -------------------------
train = df[df["week"].isin([35,36,37])].copy()
test  = df[df["week"]==38].copy()

# -------------------------
# 4) Densidad por zona (solo con TRAIN) y aplicar a TEST
# -------------------------
ROUND = 0.003  # ~300 m aprox
def zonify(s, prec=ROUND): return (s/prec).round()*prec

train["x_zone"] = zonify(train["x"]); train["y_zone"] = zonify(train["y"])
dens = (train.groupby(["x_zone","y_zone"], as_index=False)
              .agg(densidad=("eggs","sum"), conteo=("eggs","count")))
for d in [train, test]:
    d["x_zone"] = zonify(d["x"]); d["y_zone"] = zonify(d["y"])
train = train.merge(dens, on=["x_zone","y_zone"], how="left")
test  = test.merge(dens, on=["x_zone","y_zone"], how="left")
for c in ["densidad","conteo"]:
    train[c] = train[c].fillna(0.0); test[c] = test[c].fillna(0.0)

# -------------------------
# 5) Clustering espacial (KMeans) solo con TRAIN
# -------------------------
kmeans = KMeans(n_clusters=5, random_state=SEED, n_init=10)
kmeans.fit(train[["x","y"]])
train["cluster"] = kmeans.predict(train[["x","y"]])
test["cluster"]  = kmeans.predict(test[["x","y"]])

# -------------------------
# 6) Z-score climáticos por estación usando SOLO TRAIN
# -------------------------
stats = (train.groupby("station_id")[["tmax","tmin","precip","evap"]]
               .agg(["mean","std"]))
# aplanar columnas
stats.columns = [f"{a}_{b}" for a,b in stats.columns]
stats = stats.reset_index()

def add_station_z(df_part):
    d = df_part.merge(stats, on="station_id", how="left")
    for v in ["tmax","tmin","precip","evap"]:
        mu = d[f"{v}_mean"]; sd = d[f"{v}_std"].replace(0,np.nan)
        d[f"{v}_z"] = (d[v]-mu)/sd
    return d

train = add_station_z(train)
test  = add_station_z(test)

# -------------------------
# 7) Ensamblar FEATURES finales
# -------------------------
features = [
    # tiempo/espacio
    "week","x","y",
    # clima crudo y lags
    "tmax","tmin","precip","evap","tmax_lag1","tmin_lag1","precip_lag1","evap_lag1",
    # clima z-score
    "tmax_z","tmin_z","precip_z","evap_z",
    # dinámica huevos
    "lag_1_week","rolling_mean_4","prom_hist",
    # densidad/estructura
    "densidad","conteo","cluster",
    # categórica
    "station_id"
]
for c in features:
    if c not in train.columns:
        train[c]=np.nan; test[c]=np.nan

y_train = train["eggs"].astype(float).copy()
y_test  = test["eggs"].astype(float).copy()

# Imputación sencilla para columnas clave (usar mediana de TRAIN)
fill_cols = [
    "lag_1_week","rolling_mean_4","prom_hist",
    "tmax","tmin","precip","evap","tmax_lag1","tmin_lag1","precip_lag1","evap_lag1",
    "tmax_z","tmin_z","precip_z","evap_z",
    "densidad","conteo"
]
for col in fill_cols:
    med = train[col].median()
    train[col] = train[col].fillna(med)
    test[col]  = test[col].fillna(med)

# One-hot de station_id para XGB (fuera del pipeline para controlar columnas)
train_X = pd.get_dummies(train[features], columns=["station_id"], dummy_na=False)
test_X  = pd.get_dummies(test[features],  columns=["station_id"], dummy_na=False)
test_X  = test_X.reindex(columns=train_X.columns, fill_value=0)

# -------------------------
# 8) Validación temporal interna (honesta) para elegir hiperparámetros
#    Folds: validar en semana 36 y 37 (entrenando con semanas previas)
# -------------------------
def metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    return {"RMSE": float(np.sqrt(mse)), "MAE": float(mean_absolute_error(y_true,y_pred)), "R2": float(r2_score(y_true,y_pred))}

def fit_eval_cfg(cfg, df_train_full, train_X_full, y_train_full):
    # folds: validar en 36 y 37
    scores = []
    for val_week in [36, 37]:
        tr_idx = df_train_full["week"] < val_week
        vl_idx = df_train_full["week"] == val_week
        X_tr, y_tr = train_X_full[tr_idx], y_train_full[tr_idx]
        X_vl, y_vl = train_X_full[vl_idx], y_train_full[vl_idx]

        xgb = XGBRegressor(
            n_estimators=cfg["n_estimators"],
            max_depth=cfg["max_depth"],
            learning_rate=cfg["learning_rate"],
            subsample=cfg["subsample"],
            colsample_bytree=cfg["colsample_bytree"],
            reg_lambda=cfg["reg_lambda"],
            random_state=SEED,
            n_jobs=-1
        )
        xgb.fit(X_tr, y_tr)
        pred_vl = xgb.predict(X_vl)
        r2_vl = r2_score(y_vl, pred_vl)
        scores.append(r2_vl)
    return np.mean(scores)

configs = [
    {"n_estimators":700, "max_depth":6, "learning_rate":0.05, "subsample":0.9, "colsample_bytree":0.9, "reg_lambda":1.2},
    {"n_estimators":1000,"max_depth":7, "learning_rate":0.04, "subsample":0.9, "colsample_bytree":0.9, "reg_lambda":1.0},
    {"n_estimators":1200,"max_depth":7, "learning_rate":0.03, "subsample":0.95,"colsample_bytree":0.9, "reg_lambda":1.1},
    {"n_estimators":1500,"max_depth":8, "learning_rate":0.03, "subsample":0.9, "colsample_bytree":0.95,"reg_lambda":1.3},
]

best_cfg = None
best_cv = -1e9
for cfg in configs:
    cv_r2 = fit_eval_cfg(cfg, train, train_X, y_train)
    print(f"CV temporal (R2 medio val36/37) para {cfg} -> {cv_r2:.4f}")
    if cv_r2 > best_cv:
        best_cv = cv_r2
        best_cfg = cfg

print("\nMejor config por CV temporal:", best_cfg, " | R2 medio:", round(best_cv,4))

# -------------------------
# 9) Entrenar final en 35–37 con mejor config y evaluar en 38
# -------------------------
best_xgb = XGBRegressor(
    n_estimators=best_cfg["n_estimators"],
    max_depth=best_cfg["max_depth"],
    learning_rate=best_cfg["learning_rate"],
    subsample=best_cfg["subsample"],
    colsample_bytree=best_cfg["colsample_bytree"],
    reg_lambda=best_cfg["reg_lambda"],
    random_state=SEED,
    n_jobs=-1
)
best_xgb.fit(train_X, y_train)

pred_train = best_xgb.predict(train_X)
pred_test  = best_xgb.predict(test_X)

m_train = metrics(y_train, pred_train)
m_test  = metrics(y_test,  pred_test)

print("\n== MÉTRICAS FINALES ==")
print("Train:", json.dumps(m_train, indent=2))
print("Test: ", json.dumps(m_test,  indent=2))

# -------------------------
# 10) Guardar artefactos
# -------------------------
Path("salidas").mkdir(exist_ok=True)
train_out = train[["x","y","station_id","week","eggs"]].copy()
test_out  = test[ ["x","y","station_id","week","eggs"]].copy()
train_out["pred_xgb"] = pred_train
test_out["pred_xgb"]  = pred_test

train_out.to_csv("salidas/train_predicciones_xgb.csv", index=False)
test_out.to_csv("salidas/test_predicciones_xgb.csv", index=False)
with open("salidas/metricas_train.json","w") as f: json.dump(m_train, f, indent=2)
with open("salidas/metricas_test.json","w") as f: json.dump(m_test, f, indent=2)
with open("salidas/mejor_config_xgb.json","w") as f: json.dump(best_cfg, f, indent=2)

print("\n✅ Artefactos guardados en 'salidas/'.")


CV temporal (R2 medio val36/37) para {'n_estimators': 700, 'max_depth': 6, 'learning_rate': 0.05, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_lambda': 1.2} -> -0.0761
CV temporal (R2 medio val36/37) para {'n_estimators': 1000, 'max_depth': 7, 'learning_rate': 0.04, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_lambda': 1.0} -> -0.0947
CV temporal (R2 medio val36/37) para {'n_estimators': 1200, 'max_depth': 7, 'learning_rate': 0.03, 'subsample': 0.95, 'colsample_bytree': 0.9, 'reg_lambda': 1.1} -> -0.0857
CV temporal (R2 medio val36/37) para {'n_estimators': 1500, 'max_depth': 8, 'learning_rate': 0.03, 'subsample': 0.9, 'colsample_bytree': 0.95, 'reg_lambda': 1.3} -> -0.1095

Mejor config por CV temporal: {'n_estimators': 700, 'max_depth': 6, 'learning_rate': 0.05, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_lambda': 1.2}  | R2 medio: -0.0761

== MÉTRICAS FINALES ==
Train: {
  "RMSE": 20.51963463183992,
  "MAE": 13.16157172867462,
  "R2": 0.7567006443332298
}
Test:  {
  "RMSE"

Codigo 2 -> Este código da el resultado por cada punto de recolección de datos del clima

In [23]:
# =========================================================
# MODELO ESPACIAL (XGBoost)
# - Usa 4 semanas (35–38) como muestras independientes
# - CV agrupada por estación (GroupKFold)
# - Métricas: RMSE, MAE, R2
# - Mapa predictivo en rejilla por semana
# =========================================================
!pip -q install xgboost scikit-learn pandas numpy folium

import json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import folium

SEED = 42
np.random.seed(SEED)

# -------------------------
# 1) Cargar datos
# -------------------------
CSV = "datos_unidos_merida_2024_sem35_38_FINAL_v2.csv"
df = pd.read_csv(CSV)
df.columns = df.columns.str.strip().str.lower()

# Renombrar claves si vienen en mayúsculas
rename_map = {"semana":"week","huevos":"eggs","tmax":"tmax","tmin":"tmin","precip":"precip","evap":"evap"}
df.rename(columns={k:v for k,v in rename_map.items() if k in df.columns}, inplace=True)

# Filtros y tipos
df = df[df["week"].isin([35,36,37,38])].copy()
df["station_id"] = df["station_id"].astype(str)
for v in ["tmax","tmin","precip","evap"]:
    if v not in df.columns: df[v] = np.nan

# -------------------------
# 2) Ingeniería mínima (espacial) — sin fuga temporal
# -------------------------
# Densidad por zona por semana (captura hotspots por semana)
ROUND = 0.001  # ~200m aprox
def zonify(s, prec=ROUND): return (s/prec).round()*prec

df["x_zone"] = zonify(df["x"])
df["y_zone"] = zonify(df["y"])
dens = (df.groupby(["week","x_zone","y_zone"], as_index=False)
          .agg(densidad=("eggs","sum"), conteo=("eggs","count")))
df = df.merge(dens, on=["week","x_zone","y_zone"], how="left")

# Curvatura espacial suave
df["x2"] = df["x"]**2
df["y2"] = df["y"]**2
df["xy"] = df["x"]*df["y"]

# -------------------------
# 3) Features y target
# -------------------------
features_num = [
    "week","x","y","x2","y2","xy",
    "tmax","tmin","precip","evap",
    "densidad","conteo"
]
features_cat = ["station_id"]
target = "eggs"

# Imputación segura
for c in features_num:
    if c not in df.columns: df[c] = np.nan

X_num = df[features_num].copy()
X_cat = df[features_cat].copy()
y = df[target].astype(float).copy()
groups = df["station_id"].copy()  # para CV agrupada

# -------------------------
# 4) Pipeline (imputación + OneHot + XGB)
# -------------------------
preprocess = ColumnTransformer(
    transformers=[
        ("num", SimpleImputer(strategy="median"), features_num),
        ("cat", Pipeline([("imp", SimpleImputer(strategy="most_frequent")),
                          ("oh", OneHotEncoder(handle_unknown="ignore"))]),
         features_cat)
    ],
    remainder="drop"
)

xgb = XGBRegressor(
    n_estimators=1200,
    max_depth=7,
    learning_rate=0.04,
    subsample=0.9,
    colsample_bytree=0.9,
    reg_lambda=1.1,
    random_state=SEED,
    n_jobs=-1
)

pipe = Pipeline([("prep", preprocess), ("xgb", xgb)])

# -------------------------
# 5) Validación cruzada agrupada por estación (honesta espacialmente)
# -------------------------
gkf = GroupKFold(n_splits=min(5, df["station_id"].nunique()))
r2s, rmses, maes = [], [], []

for fold, (tr, vl) in enumerate(gkf.split(df, y, groups=groups), 1):
    pipe.fit(pd.concat([X_num.iloc[tr], X_cat.iloc[tr]], axis=1), y.iloc[tr])
    pred = pipe.predict(pd.concat([X_num.iloc[vl], X_cat.iloc[vl]], axis=1))
    rmse = float(np.sqrt(mean_squared_error(y.iloc[vl], pred)))
    mae  = float(mean_absolute_error(y.iloc[vl], pred))
    r2   = float(r2_score(y.iloc[vl], pred))
    r2s.append(r2); rmses.append(rmse); maes.append(mae)
    print(f"Fold {fold}: RMSE={rmse:.2f}  MAE={mae:.2f}  R2={r2:.3f}")

print("\n== CV agrupada por estación (promedio) ==")
print(json.dumps({
    "RMSE_mean": float(np.mean(rmses)),
    "MAE_mean":  float(np.mean(maes)),
    "R2_mean":   float(np.mean(r2s))
}, indent=2))

# Entrenar en TODO para usar luego en grid
pipe.fit(pd.concat([X_num, X_cat], axis=1), y)

# -------------------------
# 6) Predicción en rejilla para cada semana (mapa)
#    - La rejilla toma clima por estación más cercana usando promedios por estación-semana
# -------------------------
# Centroides por estación (para asignación espacial)
centroids = df.groupby("station_id", as_index=False)[["x","y"]].mean().rename(columns={"x":"x_est","y":"y_est"})

# Clima promedio por estación-semana (ya está en df)
clima_sem = df.groupby(["station_id","week"], as_index=False)[["tmax","tmin","precip","evap"]].mean()

# Rejilla sobre bounding box
min_x, max_x = df["x"].min(), df["x"].max()
min_y, max_y = df["y"].min(), df["y"].max()
STEP = 0.003  # ~300m
grid_x = np.arange(min_x, max_x + STEP, STEP)
grid_y = np.arange(min_y, max_y + STEP, STEP)
grid = pd.DataFrame([(x0,y0) for y0 in grid_y for x0 in grid_x], columns=["x","y"])

# Función para asignar estación más cercana (vectorizada)
def assign_nearest_station(grid_df, centroids_df):
    A_lat = np.radians(grid_df["y"].to_numpy().reshape(-1,1))
    A_lon = np.radians(grid_df["x"].to_numpy().reshape(-1,1))
    B_lat = np.radians(centroids_df["y_est"].to_numpy().reshape(1,-1))
    B_lon = np.radians(centroids_df["x_est"].to_numpy().reshape(1,-1))
    dlat = B_lat - A_lat
    dlon = B_lon - A_lon
    a = np.sin(dlat/2.0)**2 + np.cos(A_lat)*np.cos(B_lat)*np.sin(dlon/2.0)**2
    d = 2 * 6371.0 * np.arcsin(np.sqrt(a))  # km
    idx = d.argmin(axis=1)
    return centroids_df["station_id"].to_numpy()[idx]

# Preparar carpeta de salidas
Path("salidas").mkdir(exist_ok=True)

all_weeks = sorted(df["week"].unique().tolist())  # [35,36,37,38]
for w in all_weeks:
    g = grid.copy()
    g["week"] = w
    # estación más cercana
    g["station_id"] = assign_nearest_station(g, centroids)
    # clima por estación-semana
    g = g.merge(clima_sem, on=["station_id","week"], how="left")
    # si falta clima por alguna estación/semana, imputar con medias globales de esa semana
    for v in ["tmax","tmin","precip","evap"]:
        med = df.loc[df["week"]==w, v].median()
        g[v] = g[v].fillna(med)

    # features extra coherentes con entrenamiento
    g["x2"] = g["x"]**2
    g["y2"] = g["y"]**2
    g["xy"] = g["x"]*g["y"]

    # densidad por zona (no observada en grid → 0 por defecto)
    g["x_zone"] = zonify(g["x"]); g["y_zone"] = zonify(g["y"])
    g = g.merge(dens[dens["week"]==w][["x_zone","y_zone","densidad","conteo"]],
                on=["x_zone","y_zone"], how="left")
    for c in ["densidad","conteo"]:
        g[c] = g[c].fillna(0.0)

    # preparar columnas como en el entrenamiento
    g_feat = g[features_num + features_cat].copy()
    # pasar por el pipeline entrenado
    pred = pipe.predict(g_feat)
    g["pred_eggs"] = pred

    # Guardar CSV por semana
    out_csv = f"salidas/pred_grid_semana_{w}.csv"
    g[["x","y","station_id","week","tmax","tmin","precip","evap","pred_eggs"]].to_csv(out_csv, index=False)
    print(f"CSV rejilla semana {w} -> {out_csv}")

    # Mapa rápido (Folium)
    center = [g["y"].mean(), g["x"].mean()]
    m = folium.Map(location=center, zoom_start=12, tiles="cartodbpositron")

    # Escala de radios (metros) según percentiles
    p5, p95 = np.percentile(g["pred_eggs"], [5, 95]) if len(g) > 20 else (g["pred_eggs"].min(), g["pred_eggs"].max())
    if p95 <= p5: p95 = p5 + 1.0
    def scale_radius(v, rmin=40, rmax=350):
        v = np.clip(v, p5, p95)
        return float(rmin + (rmax - rmin) * (v - p5) / (p95 - p5))

    for _, r in g.iterrows():
        folium.Circle(
            location=[r["y"], r["x"]],  # [lat, lon] -> [y, x]
            radius=scale_radius(r["pred_eggs"]),
            color=None,
            fill=True,
            fill_opacity=0.5,
            popup=folium.Popup(
                f"<b>Semana:</b> {int(w)}<br>"
                f"<b>Estación:</b> {r['station_id']}<br>"
                f"<b>Pred. huevos:</b> {r['pred_eggs']:.1f}", max_width=250)
        ).add_to(m)

    out_html = f"salidas/mapa_semana_{w}.html"
    m.save(out_html)
    print(f"Mapa semana {w} -> {out_html}")

# -------------------------
# 7) Métricas en TODO el conjunto (hold-in CV ya impresa)
#    Si quieres una métrica única “out-of-fold” por estación:
# -------------------------
# (Opcional) Generar predicciones out-of-fold para cada fold y consolidar una métrica global


Fold 1: RMSE=41.59  MAE=15.62  R2=0.290
Fold 2: RMSE=23.17  MAE=9.62  R2=0.590
Fold 3: RMSE=19.41  MAE=9.45  R2=0.591
Fold 4: RMSE=16.55  MAE=8.38  R2=0.685
Fold 5: RMSE=16.38  MAE=11.14  R2=0.516

== CV agrupada por estación (promedio) ==
{
  "RMSE_mean": 23.421181501903284,
  "MAE_mean": 10.841415393422517,
  "R2_mean": 0.5344291738910264
}
CSV rejilla semana 35 -> salidas/pred_grid_semana_35.csv
Mapa semana 35 -> salidas/mapa_semana_35.html
CSV rejilla semana 36 -> salidas/pred_grid_semana_36.csv
Mapa semana 36 -> salidas/mapa_semana_36.html
CSV rejilla semana 37 -> salidas/pred_grid_semana_37.csv
Mapa semana 37 -> salidas/mapa_semana_37.html
CSV rejilla semana 38 -> salidas/pred_grid_semana_38.csv
Mapa semana 38 -> salidas/mapa_semana_38.html


Mapa

In [25]:
# ==============================
# Asegurar predicciones y mapa Folium
# Requiere: df, pipe, features_num, features_cat
# ==============================
import folium
from branca.colormap import LinearColormap
from pathlib import Path
import numpy as np

# 1) Garantizar columnas x/y (por si vienen como lon/lat)
if 'x' not in df.columns and 'lon' in df.columns:
    df['x'] = df['lon']
if 'y' not in df.columns and 'lat' in df.columns:
    df['y'] = df['lat']

# 2) Asegurar dtype y columnas del modelo
if 'station_id' in df.columns:
    df['station_id'] = df['station_id'].astype(str)

# Si aún no existe pred_eggs, la calculamos con el pipeline entrenado
if 'pred_eggs' not in df.columns:
    # Asegurar que todas las columnas numéricas requeridas existan
    for c in features_num:
        if c not in df.columns:
            df[c] = np.nan
    # Preparar X como en el entrenamiento
    X_full = pd.concat([df[features_num], df[features_cat]], axis=1)
    df['pred_eggs'] = pipe.predict(X_full)

# 3) Construir el mapa
centro_lat, centro_lon = 20.9674, -89.5926  # Mérida aprox
m = folium.Map(location=[centro_lat, centro_lon], zoom_start=12, tiles='OpenStreetMap')

# Colormap robusto (si vmin==vmax, separa un poco para evitar error)
vmin = float(np.nanmin(df['pred_eggs'].values))
vmax = float(np.nanmax(df['pred_eggs'].values))
if not np.isfinite(vmin) or not np.isfinite(vmax):
    vmin, vmax = 0.0, 1.0
if vmax <= vmin:
    vmax = vmin + 1.0

colormap = LinearColormap(colors=['blue', 'yellow', 'red'],
                          vmin=vmin, vmax=vmax, caption='Predicción de huevos')

# Pintar puntos
for r in df.itertuples(index=False):
    # radio y color
    pred = float(getattr(r, 'pred_eggs'))
    color = colormap(pred)
    radius = max(3, float(np.log1p(max(pred, 0))) * 2.0)

    # coordenadas (y = lat, x = lon)
    lat = float(getattr(r, 'y'))
    lon = float(getattr(r, 'x'))

    # textos defensivos por si faltan columnas
    week = int(getattr(r, 'week', -1))
    eggs_real = getattr(r, 'eggs', np.nan)
    dens = getattr(r, 'densidad', 0.0)
    cont = getattr(r, 'conteo', 0.0)
    stid = getattr(r, 'station_id', 'NA')

    popup = folium.Popup(
        f"<b>Semana:</b> {week}<br>"
        f"<b>Estación:</b> {stid}<br>"
        f"<b>Huevos reales:</b> {eggs_real:.0f}<br>"
        f"<b>Huevos predichos:</b> {pred:.1f}<br>"
        f"<b>Densidad zona:</b> {dens:.0f} (conteo {cont:.0f})",
        max_width=320
    )

    folium.CircleMarker(
        location=[lat, lon],
        radius=radius,
        color=color,
        fill=True,
        fill_opacity=0.7,
        weight=1,
        popup=popup
    ).add_to(m)

# Añadir colormap y marcador del centro
colormap.add_to(m)
folium.Marker([centro_lat, centro_lon],
              popup="<b>Centro de Mérida</b>",
              icon=folium.Icon(color='green', icon='info-sign')).add_to(m)

# Guardar
Path("salidas").mkdir(exist_ok=True)
out_html = "salidas/mapa_predicciones_merida.html"
m.save(out_html)
print(f"🗺️ Mapa guardado: {out_html}")

🗺️ Mapa guardado: salidas/mapa_predicciones_merida.html


In [None]:
from google.colab import drive
drive.mount('/content/drive')