### Inicialización de librerías y marca temporal de ejecución

In [1]:
import pandas as pd
from datetime import datetime
import os
import re
import time
import requests
import pandas as pd
from urllib.parse import urlencode
# Run timestamp taken from the local clock at minute resolution (YYYYMMDD_HHMM);
# it is embedded in the output file names so each run writes to its own files.
fecha = datetime.now().strftime('%Y%m%d_%H%M')

### Carga y visualización inicial del fichero de mallado

In [2]:
# Load the grid-point file and preview its first rows.
df = pd.read_csv('mallado.csv')
df.head()

Unnamed: 0,poblacion_total,lat,lon
0,14458,43.010088,-9.188413
1,260901,37.257363,-7.200928
2,217987,37.96537,-7.138822
3,20120,39.499208,-7.17137
4,169450,42.03367,-7.933326


### Descarga y agregación diaria masiva de datos climáticos PVGIS (2019–2023)

In [3]:
# ===========================================
# PVGIS - Bulk processing 2019-2023
# Daily output in UTC and Europe/Madrid
# ===========================================

# ---------- Parameters ----------
# Input file with the grid points to query.
INPUT_CSV = "mallado.csv"
# Output file names; `fecha` is the run timestamp computed in the first cell.
OUTPUT_OK_UTC = f'pvgis_diario_2019_2023_utc_{fecha}.csv'
OUTPUT_OK_LOCAL = f'pvgis_diario_2019_2023_europe_madrid_{fecha}.csv'
OUTPUT_ERR = f'pvgis_errores_2019_2023_{fecha}.csv'
# Inclusive range of years requested from the service.
START_YEAR, END_YEAR = 2019, 2023
# Time zone used for the local-time daily aggregation.
LOCAL_TZ = "Europe/Madrid"
SLEEP_BETWEEN_CALLS_SEC = 0.2  # pause between calls to the service (limit ~30 req/s)

# ---------- Utilidades ----------
# Endpoint of the PVGIS hourly-series service that every request URL is built on.
PVGIS_BASE = "https://re.jrc.ec.europa.eu/api/v5_3/seriescalc"


def build_seriescalc_url(lat, lon, startyear=None, endyear=None, raddatabase=None, components=0, outputformat="json"):
    """Build the query URL for a PVGIS ``seriescalc`` request.

    Args:
        lat: Latitude of the point, passed through as given.
        lon: Longitude of the point, passed through as given.
        startyear: First year of the series; omitted from the query when None.
        endyear: Last year of the series; omitted from the query when None.
        raddatabase: Radiation database name; omitted from the query when falsy.
        components: Value of the ``components`` query parameter, sent as an int.
        outputformat: Value of the ``outputformat`` query parameter.

    Returns:
        The full URL with the parameters URL-encoded in a fixed order.
    """
    # A list of pairs keeps the parameter order explicit for urlencode.
    query = [("lat", lat), ("lon", lon)]
    if startyear is not None:
        query.append(("startyear", int(startyear)))
    if endyear is not None:
        query.append(("endyear", int(endyear)))
    if raddatabase:
        query.append(("raddatabase", raddatabase))
    query.append(("components", int(components)))
    query.append(("outputformat", outputformat))
    return PVGIS_BASE + "?" + urlencode(query)

def parse_time_mixed(s):
    """Parse a timestamp string into a timezone-aware UTC ``pandas.Timestamp``.

    Two layouts are accepted: the compact ``YYYYMMDD:HHMM`` form and anything
    ``pandas.to_datetime`` can infer, such as ``YYYY-MM-DDTHH:MM:SSZ``.

    Args:
        s: Raw timestamp; it is converted with ``str`` before parsing.

    Returns:
        A UTC ``pandas.Timestamp``, or ``pandas.NaT`` when the value cannot be
        parsed in either layout.
    """
    s = str(s)
    if re.fullmatch(r"\d{8}:\d{4}", s):
        # Coerce here too: a value with the right shape but an impossible date
        # (e.g. month 13) must become NaT like any other unparseable input,
        # instead of raising and aborting a whole-column ``map``.
        return pd.to_datetime(s, format="%Y%m%d:%H%M", utc=True, errors="coerce")
    return pd.to_datetime(s, utc=True, errors="coerce")

def choose_radiation_column(df):
    """Pick the radiation column to aggregate.

    ``G(i)`` is preferred; ``G(h)`` is used only when ``G(i)`` is absent.

    Args:
        df: DataFrame whose columns are inspected.

    Returns:
        The name of the chosen column, or None when neither is present.
    """
    # Candidates are listed in order of preference.
    for candidate in ("G(i)", "G(h)"):
        if candidate in df.columns:
            return candidate
    return None

def aggregate_daily(df, rad_col, tz=None):
    """Aggregate a time-indexed frame to one row per calendar day.

    Args:
        df: DataFrame with a timezone-aware datetime index. When ``tz`` is
            None the index is used as is (the caller is expected to supply UTC).
        rad_col: Name of the radiation column to average, or None.
        tz: Optional time zone name (e.g. 'Europe/Madrid'); when given, the
            index is converted with ``tz_convert`` before resampling so that
            days are cut at local midnight.

    Returns:
        A DataFrame indexed by day containing whichever of these columns can
        be computed: Temp_min_C, Temp_max_C, Temp_media_C (from ``T2m``),
        RadiacionGlobal_media_Wm2 (mean of ``rad_col``) and Viento_media_ms
        (mean of ``WS10m``). No lat/lon columns are added here. Returns None
        when none of the source columns is present.
    """
    frame = df.tz_convert(tz) if tz else df
    available = frame.columns

    # Named aggregations keyed by output column: (source column, function).
    named_aggs = {}
    if "T2m" in available:
        for out_name, func in (
            ("Temp_min_C", "min"),
            ("Temp_max_C", "max"),
            ("Temp_media_C", "mean"),
        ):
            named_aggs[out_name] = ("T2m", func)
    if rad_col and rad_col in available:
        named_aggs["RadiacionGlobal_media_Wm2"] = (rad_col, "mean")
    if "WS10m" in available:
        named_aggs["Viento_media_ms"] = ("WS10m", "mean")

    # Nothing to aggregate.
    if not named_aggs:
        return None

    # Decimal places per output column; entries for absent columns are ignored.
    decimals = {
        "Temp_min_C": 2,
        "Temp_max_C": 2,
        "Temp_media_C": 2,
        "RadiacionGlobal_media_Wm2": 1,
        "Viento_media_ms": 2,
    }
    return frame.resample("D").agg(**named_aggs).round(decimals)

def process_point(lat, lon, start_year=START_YEAR, end_year=END_YEAR, timeout=60):
    """Fetch the hourly series for one point and aggregate it to daily values.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        start_year: First year requested from the service.
        end_year: Last year requested from the service.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        ``(daily_utc, daily_local, None)`` on success, where each frame holds
        the daily aggregates plus a ``date_utc`` / ``date_local`` column and
        ``lat`` / ``lon`` columns; ``(None, None, error_dict)`` on failure,
        where ``error_dict`` has the keys ``lat``, ``lon``, ``status`` and
        ``error``.
    """

    def failure(status, message):
        # Every failure path returns the same three-slot shape.
        return None, None, {"lat": lat, "lon": lon, "status": status, "error": message}

    def finalize(daily, date_col, tz_name):
        # Replace the datetime index by a plain date column and tag the point.
        if daily is None:
            return None
        tagged = daily.copy()
        tagged[date_col] = tagged.index.tz_convert(tz_name).date
        tagged = tagged.reset_index(drop=True)
        tagged["lat"] = lat
        tagged["lon"] = lon
        return tagged

    url = build_seriescalc_url(
        lat, lon, startyear=start_year, endyear=end_year, components=0, outputformat="json"
    )
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        return failure(None, f"RequestException: {exc}")

    if response.status_code != 200:
        # Prefer the service's own message; fall back to the raw body.
        try:
            detail = response.json().get("message")
        except Exception:
            detail = response.text[:500]
        return failure(response.status_code, detail)

    try:
        payload = response.json()
        records = payload.get("outputs", {}).get("hourly", None)
        if records is None or len(records) == 0:
            return failure(200, "Sin 'outputs.hourly' o vacío")
        df = pd.DataFrame(records)
    except Exception as exc:
        return failure(200, f"JSON/estructura inesperada: {exc}")

    # Date parsing
    if "time" not in df.columns:
        return failure(200, "Columna 'time' no encontrada")
    df["time"] = df["time"].map(parse_time_mixed)
    if df["time"].isna().all():
        return failure(200, "No se pudo parsear ninguna fecha")

    # Sort chronologically and index by timestamp
    df = df.sort_values("time").set_index("time")

    # Force the columns of interest to numeric; unparseable cells become NaN
    rad_col = choose_radiation_column(df)
    useful_cols = [name for name in ("T2m", "WS10m", rad_col) if name and name in df.columns]
    for name in useful_cols:
        df[name] = pd.to_numeric(df[name], errors="coerce")

    # At least one useful column is required
    if not useful_cols:
        return failure(200, "No hay columnas útiles (T2m/WS10m/G(i)/G(h))")

    # Daily aggregation in UTC and in local time
    daily_utc = aggregate_daily(df, rad_col, tz=None)
    daily_local = aggregate_daily(df, rad_col, tz=LOCAL_TZ)

    if daily_utc is None and daily_local is None:
        return failure(200, "No se pudo construir agregación diaria")

    # Add the date column and lat/lon to each result
    daily_utc = finalize(daily_utc, "date_utc", "UTC")
    daily_local = finalize(daily_local, "date_local", LOCAL_TZ)

    return daily_utc, daily_local, None

# ---------- Load coordinates ----------
if not os.path.exists(INPUT_CSV):
    raise FileNotFoundError(f"No se encuentra {INPUT_CSV} en el directorio actual: {os.getcwd()}")

coords = pd.read_csv(INPUT_CSV)
# Both coordinate columns are required before any request is made.
if {"lat", "lon"} - set(coords.columns):
    raise ValueError(f"El fichero debe contener columnas 'lat' y 'lon'. Columnas leídas: {coords.columns.tolist()}")

# ---------- Main loop ----------
# Per-point results are accumulated here and concatenated when saving.
all_utc, all_local, errors = [], [], []

total_points = len(coords)
print(f"Total de puntos: {total_points}. Descargando 2019–2023...")

for i, row in coords.iterrows():
    lat = float(row["lat"])
    lon = float(row["lon"])
    # Progress prefix; the outcome is printed on the same line below.
    print(f"[{i+1}/{total_points}] lat={lat}, lon={lon} ...", end=" ")

    daily_utc, daily_local, err = process_point(lat, lon)
    if err is None:
        if daily_utc is not None:
            all_utc.append(daily_utc)
        if daily_local is not None:
            all_local.append(daily_local)
        print("OK")
    else:
        errors.append(err)
        print(f"ERROR -> {err.get('status')}: {err.get('error')}")

    # Pause after every point, successful or not, to throttle the request rate.
    time.sleep(SLEEP_BETWEEN_CALLS_SEC)

# ---------- Guardar salidas ----------
def ensure_and_save(df_list, out_path, date_col_name):
    """Concatenate per-point daily frames and write them to a CSV file.

    Args:
        df_list: List of DataFrames to stack; nothing is written when empty.
        out_path: Destination path of the CSV file.
        date_col_name: Name of the date column to place after lat/lon.

    Returns:
        The number of rows written, or 0 when ``df_list`` is empty.
    """
    if not df_list:
        return 0

    # Fixed output layout; any other column is left out of the file.
    ordered_cols = [
        "lat",
        "lon",
        date_col_name,
        "Temp_min_C",
        "Temp_max_C",
        "Temp_media_C",
        "RadiacionGlobal_media_Wm2",
        "Viento_media_ms",
    ]

    combined = pd.concat(df_list, ignore_index=True)
    # Columns that no input provided are added empty so the layout is stable.
    absent = [name for name in ordered_cols if name not in combined.columns]
    for name in absent:
        combined[name] = pd.NA

    combined = combined[ordered_cols]
    combined.to_csv(out_path, index=False)
    return len(combined)

# Write both daily outputs first, then report on each of them.
n_utc = ensure_and_save(all_utc, OUTPUT_OK_UTC, "date_utc")
n_local = ensure_and_save(all_local, OUTPUT_OK_LOCAL, "date_local")

# A row count of zero means nothing was written for that output.
print(
    f"\n✅ Guardado resultados UTC: {OUTPUT_OK_UTC} (filas: {n_utc})"
    if n_utc
    else "\n⚠️ No se obtuvieron resultados UTC."
)
print(
    f"✅ Guardado resultados Europe/Madrid: {OUTPUT_OK_LOCAL} (filas: {n_local})"
    if n_local
    else "⚠️ No se obtuvieron resultados Europe/Madrid."
)

# The error log is only written when at least one point failed.
if not errors:
    print("✅ No hubo errores.")
else:
    err_df = pd.DataFrame(errors)
    err_df.to_csv(OUTPUT_ERR, index=False)
    print(f"⚠️ Guardado log de errores: {OUTPUT_ERR} (filas: {len(err_df)})")

Total de puntos: 27. Descargando 2019–2023...
[1/27] lat=43.01008776, lon=-9.188413044333332 ... OK
[2/27] lat=37.257363038571434, lon=-7.200928419999999 ... OK
[3/27] lat=37.965370157307696, lon=-7.138822035 ... OK
[4/27] lat=39.49920750818182, lon=-7.171370430363637 ... OK
[5/27] lat=42.033670423414634, lon=-7.933325822585366 ... OK
[6/27] lat=42.82670514307985, lon=-7.964607534338403 ... OK
[7/27] lat=36.841508026165414, lon=-5.42390867006015 ... OK
[8/27] lat=38.095268199430606, lon=-5.974638156576512 ... OK
[9/27] lat=39.98708255200837, lon=-5.740223103259415 ... OK
[10/27] lat=41.29388043232274, lon=-5.572345771344744 ... OK
[11/27] lat=42.69003347615384, lon=-5.659510861418462 ... OK
[12/27] lat=37.01394045681416, lon=-3.5120165887123895 ... OK
[13/27] lat=38.03343402121848, lon=-3.4943284926764706 ... OK
[14/27] lat=39.97014820842106, lon=-3.5742388449760765 ... OK
[15/27] lat=41.327208103962704, lon=-3.6299377959358976 ... OK
[16/27] lat=42.73470266318117, lon=-3.4119604462524