In [6]:
import os
import re
import time
import requests
import pandas as pd
from tqdm import tqdm

# ================== SETTINGS (edit these) ==================

# Coordinates table: the updated CSV covering 209 countries.
# Expected columns: a country-name column (Area / Country) plus
# Latitude and Longitude.
COORDS_PATH = "coordinates_countries_full_209.csv"

# One entry per NASA POWER parameter; each parameter is saved into its
# own subfolder.
PARAM_CONFIGS = [
    # monthly precipitation (corrected)
    dict(param="PRECTOTCORR_SUM", subfolder="precipitation_csv"),
    # all-sky surface shortwave downward radiation
    dict(param="ALLSKY_SFC_SW_DWN", subfolder="solar_radiation_csv"),
    # 2 m air temperature
    dict(param="T2M", subfolder="temperature_csv"),
]

# Inclusive year range requested from the API.
START_YEAR = 1981
END_YEAR = 2023

# Query options sent with every request.
COMMUNITY = "ag"
UNITS = "metric"

# Networking behaviour.
TIMEOUT_S = 45      # per-request timeout, in seconds
PAUSE_S = 0.25      # gentle rate-limit pause between requests
RETRIES = 4         # attempts per country before giving up

# When False, a country whose CSV already exists on disk is skipped.
OVERWRITE = False
# ===========================================================

def sanitize_filename(name: str) -> str:
    """
    Derive a filesystem-safe token from a country name.

    Runs of characters that are illegal in filenames (\\ / : * ? " < > |)
    and runs of whitespace are each replaced by a single underscore, then
    leading/trailing underscores are trimmed. The caller's original name is
    not modified; only the returned copy is meant for building file paths.
    """
    cleaned = str(name)
    # Order matters: illegal characters first, then whitespace.
    for pattern in (r"[\\/:*?\"<>|]+", r"\s+"):
        cleaned = re.sub(pattern, "_", cleaned)
    return cleaned.strip("_")

def build_power_url(lat: float, lon: float, param: str) -> str:
    """
    Assemble the NASA POWER monthly point-query URL for one parameter.

    The year range, community and units are taken from the module-level
    START_YEAR / END_YEAR / COMMUNITY / UNITS settings; the response is
    requested as CSV with the header enabled. Values are interpolated
    as-is (no URL-encoding is applied).
    """
    endpoint = "https://power.larc.nasa.gov/api/temporal/monthly/point"
    query_parts = [
        f"start={START_YEAR}",
        f"end={END_YEAR}",
        f"latitude={lat}",
        f"longitude={lon}",
        f"community={COMMUNITY}",
        f"parameters={param}",
        "format=csv",
        f"units={UNITS}",
        "header=true",
    ]
    return endpoint + "?" + "&".join(query_parts)

def normalize_text(resp: requests.Response) -> str:
    """
    Return the response body as text.

    The raw bytes are decoded as UTF-8, silently dropping any bytes that
    cannot be decoded. If that decoding step raises for any reason, the
    library's own ``resp.text`` decoding is used instead.
    """
    try:
        raw = resp.content
        decoded = raw.decode("utf-8", errors="ignore")
    except Exception:
        decoded = resp.text
    return decoded

def fetch_csv(lat: float, lon: float, param: str) -> str:
    """
    Fetch CSV text from NASA POWER for given lat, lon, and parameter.

    Up to RETRIES GET requests are made, each with a TIMEOUT_S timeout.
    Between attempts the function sleeps with a linearly growing delay
    (0.8 s, 1.6 s, ...); no delay is spent after the final attempt.

    Returns:
        The decoded body of the first response with HTTP status 200.

    Raises:
        RuntimeError: if no attempt produced an HTTP 200 response. The last
            underlying error (network failure or non-200 status) is chained
            as the exception's cause.
    """
    url = build_power_url(lat, lon, param)
    last_err = None
    for attempt in range(RETRIES):
        try:
            r = requests.get(url, timeout=TIMEOUT_S)
        except requests.RequestException as e:
            # Network-level failure (timeout, connection reset, ...): retry.
            last_err = e
        else:
            if r.status_code == 200:
                return normalize_text(r)
            # Keep a short excerpt of the body to make the failure diagnosable.
            last_err = RuntimeError(f"HTTP {r.status_code}: {r.text[:200]}")
        # Simple linear backoff, skipped once there is nothing left to retry.
        if attempt < RETRIES - 1:
            time.sleep(0.8 * (attempt + 1))
    raise RuntimeError(f"Failed after retries: {last_err}") from last_err

def _match_coord_column(col_name):
    """
    Classify one raw column name as a candidate for Area/Latitude/Longitude.

    Returns a ``(target, rank)`` pair, or ``None`` when the name is not a
    candidate. A lower rank is a stronger match:
      0 - the whole name is a known alias (e.g. "latitude", "country"),
      1 - some word in the name starts with "lat" / "lon",
      2 - "lat" / "lon" merely occurs somewhere inside the name
          (e.g. "population" contains "lat").
    """
    cl = str(col_name).strip().lower()
    if cl in ("area", "country", "country name", "name"):
        return "Area", 0
    words = re.split(r"[^a-z0-9]+", cl)
    # Latitude is tested before Longitude, so a name containing both
    # fragments counts as a Latitude candidate.
    for target, key, aliases in (
        ("Latitude", "lat", ("lat", "latitude")),
        ("Longitude", "lon", ("lon", "long", "longitude")),
    ):
        if key not in cl:
            continue
        if cl in aliases:
            return target, 0
        if any(word.startswith(key) for word in words):
            return target, 1
        return target, 2
    return None


def load_coords(path: str) -> pd.DataFrame:
    """
    Load coordinates from CSV (or Excel) and normalize to:
      - Area (canonical country name, same as in your file)
      - Latitude
      - Longitude
    The Area values will be used exactly as in this file (apart from
    stripping surrounding whitespace).

    A path ending in ".csv" is read with ``pd.read_csv``; anything else is
    read with ``pd.read_excel``. When several columns could serve as the
    same field, the strongest name match wins (ties go to the leftmost
    column), so an unrelated column such as "Population" cannot shadow a
    real "Latitude" column.

    Returns:
        A DataFrame with exactly the columns Area, Latitude, Longitude and a
        fresh 0..n-1 index. Rows with a missing/blank Area or a non-numeric
        Latitude/Longitude are dropped.

    Raises:
        KeyError: if no column can be identified for one of the three fields.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == ".csv":
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    # Pick the best-ranked source column for each required field.
    targets = ("Area", "Latitude", "Longitude")
    best = {}
    for col in df.columns:
        match = _match_coord_column(col)
        if match is None:
            continue
        target, rank = match
        if target not in best or rank < best[target][0]:
            best[target] = (rank, col)

    missing = [t for t in targets if t not in best]
    if missing:
        raise KeyError(
            f"No column found for {missing} in {path}; "
            f"available columns: {list(df.columns)}"
        )

    # Keep only the necessary columns, under their canonical names.
    df = df[[best[t][1] for t in targets]].copy()
    df.columns = list(targets)

    # Remember which names were missing *before* the string conversion:
    # astype(str) would otherwise turn NaN into the literal text "nan".
    names = df["Area"].astype(str).str.strip()
    df["Area"] = names.mask(df["Area"].isna() | (names == ""))

    df["Latitude"] = pd.to_numeric(df["Latitude"], errors="coerce")
    df["Longitude"] = pd.to_numeric(df["Longitude"], errors="coerce")
    df = df.dropna(subset=["Area", "Latitude", "Longitude"]).reset_index(drop=True)
    return df

def _write_text_atomically(path: str, text: str) -> None:
    """
    Write *text* to *path* through a temporary sibling file.

    The data is first written to ``<path>.part`` and then renamed over
    *path*, so *path* only ever appears once it holds the complete content.
    An interrupted write therefore cannot leave a truncated CSV that a later
    run (with OVERWRITE = False) would mistake for a finished download.
    """
    tmp_path = f"{path}.part"
    try:
        with open(tmp_path, "w", encoding="utf-8", newline="") as f:
            f.write(text)
        os.replace(tmp_path, path)
    finally:
        # Still present only if the write or the rename did not complete.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def main():
    """
    Download one CSV per (parameter, country) pair.

    Loads the coordinate table from COORDS_PATH, then for every entry in
    PARAM_CONFIGS creates the output subfolder and fetches the monthly
    series for each country. Existing files are skipped unless OVERWRITE is
    True. A failure for one country is reported and counted, and the loop
    continues. A per-parameter summary is printed at the end of each pass.
    """
    coords = load_coords(COORDS_PATH)
    print(f"Loaded {len(coords)} coordinate rows from {COORDS_PATH}.")
    print("Example rows:")
    print(coords.head())

    # Loop over each parameter configuration
    for cfg in PARAM_CONFIGS:
        param = cfg["param"]
        out_dir = cfg["subfolder"]
        os.makedirs(out_dir, exist_ok=True)

        print("\n======================================")
        print(f"Downloading parameter: {param}")
        print(f"Saving to folder: {out_dir}")
        print("======================================")

        success, fail, skipped = 0, 0, 0

        for row in tqdm(coords.itertuples(index=False), total=len(coords)):
            area_name = row.Area             # canonical country name (as in the CSV)
            lat = float(row.Latitude)
            lon = float(row.Longitude)

            # Use sanitized name only for filename safety
            safe_area = sanitize_filename(area_name)

            fname = (
                f"{safe_area}_"
                f"{lat:.4f}_{lon:.4f}_"
                f"{param}_{START_YEAR}_{END_YEAR}.csv"
            )
            out_path = os.path.join(out_dir, fname)

            if not OVERWRITE and os.path.exists(out_path):
                # already downloaded for this param
                skipped += 1
                continue

            try:
                csv_text = fetch_csv(lat, lon, param)
                # The first line records the exact (unsanitized) Area name.
                _write_text_atomically(out_path, f"# Area: {area_name}\n" + csv_text)
                success += 1
            except Exception as e:
                # Boundary handler: report and move on to the next country.
                fail += 1
                print(f"[error] {area_name} ({lat},{lon}) [{param}]: {e}")
            time.sleep(PAUSE_S)

        print(f"\nDone for parameter: {param}")
        print(f"Saved to: {out_dir}")
        print(f"Downloaded: {success}   Failed: {fail}")
        print(f"Skipped (already present): {skipped}")

    print("\nAll parameters completed.")

# Run the download only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()


Loaded 209 coordinate rows from coordinates_countries_full_209.csv.
Example rows:
        Area  Latitude  Longitude
0    Albania     41.33      19.82
1    Algeria     28.03       1.66
2     Angola    -11.20      17.87
3  Argentina    -38.42     -63.62
4    Armenia     40.07      45.04

Downloading parameter: PRECTOTCORR_SUM
Saving to folder: precipitation_csv


100%|██████████| 209/209 [17:57<00:00,  5.15s/it]



Done for parameter: PRECTOTCORR_SUM
Saved to: precipitation_csv
Downloaded: 209   Failed: 0

Downloading parameter: ALLSKY_SFC_SW_DWN
Saving to folder: solar_radiation_csv


100%|██████████| 209/209 [14:00<00:00,  4.02s/it]



Done for parameter: ALLSKY_SFC_SW_DWN
Saved to: solar_radiation_csv
Downloaded: 209   Failed: 0

Downloading parameter: T2M
Saving to folder: temperature_csv


100%|██████████| 209/209 [17:48<00:00,  5.11s/it]


Done for parameter: T2M
Saved to: temperature_csv
Downloaded: 209   Failed: 0

All parameters completed.



