In [2]:
import os
import re
import math
import time
import requests
import pandas as pd

NOAA_TOKEN = os.getenv("NOAA_TOKEN") or "PASTE_YOUR_TOKEN_STRING_HERE"

SESSION = requests.Session()
SESSION.headers.update({"token": NOAA_TOKEN})

BASE = "https://www.ncei.noaa.gov/cdo-web/api/v2"

def get_json(url, params, pause=0.4):
    """Thin wrapper with tiny pause to be nice to the API."""
    r = SESSION.get(url, params=params, timeout=30)
    r.raise_for_status()
    time.sleep(pause)
    return r.json()

# --- Step A: Find the correct GHCND station for LaGuardia (LGA) ---
# Use a tight bounding box around LGA (~0.2°) and search GHCND stations.
extent = "40.6,-74.1,41.0,-73.6"   # south,west,north,east (NYC area around LGA)
stations_url = f"{BASE}/stations"
params = {
    "datasetid": "GHCND",
    "extent": extent,
    "limit": 1000,
}

stations = get_json(stations_url, params).get("results", [])

# Prefer names that contain 'LAGUARDIA' or 'LA GUARDIA'
def looks_like_lga(name: str) -> bool:
    return bool(re.search(r"\bLA ?GUARDIA\b", name.upper()))

candidates = [s for s in stations if looks_like_lga(s.get("name",""))]

if not candidates:
    # fallback: take the closest “USW000…” airport station in the box
    candidates = [s for s in stations if s["id"].startswith("GHCND:USW")]

if not candidates:
    raise RuntimeError("Could not find a suitable station near LaGuardia.")

# Usually this resolves to GHCND:USW00014732 (LaGuardia Airport)
station = sorted(candidates, key=lambda s: len(s["name"]))[0]
station_id = station["id"]
station["id"], station["name"]




HTTPError: 400 Client Error:  for url: https://www.ncei.noaa.gov/cdo-web/api/v2/stations?datasetid=GHCND&extent=40.6%2C-74.1%2C41.0%2C-73.6&limit=1000

In [4]:
import os, requests, pandas as pd
from datetime import date

NOAA_TOKEN = os.getenv("NOAA_TOKEN", "QNXPwAeNDheFnZzPsFnbzqaSbKdsucXw")
BASE = "https://www.ncei.noaa.gov/cdo-web/api/v2"
SESSION = requests.Session()
SESSION.headers.update({"token": NOAA_TOKEN})

STATION_ID = "GHCND:USW00014732"   # LaGuardia
START = "2022-01-01"
END   = "2022-12-31"

params = {
    "datasetid": "GHCND",
    "stationid": STATION_ID,
    "startdate": START,
    "enddate": END,
    "units": "standard",   # 'standard' = inches/°F; 'metric' also works
    "limit": 1000
}

r = SESSION.get(f"{BASE}/data", params=params, timeout=30)
r.raise_for_status()
raw = r.json().get("results", [])

len(raw), raw[:3]


(1000,
 [{'date': '2022-01-01T00:00:00',
   'datatype': 'ADPT',
   'station': 'GHCND:USW00014732',
   'attributes': ',,W,',
   'value': 100},
  {'date': '2022-01-01T00:00:00',
   'datatype': 'ASLP',
   'station': 'GHCND:USW00014732',
   'attributes': ',,W,',
   'value': 10078},
  {'date': '2022-01-01T00:00:00',
   'datatype': 'ASTP',
   'station': 'GHCND:USW00014732',
   'attributes': ',,W,',
   'value': 10078}])

In [6]:
# Raw is long format (date, datatype, value). Pivot to wide per day.
df = pd.DataFrame(raw)
df["date"] = pd.to_datetime(df["date"]).dt.date

wide = (
    df.pivot_table(index="date", columns="datatype", values="value", aggfunc="first")
      .rename_axis(None, axis=1)
      .reset_index()
)

# Keep common fields (present when available at station):
cols = [c for c in ["date","TMAX","TMIN","PRCP","SNOW","AWND"] if c in wide.columns]
wide = wide[cols]

# Optional unit tweaks (NOAA GHCND uses tenths for some variables when 'metric';
# with units='standard' above values are already in °F and inches; keep as-is.)
# If you used units='metric' you'll often need:
#   wide["TMAX"] = wide["TMAX"] / 10.0
#   wide["TMIN"] = wide["TMIN"] / 10.0
#   wide["PRCP"] = wide["PRCP"] / 10.0  # mm
#   wide["AWND"] = wide["AWND"] / 10.0  # m/s

wide.sort_values("date", inplace=True)
wide.to_csv("laguardia_weather_2022.csv", index=False)
wide.head(), wide.shape


(         date  TMAX  TMIN  PRCP  SNOW  AWND
 0  2022-01-01  57.0  50.0  0.76   0.0   6.3
 1  2022-01-02  60.0  39.0  0.04   0.0   9.6
 2  2022-01-03  39.0  24.0  0.00   0.0  14.3
 3  2022-01-04  36.0  21.0  0.00   0.0   8.7
 4  2022-01-05  48.0  32.0  0.24   0.0   7.6,
 (54, 6))