In [8]:
# ================================================================
#   Cell 2 · Pull hourly PM₂.₅ for Reno, NV  (OpenAQ v3, robust)
# ================================================================
API_KEY = "4f9e0c40319224317a3f51a4b502552849cadf9768a64c47db245b5bb8d545b8"

BBOX          = "-119.90,39.48,-119.75,39.58"   # Reno city & suburbs
START, END    = "2024-07-01", "2024-07-31"
LOC_LIMIT     = 100      # rows per page (locations)
MEAS_LIMIT    = 1000     # rows per page (measurements)

import os, requests, pandas as pd
from tqdm import trange

DATA_DIR = "../data"
os.makedirs(DATA_DIR, exist_ok=True)
headers = {"X-API-Key": API_KEY}

# ------------------------------------------------ 1) discover sensors
sensor_ids, page = [], 1
while True:
    resp = requests.get(
        "https://api.openaq.org/v3/locations",
        params={"bbox": BBOX, "limit": LOC_LIMIT, "page": page},
        headers=headers, timeout=15
    ).json()
    if "error" in resp:
        raise RuntimeError(f"OpenAQ error: {resp['error']['message']}")
    for rec in resp["results"]:
        for s in rec["sensors"]:
            if s["parameter"]["name"] == "pm25":
                sensor_ids.append(s["id"])
    if len(resp["results"]) < LOC_LIMIT:
        break
    page += 1

print(f"Found {len(sensor_ids)} PM₂.₅ sensors inside Reno box.\n")

# ------------------------------------------------ 2) download measurements
rows = []
for idx, sid in enumerate(sensor_ids, 1):
    print(f"⏳  Sensor {idx}/{len(sensor_ids)}  (id={sid})")
    page = 1
    while True:
        r = requests.get(
            f"https://api.openaq.org/v3/sensors/{sid}/hours",
            params={"date_from": START, "date_to": END,
                    "page": page, "limit": MEAS_LIMIT},
            headers=headers, timeout=15
        ).json()

        # graceful failure messages
        if "error" in r:
            print("   ⚠️ ", r["error"]["message"])
            break

        recs = r.get("results", [])          # ← avoid KeyError
        if not recs:                         # empty list ⇒ nothing more
            break

        rows.extend({
            "sensor_id": sid,
            "datetime_utc": rec["period"]["datetimeFrom"]["utc"],
            "pm25": rec["value"]
        } for rec in recs)

        if len(recs) < MEAS_LIMIT:           # last page for this sensor
            break
        page += 1

# ------------------------------------------------ 3) save
df = pd.DataFrame(rows)
out_csv = f"{DATA_DIR}/pm25_Reno_{START}.csv"
df.to_csv(out_csv, index=False)
print(f"\n✔ Done!  Saved {len(df):,} rows ➜ {out_csv}")
df.head()


Found 6 PM₂.₅ sensors inside Reno box.

⏳  Sensor 1/6  (id=3852)
⏳  Sensor 2/6  (id=25573)
⏳  Sensor 3/6  (id=7613457)
⏳  Sensor 4/6  (id=10141010)
⏳  Sensor 5/6  (id=10141063)
⏳  Sensor 6/6  (id=10141031)

✔ Done!  Saved 56,639 rows ➜ ../data/pm25_Reno_2024-07-01.csv


Unnamed: 0,sensor_id,datetime_utc,pm25
0,3852,2016-03-06T19:00:00Z,1.0
1,3852,2016-03-06T20:00:00Z,2.0
2,3852,2016-03-07T14:00:00Z,0.0
3,3852,2016-03-07T15:00:00Z,2.0
4,3852,2016-03-10T08:00:00Z,7.0
