# Important Note:
Weather Approximation Method:
Because voter turnout is available only at the statewide level, Election Day weather was matched to turnout using a single representative NOAA weather station in each state’s capital city (airport-based climate stations). While weather can vary across different parts of a state, capital-city stations provide the most complete and stable records and are commonly used in statewide analyses. The weather variables should therefore be interpreted as general Election Day conditions rather than precise conditions experienced by all voters.

Uses the capital airport

In [10]:
import time
from pathlib import Path
import pandas as pd
import requests
from requests.exceptions import ReadTimeout, ConnectionError, HTTPError

In [None]:
BASE_URL = "https://www.ncei.noaa.gov/access/services/data/v1"

# One representative station per state (capital airport)
STATE_INFO = {
    "New York": {
        "station_id": "USW00014735",  # Albany International Airport
        "state_abbr": "NY",
    },
    "California": {
        "station_id": "USW00023271",  # Sacramento International Airport
        "state_abbr": "CA",
    },
    "Ohio": {
        "station_id": "USW00014821",  # John Glenn Columbus Intl Airport
        "state_abbr": "OH",
    },
    "Michigan": {
        "station_id": "USW00014836",  # Capital Region Intl (Lansing)
        "state_abbr": "MI",
    },
    "Florida": {
        "station_id": "USW00093805",  # Tallahassee International Airport
        "state_abbr": "FL",
    },
    "Utah": {
        "station_id": "USW00024127",  # Salt Lake City International Airport
        "state_abbr": "UT",
    },
}

# Election Day dates for Presidential + Midterm elections
ELECTION_DATES = {
    # Presidential
    1980: ("1980-11-04", "Presidential"),
    1984: ("1984-11-06", "Presidential"),
    1988: ("1988-11-08", "Presidential"),
    1992: ("1992-11-03", "Presidential"),
    1996: ("1996-11-05", "Presidential"),
    2000: ("2000-11-07", "Presidential"),
    2004: ("2004-11-02", "Presidential"),
    2008: ("2008-11-04", "Presidential"),
    2012: ("2012-11-06", "Presidential"),
    2016: ("2016-11-08", "Presidential"),
    2020: ("2020-11-03", "Presidential"),
    2024: ("2024-11-05", "Presidential"),

    # Midterms
    1982: ("1982-11-02", "Midterm"),
    1986: ("1986-11-04", "Midterm"),
    1990: ("1990-11-06", "Midterm"),
    1994: ("1994-11-08", "Midterm"),
    1998: ("1998-11-03", "Midterm"),
    2002: ("2002-11-05", "Midterm"),
    2006: ("2006-11-07", "Midterm"),
    2010: ("2010-11-02", "Midterm"),
    2014: ("2014-11-04", "Midterm"),
    2018: ("2018-11-06", "Midterm"),
    2022: ("2022-11-08", "Midterm"),
}

# Output path (adjust if your project structure is different)
OUT_PATH = Path("data/processed-data/election_day_weather_full.csv")
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)


def fetch_weather_with_retry(station_id: str, date: str, max_retries: int = 5):
    """
    Fetch TMAX, TMIN, PRCP, SNOW for a single station on a single date.
    Retries on timeouts / connection errors with exponential backoff.
    """
    params = {
        "dataset": "daily-summaries",
        "stations": station_id,
        "startDate": date,
        "endDate": date,
        "dataTypes": "TMAX,TMIN,PRCP,SNOW",
        "units": "standard",  # Fahrenheit, inches
        "format": "json",
        "includeStationName": "1",
        "includeStationLocation": "1",
    }

    for attempt in range(1, max_retries + 1):
        try:
            resp = requests.get(
                BASE_URL,
                params=params,
                timeout=60,  # seconds
            )
            resp.raise_for_status()
            data = resp.json()
            if not data:
                print(f"  No data returned for {station_id} on {date}")
                return None
            return data[0]

        except (ReadTimeout, ConnectionError) as e:
            print(
                f"  Timeout/connection error for {station_id} {date} "
                f"(attempt {attempt}/{max_retries}): {e}"
            )
            sleep_sec = 2 ** attempt  # 2, 4, 8, 16, ...
            print(f"  Sleeping {sleep_sec} seconds before retry...")
            time.sleep(sleep_sec)

        except HTTPError as e:
            # 4xx / 5xx – log and bail on this request
            print(f"  HTTP error for {station_id} {date}: {e}")
            return None

        except ValueError as e:
            # JSON decoding error or unexpected format
            print(f"  JSON parse error for {station_id} {date}: {e}")
            return None

    print(f"  Giving up on {station_id} {date} after {max_retries} attempts.")
    return None


# 3. Main loop over years × states
rows = []

for year, (date, election_type) in sorted(ELECTION_DATES.items()):
    for state, info in STATE_INFO.items():
        station_id = info["station_id"]
        state_abbr = info["state_abbr"]

        print(f"Fetching {state} ({state_abbr}) — {year} [{election_type}] on {date}")
        
        rec = fetch_weather_with_retry(station_id, date)
        
        # Be polite to NOAA servers even on fail
        time.sleep(0.7)

        if rec is None:
            print(f"  Skipping {state} {year} — no data returned.")
            continue

        rows.append({
            "STATE": state,
            "STATE_ABB": state_abbr,
            "YEAR": year,
            "ELECTION_TYPE": election_type,
            "ELECTION_DATE": date,
            "STATION_ID": rec.get("STATION"),
            "STATION_NAME": rec.get("NAME"),
            "LATITUDE": rec.get("LATITUDE"),
            "LONGITUDE": rec.get("LONGITUDE"),
            "TMAX": rec.get("TMAX"),
            "TMIN": rec.get("TMIN"),
            "PRCP": rec.get("PRCP"),
            "SNOW": rec.get("SNOW"),
        })

df = pd.DataFrame(rows)

# Convert numeric fields
for col in ["TMAX", "TMIN", "PRCP", "SNOW", "LATITUDE", "LONGITUDE"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Derived variables
df["TAVG"] = (df["TMAX"] + df["TMIN"]) / 2
df["RAIN_ANY"] = (df["PRCP"] > 0).astype("Int64")
df["SNOW_ANY"] = (df["SNOW"] > 0).astype("Int64")

# Save to CSV
df.to_csv(OUT_PATH, index=False)

print(f"Saved {len(df)} rows to {OUT_PATH}")
df.head()


Fetching New York (NY) — 1980 [Presidential] on 1980-11-04
Fetching California (CA) — 1980 [Presidential] on 1980-11-04
  HTTP error for USW00023271 1980-11-04: 503 Server Error: Service Unavailable for url: https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&stations=USW00023271&startDate=1980-11-04&endDate=1980-11-04&dataTypes=TMAX%2CTMIN%2CPRCP%2CSNOW&units=standard&format=json&includeStationName=1&includeStationLocation=1
  Skipping California 1980 — no data returned.
Fetching Ohio (OH) — 1980 [Presidential] on 1980-11-04
Fetching Michigan (MI) — 1980 [Presidential] on 1980-11-04
Fetching Florida (FL) — 1980 [Presidential] on 1980-11-04
Fetching Utah (UT) — 1980 [Presidential] on 1980-11-04
Fetching New York (NY) — 1982 [Midterm] on 1982-11-02
Fetching California (CA) — 1982 [Midterm] on 1982-11-02
Fetching Ohio (OH) — 1982 [Midterm] on 1982-11-02
Fetching Michigan (MI) — 1982 [Midterm] on 1982-11-02
Fetching Florida (FL) — 1982 [Midterm] on 1982-11-02
Fetchin

Unnamed: 0,STATE,STATE_ABB,YEAR,ELECTION_TYPE,ELECTION_DATE,STATION_ID,STATION_NAME,LATITUDE,LONGITUDE,TMAX,TMIN,PRCP,SNOW,TAVG,RAIN_ANY,SNOW_ANY
0,New York,NY,1980,Presidential,1980-11-04,USW00014735,"ALBANY INTERNATIONAL AIRPORT, NY US",42.74722,-73.79913,53,43,0.0,0.0,48.0,0,0
1,Ohio,OH,1980,Presidential,1980-11-04,USW00014821,"JOHN GLENN INTERNATIONAL AIRPORT, OH US",39.99068,-82.87703,59,46,0.07,0.0,52.5,1,0
2,Michigan,MI,1980,Presidential,1980-11-04,USW00014836,"LANSING CAPITAL CITY AIRPORT, MI US",42.77609,-84.59972,53,35,0.12,0.0,44.0,1,0
3,Florida,FL,1980,Presidential,1980-11-04,USW00093805,"TALLAHASSEE REGIONAL AIRPORT, FL US",30.39354,-84.35136,81,63,0.03,0.0,72.0,1,0
4,Utah,UT,1980,Presidential,1980-11-04,USW00024127,"SALT LAKE CITY INTERNATIONAL AIRPORT, UT US",40.77069,-111.96503,67,38,0.0,0.0,52.5,0,0


In [12]:
df

Unnamed: 0,STATE,STATE_ABB,YEAR,ELECTION_TYPE,ELECTION_DATE,STATION_ID,STATION_NAME,LATITUDE,LONGITUDE,TMAX,TMIN,PRCP,SNOW,TAVG,RAIN_ANY,SNOW_ANY
0,New York,NY,1980,Presidential,1980-11-04,USW00014735,"ALBANY INTERNATIONAL AIRPORT, NY US",42.74722,-73.79913,53,43,0.00,0.0,48.0,0,0
1,Ohio,OH,1980,Presidential,1980-11-04,USW00014821,"JOHN GLENN INTERNATIONAL AIRPORT, OH US",39.99068,-82.87703,59,46,0.07,0.0,52.5,1,0
2,Michigan,MI,1980,Presidential,1980-11-04,USW00014836,"LANSING CAPITAL CITY AIRPORT, MI US",42.77609,-84.59972,53,35,0.12,0.0,44.0,1,0
3,Florida,FL,1980,Presidential,1980-11-04,USW00093805,"TALLAHASSEE REGIONAL AIRPORT, FL US",30.39354,-84.35136,81,63,0.03,0.0,72.0,1,0
4,Utah,UT,1980,Presidential,1980-11-04,USW00024127,"SALT LAKE CITY INTERNATIONAL AIRPORT, UT US",40.77069,-111.96503,67,38,0.00,0.0,52.5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,California,CA,2024,Presidential,2024-11-05,USW00023271,"SACRAMENTO 5 ESE, CA US",38.55520,-121.41830,70,47,0.00,0.0,58.5,0,0
132,Ohio,OH,2024,Presidential,2024-11-05,USW00014821,"JOHN GLENN INTERNATIONAL AIRPORT, OH US",39.99068,-82.87703,79,62,0.00,0.0,70.5,0,0
133,Michigan,MI,2024,Presidential,2024-11-05,USW00014836,"LANSING CAPITAL CITY AIRPORT, MI US",42.77609,-84.59972,69,59,0.50,0.0,64.0,1,0
134,Florida,FL,2024,Presidential,2024-11-05,USW00093805,"TALLAHASSEE REGIONAL AIRPORT, FL US",30.39354,-84.35136,89,69,0.00,0.0,79.0,0,0


## Missing Rows

In [13]:
df[(df["STATE"] == "California") & (df["YEAR"] == 1980)]

df[(df["STATE"] == "Ohio") & (df["YEAR"] == 1986)]

Unnamed: 0,STATE,STATE_ABB,YEAR,ELECTION_TYPE,ELECTION_DATE,STATION_ID,STATION_NAME,LATITUDE,LONGITUDE,TMAX,TMIN,PRCP,SNOW,TAVG,RAIN_ANY,SNOW_ANY


In [None]:
manual_rows = pd.DataFrame([
    {
        "STATE": "California",
        "STATE_ABB": "CA",
        "YEAR": 1980,
        "ELECTION_TYPE": "Presidential",
        "ELECTION_DATE": "1980-11-04",
        "STATION_ID": "USW00023271",
        "STATION_NAME": "SACRAMENTO 5 ESE, CA US",
        "LATITUDE": 38.5552,
        "LONGITUDE": -121.4183,
        "TMAX": 84,
        "TMIN": 55,
        "PRCP": 0.0,
        "SNOW": None,   
        "TAVG": 69.5,
        "RAIN_ANY": 0,
        "SNOW_ANY": 0,
    },
    {
        "STATE": "Ohio",
        "STATE_ABB": "OH",
        "YEAR": 1986,
        "ELECTION_TYPE": "Midterm",
        "ELECTION_DATE": "1986-11-04",
        "STATION_ID": "USW00014821",
        "STATION_NAME": "JOHN GLENN INTERNATIONAL AIRPORT, OH US",
        "LATITUDE": 39.99068,
        "LONGITUDE": -82.87703,
        "TMAX": 50,
        "TMIN": 44,
        "PRCP": 0.2,
        "SNOW": 0.0,
        "TAVG": 47.0,
        "RAIN_ANY": 1,
        "SNOW_ANY": 0,
    }
])

df = pd.concat([df, manual_rows], ignore_index=True)


In [15]:
df.to_csv(OUT_PATH, index=False)
print("Final rows:", df.shape[0])


Final rows: 138


In [16]:
df

Unnamed: 0,STATE,STATE_ABB,YEAR,ELECTION_TYPE,ELECTION_DATE,STATION_ID,STATION_NAME,LATITUDE,LONGITUDE,TMAX,TMIN,PRCP,SNOW,TAVG,RAIN_ANY,SNOW_ANY
0,New York,NY,1980,Presidential,1980-11-04,USW00014735,"ALBANY INTERNATIONAL AIRPORT, NY US",42.74722,-73.79913,53,43,0.00,0.0,48.0,0,0
1,Ohio,OH,1980,Presidential,1980-11-04,USW00014821,"JOHN GLENN INTERNATIONAL AIRPORT, OH US",39.99068,-82.87703,59,46,0.07,0.0,52.5,1,0
2,Michigan,MI,1980,Presidential,1980-11-04,USW00014836,"LANSING CAPITAL CITY AIRPORT, MI US",42.77609,-84.59972,53,35,0.12,0.0,44.0,1,0
3,Florida,FL,1980,Presidential,1980-11-04,USW00093805,"TALLAHASSEE REGIONAL AIRPORT, FL US",30.39354,-84.35136,81,63,0.03,0.0,72.0,1,0
4,Utah,UT,1980,Presidential,1980-11-04,USW00024127,"SALT LAKE CITY INTERNATIONAL AIRPORT, UT US",40.77069,-111.96503,67,38,0.00,0.0,52.5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,Michigan,MI,2024,Presidential,2024-11-05,USW00014836,"LANSING CAPITAL CITY AIRPORT, MI US",42.77609,-84.59972,69,59,0.50,0.0,64.0,1,0
134,Florida,FL,2024,Presidential,2024-11-05,USW00093805,"TALLAHASSEE REGIONAL AIRPORT, FL US",30.39354,-84.35136,89,69,0.00,0.0,79.0,0,0
135,Utah,UT,2024,Presidential,2024-11-05,USW00024127,"SALT LAKE CITY INTERNATIONAL AIRPORT, UT US",40.77069,-111.96503,43,33,0.01,0.0,38.0,1,0
136,California,CA,1980,Presidential,1980-11-04,USW00023271,"SACRAMENTO 5 ESE, CA US",38.55520,-121.41830,84,55,0.00,,69.5,0,0
