In [6]:
import os
import time
import requests
import pandas as pd

In [7]:
BEA_API_KEY = os.getenv("BEA_API_KEY", "290EFCBB-FAEF-4B15-AED6-BBFB7B290205")  # <-- replace or set env var
URL = "https://apps.bea.gov/api/data/"

In [8]:
# Map BEA industry classifications to NAICS-2 style sectors (for QCEW joins)
# BEA uses a few grouped codes (31G=Manufacturing, 44RT=Retail, 48TW=Transport/Warehousing, G=Government)
BEA_TO_NAICS2 = {
    "11": "11",
    "21": "21",
    "22": "22",
    "23": "23",
    "31G": "31",   # Manufacturing
    "42": "42",
    "44RT": "44",  # Retail Trade
    "48TW": "48",  # Transportation & Warehousing
    "51": "51",
    "52": "52",
    "53": "53",
    "54": "54",
    "55": "55",
    "56": "56",
    "61": "61",
    "62": "62",
    "71": "71",
    "72": "72",
    "81": "81",
    "G":  "92",    # Government (map to NAICS 92 public sector if you want to keep it)
}

In [11]:
def fetch_bea_cagdp5(year: int, geo="COUNTY", timeout=60) -> pd.DataFrame:
    """Fetch nominal GDP by county & industry (CAGDP5) for a single year."""
    params = {
        "UserID": BEA_API_KEY,
        "method": "GetData",
        "datasetname": "Regional",
        "LineCode": "ALL",
        "TableName": "CAGDP5",     # Nominal GDP by County & Industry (current $)
        # IMPORTANT: no LineCode => returns all industries + totals
        "Year": str(year),
        "GeoFIPS": "COUNTY",            # "COUNTY" = all U.S. counties; use "CA" for California only
        "ResultFormat": "JSON"
    }
    r = requests.get(URL, params=params, timeout=timeout)
    r.raise_for_status()
    payload = r.json()
    beaapi = payload.get("BEAAPI", {})
    if "Results" not in beaapi:
        raise RuntimeError(f"BEA API error: {beaapi.get('Error', beaapi)}")
    df = pd.DataFrame(beaapi["Results"]["Data"])
    # normalize column names
    df.columns = [c.lower() for c in df.columns]
    # keep county rows (5-digit FIPS)
    df = df[df["geofips"].str.len() == 5].copy()
    # numeric GDP value (DataValue is string with commas)
    df["gdp_current_usd"] = pd.to_numeric(df["datavalue"].str.replace(",", ""), errors="coerce")
    # keep only useful columns
    keep = ["geofips", "geoname", "industryclassification", "description", "timeperiod", "gdp_current_usd", "unitofmeasure"]
    df = df[keep].rename(columns={"timeperiod": "year"})
    # map BEA industry classification to NAICS2
    df["naics2"] = df["industryclassification"].map(BEA_TO_NAICS2)
    # you can drop non-mapped rows (e.g., totals) or keep them as aggregates
    df = df[~df["naics2"].isna()].copy()
    # ensure types
    df["year"] = df["year"].astype(int)
    df["county_fips"] = df["geofips"]  # alias for clarity with QCEW joins
    return df

def fetch_years(years):
    frames = []
    for y in years:
        frames.append(fetch_bea_cagdp5(y))
        time.sleep(0.2)  # be nice to the API
    return pd.concat(frames, ignore_index=True)

In [12]:
if __name__ == "__main__":
    # Example: get 2022 for all U.S. counties
    df = fetch_bea_cagdp5(2022)
    # Optional: filter just California for a quick test
    # df = df[df["geoname"].str.endswith(", CA")].copy()

RuntimeError: BEA API error: {'APIErrorCode': '40', 'APIErrorDescription': 'The dataset requested requires parameters that were missing from the request.', 'ErrorDetail': {'Description': 'Invalid Value for Parameter TableName'}}

In [None]:
  # Save tidy file ready for QCEW join: (county_fips, naics2, year, gdp_current_usd)
    tidy = df[["county_fips", "naics2", "year", "gdp_current_usd"]].copy()
    tidy.to_csv("bea_cagdp5_county_naics2_2022.csv", index=False)

In [None]:
    print(tidy.head(), tidy.shape)