In [5]:
# Notebook setup: turn on module auto-reloading, import dependencies, and load
# the shared project configuration.
#
# `%autoreload 2` asks IPython to reload imported modules before each cell is
# executed, so edits to local helper modules (the original note mentions
# lib.py) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import pandas as pd
import requests

# `data_dir` is expected to be assigned by config.py when it is executed below;
# binding it to None first only gives static linters a definition to see.
# NOTE(review): the relative path to config.py presumably resolves against the
# kernel's working directory -- confirm the notebook is launched from its own folder.
data_dir = None
%run ../../config.py

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project root: /Users/eric/proj/scratch/WirelessIncome
Data directory: /Users/eric/proj/scratch/WirelessIncome/data


# ACS 2022 Tract Population

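This notebook queries the Census ACS 5-Year 2022 dataset for total population (`B01003_001E`) by census tract for every state-level geography returned by the API (52 in the recorded run, including Alaska and Hawaii), and saves the result as a Parquet file for downstream analysis. Note that no filter to the contiguous United States is applied in this notebook; if that restriction is needed, it must be applied downstream.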

In [6]:
# --- Query parameters shared by every Census API request in this notebook ---
ACS_YEAR = 2022
DATASET_BASE = f"https://api.census.gov/data/{ACS_YEAR}/acs/acs5"
POPULATION_VAR = "B01003_001E"

# Ask the API for the list of state-level geographies; the JSON reply is a
# list of rows whose first row holds the column names.
states_url = f"{DATASET_BASE}?get=NAME&for=state:*"
response = requests.get(states_url, timeout=30)
response.raise_for_status()
raw_states = response.json()

# Build the lookup table step by step: keep the name and FIPS code, left-pad
# the code to two characters, then order the rows by code.
states_table = pd.DataFrame(raw_states[1:], columns=raw_states[0])
name_and_code = states_table[["NAME", "state"]]
padded_codes = name_and_code["state"].str.zfill(2)
states_df = (
    name_and_code.assign(state=padded_codes)
    .sort_values("state")
    .reset_index(drop=True)
)

# Every code returned by the API is kept; this list drives the tract download.
state_fips = list(states_df["state"])

print(f"Using {len(state_fips)} states and territories")
states_df.head()

Using 52 states and territories


Unnamed: 0,NAME,state
0,Alabama,1
1,Alaska,2
2,Arizona,4
3,Arkansas,5
4,California,6


In [7]:
def _fetch_tracts_for_state(state):
    """Download the tract-level rows for one state FIPS code.

    Requests NAME and the population variable for every tract in ``state``,
    raises on an HTTP error status, and returns a DataFrame whose ``state``,
    ``county`` and ``tract`` columns are left-padded with zeros to 2, 3 and 6
    characters respectively.
    """
    url = f"{DATASET_BASE}?get=NAME,{POPULATION_VAR}&for=tract:*&in=state:{state}"
    reply = requests.get(url, timeout=60)
    reply.raise_for_status()
    payload = reply.json()
    # First row of the payload is the header; the rest are data rows.
    frame = pd.DataFrame(payload[1:], columns=payload[0])
    for column, width in (("state", 2), ("county", 3), ("tract", 6)):
        frame[column] = frame[column].str.zfill(width)
    return frame


# One request per state code, with a progress line after each download.
records = []
for idx, state in enumerate(state_fips, start=1):
    tract_rows = _fetch_tracts_for_state(state)
    records.append(tract_rows)
    print(f"{idx:02d}/{len(state_fips)} state {state}: {len(tract_rows)} tracts")

if len(records) == 0:
    raise RuntimeError("No ACS tract population data was fetched.")

# Stack the per-state frames, give the population variable a readable name,
# convert it to numbers (unparseable values become NaN), build the GEOID by
# concatenating the state, county and tract codes, and fix the column order.
df = (
    pd.concat(records, ignore_index=True)
    .rename(columns={POPULATION_VAR: "population"})
    .assign(
        population=lambda d: pd.to_numeric(d["population"], errors="coerce"),
        GEOID=lambda d: d["state"] + d["county"] + d["tract"],
        acs_year=ACS_YEAR,
    )
    .loc[:, ["GEOID", "NAME", "population", "acs_year", "state", "county", "tract"]]
)

df.head()

01/52 state 01: 1437 tracts
02/52 state 02: 177 tracts
02/52 state 02: 177 tracts
03/52 state 04: 1765 tracts
03/52 state 04: 1765 tracts
04/52 state 05: 823 tracts
04/52 state 05: 823 tracts
05/52 state 06: 9129 tracts
05/52 state 06: 9129 tracts
06/52 state 08: 1447 tracts
06/52 state 08: 1447 tracts
07/52 state 09: 884 tracts
07/52 state 09: 884 tracts
08/52 state 10: 262 tracts
08/52 state 10: 262 tracts
09/52 state 11: 206 tracts
09/52 state 11: 206 tracts
10/52 state 12: 5160 tracts
10/52 state 12: 5160 tracts
11/52 state 13: 2796 tracts
11/52 state 13: 2796 tracts
12/52 state 15: 461 tracts
12/52 state 15: 461 tracts
13/52 state 16: 456 tracts
13/52 state 16: 456 tracts
14/52 state 17: 3265 tracts
14/52 state 17: 3265 tracts
15/52 state 18: 1696 tracts
15/52 state 18: 1696 tracts
16/52 state 19: 896 tracts
16/52 state 19: 896 tracts
17/52 state 20: 829 tracts
17/52 state 20: 829 tracts
18/52 state 21: 1306 tracts
18/52 state 21: 1306 tracts
19/52 state 22: 1388 tracts
19/52 stat

Unnamed: 0,GEOID,NAME,population,acs_year,state,county,tract
0,1001020100,Census Tract 201; Autauga County; Alabama,1865,2022,1,1,20100
1,1001020200,Census Tract 202; Autauga County; Alabama,1861,2022,1,1,20200
2,1001020300,Census Tract 203; Autauga County; Alabama,3492,2022,1,1,20300
3,1001020400,Census Tract 204; Autauga County; Alabama,3987,2022,1,1,20400
4,1001020501,Census Tract 205.01; Autauga County; Alabama,4121,2022,1,1,20501


In [8]:
# Write the tract population table to a Parquet file in the project data
# directory and echo the destination path.
#
# `data_dir` starts out as a None placeholder in the setup cell and is only
# usable once config.py has assigned it, so fail with a clear message instead
# of an opaque TypeError from `None / str`.
if data_dir is None:
    raise RuntimeError(
        "data_dir is not set; run the setup cell that executes config.py first."
    )

# Derive the file name from ACS_YEAR so it always matches the vintage that was
# queried (for ACS_YEAR = 2022 this is the same path as before).
output_path = data_dir / f"totalpop_acs5_{ACS_YEAR}.parquet"
df.to_parquet(output_path, index=False)
print(f"Saved {len(df):,} tract records to {output_path}")
output_path

Saved 85,396 tract records to /Users/eric/proj/scratch/WirelessIncome/data/totalpop_acs5_2022.parquet


PosixPath('/Users/eric/proj/scratch/WirelessIncome/data/totalpop_acs5_2022.parquet')