# Census Tract Characteristics (Urban/Rural, Metro, Commute)

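This notebook assembles tract-level characteristics (urban/rural, metro, and commuting context). The cells shown here load the USDA ERS 2020 Rural-Urban Commuting Area (RUCA) tract file, check how well its tract FIPS codes link to the 2022 Census tract `GEOID`s, and save a reduced table to `data_dir` as `tract_characteristics.parquet` for downstream analysis. Retrieval of 2020 decennial urban/rural counts and 2022 ACS 5-year commute statistics from the Census APIs is not performed in these cells.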

In [26]:
# Enable autoreload and import configuration
%load_ext autoreload
%autoreload 2

import pandas as pd
import geopandas as gpd
import numpy as np
import requests
import geopandas as gpd

# Placeholder binding so linters see `data_dir` as a defined name.
# `%run` then executes ../../config.py and loads its names into this notebook;
# the script is expected to rebind `data_dir` to the project data directory
# (it is used as a path when the output file is saved).
# NOTE(review): confirm config.py actually defines `data_dir`; if it does not,
# the save cell will fail on `None / "..."`.
data_dir = None
%run ../../config.py

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project root: /Users/eric/proj/scratch/WirelessIncome
Data directory: /Users/eric/proj/scratch/WirelessIncome/data


In [24]:
# Source: USDA ERS RUCA codes for 2020 census tracts (CSV download).
chara_url = "https://ers.usda.gov/sites/default/files/_laserfiche/DataFiles/53241/RUCA-codes-2020-tract.csv"

# Every column is read as text so identifier codes are not coerced to numbers,
# and the file is decoded as Latin-1 to avoid decode errors.
ruca_read_options = {"encoding": "latin-1", "dtype": str, "low_memory": False}
df = pd.read_csv(chara_url, **ruca_read_options)
print(f"Loaded {len(df):,} rows from RUCA file")
df.head(5)

Loaded 85,528 rows from RUCA file


Unnamed: 0,TractFIPS23,CountyFIPS23,CountyCode23,CountyName23,TractFIPS20,TractCode20,TractName20,CountyFIPS20,CountyCode20,CountyName20,...,PrimaryRUCADescription,PrimaryDestinationCode,PrimaryDestinationName,SecondaryRUCA,SecondaryRUCADescription,SecondaryDestinationCode,SecondaryDestinationName,Population,LandArea,PopDensity
0,1001020100,1001,1,Autauga County,1001020100,20100,Census Tract 201,1001,1,Autauga County,...,Metropolitan core,58600,"Montgomery, AL",1,"Metropolitan core, no addtional code",1101005904,Census Tract 59.04; Montgomery County; Alabama,1775,3.8,467.9
1,1001020200,1001,1,Autauga County,1001020200,20200,Census Tract 202,1001,1,Autauga County,...,Metropolitan core,58600,"Montgomery, AL",1,"Metropolitan core, no addtional code",1101005904,Census Tract 59.04; Montgomery County; Alabama,2055,1.3,1602.7
2,1001020300,1001,1,Autauga County,1001020300,20300,Census Tract 203,1001,1,Autauga County,...,Metropolitan core,58600,"Montgomery, AL",1,"Metropolitan core, no addtional code",1101005904,Census Tract 59.04; Montgomery County; Alabama,3216,2.1,1557.1
3,1001020400,1001,1,Autauga County,1001020400,20400,Census Tract 204,1001,1,Autauga County,...,Metropolitan core,58600,"Montgomery, AL",1,"Metropolitan core, no addtional code",1101005904,Census Tract 59.04; Montgomery County; Alabama,4246,2.5,1722.5
4,1001020501,1001,1,Autauga County,1001020501,20501,Census Tract 205.01,1001,1,Autauga County,...,Metropolitan core,58600,"Montgomery, AL",1,"Metropolitan core, no addtional code",1101005904,Census Tract 59.04; Montgomery County; Alabama,4322,2.4,1804.4


In [27]:
# Download the 2022 Census tract boundary file and keep only its identifier columns.
# NOTE(review): the URL points at the GENZ2022 `cb_..._500k` file, while the message
# printed below calls it TIGER/Line — confirm which wording is intended.
tracts_url = "https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_tract_500k.zip"
tract_id_columns = ["GEOID", "NAME", "STATEFP", "COUNTYFP", "TRACTCE"]
tracts_gdf = gpd.read_file(tracts_url)[tract_id_columns]
print(f"Loaded {len(tracts_gdf):,} tract geometries from 2022 TIGER/Line")

# Which of the candidate tract-FIPS columns does the RUCA frame actually carry?
candidate_tract_cols = ("TractFIPS23", "TractFIPS20", "TractFIPS")
tract_cols = [name for name in candidate_tract_cols if name in df.columns]
if len(tract_cols) == 0:
    raise ValueError("RUCA file is missing expected TractFIPS columns")

geoids = set(tracts_gdf["GEOID"].astype(str))


def _linkage_summary(col):
    """Score one RUCA tract-FIPS column against the boundary-file GEOIDs.

    The column is converted to stripped, 11-character zero-padded strings; that
    normalized series is also stored on ``df`` as ``<col>_normalized``.
    Returns a dict with the column name, row count, match count and match share.
    """
    normalized = df[col].astype(str).str.strip().str.zfill(11)
    df[col + "_normalized"] = normalized
    hit_mask = normalized.isin(geoids)
    return {
        "column": col,
        "count": len(normalized),
        "matches": hit_mask.sum(),
        "match_pct": hit_mask.mean(),
    }


link_checks = [_linkage_summary(col) for col in tract_cols]

link_df = pd.DataFrame(link_checks)
print("TractFIPS linkage quality vs GEOID:")
link_df

Loaded 85,185 tract geometries from 2022 TIGER/Line
TractFIPS linkage quality vs GEOID:


Unnamed: 0,column,count,matches,match_pct
0,TractFIPS23,85528,85185,0.99599
1,TractFIPS20,85528,84306,0.985712


In [28]:
# Transpose the first five rows so each column reads as a row (easier to scan a wide frame).
df.head(5).transpose()


Unnamed: 0,0,1,2,3,4
TractFIPS23,01001020100,01001020200,01001020300,01001020400,01001020501
CountyFIPS23,01001,01001,01001,01001,01001
CountyCode23,001,001,001,001,001
CountyName23,Autauga County,Autauga County,Autauga County,Autauga County,Autauga County
TractFIPS20,01001020100,01001020200,01001020300,01001020400,01001020501
TractCode20,020100,020200,020300,020400,020501
TractName20,Census Tract 201,Census Tract 202,Census Tract 203,Census Tract 204,Census Tract 205.01
CountyFIPS20,01001,01001,01001,01001,01001
CountyCode20,001,001,001,001,001
CountyName20,Autauga County,Autauga County,Autauga County,Autauga County,Autauga County


In [34]:
# Count how many tracts fall under each UrbanCore value.
df["UrbanCore"].value_counts()

UrbanCore
1    66793
0    18735
Name: count, dtype: int64

In [36]:
# Reduce columns and rename to shorter, lowercase names with underscores
df = df[[
    "TractFIPS23",
    "CountyName20",
    "UrbanCoreType",
    "PrimaryRUCADescription",
    "PrimaryDestinationCode",
    "PopDensity"
]].rename(columns={
    "TractFIPS23": "GEOID",
    "CountyName20": "county_name",
    "UrbanCoreType": "urban_core_type",
    "PrimaryRUCADescription": "primary_ruca_desc",
    "PrimaryDestinationCode": "primary_dest_code",
    "PopDensity": "pop_density"
})

print(f"Reduced to {len(df.columns)} columns: {list(df.columns)}")
df.head()

Reduced to 6 columns: ['GEOID', 'county_name', 'urban_core_type', 'primary_ruca_desc', 'primary_dest_code', 'pop_density']


Unnamed: 0,GEOID,county_name,urban_core_type,primary_ruca_desc,primary_dest_code,pop_density
0,1001020100,Autauga County,Metro core,Metropolitan core,58600,467.9
1,1001020200,Autauga County,Metro core,Metropolitan core,58600,1602.7
2,1001020300,Autauga County,Metro core,Metropolitan core,58600,1557.1
3,1001020400,Autauga County,Metro core,Metropolitan core,58600,1722.5
4,1001020501,Autauga County,Metro core,Metropolitan core,58600,1804.4


In [37]:
# Write the tract table into the configured data directory as Parquet,
# leaving the DataFrame index out of the file.
parquet_name = "tract_characteristics.parquet"
output_path = data_dir / parquet_name
df.to_parquet(path=output_path, index=False)
print(f"Saved {len(df):,} tract characteristics to {output_path}")

Saved 85,528 tract characteristics to /Users/eric/proj/scratch/WirelessIncome/data/tract_characteristics.parquet
