### 5-YEAR ACS SOCIOECONOMIC DATA BY TRACTS (2022)

In [1]:
# Modules.
import cenpy
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Paths.
acs_dir = Path("data/acs")
acs_dir.mkdir(parents = True, exist_ok = True)

output_file = acs_dir / "acs_socioeconomic_tract_2022.csv"

In [3]:
# Connect to ACS 2022 5-year table.
api = cenpy.remote.APIConnection("ACSDT5Y2022")

# NYC counties.
nyc_counties = ["005", "047", "061", "081", "085"]

# ACS variables.
acs_variables = {
    # Core.
    "total_pop": "B01003_001E",
    "median_income": "B19013_001E",
    "poverty_all": "B17001_001E",
    "poverty_count": "B17001_002E",
    "no_vehicle_hh": "B25044_003E",

    # Education.
    "edu_bachelors": "B15003_022E",
    "edu_masters": "B15003_023E",
    "edu_professional": "B15003_024E",
    "edu_doctorate": "B15003_025E",
    "edu_total": "B15003_001E",

    # Housing tenure.
    "owner_hh": "B25003_002E",
    "renter_hh": "B25003_003E",
    "hh_total": "B25003_001E",

    # Limited English.
    "limited_english": "B16005_007E", 
    "limited_english_total": "B16005_001E"
}

cols = ["NAME"] + list(acs_variables.values())

In [4]:
# Download ACS for all NYC counties at the tract level.
records = []

for county in nyc_counties:
    print(f"Downloading ACS for county {county}.")

    df = api.query(
        cols = cols,
        geo_unit = "tract",
        geo_filter = {"state": "36", "county": county}
    )

    records.append(df)

acs = pd.concat(records, ignore_index = True)

Downloading ACS for county 005.
Downloading ACS for county 047.
Downloading ACS for county 061.
Downloading ACS for county 081.
Downloading ACS for county 085.


In [5]:
# Construct GEOID.
acs["GEOID"] = acs["state"] + acs["county"] + acs["tract"]

In [6]:
# Rename ACS columns.
rename_map = {v: k for k, v in acs_variables.items()}

acs = acs.rename(columns = rename_map)

In [7]:
# Convert to numeric and fix ACS placeholders for unknown data.

placeholders = [
    -666666666, -888888888, -222222222, -333333333,
    -666666666.0, -888888888.0, -222222222.0, -333333333.0,
]

acs = acs.replace(placeholders, np.nan)

for col in acs_variables.keys():
    acs[col] = pd.to_numeric(acs[col], errors = "coerce")

In [8]:
# Remove non-residential tracts.
acs = acs[acs["total_pop"] >= 50].copy()

In [9]:
# Feature engineering.

# Poverty rate.
acs["poverty_rate"] = acs["poverty_count"] / acs["poverty_all"]
acs["poverty_rate"] = acs["poverty_rate"].replace([np.inf, -np.inf], np.nan)

# Centered version.
# Transforms from raw proportion to centered (mean-subtracted) variable.
# Helps with interpretation. Can also be an interaction term with extreme heat to stabilize.
acs["poverty_rate_c"] = acs["poverty_rate"] - acs["poverty_rate"].mean()

# Education.
acs["edu_bachelors_plus"] = (
    acs["edu_bachelors"] +
    acs["edu_masters"] +
    acs["edu_professional"] +
    acs["edu_doctorate"]
)

acs["pct_bachelors_plus"] = acs["edu_bachelors_plus"] / acs["edu_total"]
acs["pct_bachelors_plus"] = acs["pct_bachelors_plus"].replace([np.inf, -np.inf], np.nan)

# Housing tenure.
acs["pct_renters"] = acs["renter_hh"] / acs["hh_total"]
acs["pct_renters"] = acs["pct_renters"].replace([np.inf, -np.inf], np.nan)


# Limited English proficiency.
acs["pct_limited_english"] = acs["limited_english"] / acs["limited_english_total"]
acs["pct_limited_english"] = acs["pct_limited_english"].replace([np.inf, -np.inf], np.nan)

In [10]:
# Impute missing values.
# Only a handful should be missing, fix by tract median or global median
for col in [
    "poverty_rate","pct_bachelors_plus",
    "pct_renters","pct_limited_english"
]:
    acs[col] = acs.groupby("county")[col].transform(
        lambda x: x.fillna(x.median())
    )
    acs[col] = acs[col].fillna(acs[col].median())

In [11]:
# Save.
acs.to_csv(output_file, index = False)
print("Saved:", output_file)

acs.head()

Saved: data\acs\acs_socioeconomic_tract_2022.csv


Unnamed: 0,NAME,total_pop,median_income,poverty_all,poverty_count,no_vehicle_hh,edu_bachelors,edu_masters,edu_professional,edu_doctorate,...,state,county,tract,GEOID,poverty_rate,poverty_rate_c,edu_bachelors_plus,pct_bachelors_plus,pct_renters,pct_limited_english
0,Census Tract 1; Bronx County; New York,4446,-666666666,0,0,0,34,7,25,9,...,36,5,100,36005000100,0.257322,,75,0.019405,0.856193,0.001799
1,Census Tract 2; Bronx County; New York,4870,115064,4870,688,73,489,619,16,36,...,36,5,200,36005000200,0.141273,-0.024086,1160,0.323751,0.397895,0.019587
2,Census Tract 4; Bronx County; New York,6257,100553,6257,378,119,995,338,206,0,...,36,5,400,36005000400,0.060412,-0.104947,1539,0.337057,0.389779,0.006332
3,Census Tract 16; Bronx County; New York,6177,41362,5961,893,7,682,123,0,0,...,36,5,1600,36005001600,0.149807,-0.015552,805,0.194351,0.794104,0.015487
4,Census Tract 19.01; Bronx County; New York,2181,49500,2178,623,0,283,133,22,9,...,36,5,1901,36005001901,0.286042,0.120683,447,0.304911,1.0,0.0
