In [1]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os

In [2]:
# Read variables from a local .env file, then pick up the Census API key
# from the environment.
load_dotenv()
API_KEY = os.environ.get("CENSUS_API_KEY")
# Report only whether a key was found; the key itself is never printed.
print("Loaded API_KEY:", bool(API_KEY))

Loaded API_KEY: True


In [3]:


# Census Data API endpoint for the 2023 ACS 5-year dataset.
BASE_URL = "https://api.census.gov/data/2023/acs/acs5"

# Estimate variables to request, listed in request order.
VARS = [
    # B02001 / B02015: population counts by race
    "B02001_001E",  # total_pop (race-based)
    "B02001_002E",  # white_alone
    "B02001_003E",  # black_alone
    "B02001_005E",  # asian_alone
    "B02015_002E",  # chinese_persons
    # B16001: chinese_speakers
    "B16001_075E",
    "B16001_076E",
    "B16001_077E",
    # B16001: spanish_speakers
    "B16001_003E",
    "B16001_004E",
    # B03003: Hispanic origin
    "B03003_001E",  # total_pop_ethnicity
    "B03003_003E",  # hispanic_pop
]

# Fetch all requested variables for every ZIP Code Tabulation Area in a
# single request.
params = {
    "get": ",".join(VARS),
    "for": "zip code tabulation area:*",
    "key": API_KEY
}
# requests has no default timeout: without one, a stalled connection would
# block this cell forever. The value bounds the connect phase and each wait
# for data from the server, not the total download time.
resp = requests.get(BASE_URL, params=params, timeout=60)
# Stop here on any HTTP error status instead of trying to parse an error body.
resp.raise_for_status()
# The payload is a list of rows: data[0] holds the column names and the
# remaining rows hold one record each.
data = resp.json()

# Build the frame: the first row of the payload names the columns, every
# following row is a record.
header = data[0]
records = data[1:]
df = pd.DataFrame(records, columns=header)

# Turn each requested estimate into an integer column. Values that cannot be
# parsed as numbers become NaN and are then replaced by 0, so a missing
# estimate and a true zero are indistinguishable afterwards.
for var in VARS:
    as_number = pd.to_numeric(df[var], errors="coerce")
    df[var] = as_number.fillna(0).astype(int)

# Map the API's column names to readable ones.
# NOTE(review): the Chinese codes 075/076/077 are labelled
# total / very_well / less_well, but the Spanish codes 003/004 are labelled
# very_well / less_well with no total. Confirm the Spanish labels against the
# B16001 variable list before re-enabling any Spanish-speaker calculation.
readable_names = {
    "zip code tabulation area": "zip_code",
    # race / ancestry counts
    "B02001_001E": "total_pop",
    "B02001_002E": "white_alone",
    "B02001_003E": "black_alone",
    "B02001_005E": "asian_alone",
    "B02015_002E": "chinese_persons",
    # language spoken at home
    "B16001_075E": "chinese_speakers_total",
    "B16001_076E": "chinese_speakers_very_well",
    "B16001_077E": "chinese_speakers_less_well",
    "B16001_003E": "spanish_speakers_very_well",
    "B16001_004E": "spanish_speakers_less_well",
    # Hispanic origin
    "B03003_001E": "total_pop_ethnicity",
    "B03003_003E": "hispanic_pop",
}
df = df.rename(columns=readable_names)

# Express each group count as a share of the ZCTA's total population.
# The results are plain ratios (count / total_pop), not values scaled to 100.
# A ZCTA whose total_pop is 0 yields NaN for every share.
# NOTE(review): pct_hispanic is divided by total_pop (the B02001 total), while
# total_pop_ethnicity (the B03003 total) is fetched and then dropped unused.
# Confirm that this denominator is the intended one.
share_sources = {
    "pct_white": "white_alone",
    "pct_black": "black_alone",
    "pct_asian": "asian_alone",
    "pct_chinese": "chinese_persons",
    "pct_hispanic": "hispanic_pop",
}
for share_col, count_col in share_sources.items():
    df[share_col] = df[count_col] / df["total_pop"]

# Language-share columns are currently disabled:
# df["spanish_speakers_total"] = (
#     df["spanish_speakers_very_well"] +
#     df["spanish_speakers_less_well"]
# )
# df["pct_chinese_speaking"] = df["chinese_speakers_total"] / df["total_pop"]
# df["pct_spanish_speaking"] = df["spanish_speakers_total"] / df["total_pop"]

# Remove the columns that are not part of the exported table.
unused_columns = [
    "chinese_speakers_very_well",
    "chinese_speakers_less_well",
    "spanish_speakers_very_well",
    "spanish_speakers_less_well",
    "total_pop_ethnicity",
    "chinese_speakers_total",
]
df = df.drop(columns=unused_columns)

# Put 'zip_code' first and keep every other column in its current order.
cols = df.columns.tolist()
cols.remove("zip_code")  # raises ValueError if the column is missing
cols = ["zip_code"] + cols
df = df[cols]

# Preview the first rows of the result.
df.head()

Unnamed: 0,zip_code,total_pop,white_alone,black_alone,asian_alone,chinese_persons,hispanic_pop,pct_white,pct_black,pct_asian,pct_chinese,pct_hispanic
0,601,16721,13904,314,19,0,16630,0.831529,0.018779,0.001136,0.0,0.994558
1,602,37510,13781,520,44,18,35950,0.367395,0.013863,0.001173,0.00048,0.958411
2,603,48317,35550,1572,8,8,47521,0.735766,0.032535,0.000166,0.000166,0.983525
3,606,5435,3697,12,15,0,5373,0.680221,0.002208,0.00276,0.0,0.988592
4,610,25413,6582,525,0,0,24663,0.259001,0.020659,0.0,0.0,0.970488


In [4]:
# Write the ZCTA table to CSV without the row index.
output_path = "imported_data/extra_census_data.csv"
# Create the target folder first: to_csv raises if the directory is missing,
# which would break a run in a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# NOTE(review): zip_code is never cast to a number above, so it is written
# exactly as the API returned it. Presumably these are zero-padded strings, in
# which case readers of this file should load the column as text. Confirm.
df.to_csv(output_path, index=False, encoding="utf-8")