In [None]:
# Load configuration
# IPython's %run executes ../../config.py and copies the names it defines into
# this notebook's namespace.
# NOTE(review): later cells read `data_dir` without defining it, so it is
# presumably set by config.py — confirm.
%run ../../config.py

In [None]:
import requests
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os

In [None]:
# Download Census TIGER tract shapefiles for 2022
# TIGER files are available from Census FTP

print("Downloading Census TIGER tract shapefiles...")

# National tract shapefile URL for 2022
# NOTE(review): TIGER/Line tract files are normally published one per state
# (tl_2022_<SS>_tract.zip) — confirm that this national URL actually resolves.
tiger_url = "https://www2.census.gov/geo/tiger/TIGER2022/TRACT/tl_2022_us_tract.zip"
tiger_path = os.path.join(data_dir, "tl_2022_us_tract.zip")

# Download the file if it doesn't exist
if os.path.exists(tiger_path):
    print(f"TIGER shapefile already exists at {tiger_path}, skipping download")
else:
    print(f"Downloading from {tiger_url}...")
    # Stream into a temporary ".part" file and rename it only after a complete,
    # successful download. Otherwise an interrupted or failed request would
    # leave a corrupt zip that the exists() check above trusts on every re-run.
    partial_path = tiger_path + ".part"
    try:
        # The context manager releases the streamed connection; the timeout
        # prevents the cell from hanging forever on a stalled server.
        with requests.get(tiger_url, stream=True, timeout=60) as response:
            # Fail loudly on 4xx/5xx instead of saving an error page as a zip
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Atomic on the same filesystem: the final path is either absent or complete
        os.replace(partial_path, tiger_path)
    finally:
        # Nothing to remove after a successful rename; cleans up after a failure
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Saved to {tiger_path}")

# Load the tract shapefile
print("Loading tract shapefile...")
tracts_gdf = gpd.read_file(f"zip://{tiger_path}")

# The income data is joined on GEOID, so make sure the shapefile provides it
# (replaces a self-assignment that did nothing beyond raising a bare KeyError).
if 'GEOID' not in tracts_gdf.columns:
    raise KeyError("Tract shapefile has no 'GEOID' column; cannot join income data")

print(f"\nLoaded {len(tracts_gdf)} census tracts")
print(f"Columns: {list(tracts_gdf.columns)}")

print(f"\nSample tracts:")
print(tracts_gdf.head())

In [None]:
# Get Census data - 2022 ACS 5-Year median household income by tract
# B19013_001E is median household income
# Tracts are requested one state at a time and combined afterwards

print("Fetching ACS 5-Year median household income data by tract...")

# How many states to fetch. 5 keeps test runs fast; set to None to fetch the
# tracts of every state/territory returned by the API.
MAX_STATES = 5
REQUEST_TIMEOUT = 60  # seconds; avoids hanging forever on a stalled request

# Get state FIPS codes first
states_url = "https://api.census.gov/data/2022/acs/acs5?get=NAME&for=state:*"
states_response = requests.get(states_url, timeout=REQUEST_TIMEOUT)
# Surface HTTP errors here rather than as a confusing JSON decode failure
states_response.raise_for_status()
states_data = states_response.json()
state_fips = [row[1] for row in states_data[1:]]  # Skip header

print(f"Found {len(state_fips)} states/territories")

# Fetch median income for all tracts in each state
all_tracts = []
failed_states = []  # FIPS codes whose request or parsing failed

for state_fips_code in state_fips[:MAX_STATES]:  # [:None] selects every state
    try:
        url = f"https://api.census.gov/data/2022/acs/acs5?get=NAME,B19013_001E&for=tract:*&in=state:{state_fips_code}"
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()

        # Convert to dataframe (first row of the payload is the header)
        df = pd.DataFrame(data[1:], columns=data[0])
        all_tracts.append(df)
        print(f"  State {state_fips_code}: {len(df)} tracts")
    except Exception as e:
        # Deliberately best-effort: one bad state must not abort the whole
        # fetch, but remember it so the gap is reported below.
        failed_states.append(state_fips_code)
        print(f"  Error fetching state {state_fips_code}: {e}")

if failed_states:
    print(f"\nWARNING: no data fetched for {len(failed_states)} state(s): {failed_states}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list
if not all_tracts:
    raise RuntimeError("No ACS tract data could be fetched; see the errors above")

# Combine all data
income_df = pd.concat(all_tracts, ignore_index=True)

# Clean up the data
median_income = pd.to_numeric(income_df['B19013_001E'], errors='coerce')
# ACS reports estimates that cannot be computed as large negative sentinel
# codes (e.g. -666666666). A real median income is never negative, so mask
# those to NaN to keep them out of the statistics and the map colour scale.
income_df['median_income'] = median_income.where(median_income >= 0)
# 11-character tract identifier: state (2) + county (3) + tract (6)
income_df['GEOID'] = income_df['state'] + income_df['county'] + income_df['tract']

print(f"\nTotal tracts with income data: {len(income_df)}")
print(f"Tracts with valid income: {income_df['median_income'].notna().sum()}")
print(f"\nIncome statistics:")
print(income_df['median_income'].describe())
print(f"\nSample data:")
print(income_df[['NAME', 'median_income', 'GEOID']].head(10))

In [None]:
# Attach median income to the tract geometries using the shared GEOID key
print("Joining income data to tract shapefile...")

# Left join keeps every tract geometry; tracts without a matching income row
# end up with NaN in median_income. Both frames carry a NAME column, so the
# suffixes keep the two apart.
tracts_with_income = tracts_gdf.merge(
    income_df[['GEOID', 'median_income', 'NAME']],
    how='left',
    on='GEOID',
    suffixes=('_tract', '_income'),
)

# One boolean mask drives both counts
has_income = tracts_with_income['median_income'].notna()
print(f"\nTracts with income data: {has_income.sum()}")
print(f"Tracts without income data: {(~has_income).sum()}")

# Quick choropleth of median income; tracts without a value are drawn in grey
fig, ax = plt.subplots(figsize=(15, 10))
choropleth_style = dict(
    column='median_income',
    legend=True,
    cmap='YlOrRd',
    edgecolor='black',
    linewidth=0.1,
    missing_kwds={'color': 'lightgrey'},
)
tracts_with_income.plot(ax=ax, **choropleth_style)
ax.set_title('Median Household Income by Census Tract (2022 ACS 5-Year)')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
fig.tight_layout()
plt.show()

print("\nIncome data ready for analysis!")