# Data Download & Assembly

This notebook downloads and assembles macroeconomic and housing datasets used
to analyze housing affordability as a user funnel.

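Data sources:
- FRED: 30-year mortgage rate (`MORTGAGE30US`), real disposable personal income (`DSPIC96`), and the U.S. all-transactions house price index (`USSTHPI`)
- Census ACS (2022 1-year, state-level): median household income (`B19013_001E`) and median home value (`B25077_001E`)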

In [1]:
# Import libraries
import os

import numpy as np
import pandas as pd
import requests
!pip install fredapi
from fredapi import Fred



In [2]:
# Setup FRED API
# The key is read from the FRED_API_KEY environment variable so the secret is
# never stored in the notebook or committed to version control.
fred_api_key = os.environ.get("FRED_API_KEY")
if not fred_api_key:
    raise RuntimeError(
        "FRED_API_KEY is not set. Export your FRED API key as the "
        "FRED_API_KEY environment variable before running this notebook."
    )
fred = Fred(api_key=fred_api_key)

# Download mortgage rate (FRED series MORTGAGE30US)
mortgage = fred.get_series("MORTGAGE30US")
# get_series returns a date-indexed Series; flatten it to a two-column frame.
mortgage = mortgage.reset_index()
mortgage.columns = ["date", "mortgage_rate"]

# Calendar year is the key used later for annual aggregation and merging.
mortgage["year"] = mortgage["date"].dt.year
mortgage.head()


Unnamed: 0,date,mortgage_rate,year
0,1971-04-02,7.33,1971
1,1971-04-09,7.31,1971
2,1971-04-16,7.31,1971
3,1971-04-23,7.31,1971
4,1971-04-30,7.29,1971


In [3]:
# Download House Price Index (FHFA)
# USSTHPI is the U.S. All-Transactions HPI series on FRED.
hpi_series = fred.get_series("USSTHPI")

# Name the values and the date index up front, then flatten into a
# two-column frame: date, hpi.
hpi = hpi_series.rename("hpi").rename_axis("date").reset_index()

# Tag every observation with its calendar year for the annual roll-up.
hpi = hpi.assign(year=hpi["date"].dt.year)
hpi.head()

Unnamed: 0,date,hpi,year
0,1975-01-01,59.99,1975
1,1975-04-01,60.92,1975
2,1975-07-01,61.38,1975
3,1975-10-01,62.24,1975
4,1976-01-01,62.89,1976


In [4]:
# Download Income Proxy (FRED)
# DSPIC96 is Real Disposable Personal Income.
income_series = fred.get_series("DSPIC96")

# Turn the date-indexed series into a frame with columns: date, real_income.
income = income_series.to_frame("real_income").rename_axis("date").reset_index()

# Tag every observation with its calendar year for the annual roll-up.
income = income.assign(year=income["date"].dt.year)
income.head()

Unnamed: 0,date,real_income,year
0,1959-01-01,2318.4,1959
1,1959-02-01,2325.4,1959
2,1959-03-01,2338.7,1959
3,1959-04-01,2353.8,1959
4,1959-05-01,2366.6,1959


In [5]:
# Aggregate to Annual Level
# Each source series is collapsed to one row per calendar year by taking the
# mean of all observations that fall in that year.
mortgage_annual = mortgage.groupby("year", as_index=False)["mortgage_rate"].mean()

hpi_annual = hpi.groupby("year", as_index=False)["hpi"].mean()

income_annual = income.groupby("year", as_index=False)["real_income"].mean()

In [6]:
# Merge FRED Datasets
# Inner joins on year: only years present in all three annual series survive.
mortgage_hpi_annual = pd.merge(mortgage_annual, hpi_annual, on="year", how="inner")
fred_data = pd.merge(mortgage_hpi_annual, income_annual, on="year", how="inner")
fred_data.head()

Unnamed: 0,year,mortgage_rate,hpi,real_income
0,1975,9.047115,61.1325,4549.2
1,1976,8.865849,65.57,4694.383333
2,1977,8.845192,73.44,4842.325
3,1978,9.641731,83.6975,5063.141667
4,1979,11.203654,95.1425,5161.966667


In [7]:
# Download Census ACS Data (State Level)
# Single source of truth for the survey year: it selects the API endpoint and
# is also written to the "year" column used to join against the FRED data.
ACS_YEAR = 2022
url = f"https://api.census.gov/data/{ACS_YEAR}/acs/acs1"
params = {
    "get": "NAME,B19013_001E,B25077_001E",
    "for": "state:*"}

# Bound the wait and surface HTTP errors here, rather than letting an error
# body fail later during JSON parsing or column assignment.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

# The first row of the payload is used as the header; the rest are records.
acs = pd.DataFrame(data[1:], columns=data[0])

acs = acs.rename(columns={
    "B19013_001E": "median_household_income",
    "B25077_001E": "median_home_value"
})

# Clean and Convert
# Values arrive as strings. Anything non-numeric becomes NaN, and negative
# values are also treated as missing: a median income or home value cannot be
# negative, and the Census API uses large negative codes (e.g. -666666666)
# to flag unavailable estimates.
for col in ["median_household_income", "median_home_value"]:
    acs[col] = pd.to_numeric(acs[col], errors="coerce")
    acs[col] = acs[col].where(acs[col] >= 0)

acs["year"] = ACS_YEAR
acs.head()

Unnamed: 0,NAME,median_household_income,median_home_value,state,year
0,Alabama,59674,200900,1,2022
1,Alaska,88121,336900,2,2022
2,Arizona,74568,402800,4,2022
3,Arkansas,55432,179800,5,2022
4,California,91551,715900,6,2022


In [8]:
# Combine Macro and ACS Data
# Left join keeps every ACS row and attaches the FRED aggregates whose year
# matches; ACS rows with no matching FRED year get NaN in the FRED columns.
housing_data = pd.merge(acs, fred_data, how="left", on="year")
housing_data.head()

Unnamed: 0,NAME,median_household_income,median_home_value,state,year,mortgage_rate,hpi,real_income
0,Alabama,59674,200900,1,2022,5.344038,607.9425,16287.583333
1,Alaska,88121,336900,2,2022,5.344038,607.9425,16287.583333
2,Arizona,74568,402800,4,2022,5.344038,607.9425,16287.583333
3,Arkansas,55432,179800,5,2022,5.344038,607.9425,16287.583333
4,California,91551,715900,6,2022,5.344038,607.9425,16287.583333


In [9]:
# Feature Engineering (Prep for Funnel)
# Price-to-income ratio: median home value divided by median household income.
home_value = housing_data["median_home_value"]
household_income = housing_data["median_household_income"]
housing_data["price_to_income_ratio"] = home_value.div(household_income)
housing_data.head()

Unnamed: 0,NAME,median_household_income,median_home_value,state,year,mortgage_rate,hpi,real_income,price_to_income_ratio
0,Alabama,59674,200900,1,2022,5.344038,607.9425,16287.583333,3.366625
1,Alaska,88121,336900,2,2022,5.344038,607.9425,16287.583333,3.823152
2,Arizona,74568,402800,4,2022,5.344038,607.9425,16287.583333,5.401781
3,Arkansas,55432,179800,5,2022,5.344038,607.9425,16287.583333,3.243614
4,California,91551,715900,6,2022,5.344038,607.9425,16287.583333,7.819685


In [10]:
# Save Output Files
# Create the output directory first: to_csv does not create missing parent
# directories, so a fresh checkout without data/ would otherwise fail here.
os.makedirs("data", exist_ok=True)
housing_data.to_csv("data/housing_affordability_data.csv", index=False)
fred_data.to_csv("data/fred_macro_data.csv", index=False)