# 01_fetch_data
- Fetch raw Census data (table B25070) from the Census API and save one CSV per year to the `data_raw` folder
- Briefly check that the raw data looks okay

In [2]:
import os
from dotenv import load_dotenv
from pathlib import Path
from haf.io_utils import fetch_census_table, load_local_csv

Load environment variables (`PROJECT_ROOT` and `CENSUS_API_KEY`)

In [3]:
# Pull settings from a .env file (if one is found) into the process environment.
load_dotenv()

# Fail with a clear message when PROJECT_ROOT is missing; Path(None) would
# otherwise raise an opaque TypeError that does not name the variable.
_project_root = os.getenv("PROJECT_ROOT")
if _project_root is None:
    raise RuntimeError(
        "PROJECT_ROOT is not set. Define it in the environment or in a .env file."
    )
PROJECT_ROOT = Path(_project_root)

# May be None when unset; the value is handed to fetch_census_table unchanged.
CENSUS_API_KEY = os.getenv("CENSUS_API_KEY")

### Import data from Census API

#### Parameters for census import

In [4]:
# Columns requested from Census table B25070. Besides NAME, the estimate
# columns are numbered 001-011 and, in order, hold:
#   001  Total
#   002  Less than 10.0
#   003  10.0 to 14.9
#   004  15.0 to 19.9
#   005  20.0 to 24.9
#   006  25.0 to 29.9
#   007  30.0 to 34.9
#   008  35.0 to 39.9
#   009  40.0 to 49.9
#   010  50.0 or more
#   011  Not computed
table_variables = ["NAME"] + [f"B25070_{n:03d}E" for n in range(1, 12)]

# Geography clause passed to the fetch helper.
# NOTE(review): the trailing ":*" presumably selects every metro/micro area -- confirm.
geography = "metropolitan statistical area/micropolitan statistical area:*"

# 2016 through 2023 inclusive, minus 2020.
# NOTE(review): 2020 is presumably skipped because of how that year's survey
# data was released -- confirm and record the reason here.
years = [y for y in range(2016, 2024) if y != 2020]

In [5]:
# One CSV per requested year, all written under <PROJECT_ROOT>/data_raw.
# Judging by the recorded output ("Data already exists ... nothing changed"),
# fetch_census_table leaves a file alone when it is already on disk.
raw_dir = PROJECT_ROOT / "data_raw"
for year in years:
    file_path = raw_dir / f"B25070_{year}.csv"
    fetch_census_table(
        CENSUS_API_KEY,
        table_variables,
        geography,
        year,
        file_path,
    )

Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2016.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2017.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2018.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2019.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2021.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2022.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2023.csv -- nothing changed


### Sanity Check

In [7]:
# Spot-check a single year's raw file (2021) by loading it back from disk.
check_path = PROJECT_ROOT.joinpath("data_raw", "B25070_2021.csv")
df = load_local_csv(check_path)

In [8]:
# Display the first five rows as a quick visual check of the loaded table.
df.head(n=5)

Unnamed: 0,NAME,B25070_001E,B25070_002E,B25070_003E,B25070_004E,B25070_005E,B25070_006E,B25070_007E,B25070_008E,B25070_009E,B25070_010E,B25070_011E,metropolitan statistical area/micropolitan statistical area
0,"Killeen-Temple, TX Metro Area",76338.0,3033.0,6016.0,7810.0,10188.0,9596.0,6194.0,5654.0,6410.0,16755.0,4682.0,28660
1,"Kingsport-Bristol, TN-VA Metro Area",34503.0,2373.0,2592.0,3698.0,3892.0,3334.0,2060.0,1827.0,2706.0,6226.0,5795.0,28700
2,"Kingston, NY Metro Area",19206.0,487.0,1788.0,2107.0,1298.0,3003.0,1439.0,571.0,1639.0,5386.0,1488.0,28740
3,"Klamath Falls, OR Micro Area",9333.0,481.0,798.0,668.0,367.0,1044.0,704.0,772.0,462.0,3381.0,656.0,28900
4,"Knoxville, TN Metro Area",109268.0,4826.0,11144.0,11926.0,14357.0,12755.0,9356.0,5260.0,10711.0,19976.0,8957.0,28940
