# 01_fetch_data
- Fetch raw Census table B25070 from the Census API and save one CSV per year to the `data_raw` folder
- Briefly check that the raw data looks okay

In [7]:
import os
from dotenv import load_dotenv
from pathlib import Path
from haf.io_utils import fetch_census_table, check_table

### Load environment variables

`PROJECT_ROOT` and `CENSUS_API_KEY` are read from the environment (or a local `.env` file).

In [8]:
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# PROJECT_ROOT anchors every path in this notebook. Fail fast with a clear
# message when it is unset or empty: Path(None) raises an opaque TypeError,
# and Path("") silently resolves to the current working directory.
_project_root_env = os.getenv("PROJECT_ROOT")
if not _project_root_env:
    raise EnvironmentError(
        "PROJECT_ROOT is not set. Define it in your environment or in a .env "
        "file before running this notebook."
    )
PROJECT_ROOT = Path(_project_root_env)

# May be None when the variable is not defined; it is passed through to
# fetch_census_table unchanged.
# NOTE(review): confirm whether fetch_census_table accepts a missing key.
CENSUS_API_KEY = os.getenv("CENSUS_API_KEY")

### Import data from Census API

#### Parameters for census import

In [9]:
# Columns requested from table B25070: NAME plus estimates 001-011.
# Bucket labels for each numeric suffix:
#   001  Total
#   002  Less than 10.0
#   003  10.0 to 14.9
#   004  15.0 to 19.9
#   005  20.0 to 24.9
#   006  25.0 to 29.9
#   007  30.0 to 34.9
#   008  35.0 to 39.9
#   009  40.0 to 49.9
#   010  50.0 or more
#   011  Not computed
table_variables = ["NAME"] + [f"B25070_{idx:03d}E" for idx in range(1, 12)]

# Wildcard geography clause: every metropolitan/micropolitan statistical area.
geography = "metropolitan statistical area/micropolitan statistical area:*"

# 2016 through 2023, skipping 2020.
# NOTE(review): presumably 2020 is omitted because standard ACS 1-year
# estimates were not published for that year — confirm.
years = [yr for yr in range(2016, 2024) if yr != 2020]

In [10]:
# Fetch one CSV per requested year into <PROJECT_ROOT>/data_raw.
# Per the recorded output, files that already exist are reported and left
# unchanged by fetch_census_table.
raw_dir = PROJECT_ROOT / "data_raw"
for year in years:
    fetch_census_table(
        CENSUS_API_KEY,
        table_variables,
        geography,
        year,
        raw_dir / f"B25070_{year}.csv",
    )

Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2016.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2017.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2018.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2019.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2021.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2022.csv -- nothing changed
Data already exists at C:\Projects\housing-affordability\data_raw\B25070_2023.csv -- nothing changed


### Sanity Check

In [11]:
# Spot-check a single download: run the project's check_table helper on the
# 2023 file and keep the result for inspection in the next cell.
df = check_table(PROJECT_ROOT / "data_raw" / "B25070_2023.csv")

In [12]:
# Show the first rows of the 2023 table as a quick visual check.
df.head()

Unnamed: 0,NAME,B25070_001E,B25070_002E,B25070_003E,B25070_004E,B25070_005E,B25070_006E,B25070_007E,B25070_008E,B25070_009E,B25070_010E,B25070_011E,metropolitan statistical area/micropolitan statistical area
0,"Aberdeen, WA Micro Area",7681.0,205.0,1523.0,725.0,615.0,1116.0,431.0,610.0,543.0,1512.0,401.0,10140
1,"Abilene, TX Metro Area",25067.0,522.0,1670.0,4174.0,2302.0,1957.0,1673.0,2712.0,3262.0,4861.0,1934.0,10180
2,"Adrian, MI Micro Area",7902.0,345.0,754.0,1255.0,432.0,451.0,1492.0,108.0,646.0,1443.0,976.0,10300
3,"Aguadilla, PR Metro Area",30177.0,657.0,1648.0,1811.0,2330.0,815.0,1343.0,1193.0,588.0,2924.0,16868.0,10380
4,"Akron, OH Metro Area",94990.0,3336.0,9661.0,11159.0,10435.0,9377.0,8051.0,6217.0,7753.0,23846.0,5155.0,10420
