In [3]:
import pandas as pd 
import censusdis.data as ced

#pandas settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
acs5_profile = "acs/acs5/profile" # ACS 5-year profile estimates
years = [2021, 2022]
all_states = "*" # all states
all_counties = "*"  # all counties

In [5]:
var_map = {
  #Population
  "DP05_0033E": "pop_tot", # Total population

  #Educational Attainment
  "DP02_0060PE": "pct_ed_lt9", # Estimate of educational attainment less than 9th grade
  "DP02_0061PE": "pct_ed_9_12", # Estimate of educational attainment 9th to 12th grade, no diploma
  "DP02_0062PE": "pct_ed_hs", # Estimate of educational attainment high school graduate (includes equivalency) 
  "DP02_0063PE": "pct_ed_sc", # Estimate of educational attainment some college, no degree
  "DP02_0064PE": "pct_ed_asc", # Estimate of educational attainment associate's degree
  "DP02_0065PE": "pct_ed_b", # Estimate of educational attainment bachelor's degree
  "DP02_0066PE": "pct_ed_gr", # Estimate of educational attainment graduate or professional degree

  #Age 65 and over
  "DP05_0024PE": "pct_age_gte65", # Estimate of population age 65 and over

  #Race
  "DP05_0037PE" : "pct_race_white", #Estimate of total population white alone
  "DP05_0038PE" : "pct_race_black", #Estimate of total population black or African American alone
  "DP05_0039PE" : "pct_race_aian", #Estimate of total population American Indian and Alaska Native alone
  "DP05_0044PE" : "pct_race_asian", #Estimate of total population Asian alone
  "DP05_0052PE" : "pct_race_nhopi", #Estimate of total population Native Hawaiian and Other Pacific Islander alone
  "DP05_0057PE" : "pct_race_other", #Estimate of total population some other race

  #Ethnicity
  "DP05_0071PE" : "pct_eth_hisp", #Estimate of total population Hispanic or Latino (any race)

  #Sex
  "DP05_0002PE" : "pct_sex_male", #Estimate of total population male
  "DP05_0003PE" : "pct_sex_female", #Estimate of total population female


  #Occupation
  "DP03_0005PE" : "pct_occ_unemp", # Estimate of civilian employed population 16 years unemployed
  "DP03_0027PE" : "pct_occ_mgt", # Estimate of civilian employed population 16 years and over in management, business, science, and arts occupations
  "DP03_0028PE" : "pct_occ_svc", # Estimate of civilian employed population 16 years and over in service occupations
  "DP03_0029PE" : "pct_occ_sales", # Estimate of civilian employed population 16 years and over in sales and office occupations
  "DP03_0030PE" : "pct_occ_nat_res", # Estimate of civilian employed population 16 years and over in natural resources, construction, and maintenance occupations
  "DP03_0031PE" : "pct_occ_prod", # Estimate of civilian employed population 16 years and over in production, transportation, and material moving occupations

  #Health Insurance Coverage
  "DP03_0099PE": "pct_hlth_unins", # Estimate of civilian noninstitutionalized population without health insurance coverage

  #Income (Estimates not available, percentage only)
  "DP03_0128PE": "pct_ses_pov", # Percentage estimate of all people whose income in past 12 is below poverty level; all people

  #Transportation
  "DP04_0058PE": "pct_tp_veh_0", # Estimate of occupied housing units with no vehicles available
  "DP04_0059PE": "pct_tp_veh_1", # Estimate of occupied housing units with 1 vehicle available
  "DP04_0060PE": "pct_tp_veh_2", # Estimate of occupied housing units with 2 vehicles available
  "DP04_0061PE": "pct_tp_veh_3", # Estimate of occupied housing units with 3 vehicles available

  #Broadband Internet Subscription
  "DP02_0154PE": "pct_dg_bb_int", # Estimate of households with internet (Broadband) subscription
  
}

vars = ["NAME"] + list(var_map.keys())
acs_data = {}
for year in years:
    acs_data[year] = ced.download(acs5_profile, year, vars, state=all_states, county=all_counties)

#create three dataframes
acs_data_2021 = acs_data[2021]
acs_data_2022 = acs_data[2022]

#rename columns
acs_data_2021.rename(columns=var_map, inplace=True)
acs_data_2022.rename(columns=var_map, inplace=True)
#lower case
acs_data_2021.columns = acs_data_2021.columns.str.lower()
acs_data_2022.columns = acs_data_2022.columns.str.lower()
#combine state and county into FIPS code
acs_data_2021['fips'] = acs_data_2021['state'] + acs_data_2021['county'].str.zfill(3)
acs_data_2022['fips'] = acs_data_2022['state'] + acs_data_2022['county'].str.zfill(3)

In [6]:
acs_data_2021.head()

Unnamed: 0,state,county,name,pop_tot,pct_ed_lt9,pct_ed_9_12,pct_ed_hs,pct_ed_sc,pct_ed_asc,pct_ed_b,pct_ed_gr,pct_age_gte65,pct_race_white,pct_race_black,pct_race_aian,pct_race_asian,pct_race_nhopi,pct_race_other,pct_eth_hisp,pct_sex_male,pct_sex_female,pct_occ_unemp,pct_occ_mgt,pct_occ_svc,pct_occ_sales,pct_occ_nat_res,pct_occ_prod,pct_hlth_unins,pct_ses_pov,pct_tp_veh_0,pct_tp_veh_1,pct_tp_veh_2,pct_tp_veh_3,pct_dg_bb_int,fips
0,1,1,"Autauga County, Alabama",58239,2.0,8.4,32.8,19.6,9.1,16.4,11.7,15.1,75.1,19.7,0.2,1.1,0.0,0.3,3.0,48.4,51.6,1.6,38.1,15.1,21.9,9.5,15.5,7.9,13.6,5.4,31.5,33.8,29.3,85.5,1001
1,1,3,"Baldwin County, Alabama",227131,2.1,6.9,27.4,21.7,9.5,20.6,11.8,20.6,84.5,8.8,0.6,1.0,0.0,1.8,4.7,48.8,51.2,2.2,36.1,17.8,23.6,9.5,13.0,9.7,9.2,2.4,31.7,43.2,22.7,87.9,1003
2,1,5,"Barbour County, Alabama",25259,7.4,16.9,36.7,20.5,7.3,6.7,4.4,19.0,45.5,47.4,0.3,0.4,0.0,3.9,4.7,52.9,47.1,3.9,25.6,20.7,19.9,12.2,21.6,10.4,26.5,11.2,33.3,33.8,21.7,64.6,1005
3,1,7,"Bibb County, Alabama",22412,6.2,13.3,43.9,18.0,6.7,7.9,4.0,16.0,75.9,22.3,0.1,0.2,0.0,0.1,2.8,54.9,45.1,4.8,22.0,16.8,21.5,16.6,23.1,10.0,16.9,6.3,24.7,37.7,31.3,74.6,1007
4,1,9,"Blount County, Alabama",58884,6.8,9.6,35.1,21.5,12.1,9.3,5.6,18.0,92.5,1.3,0.3,0.4,0.1,2.1,9.5,50.1,49.9,3.3,28.2,13.8,21.6,15.3,21.1,10.0,13.2,4.5,25.0,32.1,38.4,81.0,1009


In [7]:
acs_data_2022.head()

Unnamed: 0,state,county,name,pop_tot,pct_ed_lt9,pct_ed_9_12,pct_ed_hs,pct_ed_sc,pct_ed_asc,pct_ed_b,pct_ed_gr,pct_age_gte65,pct_race_white,pct_race_black,pct_race_aian,pct_race_asian,pct_race_nhopi,pct_race_other,pct_eth_hisp,pct_sex_male,pct_sex_female,pct_occ_unemp,pct_occ_mgt,pct_occ_svc,pct_occ_sales,pct_occ_nat_res,pct_occ_prod,pct_hlth_unins,pct_ses_pov,pct_tp_veh_0,pct_tp_veh_1,pct_tp_veh_2,pct_tp_veh_3,pct_dg_bb_int,fips
0,1,1,"Autauga County, Alabama",58761,1.7,7.9,31.1,21.0,8.7,16.7,12.8,15.6,74.4,19.6,0.1,1.1,0.0,0.5,2.2,48.8,51.2,1.6,36.4,14.2,22.0,10.0,17.4,7.4,11.4,4.0,31.7,34.7,29.6,89.1,1001
1,1,3,"Baldwin County, Alabama",233420,2.0,6.4,27.8,21.7,9.6,20.0,12.5,21.2,84.0,8.3,0.4,0.9,0.0,1.9,3.7,48.9,51.1,2.0,35.6,17.1,24.2,9.9,13.1,9.3,10.2,2.3,31.1,42.5,24.2,88.9,1003
2,1,5,"Barbour County, Alabama",24877,7.3,16.2,36.8,19.9,7.9,6.6,5.3,19.8,45.5,46.9,0.3,0.5,0.0,4.4,5.2,52.1,47.9,2.5,27.7,20.2,19.5,12.5,20.1,10.7,24.2,11.7,33.3,32.0,22.9,68.2,1005
3,1,7,"Bibb County, Alabama",22251,6.4,14.7,40.9,19.1,8.0,6.6,4.3,16.8,75.8,20.7,0.1,0.3,0.0,0.5,1.0,53.5,46.5,4.4,22.2,15.3,20.6,16.4,25.5,8.4,20.6,7.5,25.4,35.2,31.8,79.7,1007
4,1,9,"Blount County, Alabama",59077,7.1,11.1,35.3,20.7,11.0,9.4,5.3,18.3,91.3,1.2,0.4,0.2,0.2,3.1,4.6,50.6,49.4,3.3,26.8,13.0,22.7,15.8,21.7,10.2,14.2,4.8,24.9,32.7,37.6,82.7,1009


In [None]:
#save to relative path
import os

# Get the current working directory
current_dir = os.getcwd()

# Save the files 
output_dir = os.path.join(current_dir, '..', '..', '02_data', '02.01_raw')
os.makedirs(output_dir, exist_ok=True)
acs_data_2021.to_csv(os.path.join(output_dir, 'acs_data_2021.csv'), index=False)
acs_data_2022.to_csv(os.path.join(output_dir, 'acs_data_2022.csv'), index=False)