In [66]:
import pandas as pd
import censusdis
from censusdis import data
import censusdis.data as ced

#pandas settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# ACS 5-year Estimates (2021)
- All States, All counties

In [67]:
acs5_profile = "acs/acs5/profile"
year = 2021 # 2021 ACS 5-year estimates
all_states = "*"
all_counties = "*" 

# Methods
1. Create variable dictonary to map variable names to variable descriptions
2. Use variable list to select variables from ACS table data
3. Include margin of error for each variable
4. Calculate percentage of total population or households for each variable
5. Calculate confidence interval for each percentage estimate
6. Export as csv for merge with BRFSS data 


In [69]:
var_map = {
  #Population
  "DP05_0001E": "pop_tot", # Total population
  "DP02_0059E": "pop_25", # Estimate of total population 25 years and over (for educational attainment)
  "DP03_0026E": "pop_lf", # Estimate of (civilian) population 16 years and over in labor force (for occupation estimates)

  #Total Households
  "DP02_0001E": "hh_tot", # Total households

  #Educational Attainment
  "DP02_0060E": "ed_lt9", # Estimate of educational attainment less than 9th grade
  "DP02_0061E": "ed_9_12", # Estimate of educational attainment 9th to 12th grade, no diploma
  "DP02_0062E": "ed_hs", # Estimate of educational attainment high school graduate (includes equivalency) 
  "DP02_0063E": "ed_sc", # Estimate of educational attainment some college, no degree
  "DP02_0064E": "ed_asc", # Estimate of educational attainment associate's degree
  "DP02_0065E": "ed_b", # Estimate of educational attainment bachelor's degree
  "DP02_0066E": "ed_gr", # Estimate of educational attainment graduate or professional degree

  "DP02_0060M": "m_ed_lt9", # Estimate of educational attainment less than 9th grade, margin of error
  "DP02_0061M": "m_ed_9_12", # Estimate of educational attainment 9th to 12th grade, no diploma, margin of error
  "DP02_0062M": "m_ed_hs", # Estimate of educational attainment high school graduate (includes equivalency) , margin of error
  "DP02_0063M": "m_ed_sc", # Estimate of educational attainment some college, no degree, margin of error
  "DP02_0064M": "m_ed_asc", # Estimate of educational attainment associate's degree, margin of error
  "DP02_0065M": "m_ed_b", # Estimate of educational attainment bachelor's degree, margin of error
  "DP02_0066M": "m_ed_gr", # Estimate of educational attainment graduate or professional degree, margin of error

  #Age 65 and over
  "DP05_0024E": "age_gte65", # Estimate of population age 65 and over
  
  "DP05_0024M": "m_age_gte65", # Estimate of population age 65 and over, margin of error

  #Race
  "DP05_0037E" : "race_white", #Estimate of total population white alone
  "DP05_0038E" : "race_black", #Estimate of total population black or African American alone
  "DP05_0039E" : "race_aian", #Estimate of total population American Indian and Alaska Native alone
  "DP05_0044E" : "race_asian", #Estimate of total population Asian alone
  "DP05_0052E" : "race_nhopi", #Estimate of total population Native Hawaiian and Other Pacific Islander alone
  "DP05_0057E" : "race_other", #Estimate of total population some other race

  "DP05_0037M" : "m_race_white", #Estimate of total population white alone, margin of error
  "DP05_0038M" : "m_race_black", #Estimate of total population black or African American alone, margin of error
  "DP05_0039M" : "m_race_aian", #Estimate of total population American Indian and Alaska Native alone, margin of error
  "DP05_0044M" : "m_race_asian", #Estimate of total population Asian alone, margin of error
  "DP05_0052M" : "m_race_nhopi", #Estimate of total population Native Hawaiian and Other Pacific Islander alone, margin of error
  "DP05_0057M" : "m_race_other", #Estimate of total population some other race, margin of error

  #Ethnicity
  "DP05_0071E" : "eth_hisp", #Estimate of total population Hispanic or Latino

  "DP05_0071M" : "m_eth_hisp", #Estimate of total population Hispanic or Latino, margin of error

  #Gender
  "DP05_0002E" : "sex_male", #Estimate of total population male
  "DP05_0003E" : "sex_female", #Estimate of total population female

  "DP05_0002M" : "m_sex_male", #Estimate of total population male, margin of error
  "DP05_0003M" : "m_sex_female", #Estimate of total population female, margin of error

  #Occupation
  "DP03_0005E" : "occ_unemp", # Estimate of civilian employed population 16 years unemployed
  "DP03_0027E" : "occ_mgt", # Estimate of civilian employed population 16 years and over in management, business, science, and arts occupations
  "DP03_0028E" : "occ_svc", # Estimate of civilian employed population 16 years and over in service occupations
  "DP03_0029E" : "occ_sales", # Estimate of civilian employed population 16 years and over in sales and office occupations
  "DP03_0030E" : "occ_nat_res", # Estimate of civilian employed population 16 years and over in natural resources, construction, and maintenance occupations
  "DP03_0031E" : "occ_prod", # Estimate of civilian employed population 16 years and over in production, transportation, and material moving occupations

  "DP03_0007M" : "m_occ_n_lab", # Estimate of civilian employed population 16 years and over not in labor force, margin of error
  "DP03_0027M" : "m_occ_mgt", # Estimate of civilian employed population 16 years and over in management, business, science, and arts occupations, margin of error
  "DP03_0028M" : "m_occ_svc", # Estimate of civilian employed population 16 years and over in service occupations, margin of error
  "DP03_0029M" : "m_occ_sales", # Estimate of civilian employed population 16 years and over in sales and office occupations, margin of error
  "DP03_0030M" : "m_occ_nat_res", # Estimate of civilian employed population 16 years and over in natural resources, construction, and maintenance occupations, margin of error
  "DP03_0031M" : "m_occ_prod", # Estimate of civilian employed population 16 years and over in production, transportation, and material moving occupations, margin of error

  #Health Insurance Coverage
  "DP03_0099E": "hlth_unins", # Estimate of civilian noninstitutionalized population without health insurance coverage

  "DP03_0099M": "m_hlth_unins", # Estimate of civilian noninstitutionalized population without health insurance coverage, margin of error

  #Income (Estimates not available, percentage only)
  "DP03_0128PE": "ses_pov", # Percentage estimate of all people whose income in past 12 is below poverty level; all people

  #Transportation
  "DP04_0058E": "tp_veh_0", # Estimate of occupied housing units with no vehicles available
  "DP04_0059E": "tp_veh_1", # Estimate of occupied housing units with 1 vehicle available
  "DP04_0060E": "tp_veh_2", # Estimate of occupied housing units with 2 vehicles available
  "DP04_0061E": "tp_veh_3", # Estimate of occupied housing units with 3 vehicles available

  "DP04_0058M": "m_tp_veh_0", # Estimate of occupied housing units with no vehicles available, margin of error
  "DP04_0059M": "m_tp_veh_1", # Estimate of occupied housing units with 1 vehicle available, margin of error
  "DP04_0060M": "m_tp_veh_2", # Estimate of occupied housing units with 2 vehicles available, margin of error
  "DP04_0061M": "m_tp_veh_3", # Estimate of occupied housing units with 3 vehicles available, margin of error

  #Broadband Internet Subscription
  "DP02_0154E": "dg_bb_int", # Estimate of households with internet (Broadband) subscription

  "DP02_0154M": "m_dg_bb_int", # Estimate of households with internet (Broadband) subscription, margin of error
  
}
vars = ["NAME"] + list(var_map.keys())

df = ced.download(acs5_profile, year, vars, state = all_states, county = all_counties)
df = df.rename(columns=var_map)
df.columns = map(str.lower, df.columns)

## Variables by denominator

In [70]:
# Variables that use total population
pop = [
    'age_gte65','race_white', 'race_black', 'race_aian', 'race_asian',
    'race_nhopi', 'race_other', 'hlth_unins', 
    'sex_male','sex_female'
]

# Variables that use 25+ population
edu_pop = [
    'ed_lt9', 'ed_9_12', 'ed_hs', 'ed_sc', 'ed_asc', 'ed_b', 'ed_gr',
]

# Variables that use 16+ population
occ_pop = [
    'occ_unemp', 'occ_mgt', 'eth_hisp',
    'occ_svc', 'occ_sales', 'occ_nat_res', 'occ_prod'
]

# Variables that use household count
hh = [
    'tp_veh_0','tp_veh_1', 'tp_veh_2', 'tp_veh_3', 'dg_bb_int'
]

## Percentage Calculation

In [71]:
for i in pop:
    df['pct_'+i] = df[i]/df['pop_tot']
for i in edu_pop:
    df['pct_'+i] = df[i]/df['pop_25']
for i in hh:
    df['pct_'+i] = df[i]/df['hh_tot']
for i in occ_pop:
    df['pct_'+i] = df[i]/df['pop_lf']

In [72]:
df.head(1)

Unnamed: 0,state,county,name,pop_tot,pop_25,pop_lf,hh_tot,ed_lt9,ed_9_12,ed_hs,ed_sc,ed_asc,ed_b,ed_gr,m_ed_lt9,m_ed_9_12,m_ed_hs,m_ed_sc,m_ed_asc,m_ed_b,m_ed_gr,age_gte65,m_age_gte65,race_white,race_black,race_aian,race_asian,race_nhopi,race_other,m_race_white,m_race_black,m_race_aian,m_race_asian,m_race_nhopi,m_race_other,eth_hisp,m_eth_hisp,sex_male,sex_female,m_sex_male,m_sex_female,occ_unemp,occ_mgt,occ_svc,occ_sales,occ_nat_res,occ_prod,m_occ_n_lab,m_occ_mgt,m_occ_svc,m_occ_sales,m_occ_nat_res,m_occ_prod,hlth_unins,m_hlth_unins,ses_pov,tp_veh_0,tp_veh_1,tp_veh_2,tp_veh_3,m_tp_veh_0,m_tp_veh_1,m_tp_veh_2,m_tp_veh_3,dg_bb_int,m_dg_bb_int,pct_age_gte65,pct_race_white,pct_race_black,pct_race_aian,pct_race_asian,pct_race_nhopi,pct_race_other,pct_hlth_unins,pct_sex_male,pct_sex_female,pct_ed_lt9,pct_ed_9_12,pct_ed_hs,pct_ed_sc,pct_ed_asc,pct_ed_b,pct_ed_gr,pct_tp_veh_0,pct_tp_veh_1,pct_tp_veh_2,pct_tp_veh_3,pct_dg_bb_int,pct_occ_unemp,pct_occ_mgt,pct_eth_hisp,pct_occ_svc,pct_occ_sales,pct_occ_nat_res,pct_occ_prod
0,1,1,"Autauga County, Alabama",58239,39614.0,25871,21856.0,803.0,3323.0,12977.0,7774.0,3593.0,6507.0,4637.0,237.0,444.0,866.0,805.0,551.0,703.0,573.0,8815,92.0,43755,11470,98,647,0,196,311,309,85,206,30,181,1775,,28206,30033,213.0,213.0,752,9850,3898,5656,2460,4007,846,873,483,612,384,469,4496,706,13.6,1182,6891,7383,6400,282,606,522,537,18679.0,477.0,0.151359,0.751301,0.196947,0.001683,0.011109,0.0,0.003365,0.077199,0.484315,0.515685,0.020271,0.083884,0.327586,0.196244,0.0907,0.16426,0.117055,0.054081,0.315291,0.337802,0.292826,0.854639,0.029067,0.380735,0.06861,0.150671,0.218623,0.095087,0.154884


## Confidence Intervals
- Margin of error is 90% confidence interval
- To calculate confidence interval for percentage estimate:
    - (estimate - margin of error, estimate + margin of error) = raw confidence interval
    - Calculate upper and lower percentage from total population or households = percentage confidence interval

In [49]:
#export to csv
df.to_csv('acs5y_2021.csv', index=False, header=True)