In [19]:
import os
import sys
import pandas as pd
import censusdis
from censusdis import data
import censusdis.data as ced

# Display options: lift pandas' truncation limits so that wide or long
# DataFrames are rendered in full (None means "no limit").
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [15]:
# Dataset identifier passed to the download call below.
acs5_profile = "acs/acs5/profile"

# Vintage of the estimates: the 2021 ACS 5-year release.
year = 2021

# Geography wildcards: request every state and every county.
all_states = all_counties = "*"

| Variables  | Level        | Concept                                       | Measure                                                                                       | Source               | Note                                                                        |
|------------|--------------|-----------------------------------------------|------------------------------------------------------------------------------------------------|----------------------|-----------------------------------------------------------------------------|
| RQ1 DV     | Outcome      | Diabetes Prevalence                           | Diagnosed diabetes among adults aged >=18 years; % of county                                   | BRFSS/PLACES 2021    | Upper and Lower confidence; crude and age-adjusted                          |
| RQ2 DV     | Outcome      | Self-Reported Health Status                   | Fair or poor self-rated health status among adults aged >=18 years; % of county                | BRFSS/PLACES 2021    | Upper and Lower confidence; crude and age-adjusted                          |
| IV         | Intermediary | Behaviors and Biological Factors: Internet Adoption| Households with internet (Broadband) subscription; %                                    | ACS 5Y 2021          | proportion and raw count with margin of error                               |
| IV         | Structural   | Internet Access: Social Policy/Digital Equity | Internet availability (<25/3 Mbps)                                                           | NTIA                 | Endogenous variable                                                         |
| Covariates | Structural   | Gender, Age, Ethnicity                        | Aged 65 & Older, Gender, Race, Ethnicity                                                      | ACS 5Y 2021          | proportion and raw count with margin of error                               |
| Covariates | Structural   | Education, Occupation, Income                 | Educational attainment, Industry employed, employment status, income in past 12 months below poverty level | ACS 5Y 2021          | proportion and raw count with margin of error                               |
| Covariates | Intermediary | Material Circumstances                        | Transportation, Housing type, Health insurance coverage                                       | ACS 5Y 2021; BRFSS 2021 (insurance)| proportion and raw count with margin of error                  |
| Covariates | Intermediary | Behaviors and Biological Factors              | Obesity, smoking status, visit to doctor in past year (adults)                                | BRFSS/PLACES 2021    | Upper and Lower confidence; crude and age-adjusted                          |
| Covariates | Intermediary | Psychological Factors                         | Depression among adults, Mental health “not good” for >= 14 days among adults                  | BRFSS/PLACES 2021    | Upper and Lower confidence; crude and age-adjusted                          |
---

In [39]:
# Maps each requested ACS variable code to the short column name used in the
# output table. All codes end in "PE", and every entry is described below as a
# percentage. Insertion order is the column order of the downloaded frame.
var_map = {
    # Educational attainment
    "DP02_0060PE": "ed_lt9",    # Percent with less than 9th grade
    "DP02_0061PE": "ed_9_12",   # Percent with 9th to 12th grade, no diploma
    "DP02_0062PE": "ed_hs",     # Percent high school graduate (includes equivalency)
    "DP02_0063PE": "ed_sc",     # Percent with some college, no degree
    "DP02_0064PE": "ed_asc",    # Percent with an associate's degree
    "DP02_0065PE": "ed_b",      # Percent with a bachelor's degree
    "DP02_0066PE": "ed_gr",     # Percent with a graduate or professional degree

    # Age 65 and over
    "DP05_0024PE": "age_gte65",  # Percent of population age 65 and over

    # Race
    "DP05_0037PE": "race_white",  # Percent of total population White alone
    "DP05_0038PE": "race_black",  # Percent of total population Black or African American alone
    "DP05_0039PE": "race_aian",   # Percent of total population American Indian and Alaska Native alone
    "DP05_0044PE": "race_asian",  # Percent of total population Asian alone
    "DP05_0052PE": "race_nhopi",  # Percent of total population Native Hawaiian and Other Pacific Islander alone
    "DP05_0057PE": "race_other",  # Percent of total population some other race

    # Ethnicity
    "DP05_0071PE": "eth_hisp",  # Percent of total population Hispanic or Latino

    # Gender
    "DP05_0002PE": "sex_male",    # Percent of total population male
    "DP05_0003PE": "sex_female",  # Percent of total population female

    # Employment status and occupation
    # occ_n_lab is an employment-status item: its share is of the population
    # 16 years and over, not of the civilian employed population, so it does
    # not sum with the occupation categories that follow.
    "DP03_0007PE": "occ_n_lab",    # Percent of population 16 years and over not in labor force
    "DP03_0027PE": "occ_mgt",      # Percent of civilian employed population 16 years and over in management, business, science, and arts occupations
    "DP03_0028PE": "occ_svc",      # Percent of civilian employed population 16 years and over in service occupations
    "DP03_0029PE": "occ_sales",    # Percent of civilian employed population 16 years and over in sales and office occupations
    "DP03_0030PE": "occ_nat_res",  # Percent of civilian employed population 16 years and over in natural resources, construction, and maintenance occupations
    "DP03_0031PE": "occ_prod",     # Percent of civilian employed population 16 years and over in production, transportation, and material moving occupations

    # Housing characteristics
    # TODO: no housing-type variables are mapped yet.

    # Health insurance coverage
    "DP03_0099PE": "hlth_unins",  # Percent of civilian noninstitutionalized population without health insurance coverage

    # Income
    "DP03_0128PE": "ses_pov",  # Percent of all people whose income in the past 12 months is below the poverty level

    # Transportation (vehicles available per occupied housing unit; the four
    # categories are exhaustive and sum to 100%)
    "DP04_0058PE": "tp_veh_0",  # Percent of occupied housing units with no vehicles available
    "DP04_0059PE": "tp_veh_1",  # Percent of occupied housing units with 1 vehicle available
    "DP04_0060PE": "tp_veh_2",  # Percent of occupied housing units with 2 vehicles available
    "DP04_0061PE": "tp_veh_3",  # Percent of occupied housing units with 3 or more vehicles available

    # Broadband internet subscription
    "DP02_0154PE": "dg_bb_int",  # Percent of households with a broadband internet subscription
}
# Variables to request: the geography name plus every mapped ACS code.
# Named `download_vars` (not `vars`) so the Python builtin is not shadowed.
download_vars = ["NAME"] + list(var_map)

# Download the county-level table, then rename the ACS codes to their short
# names and lower-case every remaining column label.
# NOTE(review): ACS uses large negative sentinel values for unavailable
# estimates; confirm whether any appear here and should become NaN.
df = (
    ced.download(
        acs5_profile,
        year,
        download_vars,
        state=all_states,
        county=all_counties,
    )
    .rename(columns=var_map)
    .rename(columns=str.lower)
)

# Sanity check: every mapped variable must be present after renaming.
missing_columns = set(var_map.values()) - set(df.columns)
assert not missing_columns, f"columns missing after download: {sorted(missing_columns)}"

display(df.head(5), df.shape)

Unnamed: 0,state,county,name,ed_lt9,ed_9_12,ed_hs,ed_sc,ed_asc,ed_b,ed_gr,age_gte65,race_white,race_black,race_aian,race_asian,race_nhopi,race_other,eth_hisp,sex_male,sex_female,occ_n_lab,occ_mgt,occ_svc,occ_sales,occ_nat_res,occ_prod,hlth_unins,ses_pov,tp_veh_0,tp_veh_1,tp_veh_2,tp_veh_3,dg_bb_int
0,1,1,"Autauga County, Alabama",2.0,8.4,32.8,19.6,9.1,16.4,11.7,15.1,75.1,19.7,0.2,1.1,0.0,0.3,3.0,48.4,51.6,40.9,38.1,15.1,21.9,9.5,15.5,7.9,13.6,5.4,31.5,33.8,29.3,85.5
1,1,3,"Baldwin County, Alabama",2.1,6.9,27.4,21.7,9.5,20.6,11.8,20.6,84.5,8.8,0.6,1.0,0.0,1.8,4.7,48.8,51.2,40.8,36.1,17.8,23.6,9.5,13.0,9.7,9.2,2.4,31.7,43.2,22.7,87.9
2,1,5,"Barbour County, Alabama",7.4,16.9,36.7,20.5,7.3,6.7,4.4,19.0,45.5,47.4,0.3,0.4,0.0,3.9,4.7,52.9,47.1,54.6,25.6,20.7,19.9,12.2,21.6,10.4,26.5,11.2,33.3,33.8,21.7,64.6
3,1,7,"Bibb County, Alabama",6.2,13.3,43.9,18.0,6.7,7.9,4.0,16.0,75.9,22.3,0.1,0.2,0.0,0.1,2.8,54.9,45.1,50.3,22.0,16.8,21.5,16.6,23.1,10.0,16.9,6.3,24.7,37.7,31.3,74.6
4,1,9,"Blount County, Alabama",6.8,9.6,35.1,21.5,12.1,9.3,5.6,18.0,92.5,1.3,0.3,0.4,0.1,2.1,9.5,50.1,49.9,44.9,28.2,13.8,21.6,15.3,21.1,10.0,13.2,4.5,25.0,32.1,38.4,81.0


(3221, 33)

In [45]:
# Drop Puerto Rico rows (state FIPS code 72).
# The code is compared numerically so the filter works whether `state` holds
# integers (72) or strings ("72"); a plain `== 72` silently matches nothing
# when the codes are strings.
# NOTE(review): confirm the dtype of `state` returned by the download.
# Reassigning instead of dropping in place keeps the cell safe to re-run.
is_puerto_rico = pd.to_numeric(df["state"], errors="coerce") == 72
df = df.loc[~is_puerto_rico].copy()

In [49]:
# Export the table to CSV without the index column.
# The filename is derived from `year` so it always matches the downloaded
# vintage (for year = 2021 this is 'acs5y_2021.csv', as before).
# NOTE(review): if state/county codes are zero-padded strings, read this file
# back with dtype=str for those columns to keep the padding.
output_csv = f"acs5y_{year}.csv"
df.to_csv(output_csv, index=False)