In [None]:
import requests
import regex as re
import numpy as np
import pandas as pd
import tokens
import openpyxl
# Directory prefix for the final CSV export (concatenated with the file name in the last cell).
# It is a relative path into what looks like a local Box sync folder -- adjust it for your machine.
box_path = "../../Library/CloudStorage/Box-Box/EEOC data/"

In [None]:
# Read in the guide for variable names. Later cells rely on these columns:
#   acs_round     -- ACS round the row applies to (2009 or 2014 are used below)
#   source        -- "variables" rows are requested from the base acs5 endpoint,
#                    every other value from the acs5/profile endpoint
#   original_name -- variable code sent to the Census API
#   new_name      -- column name the variable is renamed to after download
census_vars = pd.read_excel("data/census_variable_guide.xlsx")
census_vars

## Get 2005-2009 ACS data

Note: Each ACS round needs two separate queries because the variables are served by different Census API endpoints: White unemployment and population demographics come from the base `acs/acs5` endpoint, while overall unemployment comes from the Data Profiles endpoint (`acs/acs5/profile`).

You'll also need a Census API key to access these data. Save the key as `CENSUS_KEY` in a local `tokens.py` file, which the notebook imports.

In [None]:
# Build the county-level 2005-2009 ACS table (emp_1) from two API calls:
# one to the base acs5 endpoint and one to the acs5/profile endpoint.
vars_09 = census_vars[census_vars['acs_round']==2009]
get_vars = vars_09[vars_09['source']=="variables"]

#get White unemployment and population demographics
new_emp_vars = ",".join(get_vars['original_name'].to_list())
q = f"https://api.census.gov/data/2009/acs/acs5?get=GEO_ID,NAME,{new_emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
# Bound the wait so a stalled connection cannot hang the notebook indefinitely.
r = requests.get(q, timeout=120)
# Stop on an HTTP error instead of failing later with a confusing JSON/DataFrame error.
# The URL is deliberately kept out of the message because it contains the API key.
if not r.ok:
    raise RuntimeError(f"Census API request (2009 acs5) failed: HTTP {r.status_code} {r.reason}")
resp = r.json()
headers = resp.pop(0)  # the first row of the response is used as the column names
emp_1 = pd.DataFrame(resp, columns=headers)

# Map the Census variable codes onto the names given in the variable guide.
tmp_rename = dict(zip(get_vars['original_name'].to_list(),
                     get_vars['new_name'].to_list()))
emp_1 = emp_1.rename(columns = tmp_rename)

#get overall unemployment--this is accessed via a separate API call
get_vars = vars_09[vars_09['source']!="variables"]

new_emp_vars = ",".join(get_vars['original_name'].to_list())
q = f"https://api.census.gov/data/2009/acs/acs5/profile?get=GEO_ID,NAME,{new_emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
r = requests.get(q, timeout=120)
if not r.ok:
    raise RuntimeError(f"Census API request (2009 acs5/profile) failed: HTTP {r.status_code} {r.reason}")
resp = r.json()
headers = resp.pop(0)
tmp_09 = pd.DataFrame(resp, columns=headers)

tmp_rename = dict(zip(get_vars['original_name'].to_list(),
                     get_vars['new_name'].to_list()))
tmp_09 = tmp_09.rename(columns = tmp_rename)
# emp_1 already carries state, county and NAME; drop them here so the merge does not duplicate them.
tmp_09 = tmp_09.drop(columns = ['state', 'county',
                               'NAME'])
emp_1 = pd.merge(emp_1, tmp_09, how='outer',on="GEO_ID")
emp_1

## Get 2010 - 2014 ACS data

In [None]:
# Build the county-level 2010-2014 ACS table (emp_2) from two API calls:
# one to the base acs5 endpoint and one to the acs5/profile endpoint.
vars_14 = census_vars[census_vars['acs_round']==2014]
get_vars = vars_14[vars_14['source']=="variables"]

#get White unemployment and population demographics
new_emp_vars = ",".join(get_vars['original_name'].to_list())
q = f"https://api.census.gov/data/2014/acs/acs5?get=GEO_ID,NAME,{new_emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
# Bound the wait so a stalled connection cannot hang the notebook indefinitely.
r = requests.get(q, timeout=120)
# Stop on an HTTP error instead of failing later with a confusing JSON/DataFrame error.
# The URL is deliberately kept out of the message because it contains the API key.
if not r.ok:
    raise RuntimeError(f"Census API request (2014 acs5) failed: HTTP {r.status_code} {r.reason}")
resp = r.json()
headers = resp.pop(0)  # the first row of the response is used as the column names
emp_2 = pd.DataFrame(resp, columns=headers)

# Map the Census variable codes onto the names given in the variable guide.
tmp_rename = dict(zip(get_vars['original_name'].to_list(),
                     get_vars['new_name'].to_list()))
emp_2 = emp_2.rename(columns = tmp_rename)
# Only GEO_ID is kept as an identifier for this round.
emp_2 = emp_2.drop(columns=['NAME','state','county'])


#get overall unemployment--this is accessed via a separate API call
get_vars = vars_14[vars_14['source']!="variables"]

new_emp_vars = ",".join(get_vars['original_name'].to_list())
q = f"https://api.census.gov/data/2014/acs/acs5/profile?get=GEO_ID,NAME,{new_emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
r = requests.get(q, timeout=120)
if not r.ok:
    raise RuntimeError(f"Census API request (2014 acs5/profile) failed: HTTP {r.status_code} {r.reason}")
resp = r.json()
headers = resp.pop(0)
tmp_14 = pd.DataFrame(resp, columns=headers)

tmp_rename = dict(zip(get_vars['original_name'].to_list(),
                     get_vars['new_name'].to_list()))
tmp_14 = tmp_14.rename(columns = tmp_rename)
tmp_14 = tmp_14.drop(columns = ['state', 'county',
                               'NAME'])
emp_2 = pd.merge(emp_2, tmp_14, how='outer',on="GEO_ID")
emp_2

In [None]:
# Outer-join the two ACS rounds on GEO_ID. Columns that exist in both tables get a
# round suffix, and the `_merge` indicator column records which side(s) each row came from.
merged = emp_1.merge(
    emp_2,
    how="outer",
    on="GEO_ID",
    suffixes=("_0509", "_1014"),
    indicator=True,
)

In [None]:
# Tally rows by merge outcome (both / left_only / right_only) to see how many rows matched across rounds.
merged['_merge'].value_counts()

The Census Bureau's geography changes page (https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2009/geography-changes.html) describes some of the changes to geography that occurred between the 2000 Census and 2009. Checking one example below, these changes do not appear to cause problems: one of the changed counties already has its most up-to-date name in the earlier (2005-2009) dataset.

In [None]:
# Spot-check one county affected by the geography changes.
# NAME comes only from the 2005-2009 table, so any row found only in 2010-2014 has a
# missing NAME; na=False treats those rows as non-matches instead of producing a
# NaN-containing mask, which boolean indexing rejects.
merged[merged['NAME'].str.contains("Angoon", na=False)]

Bedford city failed to merge. See here for an explanation: https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes.2014.html#list-tab-71983198

It had been absorbed by Bedford County in a subsequent year.

In [None]:
# Show the rows that are present in only one of the two ACS rounds.
merged[merged['_merge']!="both"]

Calculate the number of non-Hispanic White people in the civilian labor force

In [None]:
# Component columns: merged columns with a white_femal*/white_male* prefix that end in
# the 0509 suffix and are not unemployment ("unem") columns. Stripping "0509" leaves a
# stem to which each round's suffix is appended below.
in_lf_cols = [c for c in merged.columns.to_list() if (c.startswith("white_femal") or c.startswith("white_male"))]
in_lf_cols = [c.replace("0509","") for c in in_lf_cols if (c.endswith("0509")and "unem" not in c)]

suffixes = ["0509", "1014"]
for s in suffixes:
    tmp_cols = [c+s for c in in_lf_cols]
    print(tmp_cols)
    new_col = "white_lf_"+s
    print(new_col)
    #sum the number of White people in civilian labor force across numeric columns
    for t in tmp_cols:
        # the component columns are converted in place so they can be summed as numbers
        merged[t] = merged[t].astype(float)
    # min_count=1 keeps the total missing (NaN) for rows with no data in this round
    # (rows that did not match in the merge); a plain sum would report them as 0.
    merged[new_col] = merged[tmp_cols].sum(axis=1, min_count=1)
    



Calculate number of unemployed non-Hispanic White people and non-Hispanic White unemployment rate

In [None]:
# Component columns: merged columns with a white_femal*/white_male* prefix that end in
# the 0509 suffix and ARE unemployment ("unem") columns. Stripping "0509" leaves a stem
# to which each round's suffix is appended below.
in_lf_cols = [c for c in merged.columns.to_list() if (c.startswith("white_femal") or c.startswith("white_male"))]
in_lf_cols = [c.replace("0509","") for c in in_lf_cols if (c.endswith("0509")and "unem" in c)]

suffixes = ["0509", "1014"]
for s in suffixes:
    tmp_cols = [c+s for c in in_lf_cols]
    new_col = "white_unemp_n_"+s
    print(new_col)
    #sum the number of unemployed White people across numeric columns
    for t in tmp_cols:
        # the component columns are converted in place so they can be summed as numbers
        merged[t] = merged[t].astype(float)
    # min_count=1 keeps the total missing (NaN) for rows with no data in this round
    # (rows that did not match in the merge); a plain sum would report them as 0.
    merged[new_col] = merged[tmp_cols].sum(axis=1, min_count=1)
    #convert to unemployment rate
    # The denominator is the white_lf_<suffix> column created by the labor-force cell,
    # which must be run before this one.
    new_col_2 = "white_unemp_rate_"+s
    lf_col = 'white_lf_'+s
    merged[new_col_2] = (merged[new_col]/merged[lf_col])*100
    
    

In [None]:
# Inspection only: tmp_cols still holds the list from the last loop iteration in the
# previous cell (the 1014 unemployment component columns); nothing is assigned here.
merged[tmp_cols].astype(float)

Check that the above calculations worked as expected

In [None]:
# Hand-computed total to compare with the white_unemp_n_1014 value shown in the next cell.
# The four addends were presumably read off the row-0 component columns -- TODO confirm.
230+0+262+3

In [None]:
merged.loc[0,'white_unemp_n_1014'] # should equal the manual sum in the previous cell (495)

In [None]:
# Eyeball the derived 2005-2009 columns: labor force count, unemployed count, unemployment rate (%).
merged[['white_lf_0509','white_unemp_n_0509','white_unemp_rate_0509']]

In [None]:
# Eyeball the derived 2010-2014 columns alongside the population totals used for the percent-White calculation.
merged[['white_lf_1014','white_unemp_n_1014','total_pop_1014','total_white_1014','white_unemp_rate_1014']]


In [None]:
# Percent White for each round: White count over total population, both cast to float, times 100.
for pct_col, white_col, pop_col in (
    ("percent_white_0509", "total_white_0509", "total_pop_0509"),
    ("percent_white_1014", "total_white_1014", "total_pop_1014"),
):
    white_n = merged[white_col].astype(float)
    pop_n = merged[pop_col].astype(float)
    merged[pct_col] = 100*(white_n/pop_n)

# Percent people of color is the complement of percent White.
for poc_col, pct_col in (
    ("percent_poc_0509", "percent_white_0509"),
    ("percent_poc_1014", "percent_white_1014"),
):
    merged[poc_col] = 100 - merged[pct_col]



In [None]:
# Write the merged table to the Box folder without the row index.
# The utf-8-sig encoding prefixes the file with a byte-order mark.
# Note: `merged` still contains the `_merge` indicator column, so it is exported too.
export_path = box_path+"emp_pop_data_05_14.csv"
merged.to_csv(export_path, index=False, encoding="utf-8-sig")