In [None]:
import requests
import regex as re
import numpy as np
import pandas as pd


In [None]:
import tokens

In [None]:
import openpyxl

In [None]:
# Folder that receives the CSV exports. The path is relative, so it resolves
# against the notebook's current working directory.
# Keep the trailing slash: file names are appended to this string with `+`.
box_path = "../../Library/CloudStorage/Box-Box/EEOC data/"

## Get 2010-2014 Demographics and Unemployment Rate

Note: We need separate queries because the variables live in different ACS products: population demographics come from the Data Profiles, the unemployment rates come from the Subject Tables, and the white non-Hispanic labor-force counts come from the Detailed Tables.

You'll also need a Census API key to access these data. Save it as a variable named `CENSUS_KEY` in an importable `tokens.py` file.

In [None]:
# ---------------------------------------------------------------------------
# Build `df`: one row per county from the 2014 ACS 5-year release, combining
#   1. population demographics  (Data Profile endpoint),
#   2. unemployment rates       (Subject Table endpoint),
#   3. white non-Hispanic labor-force counts (base acs5 endpoint).
# Every request is checked before its payload is parsed, so a failed call
# stops the cell with a clear message instead of an opaque JSON error.
#
# NOTE(review): ACS can encode unavailable estimates as large negative
# sentinel codes (e.g. -666666666); nothing below filters them out, so
# confirm that downstream analysis handles them.
# ---------------------------------------------------------------------------

# Seconds to wait on each Census API call, so a stalled connection cannot
# hang the notebook indefinitely.
CENSUS_TIMEOUT_S = 120

# --- 1. Population demographics ---------------------------------------------
pop_vars = ["DP05_0072PE","DP05_0072E",
            "DP05_0058E"]
variable_names = dict(zip(pop_vars,
                         ["prop_white_alone","white_alone_pop",
                          "total_population"]))
pop_vars = ",".join(pop_vars)
q = f"https://api.census.gov/data/2014/acs/acs5/profile?get=GEO_ID,NAME,{pop_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
pop_r = requests.get(q, timeout=CENSUS_TIMEOUT_S)
if not pop_r.ok:
    # Report only the status code: the request URL embeds the API key.
    raise RuntimeError(f"Census 2014 data-profile request failed (HTTP {pop_r.status_code})")
pop_resp = pop_r.json()
headers = pop_resp.pop(0)  # first row of the payload holds the column names
pop_df = pd.DataFrame(pop_resp, columns=headers)
pop_df = pop_df.rename(columns = {"GEO_ID":"geo_id",
                                  "NAME": "county_state",
                                  "DP05_0072PE": "percent_white",
                                  "DP05_0072E": "num_white",
                                  "DP05_0058E": "total_pop",
                                  "state": "state_fips",
                                  "county": "county_fips"})
# Concatenate the state and county code strings into a single county identifier.
pop_df["fixed_fip"] = pop_df["state_fips"]+pop_df["county_fips"]
# These three columns also come back from the unemployment query below, so
# drop them here to avoid duplicated columns after the merge.
pop_df = pop_df.drop(columns = ["state_fips", "county_fips","county_state"])

# --- 2. Unemployment rates ---------------------------------------------------
# Variable list: https://api.census.gov/data/2014/acs/acs5/subject/variables.html
# Author's notes on the variables:
#   S2301_C04_018E = unemployment for white alone, not hispanic
#   S2301_C04_019E = unemployment for total population between 20-65
#   S2301_C04_001E = unemployment for total population over 16 y/o
emp_vars = ["S2301_C04_018E","S2301_C04_019E","S2301_C04_001E"]
emp_vars = ",".join(emp_vars)
q = f"https://api.census.gov/data/2014/acs/acs5/subject?get=GEO_ID,NAME,{emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
r = requests.get(q, timeout=CENSUS_TIMEOUT_S)
if not r.ok:
    raise RuntimeError(f"Census 2014 subject-table request failed (HTTP {r.status_code})")
resp = r.json()
headers = resp.pop(0)
emp_df = pd.DataFrame(resp, columns=headers)
emp_df = emp_df.rename(columns = {"GEO_ID":"geo_id",
                                  "NAME": "county_state",
                                  "S2301_C04_018E": "unemploy_white",
                                  "S2301_C04_019E": "unemploy_20_65",
                                  "S2301_C04_001E": "unemploy_all",
                                  "state":"state_fips",
                                  "county": "county_fips"})
# Default (inner) merge: only counties present in both responses are kept.
df = pd.merge(pop_df, emp_df, on = ["geo_id"])

# --- 3. White non-Hispanic labor-force counts --------------------------------
# Author's notes on the variables:
#   C23002H_017E = total white NH females in labor force 16-64
#   C23002H_004E = total white NH males in labor force 16-64
#   C23002H_008E = total white NH males in labor force unemployed 16-64
#   C23002H_021E = total white NH females in labor force unemployed 16-64
#   C23002H_024E = total white NH females in labor force > 65
#   C23002H_026E = total white NH females in labor force unemployed > 65
#   C23002H_011E = total white NH males in labor force > 65
#   C23002H_013E = total white NH males in labor force unemployed > 65
new_emp_vars = ["C23002H_017E", "C23002H_004E",
                "C23002H_008E", "C23002H_021E",
                "C23002H_024E", "C23002H_026E",
                "C23002H_011E", "C23002H_013E"]
new_emp_vars = ",".join(new_emp_vars)
q = f"https://api.census.gov/data/2014/acs/acs5?get=GEO_ID,NAME,{new_emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
r = requests.get(q, timeout=CENSUS_TIMEOUT_S)
if not r.ok:
    raise RuntimeError(f"Census 2014 labor-force request failed (HTTP {r.status_code})")
resp = r.json()
headers = resp.pop(0)
emp_1 = pd.DataFrame(resp, columns=headers)
# Column suffixes: "y" = the 16-64 age group, "o" = the 65-and-over age group.
emp_1 = emp_1.rename(columns = {"GEO_ID":"geo_id",
                                "NAME": "county_state",
                                "C23002H_017E": "white_females_inlfy",
                                "C23002H_004E": "white_males_inlfy",
                                "C23002H_008E": "white_males_inlf_unemy",
                                "C23002H_021E": "white_females_inlf_unemy",
                                "C23002H_013E": "white_males_inlf_unemo",
                                "C23002H_011E": "white_males_inlfo",
                                "C23002H_026E": "white_females_inlf_unemo",
                                "C23002H_024E": "white_females_inlfo",
                                "state":"state_fips",
                                "county": "county_fips"})

# Cast the eight count columns to float once so they can be summed.
lf_counts = emp_1[["white_females_inlfy", "white_females_inlfo",
                   "white_males_inlfy", "white_males_inlfo",
                   "white_females_inlf_unemy", "white_females_inlf_unemo",
                   "white_males_inlf_unemy", "white_males_inlf_unemo"]].astype(float)

# Unemployed count: both sexes, both age groups. Plain `+` is used so that a
# missing component makes the total missing rather than silently smaller.
emp_1["white_unem_inlf"] = (
    (lf_counts["white_females_inlf_unemo"] + lf_counts["white_females_inlf_unemy"])
    + (lf_counts["white_males_inlf_unemo"] + lf_counts["white_males_inlf_unemy"])
)
# In-labor-force count: both sexes, both age groups.
emp_1["white_inlf"] = (
    (lf_counts["white_males_inlfo"] + lf_counts["white_males_inlfy"])
    + (lf_counts["white_females_inlfo"] + lf_counts["white_females_inlfy"])
)

# Keep only the two totals plus the join key, then attach them to `df`.
# A left join keeps every county already in `df`, even without a match here.
emp_1 = emp_1[["white_unem_inlf", "white_inlf", "geo_id"]]
df = pd.merge(df, emp_1, how = "left", on = "geo_id")


In [None]:
# Inspect the merged 2010-2014 county table before exporting it.
df

In [None]:
# Export the merged 2010-2014 county table as CSV without the index column.
# The "utf-8-sig" codec writes UTF-8 preceded by a byte-order mark.
csv_path_2014 = box_path + "white_unemployment_pop.csv"
df.to_csv(csv_path_2014, index=False, encoding="utf-8-sig")

## Get 2006-2010 Demographics and Unemployment

In [None]:
# ---------------------------------------------------------------------------
# Build `df_10`: one row per county from the 2010 ACS 5-year release,
# combining population demographics (Data Profile endpoint) with
# unemployment rates (Subject Table endpoint).
# Every request is checked before its payload is parsed, so a failed call
# stops the cell with a clear message instead of an opaque JSON error.
#
# NOTE(review): the variable codes below are the same ones used for the 2014
# release. Confirm against the 2010 variable lists that each code still
# carries the meaning given here.
# NOTE(review): ACS can encode unavailable estimates as large negative
# sentinel codes (e.g. -666666666); nothing below filters them out, so
# confirm that downstream analysis handles them.
# ---------------------------------------------------------------------------

# Seconds to wait on each Census API call, so a stalled connection cannot
# hang the notebook indefinitely.
CENSUS_TIMEOUT_S = 120

# --- 1. Population demographics ---------------------------------------------
pop_vars = ["DP05_0072PE","DP05_0072E",
            "DP05_0058E"]
variable_names = dict(zip(pop_vars,
                         ["prop_white_alone","white_alone_pop",
                          "total_population"]))
pop_vars = ",".join(pop_vars)
q = f"https://api.census.gov/data/2010/acs/acs5/profile?get=GEO_ID,NAME,{pop_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
pop_r = requests.get(q, timeout=CENSUS_TIMEOUT_S)
if not pop_r.ok:
    # Report only the status code: the request URL embeds the API key.
    raise RuntimeError(f"Census 2010 data-profile request failed (HTTP {pop_r.status_code})")
pop_resp = pop_r.json()
headers = pop_resp.pop(0)  # first row of the payload holds the column names
pop_df_10 = pd.DataFrame(pop_resp, columns=headers)
pop_df_10 = pop_df_10.rename(columns = {"GEO_ID":"geo_id",
                                        "NAME": "county_state",
                                        "DP05_0072PE": "percent_white",
                                        "DP05_0072E": "num_white",
                                        "DP05_0058E": "total_pop",
                                        "state": "state_fips",
                                        "county": "county_fips"})
# Concatenate the state and county code strings into a single county identifier.
pop_df_10["fixed_fip"] = pop_df_10["state_fips"]+pop_df_10["county_fips"]
# These three columns also come back from the unemployment query below, so
# drop them here to avoid duplicated columns after the merge.
pop_df_10 = pop_df_10.drop(columns = ["state_fips", "county_fips","county_state"])

# --- 2. Unemployment rates ---------------------------------------------------
# Author's notes on the variables:
#   S2301_C04_018E = unemployment for white alone, not hispanic
#   S2301_C04_019E = unemployment for total population between 20-65
#   S2301_C04_001E = unemployment for total population over 16 y/o
emp_vars = ["S2301_C04_018E","S2301_C04_019E","S2301_C04_001E"]
emp_vars = ",".join(emp_vars)
q = f"https://api.census.gov/data/2010/acs/acs5/subject?get=GEO_ID,NAME,{emp_vars}&for=county:*&in=state:*&key={tokens.CENSUS_KEY}"
r = requests.get(q, timeout=CENSUS_TIMEOUT_S)
if not r.ok:
    raise RuntimeError(f"Census 2010 subject-table request failed (HTTP {r.status_code})")
resp = r.json()
headers = resp.pop(0)
emp_df_10 = pd.DataFrame(resp, columns=headers)
emp_df_10 = emp_df_10.rename(columns = {"GEO_ID":"geo_id",
                                        "NAME": "county_state",
                                        "S2301_C04_018E": "unemploy_white",
                                        "S2301_C04_019E": "unemploy_20_65",
                                        "S2301_C04_001E": "unemploy_all",
                                        "state":"state_fips",
                                        "county": "county_fips"})
# Default (inner) merge: only counties present in both responses are kept.
df_10 = pd.merge(pop_df_10, emp_df_10, on = ["geo_id"])

In [None]:
# Export the merged 2006-2010 county table as CSV without the index column.
# The "utf-8-sig" codec writes UTF-8 preceded by a byte-order mark.
csv_path_2010 = box_path + "white_unemployment_pop_2010.csv"
df_10.to_csv(csv_path_2010, index=False, encoding="utf-8-sig")