In [2]:
import sys
sys.path.append('../src/')

import pandas as pd
import data_io
import utils
import requests
import hire_tokens as tokens
# Census API key, read from the local `hire_tokens` module rather than written into the notebook.
CENSUS_KEY = tokens.CENSUS_KEY

### Run functions to get ACS data for calculating Neighborhood Deprivation Index in R

In [5]:
'''Functions to get 5-year estimates'''
#Retrieve ACS estimates to calculate NDI in R
#First, run this script, then go back to R (clustr.R), run the PCA,
#and match with GFM data in census_and_gfm_analysis.ipynb
def _fetch_census_table(url, timeout=120):
    '''Request `url` and return its JSON payload as a DataFrame.

    The payload is treated as a list of rows whose first row holds the column names.

    Parameters
    ----------
    url : str
        Full request URL, including the API key.
    timeout : float
        Seconds passed to `requests.get` so a stalled connection cannot hang the notebook.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    response = requests.get(url, timeout=timeout)
    # Fail here with the HTTP status instead of a confusing JSON/indexing error further down.
    response.raise_for_status()
    rows = response.json()
    return pd.DataFrame(data=rows[1:], columns=rows[0])  # 1st row as the column names


def get_acs_estimates_5_year(census_key, save):
    '''Download county-level 2017 ACS 5-year estimates and derive the combined columns.

    Reads the variable list from `census_variables.csv` (columns `variable_call` and
    `variable_label`), requests those variables from the ACS 5-year profile endpoint for
    every county, renames them to their labels, and merges in B19083_001E from the ACS
    5-year detailed endpoint as `gini_income_inequality`.

    Derived columns: `percent_crowding`, `percent_single_parent`, `state_int`,
    `percent_less_35k` and `state_county_fips_str`.

    Parameters
    ----------
    census_key : str
        Census API key; appended to both request URLs.
    save : bool
        If truthy, write the result to `acs_five_year_est.csv` under
        `data_io.input_cleaned/'census'`.

    Returns
    -------
    pandas.DataFrame
        One row per county kept by the state filter.

    Raises
    ------
    requests.HTTPError
        If either Census request returns a 4xx/5xx status.
    '''
    variable_df = pd.read_csv(data_io.input_cleaned/'census'/'census_variables.csv',
                              encoding='utf-8')
    variable_calls = variable_df['variable_call'].to_list()
    variable_labels = variable_df['variable_label'].to_list()

    profile_request = ("https://api.census.gov/data/2017/acs/acs5/profile?get=NAME,"+
                       ','.join(variable_calls)+
                       "&for=county:*&in=state:*&key="+
                       census_key)
    profile = _fetch_census_table(profile_request)
    profile = profile.rename(columns=dict(zip(variable_calls, variable_labels)))

    # The API returns every value as a string, so cast before adding.
    profile['percent_crowding'] = (profile['percent_1_1.5_per_room'].astype(float) +
                                   profile['more_1.5_per_room'].astype(float))
    profile['percent_single_parent'] = (profile['single_dad'].astype(float) +
                                        profile['single_mom'].astype(float))
    profile['state_int'] = profile['state'].astype(int)
    # Keep state codes up to 56 only; presumably this drops territories such as
    # Puerto Rico -- TODO confirm.
    profile = profile[profile['state_int'] <= 56]

    gini_request = ("https://api.census.gov/data/2017/acs/acs5?get=NAME,B19083_001E&for=county:*&in=state:*&key="+
                    census_key)
    gini = _fetch_census_table(gini_request)

    # Both tables carry NAME/state/county. Merge on NAME and keep the state/county
    # columns coming from the Gini table, as the original output did.
    all_vars = pd.merge(profile, gini, how='left', on='NAME')
    all_vars = all_vars.drop(columns=['state_x', 'county_x'])
    all_vars = all_vars.rename(columns={'B19083_001E':'gini_income_inequality',
                                        'state_y':'state',
                                        'county_y':'county'})

    income_brackets = ['percent_less_10k', 'percent_10k_15k', 'percent_15k_25k', 'percent_25k_35k']
    all_vars['percent_less_35k'] = (all_vars[income_brackets[0]].astype(float) +
                                    all_vars[income_brackets[1]].astype(float) +
                                    all_vars[income_brackets[2]].astype(float) +
                                    all_vars[income_brackets[3]].astype(float))

    # Concatenate the formatted state and county codes into one string key
    # (formatting is delegated to utils; presumably zero-padding -- verify there).
    all_vars['state_county_fips_str'] = (all_vars['state'].apply(utils.format_state_fip) +
                                         all_vars['county'].apply(utils.format_county_fip))

    if save:
        all_vars.to_csv(data_io.input_cleaned/'census'/'acs_five_year_est.csv',
                        encoding='utf-8', index=False)
    return all_vars


#### Generate Census datasets for use in R

In [6]:
# Fetch the county-level ACS 5-year estimates; save=True also writes them to CSV.
census = get_acs_estimates_5_year(CENSUS_KEY, save=True)

#### Use ../R/clust.R to generate neighborhood deprivation indices