### Merge ACS tract-level and place-level data for diversity/poverty metrics

In [1]:
import os
# Pick the data directory: the mounted shared Google Drive folder when the
# COLAB_GPU environment variable is present, otherwise the local `data` folder.
running_in_colab = 'COLAB_GPU' in os.environ
if not running_in_colab:
    data_path = 'data'
else:
    from google.colab import drive
    drive.mount('/drive')
    data_path = '/drive/Shared drives/Capstone/notebooks/data'

In [2]:
import pandas as pd

# Tract-level ACS table read from the interim data folder.
us_tract = pd.read_csv(f'{data_path}/interim/us_tract_acs.csv')

# A tract is flagged "high poverty" when its poverty metric exceeds the threshold,
# i.e. a poverty ratio below 1 for more than 40% of residents.
# NOTE(review): assumes the column holds a proportion in [0, 1] rather than a
# head count -- confirm against the script that produces us_tract_acs.csv.
poverty_metric = 'poverty_ratio_below_1_people'
HIGH_POVERTY_THRESHOLD = 0.4

# Vectorized comparison instead of a row-wise lambda. Missing values compare as
# False and therefore map to 0, exactly as the previous apply() did.
us_tract['high_poverty'] = (us_tract[poverty_metric] > HIGH_POVERTY_THRESHOLD).astype('int64')



### Calculate Poverty Metric for all US Counties

In [3]:
# Columns that are handy when inspecting the poverty inputs of individual tracts.
# NOTE(review): 'population_county' is only added by the county merge further
# down in this notebook, so selecting `cols` here may fail on a fresh kernel.
cols = ['all_in_poverty', 'population', 'poverty_ratio_below_1_people', 'high_poverty', 'population_county']

# a bit less than 5% of tracts are considered high poverty under this metric
# (kept as the last expression so the counts are actually rendered by the cell)
us_tract['high_poverty'].value_counts()

In [4]:
# People in poverty per county, split by whether they live in a high-poverty
# tract (column 1) or not (column 0).
# - Only `all_in_poverty` is aggregated: summing every column first is wasteful
#   and, on recent pandas versions, attempts to add up non-numeric columns too.
# - `fill_value=0` / `reindex` guarantee that both classes exist as columns even
#   when a county (or the whole input) has tracts of only one class, which
#   previously left NaNs to fill or raised a KeyError.
poverty_by_tract_class = (
    us_tract
    .groupby(['year', 'state', 'county', 'high_poverty'])['all_in_poverty']
    .sum()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)

in_high_poverty_tracts = poverty_by_tract_class[1]
in_other_tracts = poverty_by_tract_class[0]

# Share of a county's poor residents who live in high-poverty tracts.
# Counties with no residents in poverty at all yield 0/0 = NaN, as before.
county_poverty = (
    (in_high_poverty_tracts / (in_high_poverty_tracts + in_other_tracts))
    .rename('proportion_high_poverty_neighborhood')
    .reset_index()
)

county_poverty.head()

Unnamed: 0,year,state,county,proportion_high_poverty_neighborhood
0,2019,1,1,0.0
1,2019,1,3,0.0
2,2019,1,5,0.234909
3,2019,1,7,0.0
4,2019,1,9,0.0


### Calculate diversity metric

In [5]:
# Tract-level count columns that are rolled up to county totals.
groupings = ['population', 'hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# County totals get a `_county` suffix so they can sit next to the tract columns.
column_mapper = {name: f'{name}_county' for name in groupings}

county_keys = ['state', 'county', 'year']
county_race = (
    us_tract
    .groupby(county_keys)[groupings]
    .sum()
    .reset_index()
    .rename(columns=column_mapper)
)

# Attach the county totals to every tract row. Any `_county` columns left over
# from a previous run of this cell are dropped first; otherwise the merge would
# emit `_x`/`_y` suffixed duplicates and later `<race>_county` lookups would fail.
us_tract = pd.merge(
    us_tract.drop(columns=list(column_mapper.values()), errors='ignore'),
    county_race,
    on=county_keys,
    how='left',
)

In [6]:
races = ['hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# One output column per group, named `<race>_exposure`.
races_exposure = [f'{race}_exposure' for race in races]

# Per-tract term of the exposure index:
#   group_in_tract * (tract_population - group_in_tract)
#   ----------------------------------------------------
#          group_in_county * tract_population
tract_population = us_tract['population']
for race in races:
    group_in_tract = us_tract[race]
    numerator = group_in_tract * (tract_population - group_in_tract)
    denominator = us_tract[f'{race}_county'] * tract_population
    us_tract[f'{race}_exposure'] = numerator / denominator

# Summing the tract terms within each county gives the county-level index.
# NOTE(review): groupby sum skips NaN, so a county whose tract terms are all
# NaN (e.g. 0/0) is reported as 0.0 rather than missing -- confirm that this
# is the intended reading of the 0.0 values in the output.
us_tract_exposure = (
    us_tract
    .groupby(['state', 'county', 'year'])[races_exposure]
    .sum()
    .reset_index()
)

us_tract_exposure.head()

Unnamed: 0,state,county,year,hispanic_or_latino_exposure,white_exposure,black_exposure,native_american_exposure,asian_exposure,hawaiian_exposure,some_other_race_alone_exposure,two_more_races_exposure
0,1,1,2019,0.95651,0.212265,0.731823,0.991504,0.972936,0.991068,0.955512,0.965904
1,1,3,2019,0.922474,0.125351,0.792728,0.978007,0.971191,0.998263,0.971838,0.970935
2,1,5,2019,0.920309,0.506731,0.499511,0.9905,0.989702,0.999374,0.927236,0.98217
3,1,7,2019,0.962017,0.202068,0.679886,0.996733,0.997198,0.0,0.999066,0.990628
4,1,9,2019,0.846191,0.044934,0.971955,0.996659,0.994517,0.993537,0.973111,0.978451


In [7]:
# NOTE(review): these repeat the definitions from the first diversity-metric
# cell above and are not referenced by any later cell shown in this notebook;
# candidate for removal.
groupings = ['population', 'hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# Maps each tract-level column to the name of its county-level total.
column_mapper = {name: f'{name}_county' for name in groupings}



### Merge diversity and poverty DataFrames and write the CSV file

In [8]:

# Combine the county-level exposure indices with the county poverty proportion.
# The inner join keeps only county/year keys that appear in both tables.
df_diversity_poverty = us_tract_exposure.merge(
    county_poverty,
    how='inner',
    on=['state', 'county', 'year'],
)

df_diversity_poverty.head()

Unnamed: 0,state,county,year,hispanic_or_latino_exposure,white_exposure,black_exposure,native_american_exposure,asian_exposure,hawaiian_exposure,some_other_race_alone_exposure,two_more_races_exposure,proportion_high_poverty_neighborhood
0,1,1,2019,0.95651,0.212265,0.731823,0.991504,0.972936,0.991068,0.955512,0.965904,0.0
1,1,3,2019,0.922474,0.125351,0.792728,0.978007,0.971191,0.998263,0.971838,0.970935,0.0
2,1,5,2019,0.920309,0.506731,0.499511,0.9905,0.989702,0.999374,0.927236,0.98217,0.234909
3,1,7,2019,0.962017,0.202068,0.679886,0.996733,0.997198,0.0,0.999066,0.990628,0.0
4,1,9,2019,0.846191,0.044934,0.971955,0.996659,0.994517,0.993537,0.973111,0.978451,0.0


In [9]:


# Persist the merged county table to the interim data folder (row index omitted).
output_csv = f'{data_path}/interim/county_diversity_poverty.csv'
df_diversity_poverty.to_csv(output_csv, index=False)




In [10]:
# Transposed view: one row per metric, one column per county record.
df_diversity_poverty.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3209,3210,3211,3212,3213,3214,3215,3216,3217,3218
state,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0
county,1.0,3.0,5.0,7.0,9.0,11.0,13.0,15.0,17.0,19.0,...,135.0,137.0,139.0,141.0,143.0,145.0,147.0,149.0,151.0,153.0
year,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,...,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0,2019.0
hispanic_or_latino_exposure,0.95651,0.922474,0.920309,0.962017,0.846191,0.930308,0.968031,0.885527,0.926053,0.978155,...,0.013605,0.013104,0.008462,0.007589,0.015619,0.032445,0.050671,0.002538,0.000804,0.006318
white_exposure,0.212265,0.125351,0.506731,0.202068,0.044934,0.695058,0.408666,0.209271,0.39343,0.070044,...,0.171079,0.264834,0.201365,0.465485,0.283495,0.166501,0.449506,0.416639,0.79907,0.245679
black_exposure,0.731823,0.792728,0.499511,0.679886,0.971955,0.209959,0.457972,0.610496,0.558683,0.928365,...,0.920172,0.859052,0.87932,0.9784,0.887603,0.938839,0.909083,0.950977,0.207681,0.945775
native_american_exposure,0.991504,0.978007,0.9905,0.996733,0.996659,0.0,0.996455,0.981954,0.993711,0.970712,...,0.992736,0.997173,0.99438,0.995694,0.0,0.990132,0.998196,0.0,0.0,0.988427
asian_exposure,0.972936,0.971191,0.989702,0.997198,0.994517,0.9914,0.982598,0.973751,0.972812,0.994112,...,0.957535,0.0,0.977863,0.993508,0.990677,0.998045,0.996843,0.0,0.99524,0.997178
hawaiian_exposure,0.991068,0.998263,0.999374,0.0,0.993537,0.0,0.0,0.995792,0.0,0.0,...,0.998985,0.0,0.0,0.0,0.0,0.999222,0.0,0.0,0.0,0.0
some_other_race_alone_exposure,0.955512,0.971838,0.927236,0.999066,0.973111,0.929885,0.988847,0.877792,0.987822,0.990375,...,0.926423,0.862423,0.9244,0.554438,0.839573,0.877698,0.624326,0.865965,0.974065,0.791164
