### Merge ACS tract-level and place-level data for diversity/poverty metrics

In [None]:
# Root folder of the project's data files; every read/write path in this notebook is built from it.
data_path = "data"

Mounted at /drive


In [None]:
import pandas as pd

# Load the interim ACS extracts and the tract-to-place crosswalk.
# NOTE(review): ri_places is not referenced again in the cells visible here.
ri_places = pd.read_csv(f'{data_path}/interim/ri_place_acs.csv')
# NOTE(review): the file is named "ri_tract_acs" but the frame is used as a
# multi-state table below (it is filtered to state 44 and grouped by
# state/county) -- confirm the file really holds all US tracts.
us_tract = pd.read_csv(f'{data_path}/interim/ri_tract_acs.csv')
ri_mapper = pd.read_csv(f'{data_path}/interim/tract_place_mapper.csv')


# Column used to classify tracts. Change it here to switch the poverty
# definition; the flag below reads this variable instead of a hard-coded name.
poverty_metric = 'poverty_ratio_below_1_people'
# A tract is "high poverty" when strictly more than this share of residents
# falls under the chosen metric (poverty ratio below 1 for more than 40%).
high_poverty_threshold = 0.4

# 1 = high-poverty tract, 0 = otherwise. Missing ratios compare as False and
# therefore get 0, exactly as the previous row-by-row lambda did.
us_tract['high_poverty'] = (us_tract[poverty_metric] > high_poverty_threshold).astype(int)


# Rhode Island subset (state code 44), used for the core-city metrics.
ri_tract = us_tract[us_tract['state'] == 44]

In [None]:
# Keep only the crosswalk fields used to attach a place / core-city label to a tract.
mapper_columns = ['COUNTYFP', 'tract_ce', 'place_fp', 'core_cities']
ri_mapper = ri_mapper[mapper_columns]

ri_mapper.head()

Unnamed: 0,COUNTYFP,tract_ce,place_fp,core_cities
0,7,15500,54640.0,Pawtucket
1,7,2700,59000.0,Providence
2,3,20603,,Not_core_city
3,7,1300,59000.0,Providence
4,7,11800,,Not_core_city


In [None]:

# Attach the place / core-city label to every Rhode Island tract.
# Census tract codes are only unique within a county, so match on
# county + tract rather than on the tract code alone; otherwise two tracts
# that share a code in different counties would be cross-joined.
# how='right' keeps every tract, including those with no crosswalk entry
# (their mapper columns become NaN).
# NOTE(review): assumes the crosswalk only contains Rhode Island rows, since
# it carries no state column in the cells shown -- confirm.
ri_tract = pd.merge(
    ri_mapper,
    ri_tract,
    how='right',
    left_on=['COUNTYFP', 'tract_ce'],
    right_on=['county', 'tract'],
)





In [None]:
# Municipalities treated as core cities in this analysis.
core_cities = ['Providence', 'Central Falls', 'Pawtucket', 'Woonsocket']

# Keep only tracts whose crosswalk label is one of the core cities
# (this also drops tracts labelled 'Not_core_city' or left unmatched by the merge).
in_core_city = ri_tract['core_cities'].isin(core_cities)
ri_tract = ri_tract[in_core_city]
ri_tract

Unnamed: 0,COUNTYFP,tract_ce,place_fp,core_cities,name,population,all_in_poverty,white,black,native_american,...,state,county,tract,year,poverty_ratio_below_1_people,poverty_ratio_below_1_families,poverty_ratio_below_2_families,poverty_ratio_below_1_children,poverty_ratio_below_2_children,high_poverty
0,7.0,2700.0,59000.0,Providence,"Census Tract 27, Providence County, Rhode Island",6068.0,2134.0,3505.0,1220.0,6.0,...,44,7,2700,2019,0.427741,0.441121,0.665421,0.538270,0.683261,1
1,7.0,10900.0,14140.0,Central Falls,"Census Tract 109, Providence County, Rhode Island",5019.0,1634.0,2850.0,656.0,86.0,...,44,7,10900,2019,0.353680,0.254882,0.551901,0.502608,0.831812,0
6,7.0,15000.0,54640.0,Pawtucket,"Census Tract 150, Providence County, Rhode Island",4597.0,347.0,2811.0,1005.0,0.0,...,44,7,15000,2019,0.075484,0.077138,0.291822,0.068777,0.351528,0
7,7.0,15600.0,54640.0,Pawtucket,"Census Tract 156, Providence County, Rhode Island",2125.0,176.0,1633.0,200.0,0.0,...,44,7,15600,2019,0.082824,0.050549,0.116484,0.115502,0.258359,0
8,7.0,16400.0,54640.0,Pawtucket,"Census Tract 164, Providence County, Rhode Island",4214.0,1126.0,1569.0,1341.0,0.0,...,44,7,16400,2019,0.269571,0.261949,0.472426,0.453975,0.754184,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224,7.0,2400.0,59000.0,Providence,"Census Tract 24, Providence County, Rhode Island",7347.0,633.0,5800.0,752.0,8.0,...,44,7,2400,2019,0.116125,0.123037,0.224258,0.236869,0.319258,0
225,7.0,3200.0,59000.0,Providence,"Census Tract 32, Providence County, Rhode Island",3382.0,681.0,2747.0,272.0,0.0,...,44,7,3200,2019,0.201360,0.169884,0.193050,0.445860,0.464968,0
230,7.0,1700.0,59000.0,Providence,"Census Tract 17, Providence County, Rhode Island",3758.0,711.0,1793.0,250.0,0.0,...,44,7,1700,2019,0.189196,0.197098,0.442563,0.298348,0.580175,0
232,7.0,1900.0,59000.0,Providence,"Census Tract 19, Providence County, Rhode Island",4521.0,1355.0,2343.0,783.0,24.0,...,44,7,1900,2019,0.301984,0.271246,0.458673,0.350218,0.597380,0


### Calculate poverty metric for Core Cities

In [None]:

# Sanity check: count of core-city tracts per flag value
# (2 of the 70 tracts are flagged as high poverty in 2019).
ri_tract.loc[:, 'high_poverty'].value_counts()

0    68
1     2
Name: high_poverty, dtype: int64

In [None]:
# Total population and number of people in poverty for each (year, core city),
# split into one column per high_poverty flag value (0 / 1) by the unstack.
# The two numeric columns are selected *before* summing so that pandas does not
# try to aggregate every other column (e.g. the text `name` column), which is
# wasteful and can fail on non-numeric data in recent pandas versions.
place_poverty = (
    ri_tract
    .groupby(['year', 'core_cities', 'high_poverty'])[['population', 'all_in_poverty']]
    .sum()
    .unstack()
    .reset_index()
)

In [None]:
# A city with no tracts in one of the two high_poverty groups gets NaN from the
# unstack; count those missing totals as zero people.
place_poverty = place_poverty.fillna(value=0)

# Show the two-level column index produced by the unstack.
place_poverty.columns

MultiIndex([(          'year', ''),
            (   'core_cities', ''),
            (    'population',  0),
            (    'population',  1),
            ('all_in_poverty',  0),
            ('all_in_poverty',  1)],
           names=[None, 'high_poverty'])

In [None]:
# Share of each city's residents in poverty who live in a high-poverty tract:
# poor residents of flagged tracts / poor residents of all tracts.
poor_in_high_poverty_tracts = place_poverty[('all_in_poverty', 1)]
poor_in_other_tracts = place_poverty[('all_in_poverty', 0)]
place_poverty['proportion_high_poverty_neighborhood'] = poor_in_high_poverty_tracts / (poor_in_high_poverty_tracts + poor_in_other_tracts)

In [None]:
# Keep the identifying columns plus the final metric ...
output_columns = ['year', 'core_cities', 'proportion_high_poverty_neighborhood']
place_poverty = place_poverty[output_columns]

# ... and drop the (now empty) second column level left over from the unstack.
flat_columns = place_poverty.columns.droplevel(1)
place_poverty.columns = flat_columns
place_poverty.head()

Unnamed: 0,year,core_cities,proportion_high_poverty_neighborhood
0,2019,Central Falls,0.0
1,2019,Pawtucket,0.132284
2,2019,Providence,0.057129
3,2019,Woonsocket,0.0


### Calculate Poverty Metric for all US Counties

In [None]:
# Sanity check on the full tract table: a bit less than 5% of tracts
# are flagged as high poverty under this metric.
us_tract.loc[:, 'high_poverty'].value_counts()


0    70399
1     3423
Name: high_poverty, dtype: int64

In [None]:
# Total population and number of people in poverty for each (year, state, county),
# split into one column per high_poverty flag value (0 / 1) by the unstack.
# The numeric columns are selected *before* summing so pandas does not aggregate
# every other column of the tract table (including text columns), which is slow
# and can fail on non-numeric data in recent pandas versions.
county_poverty = (
    us_tract
    .groupby(['year', 'state', 'county', 'high_poverty'])[['population', 'all_in_poverty']]
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)  # counties with no tract in one of the two groups -> 0 people
)

# Share of each county's residents in poverty who live in a high-poverty tract.
# Counties with zero people in poverty overall yield 0/0 = NaN.
county_poverty['proportion_high_poverty_neighborhood'] = county_poverty[('all_in_poverty', 1)] / (
    county_poverty[('all_in_poverty', 1)] + county_poverty[('all_in_poverty', 0)]
)

# Keep the identifiers plus the metric first, then drop the second column level;
# doing it in this order avoids an intermediate frame with duplicate column names.
county_poverty = county_poverty[['year', 'state', 'county', 'proportion_high_poverty_neighborhood']]
county_poverty.columns = county_poverty.columns.droplevel(1)


county_poverty.head()

Unnamed: 0,year,state,county,proportion_high_poverty_neighborhood
0,2019,1,1,0.0
1,2019,1,3,0.0
2,2019,1,5,0.234909
3,2019,1,7,0.0
4,2019,1,9,0.0


### Calculate diversity metric

In [None]:
# Count columns (total population plus each race/ethnicity group) to total per county.
groupings = ['population', 'hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# County totals get a '_county' suffix so they can sit next to the tract-level counts.
# Built from `groupings` so the two cannot drift apart.
column_mapper = {column: f'{column}_county' for column in groupings}

county_race = (
    us_tract
    .groupby(['state', 'county', 'year'])[groupings]
    .sum()
    .reset_index()
    .rename(columns=column_mapper)
)

# Attach the county totals to every tract row. Totals left behind by an earlier
# run of this cell are dropped first; otherwise re-running would produce
# '_x' / '_y' suffixed duplicates and the '<race>_county' columns used by the
# exposure calculation would disappear.
us_tract = pd.merge(
    us_tract.drop(columns=list(column_mapper.values()), errors='ignore'),
    county_race,
    on=['state', 'county', 'year'],
    how='left',
)

In [None]:
# Race/ethnicity groups for which an exposure index is computed.
races = ['hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# Output column names, derived from `races` so the two lists cannot drift apart.
races_exposure = [race + '_exposure' for race in races]

# Per-tract contribution to the county-level index of each group:
#   (group_in_tract / group_in_county) * (tract_population - group_in_tract) / tract_population
# i.e. the tract's share of the county's group members, weighted by the share of
# that tract's residents who are not in the group.
for race in races:
    us_tract[race + '_exposure'] = (us_tract[race] * (us_tract['population'] - us_tract[race])) / (us_tract[race + '_county'] * us_tract['population'])


# Sum the tract contributions per county. Tracts with zero population give
# 0/0 = NaN and are skipped by the sum. min_count=1 keeps the result NaN when
# *every* contribution is NaN (the group has no members in the county), instead
# of reporting a misleading 0.0, which would read as "completely isolated".
us_tract_exposure = (
    us_tract
    .groupby(['state', 'county', 'year'])[races_exposure]
    .sum(min_count=1)
    .reset_index()
)

us_tract_exposure.head()

Unnamed: 0,state,county,year,hispanic_or_latino_exposure,white_exposure,black_exposure,native_american_exposure,asian_exposure,hawaiian_exposure,some_other_race_alone_exposure,two_more_races_exposure
0,1,1,2019,0.95651,0.212265,0.731823,0.991504,0.972936,0.991068,0.955512,0.965904
1,1,3,2019,0.922474,0.125351,0.792728,0.978007,0.971191,0.998263,0.971838,0.970935
2,1,5,2019,0.920309,0.506731,0.499511,0.9905,0.989702,0.999374,0.927236,0.98217
3,1,7,2019,0.962017,0.202068,0.679886,0.996733,0.997198,0.0,0.999066,0.990628
4,1,9,2019,0.846191,0.044934,0.971955,0.996659,0.994517,0.993537,0.973111,0.978451


In [None]:
# Count columns (total population plus each race/ethnicity group) to total per core city.
groupings = ['population', 'hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# The '_county' suffix is kept for the city-level totals because the exposure
# calculation looks the totals up as '<race>_county'.
column_mapper = {column: f'{column}_county' for column in groupings}

place_keys = ['state', 'core_cities', 'year']
place_race = (
    ri_tract
    .groupby(place_keys)[groupings]
    .sum()
    .reset_index()
    .rename(columns=column_mapper)
)

# Merge on the same keys used for the grouping. Leaving 'state' out of the merge
# keys made pandas split it into 'state_x' / 'state_y', which removed the plain
# 'state' column from ri_tract and made this cell fail on a re-run.
# Totals from an earlier run are dropped first so re-running stays clean.
ri_tract = pd.merge(
    ri_tract.drop(columns=list(column_mapper.values()), errors='ignore'),
    place_race,
    on=place_keys,
    how='left',
)
ri_tract.head()


Unnamed: 0,COUNTYFP,tract_ce,place_fp,core_cities,name,population,all_in_poverty,white,black,native_american,...,state_y,population_county,hispanic_or_latino_county,white_county,black_county,native_american_county,asian_county,hawaiian_county,some_other_race_alone_county,two_more_races_county
0,7.0,2700.0,59000.0,Providence,"Census Tract 27, Providence County, Rhode Island",6068.0,2134.0,3505.0,1220.0,6.0,...,44,164453.0,67602.0,92595.0,27571.0,1378.0,10205.0,245.0,24678.0,7781.0
1,7.0,10900.0,14140.0,Central Falls,"Census Tract 109, Providence County, Rhode Island",5019.0,1634.0,2850.0,656.0,86.0,...,44,15534.0,10167.0,8366.0,1841.0,196.0,114.0,0.0,3816.0,1201.0
2,7.0,15000.0,54640.0,Pawtucket,"Census Tract 150, Providence County, Rhode Island",4597.0,347.0,2811.0,1005.0,0.0,...,44,71844.0,18147.0,44267.0,12618.0,449.0,1719.0,64.0,8653.0,4074.0
3,7.0,15600.0,54640.0,Pawtucket,"Census Tract 156, Providence County, Rhode Island",2125.0,176.0,1633.0,200.0,0.0,...,44,71844.0,18147.0,44267.0,12618.0,449.0,1719.0,64.0,8653.0,4074.0
4,7.0,16400.0,54640.0,Pawtucket,"Census Tract 164, Providence County, Rhode Island",4214.0,1126.0,1569.0,1341.0,0.0,...,44,71844.0,18147.0,44267.0,12618.0,449.0,1719.0,64.0,8653.0,4074.0


In [None]:
# Race/ethnicity groups for which an exposure index is computed.
races = ['hispanic_or_latino', 'white','black', 'native_american', 'asian', 'hawaiian', 'some_other_race_alone','two_more_races']

# Output column names, derived from `races` so the two lists cannot drift apart.
races_exposure = [race + '_exposure' for race in races]

# Per-tract contribution to the city-level index of each group:
#   (group_in_tract / group_in_city) * (tract_population - group_in_tract) / tract_population
# The city totals are stored under '<race>_county' (name inherited from the
# county-level calculation).
for race in races:
    ri_tract[race + '_exposure'] = (ri_tract[race] * (ri_tract['population'] - ri_tract[race])) / (ri_tract[race + '_county'] * ri_tract['population'])


# Sum the tract contributions per core city. Tracts with zero population give
# 0/0 = NaN and are skipped by the sum. min_count=1 keeps the result NaN when
# *every* contribution is NaN (the group has no members in the city), instead of
# reporting a misleading 0.0, which would read as "completely isolated".
ri_tract_exposure = (
    ri_tract
    .groupby(['core_cities', 'year'])[races_exposure]
    .sum(min_count=1)
    .reset_index()
)

ri_tract_exposure

Unnamed: 0,core_cities,year,hispanic_or_latino_exposure,white_exposure,black_exposure,native_american_exposure,asian_exposure,hawaiian_exposure,some_other_race_alone_exposure,two_more_races_exposure
0,Central Falls,2019,0.33079,0.446157,0.876637,0.985467,0.983693,0.0,0.685372,0.915834
1,Pawtucket,2019,0.692662,0.329007,0.752447,0.964354,0.964351,0.990725,0.815983,0.923994
2,Providence,2019,0.466977,0.392604,0.775592,0.975048,0.895502,0.984697,0.780821,0.939925
3,Woonsocket,2019,0.785065,0.261701,0.873076,0.960645,0.917628,0.0,0.932557,0.910623


### Merge diversity and poverty df and create csv file

In [None]:

# Combine the exposure (diversity) indices with the poverty-concentration metric.
# Inner joins: only keys present in both tables are kept.

# Core-city level, keyed by city and year.
df_ri_diversity_poverty = ri_tract_exposure.merge(
    place_poverty,
    on=['core_cities', 'year'],
    how='inner',
)
# County level, keyed by state, county and year.
df_diversity_poverty = us_tract_exposure.merge(
    county_poverty,
    on=['state', 'county', 'year'],
    how='inner',
)

df_ri_diversity_poverty.head()

Unnamed: 0,core_cities,year,hispanic_or_latino_exposure,white_exposure,black_exposure,native_american_exposure,asian_exposure,hawaiian_exposure,some_other_race_alone_exposure,two_more_races_exposure,proportion_high_poverty_neighborhood
0,Central Falls,2019,0.33079,0.446157,0.876637,0.985467,0.983693,0.0,0.685372,0.915834,0.0
1,Pawtucket,2019,0.692662,0.329007,0.752447,0.964354,0.964351,0.990725,0.815983,0.923994,0.132284
2,Providence,2019,0.466977,0.392604,0.775592,0.975048,0.895502,0.984697,0.780821,0.939925,0.057129
3,Woonsocket,2019,0.785065,0.261701,0.873076,0.960645,0.917628,0.0,0.932557,0.910623,0.0


In [None]:
# Only needed when running on Google Colab with the data stored on Drive:
# from google.colab import  drive
# drive.mount('/drive')

# Write both result tables to the interim data folder, without the row index.
county_output_path = f'{data_path}/interim/county_diversity_poverty.csv'
df_diversity_poverty.to_csv(county_output_path, index=False)

ri_place_output_path = f'{data_path}/interim/ri_place_diversity_poverty.csv'
df_ri_diversity_poverty.to_csv(ri_place_output_path, index=False)


In [None]:
# import math

# # use this tutorial at the tract level - maybe want larger areas???
# # https://archives.huduser.gov/healthycommunities/sites/default/files/public/Racial%20Diversity%20using%20Shannon-Wiener%20Index.pdf
# races = ['white', 'black', 'native_american', 'asian',
#       # 'some_other_race_alone', 'two_more_races',
#        'hispanic_or_latino']
# ri_tract.columns

# # def get_log_1(df,columns):
# #     return df[columns].applymap(
# #         lambda x: math.log(x) if x != 0 else 0)
# list_5c = []
# # step 5b and 5c of tutorial - calculate log natural of each percent
# for race in races:
#   ri_tract[race+'_ln'] = ri_tract[race].apply(lambda x: math.log(x) if x != 0 else 0)
#   ri_tract[race+'_5c'] = ri_tract[race]*ri_tract[race+'_ln']
#   list_5c.append(race+'_5c')


# ri_tract[list_5c]

# ri_tract['diversity_index'] = -ri_tract[list_5c].sum(axis=1)

# ri_tract


