In [2]:
from shapely.geometry import shape, Point, Polygon
from shapely.ops import unary_union
import requests
import pandas as pd
from collections import defaultdict
import rtree as rt
from matplotlib import pyplot as plt

In [3]:
# GeoJSON feature collection with one boundary feature per labelled area.
geojson_url = 'https://opendata.arcgis.com/datasets/7b8a64cab4a44c0f86f12c909c5d7f1a_23.geojson'

# A timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# later as a JSON decode error or a missing 'features' key.
neighborhood_response = requests.get(geojson_url, timeout=60)
neighborhood_response.raise_for_status()
neighborhood_shapes = neighborhood_response.json()

In [4]:
# One row per GeoJSON feature, indexed by the feature's LABEL property.
# 'coordinates' keeps the raw GeoJSON coordinate arrays; 'shape' holds the
# corresponding shapely geometry built from the same feature.
neighborhood_features = neighborhood_shapes['features']

neighborhood_df = pd.DataFrame(
    {
        'neighborhood': [f['properties']['LABEL'] for f in neighborhood_features],
        'coordinates': [f['geometry']['coordinates'] for f in neighborhood_features],
        'shape': [shape(f['geometry']) for f in neighborhood_features],
    }
).set_index('neighborhood')

neighborhood_df

Unnamed: 0_level_0,coordinates,shape
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
City of Agoura Hills,"[[[-118.72863616615186, 34.16792606740936], [-...",POLYGON ((-118.7286361661519 34.16792606740936...
City of Alhambra,"[[[-118.12108778660863, 34.10536850001744], [-...",POLYGON ((-118.1210877866086 34.10536850001744...
City of Arcadia,"[[[-118.00672156181867, 34.129345732849536], [...",POLYGON ((-118.0067215618187 34.12934573284954...
City of Artesia,"[[[-118.0878060494415, 33.88034075241325], [-1...",POLYGON ((-118.0878060494415 33.88034075241325...
City of Avalon,"[[[-118.33059689532007, 33.35588284185377], [-...",POLYGON ((-118.3305968953201 33.35588284185377...
...,...,...
City of Bradbury,"[[[-117.96662107191455, 34.1436831256545], [-1...","POLYGON ((-117.9666210719145 34.1436831256545,..."
City of Monrovia,"[[[-117.95971837035697, 34.16512436932725], [-...","POLYGON ((-117.959718370357 34.16512436932725,..."
City of Diamond Bar,"[[[-117.76765128706407, 34.01938468860782], [-...",POLYGON ((-117.7676512870641 34.01938468860782...
City of Pomona,"[[[-117.7485170118948, 34.11083531651805], [-1...",POLYGON ((-117.7485170118948 34.11083531651805...


In [5]:
# GeoJSON feature collection of 2012 census-tract boundaries.
test_url = 'http://s3-us-west-2.amazonaws.com/boundaries.latimes.com/archive/1.0/boundary-set/census-tracts-2012.geojson'

# A timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# later as a JSON decode error or a missing 'features' key.
tract_response = requests.get(test_url, timeout=60)
tract_response.raise_for_status()
tract_shapes = tract_response.json()

In [6]:
# One row per census-tract feature, indexed by the integer tract id taken from
# the feature's 'name' property.
tract_features = tract_shapes['features']

census_tract_df = pd.DataFrame(
    {
        'tract': [int(f['properties']['name']) for f in tract_features],
        'coordinates': [f['geometry']['coordinates'] for f in tract_features],
        # Only the first polygon of each multi-part geometry is kept.
        # Use `.geoms[0]` rather than indexing the geometry directly:
        # subscripting a multi-part geometry was deprecated in Shapely 1.8
        # and raises TypeError in Shapely 2.0.
        'shape': [shape(f['geometry']).geoms[0] for f in tract_features],
    }
).set_index('tract')

census_tract_df

Unnamed: 0_level_0,coordinates,shape
tract,Unnamed: 1_level_1,Unnamed: 2_level_1
6037101110,"[[[[-118.302291, 34.258697], [-118.300787, 34....","POLYGON ((-118.302291 34.258697, -118.300787 3..."
6037101122,"[[[[-118.303334, 34.273536], [-118.303178, 34....","POLYGON ((-118.303334 34.273536, -118.303178 3..."
6037101210,"[[[[-118.299451, 34.255978], [-118.285924, 34....","POLYGON ((-118.299451 34.255978, -118.285924 3..."
6037101220,"[[[[-118.285924, 34.248959], [-118.285924, 34....","POLYGON ((-118.285924 34.248959, -118.285924 3..."
6037101300,"[[[[-118.272473, 34.232527], [-118.271936, 34....","POLYGON ((-118.272473 34.232527, -118.271936 3..."
...,...,...
6037980031,"[[[[-118.285303, 33.708598], [-118.283369, 33....","POLYGON ((-118.285303 33.708598, -118.283369 3..."
6037980033,"[[[[-118.244627, 33.710767], [-118.231803, 33....","POLYGON ((-118.244627 33.710767, -118.231803 3..."
6037990100,"[[[[-118.951142, 33.996432], [-118.950564, 34....","POLYGON ((-118.951142 33.996432, -118.950564 3..."
6037990200,"[[[[-118.631676, 34.000011], [-118.635977, 34....","POLYGON ((-118.631676 34.000011, -118.635977 3..."


In [25]:
# Map each neighborhood to the census tracts whose centroid falls inside it.
# A tract is assigned to the first containing neighborhood only (hence the
# `break`); a tract whose centroid lies in no neighborhood is not recorded.
n_dict = defaultdict(list)
for c_index, c_shape in census_tract_df['shape'].items():
    # The centroid depends only on the tract, so compute it once per tract
    # instead of once per (tract, neighborhood) pair.
    c_centroid = c_shape.centroid
    for n_index, n_shape in neighborhood_df['shape'].items():
        if n_shape.contains(c_centroid):
            n_dict[n_index].append(c_index)
            break

In [35]:
# Give every neighborhood an entry, using an empty tract list for those that
# received no tracts; existing entries are left untouched.
for neighborhood_name in neighborhood_df.index:
    n_dict.setdefault(neighborhood_name, [])

In [94]:
# Flatten {neighborhood: [tract, ...]} into one (neighborhood, tract) row per
# tract. Neighborhoods with an empty tract list contribute no rows.
tract_rows = [
    (neighborhood_name, tract_id)
    for neighborhood_name, tract_ids in n_dict.items()
    for tract_id in tract_ids
]

# Index by tract id, sorted, so a tract can be looked up directly.
tracts_df = (
    pd.DataFrame(tract_rows, columns=['neighborhood', 'tracts'])
    .set_index('tracts')
    .sort_index()
)
tracts_df

# df = pd.DataFrame([(key, L) for (key, L) in n_dict.items()], columns=['neighborhood', 'tracts'])
# df = df.set_index('neighborhood')
# df = df.sort_index()
# df
# df.to_pickle('neighborhoods_tracts_df')

Unnamed: 0_level_0,neighborhood
tracts,Unnamed: 1_level_1
6037101110,Los Angeles - Tujunga
6037101122,Los Angeles - Tujunga
6037101210,Los Angeles - Tujunga
6037101220,Los Angeles - Tujunga
6037101300,Los Angeles - Tujunga
...,...
6037980025,City of Carson
6037980026,Los Angeles - Angeles National Forest
6037980028,Los Angeles - Westchester
6037980030,City of El Segundo


# CalEnviroScreen (CES) grouping by neighborhood

In [229]:
# NOTE(review): absolute path on one machine -- point this at your own copy of
# the CalEnviroScreen 3.0 CSV before running.
ces_path = '/Users/kevinmarlis/Downloads/CalEnviroScreen_3.0.csv'
ces_df = pd.read_csv(ces_path)

# Only LA County tracts
tract_col = 'L0CalEnviroScreen_3_0_tract'
ces_df = ces_df[ces_df[tract_col].between(6037000000, 6037999999)]

# Remove unused columns: keep only the CalEnviroScreen fields, then drop the
# bookkeeping fields and every column whose name ends in 'TXT' or 'P'.
ces_df = ces_df[[col for col in ces_df.columns if 'L0CalEnviroScreen_3_0' in col]]
ces_df = ces_df.drop(
    columns=[
        'L0CalEnviroScreen_3_0_Tractdbl',
        'L0CalEnviroScreen_3_0_TractArea',
        'L0CalEnviroScreen_3_0_OBJECTID_',
    ]
)
ces_df = ces_df.drop(
    columns=[col for col in ces_df.columns if col.endswith('TXT') or col.endswith('P')]
)

# Add neighborhood names.
# A vectorized lookup replaces the former row-by-row loop, whose bare `except`
# silently hid every error. Tracts that have no entry in tracts_df still get
# an empty-string neighborhood, exactly as before.
tract_to_neighborhood = tracts_df['neighborhood'].to_dict()
ces_df['neighborhood'] = (
    ces_df[tract_col].astype('int64').map(tract_to_neighborhood).fillna('')
)

# Clean up column names: strip the layer prefix (as a literal string, not a
# regex) and any trailing underscores.
ces_df.columns = (
    ces_df.columns
    .str.replace('L0CalEnviroScreen_3_0_', '', regex=False)
    .str.rstrip('_')
)

# Keep only the columns used downstream, in display order, and give them
# descriptive names.
ces_df = ces_df[['tract', 'neighborhood', 'ozone', 'pm', 'diesel', 'traffic', 'Polluti_1', 'asthma', 'PopCharSc', 'Children', 'Elderly']]
ces_df = ces_df.rename(
    columns={
        'asthma': 'asthma_rate',
        'traffic': 'traffic_density',
        'Polluti_1': 'pollution_score',
        'PopCharSc': 'population_char_score',
        'Children': 'children_percent',
        'Elderly': 'elderly_percent',
    }
)
ces_df

Unnamed: 0,tract,neighborhood,ozone,pm,diesel,traffic_density,pollution_score,asthma_rate,population_char_score,children_percent,elderly_percent
5692,6037920336,City of Santa Clarita,0.064889,10.370000,9.071850,712.37,4.753963,24.67,6.184743,19.2,4.4
5693,6037920044,City of Santa Clarita,0.064889,9.955483,25.560952,691.79,4.263737,32.05,3.056230,12.1,6.9
5694,6037573003,City of Long Beach,0.042299,11.210000,29.120000,1062.94,6.597485,87.72,7.918507,13.5,7.8
5695,6037571704,City of Long Beach,0.042299,11.210000,21.637831,538.42,4.386786,71.39,8.089001,17.7,5.4
5696,6037570403,City of Long Beach,0.042299,12.050000,29.030000,1372.36,6.958873,75.70,8.443781,20.0,4.7
...,...,...,...,...,...,...,...,...,...,...,...
8030,6037430302,City of Monrovia,0.053109,10.370000,6.649666,445.83,5.108084,38.64,3.552984,11.4,13.1
8031,6037430723,City of Arcadia,0.051243,10.370000,11.290000,703.21,5.289270,18.19,2.951486,11.4,12.5
8032,6037431100,City of Monrovia,0.053109,10.790000,15.588629,1539.81,7.774767,37.78,5.154834,14.2,8.9
8033,6037533603,City of Bell,0.046178,12.050000,23.100000,952.70,7.663912,46.57,7.548068,17.8,9.6


In [227]:
# Group tracts by neighborhood.
# Tracts that were not matched to a neighborhood carry '' and therefore end up
# aggregated together under the empty-string key.
group = ces_df.groupby('neighborhood')

# Calculate the per-neighborhood mean of every indicator column.
# Both the tract id and the grouping key are excluded: averaging the string
# 'neighborhood' column was silently skipped by older pandas but raises
# TypeError on pandas 2.x.
metric_cols = [c for c in ces_df.columns if c not in ('tract', 'neighborhood')]
grouped_df = group[metric_cols].mean()

# Create list of tracts within each neighborhood
tract_lists = group['tract'].apply(list)
grouped_df['tract_lists'] = tract_lists

# Persist the aggregated table to the working directory.
grouped_df.to_pickle('CES_neighborhood_df.pkl')
grouped_df

Unnamed: 0_level_0,ozone,pm,diesel,traffic_density,pollution_score,asthma_rate,population_char_score,children_percent,elderly_percent,tract_lists
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
,0.044214,11.525000,33.667970,759.405625,5.535631,36.124375,2.958006,6.750000,13.356250,"[6037297601, 6037577504, 6037621326, 603762132..."
City of Agoura Hills,0.051865,9.536303,7.881444,1200.050000,4.629887,19.893333,2.959783,10.833333,11.266667,"[6037800332, 6037800327, 6037800324]"
City of Alhambra,0.049204,11.525000,22.743278,1382.278500,6.823814,31.803500,4.868749,9.980000,14.590000,"[6037481606, 6037481605, 6037480304, 603748080..."
City of Arcadia,0.051412,10.485542,12.843261,1135.541818,5.588927,16.549091,2.984220,9.972727,16.245455,"[6037430801, 6037430724, 6037430701, 603743170..."
City of Artesia,0.044312,11.210000,23.684496,1311.766667,5.672172,34.370000,5.959942,11.666667,12.766667,"[6037554802, 6037554900, 6037554801]"
...,...,...,...,...,...,...,...,...,...,...
Unincorporated - White Fence Farms,0.062163,6.182866,6.014267,737.770000,4.550852,38.490000,4.557277,10.600000,12.800000,[6037910202]
Unincorporated - Whittier,0.046178,12.050000,21.301501,1307.120000,6.867540,63.520000,4.339597,10.000000,13.950000,"[6037501501, 6037500300]"
Unincorporated - Whittier Narrows,0.046178,12.050000,22.470000,1920.210000,8.811509,54.750000,5.523925,13.000000,13.600000,[6037433802]
Unincorporated - Willowbrook,0.044312,12.050000,21.942599,1351.411429,7.358743,85.874286,8.034911,17.928571,7.214286,"[6037541500, 6037541400, 6037540400, 603754070..."
