In [1]:
import time
import pandas as pd
import numpy as np

import geopandas as gpd
from shapely.geometry import Point, Polygon

# Silence every warning for the rest of the session.
# NOTE(review): this hides all warning categories, including deprecation notices — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

# Timestamp captured when this cell runs; it is not referenced by any cell visible in this notebook.
import datetime as dt 
today = dt.datetime.today()

In [2]:
# Load the Census block layer and keep only the fields used by the cells below.
blocks = gpd.read_file('data/Census_Blocks__2010.geojson')[
    ['GEOID', 'BLOCK', 'BLKGRP', 'P0010001', 'SqMiles', 'geometry']
]
# GEOID-indexed copy of the blocks; later cells merge per-block features onto it.
blks = blocks.set_index('GEOID')
print("Number of Census blocks: "+str(len(blocks)))

Number of Census blocks: 6507


## Parks

In [3]:
!wget https://opendata.arcgis.com/datasets/287eaa2ecbff4d699762bbc6795ffdca_9.geojson
!mv 287eaa2ecbff4d699762bbc6795ffdca_9.geojson data/Parks_and_Recreation_Areas.geojson

--2018-07-11 21:05:51--  https://opendata.arcgis.com/datasets/287eaa2ecbff4d699762bbc6795ffdca_9.geojson
Resolving opendata.arcgis.com (opendata.arcgis.com)... 52.5.5.235, 52.1.111.28
Connecting to opendata.arcgis.com (opendata.arcgis.com)|52.5.5.235|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘287eaa2ecbff4d699762bbc6795ffdca_9.geojson’

287eaa2ecbff4d69976     [ <=>                ] 567.32K  --.-KB/s    in 0.03s   

2018-07-11 21:05:51 (20.2 MB/s) - ‘287eaa2ecbff4d699762bbc6795ffdca_9.geojson’ saved [580933]



### Pull and recode data

In [4]:
# Raw park attributes needed for the recode, plus the geometry used by the spatial join.
cols = ['NAME', 'ACTIVE', 'DMPSTR', 'DRINKFOUNT', 'FENCE', 'PLYGRD', 'OUTSWIM', 'TOPO', 'VSTRSHCAN', 'WOODLAND', 'geometry']
dcgeo = gpd.read_file('data/Parks_and_Recreation_Areas.geojson')[cols]

# 'Y' flags become 1; any other value maps to NaN.
yes_flag = {'Y': 1}
dcgeo['park'] = dcgeo['ACTIVE'].map(yes_flag)

# Columns converted to numeric values, listed in the order the new features are appended.
numeric_features = [
    ('park_dumpster', 'DMPSTR'),
    ('park_drinkfount', 'DRINKFOUNT'),
    ('park_playground', 'PLYGRD'),
    ('park_outdoorpool', 'OUTSWIM'),
    ('park_trashcan', 'VSTRSHCAN'),
]
for feature_name, raw_name in numeric_features:
    dcgeo[feature_name] = pd.to_numeric(dcgeo[raw_name])

dcgeo['park_woodland'] = dcgeo['WOODLAND'].map(yes_flag)

# Keep only the geometry and the engineered park_* columns.
dcgeo = dcgeo.drop([raw_name for raw_name in cols if raw_name != 'geometry'], axis=1)

In [5]:
## Spatial join park features to Census block polygons
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# how='left' keeps every block, including blocks with no intersecting park.
# NOTE(review): newer geopandas releases rename `op` to `predicate` — confirm against the installed version.
geo_df = gpd.sjoin(blocks, dcgeo, how='left', op='intersects')
print('Finished spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started spatial join at Wed, 11 Jul 2018 21:05:53 +0000
Finished spatial join at Wed, 11 Jul 2018 21:05:54 +0000


In [6]:
## Collapse the join result to one row per Census block (GEOID)
# Every park feature column, i.e. everything in dcgeo except the geometry.
cols = [name for name in dcgeo.columns if name != 'geometry']
data = geo_df.groupby('GEOID')[cols].sum()
# Left merge on the GEOID index so every block row in blks is kept.
blks = blks.merge(data, left_index=True, right_index=True, how='left')

## Impervious Surfaces

In [7]:
# Read GEOID as text at parse time: parsing it as a number and casting back to str
# afterwards would strip any leading zeros and break the match against the string index of blks.
dcgeo = pd.read_csv('data/impervious_surfaces.csv.gz', dtype={'GEOID': str}).set_index('GEOID')
# Left merge on the GEOID index so every block row in blks is kept.
blks = blks.merge(dcgeo, how='left', left_index=True, right_index=True)

## Community Gardens

In [8]:
!wget https://opendata.arcgis.com/datasets/a82537b01c2141558ba5e9e13224d395_4.geojson
!mv a82537b01c2141558ba5e9e13224d395_4.geojson data/Community_Gardens.geojson

--2018-07-11 21:05:55--  https://opendata.arcgis.com/datasets/a82537b01c2141558ba5e9e13224d395_4.geojson
Resolving opendata.arcgis.com (opendata.arcgis.com)... 52.1.111.28, 52.5.5.235
Connecting to opendata.arcgis.com (opendata.arcgis.com)|52.1.111.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘a82537b01c2141558ba5e9e13224d395_4.geojson’

a82537b01c2141558ba     [ <=>                ]  54.91K  --.-KB/s    in 0.007s  

2018-07-11 21:05:55 (7.81 MB/s) - ‘a82537b01c2141558ba5e9e13224d395_4.geojson’ saved [56230]



In [9]:
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = gpd.read_file('data/Community_Gardens.geojson')
# Rows without a geometry cannot take part in the spatial join.
dcgeo = dcgeo[dcgeo.geometry.notnull()]
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# Keep the identifier, area and geometry, and prefix the feature columns with the layer name.
dcgeo = dcgeo[['OBJECTID', 'SHAPE_Area', 'geometry']]
dcgeo.columns = ['communitygarden_id', 'communitygarden_area', 'geometry']

Started pulling data at Wed, 11 Jul 2018 21:05:56 +0000
Finished pulling data at Wed, 11 Jul 2018 21:05:56 +0000


In [10]:
## Spatial join community garden features to Census block polygons
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# how='left' keeps every block, including blocks with no intersecting garden.
# NOTE(review): newer geopandas releases rename `op` to `predicate` — confirm against the installed version.
geo_df = gpd.sjoin(blocks, dcgeo, how='left', op='intersects')
print('Finished spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started spatial join at Wed, 11 Jul 2018 21:05:56 +0000
Finished spatial join at Wed, 11 Jul 2018 21:05:57 +0000


In [11]:
## Aggregate to single unique Census block (GEOID)
cols = dcgeo.drop(['geometry'], axis=1).columns
# communitygarden_id holds OBJECTID identifiers, so adding them up produces a meaningless
# number. Count the gardens matched to each block instead (0 when none matched) and keep
# summing the garden area. Column names and order are unchanged.
data = geo_df.groupby('GEOID').agg(
    {'communitygarden_id': 'count', 'communitygarden_area': 'sum'}
)[list(cols)]
# Left merge on the GEOID index so every block row in blks is kept.
blks = blks.merge(data, how='left', left_index=True, right_index=True)

## Well Permits

In [12]:
!wget https://opendata.arcgis.com/datasets/84fdf39aaa3a4e75ba9e7a167577daa8_41.geojson
!mv 84fdf39aaa3a4e75ba9e7a167577daa8_41.geojson data/DC_Well_Permits.geojson

--2018-07-11 21:05:58--  https://opendata.arcgis.com/datasets/84fdf39aaa3a4e75ba9e7a167577daa8_41.geojson
Resolving opendata.arcgis.com (opendata.arcgis.com)... 52.5.5.235, 52.1.111.28
Connecting to opendata.arcgis.com (opendata.arcgis.com)|52.5.5.235|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘84fdf39aaa3a4e75ba9e7a167577daa8_41.geojson’

84fdf39aaa3a4e75ba9     [ <=>                ] 906.49K  --.-KB/s    in 0.03s   

2018-07-11 21:05:58 (25.7 MB/s) - ‘84fdf39aaa3a4e75ba9e7a167577daa8_41.geojson’ saved [928242]



In [13]:
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = gpd.read_file('data/DC_Well_Permits.geojson')
# Rows without a geometry cannot take part in the spatial join.
dcgeo = dcgeo[dcgeo.geometry.notnull()]
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = dcgeo[['ESRI_OID', 'geometry']]
# 1 for every permit row that carries an ESRI_OID, 0 otherwise; summed per block later on.
dcgeo['well_activity'] = 1*dcgeo['ESRI_OID'].notnull()

Started pulling data at Wed, 11 Jul 2018 21:05:59 +0000
Finished pulling data at Wed, 11 Jul 2018 21:05:59 +0000


In [14]:
## Spatial join well permit features to Census block polygons
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# how='left' keeps every block, including blocks with no intersecting well permit.
# NOTE(review): newer geopandas releases rename `op` to `predicate` — confirm against the installed version.
geo_df = gpd.sjoin(blocks, dcgeo, how='left', op='intersects')
print('Finished spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started spatial join at Wed, 11 Jul 2018 21:05:59 +0000
Finished spatial join at Wed, 11 Jul 2018 21:06:00 +0000


In [15]:
# Sum the well columns per Census block and attach them to the feature table.
# The summed ESRI_OID column comes along here; the final cell drops it again.
cols = [name for name in dcgeo.columns if name != 'geometry']
data = geo_df.groupby('GEOID')[cols].sum()
# Left merge on the GEOID index so every block row in blks is kept.
blks = blks.merge(data, left_index=True, right_index=True, how='left')

## Alley Maintenance Inventory

In [16]:
!wget https://opendata.arcgis.com/datasets/a71e92b9ffa14362999f6b4a4c89f66b_10.geojson
!mv a71e92b9ffa14362999f6b4a4c89f66b_10.geojson data/Alley_Maintenance_Inventory.geojson

--2018-07-11 21:06:01--  https://opendata.arcgis.com/datasets/a71e92b9ffa14362999f6b4a4c89f66b_10.geojson
Resolving opendata.arcgis.com (opendata.arcgis.com)... 52.1.111.28, 52.5.5.235
Connecting to opendata.arcgis.com (opendata.arcgis.com)|52.1.111.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘a71e92b9ffa14362999f6b4a4c89f66b_10.geojson’

a71e92b9ffa14362999     [ <=>                ]  10.00M  --.-KB/s    in 0.1s    

2018-07-11 21:06:02 (69.6 MB/s) - ‘a71e92b9ffa14362999f6b4a4c89f66b_10.geojson’ saved [10485542]



In [17]:
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = gpd.read_file('data/Alley_Maintenance_Inventory.geojson')
# Keep only rows that have both a geometry (needed for the spatial join) and an ALLEY_ID.
dcgeo = dcgeo[dcgeo.geometry.notnull()]
dcgeo = dcgeo[dcgeo.ALLEY_ID.notnull()]
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started pulling data at Wed, 11 Jul 2018 21:06:03 +0000
Finished pulling data at Wed, 11 Jul 2018 21:06:05 +0000


In [18]:
# (raw field, feature name) pairs: the alley attributes to keep and the alley_* name each one gets.
alley_fields = [
    ('CATCH_BASINS', 'alley_catchbasin'),
    ('CONDITION', 'alley_condition'),
    ('ENDMEASURE', 'alley_endmeasure'),
    ('HISTORIC', 'alley_historic'),
    ('LENGTH', 'alley_length'),
    ('LOW_POINTS', 'alley_lowpoints'),
    ('MATERIAL', 'alley_material'),
    ('LIGHTS', 'alley_lights'),
    ('geometry', 'geometry'),
]
dcgeo = dcgeo[[raw_name for raw_name, _ in alley_fields]]
dcgeo.columns = [feature_name for _, feature_name in alley_fields]

In [19]:
## Spatial join alley features to Census block polygons
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# how='left' keeps every block, including blocks with no intersecting alley.
# NOTE(review): newer geopandas releases rename `op` to `predicate` — confirm against the installed version.
geo_df = gpd.sjoin(blocks, dcgeo, how='left', op='intersects')
print('Finished spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started spatial join at Wed, 11 Jul 2018 21:06:05 +0000
Finished spatial join at Wed, 11 Jul 2018 21:06:07 +0000


In [20]:
# Sum the alley_* columns per Census block and attach them to the feature table.
# NOTE(review): some of these fields (condition, material, historic, lights) may not be
# numeric, in which case a sum is not a meaningful aggregate — verify the column dtypes.
cols = [name for name in dcgeo.columns if name != 'geometry']
data = geo_df.groupby('GEOID')[cols].sum()
# Left merge on the GEOID index so every block row in blks is kept.
blks = blks.merge(data, left_index=True, right_index=True, how='left')

## Sidewalk Grates

In [21]:
!wget https://opendata.arcgis.com/datasets/dabde2b2dc88453ea569c180f7305baa_5.geojson
!mv dabde2b2dc88453ea569c180f7305baa_5.geojson data/Sidewalk_Grates.geojson

--2018-07-11 21:06:08--  https://opendata.arcgis.com/datasets/dabde2b2dc88453ea569c180f7305baa_5.geojson
Resolving opendata.arcgis.com (opendata.arcgis.com)... 52.5.5.235, 52.1.111.28
Connecting to opendata.arcgis.com (opendata.arcgis.com)|52.5.5.235|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘dabde2b2dc88453ea569c180f7305baa_5.geojson’

dabde2b2dc88453ea56     [ <=>                ]   2.58M  --.-KB/s    in 0.06s   

2018-07-11 21:06:08 (43.7 MB/s) - ‘dabde2b2dc88453ea569c180f7305baa_5.geojson’ saved [2710030]



In [22]:
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = gpd.read_file('data/Sidewalk_Grates.geojson')
# Rows without a geometry cannot take part in the spatial join.
dcgeo = dcgeo[dcgeo.geometry.notnull()]
print('Finished pulling data at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
dcgeo = dcgeo[['CAPTUREACTION', 'geometry']]
# 1 for every grate row with a CAPTUREACTION value, 0 otherwise; summed per block later on.
dcgeo['sidewalk_grates'] = 1*dcgeo.CAPTUREACTION.notnull()
dcgeo = dcgeo.drop('CAPTUREACTION', axis=1)

Started pulling data at Wed, 11 Jul 2018 21:06:09 +0000
Finished pulling data at Wed, 11 Jul 2018 21:06:10 +0000


In [23]:
## Spatial join sidewalk grate features to Census block polygons
# Timestamps are taken in UTC (time.gmtime) so the literal "+0000" offset in the format is accurate.
print('Started spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
# how='left' keeps every block, including blocks with no intersecting grate.
# NOTE(review): newer geopandas releases rename `op` to `predicate` — confirm against the installed version.
geo_df = gpd.sjoin(blocks, dcgeo, how='left', op='intersects')
print('Finished spatial join at '+time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))

Started spatial join at Wed, 11 Jul 2018 21:06:10 +0000
Finished spatial join at Wed, 11 Jul 2018 21:06:12 +0000


In [24]:
# Sum the grate indicator per Census block and attach it to the feature table.
cols = [name for name in dcgeo.columns if name != 'geometry']
data = geo_df.groupby('GEOID')[cols].sum()
blks = blks.merge(data, left_index=True, right_index=True, how='left')

# Remove the raw Census attributes, the geometry and the summed well ESRI_OID so that
# only the engineered feature columns remain.
non_feature_columns = ['BLOCK', 'BLKGRP', 'P0010001', 'SqMiles', 'geometry', 'ESRI_OID']
blks = blks.drop(non_feature_columns, axis=1)

# Display check: the most frequent GEOIDs in the index, with how often each occurs.
blks.index.value_counts().head()

110010076051009    1
110010022012004    1
110010075022002    1
110010095042017    1
110010010012006    1
Name: GEOID, dtype: int64

## Push to csv

In [25]:
# Write the per-block feature table (GEOID index included) as a gzip-compressed CSV.
blks.to_csv(
    'data/env_features.csv.gz',
    compression='gzip',
)