In [1]:
import pandas as pd
import json
from urllib.request import urlopen
import requests
from shapely.geometry import shape, Point
import pickle

#### Vision Zero GeoJSON data

In [2]:
# Every Vision Zero layer is published under the same NYC DOT download folder,
# so each URL is the shared prefix plus the layer's file name.
_DOT_MISC = 'http://www.nyc.gov/html/dot/downloads/misc/'

arterial_slowzone = _DOT_MISC + 'arterial_slow_zones.json'
bike_priority_district = _DOT_MISC + 'bike_priority_districts.json'
enhanced_crossing = _DOT_MISC + 'enhanced_crossings.json'
leading_pedestrian_interval = _DOT_MISC + 'leading_pedestrian_interval_signals.json'
left_turn_calming = _DOT_MISC + 'left_turn_traffic_calming.json'
neighborhood_slowzones = _DOT_MISC + 'neighborhood_slow_zones.json'
safe_streets_seniors = _DOT_MISC + 'safe_streets_for_seniors.json'
speed_humps = _DOT_MISC + 'speed_humps.json'
signal_timing = _DOT_MISC + 'signal_timing.json'

#### Loading Sample Data

In [3]:
# Smaller extracts kept for quick iteration (presumably 1000-row samples,
# judging by the file names); uncomment to use them instead of the full file.
#pedestrian = pd.read_csv('../Data/pedestrian_1000.csv')
#collision = pd.read_csv('../Data/collision_1000.csv')

# Full pedestrian dataset; the cells below read its LONGITUDE/LATITUDE columns.
pedestrian_csv = '../Data/pedestrian.csv'
pedestrian = pd.read_csv(pedestrian_csv)

In [4]:
# Proximity cut-off: a point counts as belonging to a GeoJSON feature when its
# distance to that feature is strictly below this value.
# NOTE(review): the distance is computed on raw longitude/latitude values, so
# this is presumably in degrees rather than metres -- confirm.
threshold = 0.0001

#### Define Methods

In [5]:
def loadGeoJson(url):
    """Fetch ``url`` and return its body parsed as JSON.

    Parameters
    ----------
    url : str
        Address handed directly to ``urlopen``.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the response body. Callers in
        this notebook index the result with ``['features']``, so a GeoJSON
        FeatureCollection dict is expected.

    Raises
    ------
    urllib.error.URLError
        If the resource cannot be opened.
    ValueError
        If the body is not valid JSON (``json.JSONDecodeError``).
    """
    # The context manager closes the connection even when reading or parsing
    # fails; previously the response object was left open.
    with urlopen(url) as response:
        return json.loads(response.read())

In [6]:
def existsInGeoJson(js, longitude, latitude, threshold):
    """Report whether a coordinate lies within ``threshold`` of any feature.

    Parameters
    ----------
    js : dict
        Parsed GeoJSON; its ``'features'`` entries each carry a ``'geometry'``.
    longitude, latitude : float
        Coordinates of the point to test (x = longitude, y = latitude).
    threshold : float
        Strict upper bound on the point-to-geometry distance.

    Returns
    -------
    bool
        True as soon as one feature is closer than ``threshold``; False when
        none is (including when there are no features).
    """
    location = Point(longitude, latitude)
    # any() stops at the first feature that is close enough, so later
    # geometries are never built once a match is found.
    return any(
        shape(feature['geometry']).distance(location) < threshold
        for feature in js['features']
    )

In [7]:
def pointExists(row, threshold, jsondata):
    """Row-wise indicator: 1 if the row's location is near any feature, else 0.

    Parameters
    ----------
    row : mapping
        Must provide ``'LONGITUDE'`` and ``'LATITUDE'`` entries.
    threshold : float
        Distance cut-off forwarded to ``existsInGeoJson``.
    jsondata : dict
        Parsed GeoJSON layer to test against.

    Returns
    -------
    int
        1 when ``existsInGeoJson`` reports a match, otherwise 0.
    """
    is_near = existsInGeoJson(jsondata, row['LONGITUDE'], row['LATITUDE'], threshold)
    return 1 if is_near else 0

Load GeoJSON

In [8]:
# Download each Vision Zero layer once. The URLs are listed in the same order
# as the names on the left, so the unpacking pairs them one-to-one.
(
    arterial_slowzone_JS,
    bike_priority_district_JS,
    enhanced_crossing_JS,
    leading_pedestrian_interval_JS,
    left_turn_calming_JS,
    neighborhood_slowzones_JS,
    safe_streets_seniors_JS,
    speed_humps_JS,
    signal_timing_JS,
) = [
    loadGeoJson(layer_url)
    for layer_url in (
        arterial_slowzone,
        bike_priority_district,
        enhanced_crossing,
        leading_pedestrian_interval,
        left_turn_calming,
        neighborhood_slowzones,
        safe_streets_seniors,
        speed_humps,
        signal_timing,
    )
]

Augment dataset with new variables

In [9]:
# Create one indicator column per Vision Zero layer, defaulting every row to 0.
for indicator_column in (
    'arterial_SZ',
    'bike_PD',
    'enh_crossing',
    'lead_ped',
    'left_turn',
    'neighborhood_SZ',
    'safe_Streets',
    'speed_hump',
    'signal_timing',
):
    pedestrian[indicator_column] = 0

In [10]:
from time import gmtime, strftime

In [11]:
# Display the current UTC wall-clock time (gmtime) as a manual start marker
# for timing the indicator computation that follows.
strftime("%Y-%m-%d %H:%M:%S", gmtime())

'2018-04-15 16:49:21'

In [12]:
# For each (column, layer) pair, flag every row whose LONGITUDE/LATITUDE lies
# within `threshold` of some feature in that layer. Pairs are processed in the
# order listed.
indicator_layers = [
    ('arterial_SZ', arterial_slowzone_JS),
    ('bike_PD', bike_priority_district_JS),
    ('enh_crossing', enhanced_crossing_JS),
    ('lead_ped', leading_pedestrian_interval_JS),
    ('left_turn', left_turn_calming_JS),
    ('neighborhood_SZ', neighborhood_slowzones_JS),
    ('safe_Streets', safe_streets_seniors_JS),
    ('speed_hump', speed_humps_JS),
    ('signal_timing', signal_timing_JS),
]
for indicator_column, layer_geojson in indicator_layers:
    pedestrian[indicator_column] = pedestrian.apply(
        pointExists, axis=1, threshold=threshold, jsondata=layer_geojson
    )

In [13]:
# Display the current UTC wall-clock time (gmtime) as a manual end marker;
# compare with the earlier timestamp to see how long the computation took.
strftime("%Y-%m-%d %H:%M:%S", gmtime())

'2018-04-15 19:58:57'

In [14]:
# Preview the first rows to check the indicator columns on the frame.
pedestrian.head()

Unnamed: 0.1,Unnamed: 0,DATE,TIME,BOROUGH,ZIP.CODE,LATITUDE,LONGITUDE,LOCATION,ON.STREET.NAME,CROSS.STREET.NAME,...,VEHICLE.TYPE.CODE.5,arterial_SZ,bike_PD,enh_crossing,lead_ped,left_turn,neighborhood_SZ,safe_Streets,speed_hump,signal_timing
0,16,02/27/2018,10:13,BROOKLYN,11226.0,40.64269,-73.95764,"(40.64269, -73.95764)",CLARENDON ROAD,FLATBUSH AVENUE,...,,1,1,0,0,0,0,1,0,1
1,42,02/27/2018,10:56,,,40.8475,-73.86696,"(40.8475, -73.86696)",RHINELANDER AVENUE,WHITE PLAINS ROAD,...,,0,0,0,0,0,0,0,0,1
2,50,02/27/2018,11:08,QUEENS,11432.0,40.7052,-73.79926,"(40.7052, -73.79926)",,,...,,0,0,0,0,0,0,0,1,0
3,57,02/27/2018,11:30,BROOKLYN,11201.0,40.695232,-73.98326,"(40.695232, -73.98326)",,,...,,0,0,0,0,0,0,0,0,0
4,71,02/27/2018,12:03,BROOKLYN,11236.0,40.645027,-73.91998,"(40.645027, -73.91998)",CLARENDON ROAD,RALPH AVENUE,...,,0,1,0,0,0,0,1,0,1


In [15]:
# Persist the augmented frame to a pickle file in the working directory.
augmented_pickle = 'pedestrians_variables.pkl'
pedestrian.to_pickle(augmented_pickle)