In [22]:
import json
import datetime
import sys
import time
import pyspark
from operator import add
import numpy as np
import matplotlib.path as mplPath

In [23]:
def indexZones(shapeFilename):
    """Load a zone shapefile and build a spatial index over its geometries.

    Args:
        shapeFilename: path of the geometry file handed to geopandas.

    Returns:
        A ``(index, zones)`` tuple: ``zones`` is the frame read from
        ``shapeFilename`` and reprojected to EPSG:2263, and ``index`` is an
        R-tree holding the bounding box of every geometry, keyed by the
        geometry's position in ``zones.geometry``.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    target_crs = fiona.crs.from_epsg(2263)
    zones = gpd.read_file(shapeFilename).to_crs(target_crs)

    # Only bounding boxes go into the tree; callers still need an exact
    # geometric test on the candidates it returns.
    index = rtree.Rtree()
    for position, shape in enumerate(zones.geometry):
        index.insert(position, shape.bounds)
    return (index, zones)

def findBlock(p, index, zones):
    """Return the OBJECTID of the first zone that contains point ``p``.

    Args:
        p: point geometry exposing ``x`` and ``y`` (a shapely Point in this
            notebook), in the same coordinate system as ``zones``.
        index: spatial index whose ``intersection(bbox)`` yields candidate
            positions into ``zones.geometry`` (as built by ``indexZones``).
        zones: frame with a ``geometry`` column and an ``OBJECTID`` column.

    Returns:
        ``zones['OBJECTID'][idx]`` for the first candidate whose geometry
        contains ``p``, or ``-1`` when no candidate does. A point lying exactly
        on a zone boundary is not considered contained.
    """
    # Query the index with a degenerate box (the point itself); this only
    # narrows the search to zones whose bounding box covers the point.
    candidates = index.intersection((p.x, p.y, p.x, p.y))
    for idx in candidates:
        # Exact test with the geometry's own predicate. Unlike rebuilding a
        # matplotlib Path from the exterior ring, this honours interior holes,
        # works for multi-part geometries (which have no `.exterior`), and does
        # not depend on coercing shapely objects to numpy arrays.
        if zones.geometry[idx].contains(p):
            return zones['OBJECTID'][idx]
    return -1

In [25]:
def mapToZone(stations, shapeFilename='block-groups-polygons.geojson'):
    """Yield ``(station[1], zone)`` for every data row of a station CSV.

    Args:
        stations: sequence of comma-separated text lines. The first line is
            treated as a header and skipped.
        shapeFilename: zone geometry file passed to ``indexZones``. Defaults to
            the block-group file this notebook has always used.

    Yields:
        ``(station[1], zone)`` where ``zone`` is the value returned by
        ``findBlock`` for the row's projected location (``-1`` when no zone
        contains it).
    """
    import pyproj
    import shapely.geometry as geom
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    index, zones = indexZones(shapeFilename)
    for line in stations[1:]:
        # strip('') removes nothing; strip() drops the surrounding whitespace
        # and newline that a raw text line may carry.
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line has no columns to index into.
            continue
        station = line.split(',')
        # Column 5 is passed as x and column 4 as y -- presumably longitude and
        # latitude; verify against the CSV header. Convert explicitly so the
        # projection receives numbers rather than raw strings.
        location = geom.Point(proj(float(station[5]), float(station[4])))
        zone = findBlock(location, index, zones)
        yield (station[1], zone)

# Read the station CSV as lines of text.
# NOTE(review): `sc` is not defined in any visible cell -- presumably a
# SparkContext supplied by the environment; confirm it exists on a fresh kernel.
stations = sc.textFile("datasets/citibike_station.csv")
# mapToZone slices its argument (stations[1:]), so the lines are collected into
# a local sequence first. mapToZone is a generator of (station[1], zone) pairs.
output = mapToZone(stations.collect()) # station, blockid
# Drain the generator once so the pairs can be iterated again in later cells.
li = list(output)

In [28]:
import json
# Load the block-group GeoJSON that the station -> block mapping refers to.
with open('block-groups-polygons.geojson', encoding='utf-8') as dataFile:
    blockData = json.load(dataFile)

# Index the features by OBJECTID in a single pass, so that each station is
# resolved with a dictionary lookup instead of rescanning every feature.
# A list per key keeps every feature should an OBJECTID appear more than once.
coordinatesByBlock = {}
for block in blockData['features']:
    coordinatesByBlock.setdefault(block['properties']['OBJECTID'], []).append(
        block['geometry']['coordinates'])

# One entry per (station, matching feature), in station order; stations whose
# block id matches no feature (e.g. -1) contribute nothing.
coordinatesList = []
for k, v in li:
    coordinatesList.extend(coordinatesByBlock.get(v, []))

coordinatesList

[[[[-73.98996, 40.767188],
   [-73.99042, 40.766561],
   [-73.99088, 40.765934],
   [-73.99133, 40.76531],
   [-73.99179, 40.764684],
   [-73.99234, 40.764919],
   [-73.994631, 40.765881],
   [-73.99649, 40.766685],
   [-73.99603, 40.767303],
   [-73.99563, 40.767954],
   [-73.994688, 40.769219],
   [-73.9928, 40.768382],
   [-73.9908, 40.76754],
   [-73.98996, 40.767188]]],
 [[[-74.00796, 40.717122],
   [-74.00838, 40.716587],
   [-74.008823, 40.716014],
   [-74.0092, 40.716188],
   [-74.00911, 40.71692],
   [-74.00906, 40.717265],
   [-74.009, 40.717598],
   [-74.008943, 40.717925],
   [-74.00888, 40.718267],
   [-74.008819, 40.718642],
   [-74.00875, 40.719029],
   [-74.0087, 40.719296],
   [-74.00858, 40.719952],
   [-74.00847, 40.72061],
   [-74.008346, 40.721322],
   [-74.00823, 40.721966],
   [-74.008117, 40.722627],
   [-74.008, 40.723283],
   [-74.00797, 40.723416],
   [-74.00791, 40.7237],
   [-74.00763, 40.723497],
   [-74.00635, 40.722587],
   [-74.00591, 40.722253],
   [-7