In [273]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json

In [353]:
requests = pd.read_csv("requests.csv")
requests.head()

Unnamed: 0,Timestamp,Requested_Pickup,Requested_Dropoff,Actual_Pickup,Actual_Dropoff
0,1380949200,1149,725,218,3601
1,1380949200,1225,1714,2003,3601
2,1380949200,1237,3143,1314,2218
3,1380949200,1323,3566,1330,2573
4,1380949200,1328,2466,1323,2473


In [354]:
requests['Hour'] = requests['Timestamp'].apply(lambda x: pd.Timestamp(x,unit='s',tz='America/New_York').hour)

In [355]:
requests['Serving'] = requests['Actual_Dropoff'].apply(lambda x: 0 if x ==-1 else 1 )

In [356]:
requests.head()

Unnamed: 0,Timestamp,Requested_Pickup,Requested_Dropoff,Actual_Pickup,Actual_Dropoff,Hour,Serving
0,1380949200,1149,725,218,3601,1,1
1,1380949200,1225,1714,2003,3601,1,1
2,1380949200,1237,3143,1314,2218,1,1
3,1380949200,1323,3566,1330,2573,1,1
4,1380949200,1328,2466,1323,2473,1,1


In [357]:
unserving = requests[requests['Serving']==0]

In [358]:
unserving.head()

Unnamed: 0,Timestamp,Requested_Pickup,Requested_Dropoff,Actual_Pickup,Actual_Dropoff,Hour,Serving
688,1380949380,1225,1279,-1,-1,1,0
689,1380949380,1260,1999,-1,-1,1,0
692,1380949380,1387,1034,-1,-1,1,0
694,1380949380,1389,2569,-1,-1,1,0
709,1380949380,2013,1998,-1,-1,1,0


In [359]:
pick_up_location_by_time = unserving.groupby(['Hour','Requested_Pickup']).count()['Timestamp'].reset_index()

In [360]:
pick_up_location_by_time.rename(columns={'Timestamp':'Count'}, inplace=True)
pick_up_location_by_time.head()

Unnamed: 0,Hour,Requested_Pickup,Count
0,0,0,1
1,0,2,5
2,0,3,4
3,0,4,1
4,0,6,7


In [361]:
geomanhattan = gpd.read_file('manhattan.geojson')
geomanhattan.count()

id           4411
w            9625
geometry    14036
dtype: int64

In [362]:
geomanhattan.dropna(subset=['id'], inplace=True)

In [363]:
allpoint= pd.DataFrame({'id' : np.tile(list(range(4411)), 24),'hour':np.tile(list(range(24)),4411)})

In [364]:
pick_up_location_by_time_all = pd.merge(allpoint,pick_up_location_by_time, 
                                        how='left',left_on=['id','hour'],right_on=['Requested_Pickup','Hour'])

In [365]:
pick_up_location_by_time_all = pick_up_location_by_time_all[['id','hour','Count']].fillna(0)
pick_up_location_by_time_all.head()

Unnamed: 0,id,hour,Count
0,0,0,1.0
1,1,1,0.0
2,2,2,0.0
3,3,3,0.0
4,4,4,0.0


In [366]:
#pick_up_location_by_time.to_json(r'pick_up_location_by_time.json',orient='split')

In [367]:
#pick_up_location_by_time.to_csv(r'pick_up_location_by_time.csv')


In [368]:
servingrate = pd.read_csv("servingrate.csv")
servingrate.head()

Unnamed: 0.1,Unnamed: 0,Hour,Total,Serving,rate
0,0,0,19313,6748,0.349402
1,1,1,10052,7297,0.725925
2,2,2,6070,5464,0.900165
3,3,3,3725,3443,0.924295
4,4,4,2567,2310,0.899883


In [369]:
pick_up_location_by_time = pick_up_location_by_time_all.merge(servingrate, left_on='hour',right_on='Hour')
pick_up_location_by_time['rate1']= 1- pick_up_location_by_time['rate']
pick_up_location_by_time.head()

Unnamed: 0.1,id,hour,Count,Unnamed: 0,Hour,Total,Serving,rate,rate1
0,0,0,1.0,0,0,19313,6748,0.349402,0.650598
1,24,0,49.0,0,0,19313,6748,0.349402,0.650598
2,48,0,0.0,0,0,19313,6748,0.349402,0.650598
3,72,0,0.0,0,0,19313,6748,0.349402,0.650598
4,96,0,0.0,0,0,19313,6748,0.349402,0.650598


In [275]:
records = []
for key, grp in pick_up_location_by_time.groupby(['Hour','rate1']):
    records.append({
        "hour": int(key[0]),
        "rate": float(key[1]),
        "perID": {
            row.id: row.Count for row in grp.itertuples()
        }})

In [276]:
with open('pick_up_location_by_time.json', 'w',encoding='utf-8') as f:
     json.dump(records, f, ensure_ascii=False, indent=4)

# Aggregate to Zipcode 

In [65]:
geomanhattan = gpd.read_file('manhattan.geojson')
geozip = gpd.read_file('nyc_zip.geojson')

In [66]:
geomanhattan.dropna(subset=['id'], inplace=True)

In [67]:
zipid = gpd.sjoin(geomanhattan,geozip)

In [68]:
zipid.head()

Unnamed: 0,id,w,geometry,index_right,zipcode
0,0.0,,POINT (-73.9975944 40.7140611),187,10013
1,1.0,,POINT (-73.9980743 40.7137811),187,10013
28,28.0,,POINT (-74.00596609999999 40.7203782),187,10013
29,29.0,,POINT (-74.00641950000001 40.7204244),187,10013
30,30.0,,POINT (-74.006055 40.7244231),187,10013


In [80]:
zipcode_id = zipid[['id','zipcode','geometry']]
zipcode_id.head()

Unnamed: 0,id,zipcode,geometry
0,0.0,10013,POINT (-73.9975944 40.7140611)
1,1.0,10013,POINT (-73.9980743 40.7137811)
28,28.0,10013,POINT (-74.00596609999999 40.7203782)
29,29.0,10013,POINT (-74.00641950000001 40.7204244)
30,30.0,10013,POINT (-74.006055 40.7244231)


In [83]:
unserving = unserving.merge(zipcode_id,left_on='Requested_Pickup',right_on='id')

In [85]:
pick_up_location_by_time_zipcode = unserving.groupby(['Hour','zipcode']).count()['id'].reset_index()

In [97]:
pick_up_location_by_time_zipcode.rename(columns={'id':'Count'}, inplace=True)
pick_up_location_by_time_zipcode.head()


Unnamed: 0,Hour,zipcode,Count
0,0,83,85
1,0,10001,1280
2,0,10002,430
3,0,10003,1422
4,0,10004,21


In [91]:
nyc_zip = list(geozip['zipcode'])

In [96]:
allpoint2= pd.DataFrame({'zipcode' : np.tile(nyc_zip, 24),'hour':np.tile(list(range(24)),263)})

In [98]:
pick_up_location_by_time_zipcode_all = pd.merge(allpoint2,pick_up_location_by_time_zipcode, 
                                        how='left',left_on=['zipcode','hour'],right_on=['zipcode','Hour'])

In [100]:
pick_up_location_by_time_zipcode_all = pick_up_location_by_time_zipcode_all[['zipcode','hour','Count']].fillna(0)
pick_up_location_by_time_zipcode_all.head()

Unnamed: 0,zipcode,hour,Count
0,10471,0,0.0
1,10463,1,0.0
2,10475,2,0.0
3,10464,3,0.0
4,11222,4,0.0


In [277]:
pick_up_location_by_time_zipcode = pick_up_location_by_time_zipcode_all.merge(servingrate, left_on='hour',right_on='Hour')
pick_up_location_by_time_zipcode['rate1']= 1- pick_up_location_by_time_zipcode['rate']
pick_up_location_by_time_zipcode.head()


Unnamed: 0.1,zipcode,hour,Count,Unnamed: 0,Hour,Total,Serving,rate,rate1
0,10471,0,0.0,0,0,19313,6748,0.349402,0.650598
1,11101,0,0.0,0,0,19313,6748,0.349402,0.650598
2,11105,0,0.0,0,0,19313,6748,0.349402,0.650598
3,11691,0,0.0,0,0,19313,6748,0.349402,0.650598
4,10279,0,0.0,0,0,19313,6748,0.349402,0.650598


In [278]:
records = []
for key, grp in pick_up_location_by_time_zipcode.groupby(['Hour','rate1']):
    records.append({
        "hour": int(key[0]),
        "rate": float(key[1]),
        "zipcode": {
            row.zipcode: row.Count for row in grp.itertuples()
        }})

In [279]:
with open('pick_up_location_by_time_zipcode.json', 'w',encoding='utf-8') as f:
     json.dump(records, f, ensure_ascii=False, indent=4)

In [260]:
shapefile = gpd.GeoDataFrame.from_file('ZIP_CODE_040114.shp')
shapefile.head()

Unnamed: 0,ZIPCODE,BLDGZIP,PO_NAME,POPULATION,AREA,STATE,COUNTY,ST_FIPS,CTY_FIPS,URL,SHAPE_AREA,SHAPE_LEN,geometry
0,11436,0,Jamaica,18681.0,22699300.0,NY,Queens,36,81,http://www.usps.com/,0.0,0.0,"POLYGON ((1038098.251871482 188138.3800067157,..."
1,11213,0,Brooklyn,62426.0,29631000.0,NY,Kings,36,47,http://www.usps.com/,0.0,0.0,"POLYGON ((1001613.712964058 186926.4395172149,..."
2,11212,0,Brooklyn,83866.0,41972100.0,NY,Kings,36,47,http://www.usps.com/,0.0,0.0,"POLYGON ((1011174.275535807 183696.33770971, 1..."
3,11225,0,Brooklyn,56527.0,23698630.0,NY,Kings,36,47,http://www.usps.com/,0.0,0.0,"POLYGON ((995908.3654508889 183617.6128015518,..."
4,11218,0,Brooklyn,72280.0,36868800.0,NY,Kings,36,47,http://www.usps.com/,0.0,0.0,"POLYGON ((991997.1134308875 176307.4958601296,..."


In [261]:
manhattan_zip = shapefile.merge(geozip,left_on='ZIPCODE', right_on='zipcode')

In [262]:
manhattan_zip = manhattan_zip[manhattan_zip['COUNTY']=='New York']

In [272]:
manhattan_zip.head()

Unnamed: 0_level_0,zipcode,geometry_y
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1
10463,10463,"(POLYGON ((-73.920646 40.887237, -73.920376 40..."
10463,10463,"(POLYGON ((-73.915441 40.875591, -73.915435 40..."
10034,10034,"(POLYGON ((-73.920621 40.873001, -73.920579 40..."
10033,10033,"(POLYGON ((-73.932131 40.86945, -73.93186 40.8..."
10040,10040,"(POLYGON ((-73.933789 40.863072, -73.933671 40..."


In [264]:
manhattan_zip = manhattan_zip[['zipcode','geometry_y']]

In [265]:
manhattan_zip.index = manhattan_zip['zipcode']

In [270]:
manhattan_zipcode = gpd.GeoSeries(manhattan_zip['geometry_y'])

In [271]:
manhattan_zipcode.to_file("manhattan_zip.geojson", driver='GeoJSON')