In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from scipy.stats import zscore
from glob import glob
from itertools import product

import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import shapely
from shapely.ops import unary_union
from shapely.geometry import mapping, Polygon
from src.grid_generation import *

In [None]:
# Month of data to load (only one month is loaded at a time because of memory limits on the PC)
YEAR = '2011'
MONTH = '05'
# Width of the time window used when flooring pickup times (pandas frequency string)
MINUTES_WINDOW_SIZE = '15Min'

In [None]:
# Load the yellow-taxi trip records for the configured YEAR / MONTH.
data = pd.read_parquet(
    path=f"data/new_data/yellow_tripdata_{YEAR}-{MONTH}.parquet",
)
data

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee
0,2,2011-05-01 00:29:00,2011-05-01 00:40:00,1,1.48,1,,148,231,2,7.7,0.5,0.5,0.0,0.0,0.0,8.7,,
1,2,2011-05-01 00:25:00,2011-05-01 00:38:00,1,1.63,1,,132,132,1,8.5,0.5,0.5,2.0,0.0,0.0,11.5,,
2,1,2011-05-01 00:03:25,2011-05-01 00:11:34,2,1.10,1,N,79,107,1,6.1,0.5,0.5,1.0,0.0,0.0,8.1,,
3,1,2011-05-01 00:14:54,2011-05-01 00:27:25,1,2.80,1,N,234,141,1,9.7,0.5,0.5,1.5,0.0,0.0,12.2,,
4,1,2011-05-01 00:33:50,2011-05-01 00:42:05,1,1.70,1,N,237,140,1,6.9,0.5,0.5,1.0,0.0,0.0,8.9,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15554863,1,2011-05-31 23:20:37,2011-05-31 23:27:08,1,1.00,1,N,48,161,2,5.7,0.5,0.5,0.0,0.0,0.0,6.7,,
15554864,1,2011-05-31 23:09:57,2011-06-01 00:00:31,1,13.70,1,N,186,241,2,38.1,0.5,0.5,0.0,0.0,0.0,39.1,,
15554865,1,2011-05-31 23:08:16,2011-05-31 23:12:42,1,1.70,1,N,161,263,2,5.7,0.5,0.5,0.0,0.0,0.0,6.7,,
15554866,1,2011-05-31 23:28:36,2011-05-31 23:32:00,1,0.80,1,N,75,151,1,4.5,0.5,0.5,1.1,0.0,0.0,6.6,,


In [None]:
# Join gdf geometry to data according to PULocationID
manhattan_gdf = gpd.read_file('data/new_data/taxi_zones/taxi_zones.shp')
# .copy() yields an independent frame, so the column added below is not written into a
# slice of the full zone table (which would raise pandas' SettingWithCopyWarning).
manhattan_gdf = manhattan_gdf[manhattan_gdf.borough == 'Manhattan'].copy()

# Expose LocationID under the trip table's key name so both frames share a join column.
manhattan_gdf["PULocationID"] = manhattan_gdf.LocationID
# Inner join: only trips whose pickup zone is one of the Manhattan zones are kept.
data = manhattan_gdf.merge(data, on='PULocationID', how='inner')
data

Unnamed: 0,OBJECTID,Shape_Leng,Shape_Area,zone,LocationID,borough,geometry,PULocationID,VendorID,tpep_pickup_datetime,...,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee
0,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:39:00,...,1,8.9,0.5,0.5,2.00,0.0,0.0,11.90,,
1,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,1,2011-05-01 00:45:27,...,1,8.1,0.5,0.5,1.82,0.0,0.0,10.92,,
2,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:20:00,...,2,11.7,0.5,0.5,0.00,0.0,0.0,12.70,,
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,1,2011-05-01 00:45:14,...,2,4.9,0.5,0.5,0.00,0.0,0.0,5.90,,
4,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:54:00,...,2,19.3,0.5,0.5,0.00,0.0,0.0,20.30,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14044492,263,0.037017,0.000066,Yorkville West,263,Manhattan,"POLYGON ((997493.323 220912.386, 997355.264 22...",263,1,2011-05-31 23:51:55,...,2,8.5,0.5,0.5,0.00,0.0,0.0,9.50,,
14044493,263,0.037017,0.000066,Yorkville West,263,Manhattan,"POLYGON ((997493.323 220912.386, 997355.264 22...",263,2,2011-05-31 23:53:00,...,2,6.1,0.5,0.5,0.00,0.0,0.0,7.10,,
14044494,263,0.037017,0.000066,Yorkville West,263,Manhattan,"POLYGON ((997493.323 220912.386, 997355.264 22...",263,2,2011-05-31 23:51:00,...,2,4.5,0.5,0.5,0.00,0.0,0.0,5.50,,
14044495,263,0.037017,0.000066,Yorkville West,263,Manhattan,"POLYGON ((997493.323 220912.386, 997355.264 22...",263,1,2011-05-31 23:34:12,...,1,22.5,0.5,0.5,4.70,0.0,0.0,28.20,,


In [None]:
# Parse the pickup and dropoff timestamp columns into pandas datetimes.
data["tpep_pickup_datetime"] = pd.to_datetime(data.tpep_pickup_datetime)
data["tpep_dropoff_datetime"] = pd.to_datetime(data.tpep_dropoff_datetime)

In [None]:
# Year-month label ("YYYY-MM") of each pickup time.
data["y_m"] = (
    pd.to_datetime(data.tpep_pickup_datetime)
    .dt.strftime('%Y-%m')
)

In [None]:
# Filter date: keep only trips picked up in the configured month.
# The comparison uses YEAR/MONTH rather than values read from particular rows, because
# row order after the merge follows the zone table, not pickup time.
mask = data['y_m'] == f"{YEAR}-{MONTH}"
# .copy() so the columns added in later cells are written to an independent frame
# instead of a slice of the unfiltered one.
data = data.loc[mask].copy()

In [None]:
# Calendar date ("YYYY-MM-DD") of each pickup time.
data["y_m_d"] = (
    pd.to_datetime(data.tpep_pickup_datetime)
    .dt.strftime('%Y-%m-%d')
)

In [None]:
# Hour of day of the pickup (12AM = 0, 11PM = 23).
data["hour"] = data["tpep_pickup_datetime"].dt.hour

In [None]:
# Floor each pickup time to its MINUTES_WINDOW_SIZE window and keep the minute at
# which that window starts.
data["mins"] = (
    data["tpep_pickup_datetime"]
    .dt.floor(MINUTES_WINDOW_SIZE)
    .dt.minute
)

In [None]:
# Concat PULocationID with DOLocationID into one "<pickup>_<dropoff>" label, e.g. "4_68".
# Column-wise string concatenation produces the same labels as joining each row
# separately, without one Python call per trip.
data['p2p'] = data["PULocationID"].astype(str) + "_" + data["DOLocationID"].astype(str)

In [None]:
# Preview the first rows, including the derived y_m, y_m_d, hour, mins and p2p columns.
data.head()

Unnamed: 0,OBJECTID,Shape_Leng,Shape_Area,zone,LocationID,borough,geometry,PULocationID,VendorID,tpep_pickup_datetime,...,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee,y_m,y_m_d,hour,mins,p2p
0,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:39:00,...,0.0,0.0,11.9,,,2011-05,2011-05-01,0,30,4_68
1,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,1,2011-05-01 00:45:27,...,0.0,0.0,10.92,,,2011-05,2011-05-01,0,45,4_231
2,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:20:00,...,0.0,0.0,12.7,,,2011-05,2011-05-01,0,15,4_68
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,1,2011-05-01 00:45:14,...,0.0,0.0,5.9,,,2011-05,2011-05-01,0,45,4_79
4,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20...",4,2,2011-05-01 00:54:00,...,0.0,0.0,20.3,,,2011-05,2011-05-01,0,45,4_41


In [None]:
# Lookup table with one row per pickup zone id: PULocationID -> zone name.
# .first() takes the first zone name of each group directly, instead of building a list
# of every trip's zone name per group and then indexing element 0.
zone_to_loc = data.groupby('PULocationID')['zone'].first().reset_index()
zone_to_loc

Unnamed: 0,PULocationID,zone
0,4,Alphabet City
1,12,Battery Park
2,13,Battery Park City
3,24,Bloomingdale
4,41,Central Harlem
...,...,...
62,246,West Chelsea/Hudson Yards
63,249,West Village
64,261,World Trade Center
65,262,Yorkville East


In [None]:
# Write the PULocationID -> zone lookup to CSV, without the index column.
zone_to_loc.to_csv('zone_to_loc.csv', index=False, header=True)

In [None]:
# Look up the location id of a zone by its name.
desired_rows = zone_to_loc.loc[zone_to_loc['zone'].eq('Yorkville East')]
desired_rows

Unnamed: 0,PULocationID,zone
65,262,Yorkville East


In [None]:
# Look up the zone name of a given location id.
desired_rows = zone_to_loc.loc[zone_to_loc['PULocationID'].eq(50)]
desired_rows

Unnamed: 0,PULocationID,zone
9,50,Clinton West


In [None]:
# Distinct zone names present in the merged trip data.
a = data['zone'].unique()
a

array(['Alphabet City', 'Battery Park', 'Battery Park City',
       'Bloomingdale', 'Central Harlem', 'Central Harlem North',
       'Central Park', 'Chinatown', 'Clinton East', 'Clinton West',
       'East Chelsea', 'East Harlem North', 'East Harlem South',
       'East Village', 'Financial District North',
       'Financial District South', 'Flatiron', 'Garment District',
       "Governor's Island/Ellis Island/Liberty Island", 'Gramercy',
       'Greenwich Village North', 'Greenwich Village South',
       'Hamilton Heights', 'Highbridge Park', 'Hudson Sq', 'Inwood',
       'Inwood Hill Park', 'Kips Bay', 'Lenox Hill East',
       'Lenox Hill West', 'Lincoln Square East', 'Lincoln Square West',
       'Little Italy/NoLiTa', 'Lower East Side', 'Manhattan Valley',
       'Manhattanville', 'Marble Hill', 'Meatpacking/West Village West',
       'Midtown Center', 'Midtown East', 'Midtown North', 'Midtown South',
       'Morningside Heights', 'Murray Hill',
       'Penn Station/Madison Sq W

In [None]:
# Hand-picked search centres per zone, keyed by zone name.
# Each value is a list of (latitude, longitude) tuples; some zones list several centres.
# The keys are later matched against the 'zone' column when merging with zone_to_loc.
zone_coordinates = {
    "Alphabet City": [(40.724968, -73.979102)],
    "Battery Park" : [(40.703552, -74.015918)],
    # 3 centres for large rectangle area
    "Battery Park City": [(40.706855, -74.017698), (40.710027, -74.016389),
     (40.714321, -74.015188)],
    "Bloomingdale": [(40.799797, -73.970068)],
    "Central Harlem": [(40.805117, -73.951074)],
    "Central Harlem North": [(40.819928, -73.940088)],
    # 2 centres for large rectangle area
    'Central Park': [(40.775342, -73.970470), (40.790225, -73.959741)],
    "Chinatown": [(40.715642, -73.996344)],
    # 2 centres for large rectangle area
    "Clinton East": [(40.757928, -73.992929), (40.765441, -73.987742)],
    "Clinton West": [(40.768362, -73.992886), (40.761504, -73.997821)],
    "East Chelsea": [(40.752011, -74.000439)],
    "East Harlem North": [(40.802156, -73.936367)],
    "East Harlem South": [(40.792019, -73.942546)],
    "East Village": [(40.727608, -73.985308)],
    "Financial District North": [(40.708320, -74.007667)],
    "Financial District South": [(40.704497, -74.010800)],
    "Flatiron": [(40.739214, -73.995774)],
    'Garment District': [(40.753808, -73.991441)],
    "Governor's Island/Ellis Island/Liberty Island": [(40.689822, -74.045559)],
    "Gramercy": [(40.735716, -73.984016)],
    "Greenwich Village North": [(40.734220, -73.994230)],
    "Greenwich Village South": [(40.729423, -73.996311)],
    "Hamilton Heights": [(40.825438, -73.949041)],
    "Highbridge Park": [(40.852908, -73.926095)],
    "Hudson Sq": [(40.726767, -74.007417)],
    "Inwood": [(40.867053, -73.917225)],
    "Inwood Hill Park": [(40.871953, -73.926237)],
    "Kips Bay": [(40.741597, -73.978170)],
    "Lenox Hill East": [(40.764617, -73.958364)],
    "Lenox Hill West": [(40.765535, -73.962494)],
    'Lincoln Square East': [(40.769943, -73.984406)],
    "Lincoln Square West": [(40.771634, -73.990414)],
    'Little Italy/NoLiTa': [(40.718843, -73.997602)],
    "Lower East Side": [(40.719302, -73.990713)],
    "Manhattan Valley": [(40.799244, -73.966724)],
    "Manhattanville": [(40.817577, -73.956842)],
    "Marble Hill": [(40.876735, -73.910451)],
    'Meatpacking/West Village West': [(40.734799, -74.008364)],
    "Midtown Center": [(40.759885, -73.976234)],
    "Midtown East": [(40.756667, -73.972372)],
    "Midtown North": [(40.764923, -73.978508)],
    "Midtown South": [(40.750384, -73.987303)],
    'Morningside Heights': [(40.809989, -73.962228)],
    "Murray Hill": [(40.749232, -73.978640)],
    'Penn Station/Madison Sq West': [(40.749581, -73.993049)],
    'Randalls Island': [(40.786749, -73.928162), (40.792565, -73.922283)],
    'Roosevelt Island': [(40.767494, -73.945048), (40.763009, -73.949125), (40.757547, -73.954060), (40.753321, -73.958266)],
    "Seaport": [(40.708404, -74.003457)],
    "SoHo": [(40.723807, -74.000636)],
    'Stuy Town/Peter Cooper Village': [(40.731306, -73.977166)],
    'Sutton Place/Turtle Bay North': [(40.757765, -73.961724)],
    'Times Sq/Theatre District': [(40.759328, -73.984615)],
    'TriBeCa/Civic Center': [(40.717598, -74.008964)],
    'Two Bridges/Seward Park': [(40.712459, -73.992083)],
    'UN/Turtle Bay South': [(40.755534, -73.968118)],
    'Union Sq': [(40.736288, -73.990207)],
    'Upper East Side North': [(40.782082, -73.955628)],
    'Upper East Side South': [(40.771032, -73.963867)],
    'Upper West Side North': [(40.790465, -73.973137)],
    'Upper West Side South': [(40.783121, -73.978287)],
    'Washington Heights North': [(40.856715, -73.932648)],
    'Washington Heights South': [(40.844866, -73.939042)],
    'West Chelsea/Hudson Yards': [(40.753829, -74.004824)],
    "West Village": [(40.734677, -74.002314)],
    'World Trade Center': [(40.709210, -74.013117)],
    "Yorkville East": [(40.776598, -73.946608)],
    "Yorkville West": [(40.777410, -73.950599)]
}

# Number of zones that have at least one search centre defined.
print(len(zone_coordinates))

67


In [None]:
import csv
import json
import os
import pprint

import googlemaps
import pandas as pd

# The API key is read from the environment so the secret is never stored in the notebook.
# A key that has been committed in plain text should be treated as leaked and rotated.
GOOGLE_MAP_KEY = os.environ.get("GOOGLE_MAP_KEY")
if not GOOGLE_MAP_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAP_KEY environment variable to a Google Maps API key."
    )

gmaps = googlemaps.Client(key=GOOGLE_MAP_KEY)

# Place types grouped by the kind of demand pattern they are expected to drive:
# school: schools start and end at set times
# food: people eat at set times
# leisure: people do leisure things usually after work
# attractions: people usually visit attractions on weekends
# transport: people usually go to airports on holidays, and to trains when they commute
# adhoc: any time of day
# religious: more popular during festivals
features = {
    "school": ["school", "primary_school", "secondary_school", "university"],
    "food": ["bar", "cafe", "bakery", "meal_delivery", "meal_takeaway", "restaurant"],
    "leisure": ["beauty_salon", "bicycle_store", "book_store", "bowling_alley", "gym",
                "hair_care", "library", "liquor_store", "clothing_store", "department_store",
                "furniture_store", "lodging", "movie_theater", "night_club",
                "park", "shopping_mall", "spa", "stadium", "supermarket"],
    "attractions": ["amusement_park", "aquarium", "art_gallery", "casino", "museum",
                    "tourist_attraction", "zoo"],
    "transport": ["airport", "bus_station", "light_rail_station", "parking",
                  "subway_station", "taxi_stand", "train_station", "transit_station"],
    # "fire_station" must be all lower case: place types are compared by exact string
    # match below, so a differently-cased entry could never be flagged.
    "adhoc": ["atm", "bank", "car_dealer", "convenience_store", "courthouse",
              "dentist", "doctor", "drugstore", "electrician", "electronics_store",
              "embassy", "fire_station", "florist", "funeral_home", "gas_station",
              "hardware_store", "home_goods_store", "hospital", "insurance_agency",
              "jewelry_store", "laundry", "lawyer", "local_government_office", "locksmith",
              "moving_company", "painter", "pet_store", "pharmacy",
              "plumber", "police", "post_office", "real_estate_agency", "roofing_contractor",
              "shoe_store", "store", "storage", "travel_agency", "veterinary_care"],
    "religious": ["church", "cemetery", "hindu_temple", "mosque", "synagogue"]
}

def create_feature_template():
    """Return a fresh feature row with every flag cleared.

    The dict has 'zone' set to None, one 0 entry per category in `features`,
    and one 0 entry per '<category>_<place type>' pair.
    """
    feature_template = {'zone': None}
    for category, place_types in features.items():
        feature_template[category] = 0
        for place_type in place_types:
            feature_template[category + "_" + place_type] = 0
    return feature_template

headers = create_feature_template().keys()

# Collect one row per zone and build the frame once at the end, rather than
# appending to the DataFrame row by row.
zone_feature_rows = []
for zone_name, coordinates in zone_coordinates.items():
    feature_template = create_feature_template()
    feature_template['zone'] = zone_name
    for lat, long in coordinates:
        c = f"{lat},{long}"
        # Only the results of this single call are inspected; no further result
        # pages are requested.
        places_result = gmaps.places_nearby(location=c, radius=600, open_now=False)
        for nearby_place in places_result["results"]:
            for place_type in nearby_place["types"]:
                for category, category_types in features.items():
                    if place_type in category_types:
                        # Flag both the broad category and the specific place type.
                        feature_template[category] = 1
                        feature_template[category + "_" + place_type] = 1
    zone_feature_rows.append(feature_template)

zone_features = pd.DataFrame(zone_feature_rows, columns=list(headers))

zone_features

Unnamed: 0,zone,school,school_school,school_primary_school,school_secondary_school,school_university,food,food_bar,food_cafe,food_bakery,...,adhoc_store,adhoc_storage,adhoc_travel_agency,adhoc_veterinary_care,religious,religious_church,religious_cemetery,religious_hindu_temple,religious_mosque,religious_synagogue
0,Alphabet City,0,0,0,0,0,1,1,1,1,...,1,0,0,0,0,0,0,0,0,0
1,Battery Park,1,0,0,0,1,1,1,0,0,...,1,0,0,0,0,0,0,0,0,0
2,Battery Park City,1,1,0,1,1,1,1,0,0,...,1,0,0,0,1,1,0,0,0,0
3,Bloomingdale,1,1,0,0,0,1,1,0,1,...,1,0,0,0,0,0,0,0,0,0
4,Central Harlem,1,1,0,0,0,1,1,1,0,...,1,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,West Chelsea/Hudson Yards,0,0,0,0,0,1,1,0,0,...,1,0,0,0,0,0,0,0,0,0
63,West Village,0,0,0,0,0,1,1,0,1,...,1,0,0,0,0,0,0,0,0,0
64,World Trade Center,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,1,0,0,0,0
65,Yorkville East,1,1,1,1,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,0


In [None]:
# Attach each zone's PULocationID to its feature flags (matched on zone name).
zone_features1 = zone_to_loc.merge(zone_features, how="inner", on='zone')

In [None]:
# Display the merged table of location ids and feature flags.
zone_features1

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university,food,food_bar,food_cafe,...,adhoc_store,adhoc_storage,adhoc_travel_agency,adhoc_veterinary_care,religious,religious_church,religious_cemetery,religious_hindu_temple,religious_mosque,religious_synagogue
0,4,Alphabet City,0,0,0,0,0,1,1,1,...,1,0,0,0,0,0,0,0,0,0
1,12,Battery Park,1,0,0,0,1,1,1,0,...,1,0,0,0,0,0,0,0,0,0
2,13,Battery Park City,1,1,0,1,1,1,1,0,...,1,0,0,0,1,1,0,0,0,0
3,24,Bloomingdale,1,1,0,0,0,1,1,0,...,1,0,0,0,0,0,0,0,0,0
4,41,Central Harlem,1,1,0,0,0,1,1,1,...,1,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,246,West Chelsea/Hudson Yards,0,0,0,0,0,1,1,0,...,1,0,0,0,0,0,0,0,0,0
63,249,West Village,0,0,0,0,0,1,1,0,...,1,0,0,0,0,0,0,0,0,0
64,261,World Trade Center,0,0,0,0,0,0,0,0,...,1,0,0,0,1,1,0,0,0,0
65,262,Yorkville East,1,1,1,1,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0


In [None]:
# Zones flagged with any school-type place, limited to the school columns.
zones_with_school = zone_features1.loc[
    zone_features1['school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
1,12,Battery Park,1,0,0,0,1
2,13,Battery Park City,1,1,0,1,1
3,24,Bloomingdale,1,1,0,0,0
4,41,Central Harlem,1,1,0,0,0
5,42,Central Harlem North,1,1,0,1,0
9,50,Clinton West,1,1,0,1,0
11,74,East Harlem North,1,1,1,0,1
12,75,East Harlem South,1,1,0,1,0
16,90,Flatiron,1,1,0,0,1
20,113,Greenwich Village North,1,1,0,0,1


In [None]:
# Zones flagged with a primary school, limited to the school columns.
zones_with_pri_school = zone_features1.loc[
    zone_features1['school_primary_school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_pri_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
11,74,East Harlem North,1,1,1,0,1
22,116,Hamilton Heights,1,1,1,1,1
46,202,Roosevelt Island,1,1,1,1,1
56,236,Upper East Side North,1,1,1,1,0
58,238,Upper West Side North,1,1,1,0,0
65,262,Yorkville East,1,1,1,1,0


In [None]:
# Zones flagged with a secondary school, limited to the school columns.
zones_with_sec_school = zone_features1.loc[
    zone_features1['school_secondary_school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_sec_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
2,13,Battery Park City,1,1,0,1,1
5,42,Central Harlem North,1,1,0,1,0
9,50,Clinton West,1,1,0,1,0
12,75,East Harlem South,1,1,0,1,0
22,116,Hamilton Heights,1,1,1,1,1
23,120,Highbridge Park,1,1,0,1,0
29,141,Lenox Hill West,1,1,0,1,1
31,143,Lincoln Square West,1,1,0,1,1
35,152,Manhattanville,1,1,0,1,1
36,153,Marble Hill,1,1,0,1,0


In [None]:
# Zones flagged with a university, limited to the school columns.
zones_with_uni = zone_features1.loc[
    zone_features1['school_university'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_uni

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
1,12,Battery Park,1,0,0,0,1
2,13,Battery Park City,1,1,0,1,1
11,74,East Harlem North,1,1,1,0,1
16,90,Flatiron,1,1,0,0,1
20,113,Greenwich Village North,1,1,0,0,1
21,114,Greenwich Village South,1,0,0,0,1
22,116,Hamilton Heights,1,1,1,1,1
28,140,Lenox Hill East,1,0,0,0,1
29,141,Lenox Hill West,1,1,0,1,1
30,142,Lincoln Square East,1,0,0,0,1


In [None]:
# Zones flagged with any food-type place, limited to the food columns.
zones_with_food = zone_features1.loc[
    zone_features1['food'].eq(1),
    ['PULocationID', 'zone', "food", "food_bar", "food_cafe", "food_bakery",
     "food_meal_delivery", "food_meal_takeaway", "food_restaurant"],
]
zones_with_food

Unnamed: 0,PULocationID,zone,food,food_bar,food_cafe,food_bakery,food_meal_delivery,food_meal_takeaway,food_restaurant
0,4,Alphabet City,1,1,1,1,0,1,1
1,12,Battery Park,1,1,0,0,0,0,1
2,13,Battery Park City,1,1,0,0,1,1,1
3,24,Bloomingdale,1,1,0,1,0,1,1
4,41,Central Harlem,1,1,1,0,1,0,1
5,42,Central Harlem North,1,0,1,0,1,1,1
6,43,Central Park,1,1,1,0,0,0,1
7,45,Chinatown,1,1,1,1,0,0,1
9,50,Clinton West,1,1,0,0,0,1,1
10,68,East Chelsea,1,1,0,0,0,0,1


In [None]:
# (rows, columns) of the food subset, i.e. how many zones carry the food flag.
zones_with_food.shape

(55, 9)

In [None]:
# Zones flagged with any attraction-type place, limited to the attraction columns.
zones_with_attractions = zone_features1.loc[
    zone_features1['attractions'].eq(1),
    ['PULocationID', 'zone', "attractions", "attractions_amusement_park",
     "attractions_aquarium", "attractions_art_gallery", "attractions_casino",
     "attractions_museum", "attractions_tourist_attraction", "attractions_zoo"],
]
zones_with_attractions

Unnamed: 0,PULocationID,zone,attractions,attractions_amusement_park,attractions_aquarium,attractions_art_gallery,attractions_casino,attractions_museum,attractions_tourist_attraction,attractions_zoo
1,12,Battery Park,1,0,0,0,0,1,1,0
2,13,Battery Park City,1,0,0,0,0,1,1,0
4,41,Central Harlem,1,0,0,0,0,1,1,0
5,42,Central Harlem North,1,0,0,0,0,0,1,0
6,43,Central Park,1,0,0,0,0,1,1,0
9,50,Clinton West,1,0,0,0,0,1,1,0
14,87,Financial District North,1,0,0,0,0,0,1,0
15,88,Financial District South,1,0,0,0,0,1,1,0
16,90,Flatiron,1,0,0,0,0,1,0,0
18,103,Governor's Island/Ellis Island/Liberty Island,1,0,0,0,0,1,1,0


In [None]:
# Zones flagged with any religious-type place, limited to the religious columns.
zones_with_religious = zone_features1.loc[
    zone_features1['religious'].eq(1),
    ['PULocationID', 'zone', "religious", "religious_church", "religious_cemetery",
     "religious_hindu_temple", "religious_mosque", "religious_synagogue"],
]
zones_with_religious

Unnamed: 0,PULocationID,zone,religious,religious_church,religious_cemetery,religious_hindu_temple,religious_mosque,religious_synagogue
2,13,Battery Park City,1,1,0,0,0,0
4,41,Central Harlem,1,1,0,0,0,0
5,42,Central Harlem North,1,1,0,0,0,0
11,74,East Harlem North,1,1,0,0,0,0
15,88,Financial District South,1,1,0,0,0,0
22,116,Hamilton Heights,1,1,0,0,0,0
23,120,Highbridge Park,1,1,0,0,0,0
35,152,Manhattanville,1,1,0,0,0,1
36,153,Marble Hill,1,1,0,0,0,0
42,166,Morningside Heights,1,1,0,0,0,0


### Test with lower radius = 100m

In [None]:
import csv
import json
import os
import pprint

import googlemaps
import pandas as pd

# The API key is read from the environment so the secret is never stored in the notebook.
# A key that has been committed in plain text should be treated as leaked and rotated.
GOOGLE_MAP_KEY = os.environ.get("GOOGLE_MAP_KEY")
if not GOOGLE_MAP_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAP_KEY environment variable to a Google Maps API key."
    )

gmaps = googlemaps.Client(key=GOOGLE_MAP_KEY)

# Place types grouped by the kind of demand pattern they are expected to drive:
# school: schools start and end at set times
# food: people eat at set times
# leisure: people do leisure things usually after work
# attractions: people usually visit attractions on weekends
# transport: people usually go to airports on holidays, and to trains when they commute
# adhoc: any time of day
# religious: more popular during festivals
features = {
    "school": ["school", "primary_school", "secondary_school", "university"],
    "food": ["bar", "cafe", "bakery", "meal_delivery", "meal_takeaway", "restaurant"],
    "leisure": ["beauty_salon", "bicycle_store", "book_store", "bowling_alley", "gym", "hair_care", "library", "liquor_store", "clothing_store", "department_store", "furniture_store", "lodging", "movie_theater", "night_club", "park", "shopping_mall", "spa", "stadium", "supermarket"],
    "attractions": ["amusement_park", "aquarium", "art_gallery", "casino", "museum", "tourist_attraction", "zoo"],
    "transport": ["airport", "bus_station", "light_rail_station", "parking", "subway_station", "taxi_stand", "train_station", "transit_station"],
    # "fire_station" must be all lower case: place types are compared by exact string
    # match below, so a differently-cased entry could never be flagged.
    "adhoc": ["atm", "bank", "car_dealer", "convenience_store", "courthouse", "dentist", "doctor", "drugstore", "electrician", "electronics_store", "embassy", "fire_station", "florist", "funeral_home", "gas_station", "hardware_store", "home_goods_store", "hospital", "insurance_agency", "jewelry_store", "laundry", "lawyer", "local_government_office", "locksmith", "moving_company", "painter", "pet_store", "pharmacy", "plumber", "police", "post_office", "real_estate_agency", "roofing_contractor", "shoe_store", "store", "storage", "travel_agency", "veterinary_care"],
    "religious": ["church", "cemetery", "hindu_temple", "mosque", "synagogue"]
}

def create_feature_template():
    """Return a fresh feature row with every flag cleared.

    The dict has 'zone' set to None, one 0 entry per category in `features`,
    and one 0 entry per '<category>_<place type>' pair.
    """
    feature_template = {'zone': None}
    for category, place_types in features.items():
        feature_template[category] = 0
        for place_type in place_types:
            feature_template[category + "_" + place_type] = 0
    return feature_template

headers = create_feature_template().keys()

# Collect one row per zone and build the frame once at the end, rather than
# appending to the DataFrame row by row.
zone_feature_rows = []
for zone_name, coordinates in zone_coordinates.items():
    feature_template = create_feature_template()
    feature_template['zone'] = zone_name
    for lat, long in coordinates:
        c = f"{lat},{long}"
        # Smaller search radius (100) around each centre. Only the results of this
        # single call are inspected; no further result pages are requested.
        places_result = gmaps.places_nearby(location=c, radius=100, open_now=False)
        for nearby_place in places_result["results"]:
            for place_type in nearby_place["types"]:
                for category, category_types in features.items():
                    if place_type in category_types:
                        # Flag both the broad category and the specific place type.
                        feature_template[category] = 1
                        feature_template[category + "_" + place_type] = 1
    zone_feature_rows.append(feature_template)

zone_features = pd.DataFrame(zone_feature_rows, columns=list(headers))

zone_features

Unnamed: 0,zone,school,school_school,school_primary_school,school_secondary_school,school_university,food,food_bar,food_cafe,food_bakery,...,adhoc_store,adhoc_storage,adhoc_travel_agency,adhoc_veterinary_care,religious,religious_church,religious_cemetery,religious_hindu_temple,religious_mosque,religious_synagogue
0,Alphabet City,0,0,0,0,0,1,1,0,0,...,1,0,0,0,1,1,0,0,0,0
1,Battery Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Battery Park City,1,1,0,0,0,1,1,1,1,...,1,0,0,0,1,1,0,0,0,0
3,Bloomingdale,1,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,Central Harlem,1,1,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,West Chelsea/Hudson Yards,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
63,West Village,0,0,0,0,0,1,1,1,0,...,1,0,0,0,0,0,0,0,0,0
64,World Trade Center,1,1,0,1,0,1,1,0,0,...,1,0,0,0,0,0,0,0,0,0
65,Yorkville East,0,0,0,0,0,1,0,1,0,...,1,0,1,0,0,0,0,0,0,0


In [None]:
# Attach each zone's PULocationID to the radius=100 feature flags, then save the table.
zone_features1 = zone_to_loc.merge(zone_features, how="inner", on='zone')
zone_features1.to_csv('zone_features.csv', header=True, index=False)

In [None]:
# Zones flagged with any school-type place, limited to the school columns.
zones_with_school = zone_features1.loc[
    zone_features1['school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
2,13,Battery Park City,1,1,0,0,0
3,24,Bloomingdale,1,1,0,0,0
4,41,Central Harlem,1,1,0,0,0
8,48,Clinton East,1,0,0,0,1
12,75,East Harlem South,1,1,0,1,0
17,100,Garment District,1,1,0,0,0
20,113,Greenwich Village North,1,0,0,0,1
21,114,Greenwich Village South,1,1,0,0,1
22,116,Hamilton Heights,1,1,0,0,0
27,137,Kips Bay,1,1,1,1,0


In [None]:
# Write the school subset to CSV, without the index column.
zones_with_school.to_csv('zones_with_school.csv', index=False, header=True)

In [None]:
# Zones flagged with a primary school, limited to the school columns.
zones_with_pri_school = zone_features1.loc[
    zone_features1['school_primary_school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_pri_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
27,137,Kips Bay,1,1,1,1,0
53,232,Two Bridges/Seward Park,1,1,1,0,0


In [None]:
# Write the primary-school subset to CSV, without the index column.
zones_with_pri_school.to_csv('zones_with_school_pri.csv', index=False, header=True)

In [None]:
# Zones flagged with a secondary school, limited to the school columns.
zones_with_sec_school = zone_features1.loc[
    zone_features1['school_secondary_school'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_sec_school

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
12,75,East Harlem South,1,1,0,1,0
27,137,Kips Bay,1,1,1,1,0
56,236,Upper East Side North,1,1,0,1,0
64,261,World Trade Center,1,1,0,1,0


In [None]:
# Write the secondary-school subset to CSV, without the index column.
zones_with_sec_school.to_csv('zones_with_school_sec.csv', index=False, header=True)

In [None]:
# Zones flagged with a university, limited to the school columns.
zones_with_uni = zone_features1.loc[
    zone_features1['school_university'].eq(1),
    ['PULocationID', 'zone', 'school', 'school_school', 'school_primary_school',
     'school_secondary_school', 'school_university'],
]
zones_with_uni

Unnamed: 0,PULocationID,zone,school,school_school,school_primary_school,school_secondary_school,school_university
8,48,Clinton East,1,0,0,0,1
20,113,Greenwich Village North,1,0,0,0,1
21,114,Greenwich Village South,1,1,0,0,1
30,142,Lincoln Square East,1,1,0,0,1
41,164,Midtown South,1,0,0,0,1
42,166,Morningside Heights,1,0,0,0,1
58,238,Upper West Side North,1,1,0,0,1
61,244,Washington Heights South,1,1,0,0,1


In [None]:
# Write the university subset to CSV, without the index column.
zones_with_uni.to_csv('zones_with_school_uni.csv', index=False, header=True)

In [None]:
# Zones flagged with any food-type place, limited to the food columns.
zones_with_food = zone_features1.loc[
    zone_features1['food'].eq(1),
    ['PULocationID', 'zone', "food", "food_bar", "food_cafe", "food_bakery",
     "food_meal_delivery", "food_meal_takeaway", "food_restaurant"],
]
zones_with_food

Unnamed: 0,PULocationID,zone,food,food_bar,food_cafe,food_bakery,food_meal_delivery,food_meal_takeaway,food_restaurant
0,4,Alphabet City,1,1,0,0,0,0,1
2,13,Battery Park City,1,1,1,1,1,1,1
4,41,Central Harlem,1,0,0,0,0,0,1
5,42,Central Harlem North,1,0,0,0,0,0,1
7,45,Chinatown,1,1,0,0,0,0,1
8,48,Clinton East,1,1,0,1,0,1,1
9,50,Clinton West,1,1,1,0,0,1,1
13,79,East Village,1,1,1,0,1,0,1
14,87,Financial District North,1,0,1,0,0,0,0
15,88,Financial District South,1,1,0,0,0,0,1


In [None]:
# Write the food subset to CSV, without the index column.
zones_with_food.to_csv('zones_with_food.csv', index=False, header=True)

In [None]:
# Zones flagged with a bar, limited to the food columns.
zones_with_food_bar = zone_features1.loc[
    zone_features1['food_bar'].eq(1),
    ['PULocationID', 'zone', "food", "food_bar", "food_cafe", "food_bakery",
     "food_meal_delivery", "food_meal_takeaway", "food_restaurant"],
]
zones_with_food_bar

Unnamed: 0,PULocationID,zone,food,food_bar,food_cafe,food_bakery,food_meal_delivery,food_meal_takeaway,food_restaurant
0,4,Alphabet City,1,1,0,0,0,0,1
2,13,Battery Park City,1,1,1,1,1,1,1
7,45,Chinatown,1,1,0,0,0,0,1
8,48,Clinton East,1,1,0,1,0,1,1
9,50,Clinton West,1,1,1,0,0,1,1
13,79,East Village,1,1,1,0,1,0,1
15,88,Financial District South,1,1,0,0,0,0,1
17,100,Garment District,1,1,0,0,0,0,1
24,125,Hudson Sq,1,1,0,0,0,0,1
25,127,Inwood,1,1,0,0,0,0,1


In [None]:
# Write the bar subset to CSV, without the index column.
zones_with_food_bar.to_csv('zones_with_food_bar.csv', index=False, header=True)

In [None]:
# Zones flagged with a cafe or a bakery, limited to the food columns.
zones_with_food_morning = zone_features1.loc[
    zone_features1['food_cafe'].eq(1) | zone_features1['food_bakery'].eq(1),
    ['PULocationID', 'zone', "food", "food_bar", "food_cafe", "food_bakery",
     "food_meal_delivery", "food_meal_takeaway", "food_restaurant"],
]
zones_with_food_morning

Unnamed: 0,PULocationID,zone,food,food_bar,food_cafe,food_bakery,food_meal_delivery,food_meal_takeaway,food_restaurant
2,13,Battery Park City,1,1,1,1,1,1,1
8,48,Clinton East,1,1,0,1,0,1,1
9,50,Clinton West,1,1,1,0,0,1,1
13,79,East Village,1,1,1,0,1,0,1
14,87,Financial District North,1,0,1,0,0,0,0
21,114,Greenwich Village South,1,0,1,0,0,0,0
28,140,Lenox Hill East,1,0,1,1,1,0,1
29,141,Lenox Hill West,1,0,0,1,0,0,1
30,142,Lincoln Square East,1,0,1,0,0,0,1
32,144,Little Italy/NoLiTa,1,1,1,1,0,1,1


In [None]:
# Write the cafe/bakery subset to CSV, without the index column.
zones_with_food_morning.to_csv('zones_with_food_morning.csv', index=False, header=True)

In [None]:
# Zones flagged with a restaurant, limited to the food columns.
zones_with_food_restaurant = zone_features1.loc[
    zone_features1['food_restaurant'].eq(1),
    ['PULocationID', 'zone', "food", "food_bar", "food_cafe", "food_bakery",
     "food_meal_delivery", "food_meal_takeaway", "food_restaurant"],
]
zones_with_food_restaurant

Unnamed: 0,PULocationID,zone,food,food_bar,food_cafe,food_bakery,food_meal_delivery,food_meal_takeaway,food_restaurant
0,4,Alphabet City,1,1,0,0,0,0,1
2,13,Battery Park City,1,1,1,1,1,1,1
4,41,Central Harlem,1,0,0,0,0,0,1
5,42,Central Harlem North,1,0,0,0,0,0,1
7,45,Chinatown,1,1,0,0,0,0,1
8,48,Clinton East,1,1,0,1,0,1,1
9,50,Clinton West,1,1,1,0,0,1,1
13,79,East Village,1,1,1,0,1,0,1
15,88,Financial District South,1,1,0,0,0,0,1
16,90,Flatiron,1,0,0,0,1,1,1


In [None]:
# Write the restaurant subset to CSV, without the index column.
zones_with_food_restaurant.to_csv('zones_with_food_restaurant.csv', index=False, header=True)

In [None]:
# Zones flagged with any attraction-type place, limited to the attraction columns.
zones_with_attractions = zone_features1.loc[
    zone_features1['attractions'].eq(1),
    ['PULocationID', 'zone', "attractions", "attractions_amusement_park",
     "attractions_aquarium", "attractions_art_gallery", "attractions_casino",
     "attractions_museum", "attractions_tourist_attraction", "attractions_zoo"],
]
zones_with_attractions

Unnamed: 0,PULocationID,zone,attractions,attractions_amusement_park,attractions_aquarium,attractions_art_gallery,attractions_casino,attractions_museum,attractions_tourist_attraction,attractions_zoo
0,4,Alphabet City,1,0,0,1,0,0,1,0
1,12,Battery Park,1,0,0,0,0,1,1,0
2,13,Battery Park City,1,0,0,0,0,0,1,0
7,45,Chinatown,1,0,0,0,0,0,1,0
8,48,Clinton East,1,0,0,1,0,0,0,0
9,50,Clinton West,1,0,0,1,0,0,0,0
12,75,East Harlem South,1,0,0,0,0,0,1,0
13,79,East Village,1,0,0,0,0,1,0,0
14,87,Financial District North,1,0,0,0,0,0,1,0
18,103,Governor's Island/Ellis Island/Liberty Island,1,0,0,0,0,0,1,0


In [None]:
# Write the attractions subset to CSV, without the index column.
zones_with_attractions.to_csv('zones_with_attractions.csv', index=False, header=True)

In [None]:
# Zones flagged with any religious-type place, limited to the religious columns.
zones_with_religious = zone_features1.loc[
    zone_features1['religious'].eq(1),
    ['PULocationID', 'zone', "religious", "religious_church", "religious_cemetery",
     "religious_hindu_temple", "religious_mosque", "religious_synagogue"],
]
zones_with_religious

Unnamed: 0,PULocationID,zone,religious,religious_church,religious_cemetery,religious_hindu_temple,religious_mosque,religious_synagogue
0,4,Alphabet City,1,1,0,0,0,0
2,13,Battery Park City,1,1,0,0,0,0
20,113,Greenwich Village North,1,1,0,0,0,0
28,140,Lenox Hill East,1,1,0,0,0,0
30,142,Lincoln Square East,1,1,0,0,0,0
31,143,Lincoln Square West,1,1,0,0,0,0
34,151,Manhattan Valley,1,1,0,0,0,1
42,166,Morningside Heights,1,1,0,0,0,0
53,232,Two Bridges/Seward Park,1,1,0,0,0,0
61,244,Washington Heights South,1,1,0,0,0,0


In [None]:
# Write the religious subset to CSV, without the index column.
zones_with_religious.to_csv('zones_with_religious.csv', index=False, header=True)