In [36]:
import pandas as pd
import os
import numpy as np
import pandas_profiling

# This line is needed to display plots inline in Jupyter Notebook
%matplotlib inline

# Required for basic python plotting functionality
import matplotlib.pyplot as plt

# Required for formatting dates later in the case
import datetime
import matplotlib.dates as mdates

# Required to display image inline
from IPython.display import Image

# Advanced plotting functionality with seaborn
import seaborn as sns
sns.set(style="whitegrid") # can set style depending on how you'd like it to look

import folium  #needed for interactive map
from folium.plugins import HeatMap

import branca

import geopandas
from shapely.geometry import Polygon
from shapely.geometry import Point
from geopandas import GeoDataFrame

plotsize = (15,6)

In [37]:
# Columns stored next to the trip counts in every parquet file that this
# notebook never uses (judging by their names: NTA shape, age-bracket,
# household and income attributes). They are dropped right after loading.
UNUSED_NTA_COLUMNS = [
    'borough', 'nta_name', 'shape_area', 'county_fips', 'shape_leng', 'boro_code', 'nta_code',
    'under_5_years', '5-9_years', '10-14_years', '15-19_years', '20-24_years', '25-29_years',
    '30-34_years', '35-39_years', '40-44_years', '45-49_years', '50-54_years', '55-59_years',
    '60-64_years', 'over_65_years', 'median_age', 'people_per_acre', 'households',
    'less_than_10,000', '10000_to_14999', '15000_to_24999', '25000_to_34999', '35000_to_49999',
    '50000_to_74999', '75000_to_99999', '100000_to_149999', '150000_to_199999', '200000_or_more',
    'median_income', 'mean_income',
]


def _load_trips(directory, file_name):
    """Read one trips parquet file and return it without the unused NTA columns.

    Parameters
    ----------
    directory : str
        Folder that contains the parquet file.
    file_name : str
        Name of the parquet file inside ``directory``.

    Returns
    -------
    pandas.DataFrame
        The file contents minus ``UNUSED_NTA_COLUMNS``.

    Raises
    ------
    KeyError
        If any of ``UNUSED_NTA_COLUMNS`` is missing from the file (same as the
        original per-table ``drop`` calls).
    """
    trips = pd.read_parquet(directory + '/' + file_name, engine='pyarrow')
    return trips.drop(columns=UNUSED_NTA_COLUMNS)


# Pick-up tables, one per service.
path = 'Dataset/Parquets'
yellow_trips = _load_trips(path, 'nyc_bouroughs_trips_yellow.parquet')
uber_trips = _load_trips(path, 'nyc_bouroughs_trips_uber.parquet')
green_trips = _load_trips(path, 'nyc_bouroughs_trips_green.parquet')
mta_trips = _load_trips(path, 'nyc_bouroughs_trips_mta.parquet')

# Drop-off tables (no Uber drop-off file is loaded).
green_trips_do = _load_trips(path, 'nyc_bouroughs_trips_green_dropoff.parquet')
yellow_trips_do = _load_trips(path, 'nyc_bouroughs_trips_yellow_dropoff.parquet')
mta_trips_do = _load_trips(path, 'nyc_bouroughs_trips_mta_dropoff.parquet')

# Opens the map as a GeoDataFrame
path = 'Dataset/Original'
nyc_bouroughs = geopandas.read_file(path + '/Neighborhood Tabulation Areas.geojson')

In [38]:
# Attach the yellow-cab drop-off counts to the pick-up table. The shape is
# printed before and after so a change in row count from the inner join shows up.
nta_keys = ['ntacode', 'ntaname', 'boro_name', 'population']
print(yellow_trips.shape)
yellow_trips = pd.merge(
    yellow_trips,
    yellow_trips_do,
    on=nta_keys,
    how='inner',
    suffixes=('_pickup', '_dropoff'),
)
print(yellow_trips.shape)

(195, 6)
(195, 8)


In [39]:
# Preview the first rows of the yellow-cab table after the pick-up/drop-off merge.
yellow_trips.head()

Unnamed: 0,ntacode,ntaname,boro_name,trips_pickup,population,trips_population_pickup,trips_dropoff,trips_population_dropoff
0,BK88,Borough Park,Brooklyn,134.0,106357.0,0.00126,1707.0,0.01605
1,QN51,Murray Hill,Queens,57.0,51739.0,0.001102,1065.0,0.020584
2,QN27,East Elmhurst,Queens,1484.0,23150.0,0.064104,3399.0,0.146825
3,QN07,Hollis,Queens,11.0,20269.0,0.000543,244.0,0.012038
4,MN06,Manhattanville,Manhattan,7500.0,22950.0,0.326797,12387.0,0.539739


In [40]:
# Attach the green-cab drop-off counts to the pick-up table. The shape is
# printed before and after so a change in row count from the inner join shows up.
nta_keys = ['ntacode', 'ntaname', 'boro_name', 'population']
print(green_trips.shape)
green_trips = pd.merge(
    green_trips,
    green_trips_do,
    on=nta_keys,
    how='inner',
    suffixes=('_pickup', '_dropoff'),
)
print(green_trips.shape)

(195, 6)
(195, 8)


In [41]:
# Preview the first rows of the green-cab table after the pick-up/drop-off merge.
green_trips.head()

Unnamed: 0,ntacode,ntaname,boro_name,trips_pickup,population,trips_population_pickup,trips_dropoff,trips_population_dropoff
0,BK88,Borough Park,Brooklyn,1090.0,106357.0,0.010249,3090.0,0.029053
1,QN51,Murray Hill,Queens,397.0,51739.0,0.007673,5605.0,0.108332
2,QN27,East Elmhurst,Queens,4494.0,23150.0,0.194125,16714.0,0.721987
3,QN07,Hollis,Queens,197.0,20269.0,0.009719,997.0,0.049188
4,MN06,Manhattanville,Manhattan,29576.0,22950.0,1.288715,28482.0,1.241046


In [42]:
# Attach the MTA drop-off counts to the pick-up table. The shape is printed
# before and after so a change in row count from the inner join shows up.
nta_keys = ['ntacode', 'ntaname', 'boro_name', 'population']
print(mta_trips.shape)
mta_trips = pd.merge(
    mta_trips,
    mta_trips_do,
    on=nta_keys,
    how='inner',
    suffixes=('_pickup', '_dropoff'),
)
print(mta_trips.shape)

(195, 6)
(195, 8)


In [43]:
# Preview the first rows of the MTA table after the pick-up/drop-off merge.
mta_trips.head()

Unnamed: 0,ntacode,ntaname,boro_name,trips_pickup,population,trips_population_pickup,trips_dropoff,trips_population_dropoff
0,BK88,Borough Park,Brooklyn,17877870.0,106357.0,168.093026,14727211.0,138.469598
1,QN51,Murray Hill,Queens,0.0,51739.0,0.0,0.0,0.0
2,QN27,East Elmhurst,Queens,0.0,23150.0,0.0,0.0,0.0
3,QN07,Hollis,Queens,0.0,20269.0,0.0,0.0,0.0
4,MN06,Manhattanville,Manhattan,6684424.0,22950.0,291.260305,2909020.0,126.754684


In [44]:
# Join the yellow-cab counts onto the NTA polygons, then tag the count columns
# with the service name so the next services can be merged in without clashes.
yellow_labels = {
    measure: measure + '_yellow'
    for measure in ('trips_pickup', 'trips_population_pickup',
                    'trips_dropoff', 'trips_population_dropoff')
}
nyc_ntas = (
    nyc_bouroughs
    .merge(yellow_trips, how='inner', on=['ntacode', 'ntaname', 'boro_name'])
    .rename(columns=yellow_labels)
)
nyc_ntas.dtypes

ntacode                              object
shape_area                           object
county_fips                          object
ntaname                              object
shape_leng                           object
boro_name                            object
boro_code                            object
geometry                           geometry
trips_pickup_yellow                 float64
population                          float64
trips_population_pickup_yellow      float64
trips_dropoff_yellow                float64
trips_population_dropoff_yellow     float64
dtype: object

In [45]:
# Add the green-cab counts to the NTA table and tag them with the service name.
green_labels = {
    measure: measure + '_green'
    for measure in ('trips_pickup', 'trips_population_pickup',
                    'trips_dropoff', 'trips_population_dropoff')
}
nyc_ntas = (
    nyc_ntas
    .merge(green_trips, how='inner', on=['ntacode', 'ntaname', 'boro_name', 'population'])
    .rename(columns=green_labels)
)
nyc_ntas.head()

Unnamed: 0,ntacode,shape_area,county_fips,ntaname,shape_leng,boro_name,boro_code,geometry,trips_pickup_yellow,population,trips_population_pickup_yellow,trips_dropoff_yellow,trips_population_dropoff_yellow,trips_pickup_green,trips_population_pickup_green,trips_dropoff_green,trips_population_dropoff_green
0,BK88,54005018.7472,47,Borough Park,39247.2280737,Brooklyn,3,"MULTIPOLYGON (((-73.97605 40.63128, -73.97717 ...",134.0,106357.0,0.00126,1707.0,0.01605,1090.0,0.010249,3090.0,0.029053
1,QN51,52488276.477,81,Murray Hill,33266.904811,Queens,4,"MULTIPOLYGON (((-73.80379 40.77561, -73.80099 ...",57.0,51739.0,0.001102,1065.0,0.020584,397.0,0.007673,5605.0,0.108332
2,QN27,19726845.26,81,East Elmhurst,19816.7115378,Queens,4,"MULTIPOLYGON (((-73.86110 40.76366, -73.85993 ...",1484.0,23150.0,0.064104,3399.0,0.146825,4494.0,0.194125,16714.0,0.721987
3,QN07,22887772.7683,81,Hollis,20976.3358374,Queens,4,"MULTIPOLYGON (((-73.75726 40.71814, -73.75589 ...",11.0,20269.0,0.000543,244.0,0.012038,197.0,0.009719,997.0,0.049188
4,MN06,10647077.6122,61,Manhattanville,17040.6865482,Manhattan,1,"MULTIPOLYGON (((-73.94608 40.82126, -73.94640 ...",7500.0,22950.0,0.326797,12387.0,0.539739,29576.0,1.288715,28482.0,1.241046


In [46]:
# Inspect the column dtypes of the MTA table.
mta_trips.dtypes

ntacode                      object
ntaname                      object
boro_name                    object
trips_pickup                float64
population                  float64
trips_population_pickup     float64
trips_dropoff               float64
trips_population_dropoff    float64
dtype: object

In [47]:
# Add the MTA counts to the NTA table and tag them with the service name.
mta_labels = {
    measure: measure + '_mta'
    for measure in ('trips_pickup', 'trips_population_pickup',
                    'trips_dropoff', 'trips_population_dropoff')
}
nyc_ntas = (
    nyc_ntas
    .merge(mta_trips, how='inner', on=['ntacode', 'ntaname', 'boro_name', 'population'])
    .rename(columns=mta_labels)
)
nyc_ntas.head()

Unnamed: 0,ntacode,shape_area,county_fips,ntaname,shape_leng,boro_name,boro_code,geometry,trips_pickup_yellow,population,...,trips_dropoff_yellow,trips_population_dropoff_yellow,trips_pickup_green,trips_population_pickup_green,trips_dropoff_green,trips_population_dropoff_green,trips_pickup_mta,trips_population_pickup_mta,trips_dropoff_mta,trips_population_dropoff_mta
0,BK88,54005018.7472,47,Borough Park,39247.2280737,Brooklyn,3,"MULTIPOLYGON (((-73.97605 40.63128, -73.97717 ...",134.0,106357.0,...,1707.0,0.01605,1090.0,0.010249,3090.0,0.029053,17877870.0,168.093026,14727211.0,138.469598
1,QN51,52488276.477,81,Murray Hill,33266.904811,Queens,4,"MULTIPOLYGON (((-73.80379 40.77561, -73.80099 ...",57.0,51739.0,...,1065.0,0.020584,397.0,0.007673,5605.0,0.108332,0.0,0.0,0.0,0.0
2,QN27,19726845.26,81,East Elmhurst,19816.7115378,Queens,4,"MULTIPOLYGON (((-73.86110 40.76366, -73.85993 ...",1484.0,23150.0,...,3399.0,0.146825,4494.0,0.194125,16714.0,0.721987,0.0,0.0,0.0,0.0
3,QN07,22887772.7683,81,Hollis,20976.3358374,Queens,4,"MULTIPOLYGON (((-73.75726 40.71814, -73.75589 ...",11.0,20269.0,...,244.0,0.012038,197.0,0.009719,997.0,0.049188,0.0,0.0,0.0,0.0
4,MN06,10647077.6122,61,Manhattanville,17040.6865482,Manhattan,1,"MULTIPOLYGON (((-73.94608 40.82126, -73.94640 ...",7500.0,22950.0,...,12387.0,0.539739,29576.0,1.288715,28482.0,1.241046,6684424.0,291.260305,2909020.0,126.754684


In [48]:
# Add the Uber counts to the NTA table. The Uber table only has `trips` and
# `trips_population`, which are relabelled here as pick-up measures.
uber_labels = {
    'trips': 'trips_pickup_uber',
    'trips_population': 'trips_population_pickup_uber',
}
nyc_ntas = (
    nyc_ntas
    .merge(uber_trips, how='inner', on=['ntacode', 'ntaname', 'boro_name', 'population'])
    .rename(columns=uber_labels)
)
nyc_ntas.head()

Unnamed: 0,ntacode,shape_area,county_fips,ntaname,shape_leng,boro_name,boro_code,geometry,trips_pickup_yellow,population,...,trips_pickup_green,trips_population_pickup_green,trips_dropoff_green,trips_population_dropoff_green,trips_pickup_mta,trips_population_pickup_mta,trips_dropoff_mta,trips_population_dropoff_mta,trips_pickup_uber,trips_population_pickup_uber
0,BK88,54005018.7472,47,Borough Park,39247.2280737,Brooklyn,3,"MULTIPOLYGON (((-73.97605 40.63128, -73.97717 ...",134.0,106357.0,...,1090.0,0.010249,3090.0,0.029053,17877870.0,168.093026,14727211.0,138.469598,17450.0,0.16407
1,QN51,52488276.477,81,Murray Hill,33266.904811,Queens,4,"MULTIPOLYGON (((-73.80379 40.77561, -73.80099 ...",57.0,51739.0,...,397.0,0.007673,5605.0,0.108332,0.0,0.0,0.0,0.0,7116.0,0.137536
2,QN27,19726845.26,81,East Elmhurst,19816.7115378,Queens,4,"MULTIPOLYGON (((-73.86110 40.76366, -73.85993 ...",1484.0,23150.0,...,4494.0,0.194125,16714.0,0.721987,0.0,0.0,0.0,0.0,15110.0,0.6527
3,QN07,22887772.7683,81,Hollis,20976.3358374,Queens,4,"MULTIPOLYGON (((-73.75726 40.71814, -73.75589 ...",11.0,20269.0,...,197.0,0.009719,997.0,0.049188,0.0,0.0,0.0,0.0,1995.0,0.098426
4,MN06,10647077.6122,61,Manhattanville,17040.6865482,Manhattan,1,"MULTIPOLYGON (((-73.94608 40.82126, -73.94640 ...",7500.0,22950.0,...,29576.0,1.288715,28482.0,1.241046,6684424.0,291.260305,2909020.0,126.754684,21381.0,0.931634


In [49]:
# Inspect the column dtypes of the combined NTA table after all four services are merged in.
nyc_ntas.dtypes

ntacode                              object
shape_area                           object
county_fips                          object
ntaname                              object
shape_leng                           object
boro_name                            object
boro_code                            object
geometry                           geometry
trips_pickup_yellow                 float64
population                          float64
trips_population_pickup_yellow      float64
trips_dropoff_yellow                float64
trips_population_dropoff_yellow     float64
trips_pickup_green                  float64
trips_population_pickup_green       float64
trips_dropoff_green                 float64
trips_population_dropoff_green      float64
trips_pickup_mta                    float64
trips_population_pickup_mta         float64
trips_dropoff_mta                   float64
trips_population_dropoff_mta        float64
trips_pickup_uber                   float64
trips_population_pickup_uber    

In [50]:
def normalize(base, variable):
    """Min-max scale one column of a table to the [0, 1] range.

    Parameters
    ----------
    base : pandas.DataFrame
        Table holding the column to scale.
    variable : str
        Name of the column in ``base``.

    Returns
    -------
    pandas.Series
        ``(x - min) / (max - min)`` for every value of the column. When all
        values are equal (``max == min``) the result is all zeros instead of
        the NaN that the 0/0 division would give. Missing values stay missing.
    """
    values = base[variable]
    lowest = values.min()
    spread = values.max() - lowest
    if spread == 0:
        # Constant column: every value sits at the minimum, so map it to 0.
        return (values - lowest).astype(float)
    return (values - lowest) / spread

In [51]:
# Work on a copy so the merged table `nyc_ntas` keeps its original values.
nyc_ntas_2 = nyc_ntas.copy()

# Add a normalized `norm_*` twin for every yellow-cab measure.
for yellow_measure in ('trips_pickup_yellow',
                       'trips_dropoff_yellow',
                       'trips_population_pickup_yellow',
                       'trips_population_dropoff_yellow'):
    nyc_ntas_2['norm_' + yellow_measure] = normalize(nyc_ntas_2, yellow_measure)

In [52]:
# Add a normalized `norm_*` twin for every green-cab measure.
for green_measure in ('trips_pickup_green',
                      'trips_dropoff_green',
                      'trips_population_pickup_green',
                      'trips_population_dropoff_green'):
    nyc_ntas_2['norm_' + green_measure] = normalize(nyc_ntas_2, green_measure)

In [53]:
# Add a normalized `norm_*` twin for every MTA measure.
for mta_measure in ('trips_pickup_mta',
                    'trips_dropoff_mta',
                    'trips_population_pickup_mta',
                    'trips_population_dropoff_mta'):
    nyc_ntas_2['norm_' + mta_measure] = normalize(nyc_ntas_2, mta_measure)

In [54]:
# Add a normalized `norm_*` twin for the two Uber pick-up measures.
for uber_measure in ('trips_pickup_uber', 'trips_population_pickup_uber'):
    nyc_ntas_2['norm_' + uber_measure] = normalize(nyc_ntas_2, uber_measure)

In [20]:
# Preview the working table.
# NOTE(review): the saved output below has execution count In [20] and shows
# values in [0, 1] in the raw trips_* columns, while the cells above write the
# normalized values to separate norm_* columns. The output looks stale;
# re-run the notebook top to bottom to refresh it.
nyc_ntas_2.head()

Unnamed: 0,ntacode,shape_area,county_fips,ntaname,shape_leng,boro_name,boro_code,geometry,trips_pickup_yellow,population,...,trips_pickup_green,trips_population_pickup_green,trips_dropoff_green,trips_population_dropoff_green,trips_pickup_mta,trips_population_pickup_mta,trips_dropoff_mta,trips_population_dropoff_mta,trips_pickup_uber,trips_population_pickup_uber
0,BK88,54005018.7472,47,Borough Park,39247.2280737,Brooklyn,3,"MULTIPOLYGON (((-73.97605 40.63128, -73.97717 ...",0.000109,106357.0,...,0.004385,0.001887,0.030045,0.011007,0.081769,0.022011,0.081175,0.021851,0.007156,0.001926
1,QN51,52488276.477,81,Murray Hill,33266.904811,Queens,4,"MULTIPOLYGON (((-73.80379 40.77561, -73.80099 ...",4.6e-05,51739.0,...,0.001597,0.001413,0.054498,0.041042,0.0,0.0,0.0,0.0,0.002918,0.001615
2,QN27,19726845.26,81,East Elmhurst,19816.7115378,Queens,4,"MULTIPOLYGON (((-73.86110 40.76366, -73.85993 ...",0.001206,23150.0,...,0.018079,0.035747,0.162513,0.273528,0.0,0.0,0.0,0.0,0.006196,0.007663
3,QN07,22887772.7683,81,Hollis,20976.3358374,Queens,4,"MULTIPOLYGON (((-73.75726 40.71814, -73.75589 ...",9e-06,20269.0,...,0.000793,0.00179,0.009694,0.018635,0.0,0.0,0.0,0.0,0.000818,0.001156
4,MN06,10647077.6122,61,Manhattanville,17040.6865482,Manhattan,1,"MULTIPOLYGON (((-73.94608 40.82126, -73.94640 ...",0.006094,22950.0,...,0.118981,0.237308,0.276936,0.470175,0.030573,0.038139,0.016034,0.020003,0.008768,0.010938


In [55]:
# Multiply every service's counts by a fixed per-service weight.
# NOTE(review): the weights 20 / 5 / 100 / 1 are taken as-is from the original
# cell; presumably they correct for different sampling rates of the source
# data. TODO confirm and document their origin.
#
# The scaled values are computed from the unscaled table `nyc_ntas` (the table
# `nyc_ntas_2` was copied from), not from `nyc_ntas_2` itself, so re-running
# this cell does not compound the weights.
all_measures = ('trips_pickup', 'trips_dropoff',
                'trips_population_pickup', 'trips_population_dropoff')
pickup_measures = ('trips_pickup', 'trips_population_pickup')  # Uber has no drop-off columns

scaling_plan = [
    # (service suffix, weight, measures to scale)
    ('yellow', 20, all_measures),
    ('green', 5, all_measures),
    ('mta', 100, all_measures),
    ('uber', 1, pickup_measures),
]

for service, weight, measures in scaling_plan:
    for measure in measures:
        column = measure + '_' + service
        nyc_ntas_2[column] = nyc_ntas[column] * weight

In [22]:
# Summary statistics of the numeric columns.
# NOTE(review): the saved output below has execution count In [22] and reports
# max = 1.0 for the raw trips_* columns, which does not match the scaling cell
# above. The output looks stale; re-run the notebook to refresh it.
nyc_ntas_2.describe()

Unnamed: 0,trips_pickup_yellow,population,trips_population_pickup_yellow,trips_dropoff_yellow,trips_population_dropoff_yellow,trips_pickup_green,trips_population_pickup_green,trips_dropoff_green,trips_population_dropoff_green,trips_pickup_mta,trips_population_pickup_mta,trips_dropoff_mta,trips_population_dropoff_mta,trips_pickup_uber,trips_population_pickup_uber
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,0.030854,41839.328205,0.017649,0.032925,0.019236,0.068775,0.065944,0.166226,0.132868,0.037727,0.022949,0.034146,0.020852,0.039274,0.023578
std,0.109276,22413.053588,0.081875,0.106858,0.081209,0.151703,0.14583,0.224086,0.176537,0.089941,0.077285,0.088991,0.076786,0.116371,0.085292
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.8e-05,26172.0,2.1e-05,0.000561,0.000437,0.000515,0.000698,0.018095,0.016807,0.0,0.0,0.0,0.0,0.001745,0.001502
50%,0.000107,36891.0,6.1e-05,0.00118,0.00074,0.00669,0.005611,0.066157,0.059888,0.009365,0.007001,0.007254,0.005168,0.004382,0.002712
75%,0.002112,53896.0,0.000782,0.008976,0.003853,0.040482,0.039079,0.232097,0.18235,0.040186,0.023101,0.036756,0.018741,0.017204,0.008155
max,1.0,132378.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [23]:
# Inspect the column dtypes of the working table.
nyc_ntas_2.dtypes

ntacode                              object
shape_area                           object
county_fips                          object
ntaname                              object
shape_leng                           object
boro_name                            object
boro_code                            object
geometry                           geometry
trips_pickup_yellow                 float64
population                          float64
trips_population_pickup_yellow      float64
trips_dropoff_yellow                float64
trips_population_dropoff_yellow     float64
trips_pickup_green                  float64
trips_population_pickup_green       float64
trips_dropoff_green                 float64
trips_population_dropoff_green      float64
trips_pickup_mta                    float64
trips_population_pickup_mta         float64
trips_dropoff_mta                   float64
trips_population_dropoff_mta        float64
trips_pickup_uber                   float64
trips_population_pickup_uber    

In [56]:
# Combined totals across services, one new column per entry. The table has no
# Uber drop-off columns, so drop-off totals use yellow, green and MTA only.
combined_totals = {
    'trips_pickup': ['trips_pickup_yellow', 'trips_pickup_green',
                     'trips_pickup_mta', 'trips_pickup_uber'],
    'norm_trips_pickup': ['norm_trips_pickup_yellow', 'norm_trips_pickup_green',
                          'norm_trips_pickup_mta', 'norm_trips_pickup_uber'],
    'trips_dropoff': ['trips_dropoff_yellow', 'trips_dropoff_green',
                      'trips_dropoff_mta'],
    'norm_trips_dropoff': ['norm_trips_dropoff_yellow', 'norm_trips_dropoff_green',
                           'norm_trips_dropoff_mta'],
    'trips_population_pickup': ['trips_population_pickup_yellow',
                                'trips_population_pickup_green',
                                'trips_population_pickup_mta',
                                'trips_population_pickup_uber'],
    'trips_population_dropoff': ['trips_population_dropoff_yellow',
                                 'trips_population_dropoff_green',
                                 'trips_population_dropoff_mta'],
}

for total_name, service_columns in combined_totals.items():
    # Add the service columns one after another, left to right.
    running_total = nyc_ntas_2[service_columns[0]]
    for service_column in service_columns[1:]:
        running_total = running_total + nyc_ntas_2[service_column]
    nyc_ntas_2[total_name] = running_total

# Maps for NTA analysis

## Pick-up and drop-off totals

In [25]:
# Interactive choropleth: normalized total drop-offs per NTA
# (norm_trips_dropoff = sum of the normalized yellow, green and MTA drop-offs).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# Colour ramp stretched over the observed range of the mapped column.
colormap = branca.colormap.LinearColormap(
    colors=['white','yellow','orange','red','darkred'],
    vmin=nyc_ntas_2.norm_trips_dropoff.min(),
    vmax=nyc_ntas_2.norm_trips_dropoff.max()
)

# Fill each NTA polygon with the colour of its value; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['norm_trips_dropoff']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'norm_trips_dropoff'],
        aliases=['Borough', 'NTA', 'Total trips (Normalized)'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_total_dropoff.html")

In [70]:
# Interactive choropleth: normalized total pick-ups per NTA
# (norm_trips_pickup = sum of the normalized yellow, green, MTA and Uber pick-ups).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# Colour ramp stretched over the observed range of the mapped column.
colormap = branca.colormap.LinearColormap(
    colors=['white','yellow','orange','red','darkred'],
    vmin=nyc_ntas_2.norm_trips_pickup.min(),
    vmax=nyc_ntas_2.norm_trips_pickup.max()
)

# Fill each NTA polygon with the colour of its value; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['norm_trips_pickup']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'norm_trips_pickup'],
        aliases=['Borough', 'NTA', 'Total trips (Normalized)'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_total_pickup_norm.html")

In [71]:
# Log-scale the combined pick-up total so a few very busy NTAs do not wash out
# the rest of the colour scale. log1p (= log(1 + x)) is used instead of log:
# an NTA with zero pick-ups would otherwise become -inf, which would then be
# picked up as the minimum of the column when the map's colour range is set.
nyc_ntas_2['log_trips_pickup'] = np.log1p(nyc_ntas_2['trips_pickup'])

In [73]:
# Interactive choropleth: log-scaled total pick-ups per NTA (column log_trips_pickup).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# Colour ramp stretched over the observed range of the mapped column.
colormap = branca.colormap.LinearColormap(
    colors=['white','yellow','orange','red','darkred'],
    vmin=nyc_ntas_2.log_trips_pickup.min(),
    vmax=nyc_ntas_2.log_trips_pickup.max()
)

# Fill each NTA polygon with the colour of its value; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['log_trips_pickup']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'log_trips_pickup'],
        aliases=['Borough', 'NTA', 'Total trips (log)'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_total_pickup_log.html")

In [74]:
# Interactive choropleth: combined pick-up total per NTA
# (trips_pickup = sum of the yellow, green, MTA and Uber pick-up columns).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# Colour ramp stretched over the observed range of the mapped column.
colormap = branca.colormap.LinearColormap(
    colors=['white','yellow','orange','red','darkred'],
    vmin=nyc_ntas_2.trips_pickup.min(),
    vmax=nyc_ntas_2.trips_pickup.max()
)

# Fill each NTA polygon with the colour of its value; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['trips_pickup']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'trips_pickup'],
        aliases=['Borough', 'NTA', 'Total trips (Total)'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_total_pickup_tot.html")

In [68]:
# Pick-up counts of each service, pulled out once for readability.
pu_yellow = nyc_ntas_2['trips_pickup_yellow']
pu_green = nyc_ntas_2['trips_pickup_green']
pu_mta = nyc_ntas_2['trips_pickup_mta']
pu_uber = nyc_ntas_2['trips_pickup_uber']

# A service "wins" an NTA only when it is strictly larger than every other
# service, so ties (including rows where all counts are equal) match no mask.
yellow = pu_yellow.gt(pu_green) & pu_yellow.gt(pu_mta) & pu_yellow.gt(pu_uber)
green = pu_green.gt(pu_yellow) & pu_green.gt(pu_mta) & pu_green.gt(pu_uber)
mta = pu_mta.gt(pu_yellow) & pu_mta.gt(pu_green) & pu_mta.gt(pu_uber)
uber = pu_uber.gt(pu_yellow) & pu_uber.gt(pu_green) & pu_uber.gt(pu_mta)

pickup_winners = [yellow, green, mta, uber]

# Numeric code of the winning service (0 when no mask matches).
nyc_ntas_2['max_coverage_type'] = np.select(pickup_winners, [1, 2, 3, 4], default=0)

# Pick-up count of the winning service (0 when no mask matches).
nyc_ntas_2['max_coverage_trips'] = np.select(
    pickup_winners, [pu_yellow, pu_green, pu_mta, pu_uber], default=0)

# Human-readable label of the winning service.
nyc_ntas_2['lab_max_coverage_type'] = np.select(
    pickup_winners, ['Yellow Cabs', 'Green Cabs', 'MTA', 'Uber'], default='No Coverage')

In [35]:
# Dump the Manhattan rows (without the geometry column) to a CSV file.
manhattan_rows = nyc_ntas_2[nyc_ntas_2['boro_name'] == 'Manhattan']
manhattan_rows.drop(columns='geometry').to_csv('Results/asdf.csv', sep=',')

In [69]:
# Interactive map: service with the most pick-ups in each NTA
# (max_coverage_type: 0 = No Coverage, 1 = Yellow Cabs, 2 = Green Cabs, 3 = MTA, 4 = Uber).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# One colour per category code. The limits are fixed to the full code range
# (0..4) rather than taken from the data, so each integer code lands exactly on
# its own colour even when a category (e.g. "No Coverage") is absent or present.
colormap = branca.colormap.LinearColormap(
    colors=['white', 'yellow', 'green', 'blue', 'black'],
    vmin=0,
    vmax=4,
    caption='Service with most pick-ups '
            '(0 = No Coverage, 1 = Yellow Cabs, 2 = Green Cabs, 3 = MTA, 4 = Uber)'
)

# Fill each NTA polygon with the colour of its category; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['max_coverage_type']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'lab_max_coverage_type', 'max_coverage_trips'],
        aliases=['Borough', 'NTA', 'Max Coverage (Pick-up)', 'No. Trips'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_max_coverage.html")

In [76]:
# Drop-off counts of each service, pulled out once for readability.
do_yellow = nyc_ntas_2['trips_dropoff_yellow']
do_green = nyc_ntas_2['trips_dropoff_green']
do_mta = nyc_ntas_2['trips_dropoff_mta']

# A service "wins" an NTA only when it is strictly larger than both others,
# so ties (including rows where all counts are equal) match no mask.
yellow = do_yellow.gt(do_green) & do_yellow.gt(do_mta)
green = do_green.gt(do_yellow) & do_green.gt(do_mta)
mta = do_mta.gt(do_yellow) & do_mta.gt(do_green)

dropoff_winners = [yellow, green, mta]

# Numeric code of the winning service (0 when no mask matches).
nyc_ntas_2['max_coverage_type_dropoff'] = np.select(dropoff_winners, [1, 2, 3], default=0)

# Human-readable label of the winning service.
nyc_ntas_2['lab_max_coverage_type_dropoff'] = np.select(
    dropoff_winners, ['Yellow Cabs', 'Green Cabs', 'MTA'], default='No Coverage')

In [77]:
# Interactive map: service with the most drop-offs in each NTA
# (max_coverage_type_dropoff: 0 = No Coverage, 1 = Yellow Cabs, 2 = Green Cabs, 3 = MTA).

# Base map centred on the coordinates used for every map in this notebook.
m_trips = folium.Map(location=[40.738, -73.98],
                     zoom_start=11,
                     tiles="OpenStreetMap")

# One colour per category code. The limits are fixed to the full code range
# (0..3) rather than taken from the data, so each integer code lands exactly on
# its own colour even when one of the categories does not occur.
colormap = branca.colormap.LinearColormap(
    colors=['white', 'yellow','green','blue'],
    vmin=0,
    vmax=3,
    caption='Service with most drop-offs '
            '(0 = No Coverage, 1 = Yellow Cabs, 2 = Green Cabs, 3 = MTA)'
)

# Fill each NTA polygon with the colour of its category; outlines stay black.
style_function = lambda x: {
    'fillColor': colormap(x['properties']['max_coverage_type_dropoff']),
    'color': 'black',
    'weight':2,
    'fillOpacity':0.5
}

stategeo = folium.GeoJson(
    nyc_ntas_2.to_json(),
    name='NYC NTAs',
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['boro_name', 'ntaname', 'lab_max_coverage_type_dropoff'],
        aliases=['Borough', 'NTA', 'Max Coverage (Drop-off)'],
        localize=True
    )
).add_to(m_trips)

# Show the colour scale as a legend on the map.
colormap.add_to(m_trips)

path = 'Results'
# Create the output folder first so save() does not fail when it is missing.
os.makedirs(path + "/Maps", exist_ok=True)
m_trips.save(path + "/Maps/trips_nta_max_coverage_dropoff.html")