In [1]:
import sys
# Record the interpreter version this notebook was executed with, so the
# saved output documents the environment alongside the results.
print('Python version: ', sys.version)

Python version:  3.6.10 |Anaconda, Inc.| (default, Mar 25 2020, 18:53:43) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]


In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium

## Position Data

In [3]:
# Station coordinates per (remote unit, control area) pair.
# data from https://github.com/chriswhong/nycturnstiles/blob/master/geocoded.csv
# Any row with a missing value in any column is discarded right after loading.
geo_df = (
    pd.read_csv('../data/turnstile/geocoded_ca_unit.csv')
    .dropna()
)
geo_df

Unnamed: 0,remote unit,control area,station,lines,division,latitude,longitude
0,R470,X002,ELTINGVILLE PK,Z,SRT,40.544600,-74.164581
1,R544,PTH02,HARRISON,1,PTH,40.738879,-74.155533
2,R165,S102,TOMPKINSVILLE,1,SRT,40.636948,-74.074824
3,R070,S101,ST. GEORGE,1,SRT,40.643738,-74.073622
4,R070,S101A,ST. GEORGE,1,SRT,40.643738,-74.073622
...,...,...,...,...,...,...,...
745,R253,N210,174-175 ST,BD,IND,40.845892,-73.910136
746,R253,N212,174-175 ST,BD,IND,40.845892,-73.910136
747,R001,A060,WHITEHALL ST,R1,BMT,40.703082,-74.012983
748,R001,A058,WHITEHALL ST,R1,BMT,40.703082,-74.012983


In [4]:
# Build the base map, centred on the fixed coordinates below.
station_loc_map = folium.Map(
    location=[40.738, -73.98],
    zoom_start=11,
    tiles='cartodbpositron',
)

# Add one small circle marker per row of geo_df. The two coordinate columns
# are walked as plain Python floats instead of using iterrows(), which would
# build a full Series for every row only to read two values from it.
for lat, lon in geo_df[['latitude', 'longitude']].values.tolist():
    folium.CircleMarker(
        location=[lat, lon],
        color='#0080bb',
        fill_color='#0080bb',
        radius=1,
    ).add_to(station_loc_map)

# Display map in Jupyter
station_loc_map

### Trip Data from turnstile_cleaning

In [5]:
# Daily entry/exit differences per control area, unit and station,
# read from a CSV at the relative path below.
count_df = pd.read_csv(
    filepath_or_buffer='../data/turnstile/station_daily_diffs.csv',
)
count_df

Unnamed: 0,C/A,UNIT,STATION,DATE,ENTRIES_DIFF,EXITS_DIFF
0,A002,R051,59 ST,02/15/2020,6445.0,3259.0
1,A002,R051,59 ST,02/16/2020,6122.0,3888.0
2,A002,R051,59 ST,02/17/2020,9228.0,6073.0
3,A002,R051,59 ST,02/18/2020,11858.0,7907.0
4,A002,R051,59 ST,02/19/2020,12519.0,7999.0
...,...,...,...,...,...,...
46913,TRAM2,R469,RIT-ROOSEVELT,04/13/2020,238.0,65.0
46914,TRAM2,R469,RIT-ROOSEVELT,04/14/2020,433.0,61.0
46915,TRAM2,R469,RIT-ROOSEVELT,04/15/2020,395.0,71.0
46916,TRAM2,R469,RIT-ROOSEVELT,04/16/2020,408.0,60.0


In [6]:
# One row per distinct (C/A, UNIT, STATION) combination found in count_df;
# the surviving rows keep the index label of their first occurrence.
station_df = (
    count_df
    .loc[:, ['C/A', 'UNIT', 'STATION']]
    .drop_duplicates()
)
station_df

Unnamed: 0,C/A,UNIT,STATION
0,A002,R051,59 ST
63,A006,R079,5 AV/59 ST
126,A007,R079,5 AV/59 ST
189,A010,R080,57 ST-7 AV
252,A011,R080,57 ST-7 AV
...,...,...,...
46605,S101,R070,ST. GEORGE
46668,S101A,R070,ST. GEORGE
46731,S102,R165,TOMPKINSVILLE
46792,TRAM1,R468,RIT-MANHATTAN


## Join

In [7]:
# Outer join of the turnstile stations with the geocoded stations on the
# (control area, remote unit) pair. The outer join keeps unmatched rows from
# both sides, which show up with NaN in the other side's columns.
geostation_df = station_df.merge(
    geo_df,
    how='outer',
    left_on=['C/A', 'UNIT'],
    right_on=['control area', 'remote unit'],
    suffixes=['_l', '_r'],
)
geostation_df

Unnamed: 0,C/A,UNIT,STATION,remote unit,control area,station,lines,division,latitude,longitude
0,A002,R051,59 ST,R051,A002,LEXINGTON AVE,456NQR,BMT,40.762796,-73.967686
1,A006,R079,5 AV/59 ST,R079,A006,5 AVE-59 ST,NQR,BMT,40.764909,-73.973372
2,A007,R079,5 AV/59 ST,R079,A007,5 AVE-59 ST,NQR,BMT,40.764909,-73.973372
3,A010,R080,57 ST-7 AV,R080,A010,57 ST-7 AVE,NQR,BMT,40.764755,-73.980646
4,A011,R080,57 ST-7 AV,R080,A011,57 ST-7 AVE,NQR,BMT,40.764755,-73.980646
...,...,...,...,...,...,...,...,...,...,...
781,,,,R328,R532G,METS-WILLETS PT,7,IRT,40.754622,-73.845625
782,,,,R414,N182A,HOWARD BCH-JFK,A,IND,40.660476,-73.830301
783,,,,R459,OB01,ORCHARD BEACH,6,IND,40.852417,-73.828082
784,,,,R537,JFK04,JFK JAMAICA CT2,E,IND,40.643942,-73.782356


In [8]:
# Inspect the rows the join left incomplete: any row with at least one NaN,
# i.e. a key present on only one side of the merge.
geostation_df.loc[geostation_df.isna().any(axis='columns')]

Unnamed: 0,C/A,UNIT,STATION,remote unit,control area,station,lines,division,latitude,longitude
27,A049,R088,CORTLANDT ST,,,,,,,
40,A077,R028,FULTON ST,,,,,,,
41,A081,R028,FULTON ST,,,,,,,
42,A082,R028,FULTON ST,,,,,,,
80,C015,R246,PROSPECT AV,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
781,,,,R328,R532G,METS-WILLETS PT,7,IRT,40.754622,-73.845625
782,,,,R414,N182A,HOWARD BCH-JFK,A,IND,40.660476,-73.830301
783,,,,R459,OB01,ORCHARD BEACH,6,IND,40.852417,-73.828082
784,,,,R537,JFK04,JFK JAMAICA CT2,E,IND,40.643942,-73.782356


In [9]:
# Keep only the fully populated rows: drop every row that contains at least
# one NaN in any column.
geostation_nona_df = geostation_df.dropna(axis='index', how='any')
geostation_nona_df

Unnamed: 0,C/A,UNIT,STATION,remote unit,control area,station,lines,division,latitude,longitude
0,A002,R051,59 ST,R051,A002,LEXINGTON AVE,456NQR,BMT,40.762796,-73.967686
1,A006,R079,5 AV/59 ST,R079,A006,5 AVE-59 ST,NQR,BMT,40.764909,-73.973372
2,A007,R079,5 AV/59 ST,R079,A007,5 AVE-59 ST,NQR,BMT,40.764909,-73.973372
3,A010,R080,57 ST-7 AV,R080,A010,57 ST-7 AVE,NQR,BMT,40.764755,-73.980646
4,A011,R080,57 ST-7 AV,R080,A011,57 ST-7 AVE,NQR,BMT,40.764755,-73.980646
...,...,...,...,...,...,...,...,...,...,...
742,S101,R070,ST. GEORGE,R070,S101,ST. GEORGE,1,SRT,40.643738,-74.073622
743,S101A,R070,ST. GEORGE,R070,S101A,ST. GEORGE,1,SRT,40.643738,-74.073622
744,S102,R165,TOMPKINSVILLE,R165,S102,TOMPKINSVILLE,1,SRT,40.636948,-74.074824
745,TRAM1,R468,RIT-MANHATTAN,R468,TRAM1,RIT-MANHATTAN,R,RIT,40.761268,-73.964016


In [10]:
# Build the base map, centred on the fixed coordinates below.
station_loc_map = folium.Map(
    location=[40.738, -73.98],
    zoom_start=11,
    tiles='cartodbpositron',
)

# Add one small circle marker per row of geostation_nona_df (the rows that
# survived the join without any NaN). The two coordinate columns are walked as
# plain Python floats instead of using iterrows(), which would build a full
# Series for every row only to read two values from it.
for lat, lon in geostation_nona_df[['latitude', 'longitude']].values.tolist():
    folium.CircleMarker(
        location=[lat, lon],
        color='#0080bb',
        fill_color='#0080bb',
        radius=1,
    ).add_to(station_loc_map)

# Display map in Jupyter
station_loc_map