In [1]:
# ridership data:
from __future__ import print_function, division

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import seaborn as sns

%matplotlib inline

def get_data(week_nums):
    url = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_{}.txt"
    dfs = []
    for week_num in week_nums:
        file_url = url.format(week_num)
        dfs.append(pd.read_csv(file_url))
    return pd.concat(dfs)
        
week_nums = [190406, 190413, 190420, 190427]
mta = get_data(week_nums)

mta.columns = [column.strip() for column in mta.columns]

mta.DATE.value_counts().sort_index()

mta.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
0,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/30/2019,00:00:00,REGULAR,6999064,2373568
1,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/30/2019,04:00:00,REGULAR,6999084,2373576
2,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/30/2019,08:00:00,REGULAR,6999107,2373622
3,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/30/2019,12:00:00,REGULAR,6999214,2373710
4,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/30/2019,16:00:00,REGULAR,6999451,2373781


Source for station_locations:
stat_loc = https://data.ny.gov/Transportation/NYC-Transit-Subway-Entrance-And-Exit-Data/i9wp-a4ja

In [2]:
station_locations = pd.read_csv('NYC_Transit_Subway_Entrance_And_Exit_Data.csv')

In [3]:
station_locations.shape

(1868, 32)

In [4]:
station_locations.head()

Unnamed: 0,Division,Line,Station Name,Station Latitude,Station Longitude,Route1,Route2,Route3,Route4,Route5,...,ADA,ADA Notes,Free Crossover,North South Street,East West Street,Corner,Entrance Latitude,Entrance Longitude,Station Location,Entrance Location
0,BMT,4 Avenue,25th St,40.660397,-73.998091,R,,,,,...,False,,False,4th Ave,25th St,SW,40.660489,-73.99822,"(40.660397, -73.998091)","(40.660489, -73.99822)"
1,BMT,4 Avenue,25th St,40.660397,-73.998091,R,,,,,...,False,,False,4th Ave,25th St,SE,40.660323,-73.997952,"(40.660397, -73.998091)","(40.660323, -73.997952)"
2,BMT,4 Avenue,36th St,40.655144,-74.003549,N,R,,,,...,False,,True,4th Ave,36th St,NW,40.654676,-74.004306,"(40.655144, -74.003549)","(40.654676, -74.004306)"
3,BMT,4 Avenue,36th St,40.655144,-74.003549,N,R,,,,...,False,,True,4th Ave,36th St,NE,40.654365,-74.004113,"(40.655144, -74.003549)","(40.654365, -74.004113)"
4,BMT,4 Avenue,36th St,40.655144,-74.003549,N,R,,,,...,False,,True,4th Ave,36th St,NW,40.65449,-74.004499,"(40.655144, -74.003549)","(40.65449, -74.004499)"


In [5]:
station_locations.columns

Index(['Division', 'Line', 'Station Name', 'Station Latitude',
       'Station Longitude', 'Route1', 'Route2', 'Route3', 'Route4', 'Route5',
       'Route6', 'Route7', 'Route8', 'Route9', 'Route10', 'Route11',
       'Entrance Type', 'Entry', 'Exit Only', 'Vending', 'Staffing',
       'Staff Hours', 'ADA', 'ADA Notes', 'Free Crossover',
       'North South Street', 'East West Street', 'Corner', 'Entrance Latitude',
       'Entrance Longitude', 'Station Location', 'Entrance Location'],
      dtype='object')

In [6]:
station_locations.dtypes

Division               object
Line                   object
Station Name           object
Station Latitude      float64
Station Longitude     float64
Route1                 object
Route2                 object
Route3                 object
Route4                 object
Route5                 object
Route6                 object
Route7                 object
Route8                float64
Route9                float64
Route10               float64
Route11               float64
Entrance Type          object
Entry                  object
Exit Only              object
Vending                object
Staffing               object
Staff Hours            object
ADA                      bool
ADA Notes              object
Free Crossover           bool
North South Street     object
East West Street       object
Corner                 object
Entrance Latitude     float64
Entrance Longitude    float64
Station Location       object
Entrance Location      object
dtype: object

In [7]:
import math

In [8]:
from math import sin, cos, sqrt, atan2, radians
R = 6373 # Approximate radius of Earth in km

Source for latitude and longitude of schools:
https://latitude.to/articles-by-country/us/united-states/

In [9]:
station_locations = station_locations.drop(['Route1', 'Route2', 'Route3', 'Route4', 'Route5',
       'Route6', 'Route7', 'Route8', 'Route9', 'Route10', 'Route11',
       'Entrance Type', 'Entry', 'Exit Only', 'Vending', 'Staffing',
       'Staff Hours', 'ADA', 'ADA Notes', 'Free Crossover',
       'North South Street', 'East West Street', 'Corner', 'Entrance Latitude',
       'Entrance Longitude'], axis=1, errors='ignore')

In [10]:
station_locations.dtypes

Division              object
Line                  object
Station Name          object
Station Latitude     float64
Station Longitude    float64
Station Location      object
Entrance Location     object
dtype: object

# Distance from each Station to Major Universities:

In [11]:
station_locations.dropna(subset=['Station Latitude'], axis=0, inplace=True)

In [12]:
station_locations.dropna(subset=['Station Longitude'], axis=0, inplace=True)

In [13]:
station_locations['Columbia Latitude'] = 40.804496782
station_locations['Columbia Longitude'] = -73.957162838

station_locations['NYU Lat'] = 40.724663768
station_locations['NYU Long'] = -73.990329372

station_locations['CUNY Lat'] = 40.8197406
station_locations['CUNY Long'] = -73.9505357

station_locations['Fashion Lat'] = 40.741330368
station_locations['Fashion Long'] = -73.989829374

station_locations['Cooper Lat'] = 40.72927
station_locations['Cooper Long'] = -73.99058

In [14]:
def haversine_np(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)

    All args must be of equal length.    

    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2

    c = 2 * np.arcsin(np.sqrt(a))
    km = 6367 * c
    return km


In [15]:
# Distance to Columbia University in km:
station_locations['Distance to Columbia'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Columbia Longitude'],
                                             station_locations['Columbia Latitude'])

In [16]:
# Distance to NYU in km: (40.724663768 -73.990329372)
station_locations['Distance to NYU'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['NYU Long'],
                                             station_locations['NYU Lat'])

In [17]:
# Distance to CUNY City College of New York: (40.8197406 -73.9505357)
station_locations['Distance to CUNY'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['CUNY Long'],
                                             station_locations['CUNY Lat'])

In [18]:
# Distance to Fashion Institute of Technology: (40.741330368 -73.989829374)
station_locations['Distance to Fashion'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Fashion Long'],
                                             station_locations['Fashion Lat'])

In [19]:
# Distance to Cooper Union for Advancement of Science and Art: (40.72927 -73.99058)
station_locations['Distance to Cooper'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Cooper Long'],
                                             station_locations['Cooper Lat'])

In [20]:
station_locations.head()

Unnamed: 0,Division,Line,Station Name,Station Latitude,Station Longitude,Station Location,Entrance Location,Columbia Latitude,Columbia Longitude,NYU Lat,...,CUNY Long,Fashion Lat,Fashion Long,Cooper Lat,Cooper Long,Distance to Columbia,Distance to NYU,Distance to CUNY,Distance to Fashion,Distance to Cooper
0,BMT,4 Avenue,25th St,40.660397,-73.998091,"(40.660397, -73.998091)","(40.660489, -73.99822)",40.804497,-73.957163,40.724664,...,-73.950536,40.74133,-73.989829,40.72927,-73.99058,16.379785,7.171532,18.154136,9.020621,7.679638
1,BMT,4 Avenue,25th St,40.660397,-73.998091,"(40.660397, -73.998091)","(40.660323, -73.997952)",40.804497,-73.957163,40.724664,...,-73.950536,40.74133,-73.989829,40.72927,-73.99058,16.379785,7.171532,18.154136,9.020621,7.679638
2,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.654676, -74.004306)",40.804497,-73.957163,40.724664,...,-73.950536,40.74133,-73.989829,40.72927,-73.99058,17.050322,7.805283,18.82761,9.646968,8.309424
3,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.654365, -74.004113)",40.804497,-73.957163,40.724664,...,-73.950536,40.74133,-73.989829,40.72927,-73.99058,17.050322,7.805283,18.82761,9.646968,8.309424
4,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.65449, -74.004499)",40.804497,-73.957163,40.724664,...,-73.950536,40.74133,-73.989829,40.72927,-73.99058,17.050322,7.805283,18.82761,9.646968,8.309424


Which stations are within 1 km of each university?
- Walkable distance is approximately 1 km or less

In [21]:
df_columbia = station_locations[station_locations['Distance to Columbia'] <=1]
stations_near_columbia = df_columbia['Station Name'].unique()
print(stations_near_columbia)

['116th St' '125th St' 'Cathedral Parkway-110th St'
 '116th St-Columbia University' '110th St-Central Park North']


In [22]:
df_nyu = station_locations[station_locations['Distance to NYU'] <=1]
stations_near_nyu = df_nyu['Station Name'].unique()
print(stations_near_nyu)

['8th St' 'Prince St' '3rd Av' 'Bowery' 'Essex St' '2nd Av'
 'Broadway-Lafayette St' 'Delancey St' 'Grand St' 'Astor Place'
 'Bleecker St' 'Spring St']


In [23]:
df_cuny = station_locations[station_locations['Distance to CUNY'] <=1]
stations_near_cuny = df_cuny['Station Name'].unique()
print(stations_near_cuny)

['125th St' '135th St' '145th St' '137th St-City College']


In [24]:
df_fashion = station_locations[station_locations['Distance to Fashion'] <=1]
stations_near_fashion = df_fashion['Station Name'].unique()
print(stations_near_fashion)

['23rd St' '28th St' '34th St' 'Union Square' '3rd Av' '6th Av' '14th St'
 '18th St' '14th St-Union Square' '33rd St']


In [25]:
df_cooper = station_locations[station_locations['Distance to Cooper'] <=1]
stations_near_cooper = df_cooper['Station Name'].unique()
print(stations_near_cooper)

['8th St' 'Prince St' 'Union Square' '1st Av' '3rd Av' '2nd Av'
 'Broadway-Lafayette St' 'West 4th St' '14th St-Union Square'
 'Astor Place' 'Bleecker St' 'Spring St']


In [26]:
stations_near_universities = np.concatenate((stations_near_columbia, 
                                             stations_near_nyu,
                                             stations_near_cuny,
                                             stations_near_fashion,
                                             stations_near_cooper))
print(np.sort(stations_near_universities))

['110th St-Central Park North' '116th St' '116th St-Columbia University'
 '125th St' '125th St' '135th St' '137th St-City College' '145th St'
 '14th St' '14th St-Union Square' '14th St-Union Square' '18th St'
 '1st Av' '23rd St' '28th St' '2nd Av' '2nd Av' '33rd St' '34th St'
 '3rd Av' '3rd Av' '3rd Av' '6th Av' '8th St' '8th St' 'Astor Place'
 'Astor Place' 'Bleecker St' 'Bleecker St' 'Bowery'
 'Broadway-Lafayette St' 'Broadway-Lafayette St'
 'Cathedral Parkway-110th St' 'Delancey St' 'Essex St' 'Grand St'
 'Prince St' 'Prince St' 'Spring St' 'Spring St' 'Union Square'
 'Union Square' 'West 4th St']


## Number of Universities within walking distance of each station

In [27]:
stations_near_universities = pd.DataFrame(stations_near_universities)
stations_near_universities[0].value_counts()

3rd Av                          3
Astor Place                     2
8th St                          2
125th St                        2
Union Square                    2
14th St-Union Square            2
Broadway-Lafayette St           2
Bleecker St                     2
2nd Av                          2
Spring St                       2
Prince St                       2
14th St                         1
18th St                         1
116th St                        1
110th St-Central Park North     1
6th Av                          1
1st Av                          1
145th St                        1
116th St-Columbia University    1
Delancey St                     1
Grand St                        1
28th St                         1
Bowery                          1
33rd St                         1
34th St                         1
Essex St                        1
23rd St                         1
Cathedral Parkway-110th St      1
West 4th St                     1
135th St      

# Stations Near Tech Companies:

https://www.findlatitudeandlongitude.com/

In [28]:
station_locations['WeWork Latitude'] = 40.7405
station_locations['WeWork Longitude'] = -73.995938

station_locations['Google Lat'] = 40.743224
station_locations['Google Long'] = -74.007985

station_locations['Facebook Lat'] = 40.730852
station_locations['Facebook Long'] = -73.991364

station_locations['Spotify Lat'] = 40.710609
station_locations['Spotify Long'] = -74.012956

station_locations['Amazon Lat'] = 40.753496
station_locations['Amazon Long'] = -73.998658

In [29]:
# Distance to WeWork Technology in km:
station_locations['Distance to WeWork'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['WeWork Longitude'],
                                             station_locations['WeWork Latitude'])

In [30]:
# Distance to Google in km:
station_locations['Distance to Google'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Google Long'],
                                             station_locations['Google Lat'])

In [31]:
# Distance to Facebook in km:
station_locations['Distance to Facebook'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Facebook Long'],
                                             station_locations['Facebook Lat'])

In [32]:
# Distance to Spotify in km:
station_locations['Distance to Spotify'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Spotify Long'],
                                             station_locations['Spotify Lat'])

In [33]:
# Distance to Amazon in km:
station_locations['Distance to Amazon'] = haversine_np(station_locations['Station Longitude'],
                                             station_locations['Station Latitude'],
                                             station_locations['Amazon Long'],
                                             station_locations['Amazon Lat'])

In [34]:
station_locations.head()

Unnamed: 0,Division,Line,Station Name,Station Latitude,Station Longitude,Station Location,Entrance Location,Columbia Latitude,Columbia Longitude,NYU Lat,...,Facebook Long,Spotify Lat,Spotify Long,Amazon Lat,Amazon Long,Distance to WeWork,Distance to Google,Distance to Facebook,Distance to Spotify,Distance to Amazon
0,BMT,4 Avenue,25th St,40.660397,-73.998091,"(40.660397, -73.998091)","(40.660489, -73.99822)",40.804497,-73.957163,40.724664,...,-73.991364,40.710609,-74.012956,40.753496,-73.998658,8.903303,9.241825,7.849808,5.718686,10.345747
1,BMT,4 Avenue,25th St,40.660397,-73.998091,"(40.660397, -73.998091)","(40.660323, -73.997952)",40.804497,-73.957163,40.724664,...,-73.991364,40.710609,-74.012956,40.753496,-73.998658,8.903303,9.241825,7.849808,5.718686,10.345747
2,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.654676, -74.004306)",40.804497,-73.957163,40.724664,...,-73.991364,40.710609,-74.012956,40.753496,-73.998658,9.506845,9.795032,8.475472,6.214323,10.937141
3,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.654365, -74.004113)",40.804497,-73.957163,40.724664,...,-73.991364,40.710609,-74.012956,40.753496,-73.998658,9.506845,9.795032,8.475472,6.214323,10.937141
4,BMT,4 Avenue,36th St,40.655144,-74.003549,"(40.655144, -74.003549)","(40.65449, -74.004499)",40.804497,-73.957163,40.724664,...,-73.991364,40.710609,-74.012956,40.753496,-73.998658,9.506845,9.795032,8.475472,6.214323,10.937141


Which stations are within 1 km of each large tech company?
- Walkable distance is approximately 1 km or less

In [35]:
df_wework = station_locations[station_locations['Distance to WeWork'] <=1]
stations_near_wework = df_wework['Station Name'].unique()
print(stations_near_wework)

['23rd St' '28th St' 'Union Square' '6th Av' '8th Av' '14th St'
 'West 4th St' '18th St' 'Christopher St' '14th St-Union Square']


In [36]:
df_google = station_locations[station_locations['Distance to Google'] <=1]
stations_near_google = df_google['Station Name'].unique()
print(stations_near_google)

['8th Av' '14th St' '23rd St' '18th St']


In [37]:
df_facebook = station_locations[station_locations['Distance to Facebook'] <=1]
stations_near_facebook = df_cuny['Station Name'].unique()
print(stations_near_facebook)

['125th St' '135th St' '145th St' '137th St-City College']


In [38]:
df_spotify = station_locations[station_locations['Distance to Spotify'] <=1]
stations_near_spotify = df_fashion['Station Name'].unique()
print(stations_near_spotify)

['23rd St' '28th St' '34th St' 'Union Square' '3rd Av' '6th Av' '14th St'
 '18th St' '14th St-Union Square' '33rd St']


In [39]:
df_amazon = station_locations[station_locations['Distance to Amazon'] <=1]
stations_near_amazon = df_amazon['Station Name'].unique()
print(stations_near_amazon)

['23rd St' '34th St' '42nd St' '28th St' 'Times Square'
 '34 St Hudson Yards']


In [40]:
stations_near_tech = np.concatenate((stations_near_wework, 
                                             stations_near_google,
                                             stations_near_facebook,
                                             stations_near_spotify,
                                             stations_near_amazon))
print(np.sort(stations_near_tech))

['125th St' '135th St' '137th St-City College' '145th St' '14th St'
 '14th St' '14th St' '14th St-Union Square' '14th St-Union Square'
 '18th St' '18th St' '18th St' '23rd St' '23rd St' '23rd St' '23rd St'
 '28th St' '28th St' '28th St' '33rd St' '34 St Hudson Yards' '34th St'
 '34th St' '3rd Av' '42nd St' '6th Av' '6th Av' '8th Av' '8th Av'
 'Christopher St' 'Times Square' 'Union Square' 'Union Square'
 'West 4th St']


## Number of Tech Companies within walking distance of each Station

In [41]:
stations_near_tech = pd.DataFrame(stations_near_universities)
stations_near_tech[0].value_counts()

3rd Av                          3
Astor Place                     2
8th St                          2
125th St                        2
Union Square                    2
14th St-Union Square            2
Broadway-Lafayette St           2
Bleecker St                     2
2nd Av                          2
Spring St                       2
Prince St                       2
14th St                         1
18th St                         1
116th St                        1
110th St-Central Park North     1
6th Av                          1
1st Av                          1
145th St                        1
116th St-Columbia University    1
Delancey St                     1
Grand St                        1
28th St                         1
Bowery                          1
33rd St                         1
34th St                         1
Essex St                        1
23rd St                         1
Cathedral Parkway-110th St      1
West 4th St                     1
135th St      

## Total Number of Tech Companies AND/OR Universities within walking distance of each station:

In [42]:
near_tech_or_univ = pd.concat([stations_near_universities,stations_near_tech])
near_tech_or_univ = pd.DataFrame(near_tech_or_univ)
near_tech_or_univ[0].value_counts()

3rd Av                          6
Astor Place                     4
8th St                          4
125th St                        4
Union Square                    4
14th St-Union Square            4
Broadway-Lafayette St           4
Bleecker St                     4
2nd Av                          4
Spring St                       4
Prince St                       4
14th St                         2
18th St                         2
116th St                        2
110th St-Central Park North     2
6th Av                          2
1st Av                          2
145th St                        2
116th St-Columbia University    2
Delancey St                     2
Grand St                        2
28th St                         2
Bowery                          2
33rd St                         2
34th St                         2
Essex St                        2
23rd St                         2
Cathedral Parkway-110th St      2
West 4th St                     2
135th St      