In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Selecting subway stations by geographical coordinates
It turns out that all but four of the 100 largest tech companies are located within a small slice of Manhattan, New York. The code below defines corner points for that region; the bounding box of those points is then used to keep only the subway stations where we anticipate people will be more receptive to a women-in-tech conference. [Click here for the link to the article.](https://www.builtinnyc.com/2016/12/13/big-tech-companies-nyc-locations)

In [5]:
#Corner coordinates, as (latitude, longitude), of the smaller area of tech companies centered around World Trade Center.
#Only the max/min latitude and longitude of these corners are used when filtering stations.
point1 = (40.708984, -74.0111239) #SW corner
point2 = (40.712274, -74.007657)  #SE corner
point3 = (40.714746, -74.009200)  #NE corner
point4 = (40.714830, -74.016226) #NW corner

In [6]:
#Corner coordinates, as (latitude, longitude), of the larger area that includes almost all large tech companies.
#Only the max/min latitude and longitude of these corners are used when filtering stations.
point5 = (40.762261, -73.966260) #NE corner
point6 = (40.701598, -74.012487) #labelled SE corner, but it has the lowest latitude AND the westernmost longitude of the three points -- TODO confirm which corner was intended
point7 = (40.767958, -73.995653) #W corner

In [7]:
def coords_max_min(points):
   '''
   Compute the bounding box of a collection of coordinate pairs.

   Parameters
   ----------
   points : iterable of (latitude, longitude) pairs
       Only the first two items of each entry are read
       (index 0 is the latitude, index 1 is the longitude).

   Returns
   -------
   tuple
       (max_lat, min_lat, max_long, min_long)

   Raises
   ------
   ValueError
       If `points` contains no entries, since no bounding box exists.
   '''
   # Materialise once so one-shot iterables (e.g. generators) can be scanned for both axes
   points = list(points)
   if not points:
       raise ValueError('coords_max_min() requires at least one (latitude, longitude) pair')

   lats = [point[0] for point in points]
   longs = [point[1] for point in points]

   return (max(lats), min(lats), max(longs), min(longs))

In [8]:
#Corner points grouped per region, in the list form that coords_max_min accepts
shorter_points = [point1, point2, point3, point4] #smaller area around World Trade Center
longer_points = [point5, point6, point7] #larger area that includes almost all large tech companies


In [9]:
#Location of the station-entrance CSV on the MTA developer site
STATION_ENTRANCES_URL = 'http://web.mta.info/developers/data/nyct/subway/StationEntrances.csv'

#Downloads the CSV and parses it into a DataFrame
station_entrances = pd.read_csv(STATION_ENTRANCES_URL)

In [10]:
#Preview the first rows to see which columns are available (station vs. entrance coordinates, routes, ...)
station_entrances.head()


Unnamed: 0,Division,Line,Station_Name,Station_Latitude,Station_Longitude,Route_1,Route_2,Route_3,Route_4,Route_5,...,Staffing,Staff_Hours,ADA,ADA_Notes,Free_Crossover,North_South_Street,East_West_Street,Corner,Latitude,Longitude
0,BMT,Astoria,Ditmars Blvd,40.775036,-73.912034,N,Q,,,,...,FULL,,False,,True,31st St,23rd Ave,NW,40.775149,-73.912074
1,BMT,Astoria,Ditmars Blvd,40.775036,-73.912034,N,Q,,,,...,FULL,,False,,True,31st St,23rd Ave,NE,40.77481,-73.912151
2,BMT,Astoria,Ditmars Blvd,40.775036,-73.912034,N,Q,,,,...,FULL,,False,,True,31st St,23rd Ave,NE,40.775025,-73.911891
3,BMT,Astoria,Ditmars Blvd,40.775036,-73.912034,N,Q,,,,...,FULL,,False,,True,31st St,23rd Ave,NW,40.774938,-73.912337
4,BMT,Astoria,Astoria Blvd-Hoyt Av,40.770258,-73.917843,N,Q,,,,...,FULL,,False,,True,31st St,Hoyt Ave South,SW,40.770313,-73.917978


## Getting the short-list of stations located within the higher-concentration nucleus in lower-west Manhattan
Here we use the `coords_max_min` function to get the maximum and minimum latitude and longitude of the corner points, which are then used to filter stations.

In [11]:
#Bounding box (max/min latitude and longitude) of the World Trade Center corner points
lat_max, lat_min, long_max, long_min = coords_max_min(shorter_points)

#A row passes only if its station coordinates lie strictly inside the box on both axes
station_lats = station_entrances['Station_Latitude']
station_longs = station_entrances['Station_Longitude']
lat_in_box = (station_lats > lat_min) & (station_lats < lat_max)
long_in_box = (station_longs > long_min) & (station_longs < long_max)

#Filters out stations outside of max & min lat/long coordinates
short_list = station_entrances[lat_in_box & long_in_box]

In [12]:
#Summary statistics of the numeric columns; the min/max rows show the coordinate range that survived the filter
short_list.describe()

Unnamed: 0,Station_Latitude,Station_Longitude,Route_8,Route_9,Route_10,Route_11,Latitude,Longitude
count,29.0,29.0,7.0,0.0,0.0,0.0,29.0,29.0
mean,40.712262,-74.009357,5.0,,,,40.712482,-74.009238
std,0.001568,0.000925,0.0,,,,0.001693,0.000982
min,40.710197,-74.011029,5.0,,,,40.710165,-74.011198
25%,40.710668,-74.009781,5.0,,,,40.710944,-74.009891
50%,40.712582,-74.009509,5.0,,,,40.712681,-74.00919
75%,40.714111,-74.008585,5.0,,,,40.713949,-74.008428
max,40.714111,-74.007691,5.0,,,,40.715099,-74.007739


In [13]:
#Collapses the filtered rows to their distinct station names (several rows can share one name)
set(short_list['Station_Name']) #Unique stations within the concentration of tech firms around World Trade Center

{'Broadway-Nassau',
 'Chambers St',
 'Cortlandt St',
 'Fulton St',
 'Park Place',
 'World Trade Center'}

## Getting a broader swath of Manhattan
The coordinates in `longer_points` span a larger slice of Manhattan that includes almost all of the large tech companies.

In [14]:
#Bounding box (max/min latitude and longitude) of the larger-area corner points
lat_max, lat_min, long_max, long_min = coords_max_min(longer_points)

#A row passes only if its station coordinates lie strictly inside the box on both axes
station_lats = station_entrances['Station_Latitude']
station_longs = station_entrances['Station_Longitude']
lat_in_box = (station_lats > lat_min) & (station_lats < lat_max)
long_in_box = (station_longs > long_min) & (station_longs < long_max)

#Filters out all stations outside of Silicon Alley
longer_list = station_entrances[lat_in_box & long_in_box]

In [15]:
#Preview only the first rows instead of dumping the entire filtered table into the notebook output
longer_list.head(10)

Unnamed: 0,Division,Line,Station_Name,Station_Latitude,Station_Longitude,Route_1,Route_2,Route_3,Route_4,Route_5,...,Staffing,Staff_Hours,ADA,ADA_Notes,Free_Crossover,North_South_Street,East_West_Street,Corner,Latitude,Longitude
20,BMT,Broadway,5th Av,40.764811,-73.973347,N,Q,R,,,...,FULL,,False,,True,5th Ave,60th St,NW,40.765156,-73.972501
21,BMT,Broadway,5th Av,40.764811,-73.973347,N,Q,R,,,...,FULL,,False,,True,5th Ave,60th St,SE,40.764728,-73.972396
22,BMT,Broadway,5th Av,40.764811,-73.973347,N,Q,R,,,...,FULL,,False,,True,5th Ave,60th St,NE,40.764849,-73.972306
23,BMT,Broadway,5th Av,40.764811,-73.973347,N,Q,R,,,...,FULL,,False,,True,5th Ave,60th St,SE,40.764657,-73.972229
24,BMT,Broadway,57th St,40.764664,-73.980658,N,Q,R,,,...,FULL,,False,,True,7th Ave,57th St,SW,40.765365,-73.980341
25,BMT,Broadway,57th St,40.764664,-73.980658,N,Q,R,,,...,FULL,,False,,True,7th Ave,57th St,SE,40.765298,-73.980033
26,BMT,Broadway,57th St,40.764664,-73.980658,N,Q,R,,,...,FULL,,False,,True,7th Ave,57th St,NW,40.765796,-73.980012
27,BMT,Broadway,57th St,40.764664,-73.980658,N,Q,R,,,...,FULL,,False,,True,7th Ave,57th St,NE,40.765639,-73.979764
28,BMT,Broadway,49th St,40.759901,-73.984139,N,Q,R,,,...,FULL,,True,Northbound Only,False,7th Ave,49th St,SW,40.760486,-73.983974
29,BMT,Broadway,49th St,40.759901,-73.984139,N,Q,R,,,...,FULL,,True,Northbound Only,False,7th Ave,49th St,NW,40.760621,-73.983904


In [38]:
#Collapses the filtered rows to their distinct station names.
#NOTE(review): a name is not necessarily one physical station -- the same name can appear on several lines.
set(longer_list['Station_Name']) #Unique stations within entire span of Silicon Alley

{'14th St',
 '14th St-Union Square',
 '18th St',
 '1st Av',
 '23rd St',
 '28th St',
 '2nd Av',
 '33rd St',
 '34th St',
 '3rd Av',
 '42nd St',
 '47-50th Sts Rockefeller Center',
 '49th St',
 '50th St',
 '51st St',
 '57th St',
 '59th St',
 '5th Av',
 '5th Av-53rd St',
 '6th Av',
 '7th Av',
 '8th Av',
 '8th St',
 'Astor Place',
 'Bleecker St',
 'Bowery',
 'Broad St',
 'Broadway-Lafayette St',
 'Broadway-Nassau',
 'Brooklyn Bridge-City Hall',
 'Canal St',
 'Canal St (UL)',
 'Chambers St',
 'Christopher St',
 'City Hall',
 'Cortlandt St',
 'Delancey St',
 'East Broadway',
 'Essex St',
 'Franklin St',
 'Fulton St',
 'Grand Central',
 'Grand Central-42nd St',
 'Grand St',
 'Houston St',
 'Lexington Av',
 'Lexington Av-53rd St',
 'Park Place',
 'Prince St',
 'Spring St',
 'Times Square',
 'Times Square-42nd St',
 'Union Square',
 'Wall St',
 'West 4th St',
 'World Trade Center'}

In [58]:
#Number of distinct station names in the larger area (counts names, not Division/Line/Station combinations)
longer_list['Station_Name'].nunique()

56

In [27]:
#Number of rows per station name -- size possibly worth considering for best stations to send street team
longer_list.groupby('Station_Name').size()

Station_Name
14th St                           26
14th St-Union Square               6
18th St                            6
1st Av                             4
23rd St                           40
28th St                           17
2nd Av                             4
33rd St                            9
34th St                           41
3rd Av                             4
42nd St                           18
47-50th Sts Rockefeller Center    17
49th St                            7
50th St                           16
51st St                            9
57th St                           16
59th St                            7
5th Av                            10
5th Av-53rd St                     5
6th Av                             2
7th Av                             4
8th Av                             2
8th St                             8
Astor Place                        2
Bleecker St                        7
Bowery                             2
Broad St                 

In [62]:
#Checking if large # of rows means duplicate stations: inspect every row of one station name
is_grand_central = longer_list['Station_Name'] == 'Grand Central-42nd St'
longer_list.loc[is_grand_central]

Unnamed: 0,Division,Line,Station_Name,Station_Latitude,Station_Longitude,Route_1,Route_2,Route_3,Route_4,Route_5,...,Staffing,Staff_Hours,ADA,ADA_Notes,Free_Crossover,North_South_Street,East_West_Street,Corner,Latitude,Longitude
1655,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,NONE,,True,,True,Park Ave,42nd St,SE,40.751741,-73.977691
1656,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,NONE,,True,,True,Park Ave,42nd St,SE,40.751653,-73.977753
1657,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,NONE,,True,,True,Lexington Ave,42nd St,SW,40.751352,-73.976396
1658,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,NONE,,True,,True,Park Ave,42nd St,SE,40.751647,-73.977382
1728,IRT,Flushing,Grand Central-42nd St,40.751431,-73.976041,GS,4,5,6,7,...,NONE,,True,,True,3rd Ave,42nd St,SW,40.750851,-73.975078
1783,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,NONE,,True,,True,Park Ave,42nd St,SE,40.751721,-73.977197
1823,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,PART,11am-7pm everyday,True,,True,Lexington Ave,43rd St,SE,40.751801,-73.975294
1824,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,PART,11am-7pm everyday,True,,True,Lexington Ave,42nd St,NE,40.751345,-73.975409
1825,IRT,Lexington,Grand Central-42nd St,40.751776,-73.976848,GS,4,5,6,7,...,PART,11am-7pm everyday,True,,True,Lexington Ave,42nd St,NW,40.751701,-73.976087


In [20]:
#Row counts per (Division, Line, Station_Name): shows whether one station name is shared by several lines,
#i.e. looks for duplicate stations hidden inside the set of unique station names
longer_list.groupby(['Division','Line','Station_Name']).size() #Looking for duplicate stations within larger python Set of stations 

Division  Line              Station_Name             
BMT       Broadway          23rd St                       8
                            28th St                       4
                            34th St                       4
                            49th St                       7
                            57th St                       8
                            5th Av                        7
                            8th St                        8
                            Canal St (UL)                 7
                            City Hall                     3
                            Cortlandt St                  4
                            Lexington Av                  4
                            Prince St                     4
                            Times Square-42nd St          4
                            Union Square                  4
          Canarsie          1st Av                        4
                            3rd Av            

In [52]:
#Same row counts keyed by station name first, so every line serving a given name is listed together
longer_list.groupby(['Station_Name', 'Line', 'Division']).size() #Checking for duplicate stations

Station_Name                    Line              Division
14th St                         6 Avenue          IND         10
                                8 Avenue          IND          8
                                Broadway-7th Ave  IRT          8
14th St-Union Square            Lexington         IRT          6
18th St                         Broadway-7th Ave  IRT          6
1st Av                          Canarsie          BMT          4
23rd St                         6 Avenue          IND          8
                                8 Avenue          IND         11
                                Broadway          BMT          8
                                Broadway-7th Ave  IRT          4
                                Lexington         IRT          9
28th St                         Broadway          BMT          4
                                Broadway-7th Ave  IRT          6
                                Lexington         IRT          7
2nd Av                         

In [45]:
#Row counts per street intersection and station name for the World Trade Center short-list
short_list.groupby(['East_West_Street', 'North_South_Street', 'Station_Name']).size()

East_West_Street  North_South_Street  Station_Name      
Barclay St        Church St           World Trade Center    1
Broadway          Park Pl             Park Place            1
Chambers St       Church St           Chambers St           4
Cortlandt St      Church St           Cortlandt St          2
Dey St            Church St           Cortlandt St          2
Fulton St         Broadway            Broadway-Nassau       2
                                      Fulton St             3
John St           Broadway            Fulton St             2
Murray St         Church St           Chambers St           2
Park Pl           Church St           World Trade Center    4
Vesey St          Church St           World Trade Center    1
Warren St         Church St           Chambers St           3
dtype: int64

In [60]:
#Persists the filtered DataFrame to a pickle file in the current working directory
#(presumably for reuse outside this notebook -- TODO confirm the consumer).
#NOTE(review): pickle files should only ever be loaded from trusted sources.
longer_list.to_pickle('longer_list.pkl')

In [None]:
#Removes the name from the kernel namespace; any cell that uses longer_list must re-run the filter cell first
del longer_list