In [163]:
import pandas as pd
import datetime as dt
import numpy as np
import pyproj

In [164]:
# We are only concerned with the busiest subway stations.
# subway_station_coords.csv is a local file linking station names to their
# coordinates. To keep that list easy to assemble by hand, the coordinates are
# stored as a single "lat,lon" string in the COORDS column. Here we split that
# string into numeric LAT and LON columns; the join to the main df happens
# later with a merge on STATION.

coords = pd.read_csv('subway_station_coords.csv')

# Split every COORDS string exactly once, in one vectorized pass, rather than
# splitting each string twice inside a Python loop.
lat_lon = coords['COORDS'].str.split(',', expand=True)
coords['LAT'] = lat_lon[0].astype(float)
coords['LON'] = lat_lon[1].astype(float)

# Fail fast on malformed input: a missing COORDS value or a string without a
# comma ends up as NaN here and would otherwise silently flow into the
# projection step.
if coords[['LAT', 'LON']].isna().any().any():
    raise ValueError('subway_station_coords.csv has rows with a missing or malformed COORDS value')

# Rebind instead of mutating in place so that re-running the cell is predictable.
coords = coords.drop(columns=['COORDS'])

# Coordinate transformation from lat/lon (WGS84) to the Web Mercator
# projection used by Google Maps style tiles. We define the two projections
# with pyproj, project every station in a single array call, and drop the
# columns that are no longer needed.
#
# NOTE(review): to my knowledge the "+init=EPSG:xxxx" syntax and the
# module-level pyproj.transform() are deprecated in pyproj >= 2 in favour of
# pyproj.Transformer.from_crs(..., always_xy=True) - confirm against the
# installed pyproj version before upgrading the environment.

project_projection = pyproj.Proj("+init=EPSG:4326")  # wgs84
google_projection = pyproj.Proj("+init=EPSG:3857")  # default google projection

def coord_transform(row):
    """Project a single station from lon/lat to Web Mercator.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row) with 'LON' and 'LAT' entries.

    Returns
    -------
    tuple
        (x, y) as returned by pyproj.transform for the google projection.
    """
    # Note the argument order: longitude first, then latitude.
    x, y = pyproj.transform(project_projection, google_projection, row['LON'], row['LAT'])
    return x, y

# pyproj.transform accepts whole arrays, so all stations are projected in one
# call. This replaces the row-by-row DataFrame.apply, the intermediate tuple
# column and the slow .apply(pd.Series) expansion.
x_merc, y_merc = pyproj.transform(project_projection, google_projection,
                                  coords['LON'].values, coords['LAT'].values)
coords['X_COORD'] = x_merc
coords['Y_COORD'] = y_merc
coords = coords.drop(columns=['LAT', 'LON'])

# While we are here, also project the reference point the map is framed
# around (described by the author as the center of Manhattan).
# Despite their names, ny_lon / ny_lat hold the projected x / y values in the
# google projection, not degrees.
ny_longitude, ny_latitude = -73.912074, 40.775149
ny_lon, ny_lat = pyproj.transform(
    project_projection,
    google_projection,
    ny_longitude,
    ny_latitude,
)

In [165]:
# Import our Top20 data from the cleaning phase, attach the projected station
# coordinates, and check the max and min of PEOPLE.
# The merge uses pandas' default join, so only rows whose STATION appears in
# both frames are kept.

df = pd.read_csv('Top20_Benson.csv').merge(coords, on='STATION')

print('Max People: %.0f \nMin People: %.0f' % (df['PEOPLE'].max(), df['PEOPLE'].min()))

Max People: 103838 
Min People: 1250


In [166]:
# Not a fan of the default bokeh palettes, so we build our own:
# a 100-step gradient running from '#fdd49e' to '#870000', stored as a list
# of hex color strings.

from matplotlib.colors import LinearSegmentedColormap, rgb2hex

cm = LinearSegmentedColormap.from_list('colors', ['#fdd49e', '#870000'], N=100)
pal = [rgb2hex(cm(step)) for step in range(100)]

In [167]:
# I was asked whether the points could be scaled by the number of people.
# np.interp makes this easy once we have the low and high ends of the range:
# PEOPLE values from low..high are mapped linearly onto marker sizes 5..25.

low, high = df['PEOPLE'].min(), df['PEOPLE'].max()

df['SIZE'] = np.interp(df['PEOPLE'], (low, high), (5, 25))

In [172]:
# And now for the MAGIC!
from bokeh.plotting import figure, show, output_file, reset_output
from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.models import ColumnDataSource, ColorBar, HoverTool, LinearColorMapper
# NOTE(review): nothing in this cell appears to use a name from bokeh.palettes
# (the palette is the custom `pal`); the wildcard import is kept in case other
# cells rely on it.
from bokeh.palettes import *

# Changing this mask and title is all it takes to make a new plot.
# NOTE(review): the title says "Weekday" while the mask selects WEEKDAY == 0;
# confirm how WEEKDAY is encoded so that the title and the mask agree.
mask = df['WEEKDAY'].eq(0) & df['HOUR'].eq(12)
title = 'Weekday 0800-1200'

# A little housekeeping: reset bokeh's output state and send this plot to an
# HTML file named after the title.
reset_output()
output_file(title + " Map.html")

# Associate the data with the plotting functions. The color scale spans the
# low/high PEOPLE values computed earlier; the data source holds only the rows
# selected by the mask.
mapper = LinearColorMapper(palette=pal, low=low, high=high)
data = ColumnDataSource(df.loc[mask])

# Add a hover tool that shows the station name and its people count.
hover_tool = HoverTool(tooltips=[('Station', '@STATION'), ('People', '@PEOPLE')])

# And plot! Frame the figure relative to the projected reference point and set
# both axes to mercator coordinates.
p = figure(
    title=title,
    plot_width=900,
    plot_height=600,
    x_axis_type="mercator",
    y_axis_type="mercator",
    x_range=(ny_lon - 20000, ny_lon + 10000),
    y_range=(ny_lat - 13000, ny_lat + 7000),
)

# One circle per station, sized by SIZE and colored by the number of people.
p.circle(
    x='X_COORD', y='Y_COORD', size='SIZE', source=data,
    color={'field': 'PEOPLE', 'transform': mapper},
    line_color="black", alpha=0.8, legend=False,
)

# Open source geotiles as the base map.
p.add_tile(CARTODBPOSITRON)

# Hover tool, color bar, and title aesthetics.
p.add_tools(hover_tool)
color_bar = ColorBar(color_mapper=mapper, width=20, location=(0, 0), title='People')
p.add_layout(color_bar, 'right')
p.title.text_font_size = "36px"

show(p)