In [108]:
import pandas as pd
import datetime as dt
import numpy as np
import pyproj

In [120]:
# Build a dummy dataset to exercise the bokeh map: one row per station, filled
# with random PEOPLE, DAY and TIME values.

stations = [
    'FULTON ST',
    'CORTLANDT ST',
    'PATH NEW WTC',
    '34 ST-PENN STA',
    '14 ST-UNION SQ',
    'WALL ST',
    'GRD CNTRL-42 ST',
    '23 ST',
    'CANAL ST',
    'METS-WILLETS PT',
    '96 ST',
    'EXCHANGE PLACE',
    '42 ST-PORT AUTH',
    '14 ST',
    '161/YANKEE STAD',
    'LACKAWANNA',
    '125 ST',
    '5 AV/53 ST',
    'CITY / BUS',
    'HARRISON',
]

# Size the random columns from the station list itself. A separately maintained
# row count makes pd.DataFrame raise ValueError (mismatched column lengths) as
# soon as a station is added or removed.
n_stations = len(stations)

# A locally seeded generator keeps the dummy data identical across
# Restart & Run All without touching numpy's global random state.
rng = np.random.RandomState(42)

dummy_df = pd.DataFrame({
    'STATION': stations,
    # randint's upper bound is exclusive: PEOPLE is 0..499, DAY is 0..5.
    # NOTE(review): if DAY is meant to be a day of the week, the bound should
    # probably be 7 -- confirm the intended range.
    'PEOPLE': rng.randint(0, 500, size=n_stations),
    'DAY': rng.randint(0, 6, size=n_stations),
    # Four-hour buckets: 0, 4, 8, 12 or 16.
    'TIME': rng.randint(0, 5, size=n_stations) * 4,
})

dummy_df.head()

Unnamed: 0,STATION,PEOPLE,DAY,TIME
0,FULTON ST,161,0,12
1,CORTLANDT ST,393,2,8
2,PATH NEW WTC,107,5,16
3,34 ST-PENN STA,221,5,12
4,14 ST-UNION SQ,412,5,4


In [110]:
# Only the busiest subway stations matter for this map.
# subway_station_coords.csv is a local file linking each station name to its
# coordinates, stored as one "lat, lon" string for user friendliness.
# Split that string into numeric LAT / LON columns, then merge them onto the
# main frame by station name.

coords = pd.read_csv('subway_station_coords.csv')

# Show one raw value to confirm the "lat, lon" layout.
print(coords.COORDS[0])

# Split each string once and reuse the pieces for both columns.
coord_parts = [raw.split(',') for raw in coords.COORDS]
lats = [float(parts[0]) for parts in coord_parts]
lons = [float(parts[1]) for parts in coord_parts]

coords['LAT'] = lats
coords['LON'] = lons
coords = coords.drop(columns=['COORDS'])

# pd.merge defaults to an inner join, so only stations present in both frames
# end up in df.
df = pd.merge(dummy_df, coords, on='STATION')
df.head()

40.709488, -74.008353


Unnamed: 0,STATION,PEOPLE,DAY,TIME,LAT,LON
0,FULTON ST,282,1,8,40.709488,-74.008353
1,CORTLANDT ST,321,5,8,40.711261,-74.010786
2,PATH NEW WTC,467,5,16,40.712857,-74.009877
3,34 ST-PENN STA,120,4,12,40.751308,-73.990235
4,14 ST-UNION SQ,220,0,16,40.735039,-73.990763


In [115]:
# Coordinate transformation from lon/lat (WGS84) to the Web Mercator projection
# used by Google Maps-style tiles. pyproj does the conversion; the results are
# stored as X_COORD / Y_COORD and the LAT / LON columns are then dropped.
# Because LAT / LON are consumed here, re-run the merge cell before re-running
# this one.

project_projection = pyproj.Proj("+init=EPSG:4326")  # wgs84
google_projection = pyproj.Proj("+init=EPSG:3857")  # default google projection


def coord_transform(row):
    """Project a single row's longitude/latitude to Web Mercator.

    Kept for row-at-a-time use; the frame itself is converted in one
    vectorized call below.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with 'LON' and 'LAT'
            entries.

    Returns:
        An (x, y) tuple in the target projection.
    """
    x, y = pyproj.transform(project_projection, google_projection, row['LON'], row['LAT'])
    return x, y


# pyproj.transform accepts whole arrays, so convert every station in a single
# call instead of one call per row plus a tuple-unpacking pass through a
# temporary column.
x_coords, y_coords = pyproj.transform(
    project_projection,
    google_projection,
    df['LON'].values,
    df['LAT'].values,
)
df['X_COORD'] = x_coords
df['Y_COORD'] = y_coords
df = df.drop(columns=['LAT', 'LON'])
df.head()


Unnamed: 0,STATION,PEOPLE,DAY,TIME,X_COORD,Y_COORD
0,FULTON ST,282,1,8,-8238572.0,4969585.0
1,CORTLANDT ST,321,5,8,-8238843.0,4969846.0
2,PATH NEW WTC,467,5,16,-8238742.0,4970080.0
3,34 ST-PENN STA,120,4,12,-8236555.0,4975729.0
4,14 ST-UNION SQ,220,0,16,-8236614.0,4973338.0


In [117]:
# The default bokeh palettes weren't to taste, so build a custom one:
# a 100-step gradient running from #fdd49e to #870000.

from matplotlib.colors import LinearSegmentedColormap, rgb2hex

cm = LinearSegmentedColormap.from_list('colors', ['#fdd49e', '#870000'], N=100)

# Calling the colormap with an integer looks up that entry of its colour
# table; each RGBA result is converted to a hex string for bokeh.
pal = [rgb2hex(cm(step)) for step in range(100)]

In [118]:
# I was asked whether the points could be scaled.
# np.interp makes this easy once the lowest and highest PEOPLE values are
# known: it maps that range linearly onto marker sizes from 5 to 25.

low = df['PEOPLE'].min()
high = df['PEOPLE'].max()

df['SIZE'] = np.interp(df['PEOPLE'], [low, high], [5, 25])

In [119]:
from bokeh.models import ColorBar, ColumnDataSource, HoverTool, LinearColorMapper
from bokeh.plotting import figure, output_file, reset_output, show
from bokeh.tile_providers import CARTODBPOSITRON
# NOTE(review): wildcard import kept as-is; the later scratch cells use OrRd,
# which presumably arrives through it.
from bokeh.palettes import *

# Clear any previously configured output target, then write the plot to a
# standalone HTML file.
reset_output()
output_file("tile.html")

# Colour scale for the markers: the custom palette spread over 0-500.
mapper = LinearColorMapper(palette=pal, low=0, high=500)

data = ColumnDataSource(df)

tooltips = [
    ('Station', '@STATION'),
    ('People', '@PEOPLE'),
]
hover_tool = HoverTool(tooltips=tooltips)

# Map centre as longitude/latitude, projected to Web Mercator the same way as
# the station coordinates.
ny_longitude = -73.912074
ny_latitude = 40.775149
ny_lon, ny_lat = pyproj.transform(
    project_projection, google_projection, ny_longitude, ny_latitude
)

print(ny_lon, ny_lat)

# Range bounds are supplied in Web Mercator coordinates, centred on the point
# computed above.
x_bounds = (ny_lon - 20000, ny_lon + 20000)
y_bounds = (ny_lat - 10000, ny_lat + 10000)

p = figure(
    x_range=x_bounds,
    y_range=y_bounds,
    x_axis_type="mercator",
    y_axis_type="mercator",
    plot_width=1200,
    plot_height=600,
)

# One circle per station: size comes from the SIZE column, fill colour from
# PEOPLE through the colour mapper.
p.circle(
    source=data,
    x='X_COORD',
    y='Y_COORD',
    legend='STATION',
    size='SIZE',
    color={'field': 'PEOPLE', 'transform': mapper},
    alpha=0.8,
    line_color="black",
)
p.add_tile(CARTODBPOSITRON)
p.add_tools(hover_tool)

color_bar = ColorBar(color_mapper=mapper, width=20, location=(0, 0), title='People')
p.add_layout(color_bar, 'right')

show(p)

-8227854.441154755 4979232.550263305


In [55]:
# Preview five swatches (indices 2-6) of the 9-colour OrRd palette, flipped so
# they run from lightest to darkest.
orrd_mid_swatches = OrRd[9][2:7]
orrd_mid_swatches[::-1]

['#fdd49e', '#fdbb84', '#fc8d59', '#ef6548', '#d7301f']

In [67]:
# Preview five swatches (indices 2-6) of the 9-colour OrRd palette, flipped so
# they run from lightest to darkest.
orrd_mid_swatches = OrRd[9][2:7]
orrd_mid_swatches[::-1]

['#fdd49e', '#fdbb84', '#fc8d59', '#ef6548', '#d7301f']