# MTA Interactive Map of MetroCard Usage by Type

This map shows the usage rates of different types of MetroCards (i.e. Full Fare, 7-Day Unlimited, 30-Day Unlimited) at every station; each station is represented by a dot. If you hover over a dot, you can see the percentage of swipes at that station for each MetroCard type. The color of the dot represents the station's percentile rank in terms of usage (see the legend on the right of the map): the darker the color, the larger the share of that station's swipes made with the selected type of MetroCard. There is also a line graph that shows each MetroCard type's percent of total swipes across all stations over time.

Due to the limitations of Bokeh within a Jupyter Notebook, some tables and graphs could not be shown here. For the full experience, please follow the instructions below on how to run the interactive map as a Bokeh server. Below is a screenshot of the full interactive map:

If you would rather not run the Bokeh server, please scroll down to the bottom of this notebook for the interactive map.

<img src='screenshot.png'>

## Bokeh Server Instructions
To run the script, please install bokeh using either of the following commands:
<br><i>conda install -c bokeh/channel/dev bokeh</i>
<br>or
<br><i>pip install --pre -i https://pypi.anaconda.org/bokeh/channel/dev/simple bokeh --extra-index-url https://pypi.python.org/simple/</i>

Then, you can use the following command to open the interactive map (it will open in a browser tab):
<br><i>bokeh serve --show interactive_map.py</i>

In [1]:
from bokeh.models import ColumnDataSource, DatetimeTickFormatter, PrintfTickFormatter, Range1d
from bokeh.models import HoverTool, PanTool, WheelZoomTool, ColorBar
from bokeh.models.mappers import LinearColorMapper
from bokeh.models.tickers import FixedTicker
from bokeh.plotting import figure
from bokeh.tile_providers import WMTSTileSource
from bokeh.palettes import YlOrRd8, viridis

import pandas as pd
import numpy as np
from datetime import date
from bokeh.io import output_notebook, push_notebook, show
from ipywidgets import interact
# Configure Bokeh to render its output inline in notebook cells.
output_notebook()

In [2]:
### Function to convert lat/long coordinates to web Mercator coordinates
## Input: DataFrame, Latitude Column Name, Longitude Column Name
## Output: None; adds 'x' and 'y' columns (in meters) to the DataFrame in place
# Credit to jbednar's example notebook on plotting Uber data:
# https://anaconda.org/jbednar/uber/notebook
def lonlat_to_meters(df, lat_name, lon_name):
    """Add web Mercator ``'x'`` and ``'y'`` columns to ``df`` in place.

    Args:
        df: DataFrame holding latitude and longitude columns in degrees.
        lat_name: Name of the latitude column.
        lon_name: Name of the longitude column.

    Returns:
        None. ``df`` gains (or has overwritten) the columns ``'x'`` and ``'y'``.
    """
    # Half of the sphere's circumference in meters (radius 6378137 m):
    # the distance that 180 degrees of longitude spans on the projection.
    origin_shift = 2 * np.pi * 6378137 / 2.0

    latitudes, longitudes = df[lat_name], df[lon_name]

    # Longitude scales linearly onto the x axis.
    df['x'] = longitudes * origin_shift / 180.0

    # Latitude goes through the Mercator stretch first (expressed in
    # degrees), then gets the same degrees-to-meters scaling as x.
    mercator_degrees = np.log(np.tan((90 + latitudes) * np.pi / 360.0)) / (np.pi / 180.0)
    df['y'] = mercator_degrees * origin_shift / 180.0
    
### Function that calculates percentage of total swipes given a dictionary of dataframes
## Input: Dictionary of DataFrames
## Returns: Three lists (Full Fare, 7-Day Unlimited, 30-Day Unlimited), each holding one percentage per DataFrame
def overall_percents(df_dict):
    """Compute each MetroCard type's share of total swipes per DataFrame.

    Args:
        df_dict: Dictionary whose values are DataFrames with the swipe-count
            columns ``'FF'``, ``'7D_UNL'`` and ``'30D_UNL'``.

    Returns:
        A tuple of three lists (Full Fare, 7-Day Unlimited, 30-Day
        Unlimited). Each list has one percentage per DataFrame, in the
        iteration order of ``df_dict.values()``.
    """
    swipe_columns = ('FF', '7D_UNL', '30D_UNL')
    shares = {column: [] for column in swipe_columns}

    for frame in df_dict.values():
        column_sums = {column: frame[column].sum() for column in swipe_columns}
        grand_total = column_sums['FF'] + column_sums['7D_UNL'] + column_sums['30D_UNL']
        for column in swipe_columns:
            shares[column].append((column_sums[column] / grand_total) * 100)

    return shares['FF'], shares['7D_UNL'], shares['30D_UNL']

### Function to read in data and convert lat/long to web mercator given a list of csv's
## Input: List of csv filenames
## Returns: Dictionary mapping each file's position in the list to its DataFrame
def process_data(csv_list):
    """Load each CSV into a DataFrame and add web Mercator coordinates.

    Args:
        csv_list: List of CSV filenames. Each file must provide ``LATITUDE``
            and ``LONGITUDE`` columns.

    Returns:
        Dictionary mapping each file's position in ``csv_list`` (0, 1, ...)
        to its DataFrame, which carries the added ``'x'`` and ``'y'`` columns.
    """
    frames_by_position = {}

    for position, csv_path in enumerate(csv_list):
        frame = pd.read_csv(csv_path)
        # Coordinate conversion; adds the 'x' and 'y' columns in place.
        lonlat_to_meters(frame, 'LATITUDE', 'LONGITUDE')
        frames_by_position[position] = frame

    return frames_by_position

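### Widget callback that redraws the map for a selected period, card type and subway line
## Input: Period label, Card type label, Line name (or 'ALL' for every line)
## Output: None; replaces the data of the map's ColumnDataSources and pushes the update to the notebook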
def callback_change_dataframe(Period, Card, Line):
    """Redraw the station map for the selected period, card type and line.

    Reads the module-level ``df_dict``, ``colors`` and ``sources``, replaces
    the data of every entry in ``sources`` and then calls ``push_notebook()``
    so the already-rendered plot picks up the change.

    Args:
        Period: 'Period 1' through 'Period 4'; selects the DataFrame in
            ``df_dict``.
        Card: 'Full Fare', '7-Day Unlimited' or '30-Day Unlimited'; selects
            the percentage column used for binning.
        Line: Subway line to keep, matched as a plain substring of the
            ``LINES`` column, or 'ALL' to keep every station.

    Returns:
        None.

    Raises:
        KeyError: If ``Period`` or ``Card`` is not one of the labels above.
    """
    card_type_dict = {'Full Fare' : 'FF_PCT', '7-Day Unlimited' : '7D_UNL_PCT', '30-Day Unlimited' : '30D_UNL_PCT'}
    df_index = {'Period 1' : 0, 'Period 2' : 1, 'Period 3' : 2, 'Period 4' : 3}
    df = df_dict[df_index[Period]] # Update dataframe source
    card_type = card_type_dict[Card] # Get card type

    if Line != 'ALL':
        # Match the line name literally (not as a regex) and treat a missing
        # LINES value as "no match" so the boolean index stays valid.
        df = df[df.LINES.str.contains(Line, regex=False, na=False)]

    # Update Map
    # qcut assigns every station to one of five right-closed quantile bins.
    # Selecting rows by the returned bin index keeps stations whose value
    # lies exactly on a bin edge (the minimum and maximum always do);
    # comparing against the edges with strict > and < would drop them.
    bin_index = pd.qcut(df[card_type], 5, labels=False)

    for i in range(len(colors)):
        sources[i].data = ColumnDataSource.from_df(df[bin_index == i])

    push_notebook()

In [3]:
### MAIN
# Read in data
csv_list = ['pricehike1_final.csv', 'pricehike2_final.csv', 'pricehike3_final.csv', 'pricehike4_final.csv']
df_dict = process_data(csv_list)
df = df_dict[0] # The first period's data seeds the initial map
card_type_dict = {0 : 'FF_PCT', 1 : '7D_UNL_PCT', 2 : '30D_UNL_PCT'}

### PLOT SETTINGS
# Initial map extent: the bounding box of the stations' x/y coordinates
NYC = x_range, y_range = ((df.x.min(), df.x.max()), (df.y.min(), df.y.max()))
plot_width, plot_height = int(750), int(600)

### BASEMAP URL
url = 'http://a.basemaps.cartocdn.com/dark_all/{Z}/{X}/{Y}.png'
attribution = "Map tiles by Carto, under CC BY 3.0. Data by OpenStreetMap, under ODbL"

### TOOL SETTINGS
# Each tooltip row pairs a label with a column of the hovered glyph's data source
hover = HoverTool(
    tooltips=[
        ('STN', '@STATION'),
        ('LINES', '@LINES'),
        ('Full Fare %', '@FF_PCT'),
        ('7D UNL %', '@7D_UNL_PCT'),
        ('30D UNL %', '@30D_UNL_PCT')
    ])
tools = [PanTool(), WheelZoomTool(), hover]

# Create map with above settings
fig = figure(tools=tools, toolbar_location='left', x_range=x_range, y_range=y_range,
                plot_width=plot_width, plot_height=plot_height)
fig.add_tile(WMTSTileSource(url=url, attribution=attribution))
fig.axis.visible = False
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_color = None

# Create initial plot
# qcut assigns every station to one of five right-closed quantile bins.
# Selecting rows by the returned bin index keeps stations whose value lies
# exactly on a bin edge (the minimum and maximum always do); comparing
# against the edges with strict > and < would leave them off the map.
bin_index, bins = pd.qcut(df['FF_PCT'], 5, labels=False, retbins=True)
colors = YlOrRd8[0:5][::-1]
sources = {}

# One data source and one circle glyph per quantile bin, so that the
# callback can later swap each bin's data independently.
for i, color in enumerate(colors):
    mask = df[bin_index == i]
    cds = ColumnDataSource(mask)
    sources[i] = cds
    fig.circle('x', 'y', line_color=None, fill_color=color, size=5, source=sources[i])

# ColorBar Legend
color_mapper = LinearColorMapper(palette=colors, low=0, high=100)
color_bar = ColorBar(color_mapper=color_mapper, ticker=FixedTicker(ticks=[0, 20, 40, 60, 80, 100]),
                        label_standoff=12, border_line_color=None, location=(0,0), title='Percentile')
fig.add_layout(color_bar, 'right')

In [4]:
## Plot a line chart representing percentage of total swipes over time
ff_percents, sevenD_percents, thirtyD_percents = overall_percents(df_dict)
line_colors = viridis(11)[::-1]
line_chart = figure(
    plot_width=400,
    plot_height=400,
    x_axis_type='datetime',
    toolbar_location=None,
    title='% of Total Swipes',
    x_axis_label='Year',
)

# One x position per entry returned by overall_percents
x = [date(2010, 12, 29), date(2013, 3, 2), date(2015, 3, 21), date(2016, 12, 31)]

# (percentages, line color, legend label) for each MetroCard type
series = [
    (ff_percents, line_colors[0], 'Full Fare'),
    (sevenD_percents, line_colors[3], '7D UNL'),
    (thirtyD_percents, line_colors[6], '30D UNL'),
]

# Add all lines first and all markers afterwards, so every marker is added
# after every line.
for percents, color, label in series:
    line_chart.line(x, percents, line_width=5, line_color=color, legend=label)
for percents, _color, _label in series:
    line_chart.circle(x, percents, fill_color='white', size=7)

# Axis formatting: year-only dates on x, value followed by a literal '%' on y
line_chart.xaxis.formatter = DatetimeTickFormatter(years='%Y')
line_chart.yaxis.formatter = PrintfTickFormatter(format='%f%%')
line_chart.xaxis.major_label_orientation = 3.14/4
line_chart.set(y_range=Range1d(0, 75))

In [5]:
# Display the "% of Total Swipes" line chart built in the previous cell.
show(line_chart)

In [6]:
# Display the station map. notebook_handle=True requests a handle to this
# output; presumably it is what lets the push_notebook() call in
# callback_change_dataframe update this plot in place.
show(fig, notebook_handle=True)

## Instructions
Please use the wheel zoom feature for a closer look. Below are widgets that you can use to interact with the map. The first widget selects the time period (see the legend below), the second selects the type of MetroCard, and the last filters the stations by subway line.

In [7]:
# Choices offered by each widget. The Period and Card strings must match the
# keys that callback_change_dataframe looks up; 'ALL' disables line filtering.
period_options = ['Period 1', 'Period 2', 'Period 3', 'Period 4']
card_options = ['Full Fare', '7-Day Unlimited', '30-Day Unlimited']
line_options = ['ALL', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'J', 'L', 'M', 'N', 'Q', 'R', 'S', 'Z',
                '1', '2', '3', '4', '5', '6', '7']

interact(callback_change_dataframe, Period=period_options, Card=card_options, Line=line_options)

## Legend for Price Periods
Period 1: Before December 30, 2010 Price Hike - \$2.25 base fare

Period 2: Before March 3, 2013 Price Hike - \$2.25 base fare, \$2.50 SingleRide MetroCard ticket fare

Period 3: Before March 22, 2015 Price Hike - \$2.50 base fare, \$2.75 SingleRide MetroCard ticket fare

Period 4: After March 22, 2015 Price Hike (up to Oct 16, 2016) - \$2.75 base fare, \$3.00 SingleRide MetroCard ticket fare