In [1]:
import pandas as pd
import numpy as np

from bokeh.plotting import show, figure
from bokeh.models import ColumnDataSource, HoverTool, MultiLine
from bokeh.palettes import viridis
from bokeh.io import output_notebook

# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [2]:
# Load the raw records, then count cards per day / route / trip / ordinal / activity.
taps = pd.read_csv('D:/Data/Combined.csv')

# Parse the timestamp text and strip the 'Pseudo' marker from activity labels.
taps = taps.assign(
    TimeStamp=pd.to_datetime(taps['TimeStamp'], format='%Y%m%d %H:%M'),
    ActivityType=taps['ActivityType'].str.replace('Pseudo', ''),
)

# One count of CardNumber per (day, route, trip departure, ordinal, activity).
count_keys = [
    pd.Grouper(key='TimeStamp', freq='D'),
    'RouteCode',
    'TripDepartureTime',
    'Ordinal',
    'ActivityType',
]
card_counts = taps.groupby(count_keys)['CardNumber'].count()

# Pivot each level out to the columns and back in turn, filling the gaps that
# the pivot exposes with 0. The last level pivoted (Ordinal) stays as columns.
df = card_counts.unstack(['TripDepartureTime']).fillna(0)
for pivot_level in ('RouteCode', 'Ordinal'):
    df = df.stack().unstack([pivot_level]).fillna(0)
df = df.sort_index()


In [3]:
# Split boardings and alightings into two identically-indexed frames so they
# can be subtracted to get the onboard volume.
def _activity_counts(counts, activity):
    """Return the rows of `counts` for one ActivityType, re-indexed by
    (RouteCode, TripDepartureTime, TimeStamp) with the ActivityType level removed."""
    matches = counts.index.get_level_values('ActivityType').isin([activity])
    flat = counts[matches].reset_index()
    flat = flat.drop(columns=['ActivityType'])
    return flat.set_index(['RouteCode', 'TripDepartureTime', 'TimeStamp'])


df1 = _activity_counts(df, 'Boarding')
df2 = _activity_counts(df, 'Alighting')


In [4]:
# Onboard volume: boardings minus alightings, accumulated left-to-right across
# the Ordinal columns. A row missing on either side is treated as 0.
ROLLING_BINS = 4          # number of consecutive 900 s departure bins summed together
VOLUME_QUANTILE = 0.8     # quantile taken across days for each route / departure bin

net_boardings = df1.subtract(df2, fill_value=0)
df3 = net_boardings.cumsum(axis='columns')

# Long format: one row per (RouteCode, TripDepartureTime, TimeStamp, Ordinal);
# the value lands in a column named 0.
df3 = df3.stack().reset_index()

# Parse 'HH:MM' text into datetimes so departures can be binned by frequency.
df3['TripDepartureTime'] = pd.to_datetime(df3['TripDepartureTime'], format='%H:%M')

# Sum the volume per day / route / ordinal within 900-second departure bins,
# then spread Ordinal back over the columns. The leftover outer column level
# (the value column 0) is dropped, leaving the index as
# (TimeStamp, RouteCode, TripDepartureTime) -- no reset/set_index round trip needed.
df3 = df3.groupby(
    ['TimeStamp', 'RouteCode', 'Ordinal', pd.Grouper(key='TripDepartureTime', freq='900s')]
).sum()
df3 = df3.unstack(['Ordinal']).droplevel(0, axis=1)

# Put the departure bins on the columns and zero-fill missing combinations.
df3 = df3.stack().unstack(['TripDepartureTime']).fillna(0).sort_index()

# Rolling sum over ROLLING_BINS consecutive departure-bin columns.
# `rolling(..., axis=1)` is deprecated in pandas 2.1+, so roll over the rows of
# the transposed frame and transpose back; the result is the same.
# NOTE(review): the window counts columns, not elapsed time -- it spans exactly
# ROLLING_BINS * 900 s only if no departure bin is missing; confirm for the data.
df3 = df3.T.rolling(ROLLING_BINS).sum().T

# Back to Ordinal columns, then take the chosen quantile across days for each
# (RouteCode, TripDepartureTime) and round to whole numbers.
df3 = df3.stack().unstack(['Ordinal']).fillna(0).sort_index()
df3 = df3.groupby(['RouteCode', 'TripDepartureTime']).quantile(q=VOLUME_QUANTILE).round()

In [5]:
# Route to plot; must be a RouteCode value present in the first index level of df3.
ROUTE_CODE = "T01R(2018-10-27)"

# Select one route: the remaining index is TripDepartureTime, the columns are ordinals.
route_profile = df3.loc[ROUTE_CODE]

# Drop departure bins (rows) and ordinals (columns) whose total is zero.
nonzero_rows = route_profile.sum(axis=1) != 0
nonzero_cols = route_profile.sum(axis=0) != 0
route_profile = route_profile.loc[nonzero_rows, nonzero_cols]

# Reshape for Bokeh's multi_line: one row per departure bin, carrying the shared
# x values (ordinals), that bin's y values, and a distinct colour.
# Building a fresh frame (instead of assigning columns onto a slice) avoids
# pandas' SettingWithCopyWarning and leaves route_profile untouched.
ordinals = route_profile.columns.tolist()
df4 = pd.DataFrame({
    'TripDepartureTime': route_profile.index.strftime('%H:%M').tolist(),
    'x': [ordinals] * len(route_profile),
    'xt': route_profile.values.tolist(),
    'color': list(viridis(len(route_profile))),
})

In [6]:
# Largest value per route: flatten df3 to long form (the value column is named 0),
# take the maximum within each RouteCode, and write the result to CSV.
long_volumes = df3.stack().reset_index()
route_peaks = long_volumes.groupby('RouteCode')[0].max()
route_peaks.reset_index().to_csv('D:/Data/maxvol.csv')

In [7]:

# Plot one line per row of df4: 'x' holds the ordinals, 'xt' the matching values.
source = ColumnDataSource(df4)

# `height` replaces `plot_height`, which was removed in Bokeh 3.0.
p = figure(height=400, x_axis_label='Ordinal', y_axis_label='Onboard volume')

# Colour each line from its 'color' column; a hovered line becomes fully opaque.
renderer = p.multi_line(xs='x', ys='xt',
                        line_width=2, line_color='color', line_alpha=0.5,
                        hover_line_color='color', hover_line_alpha=1.0,
                        source=source)

# Selected lines are drawn thick and black; unselected lines keep the default look.
# NOTE(review): no tap/selection tool is added to the figure here, so these glyphs
# only matter if a selection is made some other way -- confirm whether a TapTool
# was intended.
selected_line = MultiLine(line_width=4, line_color='black', line_alpha=1)
nonselected_line = MultiLine(line_width=2, line_color='color', line_alpha=0.5)

renderer.selection_glyph = selected_line
renderer.nonselection_glyph = nonselected_line

# Tooltip shows the departure time of the hovered line and the cursor position.
p.add_tools(HoverTool(show_arrow=False, line_policy='next', tooltips=[
    ('Trip Departure Time', '@TripDepartureTime'),
    ("(x,y)", "($x, $y)")
]))

show(p)


In [199]:
# Write `df` (the zero-filled count table assembled in the data-loading cell) to CSV.
# NOTE(review): this cell's execution count is out of sequence with the cells
# above -- confirm the notebook still runs top to bottom on a fresh kernel.
df.to_csv('D:/Data/all.csv')