In [1]:
import matplotlib.pyplot as plt
import plotly.express as px
import utils
import importlib
import matplotlib.dates as mdates
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib.ticker as plticker
importlib.reload(utils)
import datetime as dt
import numpy as np
import warnings

import pandas as pd
from pandas.errors import SettingWithCopyWarning
# from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)



* load logs
* pick trips/events
* x-axis is time
* y-axis is the number of passengers picked up
* text on top of each line is an explanation or the overload bus id

In [2]:
def merge_all_indices(valid_df, all_indices):
    """Align `valid_df` onto a common grid of timestamps.

    Args:
        valid_df: frame with an `actual_time` column, which becomes the index.
        all_indices: iterable of timestamps; duplicates are removed and the
            remaining values are sorted before being parsed as datetimes.

    Returns:
        A frame indexed by the unique timestamps. Because this is a right
        join, rows of `valid_df` whose `actual_time` is not in `all_indices`
        are dropped, and timestamps with no matching row have every column
        filled with 0.
    """
    unique_stamps = pd.to_datetime(np.unique(np.asarray(all_indices)))
    time_grid = pd.DataFrame(index=unique_stamps)
    events_by_time = valid_df.set_index('actual_time')
    return events_by_time.join(time_grid, how='right').fillna(0)

# SUBPLOTS

In [36]:
# pick a trip where mcts dispatched to but not the baseline
log_files = ['10_2022_OLD_C/no_inject_20221001_200IT_10CAP.log',
             '10_2022_OLD_C/BL/no_inject_BL_20221001_0IT_10CAP.log']
labels = ["mcts", "bl"]

# df_arr maps each label to [bus-event frame, dispatch frame] parsed from its log.
# `df` and `dispatch_df` stay bound to the last log's frames after the loop;
# later cells read them directly.
df_arr = {}
for label, log_file in zip(labels, log_files):
    df = utils.get_bus_df(log_file)
    dispatch_df = utils.get_dispatch_df(log_file)
    df_arr[label] = [df, dispatch_df]

In [103]:
# Preview the bus-event frame currently bound to `df`. The loading loop rebinds
# `df` on every iteration, so this is the frame for the last entry of `log_files`
# (assuming no other cell has reassigned `df` since).
df

Unnamed: 0,actual_time,bus_id,trip_id,scheduled_time,stop_id,got_on_bus,waiting_to_board,offs,remain,load,overload_id
0,2022-10-01 04:40:09,137,268257,2022-10-01 04:22:00,HICHICNN,2,2,0,0,2,0
2,2022-10-01 04:40:26,137,268257,2022-10-01 04:22:55,MTVCURSN,0,0,2,0,0,0
4,2022-10-01 04:40:47,137,268257,2022-10-01 04:23:25,MXIMTVIE,0,0,0,0,0,0
6,2022-10-01 04:41:32,137,268257,2022-10-01 04:24:41,MXIZELID,0,0,0,0,0,0
8,2022-10-01 04:42:13,137,268257,2022-10-01 04:25:24,BELHHINN,6,6,0,0,6,0
...,...,...,...,...,...,...,...,...,...,...,...
93877,2022-10-02 01:05:35,42,268209,2022-10-02 00:56:44,BELHHIWF,0,0,8,0,20,1
93879,2022-10-02 01:06:17,42,268209,2022-10-02 00:58:07,MXOZELID,0,0,2,0,18,1
93881,2022-10-02 01:07:01,42,268209,2022-10-02 01:00:06,MXOMTVIE,0,0,0,0,18,1
93883,2022-10-02 01:07:22,42,268209,2022-10-02 01:00:45,MTVCURNN,23,23,0,0,41,1


In [98]:
# Trips that received an overflow dispatch in the MCTS run but not in the baseline run.
mcts_trips_dispatch = df_arr["mcts"][1].trip_id.unique()
bl_trips_dispatch = df_arr["bl"][1].trip_id.unique()
mcts_only_dispatches = np.setdiff1d(mcts_trips_dispatch, bl_trips_dispatch)

# Seed the global NumPy RNG so the same trip is picked on every run.
np.random.seed(100)
random_trip = np.random.choice(mcts_only_dispatches)
print(random_trip)

# Scheduled time span of the selected trip, taken from the MCTS bus-event frame.
tdf = df_arr["mcts"][0]
selected_schedule = tdf.query("trip_id == @random_trip").scheduled_time
start_time = selected_schedule.min()
end_time = selected_schedule.max()

# Collect the trips dispatched (in either run) within +/- time_range_m minutes
# of the selected trip's first scheduled time.
# NOTE(review): the window is anchored on `start_time` at both ends and `end_time`
# is never used in this cell — confirm the upper bound was not meant to use `end_time`.
time_range_m = 15
window_half_width = pd.Timedelta(minutes=time_range_m)
window_open = start_time - window_half_width
window_close = start_time + window_half_width

bl_plus_mcts_trip_ids = []
for label in labels[:len(log_files)]:
    tdf = df_arr[label][1]
    tdf = tdf[tdf['actual_time'].between(window_open, window_close)]
    nearby_trip_ids = tdf.trip_id.to_list()
    bl_plus_mcts_trip_ids.extend(nearby_trip_ids)
    print(label, nearby_trip_ids)

265060
mcts ['268568', '265061', '265944', '265060', '270430', '265080', '265958']
bl ['267788', '268797', '265080', '265356', '266000']


In [100]:
# Timestamp grid shared by every subplot: all bus-event and dispatch times from
# *every* loaded log. `merge_all_indices` right-joins onto this grid, so any event
# whose timestamp is missing from it is silently dropped. Building the grid from
# the leftover loop variables `df` / `dispatch_df` covered only the last log and
# could therefore drop events of the other run. Order does not matter here because
# `merge_all_indices` de-duplicates and sorts the values.
dt_indices = [
    ts
    for bus_events, dispatches in df_arr.values()
    for frame in (bus_events, dispatches)
    for ts in frame.actual_time.unique().tolist()
]

In [102]:
# Compare passengers served per stop event between the two runs for the trips
# dispatched near the selected trip (collected in the trip-selection cell).
trip_ids = bl_plus_mcts_trip_ids

# One subplot row per run: row `df_index + 1` shows the log stored under `labels[df_index]`.
fig = make_subplots(
    rows=2, cols=1,
    horizontal_spacing = 0.05, vertical_spacing=0.1
)

for df_index in [0, 1]:
  # Bus events of this run, restricted to the trips of interest.
  tdf = df_arr[labels[df_index]][0]
  tdf = tdf[tdf['trip_id'].isin(trip_ids)]
  # Right-join onto the timestamps in `dt_indices`; timestamps without an event become all-zero rows.
  # NOTE(review): events whose `actual_time` is not in `dt_indices` are dropped by that join —
  # confirm `dt_indices` covers this run's timestamps.
  all_indices = merge_all_indices(tdf, dt_indices)

  # Plot window: first/last real event padded by 30 minutes on each side.
  # `.time()` drops the date, so `between_time` filters on time of day only.
  only_valid = all_indices[all_indices['trip_id'].isin(trip_ids)]
  min_time = (only_valid.index.min() - dt.timedelta(minutes=30)).time()
  max_time = (only_valid.index.max() + dt.timedelta(minutes=30)).time()
  merged_df = all_indices.between_time(min_time, max_time)
  # Constant-zero column used to draw the baseline; `remain` is negated but not plotted in this cell.
  merged_df['zero'] = 0
  merged_df['remain'] = merged_df['remain'] * -1
  
  # Thin black zero line spanning the whole window.
  fig.add_trace(go.Scatter(x=merged_df.index, y=merged_df.zero, line=dict(color='black', width=1), showlegend=False), row=df_index+1, col=1)

  # Grey bars: boardings on regular buses (overload_id == 0), one trace per trip.
  # Only the first trace of the first row gets a legend entry, so the legend shows it once.
  # `trip_id != 0` excludes the zero-filled filler rows.
  # NOTE(review): `width` is presumably in milliseconds on a date axis — confirm against plotly docs.
  # marker_pattern_shape=".", marker_pattern_fillmode='replace'
  traces = [go.Bar(x=trip_df.index, y=trip_df.got_on_bus, width=25000, name="regular bus passenger served", marker_color='grey', marker_opacity=0.5, showlegend=(i==0 and df_index == 0)) \
              for i, (trip_id, trip_df) in enumerate(merged_df.query("trip_id != 0 and overload_id == 0").groupby("trip_id"))]
  fig.add_traces(traces, rows=df_index+1, cols=1)

  # Fully transparent rectangle (opacity=0, no border) used only to carry the
  # "Total served" annotation; the total is taken from `tdf`, i.e. before the window filter.
  fig.add_vrect(x0=merged_df.index.min(), x1=merged_df.index.max(), 
            annotation_text=f"Total served:{int(tdf.got_on_bus.sum())}", 
            annotation_position="top left", annotation_textangle=0, annotation=dict(font=dict(size=20), bgcolor="rgba(255,255,255,0.8)"),
            fillcolor="green", opacity=0, line_width=0, row=df_index+1, col=1)
  
  # Blue bars: boardings on overflow buses (overload_id == 1), one trace per trip.
  # Both palette entries are 'blue', so the alternation by `i % 2` has no visible effect.
  marker_colors = ['blue', 'blue']
  traces = [go.Bar(x=trip_df.index, y=trip_df.got_on_bus, width=35000, name="overflow bus passengers served", marker_color=marker_colors[i%2], showlegend=(i==0 and df_index == 0)) \
              for i, (trip_id, trip_df) in enumerate(merged_df.query("trip_id != 0 and overload_id == 1").groupby("trip_id"))]
  fig.add_traces(traces, rows=df_index+1, cols=1)

  # Dispatch events of this run for the same trips; `bus_id != 0` removes the zero-filled filler rows.
  dispatch_df = df_arr[labels[df_index]][1]
  dispatch_df = dispatch_df[dispatch_df['trip_id'].isin(trip_ids)]
  all_indices = merge_all_indices(dispatch_df, dt_indices).query("bus_id != 0")
      
  # `k` is the dispatch timestamp, `v` the dispatch row.
  for i, (k, v) in enumerate(all_indices.sort_index().iterrows()):
    

      # Zero-length dashed line; it appears to exist only to give the dashed
      # dispatch marker (drawn by `add_vline` below) a legend entry.
      fig.add_trace(go.Scatter(x=[k,k], 
                          y=[0, 0], 
                          mode='lines', 
                          line=dict(color='black', dash='dash'),
                          name='overflow dispatched', showlegend=(i==0 and df_index == 0)), row=df_index+1, col=1)
      
      trip_id = v['trip_id']
      bus_id = v['bus_id']
      fig.add_vline(x=k, line_width=2, line_dash="dash", line_color="black", row=df_index+1, col=1)
      # Events served by this dispatched bus on the dispatched trip (ids are compared as quoted strings).
      a = merged_df.query(f"bus_id == '{bus_id}' and trip_id == '{trip_id}'")
      # `a_start` is computed but not used further in this cell.
      # NOTE(review): if no row matches, `a_end` is presumably NaT and is passed to `add_vrect` as-is.
      a_start = a.index.min()
      a_end = a.index.max()
      a_sum_served = int(a.got_on_bus.sum())
      # a_start, a_end, a_sum_served
      # Alternate the vertical offset so labels of consecutive dispatches do not overlap.
      if i % 2 == 0:
        shift = -80
      else:
        shift = 0
      # print(a_sum_served, i, shift)
      # Transparent rectangle carrying the rotated "<bus_id>:<passengers served>" label.
      fig.add_vrect(x0=k, x1=a_end, 
              #   annotation_text=f"Total served:{a_sum_served}", annotation_position="top left",
                annotation_text=f"{bus_id}:{a_sum_served}", annotation_position="top left", annotation_textangle=-90, annotation=dict(font=dict(size=20), yshift=shift, bgcolor="rgba(255,255,255,0.8)"),
                fillcolor="green", opacity=0, line_width=0, row=df_index+1, col=1)

# fig.update_layout(showlegend=False)
fig.update_layout(margin={"r":10,"t":10,"l":10,"b":10}, width=1400, height=600)
# Horizontal legend placed just above the plot area, right-aligned.
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
    font=dict(size=15)
))
# The first call sets the axis title; the second call then replaces the range with [0, 40],
# so the [-40, 40] range has no lasting effect.
fig.update_yaxes(range=[-40, 40], title='passengers served')
fig.update_yaxes(range=[0, 40])

# fig.write_image("plots/timeseries_comparison.eps")
# fig.show()

## BREAKDOWN HANDLING

In [537]:
# Logs compared in the breakdown study; one bus-event frame is parsed per log.
log_files = ['logs/all_day_nobreak/no_inject_20210607_BL_nobreak.log',
             'logs/all_day_break_issue/no_inject_20210607_BL.log',
             'logs/all_day_break_issue/no_inject_20210607_all_served_IT200.log']

# Dispatch frames are not parsed here; `dispatch_arr` starts empty.
dispatch_arr = []
df_arr = [utils.get_bus_df(log_file) for log_file in log_files]

# Keep `df` bound to the frame of the last log, as other cells read `df` directly.
df = df_arr[-1]

In [538]:
import re
def _split_log_line(line):
    """Return `(timestamp, message_tokens)` for one log line.

    The timestamp is parsed from the second `[...]` group of the line; the
    message is the text after the bracketed prefixes, split on single spaces.
    """
    datetime_format = '%Y-%m-%d %H:%M:%S'
    data = line.rstrip()
    # Raw strings: a plain "\[" is an invalid escape sequence and triggers a warning.
    in_brackets = re.findall(r"\[(.*?)\]", data)
    out_brackets = re.findall(r"(.*?)(?:\[.*?\]|$)", data)
    timestamp = dt.datetime.strptime(in_brackets[1], datetime_format)
    tokens = out_brackets[2].strip().split(" ")
    return timestamp, tokens


def _quoted_value(text, position):
    """Return the text inside the first pair of single quotes of the space-separated token at `position`."""
    return text.split(" ")[position].split("'")[1]


def get_dispatch_df(log_file, bus_df=None):
    """Parse overflow-bus dispatch events out of a simulation log.

    Two kinds of log lines are recognised:

    * ``Dispatching overflow bus 44 from BNA @ stop 8ABROSN`` -> type "overflow".
      The trip id is read from the line two lines earlier in the log.
    * ``Sending takeover overflow bus ...`` -> type "broken". The overflow bus
      id and the broken bus id are read from the line two lines earlier; the
      trip id is the trip of the broken bus's last row in `bus_df`.

    Args:
        log_file: path of the log to parse.
        bus_df: bus-event frame belonging to `log_file`, with `bus_id` and
            `trip_id` columns. Only needed for takeover lines. When omitted it
            is loaded on demand with `utils.get_bus_df(log_file)`, so the
            lookup always uses the frame of the log being parsed rather than
            whatever notebook-global frame happens to be loaded.

    Returns:
        De-duplicated DataFrame with columns actual_time, bus_id, from_stop,
        trip_id, stop_id, type. A log without dispatches yields an empty frame
        that still has these columns.

    Raises:
        IndexError: if a dispatch line has fewer than two preceding lines since
            the start of the file or the previous dispatch, or if the broken
            bus has no rows in `bus_df`.
    """
    from collections import deque

    columns = ["actual_time", "bus_id", "from_stop", "trip_id", "stop_id", "type"]
    # Only the current line and the two before it are ever inspected, so a
    # 3-line window replaces buffering every line of the log.
    recent_lines = deque(maxlen=3)
    bus_stats = []
    with open(log_file) as file:
        for line in file:
            recent_lines.append(line)
            is_overflow = 'Dispatching overflow' in line
            is_takeover = 'Sending takeover overflow bus' in line
            if not (is_overflow or is_takeover):
                continue

            # Context line written two lines before the dispatch message.
            dispatch_info = recent_lines[-3].rstrip()
            time, log_text = _split_log_line(line)

            if is_overflow:
                entry = {"actual_time": time,
                         "bus_id": log_text[3],
                         "from_stop": log_text[5],
                         "trip_id": _quoted_value(dispatch_info, -1),
                         "stop_id": log_text[-1],
                         "type": "overflow"}
            else:
                overflow_bus_id = _quoted_value(dispatch_info, -3)
                broken_bus = _quoted_value(dispatch_info, -1)
                if bus_df is None:
                    # Same loader the notebook uses to build its bus-event frames.
                    bus_df = utils.get_bus_df(log_file)
                # The takeover bus inherits the last trip recorded for the broken bus.
                broken_bus_trips = bus_df.loc[bus_df["bus_id"] == broken_bus, "trip_id"]
                entry = {"actual_time": time,
                         "bus_id": overflow_bus_id,
                         "from_stop": log_text[-4],
                         "trip_id": broken_bus_trips.iloc[-1],
                         "stop_id": log_text[-1],
                         "type": "broken"}

            bus_stats.append(entry)
            # Start a fresh context window after every dispatch, as before.
            recent_lines.clear()

    # Explicit columns keep the schema stable even when nothing was dispatched.
    return pd.DataFrame(bus_stats, columns=columns).drop_duplicates()

# One dispatch frame per log, in the same order as `log_files`.
dispatch_arr = [get_dispatch_df(path) for path in log_files]

In [515]:
# Find other trips of bus 1829
# "242142", "242177", "242177", "242147"
#
# "242181"
#
# "241652", "241678", "242151", "242185",
# "241656", "241680", "242155", "242189",
# "241660", "241682", "242159", "242193",
# "241664", "241684", "242163", "242197",
# "241668", "241686", "242167", "242201",
# "241670", "241688", "242171", "242205",
# "241672", "241690", "243471", "243465",
# "243516"

# Rows logged for bus '45' on trip '242181' in the frame currently bound to `df`.
bus_45_trip_filter = "bus_id == '45' and trip_id == '242181'"
df.query(bus_45_trip_filter)

Unnamed: 0,actual_time,bus_id,trip_id,scheduled_time,stop_id,got_on_bus,waiting_to_board,offs,remain,load,overload_id
11416,2021-06-07 09:14:24,45,242181,2021-06-07 08:59:00,JEF8AVEF,0,0,0,0,1,1
11422,2021-06-07 09:14:32,45,242181,2021-06-07 09:00:02,JEF6AVEN,3,3,0,0,4,1
11432,2021-06-07 09:14:50,45,242181,2021-06-07 09:01:42,5AVJRGSN,0,0,1,0,3,1
11464,2021-06-07 09:15:13,45,242181,2021-06-07 09:03:46,5AVHARSN,2,2,0,0,5,1
11485,2021-06-07 09:15:44,45,242181,2021-06-07 09:07:00,MCC5_11,0,0,5,0,0,1


In [516]:
# Dispatches of the second log whose time of day falls between 08:00 and 09:20.
# 245790 245787 245855 244871 243379
(
    dispatch_arr[1]
    .set_index('actual_time')
    .between_time('8:00', '9:20')
)

Unnamed: 0_level_0,bus_id,from_stop,trip_id,stop_id,type
actual_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-06-07 08:03:21,44,100OAKS,243264,MAYDUPSN,overflow
2021-06-07 08:05:00,43,MCC5_4,245784,UNI2AWN,overflow
2021-06-07 08:06:22,43,MCC4_24,242146,33AJOHSN,overflow
2021-06-07 08:09:52,41,MCC5_6,243518,GRAFERSN,overflow
2021-06-07 08:11:04,42,MCSHERM,243177,LEBOLDNN,overflow
2021-06-07 08:12:12,43,33AJOHNM,245281,H70ERIWN,overflow
2021-06-07 08:18:35,41,LPSCI,246302,UNI7AEN,overflow
2021-06-07 08:23:00,42,MCSHERM,244794,NXOPBODY,overflow
2021-06-07 08:28:28,41,MCC4_23,244871,MURBORNF,overflow
2021-06-07 08:29:26,45,WALMARTC,245786,1SSPRSM,overflow


In [517]:
# Compare passengers served on the selected trips across the three logs in
# `log_files`; subplot row `df_index + 1` shows `df_arr[df_index]`.
fig = make_subplots(
    rows=3, cols=1,
    horizontal_spacing=0.05, vertical_spacing=0.1
)
trip_ids = ["242177", "242147", "242181", "241652", "241678", "242151", "242185"]# + ["245790", "245787", "245855", "244871", "243379"]

# Time of the disruption marker drawn on the plots (previously repeated as a literal).
disruption_time = '2021-06-07 09:00:00'
# Padding added on both sides of the first/last event of the selected trips.
window_pad = dt.timedelta(minutes=5)

for df_index in [0, 1, 2]:
    subplot_row = df_index + 1
    is_first_row = df_index == 0

    # Bus events of this log for the selected trips, aligned on this log's own
    # event + dispatch timestamps.
    tdf = df_arr[df_index]
    tdf = tdf[tdf['trip_id'].isin(trip_ids)]
    dt_indices = df_arr[df_index].actual_time.unique().tolist() + dispatch_arr[df_index].actual_time.unique().tolist()
    all_indices = merge_all_indices(tdf, dt_indices)

    only_valid = all_indices[all_indices['trip_id'].isin(trip_ids)]
    min_time = (only_valid.index.min() - window_pad).time()
    # The upper bound previously *subtracted* the padding, which cut the last five
    # minutes of real events out of the window (and out of "Total served"). Pad
    # outwards on both sides, like the MCTS-vs-baseline plot does.
    max_time = (only_valid.index.max() + window_pad).time()
    merged_df = all_indices.between_time(min_time, max_time)
    # Drop the zero-filled filler rows; copy so the column assignments below act
    # on an independent frame.
    merged_df = merged_df[merged_df['trip_id'].isin(trip_ids)].copy()
    merged_df['zero'] = 0
    merged_df['remain'] = merged_df['remain'] * -1

    # Thin black zero line.
    fig.add_trace(go.Scatter(x=merged_df.index, y=merged_df.zero, line=dict(color='black', width=1), showlegend=False), row=subplot_row, col=1)

    # Grey bars: boardings on regular buses, one trace per trip; the legend entry
    # is shown once (first trace of the first row).
    traces = [go.Bar(x=trip_df.index, y=trip_df.got_on_bus, width=25000, name="regular bus passenger served", marker_color='grey', marker_opacity=0.5, showlegend=(i == 0 and is_first_row))
              for i, (trip_id, trip_df) in enumerate(merged_df.query("trip_id != 0 and overload_id == 0").groupby("trip_id"))]
    fig.add_traces(traces, rows=subplot_row, cols=1)

    # Transparent rectangle that only carries the "Total served" annotation.
    fig.add_vrect(x0=merged_df.index.min(), x1=merged_df.index.max(),
                  annotation_text=f"Total served:{int(merged_df.got_on_bus.sum())}",
                  annotation_position="top left", annotation_textangle=0, annotation=dict(font=dict(size=20), bgcolor="rgba(255,255,255,0.8)"),
                  fillcolor="green", opacity=0, line_width=0, row=subplot_row, col=1)

    # Blue bars: boardings on overflow buses, one trace per trip.
    marker_colors = ['blue', 'blue']
    traces = [go.Bar(x=trip_df.index, y=trip_df.got_on_bus, width=35000, name="overflow bus passengers served", marker_color=marker_colors[i % 2], showlegend=(i == 0 and is_first_row))
              for i, (trip_id, trip_df) in enumerate(merged_df.query("trip_id != 0 and overload_id == 1").groupby("trip_id"))]
    fig.add_traces(traces, rows=subplot_row, cols=1)

    # Dispatch events of this log for the selected trips (filler rows removed).
    dispatch_df = dispatch_arr[df_index]
    dispatch_df = dispatch_df[dispatch_df['trip_id'].isin(trip_ids)]
    all_indices = merge_all_indices(dispatch_df, dt_indices).query("bus_id != 0")
    display(all_indices)

    # `k` is the dispatch timestamp, `v` the dispatch row.
    for i, (k, v) in enumerate(all_indices.sort_index().iterrows()):
        # Zero-length dashed line that supplies the legend entry for the dashed
        # dispatch marker drawn by `add_vline`.
        fig.add_trace(go.Scatter(x=[k, k],
                                 y=[0, 0],
                                 mode='lines',
                                 line=dict(color='black', dash='dash'),
                                 name='overflow dispatched', showlegend=(i == 0 and is_first_row)), row=subplot_row, col=1)

        bus_id = v['bus_id']
        fig.add_vline(x=k, line_width=2, line_dash="dash", line_color="black", row=subplot_row, col=1)

        # All rows of the dispatched bus inside the window (deliberately not
        # restricted to the dispatched trip).
        a = merged_df.query(f"bus_id == '{bus_id}'")
        # With no matching rows `a.index.max()` is NaT; fall back to the dispatch
        # time so the annotation rectangle still has valid bounds.
        a_end = a.index.max() if not a.empty else k
        a_sum_served = int(a.got_on_bus.sum())

        # Alternate the vertical offset so neighbouring labels do not overlap.
        shift = -80 if i % 2 == 0 else 0
        fig.add_vrect(x0=k, x1=a_end,
                      annotation_text=f"{bus_id}:{a_sum_served}", annotation_position="top left", annotation_textangle=0, annotation=dict(font=dict(size=20), yshift=shift, bgcolor="rgba(255,255,255,0.8)"),
                      fillcolor="green", opacity=0, line_width=0, row=subplot_row, col=1)

    # Red disruption marker on every row except the first.
    if df_index > 0:
        fig.add_vline(x=disruption_time, line_width=2, line_dash="solid", line_color="red", row=subplot_row, col=1)

    # Zero-length red line that supplies the "disruption event" legend entry
    # (legend shown once, on the first row).
    fig.add_trace(go.Scatter(x=[disruption_time, disruption_time],
                             y=[0, 0],
                             mode='lines',
                             line=dict(color='red', dash='solid'),
                             name='disruption event', showlegend=is_first_row), row=subplot_row, col=1)

# Same y-range on every subplot; applied once after all rows exist instead of on
# every loop iteration.
fig.update_yaxes(range=[0, 50])

# fig.update_layout(showlegend=False)
fig.update_layout(margin={"r":10,"t":10,"l":10,"b":10}, width=1400, height=600)
# Horizontal legend just above the plot area, right-aligned.
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
    font=dict(size=15)
))
# fig.update_yaxes(range=[-40, 40], title='passengers served')

# fig.write_image("plots/timeseries_comparison_breakdowns.eps")
fig.show()

Unnamed: 0,bus_id,from_stop,trip_id,stop_id,type
2021-06-07 10:29:00,42,HICHICNN,242151,JEF9AWN,overflow


Unnamed: 0,bus_id,from_stop,trip_id,stop_id,type
2021-06-07 09:04:19,44,MCC5_6,242181,JEF8AVEF,broken


Unnamed: 0,bus_id,from_stop,trip_id,stop_id,type
2021-06-07 08:27:24,42,WALMARTC,242147,MCC5_8,overflow
2021-06-07 09:12:13,45,DOMFREEN,242181,JEF8AVEF,broken


In [508]:
# Rows of the second log for bus '45' after 09:14:24, shown with at most 5 rows.
bus_45_later_filter = "bus_id == '45' and actual_time > '2021-06-07 09:14:24'"
adf = df_arr[1].query(bus_45_later_filter)
with pd.option_context('display.max_rows', 5):
    display(adf)

Unnamed: 0,actual_time,bus_id,trip_id,scheduled_time,stop_id,got_on_bus,waiting_to_board,offs,remain,load,overload_id
10883,2021-06-07 09:15:07,45,244796,2021-06-07 09:15:07,MXOZELID,0,0,1,0,54,1
10964,2021-06-07 09:17:06,45,244796,2021-06-07 09:17:06,MXOMTVIE,0,0,0,0,54,1
...,...,...,...,...,...,...,...,...,...,...,...
42948,2021-06-08 00:31:29,45,243468,2021-06-07 23:10:25,JAM5AEN,0,0,0,0,0,1
42949,2021-06-08 00:31:48,45,243468,2021-06-07 23:12:00,MCC5_4,0,0,0,0,0,1
