In [310]:
import io
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import random
import folium
import datetime
import time

from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
from shapely.geometry import MultiPoint
from folium.plugins import MarkerCluster
from branca.element import Figure

In [311]:
ROOT_DIR = os.path.dirname(os.path.abspath("top_level_file.txt"))
ROUTE_PATH = os.path.join(ROOT_DIR, "data/route_data/route_data.xlsx")
OUTPUT_PATH = os.path.join(ROOT_DIR, "output")
DELAY_PATH = os.path.join(ROOT_DIR, "data/route_data/ttc-bus-delay-data-2020_final.xlsx")

def save_fig(name):
    path = OUTPUT_PATH + "/" + name + ".png"
    plt.savefig(path)

mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.size'] = 12

In [312]:
df_shape = pd.read_excel(ROUTE_PATH, sheet_name='shapes')
df_route = pd.read_excel(ROUTE_PATH, sheet_name='routes')
df_trips = pd.read_excel(ROUTE_PATH, sheet_name='trips')

In [313]:
sheets = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df_list = []

for sheet in sheets:
    df = pd.read_excel(DELAY_PATH, sheet_name=sheet)
    df['Month'] = sheet
    df_list.append(df)

df_2020 = pd.concat(df_list, ignore_index=True)

for i in df_2020.index:
    val = df_2020['Incident'][i]
    if "Late" in val:
        if "Mech" in val:
            df_2020['Incident'][i] = "Mechanical"
        elif "Management" in val:
            df_2020['Incident'][i] = "Management"
        elif "Operator" in val:
            df_2020['Incident'][i] = "Operator"
        elif "Vision" in val:
            df_2020['Incident'][i] = "Vision"
        else:
            df_2020['Incident'][i] = "General Delay"
    
    elif "Mech" in val:
        df_2020['Incident'][i] = "Mechanical"
    elif "Secu" in val or "Investi" in val:
        df_2020['Incident'][i] = "Security /\nInvestigation"
    elif "Road" in val:
        df_2020['Incident'][i] = "Road Block"
    elif "Opera" in val:
        df_2020['Incident'][i] = "Operator"
    elif "Utili" in val:
        df_2020['Incident'][i] = "Off Route"
    elif "Colli" in val:
        df_2020['Incident'][i] = "Collision"
    elif "Emer" in val:
        df_2020['Incident'][i] = "Emergency\nServices"
        
    if df_2020.at[i, 'Delay'] > 500:
        df_2020.at[i, 'Delay'] = 500

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2020['Incident'][i] = "Mechanical"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2020['Incident'][i] = "Security /\nInvestigation"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2020['Incident'][i] = "Operator"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2020['Incident'][i] = "Mechanical"
A v

In [314]:
# shape_id -> route_id

unique_shape_ids = list(df_shape['shape_id'].unique())
route_ids = []
for uid in unique_shape_ids:
    val = df_trips[df_trips['shape_id']==uid]['route_id'].to_list()[0]
    route_ids.append(val)
    
shape_to_route = dict([(k, v) for (k, v) in zip(unique_shape_ids, route_ids)])

In [315]:
# route_id -> route_short_name

unique_route_ids = list(set(route_ids))
unique_route_ids.sort()
short_nums = []

for uid in unique_route_ids:
    val = df_route[df_route['route_id']==uid]['route_short_name'].to_list()[0]
    short_nums.append(val)
    
route_to_num = dict([(k, v) for (k, v) in zip(unique_route_ids, short_nums)])

In [316]:
df_shape['route_num'] = ""
df_shape['route_id'] = ""

for uid in unique_shape_ids:
    df_shape.loc[df_shape['shape_id']==uid, 'route_num'] = route_to_num[shape_to_route[uid]]
    df_shape.loc[df_shape['shape_id']==uid, 'route_id'] = shape_to_route[uid]

df_shape

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled,route_num,route_id
0,886387,43.775608,-79.346046,1,0.0000,10,64815
1,886387,43.775536,-79.346361,2,0.0262,10,64815
2,886387,43.775521,-79.346449,3,0.0335,10,64815
3,886387,43.775518,-79.346575,4,0.0435,10,64815
4,886387,43.775544,-79.346697,5,0.0540,10,64815
...,...,...,...,...,...,...,...
352015,890357,43.762831,-79.404955,50,4.8289,4,65027
352016,890357,43.762279,-79.407368,51,5.0313,4,65027
352017,890357,43.762048,-79.408461,52,5.1230,4,65027
352018,890357,43.761750,-79.409808,53,5.2360,4,65027


In [331]:
df_shape_grouped = df_shape.groupby(by='route_num')
coords_dict = {}
for name, group in df_shape_grouped:
    shape_size = 0
    for route, shape in group.groupby(by='shape_id'):
        if shape.shape[0] > shape_size:
            shape_size = shape.shape[0]
            lat = shape['shape_pt_lat']
            lon = shape['shape_pt_lon']

    coords_dict[name] = [(lt, lo) for (lt, lo) in zip(lat, lon)]

exceptions = []
coords_dict = dict([(k, v) for (k, v) in coords_dict.items() if k not in exceptions])

# adding route centroids in delays df for marker cluster locations

df_2020['coords_lat'] = ""
df_2020['coords_lon'] = ""

for route, coords in coords_dict.items():
    mid = int(len(coords) / 2)
    df_2020.loc[df_2020['Route']==route, 'coords_lat'] = coords_dict[route][mid][0]
    df_2020.loc[df_2020['Route']==route, 'coords_lon'] = coords_dict[route][mid][1]

In [350]:
def color_change(val):
    if val <= 60:
        return 'green'
    elif 60 < val < 240:
        return 'yellow'
    else:
        return 'red'
    
route_issue_count = df_2020.groupby(by='Route').size()
route_issue_count = dict([(k, v) for (k, v) in zip(route_issue_count.index, route_issue_count.values)])
max_route_issue_count = max([v for (k, v) in route_issue_count.items()])
    
figure = Figure(height=600,width=800)
m = folium.Map(location=[43.7, -79.3832],tiles='cartodbdark_matter',zoom_start=11.5, min_zoom=10, max_zoom=30)
figure.add_child(m)
subway_col = {1: 'yellow', 2: "green", 3: "cyan", 4: "purple"}

for key, value in coords_dict.items():
    
    if key in [1, 2, 3, 4]:
        color = subway_col[key]
        weight = 4
        opacity = 0.3
    else:
        color = 'red' #'#{:02x}{:02x}{:02x}'.format(*map(lambda x: random.randint(0, 255), range(3)))
        try:
            weight = 8 * (route_issue_count[key] / max_route_issue_count)**2
            opacity = route_issue_count[key] / max_route_issue_count
            size = 14 * route_issue_count[key] / max_route_issue_count
        except KeyError:
            weight = 0.1
            opacity = 0.1
    
    f = folium.FeatureGroup(f"Route {key}", show=True)
    line_1 = folium.vector_layers.PolyLine(value, tooltip=f'<b>Route {key}</b>', color=color, weight=weight, opacity=opacity).add_to(f)
    f.add_to(m)
    
# marker clusters

marker_cluster = MarkerCluster(locations=None, name="Delays").add_to(m)

for lat, lon, val, inc in zip(df_2020['coords_lat'].to_list(), df_2020['coords_lon'].to_list(), df_2020['Delay'].to_list(), df_2020['Incident'].to_list()):
    if type(lat) == str or type(lon) == str or np.isnan(val):
        continue
    else:
        popup = f"{inc}, {val} mins"
        folium.CircleMarker(location=[lat, lon],
                            radius=5,
                            popup=popup,
                            fill_color=color_change(val),
                            color="grey",
                            fill_opacity=0.9).add_to(marker_cluster)

folium.LayerControl().add_to(m)

html_path = 'map.html'
m.save(html_path)

In [348]:
ttl_issues = sum([v for (k, v) in route_issue_count.items() if k in [36, 35, 52, 29, 504, 54, 60, 32, 501, 39, 133]])
ttl = sum([v for (k, v) in route_issue_count.items()])
ttl_issues / ttl

# 36 Finch West (Bus) … 30,000
# 35 Jane (Bus) … 26,500
# 52 Lawrence West (Bus) … 25,200
# 29 Dufferin (Bus) … 24,100
# 504 King (Streetcar) … 22,500
# 54 Lawrence East (Bus) … 20,900
# 60 Steeles West (Bus) … 18,400
# 32 Eglinton West (Bus) … 18,300
# 501 Queen (Streetcar) … 18,200
# 39 Finch East (Bus) … 18,000

0.22692872785740026

In [294]:
def time_plus(time, timedelta):
    start = datetime.datetime(
        2000, 1, 1,
        hour=time.hour, minute=time.minute, second=time.second)
    end = start + timedelta
    return end.time()

timeline = [time_plus(datetime.time(0, 0, 0), datetime.timedelta(hours=n/2)) for n in range(2, 50)]

In [301]:
for t in range(1, 48):
    figure = Figure(height=600,width=800)
    m2 = folium.Map(location=[43.7, -79.3832],tiles='cartodbdark_matter',zoom_start=11, min_zoom=10, max_zoom=30)
    figure.add_child(m2)
    
    df_2020c = df_2020.copy()
    
    for i in df_2020c.index:
        if type(df_2020c.at[i, 'Time']) != type(datetime.time(0, 0, 0)):
            df_2020c.at[i, 'Time'] = datetime.time(0, 0, 0)

    df_2020c = df_2020c[df_2020c['Month'].isin(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul'])]
    
    df_2020c = df_2020c[(df_2020c['Time'] >= timeline[t - 1]) & (df_2020c['Time'] < timeline[t])]

    for name, group in df_2020c.groupby(by="Route"):
        lat, lon, rt = group['coords_lat'].to_list()[0], group['coords_lon'].to_list()[0], group['Route'].to_list()[0]
        if type(lat) == str or type(lon) == str:
            continue
        else:
            
            folium.CircleMarker(location=[lat, lon],
                          radius=float(30 * route_issue_count[rt].item() / max_route_issue_count),
                          popup=f"{rt}",
                          fill=True,
                          color="#E80018",
                          fill_color="#E80018",
                          opacity=1,
                          fill_opacity=0.1).add_to(m2)

    html_path = f'output/htmlfiles/{t}.html'
    m2.save(html_path)
    
    browser = webdriver.Safari()
    path = f"file:///Users/neelgokhale/Desktop/Python%20Projects/EY%20Case%20Project/output/htmlfiles/{t}.html"
    browser.get(path)
    time.sleep(1)
    browser.save_screenshot(f'output/imgs/{t}.png')
    browser.quit()

    f = f'output/imgs/{t}.png'.encode()
    image = Image.open(f)
    draw = ImageDraw.ImageDraw(image)
    font = ImageFont.truetype('Consolas.ttf', 30)
    draw.text((40, image.height - 60), 
              f'Time: {timeline[t]}',
              fill=(255, 255, 255), 
              font=font)
    image.save(f'output/imgs/{t}.png')
