In [24]:
import pandas as pd
from tqdm import tqdm
from datetime import datetime
from datetime import date
import altair as alt
from altair import datum

from statistics import *

# Read the merged stop log with every column as text, then keep only rows that
# have a "Bus" value other than "0", a non-missing "Time" and "Date", and a
# "Route" that is one of the three listed routes.
file = pd.read_csv("merged_stop_data.csv", dtype=str).loc[
    lambda rows: (rows["Bus"] != "0")
    & rows["Time"].notna()
    & rows["Date"].notna()
    & rows["Route"].isin(["Gold", "Silver", "Green"])
]

In [25]:
def dataset_filter(dataset, start_stop, destination_stop, flag):
    """Return the rows of ``dataset`` relevant to one stop pair on one route.

    Rows are discarded when "Bus" equals "0" or when "Time" or "Date" is
    missing. ``flag`` chooses the route to keep: 0 -> "Gold", 1 -> "Green",
    2 -> "Silver"; any other value leaves all routes in place. Of what remains,
    only rows whose "Stop" is ``start_stop`` or ``destination_stop`` are
    returned.
    """
    is_valid = (
        (dataset["Bus"] != "0")
        & dataset["Time"].notna()
        & dataset["Date"].notna()
    )
    selected = dataset[is_valid]

    # Flag -> route lookup; compared with == so the matching rules are the
    # same as a chain of `if flag == ...` checks.
    for code, route_name in ((0, "Gold"), (1, "Green"), (2, "Silver")):
        if flag == code:
            selected = selected[selected["Route"].isin([route_name])]
            break

    wanted_stops = [start_stop, destination_stop]
    return selected[selected["Stop"].isin(wanted_stops)]

In [26]:
def route_filter(dataset, start_stop, destination_stop, flag):
    """Filter ``dataset`` with ``dataset_filter`` and attach parsed timestamps.

    Each remaining row's "Date" and "Time" strings are joined with an
    underscore and parsed using the "%m/%d/%Y_%H:%M:%S" format. The parsed
    values are stored in a new "Converted_Time" column of the returned frame.
    """
    selected = dataset_filter(dataset, start_stop, destination_stop, flag)

    parsed_times = []
    for clock_text, day_text in zip(selected["Time"], selected["Date"]):
        stamp = datetime.strptime(f"{day_text}_{clock_text}", "%m/%d/%Y_%H:%M:%S")
        parsed_times.append(stamp)

    return selected.assign(Converted_Time=parsed_times)

In [27]:
def calculate_distance(dataset, start_stop, destination_stop, hour, day, month, flag, failsafe):
    """Median travel time in minutes from ``start_stop`` to ``destination_stop``.

    For every bus (rows grouped by "Bus"), consecutive rows are inspected in
    the order they appear in ``dataset``. A pair counts as one trip when the
    earlier row is at ``start_stop``, the later row is at ``destination_stop``,
    both rows share the same "Route", and the later row's "Converted_Time"
    falls in the requested ``hour`` and ``month`` and is not in the year 2020.

    Parameters
    ----------
    dataset : DataFrame with "Bus", "Stop", "Route" and "Converted_Time"
        columns ("Converted_Time" must hold datetime-like values).
    start_stop, destination_stop : stop names bounding the trip.
    hour : hour of day (0-23) the arrival must fall in.
    day : accepted for signature compatibility; currently not used.
    month : month number (1-12) the arrival must fall in.
    flag : accepted for signature compatibility; currently not used.
    failsafe : selects the fallback returned when no usable trip is found.

    Returns
    -------
    The median of the usable trip durations, each truncated to whole minutes.
    Only durations with 1 <= minutes < 20 are considered usable. When none
    exist, returns the tuple ``(-1, [])`` if ``failsafe == 0`` and the
    integer ``1`` otherwise.
    """
    trip_minutes = []
    for _bus, visits in dataset.groupby("Bus"):
        previous_stop = ""
        previous_time = None
        previous_route = ""

        for current_stop, current_time, route in zip(
            visits["Stop"], visits["Converted_Time"], visits["Route"]
        ):
            is_trip = (
                previous_time is not None
                and previous_stop == start_stop
                and current_stop == destination_stop
                and previous_route == route
                and current_time.hour == hour
                # and current_time.weekday() == day  (weekday matching is disabled)
                and current_time.month == month
                and current_time.year != 2020
            )

            if is_trip:
                elapsed = current_time - previous_time
                # total_seconds() keeps the days component and the sign of the
                # gap. `.seconds` alone would report a gap of 1 day 5 minutes
                # (or of -23h55m) as a 5-minute trip.
                trip_minutes.append(round(elapsed.total_seconds() / 60, 2))

            previous_stop = current_stop
            previous_time = current_time
            previous_route = route

    # Discard implausible durations, then truncate to whole minutes.
    usable_minutes = [int(minutes) for minutes in trip_minutes if 1 <= minutes < 20]
    if not usable_minutes:
        if failsafe == 0:
            return -1, []
        return 1
    return median(usable_minutes)

In [100]:
now = datetime.now()
first_location = "CRI Deck"
second_location = "Lot 5A"

# Route flag 2 selects the "Silver" route inside dataset_filter.
# NOTE: `filter` shadows the Python builtin; the name is kept because other
# cells may read it.
filter = route_filter(file, first_location, second_location, 2)

# Baseline travel time for the current hour. failsafe=1 makes
# calculate_distance return 1 when no matching trips are found.
dist = calculate_distance(filter, first_location, second_location, now.hour, now.weekday(), now.month, 2, 1)

hours = []   # 12-hour clock labels for the upcoming hours, e.g. "3 PM"
times = []   # median minutes for each label, parallel to `hours`

# Existing behaviour kept: no forecast is built from 20:00 onwards.
if now.hour < 20:
    for offset in range(1, 6):
        # Advance a real timestamp so hour, weekday and month all roll over
        # correctly past midnight (hour 24 is never a valid `.hour` value).
        slot = now + pd.Timedelta(hours=offset)

        suffix = "AM" if slot.hour < 12 else "PM"
        hours.append(f"{slot.hour % 12 or 12} {suffix}")
        times.append(calculate_distance(filter, first_location, second_location, slot.hour, slot.weekday(), slot.month, 2, 1))

In [107]:
# One row per forecast slot: "x" is the hour label, "y" the median minutes.
df = pd.DataFrame({"x" : hours, "y" : times})

alt.themes.enable("vox")
chart = alt.Chart(df).encode(
    # sort=None keeps the bars in the order the hours were generated.
    x=alt.X("x:N", title="Hour", sort=None),
    # Explicit quantitative type so the encoding does not depend on dtype
    # inference (which is unreliable when `times` is empty).
    y=alt.Y("y:Q", title="Minutes"),
    opacity=alt.value(0.5),
    # Red when the slot is slower than the current-hour baseline `dist`,
    # green otherwise.
    color=alt.condition((datum.y > dist), alt.ColorValue("red"), alt.ColorValue("green")),
    size=alt.value(80),

).properties(
    width=500,
    title="Bus Travel Time Forecast (Next Five Hours)"
)
chart.mark_bar()

