In [None]:
import pandas as pd
import os
import plotly.express as px
import plotly.io as pio

In [None]:
# Folder that holds the working arrival/departure CSV files.
DATASETS_PATH = 'data/Arrival_Departure/Working'

# Name of the master dataset and its location under DATASETS_PATH.
MASTER_DATASET = 'post_covid'
MASTER_DATASET_PATH = os.path.join(DATASETS_PATH, MASTER_DATASET)

# Route IDs (kept as strings) that the analysis is restricted to.
imp_routes = [
    '22', '29', '15', '45', '44', '42', '17',
    '23', '31', '26', '111', '24', '33', '14',
]

In [None]:
def process_csv(path, imp_routes = [ '22', '29', '15', '45', '44', '42', '17', '23', '31', '26', '111', '24', '33', '14']):
    """Load a delay CSV and return cleaned rows for the selected routes.

    Parameters
    ----------
    path : str or path-like
        Location of a CSV file containing at least the columns
        ``route_id``, ``service_date`` and ``delay``.
    imp_routes : iterable, optional
        Route IDs to keep. Each entry is compared as a string, so both
        ``'22'`` and ``22`` select route 22. The default list is only read,
        never mutated.

    Returns
    -------
    pandas.DataFrame
        A new frame in which rows with a missing value in *any* column have
        been dropped, only the requested routes remain, ``service_date`` is
        parsed to datetime and negative ``delay`` values are raised to 0.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the file.
    """
    # Read route IDs as text: left to type inference, an all-numeric (or
    # mixed-type) column would be parsed as numbers and never match the
    # string IDs in `imp_routes`, silently producing an empty frame.
    df = pd.read_csv(path, dtype={'route_id': str})

    # Drop every row that has a missing value in any column.
    df = df.dropna()

    wanted_routes = [str(route) for route in imp_routes]
    df = df[df['route_id'].isin(wanted_routes)]

    # `assign` builds a new frame, avoiding SettingWithCopyWarning from
    # writing columns onto the filtered slice.
    return df.assign(
        service_date=pd.to_datetime(df['service_date']),
        delay=df['delay'].clip(lower=0),
    )

In [None]:
def stop_avg_delay_per_route(path, period = None, save_plots = False, delay_threshold = 5, output_dir = "./Plots/Stops"):
    """Plot, per route, the stops whose average delay exceeds a threshold.

    The CSV at ``path`` is loaded through ``process_csv``, the mean ``delay``
    is computed for every (route_id, stop_id) pair, and one bar chart per
    route is shown containing only the stops above ``delay_threshold``.

    Parameters
    ----------
    path : str or path-like
        CSV file passed on to ``process_csv``.
    period : str, optional
        Suffix appended to each saved image's file name (e.g. "_covid").
        ``None`` means no suffix.
    save_plots : bool, optional
        When True, each figure is also written as a JPG into ``output_dir``.
    delay_threshold : int or float, optional
        Only stops with an average delay strictly greater than this value
        are plotted. Defaults to 5.
    output_dir : str, optional
        Folder for saved images; created if it does not exist.

    Returns
    -------
    None
        Figures are displayed (and optionally saved) as a side effect.
    """
    df = process_csv(path)
    route_stop_avg_delay = (
        df.groupby(['route_id', 'stop_id'])['delay'].mean().reset_index()
    )
    route_stop_avg_delay = route_stop_avg_delay[
        route_stop_avg_delay['delay'] > delay_threshold
    ]

    color_palette = [
        "#FF5733",  # Red-Orange
        "#33FF57",  # Green
        "#3357FF",  # Blue
        "#FF33A1",  # Pink
        "#FFD733",  # Yellow
        "#FF8C00",  # Orange
        "#8A2BE2",  # Blue-Violet
        "#FF1493",  # Deep Pink
        "#00FFFF",  # Cyan
        "#800080",  # Purple
        "#C71585",  # Medium Violet Red
        "#00FF7F",  # Spring Green
        "#B22222",  # Firebrick
        "#A52A2A"   # Brown
    ]

    # `period` defaults to None; concatenating None into the file name
    # would raise TypeError, so fall back to an empty suffix.
    file_suffix = "" if period is None else str(period)

    if save_plots:
        # write_image does not create missing folders.
        os.makedirs(output_dir, exist_ok=True)

    for index, route in enumerate(route_stop_avg_delay['route_id'].unique()):
        route_label = str(route)
        route_stops = route_stop_avg_delay[route_stop_avg_delay['route_id'] == route]
        # Wrap around the palette so extra routes cannot cause an IndexError.
        bar_color = color_palette[index % len(color_palette)]

        fig = px.bar(
            route_stops,
            x='stop_id',
            y='delay',
            color_discrete_sequence=[bar_color],
            title="Greater than " + str(delay_threshold) + " min Delay Contribution per Stop at Route " + route_label,
            # The plotted value is a mean, so the hover label says "Average".
            labels={'stop_id': 'Stop ID', 'delay': 'Average Delay (minutes)', 'route_id': 'Route ID'},
            barmode='stack',
            hover_data=['route_id', 'stop_id', 'delay'],
        )
        fig.update_layout(
            bargap=0.6,
            xaxis_title='Stop ID',
            yaxis_title='Average Delay (minutes)',
            xaxis_tickangle=45,
            showlegend=False,
            height=600,
            margin=dict(t=40, b=40, l=40, r=40),
            # Treat stop IDs as categories so every stop gets its own tick.
            xaxis={'type': 'category', 'tickmode': 'array', 'tickvals': route_stops['stop_id'].unique()}
        )
        fig.show()

        if save_plots:
            image_path = os.path.join(
                output_dir,
                "stop_avg_delay_route_" + route_label + file_suffix + ".jpg",
            )
            pio.write_image(fig, image_path, format="jpg", width=1000, height=600, scale=1.5)



# Pre-COVID

In [None]:
# Suffix appended to the saved plot file names for this period.
period = "_pre_covid"

# Build the path from DATASETS_PATH instead of repeating the folder literal.
d = os.path.join(DATASETS_PATH, "delay_pre_covid.csv")

stop_avg_delay_per_route(d, period=period, save_plots=True)

# COVID

In [None]:
# Suffix appended to the saved plot file names for this period.
period = "_covid"

# Build the path from DATASETS_PATH instead of repeating the folder literal.
d = os.path.join(DATASETS_PATH, "delay_covid.csv")

stop_avg_delay_per_route(d, period=period, save_plots=True)

# Post-COVID

In [None]:
# Suffix appended to the saved plot file names for this period.
period = "_post_covid"

# Build the path from DATASETS_PATH instead of repeating the folder literal.
d = os.path.join(DATASETS_PATH, "delay_post_covid.csv")

stop_avg_delay_per_route(d, period=period, save_plots=True)