In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd



In [None]:
# Input CSV used as a working example while testing and getting a first feel for the data.
filename = "MBTA-Bus-Arrival-Departure-Times_2024-12.csv"


In [None]:
# Read the CSV named by `filename` into a DataFrame. low_memory=False makes
# pandas parse the file in one pass instead of in chunks, which avoids
# mixed-type inference within a column.
df = pd.read_csv(filename, low_memory=False)

# Quick inspection: first rows, (n_rows, n_columns), and summary statistics.
print(df.head())
print(df.shape)
# describe has to be *called*: printing the bare attribute only shows the
# bound-method repr (which embeds the frame) rather than the statistics.
print(df.describe())

In [None]:
# Per-row delay: parsed `actual` timestamp minus parsed `scheduled` timestamp,
# so a negative value means the actual time precedes the scheduled one.
df['time_difference'] = (
    pd.to_datetime(df['actual'])
    - pd.to_datetime(df['scheduled'])
)

# The same delay expressed as a number of seconds.
delay_td = df['time_difference']
df['delay_seconds'] = delay_td.dt.total_seconds()

In [None]:
# Headway deviation per row: `headway` minus `scheduled_headway`
# (positive when the recorded headway exceeds the scheduled one).
headway_gap = df['headway'] - df['scheduled_headway']
df['delay_headway'] = headway_gap

In [None]:
# Keep only rows whose delay lies within [-1200, 1200] seconds (both bounds
# inclusive). The cut-off follows findings from another file (not shown here).
not_too_early = df['delay_seconds'] >= -1200
not_too_late = df['delay_seconds'] <= 1200
filtered_df = df[not_too_early & not_too_late]

In [None]:
# Use the magnitude of the delay so negative and positive delays both count as
# deviation instead of cancelling each other out in the mean.
# .assign() returns a new frame rather than writing a column into
# `filtered_df` in place; since `filtered_df` is a boolean-mask selection of
# `df`, in-place column assignment triggers pandas' SettingWithCopyWarning.
filtered_df = filtered_df.assign(abs_delay=filtered_df['delay_seconds'].abs())

# Mean absolute delay per route as a two-column frame: route_id, average_delay.
# The rename is chained (no inplace=True) so re-running the cell is harmless.
avg_delay_per_route = (
    filtered_df.groupby('route_id')['abs_delay']
    .mean()
    .reset_index()
    .rename(columns={'abs_delay': 'average_delay'})
)

print(avg_delay_per_route)


In [None]:
# Routes to emphasise in the chart. Membership is tested against the raw
# `route_id` values, so highlighting only works when those values are strings
# -- NOTE(review): confirm the dtype of `route_id` after loading.
target_routes = {"22", "29", "15", "45", "28", "44", "42", "17", "23", "31", "26", "111", "24", "33", "14"}

route_ids = avg_delay_per_route['route_id']
mean_delays = avg_delay_per_route['average_delay']

# One colour per bar, in the same order as the rows of avg_delay_per_route.
colors = ['skyblue' if route_id not in target_routes else 'red' for route_id in route_ids]

# Bar chart of mean absolute delay per route, highlighted routes drawn in red.
plt.figure(figsize=(12, 6))
plt.bar(route_ids, mean_delays, color=colors)
plt.title('Average Absolute Delay per Route (Highlighted Routes in Red)')
plt.xlabel('Route')
plt.ylabel('Average Absolute Delay (seconds)')
plt.xticks(rotation=45)  # slant the route labels so they do not overlap
plt.tight_layout()
plt.show()