In [None]:
# NOTE(review): the star import presumably supplies `pd`, `ROOT` and `load_full_gtfs`,
# which later cells use without importing them explicitly — confirm against utils.py.
from utils import *
import seaborn as sns
import matplotlib.pyplot as plt
# Use Arial for all text in matplotlib figures.
plt.rcParams['font.family'] = 'Arial'
# Echo the resolved data root as the cell output (assumes ROOT is a pathlib.Path — TODO confirm).
ROOT.resolve()

Read GTFS timetable

In [128]:
# Load the timetable feed for Yorkshire. `shapes.txt` and `frequencies.txt` are requested via
# `include`, and the call is unpacked into nine tables (the last two being shapes and frequencies).
agencies, routes, trips, stops, stop_times, calendar, calendar_dates, shapes, frequencies = load_full_gtfs(ROOT / "18SepGB_GTFS_Timetables_Downloaded/yorkshire", include=['shapes.txt', 'frequencies.txt'])

Read "Real" GTFS

In [129]:
# Load the "real" feed into `real_`-prefixed tables. Only `shapes.txt` is requested via `include`,
# so the call is unpacked into eight tables (no frequencies table for this feed).
real_agencies, real_routes, real_trips, real_stops, real_stop_times, real_calendar, real_calendar_dates, real_shapes = load_full_gtfs(ROOT / "real-19SepGB_GTFS_Timetables", include=['shapes.txt'])

In [None]:
# Parameters identifying the journey under study: operator, route number, service pattern,
# direction (headsign) and the two stops the journey time is measured between.
agency_id = 'OP931'
bus_num = '13A'
service_id = 208 # Runs Mon, Tue, Wed, Thu, Fri.
headsign = "Middleton"
start_stop = '45010683'  # Corn Exchange B (the plot title says "Corn Exchange F" — TODO confirm which stand)
end_stop = '45013651' # Oakwell Mount
# Resolve the route_id from the timetable feed's routes table. `.values[0]` takes the first
# match and raises IndexError if no route matches this agency / short name.
route_id = routes[(routes.agency_id == agency_id) & (routes.route_short_name == bus_num)].route_id.values[0]
# Echo the resolved route_id as the cell output.
route_id

In [136]:
def journey_time_for_specific_route(trips, stop_times, route_id:str, headsign:str, start_stop, end_stop, service_id=None):
    """
    Calculate the journey time between two stops for every trip on a given route.

    Only stop events at ``start_stop`` and ``end_stop`` are considered, and only
    trips that call at *both* stops are kept. The journey time of a trip is the
    difference between its latest and earliest arrival at those two stops.

    Parameters
    ----------
    trips: Pandas.DataFrame
        trips.txt from the GTFS file. Must have ``route_id``, ``service_id``,
        ``trip_id`` and ``trip_headsign`` columns.

    stop_times: Pandas.DataFrame
        stop_times.txt from the GTFS file. Must have ``trip_id``, ``stop_id``
        and ``arrival_time`` (``HH:MM:SS`` strings) columns.

    route_id: str
        The ID for this route.

    headsign: str
        Display on the front of the bus. Usually where the bus finishes.

    start_stop:
        stop_id where the measured journey begins.

    end_stop:
        stop_id where the measured journey ends.

    service_id: int
        Optional - Service ID for this bus. Details the days of the week and dates the service runs.
        When None (the default) trips of every service are included.

    Returns
    -------
    this_bus_journey_times: Pandas.DataFrame
        Indexed by trip_id, sorted by time_of_day, with columns time_of_day
        (datetime.time of the earliest arrival), time_taken_minutes and
        time_of_day_seconds (seconds since midnight, for plotting).

    this_bus_stop_times: Pandas.DataFrame
        The stop_times rows (at the two stops only) the journey times were
        computed from, with arrival_time parsed to datetime.
    """
    # Select the trips for this route and direction, optionally narrowed to one service.
    # Compare against None so that a falsy-but-valid service_id (e.g. 0) still filters.
    trip_mask = (trips.route_id == route_id) & (trips.trip_headsign == headsign)
    if service_id is not None:
        trip_mask = trip_mask & (trips.service_id == service_id)
    this_bus_unique_trips_list = trips.loc[trip_mask, 'trip_id'].unique()

    # Keep only the calls at the two stops of interest for those trips. stop_ids are compared
    # in string form so that '45010683' matches whether the column was loaded as str or int.
    start_key, end_key = str(start_stop), str(end_stop)
    stop_mask = (
        stop_times.trip_id.isin(this_bus_unique_trips_list)
        & stop_times.stop_id.astype(str).isin([start_key, end_key])
    )
    this_bus_stop_times = stop_times.loc[stop_mask].copy()

    # Read the arrival time as a pandas datetime.
    # NOTE(review): GTFS permits times of 24:00:00 and later for trips running past midnight;
    # this format string rejects them (unchanged from the previous behaviour).
    this_bus_stop_times['arrival_time'] = pd.to_datetime(this_bus_stop_times.arrival_time, format='%H:%M:%S')

    # Drop trips that do not call at both stops; they have no start-to-end journey time.
    stop_keys = this_bus_stop_times.stop_id.astype(str)
    trips_at_start = set(this_bus_stop_times.loc[stop_keys == start_key, 'trip_id'])
    trips_at_end = set(this_bus_stop_times.loc[stop_keys == end_key, 'trip_id'])
    complete_trips = trips_at_start & trips_at_end
    this_bus_stop_times = this_bus_stop_times[this_bus_stop_times.trip_id.isin(complete_trips)]

    # Per trip: earliest arrival gives the time of day, latest minus earliest the journey time.
    arrivals = this_bus_stop_times.groupby('trip_id')['arrival_time']
    first_arrival = arrivals.min()
    last_arrival = arrivals.max()
    this_bus_journey_times = pd.DataFrame({
        'time_of_day': first_arrival.dt.time,
        'time_taken_minutes': (last_arrival - first_arrival).dt.total_seconds() / 60,
    })
    # Seconds since midnight, so the time of day can be used as a numeric plot axis.
    this_bus_journey_times['time_of_day_seconds'] = (
        first_arrival.dt.hour * 3600 + first_arrival.dt.minute * 60 + first_arrival.dt.second
    )

    this_bus_journey_times = this_bus_journey_times.sort_values(by='time_of_day')

    return this_bus_journey_times, this_bus_stop_times

In [None]:
# Compute journey times for the same route / headsign / stop pair from both feeds.
# NOTE(review): route_id and service_id were taken from the timetable feed and are reused
# unchanged against the real feed — confirm the two feeds share these identifiers.
timetabled, tt_times = journey_time_for_specific_route(trips, stop_times, route_id, headsign, start_stop, end_stop, service_id)
real, real_times = journey_time_for_specific_route(real_trips, real_stop_times, route_id, headsign, start_stop, end_stop, service_id)
# Display the journey times derived from the real feed as the cell output.
real

In [None]:
fig, ax = plt.subplots()
# Timetabled journey times as a connected line; journey times from the real feed as separate
# markers. Both series are labelled so the legend can tell them apart.
sns.lineplot(data=timetabled, x='time_of_day_seconds', y='time_taken_minutes', marker='o', color='blue', label='Timetabled', ax=ax)
sns.scatterplot(data=real, x='time_of_day_seconds', y='time_taken_minutes', marker='x', lw=2, color='orange', label='Real-time', ax=ax)
# Format the x-axis ticks (seconds since midnight) as HH:MM.
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda s, _: f'{int(s//3600):02}:{int((s%3600)//60):02}'))
ax.set_xlabel('Time of Day (HH:MM)')
ax.set_ylabel('Journey time (minutes)')
ax.set_title(f'Time Taken vs Time of Day for the number {bus_num} bus in Leeds \n from Corn Exchange F to Oakwell Mount. \nReal-time data 19/09/24. Timetable w/c 18/09/24')
ax.set_ylim(0, 100)  # Fix the y-axis (journey time) range to 0-100 minutes
ax.legend()
# Show plot
plt.show()