# Best bus stops

Find the best bus stops using PTV (Public Transport Victoria) GTFS timetable data.


Many bus routes don't run night services, so I'd like to find the bus stops that do.

In [1]:
import components as comp
import pandas as pd
import numpy as np

In [2]:
# Fetch PTV's gtfs.zip and hand it to the custom `components` module for parsing.
# The result is used in the next cell as a frame with 'branch_id', 'table_name'
# and 'df' columns (one parsed table per row, presumably -- confirm in `components`).
# NOTE(review): the meaning of the empty-string second argument is not visible here.
GTFS_ZIP_URL = 'http://data.ptv.vic.gov.au/downloads/gtfs.zip'

df = comp.process_gtfs_zip(GTFS_ZIP_URL, '')
# 2m30s  (author's note -- presumably the observed run time of this cell)

In [3]:
# Key the parsed tables by (branch_id, table_name) and keep only the 'df' column,
# giving a Series that can be looked up as ptv[branch_id][table_name].
# set_index returns a new frame by default, so `df` itself is left untouched.
ptv = df.set_index(['branch_id', 'table_name'])['df']

In [4]:
# Give every bus table in the GTFS data its own named DataFrame.
# Branch '4' is looked up once and reused; going by the variable names below it
# is the bus feed.
bus_tables = ptv['4']

df_bus_agency: pd.DataFrame = bus_tables['agency']
df_bus_calendar: pd.DataFrame = bus_tables['calendar']
df_bus_calendar_dates: pd.DataFrame = bus_tables['calendar_dates']
df_bus_routes: pd.DataFrame = bus_tables['routes']
df_bus_shapes: pd.DataFrame = bus_tables['shapes']
df_bus_stops: pd.DataFrame = bus_tables['stops']
df_bus_stop_times: pd.DataFrame = bus_tables['stop_times']
df_bus_trips: pd.DataFrame = bus_tables['trips']

In [5]:
def get_max_row(df: pd.DataFrame, by: str):
    """
    Return the row of `df` whose `by` column holds the largest value.

    `df[by].max()` would give back only the value itself; this helper returns
    the complete row instead.

    Parameters:
        df: frame to search.
        by: name of the column to maximise.

    Returns:
        The result of `df.loc[label]`, where `label` is the index label of the
        first maximum in `df[by]`. The lookup is by label, so if the index is
        not unique every row sharing that label is returned.
    """
    label_of_max = df[by].idxmax()
    return df.loc[label_of_max]

def get_min_row(df: pd.DataFrame, by: str):
    """
    Return the row of `df` whose `by` column holds the smallest value.

    `df[by].min()` would give back only the value itself; this helper returns
    the complete row instead.

    Parameters:
        df: frame to search.
        by: name of the column to minimise.

    Returns:
        The result of `df.loc[label]`, where `label` is the index label of the
        first minimum in `df[by]`. The lookup is by label, so if the index is
        not unique every row sharing that label is returned.
    """
    label_of_min = df[by].idxmin()
    return df.loc[label_of_min]

In [12]:
# Build one wide frame of bus stop times:
#   1. attach each stop time's trip record (joined on 'trip_id'),
#   2. attach that trip's service calendar (joined on 'service_id'),
#   3. keep only the columns listed below.
# Both joins are left joins, so every stop-time row is retained.
# Calendar fields kept: monday..sunday, start_date, end_date.
dfbst = (
    df_bus_stop_times
    .merge(df_bus_trips, on='trip_id', how='left')
    .merge(df_bus_calendar, on='service_id', how='left')
)[[
    'trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence',
    'route_id', 'service_id',
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
    'start_date', 'end_date',
]]

In [13]:
# Reshape the seven weekday columns into long form: each input row becomes one
# row per weekday, with the column name in 'day' and its value in 'running'.
# Kept as the cell's final expression so the notebook renders the result.
dfbst.melt(
    id_vars=[
        'trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence',
        'route_id', 'service_id', 'start_date', 'end_date',
    ],
    value_vars=['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'],
    var_name='day',
    value_name='running',
)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,route_id,service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
0,43-477--1-MF1-8728614,05:56:00,05:56:00,6725,1,43-477-aus-1,MF1-43-477-aus,0,0,0,0,1,0,0,20240202,20240505
1,43-477--1-MF1-8728614,05:56:00,05:56:00,6726,2,43-477-aus-1,MF1-43-477-aus,0,0,0,0,1,0,0,20240202,20240505
2,43-477--1-MF1-8728614,06:00:00,06:00:00,9095,3,43-477-aus-1,MF1-43-477-aus,0,0,0,0,1,0,0,20240202,20240505
3,43-477--1-MF1-8728614,06:01:00,06:01:00,27586,4,43-477-aus-1,MF1-43-477-aus,0,0,0,0,1,0,0,20240202,20240505
4,43-477--1-MF1-8728614,06:02:00,06:02:00,27587,5,43-477-aus-1,MF1-43-477-aus,0,0,0,0,1,0,0,20240202,20240505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3985294,23-959--1-Sat3-6,30:12:00,30:12:00,20471,49,23-959-aus-1,Sat3-23-959-aus,1,0,0,0,0,1,0,20240202,20240505
3985295,23-959--1-Sat3-6,30:13:00,30:13:00,20472,50,23-959-aus-1,Sat3-23-959-aus,1,0,0,0,0,1,0,20240202,20240505
3985296,23-959--1-Sat3-6,30:13:00,30:13:00,20473,51,23-959-aus-1,Sat3-23-959-aus,1,0,0,0,0,1,0,20240202,20240505
3985297,23-959--1-Sat3-6,30:14:00,30:14:00,20474,52,23-959-aus-1,Sat3-23-959-aus,1,0,0,0,0,1,0,20240202,20240505
