## Route Level Service Hours and Daily Trips

This notebook uses the Partridge library to process two GTFS feeds. For a user-selected sample weekday in each feed, it computes the number of daily trips and the total service hours by route, allowing the user to see differences between two transit schedules. The two route-level comparisons are exported as CSV files.

Author: Nick Caros, MIT Transit Lab. June 2021.

In [4]:
import partridge as ptg
import pandas as pd
import numpy as np
import datetime

In [7]:
# ---- User inputs ----
# Feed filters, matched against columns of routes.txt when the feeds are loaded.
# In the GTFS reference, route_type 3 is bus service.
# NOTE(review): route_desc labels are agency-specific - confirm these values
# actually occur in the routes.txt of the feeds named below.
route_type = ["3"]
route_desc = ["Key Bus", "Commuter Bus", "Local Bus"]

# GTFS feeds to compare: "base" is the reference schedule, "comp" is the
# schedule it is compared against.
gtfs_inpath_base = "cta/CTA_GTFS_SPRING2020.zip"
gtfs_inpath_comp = "cta/CTA_GTFS_SPRING2021.zip"

# One non-holiday weekday inside each feed's date range; the service ids
# active on that date select the trips that are analysed.
sample_date_base = datetime.date(year=2020, month=5, day=1)
sample_date_comp = datetime.date(year=2021, month=5, day=5)

# Destination CSV files for the two route-level comparison tables.
trip_outpath = "cta_trip_comparison.csv"
sh_outpath = "cta_servicehour_comparison.csv"

In [9]:
# Import GTFS
def load_service_day_feed(gtfs_path, sample_date):
    """Load one GTFS feed restricted to the configured routes and one service day.

    Parameters
    ----------
    gtfs_path : str
        Path to the GTFS zip file.
    sample_date : datetime.date
        Date whose active service ids select the trips to keep.

    Returns
    -------
    tuple
        ``(service_ids, feed)``: the service ids active on ``sample_date`` and
        the partridge feed loaded with the route and service-id filters applied.

    Raises
    ------
    KeyError
        If the feed has no service ids for ``sample_date``.
    """
    service_ids_by_date = ptg.read_service_ids_by_date(gtfs_path)
    if sample_date not in service_ids_by_date:
        # Same exception type as a plain dict lookup, but it names the feed and date
        raise KeyError(
            '{} has no active service in {}; choose a date inside the feed range'.format(
                sample_date, gtfs_path
            )
        )
    service_ids = service_ids_by_date[sample_date]

    # Route filters come from the user-input cell; trips are limited to the sample day
    view = {
        'routes.txt': {'route_type': route_type, 'route_desc': route_desc},
        'trips.txt': {'service_id': service_ids},
    }
    return service_ids, ptg.load_feed(gtfs_path, view)


service_ids_base, feed_base = load_service_day_feed(gtfs_inpath_base, sample_date_base)
service_ids_comp, feed_comp = load_service_day_feed(gtfs_inpath_comp, sample_date_comp)

In [10]:
# Daily trip counts per route, side by side for the two feeds
trips_base = feed_base.trips[['route_id', 'trip_id']]
trips_comp = feed_comp.trips[['route_id', 'trip_id']]

# groupby(...).size() gives the number of trip rows for each route_id
trip_counts_base = trips_base.groupby(['route_id']).size().to_dict()
trip_counts_comp = trips_comp.groupby(['route_id']).size().to_dict()

# Rows are ordered with base-feed routes first, followed by routes that only
# appear in the comparison feed; a route missing from a feed gets 0 trips there
comp_only_routes = [route for route in trip_counts_comp if route not in trip_counts_base]
comparison_dict = {
    route: {
        'base': trip_counts_base.get(route, 0),
        'comp': trip_counts_comp.get(route, 0),
    }
    for route in list(trip_counts_base) + comp_only_routes
}

trip_df = pd.DataFrame.from_dict(comparison_dict, orient='index')
trip_df.head()

Unnamed: 0,base,comp
1,61,51
100,53,52
103,132,128
106,134,133
108,71,71


In [11]:
# Now compute service hours
def route_service_hours(feed):
    """Sum trip durations, in hours, for every route in a feed.

    A trip's duration is the difference between the largest and smallest
    ``arrival_time`` among its stop_times rows, divided by 3600 (the times are
    treated as seconds).

    Parameters
    ----------
    feed : object
        Must expose ``stop_times`` (with ``trip_id`` and ``arrival_time``) and
        ``trips`` (with ``route_id`` and ``trip_id``) DataFrames, as the feeds
        returned by ``ptg.load_feed`` are used here.

    Returns
    -------
    dict
        ``route_id -> total hours``, keyed in order of first appearance of the
        route in ``feed.trips``.

    Raises
    ------
    KeyError
        If a trip listed in ``feed.trips`` has no rows in ``feed.stop_times``.
    """
    arrivals = feed.stop_times.groupby('trip_id')['arrival_time']
    # Use the groupby min/max methods rather than passing np.min/np.max to
    # agg(), which emits a FutureWarning on recent pandas versions
    trip_hours = ((arrivals.max() - arrivals.min()) / 3600).to_dict()

    # Accumulate in trips-table order so the sums match a plain running total
    hours_by_route = {}
    for route_id, trip_id in feed.trips[['route_id', 'trip_id']].values.tolist():
        hours_by_route[route_id] = hours_by_route.get(route_id, 0) + trip_hours[trip_id]
    return hours_by_route


hours_base = route_service_hours(feed_base)
hours_comp = route_service_hours(feed_comp)

# Base-feed routes first, then routes served only in the comparison feed;
# a route absent from one feed is reported with 0 hours for that feed
comp_only_routes = [route_id for route_id in hours_comp if route_id not in hours_base]
servicehr_dict = {
    route_id: {
        'base': hours_base.get(route_id, 0),
        'comp': hours_comp.get(route_id, 0),
    }
    for route_id in list(hours_base) + comp_only_routes
}

sh_df = pd.DataFrame.from_dict(servicehr_dict, orient='index')
sh_df.head()

Unnamed: 0,base,comp
1,31.425,25.825
100,25.433333,24.725
103,64.2,63.366667
106,39.55,39.333333
108,28.158333,28.741667


In [12]:
# Write both comparison tables to the CSV paths chosen in the user-input cell
# (service hours first, then trip counts)
for frame, outpath in ((sh_df, sh_outpath), (trip_df, trip_outpath)):
    frame.to_csv(outpath)