## Stop-to-stop Running Times

This notebook uses the Partridge library to process two GTFS feeds. It computes the stop-to-stop running times for each feed, allowing the user to see differences between two transit schedules.

Author: Nick Caros, MIT Transit Lab. June 2021.

In [1]:
import partridge as ptg
import pandas as pd
import datetime
from collections import deque

In [4]:
# --- User input: route filters (passed to the routes.txt view when a feed is loaded) ---
route_type = ["3"]
route_desc = ["Key Bus", "Commuter Bus", "Local Bus"]
route_id = ["1", "39", "57"]

# --- User input: GTFS feeds to compare (baseline first, comparison second) ---
gtfs_inpath_base = "cta/CTA_GTFS_SPRING2020.zip"
gtfs_inpath_comp = "cta/CTA_GTFS_SPRING2021.zip"

# --- User input: sample service dates ---
# Pick a non-holiday weekday that falls inside each feed's date range; the
# service ids active on that date decide which trips are loaded.
sample_date_base = datetime.date(year=2020, month=5, day=1)
sample_date_comp = datetime.date(year=2021, month=5, day=5)

# --- User input: where the comparison table is written ---
runtime_outpath = "cta_runtime_comparison.csv"

In [5]:
# Import GTFS feeds
def get_gtfs_feed(sample_date, gtfs_inpath, route_type, route_desc, route_id):
    """Load a filtered view of one GTFS feed for a single service date.

    Parameters
    ----------
    sample_date : datetime.date
        Date used to look up the active service ids in the feed.
    gtfs_inpath : str
        Path to the GTFS feed passed to Partridge.
    route_type, route_desc, route_id : list of str
        Values used to filter ``routes.txt`` on the columns of the same name.

    Returns
    -------
    The feed object returned by ``ptg.load_feed`` for the filtered view.

    Raises
    ------
    KeyError
        If the feed has no service ids for ``sample_date``.
    """
    service_ids_by_date = ptg.read_service_ids_by_date(gtfs_inpath)
    if sample_date not in service_ids_by_date:
        # Same exception type as the plain dict lookup, but with enough context
        # to tell which feed/date combination is wrong.
        raise KeyError(
            f'No service found on {sample_date} in {gtfs_inpath}; '
            'choose a sample date within the feed calendar.'
        )

    view = {
        'routes.txt': {
            'route_type': route_type,
            'route_desc': route_desc,
            'route_id': route_id,
        },
        'trips.txt': {'service_id': service_ids_by_date[sample_date]},
    }
    return ptg.load_feed(gtfs_inpath, view)

# Load the baseline and comparison feeds with identical route filters.
feed_base = get_gtfs_feed(
    sample_date=sample_date_base,
    gtfs_inpath=gtfs_inpath_base,
    route_type=route_type,
    route_desc=route_desc,
    route_id=route_id,
)
feed_comp = get_gtfs_feed(
    sample_date=sample_date_comp,
    gtfs_inpath=gtfs_inpath_comp,
    route_type=route_type,
    route_desc=route_desc,
    route_id=route_id,
)

In [6]:
# Get scheduled segment running times
def get_segment_times(feed, min_trips=10):
    """Compute the mean scheduled running time of each stop-to-stop segment.

    Each stop time is paired with the next stop time of the same trip; the
    segment duration is the difference between the two ``arrival_time`` values
    divided by 60 (assumes ``arrival_time`` is numeric seconds, so the result
    is in minutes -- confirm against the feed loader).

    Parameters
    ----------
    feed : object
        Feed exposing ``stop_times``, ``trips`` and ``stops`` DataFrames.
    min_trips : int, default 10
        A segment is kept only if strictly more than ``min_trips`` stop-time
        pairs contribute a duration to it.

    Returns
    -------
    pandas.DataFrame
        Columns ``route_id``, ``stop_id_x``, ``stop_name_x``, ``stop_id_y``,
        ``stop_name_y``, ``direction_id``, ``stop_sequence``, ``duration``
        (``_x`` is the first stop of the segment, ``_y`` the second). For each
        route/stop pair only the direction/sequence combination with the
        highest count is kept.
    """
    trips = feed.trips[['route_id', 'trip_id', 'direction_id']]
    stops = feed.stops[['stop_id', 'stop_name']]

    ordered = (
        pd.merge(feed.stop_times, trips, on='trip_id')[
            ['route_id', 'trip_id', 'arrival_time', 'stop_id', 'stop_sequence', 'direction_id']
        ]
        .sort_values(by=['route_id', 'direction_id', 'trip_id', 'stop_sequence'])
    )

    # Shift within each trip so the last stop of a trip is never paired with
    # the first stop of another trip (or, for a single-trip feed, of itself).
    by_trip = ordered.groupby('trip_id', sort=False)
    segments = ordered.assign(
        next_stop=by_trip['stop_id'].shift(-1),
        next_time=by_trip['arrival_time'].shift(-1),
    )
    # cumcount(ascending=False) is 0 on the final stop of each trip.
    has_next = by_trip.cumcount(ascending=False) > 0
    segments = segments[has_next]
    segments = segments.assign(
        duration=(segments['next_time'] - segments['arrival_time']) / 60
    )

    # Attach stop names for both ends; the second merge yields the _x/_y suffixes.
    segments = pd.merge(segments, stops, on='stop_id')
    segments = pd.merge(segments, stops, left_on='next_stop', right_on='stop_id')

    segment_keys = ['route_id', 'stop_id_x', 'stop_name_x', 'stop_id_y', 'stop_name_y']
    group_keys = segment_keys + ['direction_id', 'stop_sequence']
    stats = (
        segments.groupby(group_keys)['duration']
        .agg(n_trips='count', duration='mean')
        .reset_index()
    )

    # Keep the most frequent direction/sequence variant of each segment, and
    # only when it is served often enough to be representative.
    is_max = stats.groupby(segment_keys)['n_trips'].transform('max') == stats['n_trips']
    is_common = stats['n_trips'] > min_trips

    return stats.loc[is_max & is_common, group_keys + ['duration']]

# Mean segment running times for the baseline and comparison schedules.
seg_times_base = get_segment_times(feed=feed_base)
seg_times_comp = get_segment_times(feed=feed_comp)

In [7]:
# Compare the results
# Outer merge keeps segments that exist in only one of the two schedules.
# Columns are renamed by name (not by position) so a change in the merge
# output order cannot silently mislabel them.
comp_df = pd.merge(
    seg_times_base,
    seg_times_comp,
    how='outer',
    on=['route_id', 'stop_id_x', 'stop_id_y', 'direction_id'],
    suffixes=('_base', '_comp'),
)
comp_df = comp_df.rename(columns={
    'stop_id_x': '1st_stop_id',
    'stop_id_y': '2nd_stop_id',
    'stop_name_x_base': '1st_stop_name_base',
    'stop_name_y_base': '2nd_stop_name_base',
    'stop_name_x_comp': '1st_stop_name_comp',
    'stop_name_y_comp': '2nd_stop_name_comp',
    'stop_sequence_base': 'sequence_base',
    'stop_sequence_comp': 'sequence_comp',
})

comp_df = comp_df[['route_id', 'direction_id', 'sequence_base', 'sequence_comp',
                   '1st_stop_id', '1st_stop_name_base', '1st_stop_name_comp',
                   '2nd_stop_id', '2nd_stop_name_base', '2nd_stop_name_comp',
                   'duration_base', 'duration_comp']]
comp_df.head()

Unnamed: 0,route_id,direction_id,sequence_base,sequence_comp,1st_stop_id,1st_stop_name_base,1st_stop_name_comp,2nd_stop_id,2nd_stop_name_base,2nd_stop_name_comp,duration_base,duration_comp
0,1,1,31.0,31.0,1108,Adams & S. Wacker,Adams & S. Wacker,12713,Adams & Canal,Adams & Canal,1.919048,1.888889
1,1,1,32.0,32.0,12713,Adams & Canal,Adams & Canal,67,Jackson & Canal,Jackson & Canal,1.673077,1.645833
2,1,0,2.0,2.0,14461,Jackson & Chicago River,Jackson & Chicago River,68,Jackson & Franklin,Jackson & Franklin,1.886538,2.580556
3,1,1,25.0,25.0,14485,Michigan & Ida B Wells Drive,Michigan & Ida B Wells Drive,75,Michigan & Van Buren,Michigan & Van Buren,0.494286,0.476543
4,1,1,17.0,17.0,14760,Michigan & 16th Street,Michigan & 16th Street,18318,Michigan & 14th Street,Michigan & 14th Street,2.558571,2.462346


In [8]:
# Write the comparison table to disk; the DataFrame index is written as the
# first (unnamed) CSV column.
comp_df.to_csv(path_or_buf=runtime_outpath, index=True)