# Combining scheduled and actual headway calcs
### Export a geojson for a given route and day, containing all bus stops on that route with headway stats for that day

In [1]:
# import chn-ghost-buses files
import sys
import geopandas as gpd
sys.path.append('/Users/kristenhahn/repos/chn-ghost-buses')
from data_analysis.static_gtfs_analysis import *
# from scrape_data.scrape_data import *
from scrape_schedule_versions import *

# import headways file
from headways import *

In [2]:
# Values to use for testing
gtfs_version_id = '20230721'

In [3]:
# Use Laurie's code to get gtfs feed data
gtfs_feed = download_extract_format(gtfs_version_id) 

INFO:root:Downloading CTA data
INFO:root:Download complete
INFO:root:Extracting data from CTA zipfile version 20230721
Loading stops.txt:   0%|          | 0/7 [00:00<?, ?it/s]INFO:root:stops.txt loaded
Loading stop_times.txt:   0%|          | 0/7 [00:00<?, ?it/s]INFO:root:stop_times.txt loaded
Loading routes.txt:  29%|██▊       | 2/7 [00:03<00:09,  1.87s/it]    INFO:root:routes.txt loaded
Loading trips.txt:  29%|██▊       | 2/7 [00:03<00:09,  1.87s/it] INFO:root:trips.txt loaded
Loading calendar.txt:  29%|██▊       | 2/7 [00:03<00:09,  1.87s/it]INFO:root:calendar.txt loaded
Loading calendar_dates.txt:  29%|██▊       | 2/7 [00:03<00:09,  1.87s/it]INFO:root:calendar_dates.txt loaded
Loading shapes.txt:  29%|██▊       | 2/7 [00:03<00:09,  1.87s/it]        INFO:root:shapes.txt loaded
Loading shapes.txt: 100%|██████████| 7/7 [00:04<00:00,  1.56it/s]


# Get headway stats for an entire route on a single day
Takes 10 minutes on my computer...

In [None]:
%%capture --no-display

# Route 55, 7/26/2023

stats_all_stops_55_20230726 = get_stats_all_stops(gtfs_feed, '20', '2023-07-26')

In [None]:
# plot route 55 linestring and stops with headway data

linestring_55 = gpd.read_file('headway_summaries/route55_linestring.json')
stops_55 = gpd.read_file('headway_summaries/route55_2023-07-26.json')

m = linestring_55.explore(color='#41B6E6', tiles="CartoDB positron", )
stops_55.explore(m=m, color='#E4002B', marker_kwds=({'radius':3.0}))


In [None]:
%%capture --no-display
# oute 50 - Damen (bottom 20th %ile per the chn map in # of buses run compared to schedule) on 1/18/2023

stats_all_stops_50_20230118 = get_stats_all_stops(gtfs_feed, '50', '2023-01-18')

In [None]:
# plot route 50 linestring and stops with headway data

linestring_50 = gpd.read_file('headway_summaries/route50_linestring.json')
stops_50 = gpd.read_file('headway_summaries/route50_2023-01-18.json')

m = linestring_50.explore(color='#41B6E6', tiles="CartoDB positron", )
stops_50.explore(m=m, color='#E4002B', marker_kwds=({'radius':3.0}))

In [None]:
%%capture --no-display
# Try route 55 on a Sunday:

stats_all_stops_55_20230122 = get_stats_all_stops(gtfs_feed, '55', '2023-01-22')

In [None]:
# plot route 55 linestring and stops with Sunday 1/22/2023 headway data

linestring_55 = gpd.read_file('headway_summaries/route55_linestring.json')
stops_55_sunday = gpd.read_file('headway_summaries/route55_2023-01-22.json')

m = linestring_55.explore(color='#41B6E6', tiles="CartoDB positron", )
stops_55_sunday.explore(m=m, color='#E4002B', marker_kwds=({'radius':3.0}))

In [None]:
def get_stats_all_stops(gtfs_feed, route_id, service_date_string):
    '''
    Returns a geodataframe of every bus stop on a specified route, with stats on 
    actual and scheduled headways for a single service day.  This data is also exported as a
    geojson.\n

    Parameters:\n

    gtfs_feed is obtained using the download_extract_format() function from the ghost bus team.\n

    route_id is a route id as a string (for example, '55' for the 55 Garfield bus)\n

    service_date_string is in the format "YYYY-MM-DD", indicating the service date to be analyzed.
    Note that service dates can include spillover into the next calendar day, for bus routes that run
    past midnight.\n

    Data returned:\n

    Returns a geodataframe containing all stops with actual and scheduled headway statistics.\n
    
    Exports the headway data for each stop as a geojson to the headway_summaries directory.\n
    Also exports a linestring for the selected route as a geojson.
    '''

    # dataframe to contain final summary data for each stop
    stats_all_stops = pd.DataFrame()

    # get scheduled stop details
    scheduled_stop_details = get_scheduled_stop_details(gtfs_feed, route_id, service_date_string)
    # get scheduled stop ids
    scheduled_stop_ids = get_scheduled_stop_ids(scheduled_stop_details)


    # get vehicles
    vehicles = get_chn_vehicles(service_date_string)
    # get actual stop times
    actual_stoptimes = get_actual_stoptimes(route_id, vehicles)
    # get actual stop ids
    actual_stop_ids = get_actual_stop_ids(actual_stoptimes)


    # get common stops
    common_stops = actual_stop_ids.intersection(scheduled_stop_ids)


    for stop_id in common_stops:

        # list directions found in the data for this stop
        directions = actual_stoptimes.loc[actual_stoptimes['stpid'] == stop_id]['rtdir'].unique().tolist()

        for direction in directions:

            # get scheduled headway stats
            active_service_times = get_active_service_times(scheduled_stop_details,stop_id, direction)
            scheduled_headways = get_scheduled_headways(scheduled_stop_details, stop_id, direction)
            scheduled_headway_stats = get_headway_stats(scheduled_headways, 'headway', 'Scheduled')


            # get actual headway stats
            actual_headways = get_actual_headways(vehicles, route_id, stop_id, direction, active_service_times)
            # Remove rows without headways (first bus in each active service time)
            actual_headways = actual_headways[actual_headways['est_headway'].notnull()]
            print(f'stop = {stop_id}')
            print(actual_headways)


In [None]:

%%capture --no-display
# Route 79 on Friday 1/18/2023

stats_all_stops_79_20230210 = get_stats_all_stops(gtfs_feed, '79', '2023-01-18')


In [None]:
# # plot route 79 stops with stats on 2/10/22

# linestring_79 = gpd.read_file('headway_summaries/route79_linestring.json')
# stops_79_20220210 = gpd.read_file('headway_summaries/route79_2023-02-10.json')

# m = linestring_79.explore(color='#41B6E6', tiles="CartoDB positron", )
# stops_79_20220210.explore(m=m, color='#E4002B', marker_kwds=({'radius':3.0}))

In [None]:
vehicles = get_chn_vehicles('2023-01-18')

vehicles_20230118 = vehicles[vehicles['rt'] == '79']


# actual_headways = get_actual_headways(vehicles, route_id, stop_id, direction, active_service_times)
        
# actual_headway_stats = get_headway_stats(actual_headways, 'est_headway', 'Actual')


In [None]:

stop_details = get_scheduled_stop_details(gtfs_feed, '79', '2023-01-18')

stop_details.sort_values('stop_time')


In [None]:

active_service_times = get_active_service_times(stop_details,'2716', 'Eastbound')

active_service_times 


In [None]:

actual_headways = get_actual_headways(vehicles_20230118, '79', '2716','Eastbound',active_service_times)

In [None]:
scheduled_headways = get_scheduled_headways(stop_details, '2716', 'Eastbound')

scheduled_headways

# TEST

In [75]:
route_id = '20'
service_date_string = '2023-07-26'

In [76]:
# dataframe to contain final summary data for each stop
stats_all_stops = gpd.GeoDataFrame()


In [77]:



# get scheduled stop details
scheduled_stop_details = get_scheduled_stop_details(gtfs_feed, route_id, service_date_string)

# scheduled_stop_details


In [78]:

# get scheduled stop ids
scheduled_stop_ids = get_scheduled_stop_ids(scheduled_stop_details)

# scheduled_stop_ids


In [79]:


# get vehicles
vehicles = get_chn_vehicles(service_date_string)

# vehicles


In [80]:

# get actual stop times
actual_stoptimes = get_actual_stoptimes(route_id, vehicles)

# actual_stoptimes


In [81]:
# get actual stop ids
actual_stop_ids = get_actual_stop_ids(actual_stoptimes)

# actual_stop_ids

In [82]:


# get common stops
common_stops = actual_stop_ids.intersection(scheduled_stop_ids)

common_stops


{'1106',
 '1119',
 '1120',
 '1121',
 '12754',
 '14467',
 '14500',
 '14647',
 '14764',
 '14765',
 '14766',
 '14800',
 '14840',
 '14841',
 '14956',
 '14958',
 '14959',
 '15090',
 '15136',
 '15185',
 '15279',
 '15364',
 '15384',
 '15848',
 '15851',
 '15852',
 '16049',
 '16087',
 '16088',
 '16110',
 '17160',
 '17161',
 '17162',
 '17163',
 '17164',
 '17165',
 '17166',
 '17167',
 '17169',
 '17596',
 '17597',
 '17599',
 '17600',
 '17841',
 '18047',
 '18122',
 '18123',
 '18124',
 '18126',
 '388',
 '389',
 '390',
 '391',
 '394',
 '395',
 '3954',
 '396',
 '397',
 '402',
 '403',
 '406',
 '409',
 '412',
 '413',
 '415',
 '416',
 '417',
 '420',
 '421',
 '423',
 '425',
 '427',
 '428',
 '429',
 '430',
 '431',
 '432',
 '435',
 '436',
 '437',
 '439',
 '440',
 '442',
 '443',
 '445',
 '446',
 '448',
 '449',
 '450',
 '455',
 '456',
 '458',
 '459',
 '460',
 '462',
 '463',
 '464',
 '465',
 '466',
 '468',
 '469',
 '470',
 '4727',
 '473',
 '478',
 '479',
 '480',
 '482',
 '484',
 '485',
 '486',
 '487',
 '494',


In [90]:


# for stop_id in common_stops:

stop_id = '417'
# stop_id = '508'

# list directions found in the data for this stop
directions = actual_stoptimes.loc[actual_stoptimes['stpid'] == stop_id]['rtdir'].unique().tolist()

directions

['Eastbound']

In [48]:
direction = 'Eastbound'

# for direction in directions:


In [91]:

# get scheduled headway stats
active_service_times = get_active_service_times(scheduled_stop_details,stop_id, direction)
active_service_times


Unnamed: 0,start_time,end_time
0,2023-07-26 00:19:32+00:00,2023-07-26 03:11:32+00:00
0,2023-07-26 03:41:32+00:00,2023-07-26 23:59:33+00:00


In [92]:

scheduled_headways = get_scheduled_headways(scheduled_stop_details, stop_id, direction)

scheduled_headways

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,shape_dist_traveled,arrival_hour,departure_hour,route_id,service_id,direction,raw_date,stop_time,previous_stop_time,headway
10032,6570017849020,00:19:32,00:19:32,417,29,Washington/State,0,19619,0,0,20,65712,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 00:19:32+00:00,NaT,NaT
8294,6570013971030,00:45:32,00:45:32,417,29,Washington/State,0,19619,0,0,20,65703,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 00:45:32+00:00,2023-07-26 00:19:32+00:00,0 days 00:26:00
9425,6570015147020,01:11:32,01:11:32,417,29,Washington/State,0,19619,1,1,20,65712,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 01:11:32+00:00,2023-07-26 00:45:32+00:00,0 days 00:26:00
19492,6570032842030,01:41:32,01:41:32,417,29,Washington/State,0,19619,1,1,20,65704,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 01:41:32+00:00,2023-07-26 01:11:32+00:00,0 days 00:30:00
7120,6570010926030,02:11:32,02:11:32,417,29,Washington/State,0,19619,2,2,20,65704,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 02:11:32+00:00,2023-07-26 01:41:32+00:00,0 days 00:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18221,6570030805020,22:52:06,22:52:06,417,29,Michigan,0,19619,22,22,20,65701,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 22:52:06+00:00,2023-07-26 22:35:36+00:00,0 days 00:16:30
4297,6570005561020,23:07:04,23:07:04,417,29,Michigan,0,19619,23,23,20,65701,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 23:07:04+00:00,2023-07-26 22:52:06+00:00,0 days 00:14:58
2056,6570002053030,23:22:03,23:22:03,417,29,Michigan,0,19619,23,23,20,65702,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 23:22:03+00:00,2023-07-26 23:07:04+00:00,0 days 00:14:59
23811,6570041937020,23:40:32,23:40:32,417,29,Washington/State,0,19619,23,23,20,65701,Eastbound,2023-07-26 00:00:00+00:00,2023-07-26 23:40:32+00:00,2023-07-26 23:22:03+00:00,0 days 00:18:29


In [93]:
scheduled_headways['headway']

test_list = scheduled_headways['headway']



# test_list_filtered = 


In [94]:
scheduled_headway_stats = get_headway_stats(scheduled_headways, 'headway', 'Scheduled')

scheduled_headway_stats

Unnamed: 0,Scheduled mean headway (minutes),Scheduled 25th percentile headway (minutes),Scheduled median headway (minutes),Scheduled 75th percentile headway (minutes)
0,13,9,13,15


In [95]:


# get actual headway stats
actual_headways = get_actual_headways(vehicles, route_id, stop_id, direction, active_service_times)

actual_headways


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_stop_direction_servicetime['est_headway'].iloc[0] = None
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_stop_direction_servicetime['est_headway'].iloc[0] = None


Unnamed: 0,vid,tmstmp,lat,lon,hdg,pid,rt,des,pdist,dly,...,data_date,end_time,end_pdist,start_time,start_pdist,stpid,stop_pdist,rtdir,est_stop_time,est_headway
1839,8152,2023-07-26 00:47:00+00:00,41.880978,-87.698674,90,959,20,Washington/State,20839,False,...,2023-07-26,2023-07-26 00:47:00+00:00,20839,2023-07-26 00:42:00+00:00,14225,417,19627,Eastbound,2023-07-26 00:46:05+00:00,NaT
2588,1836,2023-07-26 01:12:00+00:00,41.880939,-87.702940,89,959,20,Washington/State,19681,False,...,2023-07-26,2023-07-26 01:12:00+00:00,19681,2023-07-26 01:07:00+00:00,14220,417,19627,Eastbound,2023-07-26 01:11:57+00:00,0 days 00:25:52
3345,8137,2023-07-26 01:47:00+00:00,41.881373,-87.672425,89,959,20,Washington/State,28005,False,...,2023-07-26,2023-07-26 01:47:00+00:00,28005,2023-07-26 01:42:00+00:00,18819,417,19627,Eastbound,2023-07-26 01:42:26+00:00,0 days 00:30:29
4088,8152,2023-07-26 02:42:00+00:00,41.880978,-87.698357,88,959,20,Washington/State,20925,False,...,2023-07-26,2023-07-26 02:42:00+00:00,20925,2023-07-26 02:37:00+00:00,14712,417,19627,Eastbound,2023-07-26 02:40:57+00:00,0 days 00:58:31
5211,8152,2023-07-26 04:12:00+00:00,41.881004,-87.696487,93,959,20,Washington/State,21430,False,...,2023-07-26,2023-07-26 04:12:00+00:00,21430,2023-07-26 04:07:00+00:00,14970,417,19627,Eastbound,2023-07-26 04:10:36+00:00,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158201,1974,2023-07-26 22:37:00+00:00,41.881190,-87.685696,83,957,20,Michigan,24395,False,...,2023-07-26,2023-07-26 22:37:00+00:00,24395,2023-07-26 22:32:00+00:00,17964,417,19627,Eastbound,2023-07-26 22:33:18+00:00,0 days 00:15:45
159276,1990,2023-07-26 22:52:00+00:00,41.881190,-87.685692,89,957,20,Michigan,24396,False,...,2023-07-26,2023-07-26 22:52:00+00:00,24396,2023-07-26 22:47:00+00:00,17653,417,19627,Eastbound,2023-07-26 22:48:28+00:00,0 days 00:15:10
160608,1938,2023-07-26 23:07:00+00:00,41.881088,-87.689604,88,957,20,Michigan,23302,False,...,2023-07-26,2023-07-26 23:07:00+00:00,23302,2023-07-26 23:02:00+00:00,16111,417,19627,Eastbound,2023-07-26 23:04:27+00:00,0 days 00:15:59
161813,8177,2023-07-26 23:27:00+00:00,41.881251,-87.680991,89,957,20,Michigan,25677,False,...,2023-07-26,2023-07-26 23:27:00+00:00,25677,2023-07-26 23:22:00+00:00,18807,417,19627,Eastbound,2023-07-26 23:22:36+00:00,0 days 00:18:09


In [96]:
# Remove rows without headways (first bus in each active service time)
actual_headways = actual_headways[actual_headways['est_headway'].notnull()]
actual_headways


Unnamed: 0,vid,tmstmp,lat,lon,hdg,pid,rt,des,pdist,dly,...,data_date,end_time,end_pdist,start_time,start_pdist,stpid,stop_pdist,rtdir,est_stop_time,est_headway
2588,1836,2023-07-26 01:12:00+00:00,41.880939,-87.702940,89,959,20,Washington/State,19681,False,...,2023-07-26,2023-07-26 01:12:00+00:00,19681,2023-07-26 01:07:00+00:00,14220,417,19627,Eastbound,2023-07-26 01:11:57+00:00,0 days 00:25:52
3345,8137,2023-07-26 01:47:00+00:00,41.881373,-87.672425,89,959,20,Washington/State,28005,False,...,2023-07-26,2023-07-26 01:47:00+00:00,28005,2023-07-26 01:42:00+00:00,18819,417,19627,Eastbound,2023-07-26 01:42:26+00:00,0 days 00:30:29
4088,8152,2023-07-26 02:42:00+00:00,41.880978,-87.698357,88,959,20,Washington/State,20925,False,...,2023-07-26,2023-07-26 02:42:00+00:00,20925,2023-07-26 02:37:00+00:00,14712,417,19627,Eastbound,2023-07-26 02:40:57+00:00,0 days 00:58:31
5778,1934,2023-07-26 04:32:00+00:00,41.881214,-87.683380,86,2063,20,Illinois Center,25020,False,...,2023-07-26,2023-07-26 04:32:00+00:00,25020,2023-07-26 04:27:00+00:00,16103,417,19619,Eastbound,2023-07-26 04:28:58+00:00,0 days 00:18:22
6369,8137,2023-07-26 04:47:00+00:00,41.880939,-87.702972,89,2063,20,Illinois Center,19664,False,...,2023-07-26,2023-07-26 04:47:00+00:00,19664,2023-07-26 04:42:00+00:00,13250,417,19619,Eastbound,2023-07-26 04:46:58+00:00,0 days 00:18:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158201,1974,2023-07-26 22:37:00+00:00,41.881190,-87.685696,83,957,20,Michigan,24395,False,...,2023-07-26,2023-07-26 22:37:00+00:00,24395,2023-07-26 22:32:00+00:00,17964,417,19627,Eastbound,2023-07-26 22:33:18+00:00,0 days 00:15:45
159276,1990,2023-07-26 22:52:00+00:00,41.881190,-87.685692,89,957,20,Michigan,24396,False,...,2023-07-26,2023-07-26 22:52:00+00:00,24396,2023-07-26 22:47:00+00:00,17653,417,19627,Eastbound,2023-07-26 22:48:28+00:00,0 days 00:15:10
160608,1938,2023-07-26 23:07:00+00:00,41.881088,-87.689604,88,957,20,Michigan,23302,False,...,2023-07-26,2023-07-26 23:07:00+00:00,23302,2023-07-26 23:02:00+00:00,16111,417,19627,Eastbound,2023-07-26 23:04:27+00:00,0 days 00:15:59
161813,8177,2023-07-26 23:27:00+00:00,41.881251,-87.680991,89,957,20,Michigan,25677,False,...,2023-07-26,2023-07-26 23:27:00+00:00,25677,2023-07-26 23:22:00+00:00,18807,417,19627,Eastbound,2023-07-26 23:22:36+00:00,0 days 00:18:09


In [97]:
actual_headway_stats = get_headway_stats(actual_headways, 'est_headway', 'Actual')

actual_headway_stats

Unnamed: 0,Actual mean headway (minutes),Actual 25th percentile headway (minutes),Actual median headway (minutes),Actual 75th percentile headway (minutes)
0,14,8,13,17


In [98]:

stop_df = pd.DataFrame()


In [99]:
stop_df['stop_id'] = [stop_id]


In [100]:
stop_df['route_id'] = [route_id]


In [101]:
# date
stop_df['date'] = [service_date_string]


In [102]:
# day of week
stop_df['day'] = pd.to_datetime(stop_df['date'],infer_datetime_format=True).dt.day_name()


In [103]:
stop_df['direction'] = [direction]


In [104]:
stop_df = pd.concat([stop_df, actual_headway_stats, scheduled_headway_stats], axis=1)

stop_df


Unnamed: 0,stop_id,route_id,date,day,direction,Actual mean headway (minutes),Actual 25th percentile headway (minutes),Actual median headway (minutes),Actual 75th percentile headway (minutes),Scheduled mean headway (minutes),Scheduled 25th percentile headway (minutes),Scheduled median headway (minutes),Scheduled 75th percentile headway (minutes)
0,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15


In [105]:

stats_all_stops = gpd.GeoDataFrame(pd.concat([stats_all_stops, stop_df]))

stats_all_stops


Unnamed: 0,stop_id,route_id,date,day,direction,Actual mean headway (minutes),Actual 25th percentile headway (minutes),Actual median headway (minutes),Actual 75th percentile headway (minutes),Scheduled mean headway (minutes),Scheduled 25th percentile headway (minutes),Scheduled median headway (minutes),Scheduled 75th percentile headway (minutes)
0,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15


In [106]:

stats_all_stops.reset_index(inplace = True, drop = True)

stats_all_stops

Unnamed: 0,stop_id,route_id,date,day,direction,Actual mean headway (minutes),Actual 25th percentile headway (minutes),Actual median headway (minutes),Actual 75th percentile headway (minutes),Scheduled mean headway (minutes),Scheduled 25th percentile headway (minutes),Scheduled median headway (minutes),Scheduled 75th percentile headway (minutes)
0,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15


In [107]:

# combine bus stop geospatial info with the stats dataframe
# to generate a geojson with stats for every stop point
patterns = get_patterns(vehicles, route_id)


In [108]:
stops = get_pattern_stops(patterns)


In [109]:
route_linestring = get_pattern_linestrings(patterns)


In [110]:

# merge stop geodataframe with headway stats
df_stops = gpd.GeoDataFrame(stops[['stpid', 'stpnm', 'geometry']])
df_stops

df_stops

Unnamed: 0,stpid,stpnm,geometry
2,450,Madison & Wabash,POINT (-87.62591 41.88214)
5,18126,Madison & Dearborn/State,POINT (-87.62910 41.88202)
10,18123,Madison & Lasalle,POINT (-87.63294 41.88200)
15,18124,Madison & Franklin,POINT (-87.63589 41.88198)
42,455,Madison & Clinton,POINT (-87.64092 41.88195)
...,...,...,...
123,1119,Michigan & Randolph,POINT (-87.62437 41.88543)
125,1120,Michigan & South Water,POINT (-87.62441 41.88653)
127,1121,Michigan & E. Wacker,POINT (-87.62442 41.88788)
133,3954,Wacker (Upper) & Columbus,POINT (-87.62103 41.88785)


In [111]:
print(stats_all_stops.columns)
print(df_stops.columns)

Index(['stop_id', 'route_id', 'date', 'day', 'direction',
       'Actual mean headway (minutes)',
       'Actual 25th percentile headway (minutes)',
       'Actual median headway (minutes)',
       'Actual 75th percentile headway (minutes)',
       'Scheduled mean headway (minutes)',
       'Scheduled 25th percentile headway (minutes)',
       'Scheduled median headway (minutes)',
       'Scheduled 75th percentile headway (minutes)'],
      dtype='object')
Index(['stpid', 'stpnm', 'geometry'], dtype='object')


In [112]:
stats_all_stops = gpd.GeoDataFrame(stats_all_stops.merge(df_stops, left_on='stop_id', right_on='stpid'))

stats_all_stops


  result.crs = self.crs


Unnamed: 0,stop_id,route_id,date,day,direction,Actual mean headway (minutes),Actual 25th percentile headway (minutes),Actual median headway (minutes),Actual 75th percentile headway (minutes),Scheduled mean headway (minutes),Scheduled 25th percentile headway (minutes),Scheduled median headway (minutes),Scheduled 75th percentile headway (minutes),stpid,stpnm,geometry
0,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15,417,Madison & Albany,POINT (-87.70314 41.88094)
1,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15,417,Madison & Albany,POINT (-87.70314 41.88094)
2,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15,417,Madison & Albany,POINT (-87.70314 41.88094)
3,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15,417,Madison & Albany,POINT (-87.70314 41.88094)
4,417,20,2023-07-26,Wednesday,Eastbound,14,8,13,17,13,9,13,15,417,Madison & Albany,POINT (-87.70314 41.88094)


In [113]:

stats_all_stops = stats_all_stops.drop('stpid', axis=1)
stats_all_stops = stats_all_stops.rename(columns={'stpnm':'stop name', 'stop_id':'stop id'})


In [114]:
stats_all_stops.reset_index(inplace = True, drop = True)



In [115]:


# Export stop data to geojson
json_filepath_stops = f'headway_summaries/route{route_id}_{service_date_string}.json'
stats_all_stops.to_file(filename=json_filepath_stops, driver='GeoJSON')

# export route linestring data to geojson
json_filepath_linestring = f'headway_summaries/route{route_id}_linestring.json'
route_linestring.to_file(json_filepath_linestring, driver='GeoJSON')

INFO:pyogrio._io:Created 5 records
INFO:pyogrio._io:Created 11 records
