In [None]:
import pathlib as pb
import json

import polars as pl
import polars_st as st
import marimo as mo
import folium as fl

import gtfs_kit_polars as gk


# Locate the project's data directory relative to this notebook.
# `__file__` is only defined when the notebook runs as a script;
# otherwise fall back to the current working directory.
if "__file__" in globals():
    HERE = pb.Path(__file__).resolve().parent
else:
    HERE = pb.Path.cwd()
PROJECT_ROOT = HERE.parent  # notebooks/ -> project/
DATA = PROJECT_ROOT.joinpath("data").resolve()

# Notes

- Table-valued outputs are Polars LazyFrames, so we call `.collect()` on them to display them here

In [None]:
# List the files contained in the feed archive

_feed_archive = DATA / "cairns_gtfs.zip"
gk.list_feed(_feed_archive).collect()

file_name,file_size
str,i64
"""calendar_dates.txt""",387
"""routes.txt""",1478
"""trips.txt""",143081
"""stops.txt""",26183
"""agency.txt""",199
"""stop_times.txt""",2561019
"""shapes.txt""",864694
"""calendar.txt""",337


In [None]:
# Load the feed with dist_units="m", then show its summary indicators

feed = gk.read_feed(DATA / "cairns_gtfs.zip", dist_units="m")
_feed_summary = feed.describe()
_feed_summary.collect()

indicator,value
str,str
"""agencies""","""['Department of Transport and …"
"""timezone""","""Australia/Brisbane"""
"""start_date""","""20140526"""
"""end_date""","""20141228"""
"""num_routes""","""22"""
…,…
"""num_shapes""","""54"""
"""sample_date""","""20140529"""
"""num_routes_active_on_sample_da…","""20"""
"""num_trips_active_on_sample_dat…","""622"""


In [None]:
# Show the first stop times before and after appending distances to them;
# the second table gains a `shape_dist_traveled` column.
_stop_times_before = feed.stop_times.head().collect()
mo.output.append(_stop_times_before)
feed_1 = feed.append_dist_to_stop_times()
_stop_times_after = feed_1.stop_times.head().collect()
mo.output.append(_stop_times_after)

trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
str,str,str,str,i32,i8,i8
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:50:00""","""05:50:00""","""750337""",1,0,0
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:50:00""","""05:50:00""","""750000""",2,0,0
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:52:00""","""05:52:00""","""750001""",3,0,0
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:54:00""","""05:54:00""","""750002""",4,0,0
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:55:00""","""05:55:00""","""750003""",5,0,0

trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,shape_dist_traveled
str,str,str,str,i32,i8,i8,f64
"""CNS2014-CNS_MUL-Saturday-00-41…","""06:16:00""","""06:16:00""","""750337""",1,0,0,0.0
"""CNS2014-CNS_MUL-Saturday-00-41…","""06:16:00""","""06:16:00""","""750000""",2,0,0,468.64031
"""CNS2014-CNS_MUL-Saturday-00-41…","""06:18:00""","""06:18:00""","""750001""",3,0,0,1190.379646
"""CNS2014-CNS_MUL-Saturday-00-41…","""06:20:00""","""06:20:00""","""750002""",4,0,0,2154.78462
"""CNS2014-CNS_MUL-Saturday-00-41…","""06:21:00""","""06:21:00""","""750003""",5,0,0,2619.002693


In [None]:
# Use the first and last dates of the feed's first week as sample dates.
# Index the last date from the end so that a first week with fewer than
# seven dates does not raise an IndexError; for a full week this is the
# same element as `week[6]`.
week = feed_1.get_first_week()
dates = [week[0], week[-1]]
dates

In [None]:
# Trip stats, collected once and kept for reuse in later speed ups

_trip_stats_lazy = feed_1.compute_trip_stats()
trip_stats = _trip_stats_lazy.collect()
trip_stats

trip_id,route_id,route_short_name,route_type,direction_id,shape_id,stop_pattern_name,num_stops,start_time,end_time,start_stop_id,end_stop_id,duration,is_loop,distance,speed
str,str,str,i8,i8,str,str,u32,str,str,str,str,f64,bool,f64,f64
"""CNS2014-CNS_MUL-Weekday-00-416…","""110-423""","""110""",3,0,"""1100023""","""0-1""",35,"""05:50:00""","""06:50:00""","""750337""","""750449""",1.0,false,32.507121,32.507121
"""CNS2014-CNS_MUL-Saturday-00-41…","""110-423""","""110""",3,0,"""1100023""","""0-1""",35,"""06:16:00""","""07:10:00""","""750337""","""750449""",0.9,false,32.507121,36.119024
"""CNS2014-CNS_MUL-Weekday-00-416…","""110-423""","""110""",3,0,"""1100023""","""0-1""",35,"""06:20:00""","""07:20:00""","""750337""","""750449""",1.0,false,32.507121,32.507121
"""CNS2014-CNS_MUL-Weekday-00-416…","""110-423""","""110""",3,0,"""1100023""","""0-1""",35,"""06:50:00""","""07:50:00""","""750337""","""750449""",1.0,false,32.507121,32.507121
"""CNS2014-CNS_MUL-Weekday-00-416…","""110-423""","""110""",3,0,"""1100023""","""0-1""",35,"""07:15:00""","""08:20:00""","""750337""","""750449""",1.083333,false,32.507121,30.006574
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""CNS2014-CNS_MUL-Weekday-00-417…","""150E-423""","""150E""",3,1,"""150E0009""","""1-1""",40,"""20:15:00""","""21:22:00""","""750453""","""750412""",1.116667,false,38.423999,34.409551
"""CNS2014-CNS_MUL-Sunday-00-4180…","""150E-423""","""150E""",3,1,"""150E0008""","""1-1""",40,"""21:00:00""","""22:09:00""","""750453""","""750412""",1.15,false,38.139859,33.165095
"""CNS2014-CNS_MUL-Saturday-00-41…","""150E-423""","""150E""",3,1,"""150E0009""","""1-1""",40,"""21:13:00""","""22:22:00""","""750453""","""750412""",1.15,false,38.423999,33.412173
"""CNS2014-CNS_MUL-Weekday-00-417…","""150E-423""","""150E""",3,1,"""150E0009""","""1-1""",40,"""21:15:00""","""22:22:00""","""750453""","""750412""",1.116667,false,38.423999,34.409551


In [None]:
# Network stats for the sample dates.
# Pass in the trip stats computed earlier to avoid recomputing them; the
# original call omitted them despite this comment. `.lazy()` hands the
# collected DataFrame over as a LazyFrame.
# NOTE(review): assumes `compute_network_stats` accepts a `trip_stats`
# keyword, as the original comment implies — confirm against the library.

network_stats = feed_1.compute_network_stats(
    dates, trip_stats=trip_stats.lazy()
).collect()
network_stats

date,num_stops,num_routes,num_trips,num_trip_starts,num_trip_ends,service_distance,service_duration,service_speed,peak_num_trips,peak_start_time,peak_end_time
str,u32,u32,u32,u32,u32,f64,f64,f64,i32,str,str
"""20140526""",416,20,622,622,622,13774.030583,472.6,29.145219,39,"""08:16:00""","""08:18:00"""
"""20140601""",411,14,266,266,266,6390.847266,197.683333,32.328711,17,"""14:31:00""","""14:37:00"""


In [None]:
# Network time series for the sample dates, binned every 6 * 60 minutes
_nts_bin_minutes = 6 * 60
nts = feed_1.compute_network_time_series(dates, num_minutes=_nts_bin_minutes).collect()
nts

datetime,num_trip_starts,num_trip_ends,num_trips,service_distance,service_duration,service_speed
datetime[μs],f64,f64,f64,f64,f64,f64
2014-05-26 00:00:00,3.0,0.0,3.0,66.182329,2.033333,32.548687
2014-05-26 06:00:00,240.0,214.0,242.0,4954.226378,173.8,28.50533
2014-05-26 12:00:00,253.0,248.0,281.0,5505.33739,193.216667,28.493077
2014-05-26 18:00:00,126.0,155.0,160.0,3248.284486,103.55,31.369237
2014-06-01 00:00:00,0.0,0.0,0.0,26.198808,0.766667,34.172358
2014-06-01 06:00:00,80.0,68.0,80.0,1794.087434,55.766667,32.171323
2014-06-01 12:00:00,110.0,111.0,122.0,2615.694171,81.516667,32.087845
2014-06-01 18:00:00,76.0,84.0,87.0,1954.866853,59.633333,32.781445


In [None]:
# Downsample the network time series to 12 * 60 minute bins
_nts_downsampled = gk.downsample(nts, num_minutes=12 * 60)
_nts_downsampled.collect()

datetime,num_trips,num_trip_starts,num_trip_ends,service_distance,service_duration,service_speed
datetime[μs],f64,f64,f64,f64,f64,f64
2014-05-26 00:00:00,242.0,243.0,214.0,5020.408707,175.833333,28.552087
2014-05-26 12:00:00,408.0,379.0,403.0,8753.621876,296.766667,29.496648
2014-06-01 00:00:00,80.0,80.0,68.0,1820.286242,56.533333,32.198459
2014-06-01 12:00:00,198.0,186.0,195.0,4570.561024,141.15,32.380879


In [None]:
# Stop time series for the first stop listed in the feed
_first_stop = feed.stops.head(1).collect()
stop_ids = _first_stop["stop_id"].to_list()
_sts_lazy = feed_1.compute_stop_time_series(
    dates,
    stop_ids=stop_ids,
    num_minutes=12 * 60,
)
sts = _sts_lazy.collect()
sts

datetime,stop_id,num_trips
datetime[μs],str,i32
2014-05-26 00:00:00,"""750000""",13
2014-05-26 12:00:00,"""750000""",17
2014-06-01 00:00:00,"""750000""",5
2014-06-01 12:00:00,"""750000""",11


In [None]:
# Downsample the stop time series to 24 * 60 minute bins
_sts_downsampled = gk.downsample(sts, num_minutes=24 * 60)
_sts_downsampled.collect()

datetime,stop_id,num_trips
datetime[μs],str,i32
2014-05-26 00:00:00,"""750000""",30
2014-06-01 00:00:00,"""750000""",16


In [None]:
# Route time series for the sample dates, binned every 12 * 60 minutes

_rts_lazy = feed_1.compute_route_time_series(dates, num_minutes=12 * 60)
rts = _rts_lazy.collect()
rts

datetime,route_id,num_trips,num_trip_starts,num_trip_ends,service_distance,service_duration,service_speed
datetime[μs],str,f64,f64,f64,f64,f64,f64
2014-05-26 00:00:00,"""110-423""",23.0,23.0,19.0,677.652816,21.016667,32.243592
2014-05-26 00:00:00,"""111-423""",22.0,22.0,18.0,713.781443,21.583333,33.070955
2014-05-26 00:00:00,"""112-423""",5.0,5.0,4.0,87.584926,2.483333,35.269098
2014-05-26 00:00:00,"""113-423""",3.0,3.0,3.0,74.298371,2.083333,35.663218
2014-05-26 00:00:00,"""120-423""",12.0,12.0,11.0,323.598592,9.583333,33.76681
…,…,…,…,…,…,…,…
2014-06-01 12:00:00,"""131N-423""",1.0,1.0,1.0,12.32682,0.483333,25.503766
2014-06-01 12:00:00,"""133-423""",12.0,11.0,12.0,181.92692,6.516667,27.917174
2014-06-01 12:00:00,"""142-423""",6.0,6.0,6.0,143.631505,5.1,28.16304
2014-06-01 12:00:00,"""143W-423""",24.0,22.0,24.0,506.182522,17.033333,29.717174


In [None]:
# Timetable of the first route listed in the feed, for the sample dates

_first_route = feed_1.routes.head(1).collect()
route_id = _first_route["route_id"].item(0)
_route_timetable = feed_1.build_route_timetable(route_id, dates)
_route_timetable.collect()

trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,shape_dist_traveled,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,date
str,str,str,str,i32,i8,i8,f64,str,str,str,i8,str,str,str
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:50:00""","""05:50:00""","""750337""",1,0,0,0.0,"""110-423""","""CNS2014-CNS_MUL-Weekday-00""","""The Pier Cairns Terminus""",0,,"""1100023""","""20140526"""
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:50:00""","""05:50:00""","""750000""",2,0,0,468.64031,"""110-423""","""CNS2014-CNS_MUL-Weekday-00""","""The Pier Cairns Terminus""",0,,"""1100023""","""20140526"""
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:52:00""","""05:52:00""","""750001""",3,0,0,1190.379646,"""110-423""","""CNS2014-CNS_MUL-Weekday-00""","""The Pier Cairns Terminus""",0,,"""1100023""","""20140526"""
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:54:00""","""05:54:00""","""750002""",4,0,0,2154.78462,"""110-423""","""CNS2014-CNS_MUL-Weekday-00""","""The Pier Cairns Terminus""",0,,"""1100023""","""20140526"""
"""CNS2014-CNS_MUL-Weekday-00-416…","""05:55:00""","""05:55:00""","""750003""",5,0,0,2619.002693,"""110-423""","""CNS2014-CNS_MUL-Weekday-00""","""The Pier Cairns Terminus""",0,,"""1100023""","""20140526"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""CNS2014-CNS_MUL-Sunday-00-4166…","""23:56:00""","""23:56:00""","""750038""",28,0,0,27827.737141,"""110-423""","""CNS2014-CNS_MUL-Sunday-00""","""Palm Cove""",1,,"""1100016""","""20140601"""
"""CNS2014-CNS_MUL-Sunday-00-4166…","""23:59:00""","""23:59:00""","""750339""",29,0,0,29533.971877,"""110-423""","""CNS2014-CNS_MUL-Sunday-00""","""Palm Cove""",1,,"""1100016""","""20140601"""
"""CNS2014-CNS_MUL-Sunday-00-4166…","""24:01:00""","""24:01:00""","""750039""",30,0,0,30445.639081,"""110-423""","""CNS2014-CNS_MUL-Sunday-00""","""Palm Cove""",1,,"""1100016""","""20140601"""
"""CNS2014-CNS_MUL-Sunday-00-4166…","""24:02:00""","""24:02:00""","""750040""",31,0,0,30692.659229,"""110-423""","""CNS2014-CNS_MUL-Sunday-00""","""Palm Cove""",1,,"""1100016""","""20140601"""


In [None]:
# Locate trips on the first sample date at the two given times

_locate_times = ["08:00:00", "09:00:00"]
loc = feed_1.locate_trips(dates[0], times=_locate_times).collect()
loc

trip_id,shape_id,route_id,direction_id,time,rel_dist,lon,lat
str,str,str,i8,str,f64,f64,f64
"""CNS2014-CNS_MUL-Weekday-00-418…","""1500020""","""150-423""",0,"""09:00:00""",1.0,145.779299,-16.920767
"""CNS2014-CNS_MUL-Weekday-00-417…","""1300017""","""130-423""",1,"""08:00:00""",0.975635,145.739956,-16.925606
"""CNS2014-CNS_MUL-Weekday-00-416…","""1100023""","""110-423""",0,"""09:00:00""",0.152945,145.669459,-16.762864
"""CNS2014-CNS_MUL-Weekday-00-416…","""1210012""","""121-423""",0,"""08:00:00""",0.496428,145.733331,-16.87531
"""CNS2014-CNS_MUL-Weekday-00-418…","""1430027""","""143-423""",1,"""09:00:00""",0.239443,145.753483,-16.933478
…,…,…,…,…,…,…,…
"""CNS2014-CNS_MUL-Weekday-00-416…","""1100024""","""110-423""",1,"""09:00:00""",0.878124,145.663138,-16.758061
"""CNS2014-CNS_MUL-Weekday-00-417…","""1410016""","""141-423""",0,"""08:00:00""",0.221598,145.728196,-16.961232
"""CNS2014-CNS_MUL-Weekday-00-416…","""1210013""","""121-423""",1,"""09:00:00""",1.0,145.692973,-16.906822
"""CNS2014-CNS_MUL-Weekday-00-416…","""1210013""","""121-423""",1,"""08:00:00""",1.0,145.692973,-16.906822


In [None]:
# Map two routes (positions 2 and 3 of the first few routes) with their stops

_leading_routes = feed_1.routes.head().collect()
rsns = _leading_routes["route_short_name"].to_list()[2:4]
feed_1.map_routes(route_short_names=rsns, show_stops=True)

In [None]:
# Alternatively, map routes without stops using GeoPandas's explore

_routes_gdf = feed.get_routes(as_geo=True).collect().st.to_geopandas()
_routes_gdf.explore(
    column="route_short_name",
    style_kwds={"weight": 3},
    highlight_kwds={"weight": 8},
    tiles="CartoDB positron",
)

In [None]:
# Show a screen line on top of a single-trip map

trip_id = "CNS2014-CNS_MUL-Weekday-00-4166247"
m = feed_1.map_trips([trip_id], show_stops=True, show_direction=True)

# Read the screen line and turn its first row into a GeoJSON dict for Folium
screen_line = st.read_file(DATA / "cairns_screen_line.geojson")
screen_line_gj = json.loads(screen_line.st.to_geojson().row(0)[0])

# Remove the map's existing layer-control and fit-bounds children;
# fresh ones are added below, after the screen line layer is in place
keys_to_remove = [
    key for key in m._children if key.startswith(("layer_control_", "fit_bounds_"))
]
for key in keys_to_remove:
    del m._children[key]

# Draw the screen line in red inside its own feature group
fg = fl.FeatureGroup(name="Screen lines")
_screen_line_layer = fl.GeoJson(
    screen_line_gj,
    style_function=lambda feature: {"color": "red", "weight": 2},
)
_screen_line_layer.add_to(fg)
fg.add_to(m)

# Re-add the layer control and zoom the map to the screen line
fl.LayerControl().add_to(m)
m.fit_bounds(fg.get_bounds())
m

In [None]:
# Screen line counts for the sample dates, filtered to the trip mapped above

slc = feed_1.compute_screen_line_counts(screen_line, dates=dates).collect()
_is_mapped_trip = pl.col("trip_id") == trip_id
slc.filter(_is_mapped_trip)

date,screen_line_id,shape_id,trip_id,direction_id,route_id,route_short_name,route_type,crossing_direction,crossing_time,crossing_dist_m
str,str,str,str,i8,str,str,i8,i32,str,f64
"""20140526""","""sl000""","""1120011""","""CNS2014-CNS_MUL-Weekday-00-416…",0,"""112-423""","""112""",3,-1,"""08:09:43""",9399.456873
"""20140526""","""sl000""","""1120011""","""CNS2014-CNS_MUL-Weekday-00-416…",0,"""112-423""","""112""",3,1,"""08:13:02""",11910.653361
