In [None]:
import pathlib as pl
import json

import marimo as mo
import pandas as pd
import numpy as np
import geopandas as gp
import matplotlib
import folium as fl

import gtfs_kit as gk


# Resolve the project paths. When `__file__` is defined (script execution) the
# notebook's own directory is used; otherwise (typically an interactive kernel)
# fall back to the current working directory. `pathlib` is imported as `pl`, so
# the fallback must go through `pl.Path` -- a bare `Path` is not in scope.
HERE = pl.Path(__file__).resolve().parent if "__file__" in globals() else pl.Path.cwd()
PROJECT_ROOT = HERE.parent  # notebooks/ -> project/
DATA = (PROJECT_ROOT / "data").resolve()

In [None]:
# List the files inside the Cairns GTFS archive

_feed_zip = DATA / "cairns_gtfs.zip"
gk.list_feed(_feed_zip)

Unnamed: 0,file_name,file_size
0,calendar_dates.txt,387
1,routes.txt,1478
2,trips.txt,143081
3,stops.txt,26183
4,agency.txt,199
5,stop_times.txt,2561019
6,shapes.txt,864694
7,calendar.txt,337


In [None]:
# Load the Cairns feed (distance units set to "m") and summarise it

_feed_zip = DATA / "cairns_gtfs.zip"
feed = gk.read_feed(_feed_zip, dist_units="m")
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Department of Transport and Main Roads - TransLink Division (qconnect)]
1,timezone,Australia/Brisbane
2,start_date,20140526
3,end_date,20141228
4,num_routes,22
...,...,...
7,num_shapes,54
8,sample_date,20140529
9,num_routes_active_on_sample_date,20
10,num_trips_active_on_sample_date,622


In [None]:
# Display the stop times as loaded, then again after `append_dist_to_stop_times`;
# the second table carries an extra `shape_dist_traveled` column.
# `feed_1` is the feed used by the analysis cells that follow.
mo.output.append(feed.stop_times)
feed_1 = feed.append_dist_to_stop_times()
mo.output.append(feed_1.stop_times)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
0,CNS2014-CNS_MUL-Weekday-00-4165878,05:50:00,05:50:00,750337,1,0,0
1,CNS2014-CNS_MUL-Weekday-00-4165878,05:50:00,05:50:00,750000,2,0,0
2,CNS2014-CNS_MUL-Weekday-00-4165878,05:52:00,05:52:00,750001,3,0,0
3,CNS2014-CNS_MUL-Weekday-00-4165878,05:54:00,05:54:00,750002,4,0,0
4,CNS2014-CNS_MUL-Weekday-00-4165878,05:55:00,05:55:00,750003,5,0,0
...,...,...,...,...,...,...,...
37785,CNS2014-CNS_MUL-Sunday-00-4180994,20:59:00,20:59:00,750364,11,0,0
37786,CNS2014-CNS_MUL-Sunday-00-4180994,21:02:00,21:02:00,750073,12,0,0
37787,CNS2014-CNS_MUL-Sunday-00-4180994,21:03:00,21:03:00,750050,13,0,0
37788,CNS2014-CNS_MUL-Sunday-00-4180994,21:06:00,21:06:00,750363,14,0,0

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,shape_dist_traveled
0,CNS2014-CNS_MUL-Saturday-00-4165937,06:16:00,06:16:00,750337,1,0,0,0.000000
1,CNS2014-CNS_MUL-Saturday-00-4165937,06:16:00,06:16:00,750000,2,0,0,468.640310
2,CNS2014-CNS_MUL-Saturday-00-4165937,06:18:00,06:18:00,750001,3,0,0,1190.379646
3,CNS2014-CNS_MUL-Saturday-00-4165937,06:20:00,06:20:00,750002,4,0,0,2154.784620
4,CNS2014-CNS_MUL-Saturday-00-4165937,06:21:00,06:21:00,750003,5,0,0,2619.002693
...,...,...,...,...,...,...,...,...
37785,CNS2014-CNS_MUL-Weekday-00-4180831,18:20:00,18:20:00,750416,25,0,0,28378.533436
37786,CNS2014-CNS_MUL-Weekday-00-4180831,18:20:00,18:20:00,750415,26,0,0,28921.840806
37787,CNS2014-CNS_MUL-Weekday-00-4180831,18:21:00,18:21:00,750414,27,0,0,29708.322706
37788,CNS2014-CNS_MUL-Weekday-00-4180831,18:23:00,18:23:00,750413,28,0,0,30870.740788


In [None]:
# Analysis dates: the first and seventh entries of the feed's first week
week = feed_1.get_first_week()
dates = [week[i] for i in (0, 6)]
dates

In [None]:
# Compute per-trip statistics once and keep them in `trip_stats`, so that later
# cells can reuse them for speed instead of recomputing.

trip_stats = feed_1.compute_trip_stats()
trip_stats

Unnamed: 0,trip_id,route_id,route_short_name,route_type,direction_id,shape_id,stop_pattern_name,num_stops,start_time,end_time,start_stop_id,end_stop_id,is_loop,duration,distance,speed
0,CNS2014-CNS_MUL-Weekday-00-4165878,110-423,110,3,0,1100023,0-1,35,05:50:00,06:50:00,750337,750449,0,1.000000,32.507121,32.507121
1,CNS2014-CNS_MUL-Saturday-00-4165937,110-423,110,3,0,1100023,0-1,35,06:16:00,07:10:00,750337,750449,0,0.900000,32.507121,36.119024
2,CNS2014-CNS_MUL-Weekday-00-4165879,110-423,110,3,0,1100023,0-1,35,06:20:00,07:20:00,750337,750449,0,1.000000,32.507121,32.507121
3,CNS2014-CNS_MUL-Weekday-00-4165880,110-423,110,3,0,1100023,0-1,35,06:50:00,07:50:00,750337,750449,0,1.000000,32.507121,32.507121
4,CNS2014-CNS_MUL-Weekday-00-4165881,110-423,110,3,0,1100023,0-1,35,07:15:00,08:20:00,750337,750449,0,1.083333,32.507121,30.006574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,CNS2014-CNS_MUL-Weekday-00-4179080,150E-423,150E,3,1,150E0009,1-1,40,20:15:00,21:22:00,750453,750412,0,1.116667,38.423999,34.409551
1335,CNS2014-CNS_MUL-Sunday-00-4180866,150E-423,150E,3,1,150E0008,1-1,40,21:00:00,22:09:00,750453,750412,0,1.150000,38.139859,33.165095
1336,CNS2014-CNS_MUL-Saturday-00-4179090,150E-423,150E,3,1,150E0009,1-1,40,21:13:00,22:22:00,750453,750412,0,1.150000,38.423999,33.412173
1337,CNS2014-CNS_MUL-Weekday-00-4179081,150E-423,150E,3,1,150E0009,1-1,40,21:15:00,22:22:00,750453,750412,0,1.116667,38.423999,34.409551


In [None]:
# Network-level stats for the two chosen dates.
# Pass in the precomputed trip stats to avoid recomputing them
# (the original call omitted them, leaving `trip_stats` unused).

network_stats = feed_1.compute_network_stats(dates, trip_stats=trip_stats)
network_stats

Unnamed: 0,date,num_stops,num_routes,num_trips,num_trip_starts,num_trip_ends,peak_num_trips,peak_start_time,peak_end_time,service_distance,service_duration,service_speed
0,20140526,416,20,622,622,617,39,08:16:00,08:18:00,13774.030583,472.6,29.145219
1,20140601,411,14,266,266,263,17,14:31:00,14:37:00,6390.847266,197.683333,32.328711


In [None]:
# Network time series for the chosen dates, binned at a "6h" frequency
nts = feed_1.compute_network_time_series(
    dates,
    freq="6h",
)
nts

Unnamed: 0,datetime,service_speed,num_trips,num_trip_ends,service_duration,num_trip_starts,service_distance
0,2014-05-26 00:00:00,32.548687,3,0,2.033333,3,66.182329
1,2014-05-26 06:00:00,28.50533,242,214,173.8,240,4954.226378
2,2014-05-26 12:00:00,28.493077,281,248,193.216667,253,5505.33739
3,2014-05-26 18:00:00,31.369237,160,155,103.55,126,3248.284486
4,2014-06-01 00:00:00,34.172358,0,0,0.766667,0,26.198808
5,2014-06-01 06:00:00,32.171323,80,68,55.766667,80,1794.087434
6,2014-06-01 12:00:00,32.087845,122,111,81.516667,110,2615.694171
7,2014-06-01 18:00:00,32.781445,87,84,59.633333,76,1954.866853


In [None]:
# Downsample the "6h" network time series to a "12h" frequency
gk.downsample(nts, freq="12h")

Unnamed: 0,datetime,num_trips,num_trip_starts,num_trip_ends,service_distance,service_duration,service_speed
0,2014-05-26 00:00:00,242.0,243.0,214.0,5020.408707,175.833333,28.552087
1,2014-05-26 12:00:00,408.0,379.0,403.0,8753.621876,296.766667,29.496648
2,2014-06-01 00:00:00,80.0,80.0,68.0,1820.286242,56.533333,32.198459
3,2014-06-01 12:00:00,198.0,186.0,195.0,4570.561024,141.15,32.380879


In [None]:
# Stop time series for the first two stops of the feed.
# Select the stops by position (`.iloc[:2]`) rather than by label (`.loc[:1]`),
# so the selection does not depend on the stops table having a default
# 0, 1, 2, ... index.
stop_ids = feed.stops["stop_id"].iloc[:2]
sts = feed_1.compute_stop_time_series(dates, stop_ids=stop_ids, freq="12h")
sts

Unnamed: 0,datetime,stop_id,num_trips
0,2014-05-26 00:00:00,750000,13
1,2014-05-26 00:00:00,750001,13
2,2014-05-26 12:00:00,750000,17
3,2014-05-26 12:00:00,750001,17
4,2014-06-01 00:00:00,750000,5
5,2014-06-01 00:00:00,750001,5
6,2014-06-01 12:00:00,750000,11
7,2014-06-01 12:00:00,750001,11


In [None]:
# Downsample the "12h" stop time series to daily totals.
# Use the canonical uppercase "D" day alias; lowercase "d" is deprecated in
# recent pandas releases.
gk.downsample(sts, freq="D")

Unnamed: 0,datetime,stop_id,num_trips
0,2014-05-26,750000,30.0
1,2014-05-26,750001,30.0
2,2014-06-01,750000,16.0
3,2014-06-01,750001,16.0


In [None]:
# Per-route time series for the chosen dates, binned at a "12h" frequency

rts = feed_1.compute_route_time_series(
    dates,
    freq="12h",
)
rts

Unnamed: 0,datetime,route_id,num_trips,num_trip_starts,num_trip_ends,service_distance,service_duration,service_speed
0,2014-05-26 00:00:00,110-423,23,23,19,677.652816,21.016667,32.243592
1,2014-05-26 00:00:00,111-423,22,22,18,713.781443,21.583333,33.070955
2,2014-05-26 00:00:00,112-423,5,5,4,87.584926,2.483333,35.269098
3,2014-05-26 00:00:00,113-423,3,3,3,74.298371,2.083333,35.663218
4,2014-05-26 00:00:00,120-423,12,12,11,323.598592,9.583333,33.766810
...,...,...,...,...,...,...,...,...
63,2014-06-01 12:00:00,131N-423,1,1,1,12.326820,0.483333,25.503766
64,2014-06-01 12:00:00,133-423,12,11,12,181.926920,6.516667,27.917174
65,2014-06-01 12:00:00,142-423,6,6,6,143.631505,5.100000,28.163040
66,2014-06-01 12:00:00,143W-423,24,22,24,506.182522,17.033333,29.717174


In [None]:
# Timetable of the first route in the routes table, over the chosen dates

route_id = feed_1.routes["route_id"].iloc[0]
feed_1.build_route_timetable(route_id, dates)

Unnamed: 0,date,route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,shape_dist_traveled
0,20140526,110-423,CNS2014-CNS_MUL-Weekday-00,CNS2014-CNS_MUL-Weekday-00-4165878,The Pier Cairns Terminus,0,,1100023,05:50:00,05:50:00,750337,1,0,0,0.000000
1,20140526,110-423,CNS2014-CNS_MUL-Weekday-00,CNS2014-CNS_MUL-Weekday-00-4165878,The Pier Cairns Terminus,0,,1100023,05:50:00,05:50:00,750000,2,0,0,468.640310
2,20140526,110-423,CNS2014-CNS_MUL-Weekday-00,CNS2014-CNS_MUL-Weekday-00-4165878,The Pier Cairns Terminus,0,,1100023,05:52:00,05:52:00,750001,3,0,0,1190.379646
3,20140526,110-423,CNS2014-CNS_MUL-Weekday-00,CNS2014-CNS_MUL-Weekday-00-4165878,The Pier Cairns Terminus,0,,1100023,05:54:00,05:54:00,750002,4,0,0,2154.784620
4,20140526,110-423,CNS2014-CNS_MUL-Weekday-00,CNS2014-CNS_MUL-Weekday-00-4165878,The Pier Cairns Terminus,0,,1100023,05:55:00,05:55:00,750003,5,0,0,2619.002693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3045,20140601,110-423,CNS2014-CNS_MUL-Sunday-00,CNS2014-CNS_MUL-Sunday-00-4166102,Palm Cove,1,,1100016,23:56:00,23:56:00,750038,28,0,0,27827.737141
3046,20140601,110-423,CNS2014-CNS_MUL-Sunday-00,CNS2014-CNS_MUL-Sunday-00-4166102,Palm Cove,1,,1100016,23:59:00,23:59:00,750339,29,0,0,29533.971877
3047,20140601,110-423,CNS2014-CNS_MUL-Sunday-00,CNS2014-CNS_MUL-Sunday-00-4166102,Palm Cove,1,,1100016,24:01:00,24:01:00,750039,30,0,0,30445.639081
3048,20140601,110-423,CNS2014-CNS_MUL-Sunday-00,CNS2014-CNS_MUL-Sunday-00-4166102,Palm Cove,1,,1100016,24:02:00,24:02:00,750040,31,0,0,30692.659229


In [None]:
# Locate trips on the first chosen date at each of 24 hourly times

rng = pd.date_range("1/1/2000", periods=24, freq="h")
# Only the time of day is used; the calendar date of `rng` is discarded here
times = rng.strftime("%H:%M:%S").tolist()
loc = feed_1.locate_trips(dates[0], times)
loc.head()

Unnamed: 0,shape_id,trip_id,time,rel_dist,route_id,direction_id,lon,lat
0,1100023,CNS2014-CNS_MUL-Weekday-00-4165878,06:00:00,0.152945,110-423,0,145.66949,-16.762839
1,1100023,CNS2014-CNS_MUL-Weekday-00-4165879,07:00:00,0.720587,110-423,0,145.732413,-16.86506
2,1100023,CNS2014-CNS_MUL-Weekday-00-4165880,07:00:00,0.152945,110-423,0,145.66949,-16.762839
3,1100023,CNS2014-CNS_MUL-Weekday-00-4165881,08:00:00,0.720587,110-423,0,145.732413,-16.86506
4,1100023,CNS2014-CNS_MUL-Weekday-00-4165882,08:00:00,0.202283,110-423,0,145.675422,-16.768954


In [None]:
# Map the third and fourth routes of the routes table, with their stops

rsns = feed_1.routes.iloc[2:4]["route_short_name"]
feed_1.map_routes(route_short_names=rsns, show_stops=True)

In [None]:
# Alternatively, map the routes without stops via GeoPandas's `explore`,
# coloured by route short name

_routes_gdf = feed.get_routes(as_gdf=True)
_routes_gdf.explore(
    column="route_short_name",
    style_kwds={"weight": 3},
    highlight_kwds={"weight": 8},
    tiles="CartoDB positron",
)

In [None]:
# Map one trip together with the screen line read from GeoJSON

trip_id = "CNS2014-CNS_MUL-Weekday-00-4166247"
screen_line = gp.read_file(DATA / "cairns_screen_line.geojson")
m = feed_1.map_trips([trip_id], show_stops=True, show_direction=True)

# Drop any layer-control and fit-bounds children already on the map;
# presumably so the ones added below also account for the screen-line layer.
keys_to_remove = [
    key
    for key in m._children
    if key.startswith(("layer_control_", "fit_bounds_"))
]
for key in keys_to_remove:
    del m._children[key]

# Draw the screen line in red inside its own toggleable feature group
fg = fl.FeatureGroup(name="Screen lines")
fl.GeoJson(
    screen_line,
    style_function=lambda feature: {"color": "red", "weight": 2},
).add_to(fg)
fg.add_to(m)

# Re-add the layer control and zoom the map to the screen line's bounds
fl.LayerControl().add_to(m)
m.fit_bounds(fg.get_bounds())
m

In [None]:
# Screen line counts on the chosen dates, filtered to the trip mapped above

slc = feed_1.compute_screen_line_counts(screen_line, dates=dates)
slc[slc["trip_id"] == trip_id]

  pd.concat([g0, g1])
