### GTFS Reference Data Access

In [None]:
from gtfs_data_access import (
    get_journey_by_id,
    get_current_journeys,
    get_trip_summary,
)

# Look-ahead window for the query. The banner is built from the same value so
# the printed label cannot drift from what is actually requested (previously
# the label said 3 hours while the call asked for 1).
hours_ahead = 1
hours_label = "Hour" if hours_ahead == 1 else "Hours"

print(f"=== Current Route 1 Journeys (Next {hours_ahead} {hours_label}) ===")
journeys = get_current_journeys(route_ids=["1"], hours_ahead=hours_ahead)
print(f"Found {len(journeys)} journey segments")
# Last expression of the cell: shows the first five rows as the cell output.
journeys.head(5)

In [None]:
import json

# Reference trip looked up in this cell:
#   trip_id:   AFA24GEN-1093-Weekday-00_113950_1..N03R
#   route:     1
#   direction: North
#   headsign:  Van Cortlandt Park-242 St
#   summary:   38 stops from "South Ferry" to "Van Cortlandt Park-242 St"
trip_id = "AFA24GEN-1093-Weekday-00_113950_1..N03R"

print("=== Specific Journey ===")
journeys = get_journey_by_id(trip_id)
segment_count = len(journeys)
print(f"Found {segment_count} journey segments")
# Last expression of the cell: shows the first five rows as the cell output.
journeys.head(5)

In [None]:
# Request the trip summary with from_stop="14 St" and pretty-print the result
# as indented JSON.
print("=== Partial Trip Summary ===")
partial_summary = get_trip_summary(trip_id, from_stop="14 St")
summary_json = json.dumps(partial_summary, indent=2)
print(summary_json)

### Write to Turbopuffer

In [32]:
import os
import turbopuffer as tpuf
from gtfs_data_access import get_journeys_json
from dotenv import load_dotenv

# Pull TURBOPUFFER_* settings from a local .env file (if any) into the environment.
load_dotenv()

# Fail fast with a clear message rather than discovering a missing key on the
# first network call.
api_key = os.getenv("TURBOPUFFER_API_KEY")
if not api_key:
    raise RuntimeError(
        "TURBOPUFFER_API_KEY is not set; export it or add it to your .env file"
    )
tpuf.api_key = api_key

# Only override the base URL when one is configured: assigning None here would
# replace whatever default the client library ships with.
api_base_url = os.getenv("TURBOPUFFER_BASE_URL")
if api_base_url:
    tpuf.api_base_url = api_base_url

ns = tpuf.Namespace("mta-gtfs-journeys")
try:
    # Start from a clean namespace so re-running this cell does not mix old
    # and new documents.
    ns.delete_all()
except tpuf.NotFoundError:
    # Nothing to delete on the first run.
    pass

# Attribute schema sent with every write. Fields marked `filterable` are used
# in query filters; fields marked `full_text_search` are ranked with BM25.
schema = {
    "id": {"type": "string"},
    "trip_id": {"type": "string", "filterable": True},
    "service_id": {"type": "string", "filterable": True},
    "route_id": {"type": "string", "filterable": True},
    "route_name": {"type": "string", "full_text_search": True},
    "direction": {"type": "string", "filterable": True},
    "trip_headsign": {"type": "string"},
    "stop_id": {"type": "string"},
    "stop_sequence": {"type": "int"},
    "stop_name": {"type": "string", "full_text_search": True},
    "next_stop_name": {"type": "string", "full_text_search": True},
    "arrival_time": {"type": "string", "filterable": True},
    "arrival_time_epoch": {"type": "int", "filterable": True},
}

# Write journey documents in batches
for batch in get_journeys_json(route_ids=["1", "2", "3"], batch_size=10000):
    ns.write(upsert_rows=batch, schema=schema)

print(f"Upserted {ns.approx_count():,} journey documents to namespace '{ns.name}'")

Upserted 108,707 journey documents to namespace 'mta-gtfs-journeys'


In [41]:
import pandas as pd
from tabulate import tabulate
from datetime import datetime, timedelta

# Read the clock once so both window bounds and the service day describe the
# same instant (separate now() calls could straddle a second or midnight).
now = datetime.now()
window = timedelta(minutes=30)

time_now = now.time().strftime("%H:%M:%S")
time_plus = (now + window).time().strftime("%H:%M:%S")
print(f"Train arrival time between: {time_now} and {time_plus}")

# The query filters compare `arrival_time_epoch` against the `.value` of these
# timestamps parsed from a time-of-day string.
# NOTE(review): assumes gtfs_data_access derives arrival_time_epoch the same
# way — confirm against that module.
query_time_now = pd.to_datetime(time_now, format="%H:%M:%S")
# Add the window to the lower bound instead of re-parsing `time_plus`: within
# 30 minutes of midnight the wall-clock string wraps to 00:xx, which would put
# the upper bound below the lower bound and make the filter match nothing.
query_time_plus = query_time_now + window

weekday = now.strftime("%A")
# Choose the schedule from the numeric weekday (Monday=0 ... Sunday=6) so the
# result does not depend on locale-specific day names.
service_id = {5: "Saturday", 6: "Sunday"}.get(now.weekday(), "Weekday")

results = ns.query(
    # Rank stops by BM25 text relevance to "14 St".
    rank_by=["stop_name", "BM25", "14 St"],
    filters=[
        "And",
        [
            ["direction", "Eq", "North"],
            ["service_id", "Eq", service_id],
            ["arrival_time_epoch", "Gt", query_time_now.value],
            ["arrival_time_epoch", "Lt", query_time_plus.value],
        ],
    ],
    top_k=20,
    include_attributes=[
        "route_name",
        "direction",
        "trip_headsign",
        "stop_name",
        "stop_sequence",
        "arrival_time",
        "next_stop_name",
    ],
)

# One table row per returned document, built from its attribute dict.
df = pd.DataFrame([row.attributes for row in results.rows])
print(tabulate(df, headers='keys', tablefmt='psql', showindex=True))


Train arrival time between: 11:37:17 and 12:07:17
+----+---------------------------+-------------+---------------------------+-----------------+----------------+-------------+---------------------------+
|    | route_name                | direction   | next_stop_name            |   stop_sequence | arrival_time   | stop_name   | trip_headsign             |
|----+---------------------------+-------------+---------------------------+-----------------+----------------+-------------+---------------------------|
|  0 | Broadway - 7 Avenue Local | North       | 18 St                     |               9 | 11:38:30       | 14 St       | Van Cortlandt Park-242 St |
|  1 | Broadway - 7 Avenue Local | North       | 18 St                     |               9 | 11:44:30       | 14 St       | Van Cortlandt Park-242 St |
|  2 | Broadway - 7 Avenue Local | North       | 18 St                     |               9 | 11:49:30       | 14 St       | Van Cortlandt Park-242 St |
|  3 | Broadway - 7 Avenue