In [1]:
import pandas as pd
import json
from collections import defaultdict

# Build a stop_id -> [route_id, ...] index from the feed's text files and
# save it as JSON.

# Read the input files
stops_df = pd.read_csv('input/stops.txt')  # not used in this cell
trips_df = pd.read_csv('input/trips.txt')
stop_times_df = pd.read_csv('input/stop_times.txt')

# Create a mapping of trip_id to route_id
# (if a trip_id is repeated in trips.txt, the last row wins)
trip_to_route = dict(zip(trips_df['trip_id'], trips_df['route_id']))

# Create a mapping of stop_id to route_ids
stop_to_routes = defaultdict(set)

# Only the distinct (stop_id, trip_id) pairs matter for the result, so
# de-duplicate first and walk the two columns directly. This avoids
# DataFrame.iterrows(), which builds a Series per row and does not preserve
# column dtypes. drop_duplicates keeps the first occurrence of each pair, so
# stops are still discovered in file order.
stop_trip_pairs = stop_times_df[['stop_id', 'trip_id']].drop_duplicates()
for stop_id, trip_id in zip(stop_trip_pairs['stop_id'], stop_trip_pairs['trip_id']):
    # stop_times rows whose trip is absent from trips.txt are skipped
    if trip_id in trip_to_route:
        stop_to_routes[stop_id].add(trip_to_route[trip_id])

# Convert sets to lists for JSON serialization. Sets have no defined order
# (and string hashing varies between interpreter runs), so sort the routes to
# make the output file reproducible; key=str keeps the sort well-defined even
# if the IDs are not all of one type.
stop_routes_dict = {
    stop_id: sorted(routes, key=str)
    for stop_id, routes in stop_to_routes.items()
}

# Save the results
with open('output/stop_routes.json', 'w', encoding='utf-8') as f:
    json.dump(stop_routes_dict, f, indent=2)

print(f"Processed {len(stop_routes_dict)} stops with their associated routes")

# Display a sample of the results
list(stop_routes_dict.items())[:5]

Processed 125 stops with their associated routes


[('PF_A15_C', ['RED']),
 ('PF_A14_C', ['RED']),
 ('PF_A13_C', ['RED']),
 ('PF_A12_C', ['RED']),
 ('PF_A11_C', ['RED'])]