In [None]:
import pandas as pd
import os

# Directory holding the unzipped static GTFS feed (adjust to your layout)
data_path = "../data/static/unpacked/"


def _read_gtfs_table(file_name):
    """Read one GTFS text file located under ``data_path`` into a DataFrame."""
    return pd.read_csv(os.path.join(data_path, file_name))


# Core GTFS tables, one DataFrame per file
agency = _read_gtfs_table("agency.txt")
calendar_dates = _read_gtfs_table("calendar_dates.txt")
calendar = _read_gtfs_table("calendar.txt")
routes = _read_gtfs_table("routes.txt")
shapes = _read_gtfs_table("shapes.txt")
stop_times = _read_gtfs_table("stop_times.txt")
stops = _read_gtfs_table("stops.txt")
transfers = _read_gtfs_table("transfers.txt")
trips = _read_gtfs_table("trips.txt")

# Preview the routes table
routes.head()

Unnamed: 0,agency_id,route_id,route_short_name,route_long_name,route_type,route_desc,route_url,route_color,route_text_color
0,MTA NYCT,1,1,Broadway - 7 Avenue Local,1,Trains operate between 242 St in the Bronx and...,http://web.mta.info/nyct/service/pdf/t1cur.pdf,EE352E,
1,MTA NYCT,2,2,7 Avenue Express,1,"Trains operate between Wakefield-241 St, Bronx...",http://web.mta.info/nyct/service/pdf/t2cur.pdf,EE352E,
2,MTA NYCT,3,3,7 Avenue Express,1,"Trains operate between 148 St, 7 Av, Manhattan...",http://web.mta.info/nyct/service/pdf/t3cur.pdf,EE352E,
3,MTA NYCT,4,4,Lexington Avenue Express,1,Trains operate daily between Woodlawn/Jerome A...,http://web.mta.info/nyct/service/pdf/t4cur.pdf,00933C,
4,MTA NYCT,5,5,Lexington Avenue Express,1,"Weekdays daytime, most trains operate between ...",http://web.mta.info/nyct/service/pdf/t5cur.pdf,00933C,


In [3]:
# Number of distinct (non-null) route identifiers in routes.txt
len(routes["route_id"].dropna().unique())

30

In [4]:
# Attach the route attributes to every trip through the shared route_id key
trips_merged = pd.merge(trips, routes, on="route_id")

# Preview the identifying columns of the joined table
trips_merged.loc[:, ["trip_id", "route_id", "route_long_name", "direction_id"]].head()

Unnamed: 0,trip_id,route_id,route_long_name,direction_id
0,AFA23GEN-1038-Sunday-00_000600_1..S03R,1,Broadway - 7 Avenue Local,1
1,AFA23GEN-1038-Sunday-00_002600_1..S03R,1,Broadway - 7 Avenue Local,1
2,AFA23GEN-1038-Sunday-00_004600_1..S03R,1,Broadway - 7 Avenue Local,1
3,AFA23GEN-1038-Sunday-00_006600_1..S03R,1,Broadway - 7 Avenue Local,1
4,AFA23GEN-1038-Sunday-00_007200_1..N03R,1,Broadway - 7 Avenue Local,0


In [5]:
# Use the first trip listed in trips.txt as a worked example
sample_trip_id = trips["trip_id"].iloc[0]

# Keep that trip's stop_times rows, attach the stop attributes from stops.txt,
# and store the result ordered by stop_sequence. Sorting the stored frame
# (rather than only the displayed copy) means later positional access such as
# `sample_stop_times.iloc[0]` really is the first stop of the trip, without
# relying on stop_times.txt already being in sequence order.
sample_stop_times = (
    stop_times[stop_times["trip_id"] == sample_trip_id]
    .merge(stops, on="stop_id")
    .sort_values("stop_sequence")
)

# Show the timetable of the sample trip, stop by stop
sample_stop_times[["stop_sequence", "stop_name", "arrival_time", "departure_time"]]

Unnamed: 0,stop_sequence,stop_name,arrival_time,departure_time
0,1,Van Cortlandt Park-242 St,00:06:00,00:06:00
1,2,238 St,00:07:30,00:07:30
2,3,231 St,00:09:00,00:09:00
3,4,Marble Hill-225 St,00:10:30,00:10:30
4,5,215 St,00:12:00,00:12:00
5,6,207 St,00:13:00,00:13:00
6,7,Dyckman St,00:14:30,00:14:30
7,8,191 St,00:16:00,00:16:00
8,9,181 St,00:17:30,00:17:30
9,10,168 St-Washington Hts,00:19:30,00:19:30


In [6]:
import folium

# Open the map on the first row of the sample trip's stop table
first_stop = sample_stop_times.iloc[0]
m = folium.Map(
    location=[first_stop["stop_lat"], first_stop["stop_lon"]],
    zoom_start=12,
)

# One blue circle marker per stop on the trip; the popup shows the stop name
for lat, lon, stop_name in zip(
    sample_stop_times["stop_lat"],
    sample_stop_times["stop_lon"],
    sample_stop_times["stop_name"],
):
    folium.CircleMarker(
        location=[lat, lon],
        radius=4,
        popup=stop_name,
        color='blue',
        fill=True,
    ).add_to(m)

# Render the map as the cell output
m


In [7]:
# Pick a route, take the first trip listed for it, and read that trip's shape_id
sample_route_id = "A"  # swap in any other route_id to explore a different line
sample_trip = trips[trips["route_id"].eq(sample_route_id)].iloc[0]
sample_shape_id = sample_trip.loc["shape_id"]
sample_shape_id


'A..S74R'

In [8]:
# Keep only the points of the chosen shape, ordered by their sequence number
shape_data = (
    shapes.loc[shapes["shape_id"].eq(sample_shape_id)]
    .sort_values(by="shape_pt_sequence")
)
shape_data.head()

Unnamed: 0,shape_id,shape_pt_sequence,shape_pt_lat,shape_pt_lon
82204,A..S74R,0,40.868072,-73.919899
82205,A..S74R,1,40.867859,-73.920698
82206,A..S74R,2,40.867846,-73.920742
82207,A..S74R,3,40.867833,-73.920786
82208,A..S74R,4,40.867819,-73.920831


In [9]:
# NOTE(review): this cell recomputes `shape_data` exactly as the cell before it
# does; it looks like a leftover duplicate and is a candidate for removal.
# Points of the selected shape, in shape_pt_sequence order
shape_data = shapes[shapes.shape_id == sample_shape_id].sort_values(by=["shape_pt_sequence"])
shape_data.head()


Unnamed: 0,shape_id,shape_pt_sequence,shape_pt_lat,shape_pt_lon
82204,A..S74R,0,40.868072,-73.919899
82205,A..S74R,1,40.867859,-73.920698
82206,A..S74R,2,40.867846,-73.920742
82207,A..S74R,3,40.867833,-73.920786
82208,A..S74R,4,40.867819,-73.920831


In [10]:
import folium

# Open the map on the first point of the sorted shape
start_lat, start_lon = shape_data.iloc[0][["shape_pt_lat", "shape_pt_lon"]]
m = folium.Map(location=[start_lat, start_lon], zoom_start=12)

# Draw the whole shape as one blue line through its [lat, lon] pairs
points = shape_data[["shape_pt_lat", "shape_pt_lon"]].to_numpy().tolist()
folium.PolyLine(locations=points, color="blue", weight=3).add_to(m)

# Render the map as the cell output
m

In [11]:
# Reuse the trip picked for the sample route (`sample_trip`)
sample_trip_id = sample_trip["trip_id"]

# That trip's stop_times rows in stop_sequence order, joined to stops.txt so
# each row also carries the stop name and coordinates
trip_stops = (
    stop_times.loc[stop_times["trip_id"].eq(sample_trip_id)]
    .sort_values(by="stop_sequence")
    .merge(stops, on="stop_id")
)

trip_stops.loc[:, ["stop_sequence", "stop_name", "stop_lat", "stop_lon"]].head()

Unnamed: 0,stop_sequence,stop_name,stop_lat,stop_lon
0,1,Inwood-207 St,40.868072,-73.919899
1,2,Dyckman St,40.865491,-73.927271
2,3,190 St,40.859022,-73.93418
3,4,181 St,40.851695,-73.937969
4,5,175 St,40.847391,-73.939704


In [12]:
import folium

# Open the map on the first point of the route shape
m = folium.Map(
    location=shape_data[["shape_pt_lat", "shape_pt_lon"]].iloc[0].tolist(),
    zoom_start=12,
)

# Route geometry: one blue line through every shape point
points = shape_data[["shape_pt_lat", "shape_pt_lon"]].to_numpy().tolist()
folium.PolyLine(locations=points, color="blue", weight=3).add_to(m)

# Stops of the sample trip: red circle markers with the stop name as popup
stop_columns = trip_stops[["stop_lat", "stop_lon", "stop_name"]]
for lat, lon, stop_name in stop_columns.itertuples(index=False, name=None):
    folium.CircleMarker(
        location=[lat, lon],
        radius=5,
        popup=stop_name,
        color="red",
        fill=True,
        fill_opacity=0.8,
    ).add_to(m)

# Render the map as the cell output
m