In [1]:
import html

import pandas as pd

# Identifier columns are read as text. Left to type inference, an id such as
# "00123" would be turned into the integer 123, and stop_id could come out as
# int in one file and object in the other, which makes the later merges on
# stop_id fail.
stop_times = pd.read_csv("stop_times.txt", dtype={"trip_id": str, "stop_id": str})
stops = pd.read_csv("stops.txt", dtype={"stop_id": str})


In [2]:
# Turn the HH:MM:SS strings into timedeltas so they can be subtracted later.
for time_column in ("arrival_time", "departure_time"):
    stop_times[time_column] = pd.to_timedelta(stop_times[time_column])

# Order the rows trip by trip, and by stop position within each trip.
stop_times = stop_times.sort_values(by=["trip_id", "stop_sequence"])


In [3]:
# Build one row per consecutive stop pair (origin -> next stop) of every trip.
#
# Each stop_times row is lined up with the row that follows it inside the same
# trip in a single vectorised pass. This replaces a row-by-row Python loop,
# which is very slow on large inputs and produced a column-less frame when
# there was nothing to pair.
# Relies on stop_times already being ordered by stop_sequence within each trip
# and on arrival_time / departure_time already being timedeltas.
next_stop = stop_times.groupby("trip_id", sort=False)[["stop_id", "arrival_time"]].shift(-1)

# The last stop of a trip has no successor, so its shifted stop_id is missing.
has_next = next_stop["stop_id"].notna()

od_df = pd.DataFrame({
    "trip_id": stop_times.loc[has_next, "trip_id"],
    "origin_stop_id": stop_times.loc[has_next, "stop_id"],
    # shift() upcasts integer ids to float to make room for NaN; restore the
    # original dtype so later merges on stop_id still line up.
    "destination_stop_id": next_stop.loc[has_next, "stop_id"].astype(stop_times["stop_id"].dtype),
    # Time between leaving the origin and arriving at the next stop, in minutes.
    "duration_min": (
        next_stop.loc[has_next, "arrival_time"] - stop_times.loc[has_next, "departure_time"]
    ).dt.total_seconds() / 60,
}).reset_index(drop=True)


In [None]:
# Collapse the per-trip rows into one row per (origin, destination) stop pair,
# keeping the number of rows behind each pair and their mean duration.
pair_keys = ["origin_stop_id", "destination_stop_id"]
od_summary = od_df.groupby(pair_keys, as_index=False).agg(
    trip_count=("trip_id", "count"),
    avg_duration_min=("duration_min", "mean"),
)


In [5]:
stops_subset = stops[["stop_id", "stop_name", "stop_lat", "stop_lon"]]


def _attach_stop_details(od_pairs, stop_details, role):
    """Return ``od_pairs`` with the name and coordinates of its ``role`` stop added.

    Parameters
    ----------
    od_pairs : DataFrame
        Must contain a ``{role}_stop_id`` column.
    stop_details : DataFrame
        Columns ``stop_id``, ``stop_name``, ``stop_lat``, ``stop_lon``.
    role : str
        Column prefix, ``"origin"`` or ``"destination"``.

    Returns
    -------
    DataFrame
        ``od_pairs`` plus ``{role}_name``, ``{role}_lat`` and ``{role}_lon``.
        Pairs whose stop id is absent from ``stop_details`` are dropped
        (inner join).

    Raises
    ------
    pandas.errors.MergeError
        If ``stop_details`` lists the same ``stop_id`` more than once, which
        would otherwise silently duplicate OD rows and inflate the map.
    """
    key = f"{role}_stop_id"
    labelled = stop_details.rename(columns={
        "stop_id": key,
        "stop_name": f"{role}_name",
        "stop_lat": f"{role}_lat",
        "stop_lon": f"{role}_lon",
    })
    # Drop columns left over from a previous run of this cell so that
    # re-running it does not produce suffixed or duplicated columns.
    stale = [col for col in labelled.columns if col != key and col in od_pairs.columns]
    return od_pairs.drop(columns=stale).merge(labelled, on=key, validate="many_to_one")


od_summary = _attach_stop_details(od_summary, stops_subset, "origin")
od_summary = _attach_stop_details(od_summary, stops_subset, "destination")


In [None]:
import folium
import pandas as pd
from folium.plugins import MarkerCluster

# Tunables for the map: how many of the busiest flows to draw, and the initial view.
TOP_N_FLOWS = 200  # tweak this
MAP_CENTER = [17.385, 78.4867]
MAP_ZOOM = 12

# Keep only the flows with the highest trip counts.
flows_by_volume = od_summary.sort_values(by="trip_count", ascending=False)
od_top = flows_by_volume.head(TOP_N_FLOWS)

hyderabad_map = folium.Map(location=MAP_CENTER, zoom_start=MAP_ZOOM, tiles="cartodbpositron")


In [None]:

# Draw one straight line per top flow; busier flows get thicker lines.
for flow in od_top.itertuples(index=False):
    origin = (flow.origin_lat, flow.origin_lon)
    dest = (flow.destination_lat, flow.destination_lon)

    # The tooltip is rendered as HTML (note the <br> tags below), so stop names
    # are escaped: otherwise a name containing "&", "<" or ">" would be
    # displayed wrongly or inject markup into the page.
    origin_label = html.escape(str(flow.origin_name))
    destination_label = html.escape(str(flow.destination_name))

    line = folium.PolyLine(
        locations=[origin, dest],
        color="blue",
        # One pixel of width per 5 trips, clamped to the range 1..10.
        weight=min(10, max(1, flow.trip_count / 5)),
        opacity=0.6,
        tooltip=f"{origin_label} ➝ {destination_label}<br>"
                f"Trips: {flow.trip_count}<br>"
                f"Avg Duration: {flow.avg_duration_min:.1f} min"
    )
    line.add_to(hyderabad_map)


In [None]:
# Collect every stop that appears as an origin or a destination in the OD
# summary, under a common set of column names, keeping each stop_id once.
origin_columns = {
    "origin_stop_id": "stop_id",
    "origin_name": "stop_name",
    "origin_lat": "lat",
    "origin_lon": "lon",
}
destination_columns = {
    "destination_stop_id": "stop_id",
    "destination_name": "stop_name",
    "destination_lat": "lat",
    "destination_lon": "lon",
}
endpoint_frames = [
    od_summary[list(columns)].rename(columns=columns)
    for columns in (origin_columns, destination_columns)
]
all_stops = pd.concat(endpoint_frames).drop_duplicates("stop_id")

# Markers go into a cluster layer so that nearby stops are grouped on the map.
marker_cluster = MarkerCluster()
marker_cluster.add_to(hyderabad_map)

for stop in all_stops.itertuples(index=False):
    stop_marker = folium.CircleMarker(
        location=(stop.lat, stop.lon),
        radius=4,
        popup=stop.stop_name,
        color="red",
        fill=True,
        fill_opacity=0.7,
    )
    stop_marker.add_to(marker_cluster)


In [None]:
# Write a standalone HTML copy of the map.
output_html = "hyderabad_od_map.html"
hyderabad_map.save(output_html)

# Leaving the map as the cell's final expression renders it inline in Jupyter.
hyderabad_map
# References:
# https://chatgpt.com/c/6807dd19-2064-8002-b51e-f5511bd891ad
# https://github.com/Code4GovTech/C4GT/issues/514