In [1]:
import pandas as pd
import networkx as nx

# Load the trip records; each CSV row describes one trip between two stations.
df = pd.read_csv("2021-04.csv")

# Replace the verbose CSV headers with the short snake_case names used below.
df = df.rename(columns={
    "Departure": "departure_time",
    "Return": "return_time",
    "Departure station id": "dep_id",
    "Departure station name": "dep_name",
    "Return station id": "ret_id",
    "Return station name": "ret_name",
    "Covered distance (m)": "distance_m",
    "Duration (sec.)": "duration_s",
})

# Keep only trips with a strictly positive distance and duration (rows where
# either value is missing fail the comparison and are dropped as well), then
# discard trips that lack a departure or return station id.
df = df[(df["distance_m"] > 0) & (df["duration_s"] > 0)]
df = df.dropna(subset=["dep_id", "ret_id"])

# Directed graph: one node per station id, one edge per ordered
# (departure station, return station) pair.
G = nx.DiGraph()

# Register every station seen as a departure point, then every station seen
# as a return point.  Re-adding an existing node only updates its "name"
# attribute, so for an id that occurs with several names the last one wins.
for id_col, name_col in (("dep_id", "dep_name"), ("ret_id", "ret_name")):
    stations = df[[id_col, name_col]].drop_duplicates()
    for station_id, station_name in stations.itertuples(index=False, name=None):
        G.add_node(station_id, name=station_name)

# Collapse individual trips into one record per ordered station pair.
agg = df.groupby(["dep_id", "ret_id"]).agg(
    trip_count=("dep_id", "size"),
    total_distance=("distance_m", "sum"),
    total_duration=("duration_s", "sum"),
).reset_index()

# itertuples() hands back each column's value with its own dtype.  iterrows()
# would first squeeze every row into a single-dtype Series, turning the
# integer trip counts (and station ids) into floats, so the weighted degrees
# below would be reported as e.g. 9978.0 instead of 9978.
for dep_id, ret_id, trip_count, total_distance, total_duration in agg.itertuples(
    index=False, name=None
):
    G.add_edge(
        dep_id,
        ret_id,
        weight=trip_count,
        total_distance=total_distance,
        total_duration=total_duration,
    )

# Summary statistics for the filtered trips and the resulting graph.
print("Number of nodes (stations):", G.number_of_nodes())
print("Number of edges (station pairs):", G.number_of_edges())
print("Total trips:", df.shape[0])
print("Average trip distance (m):", df["distance_m"].mean())
print("Average trip duration (s):", df["duration_s"].mean())

# Weighted degree = sum of the "weight" (trip count) over incoming / outgoing
# edges, i.e. the number of trips arriving at / departing from each station.
in_deg = G.in_degree(weight="weight")
out_deg = G.out_degree(weight="weight")

# Five busiest stations in each direction, busiest first.
top_inbound = sorted(in_deg, key=lambda x: x[1], reverse=True)[:5]
top_outbound = sorted(out_deg, key=lambda x: x[1], reverse=True)[:5]

for heading, ranking in (
    ("\nTop inbound stations (by trips arriving):", top_inbound),
    ("\nTop outbound stations (by trips departing):", top_outbound),
):
    print(heading)
    for node, val in ranking:
        print(G.nodes[node]["name"], ":", val)

# Strongly connected: every station can reach every other station by
# following directed edges.
print("\nIs the graph strongly connected?", nx.is_strongly_connected(G))

Number of nodes (stations): 351
Number of edges (station pairs): 32972
Total trips: 525926
Average trip distance (m): 2587.058589991748
Average trip duration (s): 1101.7891870719454

Top inbound stations (by trips arriving):
Itämerentori : 9978.0
Kalasatama (M) : 7846.0
Ympyrätalo : 7208.0
Pasilan asema : 6334.0
Töölönlahdenkatu : 6050.0

Top outbound stations (by trips departing):
Itämerentori : 9764.0
Pasilan asema : 7462.0
Kalasatama (M) : 7170.0
Ympyrätalo : 6786.0
Töölönlahdenkatu : 5516.0

Is the graph strongly connected? True
