## Get Initial Node

### Setup & Load the Graph

---

In [29]:
import sys
sys.path.insert(0, '..')

import networkx as nx
import pandas as pd
import pickle
from tqdm import tqdm

from utils import get_location, get_distance, getRouteTime

import warnings
warnings.filterwarnings('ignore')

In [8]:
# Load the graph from the pickle file
G = pickle.load(open("../data/graph.pickle", "rb"))

In [9]:
# Show sample nodes
print(list(G.nodes)[0:10])

['Interlaken Ost', 'Interlaken West', 'Spiez', 'Thun', 'Bern', 'Olten', 'Liestal', 'Basel SBB', 'Zürich HB', 'Chur']


In [11]:
# Show sample edges
print(list(G.edges(data=True))[0:10])

[('Interlaken Ost', 'Interlaken West', {'departure': Timestamp('2023-12-01 23:33:00'), 'arrival': Timestamp('2023-12-01 23:36:00'), 'duration': Timedelta('0 days 00:03:00'), 'journey_id': '85:11:1094:001', 'trip_name': 'Interlaken Ost -> Bern', 'type': 'train'}), ('Interlaken Ost', 'Interlaken West', {'departure': Timestamp('2023-12-01 07:00:00'), 'arrival': Timestamp('2023-12-01 07:04:00'), 'duration': Timedelta('0 days 00:04:00'), 'journey_id': '85:11:809:001', 'trip_name': 'Interlaken Ost -> Romanshorn', 'type': 'train'}), ('Interlaken Ost', 'Interlaken West', {'departure': Timestamp('2023-12-01 09:00:00'), 'arrival': Timestamp('2023-12-01 09:04:00'), 'duration': Timedelta('0 days 00:04:00'), 'journey_id': '85:11:813:001', 'trip_name': 'Interlaken Ost -> Romanshorn', 'type': 'train'}), ('Interlaken Ost', 'Interlaken West', {'departure': Timestamp('2023-12-01 11:00:00'), 'arrival': Timestamp('2023-12-01 11:04:00'), 'duration': Timedelta('0 days 00:04:00'), 'journey_id': '85:11:817:00

In [16]:
pr_stations = pd.read_csv("../data/pr_stations.csv")
pr_stations.head()

Unnamed: 0,station,station_abbr,longitude,latitude
0,Muttenz,MU,47.533591,7.647894
1,Pratteln,PR,47.522669,7.690817
2,Frenkendorf-Füllinsdorf,FRE,47.50147,7.719111
3,Liestal,LST,47.484461,7.731367
4,Lausen,LSN,47.470345,7.759763


### Adding the Initial Node to the Graph Demo

---

Compute the distance in meters between the initial station and all the other stations in the graph, and select the `k` closest stations:

In [78]:
# Setup input from the user
start_loc = "46.518732, 6.561944"
k = 1

# Find k-closest stations with PR
distances = [
    (end[0], get_distance(start_loc, f"{end[2]}, {end[3]}"))
    for _, end in pr_stations.iterrows()
]

# Sort the distances in place
distances.sort(key=lambda t: t[1])

# Show the k-closest stations
print(f"Closest {k} station(s):")
k_closest_stations = distances[:k]
for station, distance in k_closest_stations:
    print(f"- {station} is {distance/1000:.2f} km away")

Closest 1 station(s):
- Denges-Echandens is 2.01 km away


Now, compute the travel time for car, bike and walking in minutes between the initial station and the **closest stations** in the graph:

In [79]:
car_travel_time = [
    (
        station[0],
        getRouteTime(
            start_loc, get_location(station[0]), method="car"
        ),
    )
    for station in tqdm(k_closest_stations, desc="Calculating car travel time")
]

bike_travel_time = [
    (
        station[0],
        getRouteTime(
            start_loc, get_location(station[0]), method="bike"
        ),
    )
    for station in tqdm(k_closest_stations, desc="Calculating bike travel time")
]

walk_travel_time = [
    (
        station[0],
        getRouteTime(
            start_loc, get_location(station[0]), method="foot"
        ),
    )
    for station in tqdm(k_closest_stations, desc="Calculating walk travel time")
]

Calculating car travel time: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s]
Calculating bike travel time: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
Calculating walk travel time: 100%|██████████| 1/1 [00:00<00:00,  2.05it/s]


Now, combine all modes of transport into a single list. Next, add new edge from the initial station to the one of the `k` closest stations for each mode of transport.

In [80]:
travel_time = {
    "car": car_travel_time,
    "bike": bike_travel_time,
    "walk": walk_travel_time,
}

for method, times in travel_time.items():

    G.add_edges_from([
        ("Start", station, {"duration": time, "type": method})
        for station, time in times
    ])

Now, let's check the newly added edges:

In [81]:
sorted(list(G.edges("Start", data=True)), key=lambda t: t[2]["duration"])

[('Start', 'Denges-Echandens', {'duration': 637, 'type': 'car'}),
 ('Start', 'Denges-Echandens', {'duration': 637, 'type': 'car'}),
 ('Start', 'Denges-Echandens', {'duration': 1115, 'type': 'bike'}),
 ('Start', 'Denges-Echandens', {'duration': 1115, 'type': 'bike'}),
 ('Start', 'Denges-Echandens', {'duration': 4032, 'type': 'walk'}),
 ('Start', 'Denges-Echandens', {'duration': 4032, 'type': 'walk'})]

### Final Util Function

---

Now, let's wrap all the above steps into a function:

In [64]:
def get_travel_time(start_loc : str, closest_stations : list[tuple[str, float]], mode : str):
    """
    Calculates the travel time for a given mode of transport

    Args:
        start_loc (str): The starting location
        closest_stations (list[tuple(str, float)]): The closest stations
        mode (str): The mode of transport
    
    Returns:
        list[tuple(str, float)]: The travel time for each station
    """
    travel_time = [
        (
            station[0],
            getRouteTime(
                start_loc, get_location(station[0]), method=mode
            ),
        )
        for station in closest_stations
    ]

    return travel_time

def add_node(G : nx.Graph, start_loc : str, k : int) -> nx.Graph:
    """
    Adds a new node to the graph

    Args:
        G (nx.Graph): The graph
        start_loc (str): The starting location
        k (int): The number of closest stations to add
    
    Returns:
        nx.Graph: The graph with the new node
    """


    # Find k-closest stations with PR
    distances = [
        (end[0], get_distance(start_loc, f"{end[2]}, {end[3]}"))
        for _, end in pr_stations.iterrows()
    ]

    # Sort the distances in place
    distances.sort(key=lambda t: t[1])

    # Select the k-closest stations
    k_closest_stations = distances[:k]

    # Compute the travel time for each mode of transport for the k-closest stations
    travel_time = {
        "car": get_travel_time(start_loc, k_closest_stations, "car"),
        "bike": get_travel_time(start_loc, k_closest_stations, "bike"),
        "walk": get_travel_time(start_loc, k_closest_stations, "foot"),
    }

    # Add the start node with start_loc as attribute
    G.add_node("Start", pos=start_loc)

    # Finally, add the edges from the init node to the existing statiions in the graph
    for method, times in travel_time.items():

        G.add_edges_from([
            ("Start", station, {"duration": time, "type": method})
            for station, time in times
        ])

    return G

Let's test the function:

In [76]:
# Reload the graph from the pickle file
G = pickle.load(open("../data/graph.pickle", "rb"))

# Add the new node
start_loc = "46.518732, 6.561944"
G = add_node(G, start_loc=start_loc, k=1)

In [75]:
# Show the new edges
print(f"The start node (with pos <<{G.nodes['Start']['pos']}>>) has been added to the graph with the following edges:")
for e in sorted(list(G.edges("Start", data=True)), key=lambda t: t[2]["duration"]):
    print(f"- From {e[0]} to {e[1]} in {e[2]['duration']:.2f} minutes with {e[2]['type']}")

The start node (with pos <<46.518732, 6.561944>>) has been added to the graph with the following edges:
- From Start to Denges-Echandens in 637.00 minutes with car
- From Start to Denges-Echandens in 1115.00 minutes with bike
- From Start to Denges-Echandens in 4032.00 minutes with walk


---