In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import heapq

from util import minutes_to_hours, time_to_minutes

In [4]:
# Directory holding the raw timetable CSVs, kept in one place so the notebook
# can be pointed at another data location by editing a single line.
DATA_DIR = "/home/jovyan/homework2/data"

# Load the three raw tables; the first CSV column becomes the index.
df = pd.read_csv(DATA_DIR + "/stop_times.csv", index_col=0)
stops_lausanne_all = pd.read_csv(DATA_DIR + "/stops.csv", index_col=0)
stops_to_stops = pd.read_csv(DATA_DIR + "/stop_to_stop.csv", index_col=0)

# Rename the remaining columns positionally to the names used in this notebook.
df.columns = ['trip_id', 'stop_id', 'departure_time', 'arrival_time']
stops_lausanne_all.columns = ['stop_id', 'stop_name','stop_lat','stop_lon']
stops_to_stops.columns = ['stop_id_a', 'stop_id_b', 'distance']

In [5]:
def read_data(city, start_time, duration, data_dir="/home/jovyan/homework2/data"):
    """Load and normalise the timetable tables for one city and time window.

    Parameters
    ----------
    city : str or int
        City selector; it is converted with ``int()``. Only city 1 is supported.
    start_time : number
        Start of the window, in the same unit that ``time_to_minutes`` returns
        (the sample output in this notebook shows 13:36:00 -> 816.0).
    duration : number
        Length of the window in that same unit.
    data_dir : str, optional
        Directory containing ``stop_times.csv``, ``stops.csv`` and
        ``stop_to_stop.csv``.

    Returns
    -------
    (df_filtered, stops, transfers)
        ``df_filtered``: stop-time rows whose arrival lies in
        ``[start_time, start_time + duration]``, sorted by trip and time, with
        a fresh 0..n-1 index (the previous index is kept as column ``index``).
        ``stops``: one row per (stop_id, stop_name).
        ``transfers``: one row per ordered pair of distinct stop ids with the
        maximum ``distance`` seen for that pair.

    Raises
    ------
    ValueError
        If ``city`` is not 1 (previously this surfaced as an
        ``UnboundLocalError`` at the ``return`` statement), or if ``city``
        cannot be converted to an integer.
    """

    def process_string(s):
        """Keep only the digits of the part of a stop id before the first ':'."""
        # str() lets ids that pandas parsed as numbers go through the same path.
        first_part = str(s).split(':')[0]
        return ''.join(ch for ch in first_part if ch.isdigit())

    if int(city) != 1:
        raise ValueError("Unsupported city {!r}: only city 1 is available".format(city))

    stop_times = pd.read_csv(data_dir + "/stop_times.csv", index_col=0)
    stops = pd.read_csv(data_dir + "/stops.csv", index_col=0)
    transfers = pd.read_csv(data_dir + "/stop_to_stop.csv", index_col=0)

    # Stop times: normalise the ids so they match the other two tables.
    stop_times.columns = ['trip_id', 'stop_id', 'departure_time', 'arrival_time']
    stop_times['stop_id'] = stop_times['stop_id'].apply(process_string)

    # Stops: ids that differ only in their suffix collapse into one row per
    # (stop_id, stop_name); max() picks the remaining columns' values.
    stops.columns = ['stop_id', 'stop_name','stop_lat','stop_lon']
    stops['stop_id'] = stops['stop_id'].apply(process_string)
    stops = stops.groupby(['stop_id', 'stop_name']).max().reset_index()

    # Stop-to-stop distances: drop pairs that collapse onto the same stop and
    # keep the largest distance for each remaining ordered pair.
    transfers.columns = ['stop_id_a', 'stop_id_b', 'distance']
    transfers['stop_id_a'] = transfers['stop_id_a'].apply(process_string)
    transfers['stop_id_b'] = transfers['stop_id_b'].apply(process_string)
    transfers = transfers[transfers['stop_id_a'] != transfers['stop_id_b']]
    transfers = transfers.groupby(['stop_id_a', 'stop_id_b']).max().reset_index()

    # Numeric time columns used for filtering and ordering.
    stop_times['departure_time_mins'] = stop_times['departure_time'].apply(time_to_minutes)
    stop_times['arrival_time_mins'] = stop_times['arrival_time'].apply(time_to_minutes)

    # Keep only the rows that arrive inside the requested window (both ends inclusive).
    in_window = (
        (stop_times['arrival_time_mins'] >= start_time)
        & (stop_times['arrival_time_mins'] <= start_time + duration)
    )
    # After reset_index, consecutive positions are consecutive stops of a trip,
    # which is what build_graph relies on.
    df_filtered = (
        stop_times[in_window]
        .sort_values(by=['trip_id', 'arrival_time_mins', 'departure_time_mins'])
        .reset_index()
    )

    return df_filtered, stops, transfers
        

In [6]:
def time_to_minutes2(time_str):
    """Convert a clock string to whole minutes since midnight.

    Accepts ``'H:MM'`` / ``'HH:MM'`` and also ``'HH:MM:SS'`` (the format of the
    timetable columns); a seconds field is validated but not counted.

    Raises
    ------
    ValueError
        If the string does not have two or three ':'-separated fields or a
        field is not an integer.
    """
    fields = time_str.split(':')
    if len(fields) not in (2, 3):
        raise ValueError("Expected 'HH:MM' or 'HH:MM:SS', got {!r}".format(time_str))
    numbers = [int(field) for field in fields]
    hours, minutes = numbers[0], numbers[1]
    return hours * 60 + minutes

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered.sort_values(by=['trip_id', 'arrival_time_mins', 'departure_time_mins'], inplace=True)


Unnamed: 0,index,trip_id,stop_id,departure_time,arrival_time,departure_time_mins,arrival_time_mins
0,195,1.TA.96-240-j24-1.1.H,8570064,13:36:00,13:36:00,816.0,816.0
1,196,1.TA.96-240-j24-1.1.H,8570063,13:40:00,13:40:00,820.0,820.0
2,197,1.TA.96-249-j24-1.7.H,8570178,12:59:00,12:59:00,779.0,779.0
3,227,1.TA.96-65-j24-1.11.H,8511210,12:21:00,12:21:00,741.0,741.0
4,228,1.TA.96-65-j24-1.11.H,8511214,12:23:00,12:23:00,743.0,743.0


In [7]:
def build_graph(start_time, city, expected_time, walking_speed=50):
    """Build the routing multigraph for a departure/arrival time window.

    Parameters
    ----------
    start_time : str
        Earliest departure as a clock string understood by ``time_to_minutes2``.
    city : str or int
        City selector forwarded to ``read_data``.
    expected_time : str
        Latest arrival as a clock string; together with ``start_time`` it
        defines the timetable window that is loaded.
    walking_speed : number, optional
        Divisor turning a stop-to-stop ``distance`` into a walking time
        (``round(distance / walking_speed)``). The default 50 is the value that
        was previously hard-coded; presumably metres per minute - TODO confirm
        the unit of ``distance``.

    Returns
    -------
    (G, all_stops)
        ``G`` is a ``networkx.MultiDiGraph`` whose nodes are stop ids.
        Transit edges carry ``weight``, ``trip_id``, ``departure_time_mins``
        and ``arrival_time_mins``; walking edges carry ``weight``,
        ``trip_id="walking"`` and ``walking_time``.
        ``all_stops`` is the stop table returned by ``read_data``.
    """
    start_time_mins = time_to_minutes2(start_time)
    expected_arrival_time_mins = time_to_minutes2(expected_time)
    duration = expected_arrival_time_mins - start_time_mins
    df_filtered, all_stops, stops_to_stops = read_data(city, start_time_mins, duration)

    G = nx.MultiDiGraph()

    # Transit edges: rows are sorted by trip and time, so two neighbouring rows
    # with the same trip_id are consecutive stops of that trip. Working on plain
    # lists pairs rows by position and avoids a per-row label lookup.
    trip_ids = df_filtered['trip_id'].tolist()
    stop_ids = df_filtered['stop_id'].tolist()
    departures = df_filtered['departure_time_mins'].tolist()
    arrivals = df_filtered['arrival_time_mins'].tolist()

    for pos in range(len(trip_ids) - 1):
        nxt = pos + 1
        if trip_ids[nxt] != trip_ids[pos]:
            continue  # the next row belongs to another trip
        G.add_edge(
            stop_ids[pos],
            stop_ids[nxt],
            weight=arrivals[nxt] - departures[pos],
            trip_id=trip_ids[pos],
            departure_time_mins=departures[pos],
            arrival_time_mins=arrivals[nxt],
        )

    # Walking edges: available at any time, so they carry only a duration.
    walking_rows = zip(
        stops_to_stops['stop_id_a'].tolist(),
        stops_to_stops['stop_id_b'].tolist(),
        stops_to_stops['distance'].tolist(),
    )
    for stop_a, stop_b, distance in walking_rows:
        walking_time = round(distance / walking_speed)
        G.add_edge(stop_a, stop_b, weight=walking_time, trip_id="walking", walking_time=walking_time)

    return G, all_stops

In [8]:
def find_routes(start_time, departure_id, graph=None):
    """Compute earliest arrival times from one stop with a Dijkstra-style search.

    Parameters
    ----------
    start_time : str
        Departure clock string understood by ``time_to_minutes2``.
    departure_id : hashable
        Node id of the starting stop.
    graph : networkx.MultiDiGraph, optional
        Graph to search. When omitted, the module-level ``G`` is used, which is
        what this function always did before the parameter existed.

    Returns
    -------
    (min_arrival_time, predecessor)
        ``min_arrival_time`` maps every node to its earliest arrival in minutes
        (``np.inf`` when unreachable). ``predecessor`` maps every node to
        ``None`` or to ``(previous_stop, minutes_to_hours(time at previous
        stop), trip_id)`` for the edge used to reach it.

    Raises
    ------
    KeyError
        If ``departure_id`` is not a node of the graph.
    """
    if graph is None:
        graph = G  # fall back to the notebook's module-level graph

    if departure_id not in graph:
        raise KeyError("departure stop {!r} is not a node of the graph".format(departure_id))

    start_time_mins = time_to_minutes2(start_time)

    # Earliest known arrival per stop; the origin is reached at the start time.
    min_arrival_time = {node: np.inf for node in graph.nodes()}
    min_arrival_time[departure_id] = start_time_mins

    # Best known (previous stop, time there, trip) per stop, for path reconstruction.
    predecessor = {node: None for node in graph.nodes()}

    # Min-heap of (arrival time, stop).
    priority_queue = [(start_time_mins, departure_id)]

    while priority_queue:
        current_time, current_stop = heapq.heappop(priority_queue)

        # A stop may be queued several times; an entry that is later than the
        # best known arrival cannot improve any neighbour, so skip it.
        if current_time > min_arrival_time[current_stop]:
            continue

        for neighbor, parallel_edges in graph[current_stop].items():
            for edge_attr in parallel_edges.values():
                trip_id = edge_attr['trip_id']
                if trip_id == 'walking':
                    # Walking can start immediately.
                    arrival = current_time + edge_attr['walking_time']
                else:
                    departure_time = edge_attr['departure_time_mins']
                    # Written as "not >=" so that a NaN departure is rejected,
                    # exactly like a departure that has already been missed.
                    if not departure_time >= current_time:
                        continue
                    # Waiting plus riding ends at the scheduled arrival.
                    arrival = edge_attr['arrival_time_mins']

                # Only a strictly earlier arrival updates the neighbour.
                if arrival < min_arrival_time[neighbor]:
                    min_arrival_time[neighbor] = arrival
                    predecessor[neighbor] = (current_stop, minutes_to_hours(current_time), trip_id)
                    heapq.heappush(priority_queue, (arrival, neighbor))

    return min_arrival_time, predecessor

In [9]:
def reconstruct_path(predecessor, all_stops, start, end):
    """Walk the predecessor map backwards from ``end`` to ``start``.

    Parameters
    ----------
    predecessor : dict
        Maps a stop id to ``None`` or to a ``(previous_stop, time, trip_id)``
        tuple, as produced by ``find_routes``.
    all_stops : pandas.DataFrame
        Stop table with ``stop_id`` and ``stop_name`` columns, used to label
        each hop.
    start, end : hashable
        Stop ids of the origin and the destination.

    Returns
    -------
    list of (stop_id, stop_name, time, trip_id) or None
        One tuple per hop in travel order, each describing the stop the hop
        leaves from (``end`` itself is not included). ``stop_name`` is ``''``
        when the id is not in ``all_stops``. ``None`` when ``end`` cannot be
        traced back to ``start``, including when a stop is missing from
        ``predecessor``.
    """
    path = []
    step = end
    while step != start:
        # .get() treats a stop absent from the map like an unreachable one.
        hop = predecessor.get(step)
        if hop is None:
            return None
        previous_stop, time_label, trip_id = hop[0], hop[1], hop[2]

        # Explicit empty check instead of a bare except around the lookup.
        names = all_stops.loc[all_stops['stop_id'] == previous_stop, 'stop_name']
        stop_name = names.iloc[0] if len(names) > 0 else ''

        path.append((previous_stop, stop_name, time_label, trip_id))
        step = previous_stop
    path.reverse()
    return path

In [10]:
# Query parameters for the example run in the cells below.
start_time = '12:00'  # earliest departure; parsed by time_to_minutes2
city = "1"  # city selector; read_data only handles city 1
expected_time = "14:00"  # latest arrival; with start_time it bounds the timetable window
departure = "Renens VD"  # origin, matched against the stop_name column
# Alternative origins kept for manual testing:
# departure = "Lausanne-Flon, pl. de l'Europe"
# departure = "St-Sulpice VD, Parc Scient."
# # departure_id = "8591818"
destination = "Lausanne-Flon, pl. de l'Europe"  # destination, matched against stop_name
# # destination_id = "8579238:0:D"

NameError: name 'multi_shortest_paths_dijkstra' is not defined

In [None]:
# Build the transit + walking graph for the chosen window. G is read as a
# module-level name by find_routes; all_stops is the stop table used for name lookups.
G, all_stops = build_graph(start_time, city, expected_time)

In [None]:
# Translate the two stop names into stop ids, taking the first match for each.
departure_matches = all_stops.loc[all_stops['stop_name'] == departure, 'stop_id']
destination_matches = all_stops.loc[all_stops['stop_name'] == destination, 'stop_id']
departure_id = departure_matches.tolist()[0]
destination_id = destination_matches.tolist()[0]

In [None]:
# Report the size of the graph that was just built.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("Number of nodes: {}".format(node_count))
print("Number of edges: {}".format(edge_count))

In [None]:
# Earliest arrival time at every stop, plus the predecessor map used to rebuild the route.
min_arrival_time, predecessor = find_routes(start_time, departure_id)

In [None]:
path = reconstruct_path(predecessor, all_stops, departure_id, destination_id)
# reconstruct_path returns None when the destination cannot be traced back to
# the origin; iterating over None would raise a TypeError.
if path is None:
    print("No route found from", departure, "to", destination)
else:
    # One line per hop: stop id, stop name, time at that stop, trip taken from it.
    for stop_id, stop_name, time_label, trip_id in path:
        print(stop_id, stop_name, time_label, trip_id)
    # The path lists the stop each hop leaves from, so the destination is printed last.
    print(destination_id, all_stops[all_stops['stop_id'] == destination_id]['stop_name'].values[0])

# Interaction and Visualization

In [None]:
import ipywidgets as widgets
from IPython.display import display
import plotly.graph_objects as go
import plotly.express as px

In [None]:
# Option lists for the widgets below.
stops_lausanne_all = pd.read_csv("/home/jovyan/homework2/data/stops.csv", index_col=0)
stops_lausanne_all.columns = ['stop_id', 'stop_name','stop_lat','stop_lon']

# Distinct station names, in order of first appearance.
stations = stops_lausanne_all['stop_name'].unique().tolist()
cities = ["1"]

# Every minute of the day as 'H:MM' (hour not padded, minute zero-padded).
time_slots = [
    "{}:{:02d}".format(hour, minute)
    for hour in range(24)
    for minute in range(60)
]

In [None]:
def _make_combobox(placeholder, options, description):
    """Create an enabled Combobox with ensure_option=True for the given options."""
    return widgets.Combobox(
        placeholder=placeholder,
        options=options,
        description=description,
        ensure_option=True,
        disabled=False,
    )


# Input controls for the route search form.
date_picker = widgets.DatePicker(description='Date:', disabled=False)

objectID = _make_combobox('City ID', cities, 'City: ')
time_input = _make_combobox('Choose Time (HH:MM)', time_slots, 'Depart:')
departure_station = _make_combobox('Type or select', stations, 'From:')
destination_station = _make_combobox('Type or select', stations, 'To:')
expected_arrival_time = _make_combobox('Choose Time (HH:MM)', time_slots, 'Arrival at: ')

# Trigger button and the area its handler prints into.
button = widgets.Button(description="Find Routes")
output = widgets.Output()

def on_button_clicked(b):
    """Button handler: read the form, search a route and print it in ``output``.

    Parameters
    ----------
    b : widget
        The clicked button (passed by ipywidgets; not used).
    """
    # find_routes reads the module-level G. Without this declaration the graph
    # built below would stay local and the search would run on whatever graph
    # an earlier cell left in G.
    global G

    with output:
        output.clear_output()

        date = date_picker.value
        start_time = time_input.value
        departure = departure_station.value
        destination = destination_station.value
        city = objectID.value
        expected_time = expected_arrival_time.value

        # Comboboxes start out empty; parsing an empty time or city would fail.
        if not all([city, start_time, departure, destination, expected_time]):
            print("Please fill in the city, the departure time, both stations and the arrival time.")
            return

        print("Searching for routes...")
        print("Route: From {} to {} on {} at {}".format(departure, destination, date, start_time))

        G, all_stops = build_graph(start_time, city, expected_time)

        departure_ids = pd.unique(all_stops[all_stops.stop_name == departure].stop_id).tolist()
        destination_ids = pd.unique(all_stops[all_stops.stop_name == destination].stop_id).tolist()
        if not departure_ids or not destination_ids:
            print("Unknown departure or destination station.")
            return
        departure_id = departure_ids[0]
        destination_id = destination_ids[0]

        min_arrival_time, predecessor = find_routes(start_time, departure_id)
        path = reconstruct_path(predecessor, all_stops, departure_id, destination_id)

        # reconstruct_path returns None when no route reaches the destination.
        if path is None:
            print("No route found from {} to {}".format(departure, destination))
            return

        # One line per hop, then the destination itself.
        for e in path:
            print(e[0], e[1], e[2], e[3])
        print(destination_id, all_stops[all_stops['stop_id'] == destination_id]['stop_name'].values[0])

        # A call to a map-display or route-search function could be added here.
        

# Run the route search whenever the button is pressed.
button.on_click(on_button_clicked)

# Show the input controls, the button, and the output area the handler prints into.
display(objectID, date_picker, time_input, departure_station, destination_station, expected_arrival_time, button, output)