In [1]:
import pandas as pd
import networkx as nx
from datetime import datetime, timedelta
import heapq
import hashlib

data = pd.read_csv('..\connection_graph.csv', index_col=0)
data.head()
data.columns.values

  data = pd.read_csv('..\connection_graph.csv', index_col=0)


array(['company', 'line', 'departure_time', 'arrival_time', 'start_stop',
       'end_stop', 'start_stop_lat', 'start_stop_lon', 'end_stop_lat',
       'end_stop_lon'], dtype=object)

In [2]:
def normalize_time(time_str):
    hour, minute, second = map(int, time_str.split(':'))
    hour = hour % 24
    return f'{hour:02}:{minute:02}:{second:02}'

# Normalize departure_time and arrival_time columns
data['departure_time'] = data['departure_time'].apply(normalize_time)
data['arrival_time'] = data['arrival_time'].apply(normalize_time)

data.head()

Unnamed: 0,company,line,departure_time,arrival_time,start_stop,end_stop,start_stop_lat,start_stop_lon,end_stop_lat,end_stop_lon
0,MPK Autobusy,A,20:52:00,20:53:00,Zajezdnia Obornicka,Paprotna,51.148737,17.021069,51.147752,17.020539
1,MPK Autobusy,A,20:53:00,20:54:00,Paprotna,Obornicka (Wołowska),51.147752,17.020539,51.144385,17.023735
2,MPK Autobusy,A,20:54:00,20:55:00,Obornicka (Wołowska),Bezpieczna,51.144385,17.023735,51.14136,17.026376
3,MPK Autobusy,A,20:55:00,20:57:00,Bezpieczna,Bałtycka,51.14136,17.026376,51.136632,17.030617
4,MPK Autobusy,A,20:57:00,20:59:00,Bałtycka,Broniewskiego,51.136632,17.030617,51.135851,17.037383


In [3]:
def generate_hash(lat, lon):
        return hashlib.sha256(f"{lat}_{lon}".encode()).hexdigest()

data['start_stop_id'] = data.apply(lambda row: generate_hash(row['start_stop_lat'], row['start_stop_lon']), axis=1)
data['end_stop_id'] = data.apply(lambda row: generate_hash(row['end_stop_lat'], row['end_stop_lon']), axis=1)

data.head()

Unnamed: 0,company,line,departure_time,arrival_time,start_stop,end_stop,start_stop_lat,start_stop_lon,end_stop_lat,end_stop_lon,start_stop_id,end_stop_id
0,MPK Autobusy,A,20:52:00,20:53:00,Zajezdnia Obornicka,Paprotna,51.148737,17.021069,51.147752,17.020539,c11c66389f5c0c02141b2bf412b286ea3a0b82bccdbbd8...,7f3a7f877a70f6e1434e0e53f7a060d2c93c89e8ee322f...
1,MPK Autobusy,A,20:53:00,20:54:00,Paprotna,Obornicka (Wołowska),51.147752,17.020539,51.144385,17.023735,7f3a7f877a70f6e1434e0e53f7a060d2c93c89e8ee322f...,069b22abb9a894c7dcfe06e2215b67d660ffeec0ed71a0...
2,MPK Autobusy,A,20:54:00,20:55:00,Obornicka (Wołowska),Bezpieczna,51.144385,17.023735,51.14136,17.026376,069b22abb9a894c7dcfe06e2215b67d660ffeec0ed71a0...,777750c42f1ec2a9aeec809295c2e4056da90554e22996...
3,MPK Autobusy,A,20:55:00,20:57:00,Bezpieczna,Bałtycka,51.14136,17.026376,51.136632,17.030617,777750c42f1ec2a9aeec809295c2e4056da90554e22996...,4584214221503bb6ac00ca191b008a02950ad82a82a876...
4,MPK Autobusy,A,20:57:00,20:59:00,Bałtycka,Broniewskiego,51.136632,17.030617,51.135851,17.037383,4584214221503bb6ac00ca191b008a02950ad82a82a876...,8f1d1bd42d52557eeb64047ef07a1f439e5eb82a3cfd61...


In [5]:
graph = nx.MultiDiGraph()

for id, row in data.iterrows():
    graph.add_node(row['start_stop_id'], name=row['start_stop'], lat=row['start_stop_lat'], lon=row['start_stop_lon'])
    
    graph.add_node(row['end_stop_id'], name=row['end_stop'], lat=row['end_stop_lat'], lon=row['end_stop_lon'])

    edge_data = {
        'id': id,
        'company': row['company'],
        'line': row['line'],
        'departure_time': row['departure_time'],
        'arrival_time': row['arrival_time'],
    }
    graph.add_edge(row['start_stop_id'], row['end_stop_id'], **edge_data)

In [8]:
def parse_time(time_str):
    return datetime.strptime(time_str, '%H:%M:%S').time()

def time_difference(start, end):
    start_dt = datetime.combine(datetime.min, start)
    end_dt = datetime.combine(datetime.min, end)
    if end < start:  # crossing midnight
        end_dt += timedelta(days=1)
    return (end_dt - start_dt).total_seconds()

In [14]:
def shortest_path_dijkstra(graph, start_stop_name, end_stop_name, start_time):
    start_nodes = [n for n, d in graph.nodes(data=True) if d['name'] == start_stop_name]
    end_nodes = [n for n, d in graph.nodes(data=True) if d['name'] == end_stop_name]
    pq = [(0, start_node, [], start_time) for start_node in start_nodes]  # cost, node, path, current_time

    visited = set()

    while pq:
        try:
            current_cost, current_node, path, current_time = heapq.heappop(pq)
        except TypeError as error:
            print(error)
            print(pq)
            print(current_cost, current_node, path, current_time)
            break
        current_time = parse_time(current_time)
        if current_node in visited:
            continue

        visited.add(current_node)

        if current_node in end_nodes:
            return path, current_cost
        
        for next_node, edge_data in graph[current_node].items():
            for key, data in edge_data.items():
                departure_time = parse_time(data['departure_time'])
                arrival_time = parse_time(data['arrival_time'])
                
                if departure_time >= current_time:
                    waiting_time = time_difference(current_time, departure_time)
                    travel_time = time_difference(departure_time, arrival_time)
                    total_cost = current_cost + waiting_time + travel_time
                    heapq.heappush(pq, (total_cost, next_node, path + [data['id']], data['arrival_time']))

    return [], float('inf') 

[284175, 284176, 327262] 780.0


In [11]:
# def shortest_path_dijkstra(graph, start_stop, end_stop, start_time):
#     pq = []  # Priority queue
#     heapq.heappush(pq, (0, start_stop, None, None, start_time))  # (cost, node, prev_edge_id, prev_line, arrival_time)
#     visited = set()
#     best_costs = {node: float('inf') for node in graph.nodes}
#     best_costs[start_stop] = 0
#     best_prev = {node: None for node in graph.nodes}
    
#     while pq:
#         cost, node, prev_edge_id, prev_line, arrival_time = heapq.heappop(pq)
#         if node == end_stop:
#             path = []
#             while node is not None:
#                 path.append((node, prev_line, arrival_time))
#                 if best_prev[node] is not None:
#                     prev_edge_id, prev_line, arrival_time, node = best_prev[node]
#                 else:
#                     break
#             path.reverse()
#             return path, cost
        
#         if node in visited:
#             continue
#         visited.add(node)
        
#         for neighbor, edge_data in graph[node].items():
#             for edge_id, data in edge_data.items():  # Iterate over all edges between nodes
#                 line = data.get('line', None)
#                 departure_time = data.get('departure_time', None)
#                 arrival_time = data.get('arrival_time', None)
                
#                 if departure_time is None or arrival_time is None:
#                     print(f"Missing departure or arrival time for edge {edge_id}")
#                     continue  # Skip this edge if departure or arrival time is missing
                
#                 edge_cost = calculate_time_difference(arrival_time, departure_time)
                
#                 new_cost = cost + edge_cost
#                 if new_cost < best_costs[neighbor]:
#                     best_costs[neighbor] = new_cost
#                     best_prev[neighbor] = (edge_id, line, arrival_time, node)
#                     heapq.heappush(pq, (new_cost, neighbor, edge_id, line, arrival_time))
    
#     # If no path found, return None
#     return None, None

In [12]:
def print_schedule(path): # cost, node, path, current_time
    start_time = path[0][2]
    
    current_line = None
    for i in range(len(path) - 1):
        start_stop, line, start_time = path[i]
        end_stop, _, end_time = path[i + 1]
        
        if line != current_line:
            if current_line is not None:
                print(f"Arrival: {start_time}, To: {start_stop}")
            print(f"\nLine: {line}")
            print(f"Departure: {start_time}, From: {start_stop}")
            current_line = line
        start_time = end_time
    
    # Last stop
    print(f"Arrival: {end_time}, To: {end_stop}")


In [13]:
def print_path(path, data):
    if not path:
        print("No path found.")
        return
    
    total_time_seconds = 0
    current_line = None
    line_changes = -1  # Start with -1 to account for the first line not being a change

    for edge_id in path:
        edge_data = data.loc[edge_id]
        
        # Calculate waiting time if necessary
        if current_line is not None and current_line != edge_data['line']:
            print("waiting time: ...")  # Implement waiting time calculation if needed
        
        # Update line changes count
        if current_line != edge_data['line']:
            line_changes += 1
            current_line = edge_data['line']
        
        # Calculate travel time
        departure_time = parse_time(edge_data['departure_time'], '%H:%M:%S')
        arrival_time = parse_time(edge_data['arrival_time'], '%H:%M:%S')
        travel_time = time_difference(departure_time, arrival_time)
        total_time_seconds += travel_time
        
        print(f"Start stop: {edge_data['start_stop']}")
        print(f"Departure time: {edge_data['departure_time']}")
        print(f"Line: {edge_data['line']}")
        print(f"Arrival time: {edge_data['arrival_time']}")
        print(f"End stop: {edge_data['end_stop']}")
        print()  # Blank line for separation
        
    total_time_minutes = total_time_seconds / 60
    print(f"Total time in minutes: {total_time_minutes:.2f}")
    print(f"Number of line changes: {line_changes}")

# Assuming 'path' is the list of edge IDs found by your shortest path algorithm,
# and 'data' is your DataFrame containing the graph edges' details.
print_path(path, data)


TypeError: cannot unpack non-iterable int object

In [None]:
path, cost = shortest_path_dijkstra(graph, 'KRZYKI', 'PL. GRUNWALDZKI', '08:00:00')

print_path(path, graph)
# if path:
#         print("Schedule:")
#         print_schedule(path)
#         print(f"\nTotal cost: {cost} minutes")
# else:
#         print("No path found.")

TypeError: '<' not supported between instances of 'str' and 'int'

In [None]:
# def heuristic(lat_1, lon_1, lat_2, lon_2):
#     return ((lat_1 - lat_2) ** 2 + (lon_1 - lon_2) ** 2) ** 0.5  # Euclidean distance


# def shortest_path_a_star(graph, start_stop, end_stop, start_time):
#     g = {start_stop: 0}
#     h = {start_stop: 0}
#     f = {start_stop: 0}
#     open_set = {start_stop}
#     closed_set = set()
#     predecessors = {}

#     while open_set:
#         current_node = None
#         min_cost = float('inf')
#         for test_node in open_set:
#             if f[test_node] < min_cost:
#                 current_node = test_node
#                 min_cost = f[test_node]
#         if current_node == end_stop:
#             # Reconstruct the path
#             path = []
#             while current_node != start_stop:
#                 path.append(predecessors[current_node])
#                 current_node = predecessors[current_node]
#             path.reverse()  # Reverse the path to get it from start to end
#             return path

#         open_set.remove(current_node)
#         closed_set.add(current_node)

#         for neighbor, edge_data in graph[current_node].items():
#             for edge_id, data in edge_data.items():
#                 if neighbor not in closed_set:
#                     tentative_g = g[current_node] + calculate_time_difference(start_time, data['arrival_time'])
#                     if tentative_g < g.get(neighbor, float('inf')):
#                         g[neighbor] = tentative_g
#                         neighbor_node = graph.nodes[neighbor]
#                         current_node_obj = graph.nodes[current_node]
#                         h[neighbor] = heuristic(neighbor_node['lat'], neighbor_node['lon'],
#                                                 current_node_obj['lat'], current_node_obj['lon'])
#                         f[neighbor] = g[neighbor] + h[neighbor]
#                         if neighbor not in open_set:
#                             open_set.add(neighbor)
#                 elif neighbor in open_set:
#                     tentative_g = g[current_node] + calculate_time_difference(start_time, data['arrival_time'])
#                     if tentative_g < g.get(neighbor, float('inf')):
#                         g[neighbor] = tentative_g
#                         f[neighbor] = g[neighbor] + h[neighbor]
#         closed_set.remove(current_node)

#     return None  # No path found

def heuristic(lat_1, lon_1, lat_2, lon_2):
    return ((lat_1 - lat_2) ** 2 + (lon_1 - lon_2) ** 2) ** 0.5  # Euclidean distance

def shortest_path_a_star(graph, start_stop, end_stop, start_time):
    pq = []  # Priority queue
    heapq.heappush(pq, (0, start_stop, None, None, start_time))  # (cost, node, prev_edge_id, prev_line, arrival_time)
    visited = set()
    best_prev = {node: None for node in graph.nodes}
    
    while pq:
        _, node, prev_edge_id, prev_line, arrival_time = heapq.heappop(pq)
        if node == end_stop:
            path = []
            while node is not None:
                path.append((node, prev_line, arrival_time))
                _, prev_line, arrival_time, node = best_prev[node]
            path.reverse()
            return path
        
        if node in visited:
            continue
        visited.add(node)
        
        for neighbor, edge_data in graph[node].items():
            for edge_id, data in edge_data.items():  # Iterate over all edges between nodes
                line = data.get('line', None)
                departure_time = data.get('departure_time', None)
                arrival_time = data.get('arrival_time', None)
                neighbor_node = graph.nodes[neighbor]
                if departure_time is None or arrival_time is None:
                    print(f"Missing departure or arrival time for edge {edge_id}")
                    continue  # Skip this edge if departure or arrival time is missing
                
                edge_cost = calculate_time_difference(arrival_time, departure_time)
                new_cost = edge_cost
                h = heuristic(neighbor_node['lat'], neighbor_node['lon'], graph.nodes[end_stop]['lat'], graph.nodes[end_stop]['lon'])
                f = new_cost + h
                
                if best_prev[neighbor] is None or new_cost < best_prev[neighbor][0]:
                    best_prev[neighbor] = (new_cost, line, arrival_time, node)
                    heapq.heappush(pq, (f, neighbor, edge_id, line, arrival_time))
    
    # If no path found, return None
    return None


In [None]:
path = shortest_path_a_star(graph, 'KRZYKI', 'Sowia', '08:00:00')
print_schedule(path)