In [1]:
import pandas as pd
import networkx as nx
from datetime import datetime, timedelta
import heapq
import hashlib

# Forward slashes are accepted on every OS; the previous '..\c...' spelling relied on
# an invalid escape sequence (SyntaxWarning) and only resolved on Windows.
data = pd.read_csv('../connection_graph.csv', index_col=0)
data.head()
# Only the last expression of a cell is rendered, so the column list is what shows up.
data.columns.values

  data = pd.read_csv('..\connection_graph.csv', index_col=0)


array(['company', 'line', 'departure_time', 'arrival_time', 'start_stop',
       'end_stop', 'start_stop_lat', 'start_stop_lon', 'end_stop_lat',
       'end_stop_lon'], dtype=object)

In [None]:
def normalize_time(time_str):
    """Wrap the hour of an ``H:M:S`` string into the 0-23 range.

    The string is split on ':' into three integer fields; the hour is taken
    modulo 24 and every field is rendered zero-padded to two digits.
    """
    fields = time_str.split(':')
    hour, minute, second = (int(field) for field in fields)
    hour %= 24  # e.g. 25 -> 1
    return f'{hour:02}:{minute:02}:{second:02}'

# Bring both timetable columns into the 00-23 hour range
for time_column in ('departure_time', 'arrival_time'):
    data[time_column] = data[time_column].apply(normalize_time)

data.head()

In [None]:
def generate_hash(lat, lon):
    """Return the SHA-256 hex digest of the text ``"<lat>_<lon>"``."""
    key = f"{lat}_{lon}"
    digest = hashlib.sha256(key.encode())
    return digest.hexdigest()

# Derive an identifier for every start and end stop by hashing its coordinates row by row
id_columns = {
    'start_stop_id': ('start_stop_lat', 'start_stop_lon'),
    'end_stop_id': ('end_stop_lat', 'end_stop_lon'),
}
for id_column, (lat_column, lon_column) in id_columns.items():
    data[id_column] = data.apply(
        lambda row: generate_hash(row[lat_column], row[lon_column]),
        axis=1,
    )

data.head()

In [None]:
# Compare how many distinct ids vs. distinct names exist on each side of a connection
for column in ('start_stop_id', 'start_stop', 'end_stop_id', 'end_stop'):
    distinct_values = data[column].unique()
    print(len(distinct_values))

In [None]:
graph = nx.MultiDiGraph()

# iterrows() yields (index label, row). The label is the row's own identifier
# (the CSV is loaded with index_col=0), whereas `row.index` would be the list
# of column names - the same useless value for every edge.
for connection_id, row in data.iterrows():
    # Add start stop node
    graph.add_node(row['start_stop_id'], name=row['start_stop'], lat=row['start_stop_lat'], lon=row['start_stop_lon'])

    # Add end stop node
    graph.add_node(row['end_stop_id'], name=row['end_stop'], lat=row['end_stop_lat'], lon=row['end_stop_lon'])

    # Add one edge per timetable row
    edge_data = {
        'id': connection_id,
        'company': row['company'],
        'line': row['line'],
        'departure_time': row['departure_time'],
        'arrival_time': row['arrival_time'],
    }
    graph.add_edge(row['start_stop_id'], row['end_stop_id'], **edge_data)

In [None]:
# One edge is added per timetable row, so these two numbers are expected to match
edge_count = graph.number_of_edges()
row_count = len(data)
print(edge_count)
print(row_count)

In [None]:
def calculate_time_difference(departure_time, arrival_time, time_format='%H:%M:%S'):
    """Return the minutes from ``departure_time`` to ``arrival_time``.

    Both arguments are strings parsed with ``time_format``. When the arrival
    parses as earlier than the departure, it is taken to fall on the next day.
    """
    start, end = (
        datetime.strptime(moment, time_format)
        for moment in (departure_time, arrival_time)
    )
    elapsed = end - start
    if elapsed < timedelta(0):
        # Arrival is "before" departure on the clock: roll over midnight
        elapsed += timedelta(days=1)

    return elapsed.seconds / 60  # Minutes

In [None]:
def shortest_path_dijkstra(graph, start_stop, end_stop, start_time):
    """Find the earliest-arrival route between two stops with Dijkstra's algorithm.

    The cost of a route is the number of minutes elapsed since ``start_time``:
    every connection contributes the wait at the stop until its departure plus
    the ride itself. Times carry no date, so a departure that is earlier on the
    clock than the current time is treated as leaving on the following day
    (that is how ``calculate_time_difference`` wraps).

    Args:
        graph: Multigraph exposing ``graph.nodes`` (node -> attribute dict) and
            ``graph[node]`` (neighbor -> {edge key -> attribute dict}); edges
            carry 'line', 'departure_time' and 'arrival_time'.
        start_stop: A node key, or the 'name' attribute shared by one or more
            nodes (every matching node is used as a possible start).
        end_stop: A node key or stop name, resolved like ``start_stop``; the
            search ends at whichever matching node is reached first.
        start_time: Clock time at the start, as an 'HH:MM:SS' string.

    Returns:
        ``(path, cost)``. ``path`` is a list of ``(node, line, arrival_time)``
        tuples from start to end; its first entry has line ``None`` and
        ``start_time``. ``cost`` is the elapsed minutes. ``(None, None)`` is
        returned when a stop is unknown or the end cannot be reached.
    """
    def resolve(stop):
        # Nodes are keyed by an id, but callers usually know the stop's name
        if stop in graph.nodes:
            return [stop]
        return [node for node in graph.nodes if graph.nodes[node].get('name') == stop]

    sources = resolve(start_stop)
    targets = set(resolve(end_stop))
    if not sources or not targets:
        return None, None

    best_costs = {node: float('inf') for node in graph.nodes}
    best_prev = {node: None for node in graph.nodes}
    visited = set()
    pq = []  # Priority queue of (elapsed minutes, node, clock time at node)
    for source in sources:
        best_costs[source] = 0
        heapq.heappush(pq, (0, source, start_time))

    while pq:
        cost, node, current_time = heapq.heappop(pq)
        if node in visited:
            continue  # Stale queue entry: the node was already settled cheaper
        visited.add(node)

        if node in targets:
            # Walk the predecessor links back to whichever start node was used
            path = []
            while best_prev[node] is not None:
                _, line, arrival_time, previous = best_prev[node]
                path.append((node, line, arrival_time))
                node = previous
            path.append((node, None, start_time))
            path.reverse()
            return path, cost

        for neighbor, parallel_edges in graph[node].items():
            if neighbor in visited:
                continue
            for edge_id, edge in parallel_edges.items():  # Iterate over all edges between nodes
                departure_time = edge.get('departure_time', None)
                arrival_time = edge.get('arrival_time', None)

                if departure_time is None or arrival_time is None:
                    print(f"Missing departure or arrival time for edge {edge_id}")
                    continue  # Skip this edge if departure or arrival time is missing

                # Wait until the vehicle leaves, then ride it (argument order: from, to)
                wait = calculate_time_difference(current_time, departure_time)
                ride = calculate_time_difference(departure_time, arrival_time)

                new_cost = cost + wait + ride
                if new_cost < best_costs[neighbor]:
                    best_costs[neighbor] = new_cost
                    best_prev[neighbor] = (edge_id, edge.get('line', None), arrival_time, node)
                    heapq.heappush(pq, (new_cost, neighbor, arrival_time))

    # If no path found, return None
    return None, None

In [None]:
def print_schedule(path):
    """Print a route as one block per line ridden.

    Args:
        path: List of ``(stop, line, arrival_time)`` tuples ordered from start
            to end. The line of each entry is the line used to *reach* that
            stop, so the first entry's line is not used. An empty or ``None``
            path prints "No path found.".

    Only arrival times are stored in the path, so the time printed next to
    "Departure" is the time the traveller reached the boarding stop (the start
    time for the first block), not the vehicle's timetable departure. Stops are
    printed exactly as they appear in the path.
    """
    if not path:
        print("No path found.")
        return

    riding = False
    current_line = None
    # Pair every stop with the next one: the ride between them uses the line
    # recorded on the *next* entry.
    for (stop, _, time_at_stop), (_, line, _) in zip(path, path[1:]):
        if not riding or line != current_line:
            if riding:
                # Changing lines: close the previous block at this stop
                print(f"Arrival: {time_at_stop}, To: {stop}")
            print(f"\nLine: {line}")
            print(f"Departure: {time_at_stop}, From: {stop}")
            riding = True
            current_line = line

    # Last stop
    end_stop, _, end_time = path[-1]
    print(f"Arrival: {end_time}, To: {end_stop}")


In [None]:
# Example query: route between two stops for a traveller starting at 08:00
path, cost = shortest_path_dijkstra(graph, 'KRZYKI', 'PL. GRUNWALDZKI', '08:00:00')

if not path:
    print("No path found.")
else:
    print("Schedule:")
    print_schedule(path)
    print(f"\nTotal cost: {cost} minutes")

In [None]:
# def heuristic(lat_1, lon_1, lat_2, lon_2):
#     return ((lat_1 - lat_2) ** 2 + (lon_1 - lon_2) ** 2) ** 0.5  # Euclidean distance


# def shortest_path_a_star(graph, start_stop, end_stop, start_time):
#     g = {start_stop: 0}
#     h = {start_stop: 0}
#     f = {start_stop: 0}
#     open_set = {start_stop}
#     closed_set = set()
#     predecessors = {}

#     while open_set:
#         current_node = None
#         min_cost = float('inf')
#         for test_node in open_set:
#             if f[test_node] < min_cost:
#                 current_node = test_node
#                 min_cost = f[test_node]
#         if current_node == end_stop:
#             # Reconstruct the path
#             path = []
#             while current_node != start_stop:
#                 path.append(predecessors[current_node])
#                 current_node = predecessors[current_node]
#             path.reverse()  # Reverse the path to get it from start to end
#             return path

#         open_set.remove(current_node)
#         closed_set.add(current_node)

#         for neighbor, edge_data in graph[current_node].items():
#             for edge_id, data in edge_data.items():
#                 if neighbor not in closed_set:
#                     tentative_g = g[current_node] + calculate_time_difference(start_time, data['arrival_time'])
#                     if tentative_g < g.get(neighbor, float('inf')):
#                         g[neighbor] = tentative_g
#                         neighbor_node = graph.nodes[neighbor]
#                         current_node_obj = graph.nodes[current_node]
#                         h[neighbor] = heuristic(neighbor_node['lat'], neighbor_node['lon'],
#                                                 current_node_obj['lat'], current_node_obj['lon'])
#                         f[neighbor] = g[neighbor] + h[neighbor]
#                         if neighbor not in open_set:
#                             open_set.add(neighbor)
#                 elif neighbor in open_set:
#                     tentative_g = g[current_node] + calculate_time_difference(start_time, data['arrival_time'])
#                     if tentative_g < g.get(neighbor, float('inf')):
#                         g[neighbor] = tentative_g
#                         f[neighbor] = g[neighbor] + h[neighbor]
#         closed_set.remove(current_node)

#     return None  # No path found

def heuristic(lat_1, lon_1, lat_2, lon_2):
    """Return the straight-line (Euclidean) distance between two coordinate pairs.

    The result is in the same unit as the inputs; no geographic projection is applied.
    """
    d_lat = lat_1 - lat_2
    d_lon = lon_1 - lon_2
    squared_distance = d_lat ** 2 + d_lon ** 2
    return squared_distance ** 0.5

def shortest_path_a_star(graph, start_stop, end_stop, start_time):
    """Find the earliest-arrival route between two stops with A* search.

    Nodes are expanded in order of ``g + h``: ``g`` is the minutes elapsed
    since ``start_time`` (waiting for each departure plus riding), and ``h`` is
    ``heuristic`` applied to the node's and the target's 'lat'/'lon'
    attributes. Times carry no date, so a departure earlier on the clock than
    the current time is treated as leaving the following day.

    NOTE(review): ``h`` is a distance in coordinate units while ``g`` is in
    minutes. That looks harmless as long as ``h`` stays far below real travel
    times, but it also gives little guidance; consider scaling the distance by
    an assumed maximum speed - TODO confirm.

    Args:
        graph: Multigraph exposing ``graph.nodes`` (node -> attribute dict with
            'name', 'lat', 'lon') and ``graph[node]`` (neighbor -> {edge key ->
            attribute dict}); edges carry 'line', 'departure_time' and
            'arrival_time'.
        start_stop: A node key, or the 'name' attribute shared by one or more
            nodes (every matching node is used as a possible start).
        end_stop: A node key or stop name, resolved like ``start_stop``.
        start_time: Clock time at the start, as an 'HH:MM:SS' string.

    Returns:
        A list of ``(node, line, arrival_time)`` tuples from start to end (the
        first entry has line ``None`` and ``start_time``), or ``None`` when a
        stop is unknown or the end cannot be reached.
    """
    def resolve(stop):
        # Nodes are keyed by an id, but callers usually know the stop's name
        if stop in graph.nodes:
            return [stop]
        return [node for node in graph.nodes if graph.nodes[node].get('name') == stop]

    sources = resolve(start_stop)
    targets = set(resolve(end_stop))
    if not sources or not targets:
        return None

    target_coords = [(graph.nodes[target]['lat'], graph.nodes[target]['lon']) for target in targets]

    def estimate(node):
        # Distance to the closest node that counts as the destination
        attrs = graph.nodes[node]
        return min(heuristic(attrs['lat'], attrs['lon'], lat, lon) for lat, lon in target_coords)

    best_costs = {node: float('inf') for node in graph.nodes}
    best_prev = {node: None for node in graph.nodes}
    pq = []  # Priority queue of (g + h, g, node, clock time at node)
    for source in sources:
        best_costs[source] = 0
        heapq.heappush(pq, (estimate(source), 0, source, start_time))

    while pq:
        _, cost, node, current_time = heapq.heappop(pq)
        if cost > best_costs[node]:
            continue  # Stale queue entry: a cheaper way to this node was found later

        if node in targets:
            # Walk the predecessor links back to whichever start node was used
            path = []
            while best_prev[node] is not None:
                _, line, arrival_time, previous = best_prev[node]
                path.append((node, line, arrival_time))
                node = previous
            path.append((node, None, start_time))
            path.reverse()
            return path

        for neighbor, parallel_edges in graph[node].items():
            for edge_id, edge in parallel_edges.items():  # Iterate over all edges between nodes
                departure_time = edge.get('departure_time', None)
                arrival_time = edge.get('arrival_time', None)
                if departure_time is None or arrival_time is None:
                    print(f"Missing departure or arrival time for edge {edge_id}")
                    continue  # Skip this edge if departure or arrival time is missing

                # Wait until the vehicle leaves, then ride it (argument order: from, to)
                wait = calculate_time_difference(current_time, departure_time)
                ride = calculate_time_difference(departure_time, arrival_time)
                new_cost = cost + wait + ride

                if new_cost < best_costs[neighbor]:
                    best_costs[neighbor] = new_cost
                    best_prev[neighbor] = (new_cost, edge.get('line', None), arrival_time, node)
                    heapq.heappush(pq, (new_cost + estimate(neighbor), new_cost, neighbor, arrival_time))

    # If no path found, return None
    return None


In [None]:
# Example query with A*; the search returns None when no route exists,
# so guard before handing the result to print_schedule.
path = shortest_path_a_star(graph, 'KRZYKI', 'Sowia', '08:00:00')

if path:
    print("Schedule:")
    print_schedule(path)
else:
    print("No path found.")