# In [None]:
import networkx as nx
import pandas as pd
from itertools import islice
from pandarallel import pandarallel

# Initialize pandarallel (needed before `parallel_apply` is used in get_od_path).
pandarallel.initialize()

def get_od_path(edge, node, station2node_dict, od, max_paths=3):
    """Compute the k shortest simple paths for every OD pair and summarize them.

    A directed graph is built from ``edge`` (columns ``u``, ``v``, ``length``,
    ``edge_id``) and ``node`` (column ``id``). For each row of ``od`` the
    stations ``station_id_x`` / ``station_id_y`` are mapped to graph nodes via
    ``station2node_dict`` and up to ``max_paths`` shortest simple paths
    (ranked by total ``length``) are computed with ``parallel_apply``.

    OD pairs whose stations have no node mapping, whose nodes are missing
    from the graph, or that are not connected are skipped (a message is
    printed and the pair gets an empty path list).

    Args:
        edge: DataFrame with columns ``u``, ``v``, ``length`` and ``edge_id``.
        node: DataFrame with a column ``id`` listing the graph nodes.
        station2node_dict: Mapping from station id to graph node id.
        od: DataFrame with columns ``station_id_x`` and ``station_id_y``.
            A ``paths`` column is added to it in place.
        max_paths: Maximum number of shortest paths kept per OD pair.

    Returns:
        A tuple ``(od_dis_table, od_length)``:
        ``od_dis_table`` has one row per traversed edge with columns
        ``station_id_x``, ``station_id_y``, ``edge_id``, ``cumsum_length``,
        ``path_id``; ``od_length`` has one row per (OD pair, path) with
        columns ``station_id_x``, ``station_id_y``, ``path_id``, ``length``.
        Both are empty (but keep these columns) when no path was found.
    """
    dis_columns = ['station_id_x', 'station_id_y', 'edge_id', 'cumsum_length', 'path_id']
    length_columns = ['station_id_x', 'station_id_y', 'path_id', 'length']

    # Build the graph. The weight must be stored under the 'length' attribute:
    # the default attribute name is 'weight', and a missing attribute is
    # silently treated as weight 1 by the path search (hop-count ranking).
    G = nx.DiGraph()
    G.add_nodes_from(node['id'])
    G.add_weighted_edges_from(edge[['u', 'v', 'length']].values, weight='length')

    # Cache of already computed station pairs (local to each worker process
    # when the function is executed through parallel_apply).
    path_cache = {}

    def get_shortest_paths(start_station, end_station):
        key = (start_station, end_station)
        if key in path_cache:
            return path_cache[key]

        paths = []
        try:
            start_node = station2node_dict[start_station]
            end_node = station2node_dict[end_station]
        except KeyError:
            # A station without a node mapping cannot be routed; skip it.
            print(f"No node mapping for station pair {start_station} -> {end_station}")
        else:
            try:
                paths_generator = nx.shortest_simple_paths(
                    G, source=start_node, target=end_node, weight='length')
                # The generator raises lazily, so consume it inside the try.
                paths = [list(map(int, path))
                         for path in islice(paths_generator, max_paths)]
            except (nx.NetworkXNoPath, nx.NodeNotFound):
                print(f"No path between {start_station} and {end_station}")

        path_cache[key] = paths
        return paths

    # Compute the paths of every OD row.
    od['paths'] = od.parallel_apply(
        lambda r: get_shortest_paths(r['station_id_x'], r['station_id_y']), axis=1)

    # Expand every path into one record per traversed edge. 'od_row' is the
    # positional row number (unique even if the od index has duplicates) and
    # 'seq' is the position of the edge inside its path.
    details = []
    for od_row, (_, row) in enumerate(od.iterrows()):
        for path_id, path in enumerate(row['paths']):
            if not path:
                continue
            details.extend(
                {'u': u, 'v': v, 'path_id': path_id, 'od_row': od_row, 'seq': seq,
                 'station_id_x': row['station_id_x'],
                 'station_id_y': row['station_id_y']}
                for seq, (u, v) in enumerate(zip(path[:-1], path[1:]))
            )

    path_df = pd.DataFrame(details)
    if path_df.empty:
        print("No paths available for any OD pairs.")
        # Keep the same columns as the regular result so callers can rely on them.
        return pd.DataFrame(columns=dis_columns), pd.DataFrame(columns=length_columns)

    merged_df = path_df.merge(edge[['u', 'v', 'length', 'edge_id']], on=['u', 'v'])
    # Make the edge order explicit before accumulating instead of relying on
    # the row order produced by the merge.
    merged_df = merged_df.sort_values(
        ['od_row', 'path_id', 'seq'], kind='stable').reset_index(drop=True)

    # Accumulate per OD row and path, so repeated OD pairs in `od` do not
    # add up into each other's cumulative length.
    merged_df['cumsum_length'] = merged_df.groupby(['od_row', 'path_id'])['length'].cumsum()

    # Summaries.
    od_dis_table = merged_df[dis_columns]
    od_length = (merged_df
                 .groupby(['station_id_x', 'station_id_y', 'path_id'])['cumsum_length']
                 .max()
                 .reset_index())
    od_length.rename(columns={'cumsum_length': 'length'}, inplace=True)
    return od_dis_table, od_length

