In [9]:
import os

import pickle
import time
from multiprocessing import Pool
from time import sleep

import networkx as nx
import numpy as np
from sklearn.cluster import HDBSCAN
from tqdm.notebook import trange, tqdm
from scripts import centroids_graph_builder, graph_osm_loader, utils

Примеры того, как выделять сообщества (communities) в графе

In [10]:
def resolve_communities(H: nx.Graph, **params) -> list[set[int]]:
    """Partition ``H`` into communities with the Louvain method.

    Edge weights are read from the ``'length'`` attribute and the random
    seed is fixed to 1534.

    :param H: graph to partition.
    :param params: keyword options; ``r`` is required and is forwarded to
        Louvain as ``resolution``. A missing ``r`` raises ``KeyError``.
    :return: whatever ``utils.validate_cms`` returns for the Louvain partition.
    """
    resolution = params['r']
    louvain_partition = nx.community.louvain_communities(
        H,
        weight='length',
        resolution=resolution,
        seed=1534,
    )
    return utils.validate_cms(H, louvain_partition)

In [11]:
def resolve_by_hdbscan(H: nx.Graph) -> list[set[int]]:
    """Cluster the nodes of ``H`` with HDBSCAN, using edge lengths as distances.

    Every node becomes one sample ``[x, y, node_id]`` built from its ``'x'``
    and ``'y'`` attributes. The custom metric ignores the coordinates and
    only looks at the node ids: adjacent nodes are separated by the
    ``'length'`` of their edge, all other pairs are infinitely far apart.

    :param H: graph whose nodes carry ``'x'``/``'y'`` attributes and whose
        edges carry ``'length'``. Node ids must be numeric, because they
        travel through a float array and are converted back with ``int``.
    :return: whatever ``utils.validate_cms`` returns for the clusters found.
    """
    def edge_distance(a, b):
        # The third column of a sample row is the node id.
        u = int(a[2])
        v = int(b[2])
        # Check both orientations so a directed graph storing only (v, u)
        # does not raise KeyError on the lookup.
        if H.has_edge(u, v):
            return H.edges[u, v]['length']
        if H.has_edge(v, u):
            return H.edges[v, u]['length']
        # NOTE(review): for u == v this returns inf unless H has a self-loop;
        # confirm that is intended if the metric is ever evaluated on
        # identical samples.
        return float('inf')

    # Use the graph passed in (not a module-level one) for both the samples
    # and the label -> node mapping, so rows and labels stay aligned.
    node_data = list(H.nodes(data=True))
    samples = np.array([[attrs['x'], attrs['y'], node] for node, attrs in node_data])

    scan = HDBSCAN(metric=edge_distance, min_samples=1, max_cluster_size=30)
    labels = scan.fit_predict(samples)

    # Group node ids by predicted label. scikit-learn marks noisy samples
    # with the label -1; those nodes all land in one shared set here.
    grouped = {}
    for (node, _), label in zip(node_data, labels):
        grouped.setdefault(label, set()).add(node)

    return utils.validate_cms(H, list(grouped.values()))

In [12]:
def find_path_length_h(
        g0: nx.Graph,
        g1: nx.Graph,
        cms: list[set[int]] | tuple[set[int], ...],
        from_node: int,
        to_node: int) -> tuple[float, list[int]]:
    """Find a path between two nodes with a two-level (hierarchical) search.

    First the cluster-level graph ``g1`` is searched for the shortest chain
    of clusters between the clusters of the two end nodes. Then Dijkstra is
    run on the subgraph of ``g0`` extracted for exactly those clusters.

    :param g0: original graph; every node needs a ``'cluster'`` attribute.
    :param g1: cluster-level graph whose nodes are cluster ids.
    :param cms: communities, passed through to
        ``centroids_graph_builder.extract_cluster_list_subgraph``.
    :param from_node: source node id in ``g0``.
    :param to_node: target node id in ``g0``.
    :return: ``(length, path)`` as returned by
        ``nx.single_source_dijkstra`` on the restricted subgraph, not just
        the length.
    :raises networkx.NetworkXNoPath: if either search finds no path.
    """
    from_cluster = g0.nodes[from_node]['cluster']
    to_cluster = g0.nodes[to_node]['cluster']

    # Only the sequence of clusters matters here, not the cluster-level distance.
    cluster_path = nx.single_source_dijkstra(g1, from_cluster, to_cluster, weight='length')[1]
    subgraph = centroids_graph_builder.extract_cluster_list_subgraph(g0, set(cluster_path), cms)
    return nx.single_source_dijkstra(subgraph, from_node, to_node, weight='length')

In [13]:
# Identifier of the graph to load; it is passed to graph_osm_loader.get_graph
# and embedded in the file name of the cached point pairs.
GRAPH_ID = 'R13470549'

In [14]:
# example ids can be found in graph_osm_loader.py
g = graph_osm_loader.get_graph(GRAPH_ID) # load the graph
len(g.nodes), len(g.edges)

(715, 1038)

In [15]:
utils.get_opt_cluster_count(len(g.nodes)) # optimal number of clusters

128

In [16]:
# Louvain partition of g; r is forwarded as the Louvain `resolution` parameter.
cms = resolve_communities(g, r=26)
print(len(cms))

143


In [17]:
cls2n =  centroids_graph_builder.get_cls2n(g) # map from each cluster to its neighbouring clusters

In [18]:
# Build the cluster-level graph g1 (its nodes are cluster ids) from the
# communities and the cluster adjacency map.
# NOTE(review): cls2c is presumably a cluster -> centre node mapping — confirm
# in centroids_graph_builder.
g1, cls2c = centroids_graph_builder.build_center_graph(g, cms, cls2n)

In [19]:
# Cached point pairs for this graph: reuse them when the file exists so that
# every run benchmarks the same pairs, otherwise generate and store them.
# NOTE(review): 'pouits' looks like a typo for 'points'; it is left unchanged
# because renaming it would orphan existing cache files — confirm and migrate.
path = utils.get_path('pouits', f'points_{GRAPH_ID}.pickle')

if os.path.exists(path):
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # produced by this notebook.
    with open(path, 'rb') as fp:
        points = pickle.load(fp)
else:
    points = [utils.get_node_for_initial_graph_v2(g) for _ in trange(1000, desc='generate points')]
    with open(path, 'wb') as fp:
        pickle.dump(points, fp)

In [20]:
NUM_ITERATION = 2 # to reduce the timing error, every measurement covers NUM_ITERATION repeated runs
WORKER = 4 # number of worker processes (size of the multiprocessing Pool)


def do_calc(data):
    """Benchmark plain Dijkstra against the hierarchical search for one worker.

    Reads the module-level ``g``, ``g1``, ``cms`` and ``NUM_ITERATION``.

    :param data: ``(pps, i)`` where ``pps`` is an iterable of
        ``(source, target)`` node pairs and ``i`` is the worker index (used
        to stagger start-up and to position the progress bar).
    :return: dict of parallel lists, one entry per processed pair:
        ``'l'``/``'h_l'`` path lengths, ``'p'``/``'h_p'`` paths,
        ``'time_l'``/``'time_h'`` elapsed seconds for ``NUM_ITERATION``
        runs, and ``'delta'`` the relative length error in percent.
    """
    pps, worker_idx = data

    stat = {
        'l': [],
        'h_l': [],
        'p': [],
        'h_p': [],
        'time_l': [],
        'time_h': [],
        'delta': [],
    }

    # stagger the workers so that the tqdm bars render properly
    sleep(worker_idx / 10)
    print('start', worker_idx)

    for p1, p2 in tqdm(pps, desc='find paths', position=worker_idx):
        # classic Dijkstra on the full graph
        l, p = None, None
        start = time.perf_counter()
        for _ in range(NUM_ITERATION):
            l, p = nx.single_source_dijkstra(g, p1, p2, weight='length')
        time_l = time.perf_counter() - start

        # hierarchical search
        h_l, h_p = None, None
        start = time.perf_counter()
        for _ in range(NUM_ITERATION):
            h_l, h_p = find_path_length_h(g, g1, cms, p1, p2)
        time_h = time.perf_counter() - start

        if l:
            delta = (h_l - l) / l * 100
        else:
            # A zero-length reference path would divide by zero: report no
            # error when both lengths agree, an infinite one otherwise.
            delta = 0.0 if h_l == l else float('inf')

        stat['l'].append(l)  # length of the plain path
        stat['h_l'].append(h_l)  # length of the hierarchical path
        stat['p'].append(p)  # plain path
        stat['h_p'].append(h_p)  # hierarchical path
        stat['delta'].append(delta)  # relative difference in length, percent
        stat['time_l'].append(time_l)  # plain search time
        stat['time_h'].append(time_h)  # hierarchical search time
    return stat

# Deal the point pairs round-robin: worker k gets points[k], points[k + WORKER], ...
data = [(list(points[offset::WORKER]), offset) for offset in range(WORKER)]
with Pool(WORKER) as pool:
    res = pool.map(do_calc, data)

# Concatenate the per-worker lists key by key.
stat = {}
for worker_stat in res:
    for key, values in worker_stat.items():
        stat.setdefault(key, []).extend(values)

deltas = stat['delta']
print('err_mean:', np.mean(deltas))
print('err_min:', np.min(deltas))
print('err_max:', np.max(deltas))

# Mean per-pair ratio of plain to hierarchical search time.
speedups = np.array(stat['time_l']) / np.array(stat['time_h'])
print(np.mean(speedups))

start 0


find paths:   0%|          | 0/250 [00:00<?, ?it/s]

start 1


find paths:   0%|          | 0/250 [00:00<?, ?it/s]

start 2


find paths:   0%|          | 0/250 [00:00<?, ?it/s]

start 3


find paths:   0%|          | 0/250 [00:00<?, ?it/s]

err_mean: 9.628483568368349
err_min: 0.0
err_max: 108.87172925885227
1.5055948648468906
