# CIC - Parrallellizing graph networks

- Rami Tarabishi | @r9119

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx
import math
import time

In [2]:
# import twitch graph from csv
twitch_gamers = pd.read_csv('./data/large_twitch_edges.csv')
 
# create graph
G = nx.from_pandas_edgelist(twitch_gamers, source='numeric_id_1', target='numeric_id_2')

In [24]:
np.random.seed(12345)

def sample_connected_subgraph(G, num_nodes):
    # Step 1: Select a random starting node
    start_node = np.random.choice(list(G.nodes))
    
    # Step 2: Perform BFS to collect nodes and edges
    bfs_edges = list(nx.bfs_edges(G, source=start_node))
    bfs_nodes = [start_node] + [v for u, v in bfs_edges]
    
    # If we collected enough nodes, create the subgraph
    if len(bfs_nodes) >= num_nodes:
        sub_nodes = bfs_nodes[:num_nodes]
        subgraph = G.subgraph(sub_nodes).copy()
    else:
        raise ValueError(f"Unable to find {num_nodes} connected nodes in the graph.")
    
    return subgraph
    
num_nodes = 10_000

G_sampled = sample_connected_subgraph(G, num_nodes=num_nodes)

print(f"Number of nodes in the sampled subgraph: {len(G_sampled.nodes)}")

Number of nodes in the sampled subgraph: 10000


## Proximity Prestige:

In our SAN project, we noticed that proximity presitge was a metric that was quite computationally expensive. It has a time complexity of O(V^2 + VE) where V is the number of nodes and E is the number of edges, Ontop of that it needs a Breadth first search O(E + V) to calculate the shortest paths between a node and every other node. Even at a downsampled graph of only 20000 nodes (~170000 in our original graph) it took half an hour to compute. Scaled up to our whole graph it would take days to compute on a single process which was not feasible for us.

P.S. We did know about the `cugraph` project to add a cuda backend to networkx, but we had issues with the instalation (I have issues with my WSL and windows install, and my project partner doesnt have a machine with a cuda compatible GPU) therefore we subsampled the graph to make it more manageable. Which also gave me the opportunity to try and parallelize the computation for CIC.

### Regular non parallel approach:

In [16]:
# V, E = G_sampled_50.number_of_nodes(), G_sampled_50.number_of_edges()
# v2, e2 = G_sampled_20.number_of_nodes(), G_sampled_20.number_of_edges()
# v3, e3 = G_sampled_10.number_of_nodes(), G_sampled_10.number_of_edges()

# print(f"50k graph has {V} nodes and {E} edges.")
# print(f"20k graph has {v2} nodes and {e2} edges.")
# print(f"10k graph has {v3} nodes and {e3} edges.")

# complexity = (V**2) + V * E
# complexity2 = (v2**2) + v2 * e2
# complexity3 = (v3**2) + v3 * e3

# print(f"Complexity of 50k graph: {complexity}")
# print(f"Complexity of 20k graph: {complexity2}")    
# print(f"Complexity of 10k graph: {complexity3}")
# # print(f"Complexity of 20k: {30 * (complexity / complexity2)} minutes")

In [5]:
# def proximity_prestige(G):
#     n_nodes = len(G.nodes)
#     proximity_prestige_dict = {}
    
#     # Calculate shortest path lengths between all pairs of nodes
#     start = time.time()
#     shortest_path_lengths = dict(nx.all_pairs_shortest_path_length(G))
#     end = time.time()
#     print(f"Calculated shortest path lengths for {len(G.nodes)} nodes in {end - start:.2f} seconds.")
    
#     # Iterate over each node to calculate its Proximity Prestige
#     start = time.time()
#     for node in G.nodes():
#         prestige_sum = 0
        
#         for target, path_length in shortest_path_lengths[node].items():
#             if node != target and path_length > 0:
#                 prestige_sum += 1 / path_length
        
#         proximity_prestige_dict[node] = prestige_sum / (n_nodes - 1)

#     end = time.time()
#     print(f"Calculated Proximity Prestige for {len(G.nodes)} nodes in {end - start:.2f} seconds.")
    
#     return proximity_prestige_dict

# # start_time = time.time()
# proximity_prestige_scores = proximity_prestige(G_sampled)
# # end_time = time.time()
# # print(f"Calculated Proximity Prestige for {len(G_sampled_10.nodes)} nodes in {end_time - start_time:.2f} seconds.")

# # top_proximity_prestige = sorted(proximity_prestige_scores.items(), key=lambda x: x[1], reverse=True)[:5]
# # print("Top 5 nodes by Proximity prestige:")
# # for node, prestige in top_proximity_prestige:
# #     print(f"Knoten {node}: {prestige:.4f}")

Calculated shortest path lengths for 10000 nodes in 112.48 seconds.
Calculated Proximity Prestige for 10000 nodes in 6.32 seconds.


```yaml
Calculated shortest path lengths for 10000 nodes in 132.69 seconds.
Calculated Proximity Prestige for 10000 nodes in 7.53 seconds.

Calculated shortest path lengths for 20000 nodes in 1803.55 seconds.
Calculated Proximity Prestige for 20000 nodes in 32.11 seconds.

30000 nodes >100 minutes and im going to bed
```

#### Single processing unit on 50000 node graph:

```python
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
File <timed exec>:19

File <timed exec>:5, in proximity_prestige(G)

File c:\Python311\Lib\site-packages\networkx\algorithms\shortest_paths\unweighted.py:193, in all_pairs_shortest_path_length(G, cutoff)
    191 # TODO This can be trivially parallelized.
    192 for n in G:
--> 193     yield (n, length(G, n, cutoff=cutoff))

File c:\Python311\Lib\site-packages\networkx\utils\backends.py:412, in _dispatch.__call__(self, backend, *args, **kwargs)
    409 def __call__(self, /, *args, backend=None, **kwargs):
    410     if not backends:
    411         # Fast path if no backends are installed
--> 412         return self.orig_func(*args, **kwargs)
    414     # Use `backend_name` in this function instead of `backend`
    415     backend_name = backend

File c:\Python311\Lib\site-packages\networkx\algorithms\shortest_paths\unweighted.py:62, in single_source_shortest_path_length(G, source, cutoff)
     60     cutoff = float("inf")
     61 nextlevel = [source]
---> 62 return dict(_single_shortest_path_length(G._adj, nextlevel, cutoff))

MemoryError: 
```

After ~5:30 hours (270 minutes) over night of running the coded ended up crashing due to a memory error, so clearly trying to scale the graph up to the full size locally wont be feasable without a dask cluster or some other parallelization/memory management method.

### Dask approach:

In [7]:
from dask.distributed import Client, LocalCluster

# Create a local cluster
cluster = LocalCluster(n_workers=8, threads_per_worker=2)
client = Client(cluster)

In [8]:
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 16,Total memory: 31.92 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:52209,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 16
Started: Just now,Total memory: 31.92 GiB

0,1
Comm: tcp://127.0.0.1:52258,Total threads: 2
Dashboard: http://127.0.0.1:52260/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52212,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-_lj0q049,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-_lj0q049

0,1
Comm: tcp://127.0.0.1:52244,Total threads: 2
Dashboard: http://127.0.0.1:52246/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52214,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-dv3kenli,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-dv3kenli

0,1
Comm: tcp://127.0.0.1:52245,Total threads: 2
Dashboard: http://127.0.0.1:52248/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52216,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-csxd0pcj,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-csxd0pcj

0,1
Comm: tcp://127.0.0.1:52262,Total threads: 2
Dashboard: http://127.0.0.1:52263/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52218,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-653ez039,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-653ez039

0,1
Comm: tcp://127.0.0.1:52265,Total threads: 2
Dashboard: http://127.0.0.1:52266/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52220,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-n7pc5eok,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-n7pc5eok

0,1
Comm: tcp://127.0.0.1:52251,Total threads: 2
Dashboard: http://127.0.0.1:52253/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52222,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-ed8atwob,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-ed8atwob

0,1
Comm: tcp://127.0.0.1:52252,Total threads: 2
Dashboard: http://127.0.0.1:52257/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52224,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-f3v_hes_,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-f3v_hes_

0,1
Comm: tcp://127.0.0.1:52250,Total threads: 2
Dashboard: http://127.0.0.1:52254/status,Memory: 3.99 GiB
Nanny: tcp://127.0.0.1:52226,
Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-vfwsih0o,Local directory: C:\Users\rami0\AppData\Local\Temp\dask-scratch-space\worker-vfwsih0o


In [10]:
import networkx as nx
import dask
from dask import delayed, compute
from dask.distributed import wait

def single_source_shortest_path_length(G, source, cutoff=None):
    """Compute the shortest path lengths from source to all reachable nodes."""
    if source not in G:
        raise nx.NodeNotFound(f"Source {source} is not in G")
    if cutoff is None:
        cutoff = float("inf")
    nextlevel = [source]
    return dict(_single_shortest_path_length(G._adj, nextlevel, cutoff))

def _single_shortest_path_length(adj, firstlevel, cutoff):
    """Yields (node, level) in a breadth first search."""
    seen = set(firstlevel)
    nextlevel = firstlevel
    level = 0
    n = len(adj)
    for v in nextlevel:
        yield (v, level)
    while nextlevel and cutoff > level:
        level += 1
        thislevel = nextlevel
        nextlevel = []
        for v in thislevel:
            for w in adj[v]:
                if w not in seen:
                    seen.add(w)
                    nextlevel.append(w)
                    yield (w, level)
            if len(seen) == n:
                return

@delayed
def compute_batch_shortest_path_lengths(G, nodes, cutoff=None):
    results = {}
    for n in nodes:
        results[n] = single_source_shortest_path_length(G, n, cutoff=cutoff)
    return results

def all_pairs_shortest_path_length(G, cutoff=None, batch_size=10):
    """Computes the shortest path lengths between all nodes in `G`."""
    nodes = list(G.nodes())
    batched_nodes = [nodes[i:i + batch_size] for i in range(0, len(nodes), batch_size)]
    
    futures = [compute_batch_shortest_path_lengths(G, batch, cutoff=cutoff) for batch in batched_nodes]
    results = compute(*futures)
    
    # Flatten results
    shortest_path_lengths = {k: v for d in results for k, v in d.items()}
    
    return shortest_path_lengths

def proximity_prestige(G, batch_size=10, client=None, num_nodes=None):
    print("="*80)
    print("Batch size:", batch_size)
    print("="*40)

    start = time.time()
    shortest_path_lengths = dict(all_pairs_shortest_path_length(G, batch_size=batch_size))
    end = time.time()
    print(f"Calculated shortest path lengths for {num_nodes} nodes in {end - start:.2f} seconds.")

    proximity_prestige_dict = {}
    start = time.time()
    for node in G.nodes():
        prestige_sum = 0
        
        for target, path_length in shortest_path_lengths[node].items():
            if node != target and path_length > 0:
                prestige_sum += 1 / path_length
        
        proximity_prestige_dict[node] = prestige_sum / (num_nodes - 1)

    end = time.time()
    print(f"Calculated Proximity Prestige for {num_nodes} nodes in {end - start:.2f} seconds.", "\n")

    # Clean up memory after computation
    del shortest_path_lengths

    return proximity_prestige_dict

In [26]:
# batch_size = math.ceil(G_sampled.number_of_nodes() / 8)
batch_size = 1000

# G_scattered = client.scatter(G_sampled, broadcast=True)

proximity_prestige_scores = proximity_prestige(G_sampled, batch_size=batch_size, client=client, num_nodes=G_sampled.number_of_nodes()) # Was passing client as I was scatterring the graph but had issues with futures on the scatterred graph

# top_proximity_prestige = sorted(proximity_prestige_scores.items(), key=lambda x: x[1], reverse=True)[:5]
# print("Top 5 nodes by Proximity prestige:")
# for node, prestige in top_proximity_prestige:
#     print(f"Node {node}: {prestige:.4f}")

Batch size: 1000




Bluescreened at the last batch 40k nodes, I need more ram to run this on my machine.

### Demo of the speed increase:
Just a short comparison of the speed increase as more workers are added to the dask cluster.

In [25]:
if client:
    client.close()
    cluster.close()

# Start new cluster with 1 workers
cluster = LocalCluster(n_workers=1, threads_per_worker=2)
client = Client(cluster)

# Run proximity_prestige
batch_size = math.ceil(G_sampled.number_of_nodes() / 1)
proximity_prestige_scores = proximity_prestige(G_sampled, batch_size=batch_size, client=client, num_nodes=G_sampled.number_of_nodes())

Batch size: 10000
Calculated shortest path lengths for 10000 nodes in 87.77 seconds.
Calculated Proximity Prestige for 10000 nodes in 5.90 seconds. 



In [29]:
client.close()
cluster.close()

cluster = LocalCluster(n_workers=2, threads_per_worker=2)
client = Client(cluster)



In [30]:
# Run proximity_prestige
batch_size = math.ceil(G_sampled.number_of_nodes() / 2)
proximity_prestige_scores = proximity_prestige(G_sampled, batch_size=batch_size, client=client, num_nodes=G_sampled.number_of_nodes())

Batch size: 5000
Calculated shortest path lengths for 10000 nodes in 53.33 seconds.
Calculated Proximity Prestige for 10000 nodes in 6.11 seconds. 



In [31]:
client.close()
cluster.close()

cluster = LocalCluster(n_workers=4, threads_per_worker=2)
client = Client(cluster)

In [33]:
# Run proximity_prestige
batch_size = math.ceil(G_sampled.number_of_nodes() / 4)
proximity_prestige_scores = proximity_prestige(G_sampled, batch_size=batch_size, client=client, num_nodes=G_sampled.number_of_nodes())

Batch size: 2500
Calculated shortest path lengths for 10000 nodes in 35.83 seconds.
Calculated Proximity Prestige for 10000 nodes in 6.44 seconds. 



In [34]:
client.close()
cluster.close()

cluster = LocalCluster(n_workers=8, threads_per_worker=2)
client = Client(cluster)

In [35]:
# Run proximity_prestige
batch_size = math.ceil(G_sampled.number_of_nodes() / 8)
proximity_prestige_scores = proximity_prestige(G_sampled, batch_size=batch_size, client=client, num_nodes=G_sampled.number_of_nodes())

Batch size: 1250
Calculated shortest path lengths for 10000 nodes in 30.36 seconds.
Calculated Proximity Prestige for 10000 nodes in 7.21 seconds. 



In [36]:
client.shutdown()
client.close()
cluster.close()
# client.restart()

#### Differing batch sizes: (10000 node graph)

```yaml
================================================================================
Batch size (Matching number of cores): 1250
========================================
Calculated shortest path lengths for 10000 nodes in 29.02 seconds.
Calculated Proximity Prestige for 10000 nodes in 7.38 seconds. 

================================================================================
Batch size (Matching number of threads): 625
========================================
Calculated shortest path lengths for 10000 nodes in 27.90 seconds.
Calculated Proximity Prestige for 10000 nodes in 7.34 seconds. 

================================================================================
Batch size (4x cores, 2x Threads): 313
========================================
Calculated shortest path lengths for 10000 nodes in 29.81 seconds.
Calculated Proximity Prestige for 10000 nodes in 7.31 seconds.
```

#### Differing batch sizes: (20000 node graph)

```yaml
================================================================================
Batch size: 2500
========================================
Calculated shortest path lengths for 20000 nodes in 299.53 seconds.
Calculated Proximity Prestige for 20000 nodes in 31.31 seconds. 

================================================================================
Batch size: 1250
========================================
Calculated shortest path lengths for 20000 nodes in 333.22 seconds.
Calculated Proximity Prestige for 20000 nodes in 38.45 seconds. 

================================================================================
Batch size: 625
========================================
Calculated shortest path lengths for 20000 nodes in 337.15 seconds.
Calculated Proximity Prestige for 20000 nodes in 45.47 seconds. 
```

As I personally expected, having the batches line up perfectly with the number of workers (cores) was the most efficient way to run the code. As there isnt a second batch of overhead when the second task is moved to each worker is being processed. Although I am surprised that on the smaller 10000 node graph, doubling it to the number of threads ended up being a second faster, but im assuming thats because of uncertainty considering the small size of the graph.

### Parallelizing the proximity prestige calculation (Isnt working atm):

Till now I've just parallelized the BFS for finding all the shortest paths from a node, but the actual calculation of the proximity prestige is still done in a single thread. And doesnt have great scaling as it has to loop through both the nodes and each nodes shortest paths.

```yaml
Calculated shortest path lengths for 25000 nodes in 786.07 seconds.
Calculated Proximity Prestige for 25000 nodes in 155.03 seconds. 

Calculated shortest path lengths for 20000 nodes in 299.53 seconds.
Calculated Proximity Prestige for 20000 nodes in 31.31 seconds.
```

By just adding another 5000 nodes (25% increase), the time taken for the proximity prestige calculation increased by 5x. So the prestige calculation needs to be parallelized as well.

In [18]:
import networkx as nx
import dask
from dask import delayed, compute
from dask.distributed import wait

def single_source_shortest_path_length(G, source, cutoff=None):
    """Compute the shortest path lengths from source to all reachable nodes."""
    if source not in G:
        raise nx.NodeNotFound(f"Source {source} is not in G")
    if cutoff is None:
        cutoff = float("inf")
    nextlevel = [source]
    return dict(_single_shortest_path_length(G._adj, nextlevel, cutoff))

def _single_shortest_path_length(adj, firstlevel, cutoff):
    """Yields (node, level) in a breadth first search."""
    seen = set(firstlevel)
    nextlevel = firstlevel
    level = 0
    n = len(adj)
    for v in nextlevel:
        yield (v, level)
    while nextlevel and cutoff > level:
        level += 1
        thislevel = nextlevel
        nextlevel = []
        for v in thislevel:
            for w in adj[v]:
                if w not in seen:
                    seen.add(w)
                    nextlevel.append(w)
                    yield (w, level)
            if len(seen) == n:
                return

@delayed
def compute_batch_shortest_path_lengths(G, nodes, cutoff=None):
    results = {}
    for n in nodes:
        results[n] = single_source_shortest_path_length(G, n, cutoff=cutoff)
    return results

def all_pairs_shortest_path_length(G, cutoff=None, batch_size=10):
    """Computes the shortest path lengths between all nodes in `G`."""
    nodes = list(G.nodes())
    batched_nodes = [nodes[i:i + batch_size] for i in range(0, len(nodes), batch_size)]
    
    futures = [compute_batch_shortest_path_lengths(G, batch, cutoff=cutoff) for batch in batched_nodes]
    results = compute(*futures)
    
    # Flatten results
    shortest_path_lengths = {k: v for d in results for k, v in d.items()}
    
    return shortest_path_lengths

@delayed
def calculate_proximity_prestige(node, shortest_path_lengths, num_nodes):
    prestige_sum = 0
    
    for target, path_length in shortest_path_lengths[node].items():
        if node != target and path_length > 0:
            prestige_sum += 1 / path_length
            
    return node, prestige_sum / (num_nodes - 1)

@delayed
def process_batch_prox_prestige(batch, shortest_path_lengths, num_nodes):
    results = []
    for node in batch:
        result = calculate_proximity_prestige(node, shortest_path_lengths, num_nodes)
        results.append(result)
    return results

def proximity_prestige_parallel(G, batch_size=10, client=None, num_nodes=None):
    print("="*80)
    print("Batch size:", batch_size)
    print("="*40)

    start = time.time()
    shortest_path_lengths = dict(all_pairs_shortest_path_length(G, batch_size=batch_size))
    end = time.time()
    print(f"Calculated shortest path lengths for {num_nodes} nodes in {end - start:.2f} seconds.")

    proximity_prestige_dict = {}
    nodes = list(G.nodes())
    num_nodes = len(nodes)
    batched_nodes = [nodes[i:i + batch_size] for i in range(0, len(nodes), batch_size)]
    
    start = time.time()
    futures = [process_batch_prox_prestige(batch, shortest_path_lengths, num_nodes) for batch in batched_nodes]
    results = compute(*futures)

    for batch_results in results:
        for node, prestige in batch_results:
            proximity_prestige_dict[node] = prestige
        
    end = time.time()
    print(f"Calculated Proximity Prestige for {num_nodes} nodes in {end - start:.2f} seconds.", "\n")

    # Clean up memory after computation
    del shortest_path_lengths

    return proximity_prestige_dict

In [19]:
batch_size = math.ceil(G_sampled.number_of_nodes() / 8)

_ = proximity_prestige_parallel(G_sampled, batch_size=batch_size)

Batch size: 625
Calculated shortest path lengths for None nodes in 6.13 seconds.


KeyboardInterrupt: 

For some reason or another, the proximity prestige calculation just wasnt being sent to the client, and never ended up being calculated. Cant figure out why so I'll just leave it here for now.

## Old broken code:

Below is some of the old code I had, I made quite a few mistakes through the last couple of days trying to get the parallelization to work. A couple of my mistakes include:
- Not using the correct dask functions (I tried to use dask bags instead of delayed functions and futures).
- First only parallelizing the proximity prestige calculation (which is the least computationally expensive part of the code) instead of the BFS.
- Making every single node a future instead of using batches. Lead to many many memory errors, crashes and 10x longer compute times on very small graphs.
- Not setting up the local cluster correctly if at all.
- Trying to use dask delays on the default networkx implementation of calculating the shortest paths which didnt work at all, and is not ideal as the networkx implementation uses generators which afaik dont work well in dask delayed objects.


In [None]:
# def proximity_prestige_for_node(G, node, shortest_path_lengths):
#     prestige_sum = 0
#     for target, path_length in shortest_path_lengths[node].items():
#         if node != target and path_length > 0:
#             prestige_sum += 1 / path_length
#     return node, prestige_sum / (len(G.nodes()) - 1)

In [28]:
# import networkx as nx
# from dask import delayed, compute
# import dask

# @delayed
# def compute_shortest_path_lengths(G, node):
#     return node, dict(nx.single_source_shortest_path_length(G, node))

# def proximity_prestige(G):
#     futures = [compute_shortest_path_lengths(G, node) for node in G.nodes()]
#     results = compute(*futures)

#     shortest_path_lengths = dict(results)

#     proximity_prestige_dict = {}
#     for node in G.nodes():
#         prestige_sum = 0
#         for target, path_length in shortest_path_lengths[node].items():
#             if node != target and path_length > 0:
#                 prestige_sum += 1 / path_length
#         proximity_prestige_dict[node] = prestige_sum / (len(G.nodes()) - 1)
        
#     return proximity_prestige_dict

# proximity_prestige_scores = proximity_prestige(G_sampled)

# # top_proximity_prestige = sorted(proximity_prestige_scores.items(), key=lambda x: x[1], reverse=True)[:5]
# # print("Top 5 Knoten nach Proximity Prestige:")
# # for node, prestige in top_proximity_prestige:
# #     print(f"Knoten {node}: {prestige:.4f}")