The goal of this notebook is to compare strategies for finding resolving sets that are as small as possible. We start with the Barabási–Albert model and then simulate the strategies on real graphs.

In [4]:
import networkx as nx
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import sys
sys.path.append('../')
from helpers import *
import plotly.graph_objs as go
%load_ext autoreload
%autoreload 2

## Barabási–Albert model

In [5]:
# By default, we will use the following parameters
# n = 500
# m = 30
# nb_graph = 1
# nb_of_iters = 100

# can be used to speed up simulations as we know the theoretical value
# lower_bound = 60
# upper_bound = 160

In [76]:
def sim_centrality(centrality_f, lower_bound=20, upper_bound=110, n=500, m=30, nb_graph=1, nb_of_iters=100, seed=0):
    """Estimate, per subset size, how often a centrality-weighted random subset resolves a Barabási–Albert graph.

    For each of `nb_graph` graphs generated with `nx.barabasi_albert_graph(n, m)`,
    and for each subset size in `[lower_bound, upper_bound)`, `nb_of_iters` subsets
    are drawn without replacement with probability proportional to the vertex
    centrality, and the fraction accepted by `is_resolving_set` is recorded.
    Sizes below `lower_bound` are recorded as probability 0 and sizes from
    `upper_bound` to `n - 1` as probability 1, without being simulated.

    Parameters
    ----------
    centrality_f : callable
        Called as `centrality_f(G)`; must return a mapping vertex -> centrality.
    lower_bound, upper_bound : int
        Range of subset sizes that is actually simulated.
    n, m : int
        Arguments forwarded to `nx.barabasi_albert_graph`.
    nb_graph : int
        Number of graphs over which the probabilities are averaged.
    nb_of_iters : int
        Number of subsets drawn per size and per graph.
    seed : int
        Seeds both the `random` module and the NumPy generator used for sampling.

    Returns
    -------
    dict
        Maps every subset size in `range(n)` to its probability averaged over the graphs.
    """
    solutions_prob_c = {}
    # The graph generator is called without a seed, so it draws from the global
    # `random` module; the subsets are drawn with NumPy, which `random.seed` does
    # not affect. Both must be seeded for `seed` to make a run reproducible.
    random.seed(seed)
    rng = np.random.default_rng(seed)
    for _ in range(nb_graph):
        # Generate the random graph and compute shortest paths
        G = nx.barabasi_albert_graph(n, m)
        length = dict(nx.all_pairs_shortest_path_length(G))

        c = centrality_f(G)

        # Turn the centrality scores into sampling probabilities
        node_list = list(c.keys())
        c_values = np.array(list(c.values()))
        c_norm = c_values / c_values.sum()

        for nb in range(0, lower_bound):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 0
        for nb in tqdm(range(lower_bound, upper_bound)):
            count = 0
            for _trial in range(nb_of_iters):
                # Random set of `nb` distinct nodes to test
                nodes = set(rng.choice(node_list, p=c_norm, size=nb, replace=False))
                if is_resolving_set(G, nodes, length):
                    count += 1
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + (count / nb_of_iters)
        for nb in range(upper_bound, n):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 1

    # Average the per-graph probabilities
    for i in range(n):
        solutions_prob_c[i] = solutions_prob_c[i] / nb_graph

    return solutions_prob_c

In [77]:
# Betweenness-centrality-weighted sampling, averaged over 10 graphs.
# NOTE(review): the plotting cell further down reads `solutions_prob_bc`, not
# `solutions_prob_bc_2` — confirm which name is intended.
solutions_prob_bc_2 = sim_centrality(
    centrality_f=nx.betweenness_centrality,
    lower_bound=20,
    upper_bound=80,
    nb_graph=10,
)

100%|███████████████████████████████████████████| 60/60 [00:54<00:00,  1.11it/s]


In [151]:
# Eigenvector-centrality-weighted sampling, averaged over 10 graphs.
solutions_prob_eigen = sim_centrality(
    centrality_f=nx.eigenvector_centrality,
    lower_bound=30,
    upper_bound=90,
    nb_graph=10,
)

100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.00s/it]
100%|███████████████████████████████████████████| 60/60 [01:01<00:00,  1.02s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.00s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]


In [152]:
# Degree-centrality-weighted sampling, averaged over 10 graphs.
solutions_prob_dc = sim_centrality(
    centrality_f=nx.degree_centrality,
    lower_bound=30,
    upper_bound=90,
    nb_graph=10,
)

100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.00s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]


In [153]:
# Closeness-centrality-weighted sampling, averaged over 10 graphs.
solutions_prob_cc = sim_centrality(
    centrality_f=nx.closeness_centrality,
    lower_bound=20,
    upper_bound=100,
    nb_graph=10,
)

100%|███████████████████████████████████████████| 80/80 [01:20<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.02s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.02s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.02s/it]
100%|███████████████████████████████████████████| 80/80 [01:20<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:20<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:20<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 80/80 [01:21<00:00,  1.01s/it]


In [154]:
# Current-flow-betweenness-weighted sampling, averaged over 10 graphs.
solutions_prob_flow_bc = sim_centrality(
    centrality_f=nx.current_flow_betweenness_centrality,
    lower_bound=30,
    upper_bound=90,
    nb_graph=10,
)


laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.

100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.01s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [01:00<00:00,  1.00s/it]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]
100%|███████████████████████████████████████████| 60/60 [00:59<00:00,  1.00it/s]


In [155]:
# Second-order-centrality-weighted sampling, averaged over 10 graphs.
solutions_prob_so = sim_centrality(
    centrality_f=nx.second_order_centrality,
    lower_bound=25,
    upper_bound=90,
    nb_graph=10,
)

100%|███████████████████████████████████████████| 65/65 [01:04<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 65/65 [01:03<00:00,  1.02it/s]
100%|███████████████████████████████████████████| 65/65 [01:03<00:00,  1.02it/s]
100%|███████████████████████████████████████████| 65/65 [01:03<00:00,  1.02it/s]
100%|███████████████████████████████████████████| 65/65 [01:04<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 65/65 [01:05<00:00,  1.00s/it]
100%|███████████████████████████████████████████| 65/65 [01:04<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 65/65 [01:04<00:00,  1.01it/s]
100%|███████████████████████████████████████████| 65/65 [01:04<00:00,  1.02it/s]
100%|███████████████████████████████████████████| 65/65 [01:03<00:00,  1.02it/s]


In [156]:
# Baseline strategy: every subset is drawn uniformly at random from the vertices.

n, m = 500, 30
nb_graph = 10
nb_of_iters = 100
s = {}

# Only subset sizes in [lower_bound, upper_bound) are simulated; smaller sizes are
# recorded with probability 0 and larger ones with probability 1, which speeds up
# the run when the theoretical transition is known.
lower_bound, upper_bound = 20, 100

solutions_rand = {}
random.seed(0)
for _ in range(nb_graph):
    # Fresh random graph, its shortest-path table and its vertex list
    G = nx.barabasi_albert_graph(n, m)
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    for nb in range(lower_bound):
        solutions_rand.setdefault(nb, 0)
    for nb in tqdm(range(lower_bound, upper_bound)):
        # Number of uniformly drawn subsets of size `nb` accepted by is_resolving_set
        count = sum(
            1
            for _trial in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, nb)), length)
        )
        solutions_rand[nb] = solutions_rand.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, n):
        solutions_rand[nb] = solutions_rand.get(nb, 0) + 1

# Average the accumulated probabilities over the graphs
for size in range(n):
    solutions_rand[size] = solutions_rand[size] / nb_graph

100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.23it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.23it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.23it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.24it/s]
100%|███████████████████████████████████████████| 80/80 [00:36<00:00,  2.22it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.24it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.24it/s]
100%|███████████████████████████████████████████| 80/80 [00:36<00:00,  2.22it/s]
100%|███████████████████████████████████████████| 80/80 [00:35<00:00,  2.25it/s]
100%|███████████████████████████████████████████| 80/80 [00:36<00:00,  2.22it/s]


In [213]:
# Strategy where subsets are drawn only from the first quintile
# (the 20% of vertices with the highest degree)

n = 500
m = 30
nb_of_iters = 100
nb_graphs = 10

solutions_first_q = {}

lower_bound = 20
upper_bound = 90

number_of_quintile = 5

# Seed the global `random` module, as the other strategies do: it drives both the
# graph generator (called without a seed) and `random.sample` below.
random.seed(0)

for _ in range(nb_graphs):
    G = nx.barabasi_albert_graph(n, m)
    length = dict(nx.all_pairs_shortest_path_length(G))

    # Vertices sorted by decreasing degree (ties keep the graph's node order)
    degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    quintile_size = n // number_of_quintile
    top_vertices = {vertex for vertex, _deg in degree_list[:quintile_size]}
    # Sampling pool: the top quintile, listed in the graph's node order
    nodes_in_quintile = [vertex for vertex in G.nodes() if vertex in top_vertices]

    for nb in range(0, lower_bound):
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound)):
        count = 0
        for _trial in range(nb_of_iters):
            # `random.sample` raises ValueError if `nb` exceeds the pool size
            nodes = set(random.sample(nodes_in_quintile, nb))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, n):
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + 1

# Average the accumulated probabilities over the graphs
for i in range(n):
    solutions_first_q[i] = solutions_first_q[i] / nb_graphs

100%|███████████████████████████████████████████| 70/70 [00:26<00:00,  2.62it/s]
100%|███████████████████████████████████████████| 70/70 [00:27<00:00,  2.58it/s]
100%|███████████████████████████████████████████| 70/70 [00:26<00:00,  2.66it/s]
100%|███████████████████████████████████████████| 70/70 [00:25<00:00,  2.72it/s]
100%|███████████████████████████████████████████| 70/70 [00:29<00:00,  2.39it/s]
100%|███████████████████████████████████████████| 70/70 [00:25<00:00,  2.70it/s]
100%|███████████████████████████████████████████| 70/70 [00:25<00:00,  2.77it/s]
100%|███████████████████████████████████████████| 70/70 [00:26<00:00,  2.63it/s]
100%|███████████████████████████████████████████| 70/70 [00:27<00:00,  2.50it/s]
100%|███████████████████████████████████████████| 70/70 [00:27<00:00,  2.59it/s]


In [177]:
# `solutions_first_q` is already averaged over the graphs at the end of the cell
# that computes it; dividing by `nb_graphs` again here would shrink every
# probability a second time. Only check that the values are valid probabilities.
assert all(0 <= p <= 1 for p in solutions_first_q.values()), "solutions_first_q is not normalized"

In [187]:
#with open('simulations/transition_eigen_centrality.pickle', 'wb') as file:
    #pickle.dump(solutions_prob_eigen, file, protocol=pickle.HIGHEST_PROTOCOL)

In [72]:
# Can be used to directly import the results of the simulations


#with open('simulations/Barbasi-Albert/transition_random.pickle', "rb") as file:
    #solutions_rand = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_betweenness_centrality.pickle', "rb") as file:
    #solutions_prob_bc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_degree_centrality.pickle', "rb") as file:
    #solutions_prob_dc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_so_centrality.pickle', "rb") as file:
    #solutions_prob_so = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_closness_centrality.pickle', "rb") as file:
    #solutions_prob_cc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_eigen_centrality.pickle', "rb") as file:
    #solutions_prob_eigen = pickle.load(file)

In [1]:
# Transition curves on Barabási–Albert graphs: one trace per sampling strategy.
# Each dict maps a subset cardinality to the estimated probability of resolving the graph.
# NOTE(review): `solutions_prob_bc` is not assigned by any simulation cell above
# (the betweenness run is stored in `solutions_prob_bc_2`); it is only available
# through the commented-out pickle-loading cell — confirm which one should be plotted.
ba_curves = [
    ('Random nodes', solutions_rand),
    ('Nodes with high degree (top 20%)', solutions_first_q),
    ('Betweenness centrality', solutions_prob_bc),
    ('Degree centrality', solutions_prob_dc),
    ('Second order centrality', solutions_prob_so),
    ('Closeness centrality', solutions_prob_cc),
    ('Eigenvector centrality', solutions_prob_eigen),
]

# One scatter trace per strategy, in the order listed above
traces = [
    go.Scatter(x=list(curve.keys()), y=list(curve.values()), mode='markers+lines', name=label)
    for label, curve in ba_curves
]

# Define the layout
layout = go.Layout(#title='Probability of resolving the graph as a function of the subset cardinality',
                   #title_x=0.5,
                   xaxis=dict(title='Cardinality of the subset'),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure and show it
fig = go.Figure(data=traces, layout=layout)
fig.show()

NameError: name 'solutions_rand' is not defined

In [223]:
def sim_centrality_box(centrality_f, n=500, m=30, nb_graph=15, nb_of_iters=100, seed=0):
    """Collect the sizes at which centrality-weighted random orderings first resolve a Barabási–Albert graph.

    For each of `nb_graph` graphs generated with `nx.barabasi_albert_graph(n, m)`,
    `nb_of_iters` orderings of all vertices are drawn without replacement with
    probability proportional to the vertex centrality. For each ordering, the
    length of the shortest prefix accepted by `is_resolving_set` is recorded.

    Parameters
    ----------
    centrality_f : callable
        Called as `centrality_f(G)`; must return a mapping vertex -> centrality.
    n, m : int
        Arguments forwarded to `nx.barabasi_albert_graph`.
    nb_graph : int
        Number of graphs to generate.
    nb_of_iters : int
        Number of orderings drawn per graph.
    seed : int
        Seeds both the `random` module and the NumPy generator used for sampling.

    Returns
    -------
    list of int
        One prefix length per ordering for which some prefix was accepted.
    """
    box_sol = []
    # The graph generator is called without a seed, so it draws from the global
    # `random` module; the orderings are drawn with NumPy, which `random.seed`
    # does not affect. Both must be seeded for `seed` to make a run reproducible.
    random.seed(seed)
    rng = np.random.default_rng(seed)
    for _ in tqdm(range(nb_graph)):
        # Generate the random graph and compute shortest paths
        G = nx.barabasi_albert_graph(n, m)
        length = dict(nx.all_pairs_shortest_path_length(G))

        c = centrality_f(G)

        # Turn the centrality scores into sampling probabilities
        node_list = list(c.keys())
        c_values = np.array(list(c.values()))
        c_norm = c_values / c_values.sum()

        for _trial in range(nb_of_iters):
            # Weighted random ordering of all the vertices
            nodes = rng.choice(node_list, p=c_norm, size=len(G), replace=False)
            # Grow the prefix until it is accepted as a resolving set
            for nb in range(0, len(G) + 1):
                if is_resolving_set(G, nodes[:nb], length):
                    box_sol.append(nb)
                    break
    return box_sol

In [159]:
# Random: size at which a uniformly random ordering of the vertices first resolves the graph

n = 500
m = 30
nb_graph = 15
nb_of_iters = 100

rand_box_sol = []
random.seed(0)
for _ in range(nb_graph):

    # Generate the random graph and compute shortest paths
    G = nx.barabasi_albert_graph(n, m)
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes)

    for _trial in tqdm(range(nb_of_iters)):
        # Uniformly random ordering of all the vertices
        nodes = random.sample(node_list, len(node_list))
        # Grow the prefix until it is accepted; the range includes the full
        # vertex set, like `sim_centrality_box` does
        for nb in range(0, len(G) + 1):
            if is_resolving_set(G, nodes[:nb], length):
                rand_box_sol.append(nb)
                break

100%|█████████████████████████████████████████| 100/100 [00:16<00:00,  6.04it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.66it/s]
100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.96it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.65it/s]
100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.71it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.30it/s]
100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.90it/s]
100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.98it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.39it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.41it/s]
100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.69it/s]
100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.29it/s]
100%|███████████████████████

In [160]:
# Resolving-prefix sizes for degree-centrality-weighted orderings
dc_box_sol = sim_centrality_box(
    centrality_f=nx.degree_centrality,
)

100%|███████████████████████████████████████████| 15/15 [06:50<00:00, 27.38s/it]


In [161]:
# Resolving-prefix sizes for betweenness-centrality-weighted orderings
bc_box_sol = sim_centrality_box(
    centrality_f=nx.betweenness_centrality,
)

100%|███████████████████████████████████████████| 15/15 [05:02<00:00, 20.18s/it]


In [162]:
# Resolving-prefix sizes for closeness-centrality-weighted orderings
cc_box_sol = sim_centrality_box(
    centrality_f=nx.closeness_centrality,
)

100%|███████████████████████████████████████████| 15/15 [10:17<00:00, 41.19s/it]


In [163]:
# Resolving-prefix sizes for second-order-centrality-weighted orderings
so_box_sol = sim_centrality_box(
    centrality_f=nx.second_order_centrality,
)

100%|███████████████████████████████████████████| 15/15 [16:32<00:00, 66.14s/it]


In [224]:
# Resolving-prefix sizes for eigenvector-centrality-weighted orderings
eigen_box_sol = sim_centrality_box(
    centrality_f=nx.eigenvector_centrality,
)

  0%|                                                    | 0/15 [00:00<?, ?it/s]

start


  7%|██▉                                         | 1/15 [00:31<07:16, 31.17s/it]

start


 13%|█████▊                                      | 2/15 [01:03<06:55, 31.98s/it]

start


 20%|████████▊                                   | 3/15 [01:35<06:22, 31.86s/it]

start


 27%|███████████▋                                | 4/15 [02:07<05:51, 31.92s/it]

start


 33%|██████████████▋                             | 5/15 [02:38<05:16, 31.62s/it]

start


 40%|█████████████████▌                          | 6/15 [03:12<04:52, 32.47s/it]

start


 47%|████████████████████▌                       | 7/15 [03:40<04:08, 31.06s/it]

start


 53%|███████████████████████▍                    | 8/15 [04:10<03:34, 30.64s/it]

start


 60%|██████████████████████████▍                 | 9/15 [04:39<03:00, 30.02s/it]

start


 67%|████████████████████████████▋              | 10/15 [05:06<02:26, 29.31s/it]

start


 73%|███████████████████████████████▌           | 11/15 [05:36<01:57, 29.50s/it]

start


 80%|██████████████████████████████████▍        | 12/15 [06:11<01:33, 31.07s/it]

start


 87%|█████████████████████████████████████▎     | 13/15 [06:44<01:03, 31.68s/it]

start


 93%|████████████████████████████████████████▏  | 14/15 [07:16<00:31, 31.88s/it]

start


100%|███████████████████████████████████████████| 15/15 [07:49<00:00, 31.27s/it]


In [164]:
# Box-plot input: strategy label -> list of resolving-set sizes.
# NOTE(review): `eigen_box_sol` is computed above but not included here — confirm whether that is intended.
dict_result = dict([
    ('Betweenness centrality', bc_box_sol),
    ('Degree centrality', dc_box_sol),
    ('Closeness centrality', cc_box_sol),
    ('Random', rand_box_sol),
    ('Second order centrality', so_box_sol),
])

In [83]:
#with open('simulations/Barbasi-Albert/centrality_boxes_15graph_100iters.pickle', "rb") as file:
    #dict_result = pickle.load(file)

In [84]:
#with open('simulations/centrality_boxes_15graph_100iters.pickle', 'wb') as file:
    #pickle.dump(dict_result, file, protocol=pickle.HIGHEST_PROTOCOL)

In [232]:
# Box plot of the resolving-set sizes, one box per strategy.
# `dict_result` is the only results dict defined above; the previously referenced
# `dict_result2` is never assigned in the notebook, so a fresh run raised NameError.
fig = go.Figure()

for group, values in dict_result.items():
    fig.add_trace(go.Box(y=values, name=group))

fig.update_layout(
    yaxis=dict(
        # `title.font` replaces the deprecated `titlefont` attribute
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black')
        )
    )
)

fig.show()

## Copenhagen fb_friends graph

In [7]:
# Build the Copenhagen Facebook-friends graph from its comma-separated edge list
fb_edges_path = '../Real graphs simulations/Copenhagen graphs/fb_friends.csv/edges.csv'
G_fb = nx.read_edgelist(fb_edges_path, delimiter=',')

# Report the size of the graph
for label, size in (
    ('Number of nodes:', G_fb.number_of_nodes()),
    ('Number of edges:', G_fb.number_of_edges()),
):
    print(label, size)

Number of nodes: 800
Number of edges: 6429


In [236]:
def sim_centrality_real_g(centrality_f, lower_bound=20, upper_bound=800, nb_of_iters=400, seed=0, step=10, nb_graph=1, G=None):
    """Estimate, per subset size, how often a centrality-weighted random subset resolves a fixed graph.

    For every subset size in `range(lower_bound, upper_bound + 1, step)`,
    `nb_of_iters` subsets are drawn without replacement with probability
    proportional to the vertex centrality, and the fraction accepted by
    `is_resolving_set` is recorded. Sizes below `lower_bound` are recorded as
    probability 0 and sizes from `upper_bound + 1` up to the number of vertices
    as probability 1, without being simulated.

    Parameters
    ----------
    centrality_f : callable
        Called as `centrality_f(G)`; must return a mapping vertex -> centrality.
    lower_bound, upper_bound, step : int
        Subset sizes that are actually simulated.
    nb_of_iters : int
        Number of subsets drawn per size and per repetition.
    seed : int
        Seeds both the `random` module and the NumPy generator used for sampling.
    nb_graph : int
        Number of repetitions of the whole experiment; the result is averaged
        over them. Previously this was read from a global variable and the
        result was not averaged.
    G : graph, optional
        Graph to analyse; defaults to the notebook-level `G_fb`.

    Returns
    -------
    dict
        Maps each recorded subset size to its probability averaged over the repetitions.
    """
    if G is None:
        G = G_fb

    # `random.seed` does not affect NumPy sampling, so a dedicated generator is
    # seeded as well to make the run reproducible.
    random.seed(seed)
    rng = np.random.default_rng(seed)

    # The graph is fixed, so shortest paths and centralities are computed once
    length = dict(nx.all_pairs_shortest_path_length(G))
    c = centrality_f(G)

    # Turn the centrality scores into sampling probabilities
    node_list = list(c.keys())
    c_values = np.array(list(c.values()))
    c_norm = c_values / c_values.sum()

    solutions_prob_c = {}
    for _ in range(nb_graph):
        for nb in range(0, lower_bound):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 0
        for nb in tqdm(range(lower_bound, upper_bound + 1, step)):
            count = 0
            for _trial in range(nb_of_iters):
                # Random set of `nb` distinct nodes to test
                nodes = set(rng.choice(node_list, p=c_norm, size=nb, replace=False))
                if is_resolving_set(G, nodes, length):
                    count += 1
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + (count / nb_of_iters)
        for nb in range(upper_bound + 1, G.number_of_nodes() + 1):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 1

    # Average over the repetitions so that values stay in [0, 1]
    return {nb: total / nb_graph for nb, total in solutions_prob_c.items()}

In [237]:
# Eigenvector-centrality-weighted sampling on the Facebook-friends graph
solutions_prob_eigen_fb = sim_centrality_real_g(
    centrality_f=nx.eigenvector_centrality,
)

100%|███████████████████████████████████████████| 79/79 [35:17<00:00, 26.80s/it]


In [238]:
# Degree-centrality-weighted sampling on the Facebook-friends graph
solutions_prob_dc_fb = sim_centrality_real_g(
    centrality_f=nx.degree_centrality,
)

100%|███████████████████████████████████████████| 79/79 [33:01<00:00, 25.08s/it]


In [239]:
# Closeness-centrality-weighted sampling on the Facebook-friends graph
solutions_prob_cc_fb = sim_centrality_real_g(
    centrality_f=nx.closeness_centrality,
)

100%|███████████████████████████████████████████| 79/79 [33:01<00:00, 25.08s/it]


In [240]:
# Second-order-centrality-weighted sampling on the Facebook-friends graph
solutions_prob_so_fb = sim_centrality_real_g(
    centrality_f=nx.second_order_centrality,
)

100%|███████████████████████████████████████████| 79/79 [33:11<00:00, 25.21s/it]


In [241]:
# Baseline on the Facebook-friends graph: subsets drawn uniformly at random.

nb_graph = 1
s = {}
nb_of_iters = 800
step = 10

# Only sizes lower_bound, lower_bound + step, ..., up to upper_bound are simulated;
# smaller sizes are recorded with probability 0 to speed up the run.
lower_bound, upper_bound = 20, 800

solutions_rand_fb = {}
random.seed(0)
for _ in range(nb_graph):
    # The graph is fixed; compute its shortest-path table and vertex list
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    for nb in range(lower_bound):
        solutions_rand_fb.setdefault(nb, 0)
    for nb in tqdm(range(lower_bound, upper_bound + 1, step)):
        # Number of uniformly drawn subsets of size `nb` accepted by is_resolving_set
        count = sum(
            1
            for _trial in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, nb)), length)
        )
        solutions_rand_fb[nb] = solutions_rand_fb.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound + 1, G_fb.number_of_nodes() + 1):
        solutions_rand_fb[nb] = solutions_rand_fb.get(nb, 0) + 1

100%|█████████████████████████████████████████| 79/79 [1:00:12<00:00, 45.73s/it]


In [242]:
# Betweenness centrality (with endpoints counted) on the Facebook-friends graph

# can be used to speed up simulations
lower_bound = 20
upper_bound = 800
nb_of_iters = 800
step = 10

# Same experiment as the other centralities, so reuse `sim_centrality_real_g`
# instead of duplicating its body; only the centrality function differs.
solutions_prob_bc_fb = sim_centrality_real_g(
    lambda graph: nx.betweenness_centrality(graph, endpoints=True),
    lower_bound=lower_bound,
    upper_bound=upper_bound,
    nb_of_iters=nb_of_iters,
    step=step,
)


100%|█████████████████████████████████████████| 79/79 [1:06:28<00:00, 50.48s/it]


In [23]:
# Boost the proba of low degree nodes
# NOTE: relies on `nb_graph` and `nb_of_iters` set by earlier cells.

lower_bound = 20
upper_bound = 40

solutions_prob_ms_d = {}
# Dedicated seeded generator so that the sampling below is reproducible
rng = np.random.default_rng(0)
for _ in range(nb_graph):
    # The graph is fixed; compute its shortest paths
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.degree_centrality(G)
    a = sorted(set(c.values()))
    # Nodes whose centrality is one of the two lowest distinct values get a weight
    # of 1000 times the maximum centrality; all other nodes keep their centrality.
    lowest = a[:2]
    boosted = 1000 * max(c.values())
    c = {key: boosted if value in lowest else value for key, value in c.items()}

    # Turn the (boosted) scores into sampling probabilities
    c_values = np.array(list(c.values()))
    c_norm = c_values / c_values.sum()
    node_list = list(c.keys())

    for nb in range(0, lower_bound):
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound, 1)):
        count = 0
        for _trial in range(nb_of_iters):
            # Random set of `nb` distinct nodes to test
            nodes = set(rng.choice(node_list, p=c_norm, size=nb, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + (count / nb_of_iters)
    # Sizes from `upper_bound` on are recorded as probability 1 without being simulated.
    # NOTE(review): confirm this shortcut is valid for this graph with upper_bound = 40.
    for nb in range(upper_bound, G_fb.number_of_nodes()):
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + 1


100%|███████████████████████████████████████████| 20/20 [00:44<00:00,  2.22s/it]


In [24]:
# Boost the proba of low betweenness nodes
# NOTE: relies on `nb_graph` and `nb_of_iters` set by earlier cells.

lower_bound = 20
upper_bound = 40

solutions_prob_ms_bc = {}
# Dedicated seeded generator so that the sampling below is reproducible
rng = np.random.default_rng(0)
for _ in range(nb_graph):
    # The graph is fixed; compute its shortest paths
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.betweenness_centrality(G)
    a = sorted(set(c.values()))
    #c = {key: -(v-max(c.values())-min(c.values())) for key, v in c.items()}
    # Nodes with the lowest betweenness value get a weight of 1000 times the
    # maximum betweenness; all other nodes keep their betweenness.
    lowest = a[:1]
    boosted = 1000 * max(c.values())
    c = {key: boosted if value in lowest else value for key, value in c.items()}

    # Turn the (boosted) scores into sampling probabilities
    c_values = np.array(list(c.values()))
    c_norm = c_values / c_values.sum()
    node_list = list(c.keys())

    for nb in range(0, lower_bound):
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound, 1)):
        count = 0
        for _trial in range(nb_of_iters):
            # Random set of `nb` distinct nodes to test
            nodes = set(rng.choice(node_list, p=c_norm, size=nb, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + (count / nb_of_iters)
    # Sizes from `upper_bound` on are recorded as probability 1 without being simulated.
    # NOTE(review): confirm this shortcut is valid for this graph with upper_bound = 40.
    for nb in range(upper_bound, G_fb.number_of_nodes()):
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + 1


100%|███████████████████████████████████████████| 20/20 [00:44<00:00,  2.23s/it]


In [37]:
# Strategy 1 with degree: force a few hard-to-resolve nodes into every subset,
# then complete the subset by degree-weighted sampling.
# NOTE: relies on `nb_graph` and `nb_of_iters` set by earlier cells.
# NOTE(review): the result is stored in `solutions_prob_ms_bc_add`, the same name the
# betweenness variant of this strategy uses — confirm whether a separate name is wanted.

lower_bound = 20
upper_bound = 40

solutions_prob_ms_bc_add = {}
for _ in range(nb_graph):
    # The graph is fixed; compute its shortest paths
    G = G_fb

    nb_of_nodes_in_resolving_set = 20

    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.degree_centrality(G)

    # For each node, number of trial subsets for which `set_resolved` reported it
    intensities = {str(key): 0 for key in G.nodes}

    # Turn the centrality scores into sampling probabilities
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for i in range(100):
        # Random set of nodes to test
        nodes = set(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    # Nodes reported in fewer than 40 of the 100 trials
    small = [key for key, val in intensities.items() if val < 40]
    nodes_to_add = get_nodes_with_diff_neighbors(G, small)

    # Draw the remaining nodes only among those not already forced in; otherwise a
    # forced node could be drawn again and the tested subset would hold fewer than
    # `nb` distinct nodes.
    forced = set(nodes_to_add)
    keep = [idx for idx, node in enumerate(node_list) if node not in forced]
    pool = [node_list[idx] for idx in keep]
    pool_p = c_values[keep] / c_values[keep].sum()

    for nb in range(0, lower_bound):
        solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound, 1)):
        num_nodes = nb - len(nodes_to_add) # Number of nodes to sample on top of the forced ones
        count = 0
        for i in range(nb_of_iters):
            # Forced nodes plus a random complement
            nodes = nodes_to_add + list(np.random.choice(pool, p=pool_p, size=num_nodes, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, G_fb.number_of_nodes()):
        solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 1

4


100%|███████████████████████████████████████████| 20/20 [00:52<00:00,  2.62s/it]


In [10]:
# Strategy 1 with betweenness: repeatedly force hard-to-resolve nodes into the
# subset, then complete the subset by betweenness-weighted sampling.

lower_bound = 20
upper_bound = 40
nb_of_iters = 4
solutions_prob_ms_bc_add = {}

G = G_fb
nodes_to_add = []
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G)

# Turn the centrality scores into sampling probabilities
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())

# The forced nodes collected so far are now part of every trial subset. Previously
# they were ignored when re-testing, so each round was statistically identical to
# the first one and the loop had no reason to terminate. The number of rounds is
# also capped, and the loop stops as soon as a round adds no new node.
max_rounds = 50
for _round in range(max_rounds):
    # For each node, number of trial subsets for which `set_resolved` reported it
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # Random set of nodes to test, completed with the forced nodes
        sampled = np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False)
        nodes = set(sampled) | set(nodes_to_add)
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    # Nodes reported in fewer than 40 of the 100 trials
    small = [key for key, val in intensities.items() if val < 40]
    s = len(small)
    if s == 0:
        break
    # Keep only candidates that are not forced yet, without duplicates
    new_nodes = []
    for candidate in get_nodes_with_diff_neighbors(G, small):
        if candidate not in nodes_to_add and candidate not in new_nodes:
            new_nodes.append(candidate)
    if not new_nodes:
        break
    nodes_to_add += new_nodes

# Remove the forced nodes from the sampling pool and renormalize the probabilities
forced = set(nodes_to_add)
keep = [index for index, element in enumerate(node_list) if element not in forced]
node_list = [node_list[index] for index in keep]
c_norm = c_norm[keep]
c_norm = c_norm / c_norm.sum()

for nb in range(0, len(nodes_to_add)):
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 0
for nb in tqdm(range(len(nodes_to_add), upper_bound, 1)):
    num_nodes = nb - len(nodes_to_add) # Number of nodes to sample on top of the forced ones
    count = 0
    for i in range(nb_of_iters):
        # Forced nodes plus a random complement
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=num_nodes, replace=False))
        if is_resolving_set(G, nodes, length):
            count += 1
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + (count / nb_of_iters)
for nb in range(upper_bound, G_fb.number_of_nodes()):
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 1

KeyboardInterrupt: 

In [253]:
#with open('simulations/fb_transition_eigen_400iters_10step.pickle', 'wb') as file:
    #pickle.dump(solutions_prob_eigen_fb, file, protocol=pickle.HIGHEST_PROTOCOL)

In [38]:
# Compare every sampling strategy on one figure: for each strategy, the x values
# are the subset cardinalities (dict keys) and the y values are the estimated
# probabilities of resolving the graph (dict values).

# Define your data
x = list(solutions_rand_fb.keys())
y = list(solutions_rand_fb.values())


x2 = list(solutions_prob_d.keys())
y2 = list(solutions_prob_d.values())

x3 = list(solutions_prob_bc_fb.keys())
y3 = list(solutions_prob_bc_fb.values())

x4 = list(solutions_prob_dc_fb.keys())
y4 = list(solutions_prob_dc_fb.values())

x5 = list(solutions_prob_so_fb.keys())
y5 = list(solutions_prob_so_fb.values())

x6 = list(solutions_prob_cc_fb.keys())
y6 = list(solutions_prob_cc_fb.values())

x7 = list(solutions_prob_ms_bc_add.keys())
y7 = list(solutions_prob_ms_bc_add.values())

x8 = list(solutions_prob_eigen_fb.keys())
y8 = list(solutions_prob_eigen_fb.values())

x9 = list(solutions_prob_ms_d.keys())
y9 = list(solutions_prob_ms_d.values())

x10 = list(solutions_prob_ms_bc.keys())
y10 = list(solutions_prob_ms_bc.values())

# Define the trace for the scatter plot
trace = go.Scatter(x=x, y=y, mode='markers+lines', name='Random vertices')
trace2 = go.Scatter(x=x2, y=y2, mode='markers+lines', name='Random vertices (prop to degree)')
trace3 = go.Scatter(x=x3, y=y3, mode='markers+lines', name='Betweeness centrality')
trace4 = go.Scatter(x=x4, y=y4, mode='markers+lines', name='Degree centrality')
trace5 = go.Scatter(x=x5, y=y5, mode='markers+lines', name='Second order centrality)')
trace6 = go.Scatter(x=x6, y=y6, mode='markers+lines', name='Closness centrality')
trace7 = go.Scatter(x=x7, y=y7, mode='markers+lines', name='Magic sauce (betweenness + trick)')
trace8 = go.Scatter(x=x8, y=y8, mode='markers+lines', name='Eigenvector centrality')
trace9 = go.Scatter(x=x9, y=y9, mode='markers+lines', name='Magic Sauce (degree)')
trace10 = go.Scatter(x=x10, y=y10, mode='markers+lines', name='Magic sauce (betweenness)')

# Define the layout
layout = go.Layout(#title='Probability of resolving the graph as a function of the subset cardinality', 
                   #title_x=0.5,
                   xaxis=dict(title='Cardinality of the subset'), 
                   yaxis=dict(title='Probability of resolving the graph'))
                   #legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure.
# The second entry is trace2: this cell never defines a `trace1`, so referring
# to it either fails on a fresh kernel or plots a stale trace from another cell.
fig = go.Figure(data=[trace, trace2, trace3, trace4, trace5, trace6, trace7, trace8, trace9, trace10], layout=layout)

# Show the figure
fig.show()

In [254]:
def sim_centrality_box_real(G, centrality_f, nb_graph=1, nb_of_iters=400):
    """Sample the size of the first resolving prefix of centrality-weighted random orderings of G.

    For each iteration, all nodes of ``G`` are drawn without replacement with
    probabilities proportional to their centrality. The smallest ``nb`` such
    that the first ``nb`` drawn nodes pass ``is_resolving_set`` is recorded.

    Parameters
    ----------
    G : graph
        Graph to analyse. It is used as given (it is no longer replaced by the
        notebook-level ``G_fb``).
    centrality_f : callable
        Called as ``centrality_f(G)``; must return a mapping node -> centrality.
        Because all ``len(G)`` nodes are drawn without replacement,
        ``np.random.choice`` needs a non-zero weight for every node.
    nb_graph : int, optional
        Unused; kept so existing calls keep working.
    nb_of_iters : int, optional
        Number of random orderings to draw.

    Returns
    -------
    list of int
        One entry per iteration in which some prefix of length < ``len(G)``
        was found to be resolving.
    """
    box_sol = []

    # Shortest-path lengths between all pairs, computed once and reused by every check
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = centrality_f(G)

    # normalize the centralities so they can be used as sampling probabilities
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    nb_nodes = len(G)
    for _ in range(nb_of_iters):
        # Weighted random ordering of all the nodes
        nodes = np.random.choice(node_list, p=c_norm, size=nb_nodes, replace=False)
        # Grow the prefix until it resolves the graph
        for nb in range(0, nb_nodes):
            if is_resolving_set(G, nodes[:nb], length):
                box_sol.append(nb)
                break

    return box_sol

In [170]:
# Betweenness: size of the first resolving prefix of betweenness-weighted random orderings

nb_of_iters = 100
betweenness_box_sol = []

# Use the real graph and precompute every pairwise shortest-path length
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G, endpoints=True)

# Sampling weights: centralities rescaled to sum to one, aligned with node_list
node_list = list(c)
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in tqdm(range(nb_of_iters)):
    # Weighted random ordering of all the nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Record the first prefix length that resolves the graph, then stop
    for nb in range(len(G)):
        if not is_resolving_set(G, nodes[:nb], length):
            continue
        betweenness_box_sol.append(nb)
        break

  1%|▍                                        | 1/100 [01:38<2:42:46, 98.65s/it]

653


  2%|▊                                        | 2/100 [02:24<1:49:58, 67.33s/it]

415


  3%|█▏                                       | 3/100 [03:05<1:29:24, 55.31s/it]

414


  4%|█▋                                       | 4/100 [03:48<1:20:41, 50.43s/it]

446


  5%|██                                       | 5/100 [04:30<1:15:09, 47.47s/it]

412


  6%|██▍                                      | 6/100 [05:04<1:07:21, 43.00s/it]

382


  7%|███                                        | 7/100 [05:20<52:41, 34.00s/it]

238


  8%|███▍                                       | 8/100 [05:29<40:14, 26.25s/it]

183


  9%|███▊                                       | 9/100 [06:17<49:50, 32.86s/it]

434


 10%|████                                    | 10/100 [08:32<1:36:34, 64.38s/it]

757


 11%|████▍                                   | 11/100 [10:03<1:47:49, 72.69s/it]

654


 12%|████▊                                   | 12/100 [10:36<1:28:50, 60.58s/it]

388


 13%|█████▏                                  | 13/100 [11:12<1:16:51, 53.01s/it]

396


 14%|█████▌                                  | 14/100 [11:34<1:02:55, 43.90s/it]

330


 15%|██████▎                                   | 15/100 [11:48<49:12, 34.73s/it]

232


 16%|██████▍                                 | 16/100 [13:22<1:13:44, 52.68s/it]

674


 17%|██████▊                                 | 17/100 [14:14<1:12:29, 52.41s/it]

495


 18%|███████▏                                | 18/100 [16:07<1:36:29, 70.60s/it]

759


 19%|███████▌                                | 19/100 [16:09<1:07:22, 49.91s/it]

89


 20%|████████                                | 20/100 [17:17<1:13:56, 55.46s/it]

595


 21%|████████▍                               | 21/100 [18:56<1:30:04, 68.41s/it]

718


 22%|████████▊                               | 22/100 [19:17<1:10:36, 54.32s/it]

327


 23%|█████████▋                                | 23/100 [19:46<59:47, 46.60s/it]

379


 24%|██████████                                | 24/100 [19:55<44:49, 35.39s/it]

214


 25%|██████████                              | 25/100 [21:28<1:05:52, 52.70s/it]

689


 26%|██████████▉                               | 26/100 [21:33<47:16, 38.33s/it]

146


 27%|███████████▎                              | 27/100 [22:22<50:44, 41.70s/it]

482


 28%|███████████▏                            | 28/100 [23:43<1:04:01, 53.36s/it]

635


 29%|███████████▌                            | 29/100 [24:53<1:09:04, 58.37s/it]

599


 30%|████████████                            | 30/100 [26:40<1:25:15, 73.08s/it]

742


 31%|████████████▍                           | 31/100 [27:35<1:17:40, 67.54s/it]

531


 32%|████████████▊                           | 32/100 [29:32<1:33:22, 82.39s/it]

786


 33%|█████████████▏                          | 33/100 [29:39<1:06:44, 59.77s/it]

185


 34%|█████████████▌                          | 34/100 [30:26<1:01:22, 55.79s/it]

492


 35%|██████████████▋                           | 35/100 [30:59<53:13, 49.13s/it]

416


 36%|██████████████▍                         | 36/100 [32:21<1:02:46, 58.85s/it]

655


 37%|███████████████▌                          | 37/100 [32:29<46:00, 43.81s/it]

209


 38%|███████████████▉                          | 38/100 [33:19<46:59, 45.48s/it]

507


 39%|████████████████▍                         | 39/100 [33:32<36:21, 35.76s/it]

258


 40%|████████████████▊                         | 40/100 [34:21<39:53, 39.90s/it]

511


 41%|█████████████████▏                        | 41/100 [34:49<35:38, 36.25s/it]

371


 42%|█████████████████▋                        | 42/100 [34:56<26:30, 27.41s/it]

183


 43%|██████████████████                        | 43/100 [35:56<35:25, 37.28s/it]

568


 44%|██████████████████▍                       | 44/100 [36:35<35:06, 37.62s/it]

445


 45%|██████████████████▉                       | 45/100 [38:29<55:29, 60.53s/it]

780


 46%|███████████████████▎                      | 46/100 [38:57<45:41, 50.77s/it]

378


 47%|███████████████████▋                      | 47/100 [40:24<54:34, 61.78s/it]

682


 48%|████████████████████▏                     | 48/100 [41:22<52:37, 60.71s/it]

553


 49%|████████████████████▌                     | 49/100 [41:38<40:11, 47.28s/it]

282


 50%|█████████████████████                     | 50/100 [41:51<30:38, 36.78s/it]

248


 51%|█████████████████████▍                    | 51/100 [43:09<40:16, 49.32s/it]

643


 52%|█████████████████████▊                    | 52/100 [43:35<33:45, 42.20s/it]

362


 53%|██████████████████████▎                   | 53/100 [43:54<27:44, 35.42s/it]

315


 54%|██████████████████████▋                   | 54/100 [44:26<26:13, 34.21s/it]

400


 55%|███████████████████████                   | 55/100 [44:37<20:30, 27.35s/it]

232


 56%|███████████████████████▌                  | 56/100 [45:05<20:10, 27.51s/it]

377


 57%|███████████████████████▉                  | 57/100 [46:24<30:50, 43.03s/it]

651


 58%|████████████████████████▎                 | 58/100 [47:23<33:21, 47.66s/it]

551


 59%|████████████████████████▊                 | 59/100 [48:23<35:12, 51.52s/it]

564


 60%|█████████████████████████▏                | 60/100 [49:50<41:24, 62.11s/it]

678


 61%|█████████████████████████▌                | 61/100 [50:15<33:02, 50.83s/it]

355


 62%|██████████████████████████                | 62/100 [51:25<35:51, 56.62s/it]

609


 63%|██████████████████████████▍               | 63/100 [52:07<32:11, 52.20s/it]

467


 64%|██████████████████████████▉               | 64/100 [53:07<32:44, 54.57s/it]

563


 65%|███████████████████████████▎              | 65/100 [53:49<29:38, 50.80s/it]

465


 66%|███████████████████████████▋              | 66/100 [55:20<35:37, 62.87s/it]

698


 67%|████████████████████████████▏             | 67/100 [56:56<40:10, 73.05s/it]

720


 68%|████████████████████████████▌             | 68/100 [58:04<38:05, 71.42s/it]

597


 69%|████████████████████████████▉             | 69/100 [58:25<29:08, 56.42s/it]

330


 70%|█████████████████████████████▍            | 70/100 [59:31<29:31, 59.04s/it]

582


 71%|█████████████████████████████▊            | 71/100 [59:33<20:22, 42.14s/it]

111


 72%|████████████████████████████▊           | 72/100 [1:00:46<23:56, 51.31s/it]

620


 73%|█████████████████████████████▏          | 73/100 [1:00:53<17:07, 38.05s/it]

187


 74%|█████████████████████████████▌          | 74/100 [1:01:50<18:58, 43.78s/it]

549


 75%|██████████████████████████████          | 75/100 [1:01:52<13:01, 31.28s/it]

99


 76%|██████████████████████████████▍         | 76/100 [1:02:48<15:26, 38.59s/it]

541


 77%|██████████████████████████████▊         | 77/100 [1:03:20<13:59, 36.48s/it]

407


 78%|███████████████████████████████▏        | 78/100 [1:04:37<17:50, 48.67s/it]

640


 79%|███████████████████████████████▌        | 79/100 [1:04:49<13:14, 37.83s/it]

251


 80%|████████████████████████████████        | 80/100 [1:06:35<19:26, 58.31s/it]

753


 81%|████████████████████████████████▍       | 81/100 [1:08:00<20:55, 66.08s/it]

668


 82%|████████████████████████████████▊       | 82/100 [1:08:14<15:10, 50.60s/it]

271


 83%|█████████████████████████████████▏      | 83/100 [1:08:49<13:00, 45.91s/it]

425


 84%|█████████████████████████████████▌      | 84/100 [1:09:04<09:47, 36.73s/it]

278


 85%|██████████████████████████████████      | 85/100 [1:10:16<11:48, 47.26s/it]

618


 86%|██████████████████████████████████▍     | 86/100 [1:10:51<10:08, 43.44s/it]

422


 87%|██████████████████████████████████▊     | 87/100 [1:11:22<08:36, 39.74s/it]

401


 88%|███████████████████████████████████▏    | 88/100 [1:11:50<07:14, 36.20s/it]

377


 89%|███████████████████████████████████▌    | 89/100 [1:12:57<08:20, 45.46s/it]

592


 90%|████████████████████████████████████    | 90/100 [1:14:10<08:58, 53.80s/it]

624


 91%|████████████████████████████████████▍   | 91/100 [1:14:30<06:33, 43.67s/it]

319


 92%|████████████████████████████████████▊   | 92/100 [1:16:11<08:06, 60.85s/it]

724


 93%|█████████████████████████████████████▏  | 93/100 [1:17:28<07:39, 65.61s/it]

636


 94%|█████████████████████████████████████▌  | 94/100 [1:19:11<07:41, 76.88s/it]

742


 95%|██████████████████████████████████████  | 95/100 [1:20:10<05:57, 71.47s/it]

555


 96%|██████████████████████████████████████▍ | 96/100 [1:21:46<05:15, 78.77s/it]

713


 97%|██████████████████████████████████████▊ | 97/100 [1:22:11<03:08, 62.82s/it]

360


 98%|███████████████████████████████████████▏| 98/100 [1:22:26<01:36, 48.50s/it]

276


 99%|███████████████████████████████████████▌| 99/100 [1:23:03<00:45, 45.03s/it]

433


100%|███████████████████████████████████████| 100/100 [1:23:05<00:00, 49.85s/it]

80





In [169]:
# Degree: size of the first resolving prefix of degree-weighted random orderings

nb_of_iters = 100
degree_box_sol = []

# Use the real graph and precompute every pairwise shortest-path length
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Sampling weights: centralities rescaled to sum to one, aligned with node_list
node_list = list(c)
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in range(nb_of_iters):
    # Weighted random ordering of all the nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Record the first prefix length that resolves the graph, then stop
    for nb in range(len(G)):
        if not is_resolving_set(G, nodes[:nb], length):
            continue
        degree_box_sol.append(nb)
        break

754
658
655
783
583
666
619
726
767
610
697
704
329
775
476
517
496
385
727
670
763
786
381
664
606
777
713
563
399
758
712
770
728
770
455
688
778
657
650
574
665
350
716
746
707
768
759
684
692
611
767
738
779
686
422
787
714
711
628
463
775
783
700
755
774
707
772
742
722
771
593
722
702
795
774
687
746
765
753
342
695
789
710
320
461
673
708
684
763
668
671
524
786
582
582
793
676
766
729
729


In [179]:
# Random: size of the first resolving prefix of uniformly random orderings

rand_box_real = []
nb_of_iters = 100

# Use the real graph and precompute every pairwise shortest-path length
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))

for _ in tqdm(range(nb_of_iters)):
    # Uniform random permutation of the nodes
    nodes = random.sample(list(G.nodes), len(G))
    # Record (and print) the first prefix length that resolves the graph, then stop
    for nb in range(len(G)):
        if not is_resolving_set(G, nodes[:nb], length):
            continue
        rand_box_real.append(nb)
        print(nb)
        break

  1%|▍                                          | 1/100 [00:07<12:08,  7.36s/it]

316


  2%|▊                                          | 2/100 [00:39<36:13, 22.18s/it]

704


  3%|█▎                                         | 3/100 [00:40<20:11, 12.49s/it]

106


  4%|█▋                                         | 4/100 [00:50<18:00, 11.26s/it]

365


  5%|██▏                                        | 5/100 [00:52<12:41,  8.02s/it]

162


  6%|██▌                                        | 6/100 [00:59<11:52,  7.58s/it]

271


  7%|███                                        | 7/100 [01:14<15:24,  9.94s/it]

467


  8%|███▍                                       | 8/100 [01:18<12:38,  8.24s/it]

258


  9%|███▊                                       | 9/100 [01:31<14:40,  9.68s/it]

396


 10%|████▏                                     | 10/100 [01:44<16:01, 10.69s/it]

403


 11%|████▌                                     | 11/100 [01:46<12:03,  8.12s/it]

176


 12%|█████                                     | 12/100 [01:47<08:42,  5.94s/it]

108


 13%|█████▍                                    | 13/100 [02:06<14:05,  9.71s/it]

527


 14%|█████▉                                    | 14/100 [02:12<12:24,  8.66s/it]

303


 15%|██████▎                                   | 15/100 [02:25<14:16, 10.07s/it]

447


 16%|██████▋                                   | 16/100 [02:27<10:34,  7.55s/it]

132


 17%|███████▏                                  | 17/100 [02:29<08:06,  5.86s/it]

143


 18%|███████▌                                  | 18/100 [02:59<17:57, 13.14s/it]

678


 19%|███████▉                                  | 19/100 [03:01<13:20,  9.88s/it]

159


 20%|████████▍                                 | 20/100 [03:04<10:12,  7.66s/it]

182


 21%|████████▊                                 | 21/100 [03:44<23:11, 17.61s/it]

680


 22%|█████████▏                                | 22/100 [03:45<16:24, 12.62s/it]

93


 23%|█████████▋                                | 23/100 [03:57<15:54, 12.39s/it]

416


 24%|██████████                                | 24/100 [04:10<15:49, 12.49s/it]

401


 25%|██████████▌                               | 25/100 [04:14<12:24,  9.93s/it]

209


 26%|██████████▉                               | 26/100 [04:17<09:31,  7.72s/it]

169


 27%|███████████▎                              | 27/100 [04:33<12:25, 10.22s/it]

449


 28%|███████████▊                              | 28/100 [04:34<09:07,  7.60s/it]

112


 29%|████████████▏                             | 29/100 [04:38<07:49,  6.62s/it]

246


 30%|████████████▌                             | 30/100 [04:47<08:21,  7.16s/it]

321


 31%|█████████████                             | 31/100 [04:49<06:40,  5.81s/it]

170


 32%|█████████████▍                            | 32/100 [05:29<18:02, 15.92s/it]

749


 33%|█████████████▊                            | 33/100 [05:41<16:23, 14.67s/it]

390


 34%|██████████████▎                           | 34/100 [05:45<12:40, 11.53s/it]

247


 35%|██████████████▋                           | 35/100 [05:59<13:15, 12.23s/it]

454


 36%|███████████████                           | 36/100 [06:03<10:22,  9.73s/it]

216


 37%|███████████████▌                          | 37/100 [06:08<08:45,  8.34s/it]

253


 38%|███████████████▉                          | 38/100 [06:16<08:41,  8.41s/it]

297


 39%|████████████████▍                         | 39/100 [06:41<13:25, 13.20s/it]

590


 40%|████████████████▊                         | 40/100 [06:47<11:05, 11.10s/it]

277


 41%|█████████████████▏                        | 41/100 [06:52<09:13,  9.37s/it]

228


 42%|█████████████████▋                        | 42/100 [06:56<07:28,  7.74s/it]

214


 43%|██████████████████                        | 43/100 [06:58<05:45,  6.06s/it]

144


 44%|██████████████████▍                       | 44/100 [07:17<09:17,  9.95s/it]

488


 45%|██████████████████▉                       | 45/100 [07:37<11:48, 12.88s/it]

523


 46%|███████████████████▎                      | 46/100 [08:02<14:42, 16.34s/it]

581


 47%|███████████████████▋                      | 47/100 [08:31<17:57, 20.33s/it]

642


 48%|████████████████████▏                     | 48/100 [08:46<16:06, 18.58s/it]

440


 49%|████████████████████▌                     | 49/100 [08:52<12:42, 14.95s/it]

292


 50%|█████████████████████                     | 50/100 [08:57<09:57, 11.95s/it]

248


 51%|█████████████████████▍                    | 51/100 [09:05<08:40, 10.62s/it]

304


 52%|█████████████████████▊                    | 52/100 [09:35<13:11, 16.49s/it]

542


 53%|██████████████████████▎                   | 53/100 [09:39<09:55, 12.66s/it]

230


 54%|██████████████████████▋                   | 54/100 [09:41<07:23,  9.63s/it]

183


 55%|███████████████████████                   | 55/100 [09:48<06:41,  8.93s/it]

326


 56%|███████████████████████▌                  | 56/100 [10:03<07:48, 10.65s/it]

466


 57%|███████████████████████▉                  | 57/100 [10:24<09:54, 13.84s/it]

571


 59%|████████████████████████▊                 | 59/100 [10:37<06:26,  9.42s/it]

430
41


 60%|█████████████████████████▏                | 60/100 [10:50<07:04, 10.60s/it]

450


 61%|█████████████████████████▌                | 61/100 [11:18<10:15, 15.79s/it]

649


 62%|██████████████████████████                | 62/100 [11:31<09:22, 14.81s/it]

423


 63%|██████████████████████████▍               | 63/100 [11:31<06:25, 10.43s/it]

44


 64%|██████████████████████████▉               | 64/100 [11:40<06:01, 10.04s/it]

358


 65%|███████████████████████████▎              | 65/100 [11:42<04:32,  7.79s/it]

185


 66%|███████████████████████████▋              | 66/100 [12:17<08:53, 15.69s/it]

724


 67%|████████████████████████████▏             | 67/100 [12:21<06:41, 12.18s/it]

233


 68%|████████████████████████████▌             | 68/100 [12:24<05:05,  9.55s/it]

218


 69%|████████████████████████████▉             | 69/100 [12:28<04:05,  7.91s/it]

221


 70%|█████████████████████████████▍            | 70/100 [12:50<06:02, 12.08s/it]

566


 71%|█████████████████████████████▊            | 71/100 [13:17<08:00, 16.56s/it]

636


 72%|██████████████████████████████▏           | 72/100 [13:30<07:12, 15.45s/it]

436


 73%|██████████████████████████████▋           | 73/100 [13:54<08:08, 18.09s/it]

612


 74%|███████████████████████████████           | 74/100 [14:07<07:08, 16.49s/it]

434


 75%|███████████████████████████████▌          | 75/100 [14:09<05:06, 12.28s/it]

184


 76%|███████████████████████████████▉          | 76/100 [14:18<04:32, 11.37s/it]

371


 77%|████████████████████████████████▎         | 77/100 [14:23<03:31,  9.18s/it]

243


 78%|████████████████████████████████▊         | 78/100 [14:40<04:18, 11.74s/it]

517


 79%|█████████████████████████████████▏        | 79/100 [14:44<03:17,  9.43s/it]

236


 80%|█████████████████████████████████▌        | 80/100 [14:47<02:26,  7.33s/it]

185


 81%|██████████████████████████████████        | 81/100 [15:15<04:18, 13.60s/it]

655


 82%|██████████████████████████████████▍       | 82/100 [15:31<04:15, 14.19s/it]

483


 83%|██████████████████████████████████▊       | 83/100 [15:37<03:21, 11.85s/it]

307


 84%|███████████████████████████████████▎      | 84/100 [15:46<02:54, 10.88s/it]

357


 85%|███████████████████████████████████▋      | 85/100 [15:54<02:31, 10.09s/it]

350


 86%|████████████████████████████████████      | 86/100 [16:07<02:33, 10.97s/it]

396


 87%|████████████████████████████████████▌     | 87/100 [16:33<03:20, 15.46s/it]

615


 88%|████████████████████████████████████▉     | 88/100 [16:51<03:17, 16.44s/it]

533


 89%|█████████████████████████████████████▍    | 89/100 [17:10<03:07, 17.09s/it]

514


 90%|█████████████████████████████████████▊    | 90/100 [17:11<02:03, 12.37s/it]

135


 91%|██████████████████████████████████████▏   | 91/100 [17:23<01:49, 12.19s/it]

419


 92%|██████████████████████████████████████▋   | 92/100 [17:31<01:27, 10.92s/it]

333


 93%|███████████████████████████████████████   | 93/100 [17:59<01:52, 16.02s/it]

658


 94%|███████████████████████████████████████▍  | 94/100 [18:19<01:43, 17.19s/it]

538


 95%|███████████████████████████████████████▉  | 95/100 [18:30<01:15, 15.18s/it]

384


 96%|████████████████████████████████████████▎ | 96/100 [18:37<00:51, 12.76s/it]

322


 97%|████████████████████████████████████████▋ | 97/100 [18:43<00:32, 10.80s/it]

302


 98%|█████████████████████████████████████████▏| 98/100 [18:48<00:18,  9.04s/it]

266


 99%|█████████████████████████████████████████▌| 99/100 [19:06<00:11, 11.87s/it]

514


100%|█████████████████████████████████████████| 100/100 [19:18<00:00, 11.59s/it]

426





In [199]:
# Strategy 1 with degree

start1_degree_box_real = []

nb_of_iters = 100
tresh = 50

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.degree_centrality(G)

# Sampling weights: degree centralities rescaled to sum to one, aligned with node_list
node_list = list(c)
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
ms_b_box_sol_real = []
nodes_to_add = []

# Phase 1: collect nodes for the vertices that the random probes rarely resolve
while True:
    # Per-vertex count of probes that resolved it during this round
    intensities = dict.fromkeys((str(key) for key in G.nodes), 0)
    for trial in range(100):
        # Nodes collected so far plus a weighted random sample
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        for node in set_resolved(G, nodes, length):
            intensities[node] += 1

    # Vertices resolved by fewer than `tresh` of the probes
    small = [key for key in intensities if intensities[key] < tresh]
    s = len(small)
    nodes_to_add.extend(get_nodes_with_diff_neighbors(G, small))
    if not s:
        break

# Drop the collected nodes from the sampling pool and renormalize the remaining weights
indices = [node_list.index(node) for node in nodes_to_add]
kept_positions = [pos for pos in range(len(node_list)) if pos not in indices]
node_list_without_indices_to_add = [node_list[pos] for pos in kept_positions]
c_norm_without_indices_to_add = [c_norm[pos] for pos in kept_positions]
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

# Phase 2: collected nodes first, then a weighted random ordering of all the others
for _ in tqdm(range(nb_of_iters)):
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # Record the first prefix length (at least the collected nodes) that resolves the graph
    for nb in range(len(nodes_to_add), len(G)):
        if not is_resolving_set(G, nodes[:nb], length):
            continue
        start1_degree_box_real.append(nb)
        break

  2%|▊                                          | 2/100 [00:00<00:18,  5.28it/s]

48
35


  3%|█▎                                         | 3/100 [00:00<00:29,  3.26it/s]

58


  5%|██▏                                        | 5/100 [00:01<00:29,  3.17it/s]

59
36


  7%|███                                        | 7/100 [00:02<00:24,  3.78it/s]

46
36


  8%|███▍                                       | 8/100 [00:02<00:34,  2.66it/s]

74


 10%|████▏                                     | 10/100 [00:03<00:30,  2.93it/s]

74
35


 12%|█████                                     | 12/100 [00:03<00:22,  3.92it/s]

46
34


 14%|█████▉                                    | 14/100 [00:04<00:20,  4.29it/s]

56
33


 16%|██████▋                                   | 16/100 [00:04<00:15,  5.45it/s]

36
31


 18%|███████▌                                  | 18/100 [00:05<00:24,  3.31it/s]

98
28


 19%|███████▉                                  | 19/100 [00:06<00:33,  2.42it/s]

70


 20%|████████▍                                 | 20/100 [00:06<00:30,  2.64it/s]

47


 21%|████████▊                                 | 21/100 [00:06<00:27,  2.88it/s]

46


 22%|█████████▏                                | 22/100 [00:07<00:31,  2.45it/s]

52
28


 24%|██████████                                | 24/100 [00:07<00:20,  3.73it/s]

34


 26%|██████████▉                               | 26/100 [00:07<00:17,  4.34it/s]

44
34


 27%|███████████▎                              | 27/100 [00:07<00:16,  4.40it/s]

44


 29%|████████████▏                             | 29/100 [00:09<00:34,  2.04it/s]

139
41


 31%|█████████████                             | 31/100 [00:10<00:25,  2.68it/s]

57
36


 32%|█████████████▍                            | 32/100 [00:10<00:21,  3.16it/s]

39


 34%|██████████████▎                           | 34/100 [00:10<00:16,  3.99it/s]

46
35


 36%|███████████████                           | 36/100 [00:11<00:16,  3.78it/s]

58
33


 37%|███████████████▌                          | 37/100 [00:11<00:17,  3.68it/s]

41


 38%|███████████████▉                          | 38/100 [00:12<00:22,  2.78it/s]

58


 39%|████████████████▍                         | 39/100 [00:12<00:22,  2.71it/s]

47


 41%|█████████████████▏                        | 41/100 [00:13<00:17,  3.47it/s]

54
28


 42%|█████████████████▋                        | 42/100 [00:13<00:17,  3.29it/s]

34


 44%|██████████████████▍                       | 44/100 [00:14<00:15,  3.68it/s]

50
28


 46%|███████████████████▎                      | 46/100 [00:14<00:11,  4.61it/s]

32
32


 47%|███████████████████▋                      | 47/100 [00:14<00:12,  4.33it/s]

47


 48%|████████████████████▏                     | 48/100 [00:14<00:13,  3.99it/s]

48


 49%|████████████████████▌                     | 49/100 [00:15<00:15,  3.29it/s]

44


 51%|█████████████████████▍                    | 51/100 [00:15<00:12,  3.86it/s]

58
33


 52%|█████████████████████▊                    | 52/100 [00:16<00:11,  4.06it/s]

39


 54%|██████████████████████▋                   | 54/100 [00:16<00:10,  4.29it/s]

50
42


 55%|███████████████████████                   | 55/100 [00:17<00:14,  3.13it/s]

73


 56%|███████████████████████▌                  | 56/100 [00:17<00:15,  2.88it/s]

65


 58%|████████████████████████▎                 | 58/100 [00:18<00:13,  3.08it/s]

74
38


 59%|████████████████████████▊                 | 59/100 [00:19<00:24,  1.71it/s]

116


 60%|█████████████████████████▏                | 60/100 [00:19<00:20,  1.99it/s]

43


 61%|█████████████████████████▌                | 61/100 [00:19<00:17,  2.19it/s]

59


 63%|██████████████████████████▍               | 63/100 [00:20<00:11,  3.10it/s]

55
35


 64%|██████████████████████████▉               | 64/100 [00:20<00:09,  3.65it/s]

39


 66%|███████████████████████████▋              | 66/100 [00:20<00:08,  4.16it/s]

51
40


 67%|████████████████████████████▏             | 67/100 [00:21<00:07,  4.47it/s]

42
30


 69%|████████████████████████████▉             | 69/100 [00:21<00:07,  4.33it/s]

55


 70%|█████████████████████████████▍            | 70/100 [00:21<00:07,  3.89it/s]

48


 72%|██████████████████████████████▏           | 72/100 [00:22<00:06,  4.13it/s]

49
36


 74%|███████████████████████████████           | 74/100 [00:22<00:06,  4.31it/s]

45
39


 76%|███████████████████████████████▉          | 76/100 [00:23<00:05,  4.74it/s]

46
32


 77%|████████████████████████████████▎         | 77/100 [00:23<00:04,  4.78it/s]

39


 78%|████████████████████████████████▊         | 78/100 [00:23<00:05,  4.20it/s]

49


 79%|█████████████████████████████████▏        | 79/100 [00:24<00:06,  3.42it/s]

63


 80%|█████████████████████████████████▌        | 80/100 [00:24<00:06,  3.29it/s]

54


 81%|██████████████████████████████████        | 81/100 [00:25<00:07,  2.70it/s]

60


 82%|██████████████████████████████████▍       | 82/100 [00:25<00:05,  3.07it/s]

39


 84%|███████████████████████████████████▎      | 84/100 [00:25<00:04,  3.88it/s]

38
35


 85%|███████████████████████████████████▋      | 85/100 [00:26<00:07,  1.88it/s]

113
30


 87%|████████████████████████████████████▌     | 87/100 [00:27<00:04,  2.94it/s]

29


 88%|████████████████████████████████████▉     | 88/100 [00:29<00:09,  1.31it/s]

138


 89%|█████████████████████████████████████▍    | 89/100 [00:29<00:07,  1.38it/s]

65


 90%|█████████████████████████████████████▊    | 90/100 [00:31<00:08,  1.15it/s]

116


 91%|██████████████████████████████████████▏   | 91/100 [00:31<00:06,  1.43it/s]

52


 92%|██████████████████████████████████████▋   | 92/100 [00:31<00:04,  1.76it/s]

49
27


 95%|███████████████████████████████████████▉  | 95/100 [00:31<00:01,  3.23it/s]

36
34


 96%|████████████████████████████████████████▎ | 96/100 [00:32<00:01,  3.69it/s]

38
44


 98%|█████████████████████████████████████████▏| 98/100 [00:32<00:00,  3.85it/s]

52


100%|█████████████████████████████████████████| 100/100 [00:34<00:00,  2.93it/s]

128
36





In [173]:
# Strategy 1 with betweenness

start1_betweenness_box_real = []

nb_of_iters = 100
tresh = 50

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.betweenness_centrality(G, endpoints=True)

# Sampling weights: betweenness centralities rescaled to sum to one, aligned with node_list
node_list = list(c)
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
ms_b_box_sol_real = []
nodes_to_add = []

# Phase 1: collect nodes for the vertices that the random probes rarely resolve
while True:
    # Per-vertex count of probes that resolved it during this round
    intensities = dict.fromkeys((str(key) for key in G.nodes), 0)
    for trial in range(100):
        # Nodes collected so far plus a weighted random sample
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        for node in set_resolved(G, nodes, length):
            intensities[node] += 1

    # Vertices resolved by fewer than `tresh` of the probes
    small = [key for key in intensities if intensities[key] < tresh]
    s = len(small)
    nodes_to_add.extend(get_nodes_with_diff_neighbors(G, small))
    if not s:
        break

# Drop the collected nodes from the sampling pool and renormalize the remaining weights
indices = [node_list.index(node) for node in nodes_to_add]
kept_positions = [pos for pos in range(len(node_list)) if pos not in indices]
node_list_without_indices_to_add = [node_list[pos] for pos in kept_positions]
c_norm_without_indices_to_add = [c_norm[pos] for pos in kept_positions]
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

# Phase 2: collected nodes first, then a weighted random ordering of all the others
for _ in tqdm(range(nb_of_iters)):

    #print(len(node_list_without_indices_to_add))
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # Record the first prefix length (at least the collected nodes) that resolves the graph
    for nb in range(len(nodes_to_add), len(G)):
        if not is_resolving_set(G, nodes[:nb], length):
            continue
        start1_betweenness_box_real.append(nb)
        break

  1%|▍                                          | 1/100 [00:00<01:33,  1.06it/s]

93


  2%|▊                                          | 2/100 [00:01<00:51,  1.91it/s]

46


  3%|█▎                                         | 3/100 [00:01<00:44,  2.19it/s]

57


  4%|█▋                                         | 4/100 [00:01<00:38,  2.49it/s]

54
27


  6%|██▌                                        | 6/100 [00:02<00:24,  3.89it/s]

35


  8%|███▍                                       | 8/100 [00:02<00:21,  4.19it/s]

53
35


 10%|████▏                                     | 10/100 [00:03<00:27,  3.23it/s]

83
33


 11%|████▌                                     | 11/100 [00:03<00:24,  3.67it/s]

40


 13%|█████▍                                    | 13/100 [00:04<00:23,  3.68it/s]

62
38


 15%|██████▎                                   | 15/100 [00:04<00:21,  3.90it/s]

51
38
27


 17%|███████▏                                  | 17/100 [00:05<00:24,  3.42it/s]

73


 18%|███████▌                                  | 18/100 [00:05<00:25,  3.22it/s]

56


 19%|███████▉                                  | 19/100 [00:06<00:26,  3.01it/s]

58


 20%|████████▍                                 | 20/100 [00:06<00:24,  3.30it/s]

46


 21%|████████▊                                 | 21/100 [00:06<00:24,  3.23it/s]

57


 23%|█████████▋                                | 23/100 [00:07<00:22,  3.42it/s]

67
37


 24%|██████████                                | 24/100 [00:07<00:23,  3.19it/s]

58


 25%|██████████▌                               | 25/100 [00:08<00:24,  3.09it/s]

53


 27%|███████████▎                              | 27/100 [00:08<00:23,  3.09it/s]

59
37


 28%|███████████▊                              | 28/100 [00:09<00:23,  3.05it/s]

57


 29%|████████████▏                             | 29/100 [00:09<00:26,  2.69it/s]

70
45


 31%|█████████████                             | 31/100 [00:10<00:22,  3.01it/s]

54


 32%|█████████████▍                            | 32/100 [00:10<00:24,  2.82it/s]

51


 33%|█████████████▊                            | 33/100 [00:10<00:24,  2.73it/s]

47


 34%|██████████████▎                           | 34/100 [00:11<00:23,  2.76it/s]

52


 35%|██████████████▋                           | 35/100 [00:11<00:22,  2.87it/s]

52
42


 37%|███████████████▌                          | 37/100 [00:11<00:17,  3.65it/s]

44
43


 40%|████████████████▊                         | 40/100 [00:12<00:16,  3.55it/s]

73
43


 41%|█████████████████▏                        | 41/100 [00:13<00:16,  3.59it/s]

53
29


 44%|██████████████████▍                       | 44/100 [00:13<00:11,  5.04it/s]

39
34


 45%|██████████████████▉                       | 45/100 [00:14<00:16,  3.28it/s]

70


 46%|███████████████████▎                      | 46/100 [00:14<00:17,  3.02it/s]

66
44


 49%|████████████████████▌                     | 49/100 [00:15<00:12,  4.22it/s]

42
42


 50%|█████████████████████                     | 50/100 [00:15<00:11,  4.19it/s]

46


 52%|█████████████████████▊                    | 52/100 [00:15<00:11,  4.26it/s]

63
34


 53%|██████████████████████▎                   | 53/100 [00:16<00:11,  3.97it/s]

56


 54%|██████████████████████▋                   | 54/100 [00:16<00:15,  3.04it/s]

76


 55%|███████████████████████                   | 55/100 [00:16<00:13,  3.42it/s]

46


 56%|███████████████████████▌                  | 56/100 [00:17<00:15,  2.77it/s]

77
47


 59%|████████████████████████▊                 | 59/100 [00:17<00:10,  4.00it/s]

44
45


 60%|█████████████████████████▏                | 60/100 [00:18<00:09,  4.31it/s]

44


 62%|██████████████████████████                | 62/100 [00:18<00:08,  4.64it/s]

51
38


 64%|██████████████████████████▉               | 64/100 [00:18<00:06,  5.53it/s]

37
38


 66%|███████████████████████████▋              | 66/100 [00:19<00:05,  6.32it/s]

34
38


 68%|████████████████████████████▌             | 68/100 [00:19<00:07,  4.18it/s]

73
45


 69%|████████████████████████████▉             | 69/100 [00:20<00:10,  3.01it/s]

76


 71%|█████████████████████████████▊            | 71/100 [00:20<00:07,  3.71it/s]

56
38


 72%|██████████████████████████████▏           | 72/100 [00:21<00:06,  4.25it/s]

39


 74%|███████████████████████████████           | 74/100 [00:22<00:10,  2.51it/s]

104
43


 75%|███████████████████████████████▌          | 75/100 [00:22<00:07,  3.21it/s]

32


 76%|███████████████████████████████▉          | 76/100 [00:22<00:08,  2.93it/s]

63


 77%|████████████████████████████████▎         | 77/100 [00:23<00:10,  2.10it/s]

87


 79%|█████████████████████████████████▏        | 79/100 [00:24<00:07,  2.67it/s]

57
34


 80%|█████████████████████████████████▌        | 80/100 [00:24<00:09,  2.10it/s]

52


 81%|██████████████████████████████████        | 81/100 [00:25<00:07,  2.40it/s]

39


 82%|██████████████████████████████████▍       | 82/100 [00:25<00:07,  2.38it/s]

58


 83%|██████████████████████████████████▊       | 83/100 [00:26<00:08,  2.12it/s]

75


 85%|███████████████████████████████████▋      | 85/100 [00:26<00:05,  2.91it/s]

46
41


 86%|████████████████████████████████████      | 86/100 [00:27<00:05,  2.73it/s]

63


 87%|████████████████████████████████████▌     | 87/100 [00:27<00:04,  2.70it/s]

56


 89%|█████████████████████████████████████▍    | 89/100 [00:27<00:03,  3.60it/s]

45
36


 90%|█████████████████████████████████████▊    | 90/100 [00:28<00:03,  3.01it/s]

67


 91%|██████████████████████████████████████▏   | 91/100 [00:28<00:03,  2.72it/s]

56


 92%|██████████████████████████████████████▋   | 92/100 [00:29<00:05,  1.59it/s]

57


 93%|███████████████████████████████████████   | 93/100 [00:30<00:04,  1.71it/s]

44


 94%|███████████████████████████████████████▍  | 94/100 [00:31<00:03,  1.76it/s]

59


 95%|███████████████████████████████████████▉  | 95/100 [00:31<00:02,  1.92it/s]

51


 97%|████████████████████████████████████████▋ | 97/100 [00:32<00:01,  2.54it/s]

51
37


 99%|█████████████████████████████████████████▌| 99/100 [00:32<00:00,  2.83it/s]

66
32


100%|█████████████████████████████████████████| 100/100 [00:33<00:00,  2.97it/s]

104





In [None]:
# Strategy 2
#
# Greedy construction: in every round, draw `nb_of_samples` uniformly random
# node sets (each completed with the nodes chosen so far), count how often
# set_resolved() returns each node, and add the least-often returned node that
# has not been chosen yet. Stop as soon as the chosen nodes alone form a
# resolving set and record their number.
start2_box_real = []

nb_of_iters = 100      # independent repetitions of the greedy construction
nb_of_samples = 100    # random draws per greedy round (previously reused nb_of_iters)

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
# Materialize the node view once: np.random.choice expects an array-like
all_nodes = list(G.nodes)

for _ in range(nb_of_iters):
    nodes_to_add = []

    while True:
        intensities = {str(key): 0 for key in all_nodes}
        for _sample in range(nb_of_samples):
            # Random set of nodes to test
            nodes = nodes_to_add + list(np.random.choice(all_nodes, size=nb_of_nodes_in_resolving_set, replace=False))
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        # Only consider nodes that are not selected yet: re-adding a selected
        # node cannot change the outcome of is_resolving_set and could make
        # this loop spin forever.
        already_added = set(nodes_to_add)
        candidates = [key for key in intensities if key not in already_added]
        hardest_node_to_resolve = min(candidates, key=intensities.get)
        nodes_to_add.append(hardest_node_to_resolve)
        if is_resolving_set(G, nodes_to_add, length):
            start2_box_real.append(len(nodes_to_add))
            break

In [200]:
# Collect the resolving-set sizes of every strategy; insertion order is the
# order in which the groups are later iterated.
# NOTE(review): the last two lists lack the `_real` suffix used by the others -
# confirm they hold the results for the same graph.
dict_result_real = dict([
    ('Strategy 1 degree', start1_degree_box_real),
    ('Strategy 1 betweenness', start1_betweenness_box_real),
    ('Strategy 2', start2_box_real),
    ('Random strategy', rand_box_real),
    ('Betweenness strategy', betweenness_box_sol),
    ('Degree strategy', degree_box_sol),
])

In [205]:
# One box (with all individual points) per strategy
fig = go.Figure()

for group, values in dict_result_real.items():
    fig.add_trace(go.Box(y=values, name=group, boxpoints='all'))

# `titlefont` is a deprecated axis attribute that newer plotly releases reject;
# the nested title.text / title.font form is the supported spelling.
fig.update_layout(
    yaxis=dict(
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black'),
        )
    )
)
fig.show()

In [53]:
# Here we check if the values of the threshold have a big influence
#
# For every threshold in `tresh_values`, run Strategy 1 with degree centrality
# and store the resulting resolving-set sizes in dict_tresh[threshold].
# NOTE(review): G_calls is built in the "Copenhagen calls graph" section further
# down, so this cell only works after those cells have been executed.

dict_tresh = {}

tresh_values = [25]
nb_graph = 1
nb_of_samples = 100    # draws per phase-1 round (previously a bare literal 100)
nb_of_iters = 100      # number of random completions evaluated per threshold

G = G_calls
print(len(G))
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Normalize the degree centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / c_values.sum()
node_list = list(c.keys())
for tresh in tresh_values:
    ms_b_box_sol_real = []
    nodes_to_add = []

    # Phase 1: force in the nodes returned by get_nodes_with_diff_neighbors()
    # for every node that set_resolved() returns in fewer than `tresh` draws.
    while True:
        intensities = {str(key): 0 for key in G.nodes}
        for _sample in range(nb_of_samples):
            # Random set of nodes to test
            nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        small = [key for key, val in intensities.items() if val < tresh]
        nodes_to_add += get_nodes_with_diff_neighbors(G, small)
        if not small:
            print("We manually add {} nodes".format(len(nodes_to_add)))
            break

    # Positions of the forced nodes; a set keeps the two filters below linear
    indices = {node_list.index(node) for node in nodes_to_add}
    node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
    c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
    c_norm_without_indices_to_add = c_norm_without_indices_to_add / c_norm_without_indices_to_add.sum()

    # Phase 2: forced nodes followed by a weighted random ordering of the rest
    for _ in tqdm(range(nb_of_iters)):
        nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
        # Scan up to and including the complete list (it contains every node of
        # G), so each run records a value even if nodes_to_add has duplicates.
        for nb in range(len(nodes_to_add), len(nodes) + 1):
            if is_resolving_set(G, nodes[:nb], length):
                ms_b_box_sol_real.append(nb)
                break
    dict_tresh[tresh] = ms_b_box_sol_real

347
We manually add 62 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 191.79it/s]


In [54]:
# One box per tested threshold, labelled like the other figures of the notebook
fig = go.Figure()

for group, values in dict_tresh.items():
    # The dictionary keys are numeric thresholds; trace names are text
    fig.add_trace(go.Box(y=values, name=str(group)))

fig.update_layout(
    xaxis=dict(title=dict(text="Threshold")),
    yaxis=dict(title=dict(text="Size of the resolving set")),
)
fig.show()

## Copenhagen calls graph

In [31]:
# Build the calls graph from the comma-separated edge list; the two extra
# columns are parsed as integer edge attributes.
calls_edges_path = '../Real graphs simulations/Copenhagen graphs/calls.csv/edges.csv'
calls_edge_columns = (('timestamp', int), ('duration', int))
G_calls = nx.read_edgelist(calls_edges_path, delimiter=',', data=calls_edge_columns)

# Report the size of the graph
nb_calls_nodes = G_calls.number_of_nodes()
nb_calls_edges = G_calls.number_of_edges()
print('Number of nodes:', nb_calls_nodes)
print('Number of edges:', nb_calls_edges)

Number of nodes: 536
Number of edges: 621


In [32]:
# Check whether the raw calls graph is connected before computing pairwise distances on it
nx.is_connected(G_calls)

False

In [33]:
# Remove the small components such that the graph becomes connected.
# `subgraph()` only returns a filtered, read-only view of the original graph;
# `.copy()` materializes it as an independent graph, so the distance and
# centrality computations below do not pay the filtering cost on every access.
biggest = max(nx.connected_components(G_calls), key=len)
G_calls = G_calls.subgraph(biggest).copy()
nx.is_connected(G_calls)

True

In [34]:
# Number of nodes left in the connected calls graph
G_calls.number_of_nodes()

347

In [35]:
# Betweenness

betweenness_box_sol_calls = []
nb_of_iters = 100

# Work on the calls graph and cache all pairwise shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G, endpoints=True)

# Turn the centrality scores into a sampling distribution over the nodes
node_list = list(c.keys())
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in tqdm(range(nb_of_iters)):
    # Centrality-weighted random ordering of all nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Grow the prefix until it is a resolving set, then record its size
    nb = 0
    while nb < len(G) and not is_resolving_set(G, nodes[:nb], length):
        nb += 1
    if nb < len(G):
        betweenness_box_sol_calls.append(nb)

100%|█████████████████████████████████████████| 100/100 [14:09<00:00,  8.49s/it]


In [36]:
# Degree

degree_box_sol_calls = []
nb_of_iters = 100

# Work on the calls graph and cache all pairwise shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Turn the centrality scores into a sampling distribution over the nodes
node_list = list(c.keys())
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in range(nb_of_iters):
    # Centrality-weighted random ordering of all nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Grow the prefix until it is a resolving set, then record its size
    nb = 0
    while nb < len(G) and not is_resolving_set(G, nodes[:nb], length):
        nb += 1
    if nb < len(G):
        degree_box_sol_calls.append(nb)

In [37]:
# Random

rand_box_real_calls = []
nb_of_iters = 100

# Work on the calls graph and cache all pairwise shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

for _ in tqdm(range(nb_of_iters)):
    # Uniformly random ordering of all nodes
    nodes = random.sample(list(G.nodes), len(G))
    # Grow the prefix until it is a resolving set, then record its size
    nb = 0
    while nb < len(G) and not is_resolving_set(G, nodes[:nb], length):
        nb += 1
    if nb < len(G):
        rand_box_real_calls.append(nb)

100%|█████████████████████████████████████████| 100/100 [04:35<00:00,  2.75s/it]


In [51]:
# Strategy 1 with degree
#
# Phase 1: repeatedly draw degree-weighted random node sets and count, for every
# node, in how many draws set_resolved() returns it. Nodes returned in fewer
# than `tresh` of the `nb_of_samples` draws are handed to
# get_nodes_with_diff_neighbors(), and whatever it returns is forced into every
# later candidate set. Phase 1 stops once no node falls below the threshold.
# Phase 2: complete the forced nodes with a degree-weighted random ordering of
# the remaining nodes and record the smallest prefix that is a resolving set.

start1_degree_box_real_calls = []

tresh = 25             # minimum count (out of nb_of_samples) for a node to be left alone
nb_of_samples = 100    # draws per phase-1 round (previously a bare literal 100)
nb_of_iters = 100      # number of random completions evaluated in phase 2

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.degree_centrality(G)

# Normalize the degree centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / c_values.sum()
node_list = list(c.keys())
nodes_to_add = []

while True:
    intensities = {str(key): 0 for key in G.nodes}
    for _sample in range(nb_of_samples):
        # Random set of nodes to test, always including the forced nodes
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    small = [key for key, val in intensities.items() if val < tresh]
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)

    if not small:
        print("We manually add {} nodes".format(len(nodes_to_add)))
        break

# Positions (in node_list) of the forced nodes; a set keeps the filters below linear.
# node_list.index() still raises ValueError if a forced node is unknown to the graph.
indices = {node_list.index(node) for node in nodes_to_add}
node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
c_norm_without_indices_to_add = c_norm_without_indices_to_add / c_norm_without_indices_to_add.sum()

for _ in tqdm(range(nb_of_iters)):
    # Forced nodes first, then a weighted random ordering of all remaining nodes
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # Scan up to and including the complete list: it contains every node of G,
    # so each run records a value even if nodes_to_add holds duplicates.
    for nb in range(len(nodes_to_add), len(nodes) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            start1_degree_box_real_calls.append(nb)
            break

49
61
61


100%|█████████████████████████████████████████| 100/100 [00:20<00:00,  5.00it/s]


In [64]:
# Strategy 1 with betweenness
#
# Phase 1: repeatedly draw betweenness-weighted random node sets and count, for
# every node, in how many draws set_resolved() returns it. Nodes returned in
# fewer than `tresh` of the `nb_of_samples` draws are handed to
# get_nodes_with_diff_neighbors(), and whatever it returns is forced into every
# later candidate set. Phase 1 stops once no node falls below the threshold.
# Phase 2: complete the forced nodes with a betweenness-weighted random ordering
# of the remaining nodes and record the smallest prefix that is a resolving set.

start1_betweenness_box_real_calls = []

tresh = 15             # minimum count (out of nb_of_samples) for a node to be left alone
nb_of_samples = 100    # draws per phase-1 round (previously a bare literal 100)
nb_of_iters = 100      # number of random completions evaluated in phase 2

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.betweenness_centrality(G, endpoints=True)

# Normalize the betweenness centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / c_values.sum()
node_list = list(c.keys())
nodes_to_add = []

while True:
    intensities = {str(key): 0 for key in G.nodes}
    for _sample in range(nb_of_samples):
        # Random set of nodes to test, always including the forced nodes
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    small = [key for key, val in intensities.items() if val < tresh]
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)
    if not small:
        print("We manually add {} nodes".format(len(nodes_to_add)))
        break

# Positions (in node_list) of the forced nodes; a set keeps the filters below linear.
# node_list.index() still raises ValueError if a forced node is unknown to the graph.
indices = {node_list.index(node) for node in nodes_to_add}
node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
c_norm_without_indices_to_add = c_norm_without_indices_to_add / c_norm_without_indices_to_add.sum()

for _ in tqdm(range(nb_of_iters)):
    # Forced nodes first, then a weighted random ordering of all remaining nodes
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # Scan up to and including the complete list: it contains every node of G,
    # so each run records a value even if nodes_to_add holds duplicates.
    for nb in range(len(nodes_to_add), len(nodes) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            start1_betweenness_box_real_calls.append(nb)
            break

We manually add 61 nodes


100%|█████████████████████████████████████████| 100/100 [01:04<00:00,  1.56it/s]


In [40]:
# Strategy 2
#
# Greedy construction: in every round, draw `nb_of_samples` uniformly random
# node sets (each completed with the nodes chosen so far), count how often
# set_resolved() returns each node, and add the least-often returned node that
# has not been chosen yet. Stop as soon as the chosen nodes alone form a
# resolving set and record their number.
start2_box_real_calls = []

nb_of_iters = 100      # independent repetitions of the greedy construction
nb_of_samples = 100    # random draws per greedy round (previously reused nb_of_iters)

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
# Materialize the node view once: np.random.choice expects an array-like
all_nodes = list(G.nodes)

for _ in range(nb_of_iters):
    nodes_to_add = []

    while True:
        intensities = {str(key): 0 for key in all_nodes}
        for _sample in range(nb_of_samples):
            # Random set of nodes to test
            nodes = nodes_to_add + list(np.random.choice(all_nodes, size=nb_of_nodes_in_resolving_set, replace=False))
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        # Only consider nodes that are not selected yet: re-adding a selected
        # node cannot change the outcome of is_resolving_set and could make
        # this loop spin forever.
        already_added = set(nodes_to_add)
        candidates = [key for key in intensities if key not in already_added]
        hardest_node_to_resolve = min(candidates, key=intensities.get)
        nodes_to_add.append(hardest_node_to_resolve)
        if is_resolving_set(G, nodes_to_add, length):
            start2_box_real_calls.append(len(nodes_to_add))
            break

In [65]:
# Collect the resolving-set sizes obtained on the calls graph, one entry per
# strategy; insertion order is the order in which the groups are later iterated.
dict_result_real = dict([
    ('Strategy 1 degree', start1_degree_box_real_calls),
    ('Strategy 1 betweenness', start1_betweenness_box_real_calls),
    ('Strategy 2', start2_box_real_calls),
    ('Random strategy', rand_box_real_calls),
    ('Betweenness strategy', betweenness_box_sol_calls),
    ('Degree strategy', degree_box_sol_calls),
])

In [66]:
# One box (with all individual points) per strategy
fig = go.Figure()

for group, values in dict_result_real.items():
    fig.add_trace(go.Box(y=values, name=group, boxpoints='all'))

# `titlefont` is a deprecated axis attribute that newer plotly releases reject;
# the nested title.text / title.font form is the supported spelling.
fig.update_layout(
    yaxis=dict(
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black'),
        )
    )
)
fig.show()

In [69]:
# Number of nodes in G_fb
G_fb.number_of_nodes()

800