The goal of this notebook is to find strategies that produce resolving sets that are as small as possible. We start with the Barabási–Albert model and then simulate the strategies on real graphs.

In [None]:
import networkx as nx
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import sys
sys.path.append('../')
from helpers import *
import plotly.graph_objs as go
%load_ext autoreload
%autoreload 2

## Barabási–Albert model

In [None]:
# By default, we will use the following parameters
# n = 500
# m = 30
# nb_graph = 1
# nb_of_iters = 100

# can be used to speed up simulations as we know the theoretical value
# lower_bound = 60
# upper_bound = 160

In [None]:
def sim_centrality(centrality_f, lower_bound=20, upper_bound=110, n=500, m=30, nb_graph=1, nb_of_iters=100, seed=0):
    """Estimate how often a centrality-weighted random subset resolves a Barabási–Albert graph.

    For each of ``nb_graph`` graphs generated with
    ``nx.barabasi_albert_graph(n, m)`` and each subset size ``nb`` in
    ``[lower_bound, upper_bound)``, ``nb_of_iters`` subsets are drawn without
    replacement, with probabilities proportional to ``centrality_f(G)``, and
    tested with ``is_resolving_set``.

    Parameters
    ----------
    centrality_f : callable
        Maps a graph to a ``{node: score}`` dict (e.g. ``nx.degree_centrality``).
    lower_bound, upper_bound : int
        Sizes below ``lower_bound`` are recorded as 0 and sizes from
        ``upper_bound`` to ``n - 1`` as 1 without being simulated.
    n, m : int
        Parameters of the Barabási–Albert generator.
    nb_graph : int
        Number of graphs the frequencies are averaged over.
    nb_of_iters : int
        Number of subsets drawn per size and per graph.
    seed : int
        Seed applied to both ``random`` and ``np.random``.

    Returns
    -------
    dict
        ``{size: average success frequency}`` for the sizes ``0 .. n - 1``.
    """
    solutions_prob_c = {}
    # The subsets are drawn with np.random.choice, so seeding `random` alone
    # (which networkx falls back to when no seed is passed to the generator)
    # does not make a run reproducible: seed both generators.
    random.seed(seed)
    np.random.seed(seed)
    for _ in range(nb_graph):
        # Generate the random graph and compute shortest paths
        G = nx.barabasi_albert_graph(n, m)
        length = dict(nx.all_pairs_shortest_path_length(G))

        c = centrality_f(G)

        # Turn the centrality scores into a sampling distribution
        c_values = np.array(list(c.values()))
        c_norm = c_values / sum(c_values)
        node_list = list(c.keys())

        # Sizes below lower_bound are assumed never to resolve the graph
        for nb in range(0, lower_bound):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 0
        for nb in tqdm(range(lower_bound, upper_bound)):
            count = 0
            for _trial in range(nb_of_iters):
                # Random set of nodes to test
                nodes = set(np.random.choice(node_list, p=c_norm, size=nb, replace=False))
                if is_resolving_set(G, nodes, length):
                    count += 1
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + (count / nb_of_iters)
        # Sizes from upper_bound on are assumed always to resolve the graph
        for nb in range(upper_bound, n):
            solutions_prob_c[nb] = solutions_prob_c.get(nb, 0) + 1

    # Average the accumulated frequencies over the graphs
    for i in range(n):
        solutions_prob_c[i] = solutions_prob_c[i] / nb_graph

    return solutions_prob_c

In [None]:
# Betweenness-centrality sampling (10 graphs, sizes 20-79 simulated).
# The plotting cell of this section reads `solutions_prob_bc`, which no other
# live cell defines, so the result is bound to that name as well as to the
# original `solutions_prob_bc_2`.
solutions_prob_bc = solutions_prob_bc_2 = sim_centrality(nx.betweenness_centrality, lower_bound=20, upper_bound=80, nb_graph=10)

In [None]:
# Sampling weighted by eigenvector centrality: sizes 30-89 are simulated on
# 10 graphs, smaller sizes are recorded as 0 and larger ones as 1.
solutions_prob_eigen = sim_centrality(nx.eigenvector_centrality, lower_bound=30, upper_bound=90, nb_graph=10)

In [None]:
# Sampling weighted by degree centrality: sizes 30-89 are simulated on
# 10 graphs, smaller sizes are recorded as 0 and larger ones as 1.
solutions_prob_dc = sim_centrality(nx.degree_centrality, lower_bound=30, upper_bound=90, nb_graph=10)

In [None]:
# Sampling weighted by closeness centrality: sizes 20-99 are simulated on
# 10 graphs, smaller sizes are recorded as 0 and larger ones as 1.
solutions_prob_cc = sim_centrality(nx.closeness_centrality, lower_bound=20, upper_bound=100, nb_graph=10)

In [None]:
# Sampling weighted by current-flow betweenness centrality: sizes 30-89 are
# simulated on 10 graphs.
# NOTE(review): this result is not referenced by the plotting cell of this section.
solutions_prob_flow_bc = sim_centrality(nx.current_flow_betweenness_centrality, lower_bound=30, upper_bound=90, nb_graph=10)

In [None]:
# Sampling weighted by second-order centrality: sizes 25-89 are simulated on
# 10 graphs, smaller sizes are recorded as 0 and larger ones as 1.
solutions_prob_so = sim_centrality(nx.second_order_centrality, lower_bound=25, upper_bound=90, nb_graph=10)

In [None]:
# Random strategy: every subset is drawn uniformly among the vertices

n = 500
m = 30
nb_graph = 10
s = {}
nb_of_iters = 100

# Sizes outside [lower_bound, upper_bound) are not simulated because the
# theoretical value is known: they are recorded as 0 (below) or 1 (above)
lower_bound = 20
upper_bound = 100

solutions_rand = {}
random.seed(0)
for _ in range(nb_graph):
    # Fresh Barabási–Albert graph together with its all-pairs distances
    G = nx.barabasi_albert_graph(n, m)
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    for size in range(lower_bound):
        solutions_rand.setdefault(size, 0)
    for size in tqdm(range(lower_bound, upper_bound)):
        # Number of uniformly drawn subsets of this size that resolve G
        hits = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, size)), length)
        )
        solutions_rand[size] = solutions_rand.get(size, 0) + hits / nb_of_iters
    for size in range(upper_bound, n):
        solutions_rand[size] = solutions_rand.get(size, 0) + 1

# Average the accumulated frequencies over the graphs
for size in range(n):
    solutions_rand[size] = solutions_rand[size] / nb_graph

In [None]:
# Strategy where we draw only from the first quintile (highest degrees)

n = 500
m = 30
nb_of_iters = 100
nb_graphs = 10

solutions_first_q = {}

# Sizes outside [lower_bound, upper_bound) are recorded as 0 / 1 without
# simulation. upper_bound must not exceed n // number_of_quintile, the number
# of vertices the subsets are drawn from.
lower_bound = 20
upper_bound = 90

number_of_quintile = 5

# Seeded like the other strategies so that the curve is reproducible
random.seed(0)
for _ in range(nb_graphs):
    G = nx.barabasi_albert_graph(n, m)

    # Rank the vertices by decreasing degree and label each of them with the
    # 1-based index of the group of n // number_of_quintile vertices it falls in
    degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    num_vertices_per_decile = n // number_of_quintile
    nx.set_node_attributes(
        G,
        {vertex: rank // num_vertices_per_decile + 1 for rank, (vertex, _) in enumerate(degree_list)},
        'decile',
    )

    length = dict(nx.all_pairs_shortest_path_length(G))

    # Keep only the vertices of the first group, i.e. the highest degrees
    quintile = 1
    nodes_in_decile = [node for node, decile in nx.get_node_attributes(G, 'decile').items() if decile == quintile]

    for nb in range(0, lower_bound):
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound)):
        count = 0
        for i in range(nb_of_iters):
            # Random set of high-degree nodes to test
            nodes = set(random.sample(nodes_in_decile, nb))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, n):
        solutions_first_q[nb] = solutions_first_q.get(nb, 0) + 1

# Average the accumulated frequencies over the graphs
for i in range(n):
    solutions_first_q[i] = solutions_first_q[i] / nb_graphs

In [None]:
# The simulation cell above already divides solutions_first_q by nb_graphs;
# dividing a second time here would shrink every probability by an extra
# factor of nb_graphs. This cell therefore only checks the normalisation:
# the largest sizes are recorded as 1 per graph, so their average must be 1.
if abs(solutions_first_q[n - 1] - 1) > 1e-9:
    raise ValueError("solutions_first_q is not normalised: the largest sizes should have probability 1")

In [None]:
#with open('simulations/transition_eigen_centrality.pickle', 'wb') as file:
    #pickle.dump(solutions_prob_eigen, file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
#Can be used to directly import the results of the simulations


#with open('simulations/Barbasi-Albert/transition_random.pickle', "rb") as file:
    #solutions_rand = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_betweenness_centrality.pickle', "rb") as file:
    #solutions_prob_bc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_degree_centrality.pickle', "rb") as file:
    #solutions_prob_dc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_so_centrality.pickle', "rb") as file:
    #solutions_prob_so = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_closness_centrality.pickle', "rb") as file:
    #solutions_prob_cc = pickle.load(file)
#with open('simulations/Barbasi-Albert/transition_eigen_centrality.pickle', "rb") as file:
    #solutions_prob_eigen = pickle.load(file)

In [None]:
def _transition_trace(solutions, label):
    """Build the markers+lines trace of one strategy from its {size: probability} dict."""
    return go.Scatter(x=list(solutions.keys()), y=list(solutions.values()), mode='markers+lines', name=label)


# One trace per sampling strategy
trace = _transition_trace(solutions_rand, 'Random nodes')
trace1 = _transition_trace(solutions_first_q, 'Nodes with high degree (top 20%)')
trace3 = _transition_trace(solutions_prob_bc, 'Betweenness centrality')
trace4 = _transition_trace(solutions_prob_dc, 'Degree centrality')
trace5 = _transition_trace(solutions_prob_so, 'Second order centrality')
trace6 = _transition_trace(solutions_prob_cc, 'Closeness centrality')
trace8 = _transition_trace(solutions_prob_eigen, 'Eigenvector centrality')

# Axis titles and legend position (no figure title)
layout = go.Layout(
    xaxis=dict(title='Cardinality of the subset'),
    yaxis=dict(title='Probability of resolving the graph'),
    legend=dict(x=0.67, y=0.08, orientation='v'),
)

# Assemble the figure and display it
fig = go.Figure(data=[trace, trace1, trace3, trace4, trace5, trace6, trace8], layout=layout)
fig.show()

In [None]:
def sim_centrality_box(centrality_f, n=500, m=30, nb_graph=15, nb_of_iters=100, seed=0):
    """Collect the sizes of the smallest resolving prefixes of centrality-weighted random orderings.

    For each of ``nb_graph`` graphs generated with
    ``nx.barabasi_albert_graph(n, m)``, ``nb_of_iters`` orderings of all the
    vertices are drawn without replacement with probabilities proportional to
    ``centrality_f(G)``. For each ordering the length of the shortest prefix
    accepted by ``is_resolving_set`` is recorded.

    Parameters
    ----------
    centrality_f : callable
        Maps a graph to a ``{node: score}`` dict.
    n, m : int
        Parameters of the Barabási–Albert generator.
    nb_graph : int
        Number of graphs generated.
    nb_of_iters : int
        Number of orderings drawn per graph.
    seed : int
        Seed applied to both ``random`` and ``np.random``.

    Returns
    -------
    list of int
        One prefix length per ordering for which a resolving prefix was found.
    """
    box_sol = []
    # The orderings are drawn with np.random.choice, so both generators have
    # to be seeded for the run to be reproducible.
    random.seed(seed)
    np.random.seed(seed)
    for _ in tqdm(range(nb_graph)):
        # Generate the random graph and compute shortest paths
        G = nx.barabasi_albert_graph(n, m)
        length = dict(nx.all_pairs_shortest_path_length(G))

        c = centrality_f(G)

        # Turn the centrality scores into a sampling distribution
        c_values = np.array(list(c.values()))
        c_norm = c_values / sum(c_values)
        node_list = list(c.keys())
        for _trial in range(nb_of_iters):
            # Random ordering of all the vertices
            nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
            # Grow the prefix until it resolves the graph
            for nb in range(0, len(G) + 1):
                if is_resolving_set(G, nodes[:nb], length):
                    box_sol.append(nb)
                    break
    return box_sol

In [None]:
# Random: smallest resolving prefix of uniformly shuffled vertex orderings

n = 500
m = 30
nb_graph = 15
nb_of_iters = 100

rand_box_sol = []
random.seed(0)
for _ in range(nb_graph):

    # Generate the random graph and compute shortest paths
    G = nx.barabasi_albert_graph(n, m)
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes)

    for _trial in tqdm(range(nb_of_iters)):
        # Sampling all n vertices without replacement yields a random ordering
        nodes = random.sample(node_list, n)
        # The range includes len(G) so that the full vertex set is tested too,
        # as sim_centrality_box does
        for nb in range(0, len(G) + 1):
            if is_resolving_set(G, nodes[:nb], length):
                rand_box_sol.append(nb)
                break

In [None]:
# Smallest resolving prefix sizes for degree-centrality-weighted orderings
# (function defaults: 15 graphs, 100 orderings per graph).
dc_box_sol = sim_centrality_box(nx.degree_centrality)

In [None]:
# Smallest resolving prefix sizes for betweenness-centrality-weighted orderings
# (function defaults: 15 graphs, 100 orderings per graph).
bc_box_sol = sim_centrality_box(nx.betweenness_centrality)

In [None]:
# Smallest resolving prefix sizes for closeness-centrality-weighted orderings
# (function defaults: 15 graphs, 100 orderings per graph).
cc_box_sol = sim_centrality_box(nx.closeness_centrality)

In [None]:
# Smallest resolving prefix sizes for second-order-centrality-weighted orderings
# (function defaults: 15 graphs, 100 orderings per graph).
so_box_sol = sim_centrality_box(nx.second_order_centrality)

In [None]:
# Smallest resolving prefix sizes for eigenvector-centrality-weighted orderings
# (function defaults: 15 graphs, 100 orderings per graph).
# NOTE(review): eigen_box_sol is not included in the dict_result built below.
eigen_box_sol = sim_centrality_box(nx.eigenvector_centrality)

In [None]:
# Box-plot groups: legend label -> list of resolving-set sizes
dict_result = {
    'Betweenness centrality': bc_box_sol,
    'Degree centrality': dc_box_sol,
    'Closeness centrality': cc_box_sol,
    'Random': rand_box_sol,
    'Second order centrality': so_box_sol,
}

In [None]:
#with open('simulations/Barbasi-Albert/centrality_boxes_15graph_100iters.pickle', "rb") as file:
    #dict_result = pickle.load(file)

In [None]:
#with open('simulations/centrality_boxes_15graph_100iters.pickle', 'wb') as file:
    #pickle.dump(dict_result, file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Box plot of the resolving-set sizes, one box per strategy
fig = go.Figure()

# `dict_result` is the dict built (or loaded from the pickle) above;
# `dict_result2` is not defined anywhere in this notebook section.
for group, values in dict_result.items():
    fig.add_trace(go.Box(y=values, name=group))

# The axis title font is given through `title.font`; the older `titlefont`
# shortcut is deprecated in Plotly.
fig.update_layout(
    yaxis=dict(
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black'),
        )
    )
)

fig.show()

## Copenhagen fb_friends graph

In [None]:
# Build the Copenhagen Facebook-friendship graph from its comma-separated edge list
fb_edges_path = '../Real graphs simulations/Copenhagen graphs/fb_friends.csv/edges.csv'
G_fb = nx.read_edgelist(fb_edges_path, delimiter=',')

# Report the size of the graph
print(f'Number of nodes: {G_fb.number_of_nodes()}')
print(f'Number of edges: {G_fb.number_of_edges()}')

In [None]:
def sim_centrality_real_g(centrality_f, lower_bound=20, upper_bound=800, nb_of_iters=400, seed=0, step=10, G=None):
    """Estimate how often a centrality-weighted random subset resolves a fixed graph.

    For each subset size ``nb`` in ``lower_bound, lower_bound + step, ...,
    upper_bound``, ``nb_of_iters`` subsets are drawn without replacement with
    probabilities proportional to ``centrality_f(G)`` and tested with
    ``is_resolving_set``.

    The graph is fixed, so it is simulated exactly once. The previous version
    looped over the notebook-level ``nb_graph`` and summed the frequencies
    without averaging, which produced values above 1 whenever ``nb_graph``
    was larger than 1.

    Parameters
    ----------
    centrality_f : callable
        Maps a graph to a ``{node: score}`` dict.
    lower_bound, upper_bound : int
        Sizes below ``lower_bound`` are recorded as 0 and sizes above
        ``upper_bound`` (up to the number of vertices) as 1 without simulation.
    nb_of_iters : int
        Number of subsets drawn per size.
    seed : int
        Seed applied to both ``random`` and ``np.random``.
    step : int
        Spacing between two simulated sizes.
    G : graph, optional
        Graph to analyse; defaults to the notebook-level ``G_fb``.

    Returns
    -------
    dict
        ``{size: success frequency}``; between the bounds only the simulated
        sizes are present.
    """
    if G is None:
        G = G_fb

    solutions_prob_c = {}
    # The subsets are drawn with np.random.choice: seed both generators
    random.seed(seed)
    np.random.seed(seed)

    length = dict(nx.all_pairs_shortest_path_length(G))

    c = centrality_f(G)

    # Turn the centrality scores into a sampling distribution
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for nb in range(0, lower_bound):
        solutions_prob_c[nb] = 0
    for nb in tqdm(range(lower_bound, upper_bound + 1, step)):
        count = 0
        for _trial in range(nb_of_iters):
            # Random set of nodes to test
            nodes = set(np.random.choice(node_list, p=c_norm, size=nb, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_c[nb] = count / nb_of_iters
    for nb in range(upper_bound + 1, G.number_of_nodes() + 1):
        solutions_prob_c[nb] = 1

    return solutions_prob_c

In [None]:
# Eigenvector-centrality sampling on the Facebook graph
# (function defaults: sizes 20..800 in steps of 10, 400 subsets per size).
solutions_prob_eigen_fb = sim_centrality_real_g(nx.eigenvector_centrality)

In [None]:
# Degree-centrality sampling on the Facebook graph
# (function defaults: sizes 20..800 in steps of 10, 400 subsets per size).
solutions_prob_dc_fb = sim_centrality_real_g(nx.degree_centrality)

In [None]:
# Closeness-centrality sampling on the Facebook graph
# (function defaults: sizes 20..800 in steps of 10, 400 subsets per size).
solutions_prob_cc_fb = sim_centrality_real_g(nx.closeness_centrality)

In [None]:
# Second-order-centrality sampling on the Facebook graph
# (function defaults: sizes 20..800 in steps of 10, 400 subsets per size).
solutions_prob_so_fb = sim_centrality_real_g(nx.second_order_centrality)

In [None]:
# Random strategy: every subset is drawn uniformly among the vertices

nb_graph = 1
s = {}
nb_of_iters = 800
step = 10

# Sizes outside [lower_bound, upper_bound] are filled in without simulation
lower_bound = 20
upper_bound = 800

solutions_rand_fb = {}
random.seed(0)
for _ in range(nb_graph):
    # Fixed Facebook graph together with its all-pairs distances
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    for size in range(lower_bound):
        solutions_rand_fb.setdefault(size, 0)
    for size in tqdm(range(lower_bound, upper_bound + 1, step)):
        # Number of uniformly drawn subsets of this size that resolve G
        hits = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, size)), length)
        )
        solutions_rand_fb[size] = solutions_rand_fb.get(size, 0) + hits / nb_of_iters
    for size in range(upper_bound + 1, G_fb.number_of_nodes() + 1):
        solutions_rand_fb[size] = solutions_rand_fb.get(size, 0) + 1

In [None]:
# Betweenness centrality (endpoints included) as sampling weights

# Sizes outside [lower_bound, upper_bound] are filled in without simulation
lower_bound = 20
upper_bound = 800
nb_of_iters = 800
step = 10
solutions_prob_bc_fb = {}
for _ in range(nb_graph):
    # Fixed Facebook graph together with its all-pairs distances
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.betweenness_centrality(G, endpoints=True)

    # Sampling distribution proportional to the centrality scores
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for size in range(lower_bound):
        solutions_prob_bc_fb.setdefault(size, 0)
    for size in tqdm(range(lower_bound, upper_bound + 1, step)):
        # Number of weighted subsets of this size that resolve G
        hits = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(np.random.choice(node_list, p=c_norm, size=size, replace=False)), length)
        )
        solutions_prob_bc_fb[size] = solutions_prob_bc_fb.get(size, 0) + hits / nb_of_iters
    for size in range(upper_bound + 1, G_fb.number_of_nodes() + 1):
        solutions_prob_bc_fb[size] = solutions_prob_bc_fb.get(size, 0) + 1


In [None]:
# Boost the proba of low degree nodes

lower_bound = 20
upper_bound = 40

solutions_prob_ms_d = {}
for _ in range(nb_graph):
    # Fixed Facebook graph together with its all-pairs distances
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.degree_centrality(G)
    # Distinct centrality values in increasing order
    a = sorted(set(c.values()))
    # Vertices holding one of the two smallest values get a weight of
    # 1000 * max. Both the maximum and the set of boosted values are computed
    # once instead of once per vertex.
    boosted_weight = 1000 * max(c.values())
    lowest_values = set(a[:2])
    c = {key: boosted_weight if value in lowest_values else value for key, value in c.items()}
    # Turn the weights into a sampling distribution
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for nb in range(0, lower_bound):
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound, 1)):
        count = 0
        for i in range(nb_of_iters):
            # Random set of nodes to test
            nodes = set(np.random.choice(node_list, p=c_norm, size=nb, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, G_fb.number_of_nodes()):
        solutions_prob_ms_d[nb] = solutions_prob_ms_d.get(nb, 0) + 1


In [None]:
# Boost the proba of low betweenness nodes

lower_bound = 20
upper_bound = 40

solutions_prob_ms_bc = {}
for _ in range(nb_graph):
    # Fixed Facebook graph together with its all-pairs distances
    G = G_fb
    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.betweenness_centrality(G)
    # Distinct centrality values in increasing order
    a = sorted(set(c.values()))
    # Vertices holding the smallest value get a weight of 1000 * max. Both the
    # maximum and the boosted value set are computed once instead of once per
    # vertex.
    boosted_weight = 1000 * max(c.values())
    lowest_values = set(a[:1])
    c = {key: boosted_weight if value in lowest_values else value for key, value in c.items()}
    # Turn the weights into a sampling distribution
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for nb in range(0, lower_bound):
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + 0
    for nb in tqdm(range(lower_bound, upper_bound, 1)):
        count = 0
        for i in range(nb_of_iters):
            # Random set of nodes to test
            nodes = set(np.random.choice(node_list, p=c_norm, size=nb, replace=False))
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, G_fb.number_of_nodes()):
        solutions_prob_ms_bc[nb] = solutions_prob_ms_bc.get(nb, 0) + 1


In [None]:
# Strategy 1 with degree: first measure how often each vertex is reported by
# set_resolved over 100 degree-weighted subsets of 20 vertices, then always
# include the vertices derived from the rarely reported ones and complete
# the set by degree-weighted sampling.
# NOTE(review): the result is stored under solutions_prob_ms_bc_add, the same
# name the "Strategy 1 with betweenness" cell assigns afresh, so this curve is
# replaced once that cell runs.

lower_bound = 20
upper_bound = 40

solutions_prob_ms_bc_add = {}
for _ in range(nb_graph):
    # The graph is fixed (G_fb); only the sampling is random
    G = G_fb

    nb_of_nodes_in_resolving_set = 20

    length = dict(nx.all_pairs_shortest_path_length(G))

    c = nx.degree_centrality(G)

    # Per-vertex counter, keyed by str(node)
    intensities = {str(key): 0 for key in G.nodes}

    # Turn the degree centralities into a sampling distribution
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())
    
    for i in range(100):
        # Random set of nodes to test
        nodes = set(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += +1
    
    # Vertices counted in fewer than 40 of the 100 trials
    small = [key for key, val in intensities.items() if val < 40]
    # presumably the vertices to force into every candidate set; see helpers - TODO confirm
    nodes_to_add = get_nodes_with_diff_neighbors(G, small)
    
    for nb in range(0, lower_bound):
        solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 0  
    for nb in tqdm(range(lower_bound, upper_bound, 1)): # G.number_of_nodes()
        # NOTE(review): negative when len(nodes_to_add) > nb - confirm this cannot happen
        num_nodes = nb - len(nodes_to_add) # Number of nodes to sample
        count = 0
        for i in range(nb_of_iters):
            # NOTE(review): the forced vertices are not removed from node_list, so the sample may repeat some of them
            nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=num_nodes, replace=False)) # Random set of nodes to test
            if is_resolving_set(G, nodes, length):
                count += 1
        solutions_prob_ms_bc_add[nb] =  solutions_prob_ms_bc_add.get(nb, 0) + (count / nb_of_iters)
    for nb in range(upper_bound, G_fb.number_of_nodes()):
            solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 1

In [None]:
# Strategy 1 with betweenness: force the vertices derived from the rarely
# resolved ones into every candidate set, then complete the set by
# betweenness-weighted sampling among the remaining vertices.

lower_bound = 20
upper_bound = 40
nb_of_iters = 4
solutions_prob_ms_bc_add = {}

G = G_fb
nodes_to_add = []
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G)

# Turn the betweenness centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())

while True:
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # The vertices forced so far are part of every trial set, as in the
        # box-plot versions of this strategy. Without them every pass sees the
        # same distribution, finds the same hard vertices again, and the loop
        # never reaches the `s == 0` exit.
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    # Vertices counted in fewer than 40 of the 100 trials
    small = [key for key, val in intensities.items() if val < 40]
    s = len(small)
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)
    if s == 0:
        break

# Remove the forced vertices from the sampling pool and renormalise
forced = set(nodes_to_add)
keep = np.array([node not in forced for node in node_list], dtype=bool)
node_list = [node for node, kept in zip(node_list, keep) if kept]
c_norm = c_norm[keep]
c_norm = c_norm / sum(c_norm)

# Sizes smaller than the forced set cannot be built by this strategy
for nb in range(0, len(nodes_to_add)):
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 0
for nb in tqdm(range(len(nodes_to_add), upper_bound, 1)):
    num_nodes = nb - len(nodes_to_add)  # Number of nodes to sample
    count = 0
    for i in range(nb_of_iters):
        # Forced vertices plus a weighted random completion
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=num_nodes, replace=False))
        if is_resolving_set(G, nodes, length):
            count += 1
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + (count / nb_of_iters)
for nb in range(upper_bound, G_fb.number_of_nodes()):
    solutions_prob_ms_bc_add[nb] = solutions_prob_ms_bc_add.get(nb, 0) + 1

In [None]:
#with open('simulations/fb_transition_eigen_400iters_10step.pickle', 'wb') as file:
    #pickle.dump(solutions_prob_eigen_fb, file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Transition curves on the Facebook graph, one trace per strategy.
# Changes with respect to the previous version of this cell:
#   * the trace built from `solutions_prob_d` is dropped: that dict is not
#     defined anywhere in this notebook, and sampling proportionally to the
#     degree is what the 'Degree centrality' curve already shows;
#   * `trace1` is no longer plotted: it was never defined in this cell and
#     was a leftover of the Barabási–Albert section;
#   * misspelled legend labels and a stray ')' are fixed.
curves = [
    (solutions_rand_fb, 'Random vertices'),
    (solutions_prob_bc_fb, 'Betweenness centrality'),
    (solutions_prob_dc_fb, 'Degree centrality'),
    (solutions_prob_so_fb, 'Second order centrality'),
    (solutions_prob_cc_fb, 'Closeness centrality'),
    (solutions_prob_ms_bc_add, 'Magic sauce (betweenness + trick)'),
    (solutions_prob_eigen_fb, 'Eigenvector centrality'),
    (solutions_prob_ms_d, 'Magic Sauce (degree)'),
    (solutions_prob_ms_bc, 'Magic sauce (betweenness)'),
]

# Each dict maps a subset size to the estimated resolving probability
traces = [
    go.Scatter(x=list(solutions.keys()), y=list(solutions.values()), mode='markers+lines', name=label)
    for solutions, label in curves
]

# Define the layout
layout = go.Layout(xaxis=dict(title='Cardinality of the subset'),
                   yaxis=dict(title='Probability of resolving the graph'))

# Combine the traces and layout into a figure
fig = go.Figure(data=traces, layout=layout)

# Show the figure
fig.show()

In [None]:
def sim_centrality_box_real(G, centrality_f, nb_graph=1, nb_of_iters=400):
    """Collect the sizes of the smallest resolving prefixes of weighted random orderings of ``G``.

    ``nb_of_iters`` orderings of all the vertices are drawn without
    replacement with probabilities proportional to ``centrality_f(G)``. For
    each ordering the length of the shortest prefix accepted by
    ``is_resolving_set`` is recorded.

    Parameters
    ----------
    G : graph
        Graph to analyse. It is now actually used; the previous version
        overwrote it with the notebook-level ``G_fb``.
    centrality_f : callable
        Maps a graph to a ``{node: score}`` dict.
    nb_graph : int
        Unused; kept so that existing calls remain valid.
    nb_of_iters : int
        Number of orderings drawn.

    Returns
    -------
    list of int
        One prefix length per ordering for which a resolving prefix was found.
    """
    box_sol = []

    length = dict(nx.all_pairs_shortest_path_length(G))

    c = centrality_f(G)

    # Turn the centrality scores into a sampling distribution
    c_values = np.array(list(c.values()))
    c_norm = c_values / sum(c_values)
    node_list = list(c.keys())

    for _ in range(nb_of_iters):
        # Random ordering of all the vertices
        nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
        # The range includes len(G) so that the full vertex set is tested too
        for nb in range(0, len(G) + 1):
            if is_resolving_set(G, nodes[:nb], length):
                box_sol.append(nb)
                break

    return box_sol

In [None]:
# Betweenness: smallest resolving prefix of betweenness-weighted orderings

betweenness_box_sol = []

nb_of_iters = 100

# Fixed Facebook graph together with its all-pairs distances
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G, endpoints=True)

# Turn the centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())

for _ in tqdm(range(nb_of_iters)):
    # Random ordering of all the vertices
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # The range includes len(G) so that the full vertex set is tested too
    for nb in range(0, len(G) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            betweenness_box_sol.append(nb)
            break

In [None]:
# Degree: smallest resolving prefix of degree-weighted orderings

degree_box_sol = []

nb_of_iters = 100

# Fixed Facebook graph together with its all-pairs distances
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Turn the centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())

for _ in range(nb_of_iters):
    # Random ordering of all the vertices
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # The range includes len(G) so that the full vertex set is tested too
    for nb in range(0, len(G) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            degree_box_sol.append(nb)
            break

In [None]:
# Random: smallest resolving prefix of uniformly shuffled vertex orderings

nb_of_iters = 100
rand_box_real = []

# Fixed Facebook graph together with its all-pairs distances
G = G_fb
length = dict(nx.all_pairs_shortest_path_length(G))
node_list = list(G.nodes)

for _ in tqdm(range(nb_of_iters)):
    # Sampling every vertex without replacement yields a random ordering
    nodes = random.sample(node_list, len(G))
    # The range includes len(G) so that the full vertex set is tested too
    for nb in range(0, len(G) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            rand_box_real.append(nb)
            print(nb)
            break

In [None]:
# Strategy 1 with degree: force the vertices derived from the rarely resolved
# ones, then complete the ordering by degree-weighted sampling.

start1_degree_box_real = []

tresh = 50
nb_of_iters = 100

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.degree_centrality(G)

# Turn the degree centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())
nodes_to_add = []

while True:
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # Forced vertices plus a weighted random set of nodes to test
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    # Vertices counted in fewer than `tresh` of the 100 trials
    small = [key for key, val in intensities.items() if val < tresh]
    s = len(small)
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)
    if s == 0:
        break

# Remove the forced vertices from the sampling pool and renormalise
forced = set(nodes_to_add)
keep = np.array([node not in forced for node in node_list], dtype=bool)
node_list_without_indices_to_add = [node for node, kept in zip(node_list, keep) if kept]
c_norm_without_indices_to_add = c_norm[keep]
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

for _ in tqdm(range(nb_of_iters)):
    # Forced vertices first, then a weighted random ordering of the others
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # The range includes len(nodes) so that the complete ordering is tested too
    for nb in range(len(nodes_to_add), len(nodes) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            start1_degree_box_real.append(nb)
            break

In [None]:
# Strategy 1 with betweenness: force the vertices derived from the rarely
# resolved ones, then complete the ordering by betweenness-weighted sampling.

start1_betweenness_box_real = []

tresh = 50
nb_of_iters = 100

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.betweenness_centrality(G, endpoints=True)

# Turn the betweenness centralities into a sampling distribution
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())
nodes_to_add = []

while True:
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # Forced vertices plus a weighted random set of nodes to test
        nodes = nodes_to_add + list(np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False))
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    # Vertices counted in fewer than `tresh` of the 100 trials
    small = [key for key, val in intensities.items() if val < tresh]
    s = len(small)
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)
    if s == 0:
        break

# Remove the forced vertices from the sampling pool and renormalise
forced = set(nodes_to_add)
keep = np.array([node not in forced for node in node_list], dtype=bool)
node_list_without_indices_to_add = [node for node, kept in zip(node_list, keep) if kept]
c_norm_without_indices_to_add = c_norm[keep]
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

for _ in tqdm(range(nb_of_iters)):
    # Forced vertices first, then a weighted random ordering of the others
    nodes = nodes_to_add + list(np.random.choice(node_list_without_indices_to_add, p=c_norm_without_indices_to_add, size=len(node_list_without_indices_to_add), replace=False))
    # The range includes len(nodes) so that the complete ordering is tested too
    for nb in range(len(nodes_to_add), len(nodes) + 1):
        if is_resolving_set(G, nodes[:nb], length):
            start1_betweenness_box_real.append(nb)
            break

In [None]:
# Strategy 2: greedily add the vertex that is least often resolved by
# (current set + 20 uniformly random vertices) until the set resolves the graph
start2_box_real = []

nb_of_iters = 100

G = G_fb
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
all_nodes = list(G.nodes)

for _ in range(nb_of_iters):
    nodes_to_add = []

    while True:
        intensities = {str(key): 0 for key in G.nodes}
        for i in range(nb_of_iters):
            # Current set plus a uniformly random set of nodes to test
            nodes = nodes_to_add + list(np.random.choice(all_nodes, size=nb_of_nodes_in_resolving_set, replace=False))
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        # Only vertices that are not selected yet are candidates; otherwise a
        # tie could select the same vertex over and over without ever
        # enlarging the set.
        already_chosen = set(nodes_to_add)
        candidates = {node: hits for node, hits in intensities.items() if node not in already_chosen}
        hardest_node_to_resolve = min(candidates, key=candidates.get)
        nodes_to_add.append(hardest_node_to_resolve)
        if is_resolving_set(G, nodes_to_add, length):
            start2_box_real.append(len(nodes_to_add))
            break

In [None]:
# Box-plot groups for the Facebook graph: legend label -> resolving-set sizes
dict_result_real = {}
dict_result_real['Strategy 1 degree'] = start1_degree_box_real
dict_result_real['Strategy 1 betweenness'] = start1_betweenness_box_real
dict_result_real['Strategy 2'] = start2_box_real
dict_result_real['Random strategy'] = rand_box_real
dict_result_real['Betweenness strategy'] = betweenness_box_sol
dict_result_real['Degree strategy'] = degree_box_sol

In [None]:
# Box plot of the resolving-set sizes on the Facebook graph, one box per
# strategy, with every individual sample drawn next to its box
fig = go.Figure()

for group, values in dict_result_real.items():
    fig.add_trace(go.Box(y=values, name=group, boxpoints='all'))

# The axis title font is given through `title.font`; the older `titlefont`
# shortcut is deprecated in Plotly.
fig.update_layout(
    yaxis=dict(
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black'),
        )
    )
)
fig.show()

In [None]:
# Here we check if the values of the threshold have a big influence
# NOTE(review): G_calls is only created in the "Copenhagen calls graph"
# section further down, so that section has to be executed before this cell.

dict_tresh = {}

tresh_values = [25]
nb_graph = 1
nb_of_iters = 100

G = G_calls
print(len(G))
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Normalize the degree centralities so they can be used as sampling probabilities
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())
# Position of every node in node_list: O(1) lookups instead of list.index scans
node_position = {node: index for index, node in enumerate(node_list)}
for tresh in tresh_values:
    ms_b_box_sol_real = []
    nodes_to_add = []

    # Phase 1: collect the nodes that are added manually. Each round draws 100
    # centrality-weighted candidate sets and counts, per node, how often
    # set_resolved reports it; nodes reported fewer than `tresh` times are
    # handed to get_nodes_with_diff_neighbors. Stop once no such node is left.
    while True:
        intensities = {str(key): 0 for key in G.nodes}
        for i in range(100):
            # Random set of nodes to test, on top of the nodes already selected
            nodes = nodes_to_add + list(
                np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False)
            )
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        small = [key for key, val in intensities.items() if val < tresh]
        s = len(small)
        nodes_to_add += get_nodes_with_diff_neighbors(G, small)
        if s == 0:
            print("We manually add {} nodes".format(len(nodes_to_add)))
            break

    # Phase 2: remove the manually added nodes from the sampling pool and
    # renormalize the probabilities of the remaining nodes.
    indices = {node_position[i] for i in nodes_to_add}
    node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
    c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
    c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)
    for _ in tqdm(range(nb_of_iters)):
        # Manually added nodes first, then a weighted random order of the rest
        nodes = nodes_to_add + list(
            np.random.choice(
                node_list_without_indices_to_add,
                p=c_norm_without_indices_to_add,
                size=len(node_list_without_indices_to_add),
                replace=False,
            )
        )
        # Record the first prefix length accepted by is_resolving_set
        for nb in range(len(nodes_to_add), len(G)):
            if is_resolving_set(G, nodes[:nb], length):
                ms_b_box_sol_real.append(nb)
                break
    dict_tresh[tresh] = ms_b_box_sol_real

In [None]:
# One box per threshold value, built from the samples stored in dict_tresh
fig = go.Figure(
    data=[go.Box(y=sizes, name=threshold) for threshold, sizes in dict_tresh.items()]
)

fig.show()

## Copenhagen calls graph

In [None]:
# Build the calls graph from the comma-separated edge list; the two extra
# columns are stored as integer edge attributes 'timestamp' and 'duration'.
G_calls = nx.read_edgelist(
    '../Real graphs simulations/Copenhagen graphs/calls.csv/edges.csv',
    delimiter=',',
    data=(('timestamp', int), ('duration', int)),
)

# Report the size of the loaded graph
print('Number of nodes:', len(G_calls))
print('Number of edges:', G_calls.number_of_edges())

In [None]:
# Check whether the calls graph is connected (the cell displays the boolean)
nx.is_connected(G_calls)

In [None]:
# Remove the small components such that the graph becomes connected
connected_components = nx.connected_components(G_calls)
biggest = max(connected_components, key=len)
# subgraph() only returns a read-only filtered view of the original graph.
# copy() turns it into an independent graph, so the shortest-path and
# centrality computations below do not go through the view's filters.
G_calls = G_calls.subgraph(biggest).copy()
# Sanity check: the retained component must be connected
nx.is_connected(G_calls)

In [None]:
# Number of nodes in G_calls (after the component filtering of the previous cell)
len(G_calls)

In [None]:
# Betweenness: order the nodes at random, weighted by betweenness centrality,
# and record how long a prefix of that order must be to form a resolving set.

betweenness_box_sol_calls = []

nb_of_iters = 100

# Work on the calls graph and precompute all shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.betweenness_centrality(G, endpoints=True)

# Sampling probabilities, aligned with node_list
node_list = list(c.keys())
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in tqdm(range(nb_of_iters)):
    # Weighted random ordering of all the nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Shortest prefix accepted by is_resolving_set, if there is one
    first_resolving = next(
        (size for size in range(len(G)) if is_resolving_set(G, nodes[:size], length)),
        None,
    )
    if first_resolving is not None:
        betweenness_box_sol_calls.append(first_resolving)

In [None]:
# Degree: order the nodes at random, weighted by degree centrality, and record
# how long a prefix of that order must be to form a resolving set.

degree_box_sol_calls = []

nb_of_iters = 100

# Work on the calls graph and precompute all shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

c = nx.degree_centrality(G)

# Sampling probabilities, aligned with node_list
node_list = list(c.keys())
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)

for _ in range(nb_of_iters):
    # Weighted random ordering of all the nodes
    nodes = np.random.choice(node_list, p=c_norm, size=len(G), replace=False)
    # Shortest prefix accepted by is_resolving_set, if there is one
    first_resolving = next(
        (size for size in range(len(G)) if is_resolving_set(G, nodes[:size], length)),
        None,
    )
    if first_resolving is not None:
        degree_box_sol_calls.append(first_resolving)

In [None]:
# Random: order the nodes uniformly at random and record how long a prefix of
# that order must be to form a resolving set.

nb_of_iters = 100
rand_box_real_calls = []

# Work on the calls graph and precompute all shortest-path lengths
G = G_calls
length = dict(nx.all_pairs_shortest_path_length(G))

for _ in tqdm(range(nb_of_iters)):
    # Uniform random ordering of all the nodes (random.sample returns a list)
    nodes = random.sample(list(G.nodes), len(G))
    # Shortest prefix accepted by is_resolving_set, if there is one
    first_resolving = next(
        (size for size in range(len(G)) if is_resolving_set(G, nodes[:size], length)),
        None,
    )
    if first_resolving is not None:
        rand_box_real_calls.append(first_resolving)

In [None]:
# Strategy 1 with degree: first collect the nodes that random degree-weighted
# candidate sets rarely cover (they are added manually), then complete the set
# with degree-weighted random nodes and record the size that was needed.

start1_degree_box_real_calls = []

tresh = 25
nb_of_iters = 100

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.degree_centrality(G)

# Normalize the degree centralities so they can be used as sampling probabilities
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())
ms_b_box_sol_real = []
nodes_to_add = []

# Phase 1: each round draws 100 candidate sets and counts, per node, how often
# set_resolved reports it; nodes reported fewer than `tresh` times are handed
# to get_nodes_with_diff_neighbors. Stop once no such node is left.
while True:
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # Random set of nodes to test, on top of the nodes already selected
        nodes = nodes_to_add + list(
            np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False)
        )
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    small = [key for key, val in intensities.items() if val < tresh]
    s = len(small)
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)

    if s == 0:
        print("We manually add {} nodes".format(len(nodes_to_add)))
        break

# Phase 2: remove the manually added nodes from the sampling pool. A dict gives
# the position of each node in O(1) and a set gives O(1) membership tests,
# replacing the repeated list.index / list membership scans.
node_position = {node: index for index, node in enumerate(node_list)}
indices = {node_position[i] for i in nodes_to_add}
node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

for _ in tqdm(range(nb_of_iters)):
    # Manually added nodes first, then a weighted random order of the rest
    nodes = nodes_to_add + list(
        np.random.choice(
            node_list_without_indices_to_add,
            p=c_norm_without_indices_to_add,
            size=len(node_list_without_indices_to_add),
            replace=False,
        )
    )
    # Record the first prefix length accepted by is_resolving_set
    for nb in range(len(nodes_to_add), len(G)):
        if is_resolving_set(G, nodes[:nb], length):
            start1_degree_box_real_calls.append(nb)
            break

In [None]:
# Strategy 1 with betweenness: first collect the nodes that random
# betweenness-weighted candidate sets rarely cover (they are added manually),
# then complete the set with betweenness-weighted random nodes and record the
# size that was needed.

start1_betweenness_box_real_calls = []

tresh = 15
nb_of_iters = 100

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))
c = nx.betweenness_centrality(G, endpoints=True)

# Normalize the betweenness centralities so they can be used as sampling probabilities
c_values = np.array(list(c.values()))
c_norm = c_values / sum(c_values)
node_list = list(c.keys())
ms_b_box_sol_real = []
nodes_to_add = []

# Phase 1: each round draws 100 candidate sets and counts, per node, how often
# set_resolved reports it; nodes reported fewer than `tresh` times are handed
# to get_nodes_with_diff_neighbors. Stop once no such node is left.
while True:
    intensities = {str(key): 0 for key in G.nodes}
    for i in range(100):
        # Random set of nodes to test, on top of the nodes already selected
        nodes = nodes_to_add + list(
            np.random.choice(node_list, p=c_norm, size=nb_of_nodes_in_resolving_set, replace=False)
        )
        resolved = set_resolved(G, nodes, length)
        for node in resolved:
            intensities[node] += 1

    small = [key for key, val in intensities.items() if val < tresh]
    s = len(small)
    nodes_to_add += get_nodes_with_diff_neighbors(G, small)
    if s == 0:
        print("We manually add {} nodes".format(len(nodes_to_add)))
        break

# Phase 2: remove the manually added nodes from the sampling pool. A dict gives
# the position of each node in O(1) and a set gives O(1) membership tests,
# replacing the repeated list.index / list membership scans.
node_position = {node: index for index, node in enumerate(node_list)}
indices = {node_position[i] for i in nodes_to_add}
node_list_without_indices_to_add = [element for index, element in enumerate(node_list) if index not in indices]
c_norm_without_indices_to_add = np.array([element for index, element in enumerate(c_norm) if index not in indices])
c_norm_without_indices_to_add = c_norm_without_indices_to_add / sum(c_norm_without_indices_to_add)

for _ in tqdm(range(nb_of_iters)):
    # Manually added nodes first, then a weighted random order of the rest
    nodes = nodes_to_add + list(
        np.random.choice(
            node_list_without_indices_to_add,
            p=c_norm_without_indices_to_add,
            size=len(node_list_without_indices_to_add),
            replace=False,
        )
    )
    # Record the first prefix length accepted by is_resolving_set
    for nb in range(len(nodes_to_add), len(G)):
        if is_resolving_set(G, nodes[:nb], length):
            start1_betweenness_box_real_calls.append(nb)
            break

In [None]:
# Strategy 2: build a resolving set greedily. In every round we draw random
# candidate sets, count how often each node is reported by set_resolved, and
# add the node with the lowest count. The size of the final set is recorded
# in start2_box_real_calls, once per outer trial.
start2_box_real_calls = []

nb_of_iters = 100

G = G_calls
nb_of_nodes_in_resolving_set = 20

length = dict(nx.all_pairs_shortest_path_length(G))

# Materialise the node list once instead of converting G.nodes on every draw
all_nodes = list(G.nodes)

for _ in range(nb_of_iters):
    nodes_to_add = []
    already_added = set()

    while True:
        intensities = {str(key): 0 for key in G.nodes}
        for i in range(nb_of_iters):
            # Random set of nodes to test, on top of the nodes already selected
            nodes = nodes_to_add + list(
                np.random.choice(all_nodes, size=nb_of_nodes_in_resolving_set, replace=False)
            )
            resolved = set_resolved(G, nodes, length)
            for node in resolved:
                intensities[node] += 1

        # Only consider nodes that were not selected yet. Without this filter,
        # a tie between all counts makes min() return the first key again and
        # again, so nodes_to_add never changes as a set and the loop never ends.
        hardest_node_to_resolve = min(
            (node for node in intensities if node not in already_added),
            key=intensities.get,
        )
        nodes_to_add.append(hardest_node_to_resolve)
        already_added.add(hardest_node_to_resolve)
        if is_resolving_set(G, nodes_to_add, length):
            start2_box_real_calls.append(len(nodes_to_add))
            break

In [None]:
# Resolving-set sizes collected on the calls graph for every strategy, keyed by
# the label shown on the box plot. The insertion order is the order of the boxes.
dict_result_real = dict([
    ('Strategy 1 degree', start1_degree_box_real_calls),
    ('Strategy 1 betweenness', start1_betweenness_box_real_calls),
    ('Strategy 2', start2_box_real_calls),
    ('Random strategy', rand_box_real_calls),
    ('Betweenness strategy', betweenness_box_sol_calls),
    ('Degree strategy', degree_box_sol_calls),
])

In [None]:
# Box plot comparing the resolving-set sizes obtained by each strategy
fig = go.Figure()

# One box per strategy; boxpoints='all' also draws every individual sample
for group, values in dict_result_real.items():
    fig.add_trace(go.Box(y=values, name=group, boxpoints='all'))

# The axis title font is set through the nested `title.font` attribute: the
# flat `titlefont` shortcut is deprecated and rejected by recent Plotly versions.
fig.update_layout(
    yaxis=dict(
        title=dict(
            text="Size of the resolving set",
            font=dict(size=12, color='black'),
        )
    )
)
fig.show()