In [1]:
import networkx as nx
import random
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from helpers import *
from tqdm import tqdm
import plotly.graph_objs as go

In [2]:
# Ten hex colours; the plotting cells below index this list by trace position
# so that a simulated curve and its theoretical line share one colour.
colors = [
    '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
    '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
]

In [3]:
def f1c(n, p, k):
    """Evaluate ``2*log(n) / log(1 / (p**2 + (1-p)**2))``.

    Args:
        n (int): number of nodes
        p (float): probability of two nodes being connected
        k: accepted but not used by the formula

    Returns:
        float: theoretical value
    """
    # p**2 + (1-p)**2 is the only place where p enters the formula.
    base = p**2 + (1 - p)**2
    numerator = 2 * np.log(n)
    denominator = np.log(1 / base)
    return numerator / denominator


def f12c(n, p, q, k):
    """Evaluate ``log(n * (1 - gamma)) / log(1 / gamma)``.

    ``gamma`` mixes the two probabilities as
    ``sqrt(p**2 + (1-p)**2)**(1/k) * sqrt(q**2 + (1-q)**2)**((k-1)/k)``.

    Args:
        n (int): number of nodes
        p (float): first probability (callers in this notebook pass the
            diagonal entry of the block probability matrix)
        q (float): second probability (callers pass the off-diagonal entry)
        k (int): sets the exponents 1/k and (k-1)/k (callers pass the
            number of clusters)

    Returns:
        float: theoretical value
    """
    p_factor = np.sqrt(p**2 + (1 - p)**2) ** (1 / k)
    q_factor = np.sqrt(q**2 + (1 - q)**2) ** ((k - 1) / k)
    gamma = p_factor * q_factor
    return np.log(n * (1 - gamma)) / np.log(1 / gamma)

# Stochastic block model graphs

### 2 clusters, p and n constant

In [5]:
# Parameters of the two-cluster experiment.
n = 100    # split as int(n/2) nodes per cluster by the simulation cells
p = 0.5    # diagonal entry of the block probability matrix
q_range = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])    # off-diagonal values swept below
# No graph is built here: the simulation cells create one per value of q.

In [None]:
# For every q > 0 and every subset size nb, estimate the probability that a
# uniformly random nb-subset of nodes passes is_resolving_set on a
# two-block stochastic block model (one graph is drawn per q).
sol_all_q = {}
for q in tqdm(q_range[1:]):
    block_sizes = [int(n/2), int(n/2)]
    block_probs = [[p, q], [q, p]]
    G = nx.stochastic_block_model(block_sizes, block_probs)
    length = dict(nx.all_pairs_shortest_path_length(G))
    nb_of_iters = 300
    node_list = list(G.nodes())

    solutions = {}
    for nb in range(n):
        # Number of the nb_of_iters random subsets of size nb that resolve G.
        hits = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, nb)), length)
        )
        solutions[nb] = hits / nb_of_iters
    sol_all_q[q] = solutions

 60%|███████████████████████████                  | 3/5 [00:59<00:39, 19.93s/it]

In [38]:
# q = 0 entry of the sweep: simulate a single Erdos-Renyi graph on int(n/2)
# nodes, then store the squared success rate under key 2*nb (presumably
# treating the two disconnected clusters as independent -- TODO confirm).
G = nx.erdos_renyi_graph(int(n/2), p)
length = dict(nx.all_pairs_shortest_path_length(G))
nb_of_iters = 300
node_list = list(G.nodes())
solutions = {}

for nb in tqdm(range(int(n/2))):
    # Number of the nb_of_iters random subsets of size nb that resolve G.
    hits = sum(
        1
        for _ in range(nb_of_iters)
        if is_resolving_set(G, set(random.sample(node_list, nb)), length)
    )
    solutions[2*nb] = (hits/nb_of_iters)**2

sol_all_q[0] = solutions

100%|███████████████████████████████████████████| 50/50 [00:04<00:00, 10.63it/s]


In [40]:
# One trace per q: estimated probability of resolving the graph against the
# subset cardinality. Requires sol_all_q to hold an entry for every value in
# q_range, including the q = 0 entry added by its own cell.
d = {
    "scatter{}".format(q_val): go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        # The swept quantity is q; p is fixed. The label used to print q as "p=".
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )
    for q_val in q_range
}

data = list(d.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

In [86]:
# Simulated curves (one per q) plus, in the matching colour, a vertical line
# at the theoretical value for that q. Requires sol_all_q to hold an entry
# for every value in q_range, including q = 0.
d = {}
vl = {}
for idx, q_val in enumerate(q_range):
    d["scatter{}".format(q_val)] = go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        line=dict(color=colors[idx]),
        # The swept quantity is q; p is fixed. The label used to print q as "p=".
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )

    # f1c takes (n, p, k) and raised TypeError when given four arguments.
    # The argument list (n, p, q, k) is that of f12c, which the
    # three-cluster cell also uses; k = 2 is the number of clusters here.
    threshold = f12c(n, p, q_val, 2)
    vl["vertical_line{}".format(q_val)] = go.Scatter(
        x=[threshold, threshold],
        y=[0, 1],
        mode='lines',
        line=dict(color=colors[idx]),
        name=q_val,
    )

data = list(d.values()) + list(vl.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

KeyError: 0.0

### 3 clusters, p and n constant

In [28]:
# Parameters of the three-cluster experiment.
n = 100    # split as int(n/3) nodes per cluster by the simulation cells
p = 0.5    # diagonal entry of the block probability matrix
q_range = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])    # off-diagonal values swept below
# No graph is built here. Doing so read a `q` left over from an earlier
# cell's loop, and the simulation cells build their own graph for every q
# before using it (same convention as the two-cluster parameter cell).
#G = nx.stochastic_block_model([int(n/3), int(n/3), int(n/3)], [[p, q, q], [q, p, q], [q, q, p]])
#length = dict(nx.all_pairs_shortest_path_length(G))

In [29]:
# For every q > 0 and every subset size nb, estimate the probability that a
# uniformly random nb-subset of nodes passes is_resolving_set on a
# three-block stochastic block model (one graph is drawn per q).
sol_all_q = {}
for q in tqdm(q_range[1:]):
    block_sizes = [int(n/3), int(n/3), int(n/3)]
    block_probs = [[p, q, q], [q, p, q], [q, q, p]]
    G = nx.stochastic_block_model(block_sizes, block_probs)
    length = dict(nx.all_pairs_shortest_path_length(G))
    nb_of_iters = 300
    node_list = list(G.nodes())

    solutions = {}
    for nb in range(n):
        # Number of the nb_of_iters random subsets of size nb that resolve G.
        hits = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, nb)), length)
        )
        solutions[nb] = hits / nb_of_iters
    sol_all_q[q] = solutions

100%|█████████████████████████████████████████████| 5/5 [01:51<00:00, 22.33s/it]


In [30]:
# q = 0 entry of the three-cluster sweep: simulate a single Erdos-Renyi graph
# on int(n/3) nodes and combine three copies of its success rate
# (presumably treating the disconnected clusters as independent -- TODO confirm).
nb_clusters = 3
G = nx.erdos_renyi_graph(int(n/3), p)
length = dict(nx.all_pairs_shortest_path_length(G))
solutions = {}
nb_of_iters = 300
node_list = list(G.nodes())  # loop-invariant, so built once

for nb in tqdm(range(0, int(n/3))):
    # Number of the nb_of_iters random subsets of size nb that resolve G.
    count = sum(
        1
        for _ in range(nb_of_iters)
        if is_resolving_set(G, set(random.sample(node_list, nb)), length)
    )
    # nb nodes in each of the three clusters gives 3*nb nodes in total. The
    # key used to be 2*nb, copied from the two-cluster cell, which put this
    # curve on the wrong x positions while the exponent was already 3.
    solutions[nb_clusters*nb] = (count/nb_of_iters)**nb_clusters

sol_all_q[0] = solutions

100%|███████████████████████████████████████████| 33/33 [00:01<00:00, 28.56it/s]


In [31]:
# One trace per q: estimated probability of resolving the graph against the
# subset cardinality. Requires sol_all_q to hold an entry for every value in
# q_range, including the q = 0 entry added by its own cell.
d = {
    "scatter{}".format(q_val): go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        # The swept quantity is q; p is fixed. The label used to print q as "p=".
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )
    for q_val in q_range
}

data = list(d.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

In [None]:
# Simulated curves (one per q) plus, in the matching colour, a vertical line
# at f12c(n, p, q, 3) for that q. Requires sol_all_q to hold an entry for
# every value in q_range, including q = 0.
d = {}
vl = {}
for idx, q_val in enumerate(q_range):
    d["scatter{}".format(q_val)] = go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        line=dict(color=colors[idx]),
        # The swept quantity is q; p is fixed. The label used to print q as "p=".
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )

    # Theoretical value with k = 3 clusters, evaluated once per q.
    threshold = f12c(n, p, q_val, 3)
    vl["vertical_line{}".format(q_val)] = go.Scatter(
        x=[threshold, threshold],
        y=[0, 1],
        mode='lines',
        line=dict(color=colors[idx]),
        name=q_val,
    )

# Plain list() calls avoid the old comprehension variable that shadowed `vl`.
data = list(d.values()) + list(vl.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

### Simulations of the metric dimension (MD) with constant p, for several values of q, as a function of n

In [81]:
# For each q > 0 and each graph size n, binary-search the subset cardinality
# at which the estimated probability of resolving the graph crosses 0.5.
# The result is stored as sol_all_q[q][n] = cardinality.
n_range = range(100, 1001, 100)
sol_all_q = {}
for q in q_range[1:]:
    solutions = {}
    # NOTE: this loop rebinds the notebook-level `n`; it equals 1000 afterwards.
    for n in tqdm(n_range):
        G = nx.stochastic_block_model([int(n/2), int(n/2)], [[p, q], [q, p]])
        length = dict(nx.all_pairs_shortest_path_length(G))
        node_list = list(G.nodes())  # loop-invariant, so built once per graph
        nb_of_iters = 50

        high = n
        low = 0
        # Reset the bracket probabilities for every graph. They used to be
        # assigned only inside the loop, so a search that never took one
        # branch reused the value from the previous graph (or raised
        # NameError on the first one). Assumes the empty set never resolves
        # and the full node set always does -- TODO confirm against
        # is_resolving_set.
        v_low = 0.0
        v_high = 1.0

        while low + 1 < high:
            middle = (high+low) // 2
            # Number of the nb_of_iters random subsets of size `middle` that resolve G.
            count = sum(
                1
                for _ in range(nb_of_iters)
                if is_resolving_set(G, set(random.sample(node_list, middle)), length)
            )
            v = count / nb_of_iters
            if v > 0.5:
                high = middle
                v_high = v
            else:
                low = middle
                v_low = v

        # Keep whichever end of the final bracket is closer to probability 0.5.
        if abs(v_low - 0.5) < abs(v_high - 0.5):
            solutions[n] = low
        else:
            solutions[n] = high

    sol_all_q[q] = solutions

100%|███████████████████████████████████████████| 10/10 [00:46<00:00,  4.67s/it]
100%|███████████████████████████████████████████| 10/10 [00:41<00:00,  4.14s/it]
100%|███████████████████████████████████████████| 10/10 [00:44<00:00,  4.44s/it]
100%|███████████████████████████████████████████| 10/10 [00:49<00:00,  4.93s/it]
100%|███████████████████████████████████████████| 10/10 [00:48<00:00,  4.89s/it]


In [83]:
# One trace per q: the cardinality found by the binary search (y) against the
# number of nodes n (x). Expects sol_all_q[q] to be a dict {n: cardinality}.
d = {}
for i in q_range[1:]:
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        # n varies along the x axis and i is q, so the label reports p and q
        # rather than the leftover loop value of n and q printed as "p=".
        name='Simulations with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

data = list(d.values())

# Define the layout. The title and axis labels used to be copied from the
# probability-vs-cardinality plots and did not describe this figure.
layout = go.Layout(title='Subset cardinality with resolving probability closest to 0.5, as a function of n',
                   xaxis=dict(title='Number of nodes n', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Subset cardinality'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

In [131]:
# Simulated cardinalities against n (one trace per q), overlaid with the
# curve f12c(n, p, q, 2) in the matching colour.
# Expects sol_all_q[q] to be a dict {n: cardinality}, i.e. the output of the
# binary search over n. If the q-sweep cell has overwritten sol_all_q with
# plain integers, this cell fails with AttributeError.

# Start at 1: f12c takes log(n * (1 - gamma)), which is -inf at n = 0.
r = np.arange(1, 1001)

d = {}
for count, i in enumerate(q_range[1:]):
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        line=dict(color=colors[count]),
        # n varies along the x axis and i is q, hence the p/q label.
        name='Simulations with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

for count, i in enumerate(q_range[1:]):
    d["theo{}".format(i)] = go.Scatter(
        x=list(r),
        y=f12c(r, p, i, 2),
        mode='lines',
        line=dict(color=colors[count]),
        # These traces are the closed-form curve; they used to be labelled "Simulations".
        name='Theory with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

data = list(d.values())

# Define the layout. The title and axis labels used to be copied from the
# probability-vs-cardinality plots and did not describe this figure.
layout = go.Layout(title='Subset cardinality with resolving probability closest to 0.5, as a function of n',
                   xaxis=dict(title='Number of nodes n', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Subset cardinality'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

AttributeError: 'int' object has no attribute 'keys'

In [123]:
# For a fixed n, binary-search for each q > 0 the subset cardinality at which
# the estimated probability of resolving the graph crosses 0.5.
# NOTE: sol_all_q is rebound here to a flat dict {q: cardinality}; cells that
# expect the nested {q: {n: cardinality}} layout must not be run afterwards.
n = 1000
sol_all_q = {}
# The earlier np.linspace(0, 0.5, 11) assignment was dead code: this value
# overwrote it before q_range was read.
q_range = np.linspace(0,1,11)
for q in tqdm(q_range[1:]):
    G = nx.stochastic_block_model([int(n/2), int(n/2)], [[p, q], [q, p]])
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())  # loop-invariant, so built once per graph
    nb_of_iters = 50

    high = n
    low = 0
    # Reset the bracket probabilities for every graph. They used to be
    # assigned only inside the loop, so a search that never took one branch
    # reused the value from the previous q (or raised NameError on the first
    # one). Assumes the empty set never resolves and the full node set always
    # does -- TODO confirm against is_resolving_set.
    v_low = 0.0
    v_high = 1.0

    while low + 1 < high:
        middle = (high+low) // 2
        # Number of the nb_of_iters random subsets of size `middle` that resolve G.
        count = sum(
            1
            for _ in range(nb_of_iters)
            if is_resolving_set(G, set(random.sample(node_list, middle)), length)
        )
        v = count / nb_of_iters
        if v > 0.5:
            high = middle
            v_high = v
        else:
            low = middle
            v_low = v

    # Keep whichever end of the final bracket is closer to probability 0.5.
    if abs(v_low - 0.5) < abs(v_high - 0.5):
        sol_all_q[q] = low
    else:
        sol_all_q[q] = high

100%|███████████████████████████████████████████| 10/10 [02:18<00:00, 13.83s/it]


In [128]:
def ff(n,p,q,k):
    """Evaluate ``-1/log(gamma) - log(1 - gamma) / (log(n) * log(gamma))``.

    ``gamma`` is
    ``sqrt(p**2 + (1-p)**2)**(1/k) * sqrt(q**2 + (1-q)**2)**((k-1)/k)``.
    Algebraically the result equals ``log(n * (1 - gamma)) / log(1 / gamma)``
    divided by ``log(n)``.

    Args:
        n (int): number of nodes
        p (float): first probability entering gamma
        q (float or array): second probability entering gamma; arrays are
            handled element-wise by NumPy
        k (int): sets the exponents 1/k and (k-1)/k

    Returns:
        float or array: value of the expression above
    """
    # The old body also computed sqrt(p**2 + (1-p)**2) into a local that was
    # never read; it has been dropped.
    gamma2 = (np.sqrt(p**2 + (1-p)**2)**(1/k)) * (np.sqrt(q**2 + (1-q)**2)**((k-1)/k))

    return (-1/np.log(gamma2)) + ((-np.log(1-gamma2))/(np.log(n)*np.log(gamma2)))

In [129]:
# Compare, as a function of q, the simulated cardinality divided by log(n)
# with the closed-form curve ff(n, 0.5, q, 2).
# Expects sol_all_q to be the flat dict {q: cardinality} from the q sweep.
r = np.linspace(0, 1, 30)

# Normalise the simulated cardinalities by log(n).
y = [cardinality/np.log(n) for cardinality in sol_all_q.values()]

sss = go.Scatter(x=list(sol_all_q.keys()), y=y, mode='markers+lines', name='Simulations with n={}'.format(n))
# This trace is the closed-form curve; it used to be labelled "Simulations".
sss1 = go.Scatter(x=list(r), y=ff(n,0.5,r,2), mode='markers+lines', name='Theory with n={}'.format(n))


# Define the layout. The title and axis labels used to be copied from the
# probability-vs-cardinality plots and did not describe this figure.
layout = go.Layout(title='Subset cardinality divided by log(n), as a function of q',
                   xaxis=dict(title='q', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Subset cardinality / log(n)'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=[sss, sss1], layout=layout)

# Show the figure
fig.show()