In [None]:
import networkx as nx
import random
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from helpers import *
from tqdm import tqdm
import plotly.graph_objs as go

# Stochastic block model graphs

### 2 clusters, p and n constant

In [None]:
# Parameters of the two-community experiment.
# n is split into two blocks of int(n/2) nodes by the simulation cell;
# p is used on the diagonal of the block probability matrix, q off-diagonal.
n, p = 200, 0.5
# Values of q to sweep: 0, 0.1, ..., 0.5
q_range = np.array([tenth / 10 for tenth in range(6)])

In [None]:
# Monte-Carlo estimate, for every q in q_range, of the fraction of uniformly
# sampled node subsets of a given cardinality that pass is_resolving_set.
# Result: sol_all_q[q][cardinality] -> estimated fraction.
nb_of_iters = 300
stop = 50  # cardinalities >= stop are not simulated (speeds up computations)
half = int(n/2)

sol_all_q = {}
for q in tqdm(q_range[1:]):
    G = nx.stochastic_block_model([half, half], [[p, q], [q, p]])
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    solutions = {}
    for k in range(stop):
        hits = sum(
            bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
            for _ in range(nb_of_iters)
        )
        solutions[k] = hits / nb_of_iters
    # Cardinalities that were skipped are recorded as always resolving
    solutions.update({k: 1 for k in range(stop, n)})
    sol_all_q[q] = solutions

# q = 0 is handled separately because the SBM graph would not be connected.
# A single Erdos-Renyi graph on n/2 nodes is simulated instead; the fraction
# measured for k sampled nodes is squared and stored at cardinality 2*k.
G = nx.erdos_renyi_graph(half, p)
length = dict(nx.all_pairs_shortest_path_length(G))
node_list = list(G.nodes())

solutions = {}
for k in tqdm(range(half)):
    hits = sum(
        bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
        for _ in range(nb_of_iters)
    )
    solutions[2*k] = (hits/nb_of_iters)**2

sol_all_q[0] = solutions

In [None]:
# One simulation trace per value of q, stored under a unique key
d = {
    "scatter{}".format(q_val): go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )
    for q_val in q_range
}
data = list(d.values())

# Layout: centred title, axis labels and legend position
layout = go.Layout(
    title='SBM graph with two communities',
    title_x=0.5,
    xaxis=dict(title='Cardinality of the subset'),
    yaxis=dict(title='Probability of resolving the graph'),
    legend=dict(x=0.6, y=0.08, orientation='v'),
)

# Assemble the figure from traces and layout, then render it
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Simulation curves (d) and vertical conjecture lines (vl), one pair per q.
# A curve and its line share the colour colors[idx]
# (`colors` is presumably provided by the `helpers` star import).
d = {}
vl = {}
for idx, q_val in enumerate(q_range):
    curve = sol_all_q[q_val]
    d["scatter{}".format(q_val)] = go.Scatter(
        x=list(curve.keys()),
        y=list(curve.values()),
        mode='markers+lines',
        line=dict(color=colors[idx]),
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(q_val)),
    )

    # Vertical segment from y=0 to y=1 at the value returned by cvalue_communities
    threshold = cvalue_communities(n, p, q_val, 2)
    vl["vertical_line{}".format(q_val)] = go.Scatter(
        x=[threshold, threshold],
        y=[0, 1],
        mode='lines',
        line=dict(color=colors[idx]),
        name="Conjecture value with q={:.2f}".format(q_val),
    )

# All simulation traces first, then all conjecture lines
data = list(d.values()) + list(vl.values())

# Layout: centred title, axis labels and legend position
layout = go.Layout(
    title='SBM graph with two communities',
    title_x=0.5,
    xaxis=dict(title='Cardinality of the subset'),
    yaxis=dict(title='Probability of resolving the graph'),
    legend=dict(x=0.62, y=0.00, orientation='v'),
)

# Assemble the figure from traces and layout, then render it
fig = go.Figure(data=data, layout=layout)
fig.show()

### 3 clusters, p and n constant

In [None]:
# Parameters of the three-community experiment.
n = 150            # total number of nodes (three blocks of int(n/3) nodes)
p = 0.5            # probability used on the diagonal of the block matrix
nb_of_iters = 500  # random subsets drawn per cardinality
q_range = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])  # off-diagonal probabilities to sweep

# No graph is built here: the simulation cell creates its own graph (and its
# shortest-path lengths) for every q, so building one at this point would only
# depend on a `q` left over from an earlier loop and then be discarded.

In [None]:
# Monte-Carlo estimate, for every q in q_range, of the fraction of uniformly
# sampled node subsets of a given cardinality that pass is_resolving_set,
# on a stochastic block model with three blocks of int(n/3) nodes.
block = int(n/3)

sol_all_q = {}
for q in tqdm(q_range[1:]):
    G = nx.stochastic_block_model([block, block, block], [[p, q, q], [q, p, q], [q, q, p]])
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    solutions = {}
    for k in range(G.number_of_nodes()):
        hits = sum(
            bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
            for _ in range(nb_of_iters)
        )
        solutions[k] = hits / nb_of_iters
    sol_all_q[q] = solutions

# q = 0 is handled separately because the SBM graph would not be connected.
# A single Erdos-Renyi graph on n/3 nodes is simulated instead; the fraction
# measured for k sampled nodes is cubed and stored at cardinality 3*k.
G = nx.erdos_renyi_graph(block, p)
length = dict(nx.all_pairs_shortest_path_length(G))
node_list = list(G.nodes())

solutions = {}
for k in tqdm(range(block)):
    hits = sum(
        bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
        for _ in range(nb_of_iters)
    )
    solutions[3*k] = (hits/nb_of_iters)**3

sol_all_q[0] = solutions

In [None]:
# One simulation trace per value of q.
# The legend reports n, p and q explicitly: the loop variable is the
# inter-community probability q, not p.
d = {}
for i in q_range:
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(i)),
    )

data = list(d.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

In [None]:
# Simulation curves (d) and vertical conjecture lines (vl), one pair per q.
# A curve and its line share the colour colors[count]
# (`colors` is presumably provided by the `helpers` star import).
d = {}
vl = {}
for count, i in enumerate(q_range):
    # The loop variable is q; the legend reports n, p and q explicitly.
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        line=dict(color=colors[count]),
        name='Simulations with n={}, p={:.2f} and q={:.2f}'.format(n, p, float(i)),
    )

data = list(d.values())

for count, i in enumerate(q_range):
    # Vertical segment from y=0 to y=1 at the value returned by
    # cvalue_communities for three communities.
    c_value = cvalue_communities(n, p, i, 3)
    vl["vertical_line{}".format(i)] = go.Scatter(
        x=[c_value, c_value],
        y=[0, 1],
        mode='lines',
        line=dict(color=colors[count]),
        # Descriptive string label instead of the bare number
        name="Conjecture value with q={:.2f}".format(i),
    )

data = data + list(vl.values())

# Define the layout
layout = go.Layout(title='Probability of resolving the graph as a function of the subset cardinality',
                   xaxis=dict(title='Cardinality of the subset', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

### m clusters, p and n constant

In [None]:
# Parameters of the experiment with an arbitrary number of communities.
nb_cluster, n = 3, 100
p, nb_of_iters = 0.75, 100

# Values of q to sweep
q_range = np.array([0, 0.2, 0.4, 0.5, 0.6, 0.8, 1])
# Every community gets int(n/nb_cluster) nodes
community_size = int(n/nb_cluster)
sizes = np.full(nb_cluster, community_size)

In [None]:
# Monte-Carlo estimate, for every q in q_range, of the fraction of uniformly
# sampled node subsets of a given cardinality that pass is_resolving_set,
# on a stochastic block model with nb_cluster blocks.
stop = 75  # cardinalities >= stop are not simulated
community_size = int(n/nb_cluster)

sol_all_q = {}
for q in tqdm(q_range[1:]):
    # Block probability matrix built by the diag_mat helper from p and q
    edges_prob = diag_mat(nb_cluster, p, q)
    G = nx.stochastic_block_model(sizes, edges_prob)
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    solutions = {}
    for k in range(stop):
        hits = sum(
            bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
            for _ in range(nb_of_iters)
        )
        solutions[k] = hits / nb_of_iters
    # Cardinalities that were skipped are recorded as always resolving
    solutions.update({k: 1 for k in range(stop, n)})
    sol_all_q[q] = solutions

# q = 0 is handled separately because the SBM graph would not be connected.
# A single Erdos-Renyi graph on n/nb_cluster nodes is simulated instead; the
# fraction measured for k sampled nodes is raised to the power nb_cluster and
# stored at cardinality nb_cluster*k.
G = nx.erdos_renyi_graph(community_size, p)
length = dict(nx.all_pairs_shortest_path_length(G))
node_list = list(G.nodes())

solutions = {}
for k in tqdm(range(community_size)):
    hits = sum(
        bool(is_resolving_set(G, set(random.sample(node_list, k)), length))
        for _ in range(nb_of_iters)
    )
    solutions[nb_cluster*k] = (hits/nb_of_iters)**nb_cluster

sol_all_q[0] = solutions

In [None]:
# One simulation trace per value of q, stored under a unique key
d = {
    "scatter{}".format(q_val): go.Scatter(
        x=list(sol_all_q[q_val].keys()),
        y=list(sol_all_q[q_val].values()),
        mode='markers+lines',
        name='q={:.2f}'.format(float(q_val)),
    )
    for q_val in q_range
}
data = list(d.values())

# Layout: the title reports the number of communities, their size and p
figure_title = 'SBM graph with {} communities of size {:.0f} with p={}'.format(nb_cluster, n/nb_cluster, p)
layout = go.Layout(
    title=figure_title,
    title_x=0.5,
    xaxis=dict(title='Cardinality of the subset'),
    yaxis=dict(title='Probability of resolving the graph'),
    legend=dict(x=0.87, y=0.05, orientation='v'),
)

# Assemble the figure from traces and layout, then render it
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Simulation curves (d) and vertical conjecture lines (vl), one pair per q.
# A curve and its line share the colour colors[count]
# (`colors` is presumably provided by the `helpers` star import).
d = {}
vl = {}
for count, i in enumerate(q_range):
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        line=dict(color=colors[count]),
        name='q={:.2f}'.format(float(i)),
    )

data = list(d.values())

for count, i in enumerate(q_range):
    # The conjecture is evaluated for the number of communities that was
    # actually simulated (nb_cluster), not for a hard-coded 2.
    c_value = cvalue_communities(n, p, i, nb_cluster)
    vl["vertical_line{}".format(i)] = go.Scatter(
        x=[c_value, c_value],
        y=[0, 1],
        mode='lines',
        line=dict(color=colors[count]),
        name="Conjecture value with q={:.2f}".format(i),
        showlegend=False,
    )

data = data + list(vl.values())

# Define the layout
layout = go.Layout(title='SBM graph with {} communities of size {:.0f} with p={}'.format(nb_cluster, n/nb_cluster, p), title_x=0.5,
                   xaxis=dict(title='Cardinality of the subset'),
                   yaxis=dict(title='Probability of resolving the graph'),
                   legend=dict(x=0.87, y=0.05, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

### Simulations of the metric dimension (MD) with p constant, for multiple values of q, as a function of n

In [None]:
# For every q > 0 and every graph size n, binary-search the subset cardinality
# at which the estimated fraction of subsets passing is_resolving_set crosses 0.5.
# Result: sol_all_q[q][n] -> selected cardinality.
n_range = range(100, 301, 100)
p = 0.5
q_range = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])
nb_of_iters = 50  # random subsets drawn per tested cardinality

sol_all_q = {}
for q in q_range[1:]:
    solutions = {}
    for n in tqdm(n_range):
        G = nx.stochastic_block_model([int(n/2), int(n/2)], [[p, q], [q, p]])
        length = dict(nx.all_pairs_shortest_path_length(G))
        node_list = list(G.nodes())

        low = 0
        high = n
        # Fractions attached to the two bounds. They are reset for every graph
        # so that a search which only ever moves one bound neither raises a
        # NameError nor reuses a value measured on a previous graph. The start
        # values encode the bracketing the search already assumes: fraction
        # <= 0.5 at cardinality 0 and > 0.5 at cardinality n.
        v_low = 0.0
        v_high = 1.0

        while low + 1 < high:
            middle = (high+low) // 2
            count = 0
            for _ in range(nb_of_iters):
                nodes = set(random.sample(node_list, middle))  # Random set of nodes to test
                if is_resolving_set(G, nodes, length):
                    count += 1
            v = count / nb_of_iters
            if v > 0.5:
                high = middle
                v_high = v
            else:
                low = middle
                v_low = v

        # Keep whichever bound has its measured fraction closest to 0.5
        if abs(v_low - 0.5) < abs(v_high - 0.5):
            solutions[n] = low
        else:
            solutions[n] = high

    sol_all_q[q] = solutions

In [None]:
# One trace per value of q: x holds the graph sizes n that were simulated,
# y the cardinality selected by the binary search for that n.
d = {}
for i in q_range[1:]:
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        # n varies along the x axis, so the legend only reports p and q
        name='Simulations with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

data = list(d.values())

# Define the layout (titles describe what is actually plotted: threshold vs n)
layout = go.Layout(title='Subset cardinality at the resolving threshold as a function of the graph size',
                   xaxis=dict(title='Number of nodes n', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Cardinality where the resolving probability crosses 0.5'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

In [None]:
# Graph sizes at which the conjecture curve is evaluated. A NumPy array is
# used (rather than a `range`) so that cvalue_communities receives the same
# kind of vectorised input as in the other cells, and the upper bound 300 is
# included because the simulations go up to n = 300.
r = np.arange(100, 301)

# Simulation traces: x holds the simulated graph sizes n, y the cardinality
# selected by the binary search. Colours come from `colors`
# (presumably provided by the `helpers` star import).
d = {}
for count, i in enumerate(q_range[1:]):
    d["scatter{}".format(i)] = go.Scatter(
        x=list(sol_all_q[i].keys()),
        y=list(sol_all_q[i].values()),
        mode='markers+lines',
        line=dict(color=colors[count]),
        # n varies along the x axis, so the legend only reports p and q
        name='Simulations with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

# Conjecture curves, same colour as the matching simulation trace
for count, i in enumerate(q_range[1:]):
    d["theo{}".format(i)] = go.Scatter(
        x=list(r),
        y=cvalue_communities(r, p, i, 2),
        mode='lines',
        line=dict(color=colors[count]),
        name='Conjecture with p={:.2f} and q={:.2f}'.format(p, float(i)),
    )

data = list(d.values())

# Define the layout (titles describe what is actually plotted: threshold vs n)
layout = go.Layout(title='Subset cardinality at the resolving threshold as a function of the graph size',
                   xaxis=dict(title='Number of nodes n', rangeslider=dict(visible=True)),
                   yaxis=dict(title='Cardinality where the resolving probability crosses 0.5'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()

### Simulations of the metric dimension (MD) with p and n constant, as a function of q

###### When $p$ = 0.5:

In [None]:
# For every q > 0, binary-search the subset cardinality at which the estimated
# fraction of subsets passing is_resolving_set crosses 0.5.
# Result: sol_all_q[q] -> selected cardinality.
n = 200
p = 0.5
q_range = np.linspace(0, 1, 11)
nb_of_iters = 300  # random subsets drawn per tested cardinality

sol_all_q = {}
for q in tqdm(q_range[1:]):

    G = nx.stochastic_block_model([int(n/2), int(n/2)], [[p, q], [q, p]])
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    low = 0
    high = n
    # Fractions attached to the two bounds. They are reset for every q so that
    # a search which only ever moves one bound neither raises a NameError nor
    # reuses a value measured for a previous q. The start values encode the
    # bracketing the search already assumes: fraction <= 0.5 at cardinality 0
    # and > 0.5 at cardinality n.
    v_low = 0.0
    v_high = 1.0

    # Binary search of the "transition value"
    while low + 1 < high:
        middle = (high+low) // 2
        count = 0
        for _ in range(nb_of_iters):
            nodes = set(random.sample(node_list, middle))  # Random set of nodes to test
            if is_resolving_set(G, nodes, length):
                count += 1
        v = count / nb_of_iters
        if v > 0.5:
            high = middle
            v_high = v
        else:
            low = middle
            v_low = v

    # Keep whichever bound has its measured fraction closest to 0.5
    if abs(v_low - 0.5) < abs(v_high - 0.5):
        sol_all_q[q] = low
    else:
        sol_all_q[q] = high

In [None]:
# Values of q at which the conjecture curve is evaluated
r = np.linspace(0, 1, 30)

# Simulated transition cardinalities, normalised by log(n)
y = [beta / np.log(n) for beta in sol_all_q.values()]

# Simulation trace and conjecture trace
sss = go.Scatter(x=list(sol_all_q.keys()), y=y, mode='markers+lines', name='Simulations with n={} and p={}'.format(n, p))
sss1 = go.Scatter(x=list(r), y=cvalue_communities(n,p,r,2)/np.log(n), mode='lines', name='Conjecture value')

# Define the layout.
# Labels containing LaTeX are written as a single $...$ expression with the
# plain words inside \text{...}, the same form the figure title uses; a label
# mixing bare text with a $...$ fragment is not typeset as intended by plotly.
layout = go.Layout(title=r"$\text{Behaviour of } \frac{\beta}{\log n} \text{ as a function of } q$",
                   title_x=0.5,
                   xaxis=dict(title=r'$\text{Inter-community edge probability } q$'),
                   yaxis=dict(title=r'$\frac{\beta}{\log n}$'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=[sss, sss1], layout=layout)

# Show the figure
fig.show()

###### When $p$ = 0.75:

In [None]:
# For every q > 0, binary-search the subset cardinality at which the estimated
# fraction of subsets passing is_resolving_set crosses 0.5.
# Result: sol_all_q[q] -> selected cardinality.
n = 200
p = 0.75
q_range = np.linspace(0, 1, 20)
nb_of_iters = 500  # random subsets drawn per tested cardinality

sol_all_q = {}
for q in tqdm(q_range[1:]):

    G = nx.stochastic_block_model([int(n/2), int(n/2)], [[p, q], [q, p]])
    length = dict(nx.all_pairs_shortest_path_length(G))
    node_list = list(G.nodes())

    low = 0
    high = n
    # Fractions attached to the two bounds. They are reset for every q so that
    # a search which only ever moves one bound neither raises a NameError nor
    # reuses a value measured for a previous q. The start values encode the
    # bracketing the search already assumes: fraction <= 0.5 at cardinality 0
    # and > 0.5 at cardinality n.
    v_low = 0.0
    v_high = 1.0

    # Binary search of the transition cardinality
    while low + 1 < high:
        middle = (high+low) // 2
        count = 0
        for _ in range(nb_of_iters):
            nodes = set(random.sample(node_list, middle))  # Random set of nodes to test
            if is_resolving_set(G, nodes, length):
                count += 1
        v = count / nb_of_iters
        if v > 0.5:
            high = middle
            v_high = v
        else:
            low = middle
            v_low = v

    # Keep whichever bound has its measured fraction closest to 0.5
    if abs(v_low - 0.5) < abs(v_high - 0.5):
        sol_all_q[q] = low
    else:
        sol_all_q[q] = high

In [None]:
# Values of q at which the conjecture curve is evaluated
r = np.linspace(0, 1, 100)

# Simulated transition cardinalities, normalised by log(n).
# (The comprehension variable no longer shadows the resulting list `y`, and
# the unused `d` dictionary has been dropped.)
y = [beta / np.log(n) for beta in sol_all_q.values()]

# Simulation trace and conjecture trace
experimental = go.Scatter(x=list(sol_all_q.keys()), y=y, mode='markers+lines', name='Simulations with n={} and p={}'.format(n, p))
theoretical = go.Scatter(x=list(r), y=cvalue_communities(n,p,r,2)/np.log(n), mode='lines', name='Conjecture value')

# Define the layout
layout = go.Layout(title=r"$\text{Behaviour of } \frac{\beta}{\log n} \text{ as a function of } q$",
                   title_x=0.5,
                   xaxis=dict(title=r'$q$'), yaxis=dict(title=r'$\frac{\beta}{\log n}$'),
                   legend=dict(x=0.67, y=0.08, orientation='v'))

# Combine the traces and layout into a figure
fig = go.Figure(data=[experimental, theoretical], layout=layout)

# Show the figure
fig.show()