In [1]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
import time
from pathlib import Path

Note: to be able to use all crisp methods, you need to install some additional packages:  {'ASLPAw'}


In [2]:
cons_name = "lf"

In [3]:
def prep_consensus_graph(P_list):
    G = nx.Graph(P_list[0]["graph"])
    n = len(list(G.nodes()))
    k = len(P_list)
    #print("Number of nodes", n)
    
    row = []
    col = []
    val = []
    for x in P_list:
        graph = x["graph"]
        partition = x["partition"]
        for cluster in partition:
            for i in range(len(cluster)):
                for j in range(i+1, len(cluster)):
                    item_1 = cluster[i]
                    item_2 = cluster[j]
                    row.append(int(item_1))
                    col.append(int(item_2))
                    val.append(int(1))
                    
    r = coo_array((val, (row, col)), shape=(n, n))
    rDense = r.toarray()
    threshold = k / 2
    rDense[np.abs(rDense) < threshold] = 0
    
    G = nx.from_numpy_array(rDense)
    return G

# n=200

In [6]:
n = 200
expected_clusters = []
for i in range(4):
    expected_clusters.append(random.randint(int(n ** (1. / 3)),3*int(n ** (1. / 2))))
    
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    param_combinations = []
    param_names = []
    if params is not None:
        iterables = []
        param_names = []
        for param in params.keys():
            iterables.append(list(params[param]))
            param_names.append(param)
        param_combinations = list(itertools.product(*iterables))
    if len(param_combinations) > 0:
        for param_combination in param_combinations:
            expr = "algorithms."+alg+"(G"
            for i in range(len(param_names)):
                expr = expr + "," + param_names[i] + "=" + str(param_combination[i])
            expr = expr + ")"
            clustering_enumeration.append((expr,count))
            count = count + 1      
    else:
        expr = "algorithms."+alg+"(G)"
        clustering_enumeration.append((expr,count))
        count = count + 1
        
print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [5]:
import random

n = 200
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
#mus = [4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            P_list = []
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            print(graph_file)
            G = None
            with open(graph_file) as f:
                G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
                coms = None
                new_adj_mat = nx.to_numpy_array(G)
                old_adj_mat = np.zeros(new_adj_mat.shape)
                diff_mat = old_adj_mat - new_adj_mat
                old_adj_mat = np.array(new_adj_mat)
                norm = np.linalg.norm(diff_mat)
                P_star = None
                for it in range(20):
                    P_list = []
                    if it > 0:
                        for k in clustering_enumeration:
                            try:
                                coms = eval(k[0])
                                print("mu:", mu, "it:", it, k[0], len(coms.communities))
                                P_list.append({"graph": nx.Graph(G), "partition": list(coms.communities)})
                                #stats.append({"mu": mu, "it": it, "norm": norm, "alg": k[0], "ncluster": len(coms.communities)})
                                count = count + 1
                            except:
                                print("UNSUCCESSFUL", k[0])
                    else:
                        for k in clustering_enumeration:
                            clust_file = fileprefix + fname + "." + str(k[1])
                            if Path(clust_file).is_file():
                                partition = read_clust_lst(clust_file)
                                print("mu:", mu, "it:", it, k[0], len(partition))
                                P_list.append({"graph": nx.Graph(G), "partition": list(partition)})
                                #stats.append({"mu": mu, "it": it, "norm": norm, "alg": k[0], "ncluster": len(partition)})
                    G = prep_consensus_graph(P_list)
                    
                    new_adj_mat = nx.to_numpy_array(G)
                    diff_mat = old_adj_mat - new_adj_mat
                    norm = np.linalg.norm(diff_mat)
                    old_adj_mat = np.array(new_adj_mat)
                    #stats.append({"mu": mu, "it": it, "norm": norm, "alg": "lf-louvain", "ncluster": len(P_star["partition"])})
                    print("mu:", mu, "it:", it, "norm:", norm)
                    P_star = P_list[0]
                    if norm < 1e-3:
                        print("Converged")
                        print("---")
                        break
                    print("---")
                        
                write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)
        
#df = pd.DataFrame(stats)
#df.to_csv("benchmark-lf-convergence-multi-alg.csv", index=False)

LFR/n200/LFR_n200_mu01_gamma30_beta11.mtx
mu: 1 it: 0 algorithms.label_propagation(G) 20
mu: 1 it: 0 algorithms.leiden(G) 12
mu: 1 it: 0 algorithms.significance_communities(G) 26
mu: 1 it: 0 algorithms.surprise_communities(G) 199
mu: 1 it: 0 algorithms.greedy_modularity(G) 10
mu: 1 it: 0 algorithms.paris(G) 15
mu: 1 it: 0 algorithms.louvain(G,resolution=0.75,randomize=314159) 12
mu: 1 it: 0 algorithms.louvain(G,resolution=0.75,randomize=2718) 12
mu: 1 it: 0 algorithms.louvain(G,resolution=1.0,randomize=314159) 12
mu: 1 it: 0 algorithms.louvain(G,resolution=1.0,randomize=2718) 12
mu: 1 it: 0 algorithms.louvain(G,resolution=1.25,randomize=314159) 14
mu: 1 it: 0 algorithms.louvain(G,resolution=1.25,randomize=2718) 14
mu: 1 it: 0 algorithms.louvain(G,resolution=1.5,randomize=314159) 14
mu: 1 it: 0 algorithms.louvain(G,resolution=1.5,randomize=2718) 14
mu: 1 it: 0 algorithms.infomap(G) 16
mu: 1 it: 0 algorithms.walktrap(G) 15
mu: 1 it: 0 algorithms.markov_clustering(G,inflation=1.2,pruning_

mu: 1 it: 3 algorithms.infomap(G) 15
mu: 1 it: 3 algorithms.walktrap(G) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 17
mu: 1 it: 3 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 

mu: 1 it: 6 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 1 it: 6 algorithms.em(G,k=11) 3
mu: 1 it: 6 algorithms.em(G,k=41) 7
mu: 1 it: 6 algorithms.em(G,k=40) 9
mu: 1 it: 6 algorithms.em(G,k=12) 3
mu: 1 it: 6 algorithms.sbm_dl(G) 13
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 1 it: 6 norm: 48.33218389437829
---
mu: 1 it: 7 algorithms.label_propagation(G) 15
mu: 1 it: 7 algorithms.leiden(G) 15
mu: 1 it: 7 algorithms.significance_communities(G) 15
mu: 1 it: 7 algorithms.surprise_communities(G) 15
mu: 1 it: 7 algorithms.greedy_modularity(G) 15
mu: 1 it: 7 algorithms.paris(G) 

mu: 1 it: 10 algorithms.paris(G) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=0.75,randomize=314159) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=0.75,randomize=2718) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.0,randomize=314159) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.0,randomize=2718) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.25,randomize=314159) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.25,randomize=2718) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.5,randomize=314159) 15
mu: 1 it: 10 algorithms.louvain(G,resolution=1.5,randomize=2718) 15
mu: 1 it: 10 algorithms.infomap(G) 15
mu: 1 it: 10 algorithms.walktrap(G) 15
mu: 1 it: 10 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 10 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 10 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_che

mu: 1 it: 13 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 13 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 13 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 1 it: 13 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 1 it: 13 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 17
mu: 1 it: 13 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 17
mu: 1 it: 13 algorithms.em(G,k=11) 8
mu: 1 it: 13 algorithms.em(G,k=41) 8
mu: 1 it: 13 algorithms.em(G,k=40) 10
mu: 1 it: 13 algorithms.em(G,k=12) 3
mu: 1 it: 13 algorithms.sbm_dl(G) 13
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorith

UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 1 it: 16 norm: 51.84592558726288
---
mu: 1 it: 17 algorithms.label_propagation(G) 15
mu: 1 it: 17 algorithms.leiden(G) 15
mu: 1 it: 17 algorithms.significance_communities(G) 15
mu: 1 it: 17 algorithms.surprise_communities(G) 15
mu: 1 it: 17 algorithms.greedy_modularity(G) 15
mu: 1 it: 17 algorithms.paris(G) 15
mu: 1 it: 17 algorithms.louvain(G,resolution=0.75,randomize=314159) 15
mu: 1 it: 17 algorithms.louvain(G,resolution=0.75,randomize=2718) 15
mu: 1 it: 17 algorithms.louvain(G,resolution=1.0,randomize=314159) 15
mu: 1 it: 17 algorithms.louvain(G,resolution=1.0,randomize=2718) 15
mu: 1 it: 17 algorithms.louvain(G,resolution=1.25,rando

mu: 2 it: 0 algorithms.walktrap(G) 16
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 9
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 1
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 14
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 18
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 18
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 45
mu: 2 it: 0 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 43
mu: 2 it: 0 algorithms.em(G,k=11) 6


UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 2 it: 3 norm: 52.72570530585627
---
mu: 2 it: 4 algorithms.label_propagation(G) 16
mu: 2 it: 4 algorithms.leiden(G) 16
mu: 2 it: 4 algorithms.significance_communities(G) 16
mu: 2 it: 4 algorithms.surprise_communities(G) 16
mu: 2 it: 4 algorithms.greedy_modularity(G) 16
mu: 2 it: 4 algorithms.paris(G) 16
mu: 2 it: 4 algorithms.louvain(G,resolution=0.75,randomize=314159) 16
mu: 2 it: 4 algorithms.louvain(G,resolution=0.75,randomize=2718) 16
mu: 2 it: 4 algorithms.louvain(G,resolution=1.0,randomize=314159) 16
mu: 2 it: 4 algorithms.louvain(G,resolution=1.0,randomize=2718) 16
mu: 2 it: 4 algorithms.louvain(G,resolution=1.25,randomize=314159)

mu: 2 it: 7 algorithms.infomap(G) 16
mu: 2 it: 7 algorithms.walktrap(G) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 7 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 

mu: 2 it: 10 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 10 algorithms.em(G,k=11) 10
mu: 2 it: 10 algorithms.em(G,k=41) 4
mu: 2 it: 10 algorithms.em(G,k=40) 12
mu: 2 it: 10 algorithms.em(G,k=12) 4
mu: 2 it: 10 algorithms.sbm_dl(G) 12
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 2 it: 10 norm: 34.698703145794944
---
mu: 2 it: 11 algorithms.label_propagation(G) 16
mu: 2 it: 11 algorithms.leiden(G) 16
mu: 2 it: 11 algorithms.significance_communities(G) 16
mu: 2 it: 11 algorithms.surprise_communities(G) 16
mu: 2 it: 11 algorithms.greedy_modularity(G) 16
mu: 2 it: 11 algo

mu: 2 it: 14 algorithms.paris(G) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=0.75,randomize=314159) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=0.75,randomize=2718) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.0,randomize=314159) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.0,randomize=2718) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.25,randomize=314159) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.25,randomize=2718) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.5,randomize=314159) 16
mu: 2 it: 14 algorithms.louvain(G,resolution=1.5,randomize=2718) 16
mu: 2 it: 14 algorithms.infomap(G) 16
mu: 2 it: 14 algorithms.walktrap(G) 16
mu: 2 it: 14 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 14 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 14 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_che

mu: 2 it: 17 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 2 it: 17 algorithms.em(G,k=11) 3
mu: 2 it: 17 algorithms.em(G,k=41) 10
mu: 2 it: 17 algorithms.em(G,k=40) 9
mu: 2 it: 17 algorithms.em(G,k=12) 4
mu: 2 it: 17 algorithms.sbm_dl(G

mu: 3 it: 0 norm: 1686.567520142612
---
mu: 3 it: 1 algorithms.label_propagation(G) 15
mu: 3 it: 1 algorithms.leiden(G) 14
mu: 3 it: 1 algorithms.significance_communities(G) 17
mu: 3 it: 1 algorithms.surprise_communities(G) 15
mu: 3 it: 1 algorithms.greedy_modularity(G) 14
mu: 3 it: 1 algorithms.paris(G) 13
mu: 3 it: 1 algorithms.louvain(G,resolution=0.75,randomize=314159) 15
mu: 3 it: 1 algorithms.louvain(G,resolution=0.75,randomize=2718) 15
mu: 3 it: 1 algorithms.louvain(G,resolution=1.0,randomize=314159) 14
mu: 3 it: 1 algorithms.louvain(G,resolution=1.0,randomize=2718) 14
mu: 3 it: 1 algorithms.louvain(G,resolution=1.25,randomize=314159) 15
mu: 3 it: 1 algorithms.louvain(G,resolution=1.25,randomize=2718) 15
mu: 3 it: 1 algorithms.louvain(G,resolution=1.5,randomize=314159) 15
mu: 3 it: 1 algorithms.louvain(G,resolution=1.5,randomize=2718) 15
mu: 3 it: 1 algorithms.infomap(G) 15
mu: 3 it: 1 algorithms.walktrap(G) 14
mu: 3 it: 1 algorithms.markov_clustering(G,inflation=1.2,pruning_thr

mu: 3 it: 4 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 3 it: 4 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 3 it: 4 algorithms.em(G,k=11) 5
mu: 3 it: 4 algorithms.em(G,k=41) 8


mu: 3 it: 7 algorithms.em(G,k=12) 3
mu: 3 it: 7 algorithms.sbm_dl(G) 11
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 3 it: 7 norm: 40.22437072223753
---
mu: 3 it: 8 algorithms.label_propagation(G) 15
mu: 3 it: 8 algorithms.leiden(G) 15
mu: 3 it: 8 algorithms.significance_communities(G) 15
mu: 3 it: 8 algorithms.surprise_communities(G) 15
mu: 3 it: 8 algorithms.greedy_modularity(G) 15
mu: 3 it: 8 algorithms.paris(G) 15
mu: 3 it: 8 algorithms.louvain(G,resolution=0.75,randomize=314159) 15
mu: 3 it: 8 algorithms.louvain(G,resolution=0.75,randomize=2718) 15
mu: 3 it: 8 algorithms.louvain(G,resolution=1.0,randomize=314159) 15
mu: 3 it: 8 alg

mu: 3 it: 11 algorithms.infomap(G) 15
mu: 3 it: 11 algorithms.walktrap(G) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 3 it: 11 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequ

mu: 3 it: 14 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 3 it: 14 algorithms.em(G,k=11) 8
mu: 3 it: 14 algorithms.em(G,k=41) 3
mu: 3 it: 14 algorithms.em(G,k=40) 9
mu: 3 it: 14 algorithms.em(G,k=12) 3
mu: 3 it: 14 algorithms.sbm_dl(G) 10
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 3 it: 14 norm: 42.37924020083418
---
mu: 3 it: 15 algorithms.label_propagation(G) 15
mu: 3 it: 15 algorithms.leiden(G) 15
mu: 3 it: 15 algorithms.significance_communities(G) 15
mu: 3 it: 15 algorithms.surprise_communities(G) 15
mu: 3 it: 15 algorithms.greedy_modularity(G) 15
mu: 3 it: 15 algorit

mu: 3 it: 18 algorithms.paris(G) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=0.75,randomize=314159) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=0.75,randomize=2718) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.0,randomize=314159) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.0,randomize=2718) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.25,randomize=314159) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.25,randomize=2718) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.5,randomize=314159) 15
mu: 3 it: 18 algorithms.louvain(G,resolution=1.5,randomize=2718) 15
mu: 3 it: 18 algorithms.infomap(G) 15
mu: 3 it: 18 algorithms.walktrap(G) 15
mu: 3 it: 18 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 3 it: 18 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 3 it: 18 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_che

mu: 4 it: 1 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 14
mu: 4 it: 1 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 14
mu: 4 it: 1 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 20
mu: 4 it: 1 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 20
mu: 4 it: 1 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 24
mu: 4 it: 1 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 24
mu: 4 it: 1 algorithms.em(G,k=11) 9
mu: 4 it: 1 algorithms.em(G,k=41) 14
mu: 4 it: 1 algorithms.em(G,k=40) 18
mu: 4 it: 1 algorithms.em(G,k=12) 11
mu: 4 it: 1 algorithms.sbm_dl(G) 15
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spingl

UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 4 it: 4 norm: 50.79370039680118
---
mu: 4 it: 5 algorithms.label_propagation(G) 14
mu: 4 it: 5 algorithms.leiden(G) 14
mu: 4 it: 5 algorithms.significance_communities(G) 14
mu: 4 it: 5 algorithms.surprise_communities(G) 14
mu: 4 it: 5 algorithms.greedy_modularity(G) 14
mu: 4 it: 5 algorithms.paris(G) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=0.75,randomize=314159) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=0.75,randomize=2718) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=1.0,randomize=314159) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=1.0,randomize=2718) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=1.25,randomize=314159) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=1.25,randomize=2718) 14
mu: 4 it: 5 algorithms.louvain(G,resolution=1.5,randomize=314159) 

mu: 4 it: 8 algorithms.infomap(G) 14
mu: 4 it: 8 algorithms.walktrap(G) 14
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 4 it: 8 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 

mu: 4 it: 11 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 4 it: 11 algorithms.em(G,k=11) 8
mu: 4 it: 11 algorithms.em(G,k=41) 10
mu: 4 it: 11 algorithms.em(G,k=40) 10
mu: 4 it: 11 algorithms.em(G,k=12) 7
mu: 4 it: 11 algorithms.sbm_dl(G) 11
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorithms.spinglass(G,spins=40)
UNSUCCESSFUL algorithms.spinglass(G,spins=12)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.3)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.5)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.6)
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
mu: 4 it: 11 norm: 43.034869582700026
---
mu: 4 it: 12 algorithms.label_propagation(G) 14
mu: 4 it: 12 algorithms.leiden(G) 14
mu: 4 it: 12 algorithms.significance_communities(G) 14
mu: 4 it: 12 algorithms.surprise_communities(G) 14
mu: 4 it: 12 algorithms.greedy_modularity(G) 14
mu: 4 it: 12 algo

mu: 4 it: 15 algorithms.paris(G) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=0.75,randomize=314159) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=0.75,randomize=2718) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.0,randomize=314159) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.0,randomize=2718) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.25,randomize=314159) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.25,randomize=2718) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.5,randomize=314159) 14
mu: 4 it: 15 algorithms.louvain(G,resolution=1.5,randomize=2718) 14
mu: 4 it: 15 algorithms.infomap(G) 14
mu: 4 it: 15 algorithms.walktrap(G) 14
mu: 4 it: 15 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 4 it: 15 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 4 it: 15 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_che

mu: 4 it: 18 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.01,convergence_check_frequency=100) 15
mu: 4 it: 18 algorithms.markov_clustering(G,inflation=1.5,pruning_threshold=0.001,convergence_check_frequency=100) 15
mu: 4 it: 18 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 4 it: 18 algorithms.markov_clustering(G,inflation=2,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 4 it: 18 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.01,convergence_check_frequency=100) 16
mu: 4 it: 18 algorithms.markov_clustering(G,inflation=2.5,pruning_threshold=0.001,convergence_check_frequency=100) 16
mu: 4 it: 18 algorithms.em(G,k=11) 5
mu: 4 it: 18 algorithms.em(G,k=41) 8
mu: 4 it: 18 algorithms.em(G,k=40) 12
mu: 4 it: 18 algorithms.em(G,k=12) 6
mu: 4 it: 18 algorithms.sbm_dl(G) 11
UNSUCCESSFUL algorithms.spinglass(G,spins=11)
UNSUCCESSFUL algorithms.spinglass(G,spins=41)
UNSUCCESSFUL algorith

# n=1000

In [7]:
n = 1000
expected_clusters = []
for i in range(4):
    expected_clusters.append(random.randint(int(n ** (1. / 3)),3*int(n ** (1. / 2))))
    
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    param_combinations = []
    param_names = []
    if params is not None:
        iterables = []
        param_names = []
        for param in params.keys():
            iterables.append(list(params[param]))
            param_names.append(param)
        param_combinations = list(itertools.product(*iterables))
    if len(param_combinations) > 0:
        for param_combination in param_combinations:
            expr = "algorithms."+alg+"(G"
            for i in range(len(param_names)):
                expr = expr + "," + param_names[i] + "=" + str(param_combination[i])
            expr = expr + ")"
            clustering_enumeration.append((expr,count))
            count = count + 1      
    else:
        expr = "algorithms."+alg+"(G)"
        clustering_enumeration.append((expr,count))
        count = count + 1
        
print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

# n=5000

In [8]:
n = 5000
expected_clusters = []
for i in range(4):
    expected_clusters.append(random.randint(int(n ** (1. / 3)),3*int(n ** (1. / 2))))
    
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    param_combinations = []
    param_names = []
    if params is not None:
        iterables = []
        param_names = []
        for param in params.keys():
            iterables.append(list(params[param]))
            param_names.append(param)
        param_combinations = list(itertools.product(*iterables))
    if len(param_combinations) > 0:
        for param_combination in param_combinations:
            expr = "algorithms."+alg+"(G"
            for i in range(len(param_names)):
                expr = expr + "," + param_names[i] + "=" + str(param_combination[i])
            expr = expr + ")"
            clustering_enumeration.append((expr,count))
            count = count + 1      
    else:
        expr = "algorithms."+alg+"(G)"
        clustering_enumeration.append((expr,count))
        count = count + 1
        
print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)