# Benchmark network analysis

In [2]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
from pathlib import Path

# Generate benchmark graphs

- https://arxiv.org/pdf/0805.4770.pdf
- https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.LFR_benchmark_graph.html
- https://stackoverflow.com/questions/53608425/how-tune-lfr-benchmark-graph-method-in-networkx-for-generating-large-graph

## 200 node graph
Parameters:
- Number of nodes $n=200$
- Mixing parameter (fraction of each node's edges that lead outside its community) $\mu \in \{ 0.1, 0.2, 0.3, 0.4 \}$
- Degree distribution parameter (Power-law exponent) $\gamma = 3.0$
- Community size distribution parameter (Power-law exponent) $\beta = 1.1$
- Minimum degree: $5$
- Maximum degree: $50$

In [11]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from pathlib import Path

# Generate one LFR benchmark graph per parameter combination and store it as a
# Matrix Market file (".mtx") next to its ground-truth clustering (".gt").
n = 200
fileprefix = "LFR/" + "n" + str(n) + "/"
# Create the output directory up front so the first write does not fail on a
# fresh checkout.
Path(fileprefix).mkdir(parents=True, exist_ok=True)
# Parameters are stored as integers scaled by 10: they are embedded verbatim in
# the file name and divided by 10 when passed to the generator.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            # Fixed seed: the same graph is produced on every run.
            G = LFR_benchmark_graph(n, gamma / 10, beta / 10, mu / 10, seed=10, min_degree=5, max_degree=50)
            m = nx.to_scipy_sparse_array(G)
            sp.io.mmwrite(fileprefix + fname + ".mtx", m)
            # Every node carries its community's node set; the set comprehension
            # collapses the per-node copies into one entry per community.
            clust_lst = {frozenset(G.nodes[v]["community"]) for v in G}
            write_clust_lst(clust_lst, fileprefix + fname + ".gt")

### Clustering algorithm parameter configuration

In [14]:
n = 200
# Candidate cluster counts for the algorithms that need one: four random draws
# between floor(n^(1/3)) and 3 * floor(n^(1/2)), both bounds inclusive.
# NOTE(review): no seed is set in this cell, so the draws presumably differ
# between runs -- confirm whether that is intended.
lower_k = int(n ** (1. / 3))
upper_k = 3 * int(n ** (1. / 2))
expected_clusters = [random.randint(lower_k, upper_k) for _ in range(4)]

# Parameter grid per algorithm name. None means the algorithm is called with
# its defaults; a dict maps each keyword argument to the values to try.
# The insertion order determines the numbering of the result files.
alg_params = {
    # Algorithms run with default settings.
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    # 4 resolutions x 2 values of "randomize".
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718],
    },
    "infomap": None,
    "walktrap": None,
    # 4 inflations x 2 pruning thresholds.
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    # Each consumer gets its own copy of the drawn cluster counts.
    "em": {"k": expected_clusters[:]},
    "sbm_dl": None,
    "spinglass": {"spins": expected_clusters[:]},
    "ricci_community": {"alpha": [0.3, 0.5, 0.6, 0.75]},
}

### Run different algorithms on generated benchmark networks

In [15]:
# Run every configured community-detection algorithm on each stored benchmark
# graph and write each successful clustering to "<graph name>.<run index>".
n = 200
fileprefix = "LFR/" + "n" + str(n) + "/"
# Parameters are stored as integers scaled by 10, matching the file names
# produced by the generation cell.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma, beta in itertools.product(gammas, betas):
        fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
        graph_file = fileprefix + fname + ".mtx"
        # Only the read needs the file handle; release it before the
        # long-running algorithm sweep starts.
        with open(graph_file) as f:
            G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
        # Index of the next result file; advances only after a successful run.
        count = 0
        for alg, params in alg_params.items():
            param_names = list(params) if params is not None else []
            value_grid = [list(params[name]) for name in param_names]
            # An empty grid (no parameters, or an empty value list) means a
            # single run with the algorithm's defaults.
            combos = list(itertools.product(*value_grid)) or [()]
            for combo in combos:
                kwargs = dict(zip(param_names, combo))
                # Human-readable form of the call, used for logging only.
                expr = "algorithms." + alg + "(G" + "".join(f",{k}={v}" for k, v in kwargs.items()) + ")"
                try:
                    # Look the algorithm up by name and pass the parameters
                    # as keyword arguments instead of eval()-ing a string.
                    coms = getattr(algorithms, alg)(G, **kwargs)
                    print(count, expr, len(coms.communities))
                    write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                    count = count + 1
                except Exception as err:
                    # Best effort: report the failure with its reason and move
                    # on. KeyboardInterrupt is deliberately not caught, so the
                    # sweep can still be aborted.
                    print("UNSUCCESSFUL", expr, repr(err))

0 algorithms.label_propagation(G) 20
1 algorithms.leiden(G) 12
2 algorithms.significance_communities(G) 26
3 algorithms.surprise_communities(G) 199
4 algorithms.greedy_modularity(G) 10
5 algorithms.paris(G) 15
6 algorithms.louvain(G,resolution=0.75,randomize=314159) 12
7 algorithms.louvain(G,resolution=0.75,randomize=2718) 12
8 algorithms.louvain(G,resolution=1.0,randomize=314159) 12
9 algorithms.louvain(G,resolution=1.0,randomize=2718) 12
10 algorithms.louvain(G,resolution=1.25,randomize=314159) 14
11 algorithms.louvain(G,resolution=1.25,randomize=2718) 14
12 algorithms.louvain(G,resolution=1.5,randomize=314159) 14
13 algorithms.louvain(G,resolution=1.5,randomize=2718) 14
14 algorithms.infomap(G) 16
15 algorithms.walktrap(G) 15
16 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 12
17 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 1
18 algorithms.markov_clustering(G,inflation=1.5

36 algorithms.ricci_community(G,alpha=0.75) 5


## 1000 node graph
Parameters:
- Number of nodes $n=1000$
- Mixing parameter (fraction of each node's edges that lead outside its community) $\mu \in \{ 0.1, 0.2, 0.3, 0.4 \}$
- Degree distribution parameter (Power-law exponent) $\gamma = 3.0$
- Community size distribution parameter (Power-law exponent) $\beta = 1.1$
- Minimum degree: $10$
- Maximum degree: $50$

In [17]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from pathlib import Path

# Generate one LFR benchmark graph per parameter combination and store it as a
# Matrix Market file (".mtx") next to its ground-truth clustering (".gt").
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Create the output directory up front so the first write does not fail on a
# fresh checkout.
Path(fileprefix).mkdir(parents=True, exist_ok=True)
# Parameters are stored as integers scaled by 10: they are embedded verbatim in
# the file name and divided by 10 when passed to the generator.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            # Fixed seed: the same graph is produced on every run.
            G = LFR_benchmark_graph(n, gamma / 10, beta / 10, mu / 10, seed=10, min_degree=10, max_degree=50)
            m = nx.to_scipy_sparse_array(G)
            sp.io.mmwrite(fileprefix + fname + ".mtx", m)
            # Every node carries its community's node set; the set comprehension
            # collapses the per-node copies into one entry per community.
            clust_lst = {frozenset(G.nodes[v]["community"]) for v in G}
            write_clust_lst(clust_lst, fileprefix + fname + ".gt")

### Clustering algorithm parameter configuration

In [18]:
n = 1000
# Candidate cluster counts for the algorithms that need one: four random draws
# between floor(n^(1/3)) and 3 * floor(n^(1/2)), both bounds inclusive.
# The roots are rounded before truncation because 1000 ** (1. / 3) evaluates
# to 9.999999999999998, which a bare int() would cut down to 9 instead of 10.
min_clusters = int(round(n ** (1. / 3), 9))
max_clusters = 3 * int(round(n ** (1. / 2), 9))
# NOTE(review): no seed is set in this cell, so the draws presumably differ
# between runs -- confirm whether that is intended.
expected_clusters = [random.randint(min_clusters, max_clusters) for _ in range(4)]

# Parameter grid per algorithm name. None means the algorithm is called with
# its defaults; a dict maps each keyword argument to the values to try.
# The insertion order determines the numbering of the result files.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718],
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    # Each consumer gets its own copy of the drawn cluster counts.
    "em": {
        "k": list(expected_clusters),
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters),
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75],
    },
}

### Run different algorithms on generated benchmark networks

In [19]:
# Run every configured community-detection algorithm on each stored benchmark
# graph and write each successful clustering to "<graph name>.<run index>".
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Parameters are stored as integers scaled by 10, matching the file names
# produced by the generation cell.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma, beta in itertools.product(gammas, betas):
        fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
        graph_file = fileprefix + fname + ".mtx"
        # Only the read needs the file handle; release it before the
        # long-running algorithm sweep starts.
        with open(graph_file) as f:
            G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
        # Index of the next result file; advances only after a successful run.
        count = 0
        for alg, params in alg_params.items():
            param_names = list(params) if params is not None else []
            value_grid = [list(params[name]) for name in param_names]
            # An empty grid (no parameters, or an empty value list) means a
            # single run with the algorithm's defaults.
            combos = list(itertools.product(*value_grid)) or [()]
            for combo in combos:
                kwargs = dict(zip(param_names, combo))
                # Human-readable form of the call, used for logging only.
                expr = "algorithms." + alg + "(G" + "".join(f",{k}={v}" for k, v in kwargs.items()) + ")"
                try:
                    # Look the algorithm up by name and pass the parameters
                    # as keyword arguments instead of eval()-ing a string.
                    coms = getattr(algorithms, alg)(G, **kwargs)
                    print(count, expr, len(coms.communities))
                    write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                    count = count + 1
                except Exception as err:
                    # Best effort: report the failure with its reason and move
                    # on. KeyboardInterrupt is deliberately not caught, so the
                    # sweep can still be aborted.
                    print("UNSUCCESSFUL", expr, repr(err))
    # Separator between the logs of consecutive mixing parameters.
    print("---")

0 algorithms.label_propagation(G) 32
1 algorithms.leiden(G) 37
2 algorithms.significance_communities(G) 38
3 algorithms.surprise_communities(G) 999
4 algorithms.greedy_modularity(G) 25
5 algorithms.paris(G) 38
6 algorithms.louvain(G,resolution=0.75,randomize=314159) 38
7 algorithms.louvain(G,resolution=0.75,randomize=2718) 38
8 algorithms.louvain(G,resolution=1.0,randomize=314159) 37
9 algorithms.louvain(G,resolution=1.0,randomize=2718) 37
10 algorithms.louvain(G,resolution=1.25,randomize=314159) 38
11 algorithms.louvain(G,resolution=1.25,randomize=2718) 38
12 algorithms.louvain(G,resolution=1.5,randomize=314159) 38
13 algorithms.louvain(G,resolution=1.5,randomize=2718) 38
14 algorithms.infomap(G) 38
15 algorithms.walktrap(G) 38
16 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 38
17 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 37
18 algorithms.markov_clustering(G,inflation=1.

35 algorithms.ricci_community(G,alpha=0.6) 17
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
---


## 5000 node graph
Parameters:
- Number of nodes $n=5000$
- Mixing parameter (fraction of each node's edges that lead outside its community) $\mu \in \{ 0.1, 0.2, 0.3, 0.4 \}$
- Degree distribution parameter (Power-law exponent) $\gamma = 3.0$
- Community size distribution parameter (Power-law exponent) $\beta = 1.1$
- Minimum degree: $10$
- Maximum degree: $50$

In [13]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from pathlib import Path

# Generate one LFR benchmark graph per parameter combination and store it as a
# Matrix Market file (".mtx") next to its ground-truth clustering (".gt").
n = 5000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Create the output directory up front so the first write does not fail on a
# fresh checkout.
Path(fileprefix).mkdir(parents=True, exist_ok=True)
# Parameters are stored as integers scaled by 10: they are embedded verbatim in
# the file name and divided by 10 when passed to the generator.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            # Fixed seed: the same graph is produced on every run.
            G = LFR_benchmark_graph(n, gamma / 10, beta / 10, mu / 10, seed=10, min_degree=10, max_degree=50)
            m = nx.to_scipy_sparse_array(G)
            sp.io.mmwrite(fileprefix + fname + ".mtx", m)
            # Every node carries its community's node set; the set comprehension
            # collapses the per-node copies into one entry per community.
            clust_lst = {frozenset(G.nodes[v]["community"]) for v in G}
            write_clust_lst(clust_lst, fileprefix + fname + ".gt")

### Clustering algorithm parameter configuration

In [20]:
n = 5000
# Candidate cluster counts for the algorithms that need one: four random draws
# between floor(n^(1/3)) and 3 * floor(n^(1/2)), both bounds inclusive.
# NOTE(review): no seed is set in this cell, so the draws presumably differ
# between runs -- confirm whether that is intended.
lower_k = int(n ** (1. / 3))
upper_k = 3 * int(n ** (1. / 2))
expected_clusters = [random.randint(lower_k, upper_k) for _ in range(4)]

# Parameter grid per algorithm name. None means the algorithm is called with
# its defaults; a dict maps each keyword argument to the values to try.
# The insertion order determines the numbering of the result files.
alg_params = {
    # Algorithms run with default settings.
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    # 4 resolutions x 2 values of "randomize".
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718],
    },
    "infomap": None,
    "walktrap": None,
    # 4 inflations x 2 pruning thresholds.
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    # Each consumer gets its own copy of the drawn cluster counts.
    "em": {"k": expected_clusters[:]},
    "sbm_dl": None,
    "spinglass": {"spins": expected_clusters[:]},
    "ricci_community": {"alpha": [0.3, 0.5, 0.6, 0.75]},
}

### Run different algorithms on generated benchmark networks

In [21]:
import random

# Run every configured community-detection algorithm on each stored benchmark
# graph and write each successful clustering to "<graph name>.<run index>".
n = 5000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Parameters are stored as integers scaled by 10, matching the file names
# produced by the generation cell.
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma, beta in itertools.product(gammas, betas):
        fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
        graph_file = fileprefix + fname + ".mtx"
        # Only the read needs the file handle; release it before the
        # long-running algorithm sweep starts.
        with open(graph_file) as f:
            G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
        # Index of the next result file; advances only after a successful run.
        count = 0
        for alg, params in alg_params.items():
            param_names = list(params) if params is not None else []
            value_grid = [list(params[name]) for name in param_names]
            # An empty grid (no parameters, or an empty value list) means a
            # single run with the algorithm's defaults.
            combos = list(itertools.product(*value_grid)) or [()]
            for combo in combos:
                kwargs = dict(zip(param_names, combo))
                # Human-readable form of the call, used for logging only.
                expr = "algorithms." + alg + "(G" + "".join(f",{k}={v}" for k, v in kwargs.items()) + ")"
                try:
                    # Look the algorithm up by name and pass the parameters
                    # as keyword arguments instead of eval()-ing a string.
                    coms = getattr(algorithms, alg)(G, **kwargs)
                    print(count, expr, len(coms.communities))
                    write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                    count = count + 1
                except Exception as err:
                    # Best effort: report the failure with its reason and move
                    # on. KeyboardInterrupt is deliberately not caught, so the
                    # sweep can still be aborted.
                    print("UNSUCCESSFUL", expr, repr(err))
    # Separator between the logs of consecutive mixing parameters.
    print("---")

0 algorithms.label_propagation(G) 201
1 algorithms.leiden(G) 113
2 algorithms.significance_communities(G) 201
3 algorithms.surprise_communities(G) 4999
4 algorithms.greedy_modularity(G) 66
5 algorithms.paris(G) 197
6 algorithms.louvain(G,resolution=0.75,randomize=314159) 97
7 algorithms.louvain(G,resolution=0.75,randomize=2718) 98
8 algorithms.louvain(G,resolution=1.0,randomize=314159) 114
9 algorithms.louvain(G,resolution=1.0,randomize=2718) 112
10 algorithms.louvain(G,resolution=1.25,randomize=314159) 127
11 algorithms.louvain(G,resolution=1.25,randomize=2718) 129
12 algorithms.louvain(G,resolution=1.5,randomize=314159) 134
13 algorithms.louvain(G,resolution=1.5,randomize=2718) 134
14 algorithms.infomap(G) 201
15 algorithms.walktrap(G) 200
16 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 201
17 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 200
18 algorithms.markov_clustering

32 algorithms.spinglass(G,spins=34) 34
33 algorithms.ricci_community(G,alpha=0.3) 338
34 algorithms.ricci_community(G,alpha=0.5) 410
35 algorithms.ricci_community(G,alpha=0.6) 414
36 algorithms.ricci_community(G,alpha=0.75) 420
---
