# Benchmark network analysis

In [1]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
from utils import *
from distances import *
from consensus import *
import math
import random
import itertools
import sklearn

Note: to be able to use all crisp methods, you need to install some additional packages:  {'ASLPAw'}


## Generate benchmark graphs

- https://arxiv.org/pdf/0805.4770.pdf
- https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.LFR_benchmark_graph.html
- https://stackoverflow.com/questions/53608425/how-tune-lfr-benchmark-graph-method-in-networkx-for-generating-large-graph

In [None]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
n = 1000
"""
Pay attention to the file prefix
Short evaluation of eight algorithms
"""
fileprefix = "LFR/" + "n" + str(n) + "/"
#fileprefix = "LFR/" + "n" + str(n) + "-k8" "/"
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            G = LFR_benchmark_graph(n, (float(gamma) / 10), (float(beta) / 10), (float(mu) / 10), seed=10, min_degree=10, max_degree=50)
            m = nx.to_scipy_sparse_array(G)
            sp.io.mmwrite(fileprefix + fname + ".mtx", m)
            clust_lst = {frozenset(G.nodes[v]["community"]) for v in G}
            #nx.write_edgelist(G, fileprefix + name + ".edgelist", data=False)
            write_clust_lst(clust_lst, fileprefix + fname + ".gt")

## Clustering algorithm parameter configuration

In [6]:
n = 1000
expected_clusters = []
for i in range(4):
    expected_clusters.append(random.randint(int(n ** (1. / 3)),3*int(n ** (1. / 2))))
    
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

In [None]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation

#algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
algs = ["louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            G = None
            with open(graph_file) as f:
                G = nx.from_scipy_sparse_matrix(spio.mmread(f), create_using=nx.Graph)
                #print(len(G.nodes()), len(G.edges()))
                comms = None
                for alg in algs:
                    print("Running", alg, "on", graph_file)
                    if alg == "infomap":
                        coms = algorithms.infomap(G)
                    elif alg == "markov":
                        coms = algorithms.markov_clustering(G)
                    elif alg == "louvain":
                        coms = algorithms.louvain(G)
                    elif alg == "leiden":
                        coms = algorithms.leiden(G)
                    elif alg == "label-prop":
                        coms = algorithms.label_propagation(G)
                    elif alg == "cnm":
                        coms = algorithms.greedy_modularity(G)
                    elif alg == "walktrap":
                        coms = algorithms.walktrap(G)
                    elif alg == "spinglass":
                        coms = algorithms.spinglass(G)
                    print(mu, alg, len(coms.communities))
                    write_clust_lst(coms.communities, fileprefix + fname + "." + alg)
            #nx.write_edgelist(G, fileprefix + name + ".edgelist", data=False)
            #write_clust_lst(clust_lst, fileprefix + fname + ".gt")

In [7]:
print(alg_params)

{'label_propagation': None, 'leiden': None, 'significance_communities': None, 'surprise_communities': None, 'greedy_modularity': None, 'paris': None, 'louvain': {'resolution': [0.75, 1.0, 1.25, 1.5], 'randomize': [314159, 2718]}, 'infomap': None, 'walktrap': None, 'markov_clustering': {'inflation': [1.2, 1.5, 2, 2.5], 'pruning_threshold': [0.01, 0.001], 'convergence_check_frequency': [100]}, 'em': {'k': [50, 45, 19, 19]}, 'sbm_dl': None, 'spinglass': {'spins': [50, 45, 19, 19]}, 'ricci_community': {'alpha': [0.3, 0.5, 0.6, 0.75]}}


## Run different algorithms on generated benchmark networks

In [8]:
import random

n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
#mus = [1]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            G = None
            with open(graph_file) as f:
                G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
                count = 0
                comms = None
                for alg, params in alg_params.items():
                    param_combinations = []
                    param_names = []
                    if params is not None:
                        iterables = []
                        param_names = []
                        for param in params.keys():
                            iterables.append(list(params[param]))
                            param_names.append(param)
                        param_combinations = list(itertools.product(*iterables))
                    if len(param_combinations) > 0:
                        for param_combination in param_combinations:
                            expr = "algorithms."+alg+"(G"
                            for i in range(len(param_names)):
                                expr = expr + "," + param_names[i] + "=" + str(param_combination[i])
                            expr = expr + ")"
                            try:
                                coms = eval(expr)
                                print(count, expr, len(coms.communities))
                                write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                                count = count + 1
                            except:
                                print("UNSUCCESSFUL", expr)       
                    else:
                        expr = "algorithms."+alg+"(G)"
                        try:
                            coms = eval(expr)
                            print(count, expr, len(coms.communities))
                            write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                            count = count + 1
                        except:
                            print("UNSUCCESSFUL", expr)

                    #coms = eval()
                #write_clust_lst(coms.communities, fileprefix + fname + "." + alg)

0 algorithms.label_propagation(G) 32
1 algorithms.leiden(G) 37
2 algorithms.significance_communities(G) 38
3 algorithms.surprise_communities(G) 999
4 algorithms.greedy_modularity(G) 25
5 algorithms.paris(G) 38
6 algorithms.louvain(G,resolution=0.75,randomize=314159) 38
7 algorithms.louvain(G,resolution=0.75,randomize=2718) 38
8 algorithms.louvain(G,resolution=1.0,randomize=314159) 37
9 algorithms.louvain(G,resolution=1.0,randomize=2718) 37
10 algorithms.louvain(G,resolution=1.25,randomize=314159) 38
11 algorithms.louvain(G,resolution=1.25,randomize=2718) 38
12 algorithms.louvain(G,resolution=1.5,randomize=314159) 38
13 algorithms.louvain(G,resolution=1.5,randomize=2718) 38
14 algorithms.infomap(G) 38
15 algorithms.walktrap(G) 38
16 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100) 38
17 algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100) 37
18 algorithms.markov_clustering(G,inflation=1.

35 algorithms.ricci_community(G,alpha=0.6) 17
UNSUCCESSFUL algorithms.ricci_community(G,alpha=0.75)
