In [4]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
import time
from pathlib import Path

In [2]:
# Tag for this notebook's consensus results: the driver cells below append it
# as the extension of every consensus-partition file they write.
cons_name = "hbgf"

In [3]:
def hbgf_consensus(P_list):
    """Merge an ensemble of partitions into one consensus partition (HBGF solver).

    Every ensemble member is converted into a per-element label vector, the
    vectors are stacked into a label matrix, and the matrix is passed (one row
    per ensemble member) to ``CE.cluster_ensembles`` with ``solver="hbgf"``.

    Parameters
    ----------
    P_list : list of dict
        Ensemble members. Only the ``"partition"`` entry (a list of clusters)
        of each dict is read. The element count is taken from the first
        member by summing its cluster sizes; all members are assumed to
        partition the same elements -- TODO confirm against
        ``clust_lst_to_asn``.

    Returns
    -------
    dict
        ``{"graph": None, "partition": [...]}``, where the partition is the
        consensus result converted back to a list of clusters.

    Raises
    ------
    ValueError
        If ``P_list`` is empty (previously an ``IndexError`` on ``P_list[0]``).

    Notes
    -----
    ``CE``, ``clust_lst_to_asn`` and ``clust_asn_to_lst`` are not defined in
    this notebook; presumably they come from the star imports in the first
    cell -- verify.
    """
    k = len(P_list)
    if k == 0:
        # Fail early with a clear message instead of an IndexError below.
        raise ValueError("hbgf_consensus needs at least one partition; P_list is empty")

    # Number of clustered elements, according to the first ensemble member.
    n = sum(len(cluster) for cluster in P_list[0]["partition"])

    # One column per ensemble member; -1 is only a placeholder until filled.
    label_matrix = np.full((n, k), -1)
    for e, P in enumerate(P_list):
        clust_asn = clust_lst_to_asn(P["partition"], nelem=n)
        label_matrix[:, e] = np.array(clust_asn)

    # The solver is given the transposed matrix: one row per ensemble member.
    cons_asn = CE.cluster_ensembles(np.transpose(label_matrix), solver="hbgf")
    cons_lst = clust_asn_to_lst(cons_asn)
    return {"graph": None, "partition": list(cons_lst)}

# n=200

In [6]:
n = 200

# Candidate cluster counts for the algorithms that take one up front ("em" and
# "spinglass"): four random draws between int(n^(1/3)) and 3 * int(n^(1/2)).
k_min = int(n ** (1. / 3))
k_max = 3 * int(n ** (1. / 2))
expected_clusters = [random.randint(k_min, k_max) for _ in range(4)]

# Algorithm name -> parameter grid; None means "call with the graph only".
# The insertion order fixes the integer id assigned to each call below.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {"resolution": [0.75, 1.0, 1.25, 1.5], "randomize": [314159, 2718]},
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    "em": {"k": list(expected_clusters)},
    "sbm_dl": None,
    "spinglass": {"spins": list(expected_clusters)},
    "ricci_community": {"alpha": [0.3, 0.5, 0.6, 0.75]},
}

# Expand every grid into call-expression strings, numbered consecutively.
clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    grid_names = [] if params is None else list(params)
    grid_points = (
        []
        if params is None
        else list(itertools.product(*(list(params[name]) for name in grid_names)))
    )
    if grid_points:
        # One call per point of the Cartesian product of the parameter values.
        calls = [
            "algorithms." + alg + "(G"
            + "".join("," + name + "=" + str(value) for name, value in zip(grid_names, point))
            + ")"
            for point in grid_points
        ]
    else:
        # No grid (or an empty one): a single call on the graph alone.
        calls = ["algorithms." + alg + "(G)"]
    for call in calls:
        clustering_enumeration.append((call, count))
        count += 1

print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [8]:
# Build the consensus partition for every graph file of this size.
# (`random` is not needed here; it is already imported in the first cell.)
n = 200
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu, gamma, beta in itertools.product(mus, gammas, betas):
    fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
    graph_file = fileprefix + fname + ".mtx"
    print(graph_file)

    # Keep the file handle open only while the Matrix Market file is parsed.
    with open(graph_file) as f:
        G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)

    # Gather whichever base clusterings exist on disk; their files are named
    # by the integer id assigned in `clustering_enumeration`.
    P_list = []
    for _expr, clust_id in clustering_enumeration:
        clust_file = fileprefix + fname + "." + str(clust_id)
        if Path(clust_file).is_file():
            partition = read_clust_lst(clust_file)
            P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

    if not P_list:
        # Nothing to combine: report it instead of failing inside the solver.
        print("mu", mu, ", no clustering files found; skipping consensus")
        continue

    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    t1 = time.perf_counter()
    P_star = hbgf_consensus(P_list)
    t2 = time.perf_counter()
    print("mu", mu, ", number of clusters", len(P_star["partition"]))
    print("Time:", t2-t1)
    write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)

LFR/n200/LFR_n200_mu01_gamma30_beta11.mtx
mu 1 , number of clusters 122
Time: 0.40747547149658203
LFR/n200/LFR_n200_mu02_gamma30_beta11.mtx
mu 2 , number of clusters 133
Time: 0.41968536376953125
LFR/n200/LFR_n200_mu03_gamma30_beta11.mtx
mu 3 , number of clusters 116
Time: 0.4672977924346924
LFR/n200/LFR_n200_mu04_gamma30_beta11.mtx
mu 4 , number of clusters 130
Time: 0.5079178810119629


# n=1000

In [9]:
n = 1000

# Candidate cluster counts for the algorithms that take one up front ("em" and
# "spinglass"): four random draws between int(n^(1/3)) and 3 * int(n^(1/2)).
# NOTE(review): in floating point 1000 ** (1. / 3) is slightly below 10, so the
# lower bound truncates to 9 here -- confirm that is acceptable.
k_min = int(n ** (1. / 3))
k_max = 3 * int(n ** (1. / 2))
expected_clusters = [random.randint(k_min, k_max) for _ in range(4)]

# Algorithm name -> parameter grid; None means "call with the graph only".
# The insertion order fixes the integer id assigned to each call below.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {"resolution": [0.75, 1.0, 1.25, 1.5], "randomize": [314159, 2718]},
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    "em": {"k": list(expected_clusters)},
    "sbm_dl": None,
    "spinglass": {"spins": list(expected_clusters)},
    "ricci_community": {"alpha": [0.3, 0.5, 0.6, 0.75]},
}

# Expand every grid into call-expression strings, numbered consecutively.
clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    grid_names = [] if params is None else list(params)
    grid_points = (
        []
        if params is None
        else list(itertools.product(*(list(params[name]) for name in grid_names)))
    )
    if grid_points:
        # One call per point of the Cartesian product of the parameter values.
        calls = [
            "algorithms." + alg + "(G"
            + "".join("," + name + "=" + str(value) for name, value in zip(grid_names, point))
            + ")"
            for point in grid_points
        ]
    else:
        # No grid (or an empty one): a single call on the graph alone.
        calls = ["algorithms." + alg + "(G)"]
    for call in calls:
        clustering_enumeration.append((call, count))
        count += 1

print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [10]:
# Build the consensus partition for every graph file of this size.
# (`random` is not needed here; it is already imported in the first cell.)
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu, gamma, beta in itertools.product(mus, gammas, betas):
    fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
    graph_file = fileprefix + fname + ".mtx"
    print(graph_file)

    # Keep the file handle open only while the Matrix Market file is parsed.
    with open(graph_file) as f:
        G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)

    # Gather whichever base clusterings exist on disk; their files are named
    # by the integer id assigned in `clustering_enumeration`.
    P_list = []
    for _expr, clust_id in clustering_enumeration:
        clust_file = fileprefix + fname + "." + str(clust_id)
        if Path(clust_file).is_file():
            partition = read_clust_lst(clust_file)
            P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

    if not P_list:
        # Nothing to combine: report it instead of failing inside the solver.
        print("mu", mu, ", no clustering files found; skipping consensus")
        continue

    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    t1 = time.perf_counter()
    P_star = hbgf_consensus(P_list)
    t2 = time.perf_counter()
    print("mu", mu, ", number of clusters", len(P_star["partition"]))
    print("Time:", t2-t1)
    write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)

LFR/n1000/LFR_n1000_mu01_gamma30_beta11.mtx
mu 1 , number of clusters 748
Time: 0.8461880683898926
LFR/n1000/LFR_n1000_mu02_gamma30_beta11.mtx
mu 2 , number of clusters 765
Time: 0.8739335536956787
LFR/n1000/LFR_n1000_mu03_gamma30_beta11.mtx
mu 3 , number of clusters 580
Time: 1.1762597560882568
LFR/n1000/LFR_n1000_mu04_gamma30_beta11.mtx
mu 4 , number of clusters 635
Time: 1.426685094833374


# n=5000

In [11]:
n = 5000

# Candidate cluster counts for the algorithms that take one up front ("em" and
# "spinglass"): four random draws between int(n^(1/3)) and 3 * int(n^(1/2)).
k_min = int(n ** (1. / 3))
k_max = 3 * int(n ** (1. / 2))
expected_clusters = [random.randint(k_min, k_max) for _ in range(4)]

# Algorithm name -> parameter grid; None means "call with the graph only".
# The insertion order fixes the integer id assigned to each call below.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {"resolution": [0.75, 1.0, 1.25, 1.5], "randomize": [314159, 2718]},
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100],
    },
    "em": {"k": list(expected_clusters)},
    "sbm_dl": None,
    "spinglass": {"spins": list(expected_clusters)},
    "ricci_community": {"alpha": [0.3, 0.5, 0.6, 0.75]},
}

# Expand every grid into call-expression strings, numbered consecutively.
clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    grid_names = [] if params is None else list(params)
    grid_points = (
        []
        if params is None
        else list(itertools.product(*(list(params[name]) for name in grid_names)))
    )
    if grid_points:
        # One call per point of the Cartesian product of the parameter values.
        calls = [
            "algorithms." + alg + "(G"
            + "".join("," + name + "=" + str(value) for name, value in zip(grid_names, point))
            + ")"
            for point in grid_points
        ]
    else:
        # No grid (or an empty one): a single call on the graph alone.
        calls = ["algorithms." + alg + "(G)"]
    for call in calls:
        clustering_enumeration.append((call, count))
        count += 1

print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [12]:
# Build the consensus partition for every graph file of this size.
# (`random` is not needed here; it is already imported in the first cell.)
n = 5000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
for mu, gamma, beta in itertools.product(mus, gammas, betas):
    fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
    graph_file = fileprefix + fname + ".mtx"
    print(graph_file)

    # Keep the file handle open only while the Matrix Market file is parsed.
    with open(graph_file) as f:
        G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)

    # Gather whichever base clusterings exist on disk; their files are named
    # by the integer id assigned in `clustering_enumeration`.
    P_list = []
    for _expr, clust_id in clustering_enumeration:
        clust_file = fileprefix + fname + "." + str(clust_id)
        if Path(clust_file).is_file():
            partition = read_clust_lst(clust_file)
            P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

    if not P_list:
        # Nothing to combine: report it instead of failing inside the solver.
        print("mu", mu, ", no clustering files found; skipping consensus")
        continue

    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    t1 = time.perf_counter()
    P_star = hbgf_consensus(P_list)
    t2 = time.perf_counter()
    print("mu", mu, ", number of clusters", len(P_star["partition"]))
    print("Time:", t2-t1)
    write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)

LFR/n5000/LFR_n5000_mu01_gamma30_beta11.mtx
mu 1 , number of clusters 3451
Time: 4.917860984802246
LFR/n5000/LFR_n5000_mu02_gamma30_beta11.mtx
mu 2 , number of clusters 3762
Time: 4.905524730682373
LFR/n5000/LFR_n5000_mu03_gamma30_beta11.mtx
mu 3 , number of clusters 2953
Time: 6.563857078552246
LFR/n5000/LFR_n5000_mu04_gamma30_beta11.mtx
mu 4 , number of clusters 3499
Time: 8.91361403465271
