In [None]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
from pathlib import Path
# `pd` and `spio` are used by name in later cells; import them explicitly
# (after the wildcard imports, whose exported names are not visible here).
import pandas as pd
import scipy.io as spio

In [None]:
# File-name suffix under which the consensus partition is written
# (used as `fileprefix + fname + "." + cons_name` by the iterative run).
cons_name = "lf"

## Parameter configurations for clustering generation

In [None]:
def _floor_root(value, degree):
    """Return the largest integer r such that r ** degree <= value.

    A plain ``int(value ** (1 / degree))`` truncates floating-point error
    (e.g. ``1000 ** (1 / 3)`` evaluates to 9.999999999999998 and becomes 9),
    so the float estimate is corrected with exact integer arithmetic.
    """
    root = round(value ** (1.0 / degree))
    while root ** degree > value:
        root -= 1
    while (root + 1) ** degree <= value:
        root += 1
    return root


n = 1000

# Four candidate cluster counts, each drawn uniformly (bounds inclusive) from
# [floor(n^(1/3)), 3 * floor(n^(1/2))].
# NOTE(review): no seed is set in this cell, so the draws differ between runs
# unless the `random` module is seeded elsewhere.
_min_clusters = _floor_root(n, 3)
_max_clusters = 3 * _floor_root(n, 2)
expected_clusters = [random.randint(_min_clusters, _max_clusters) for _ in range(4)]

# Algorithm name -> parameter grid (None means "call with defaults only").
# Keep the insertion order of algorithms, parameters and values stable: the
# enumeration cell numbers the resulting calls in iteration order.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

## Enumerate clusterings

In [None]:
# Expand every algorithm's parameter grid into call-expression strings of the
# form "algorithms.<alg>(G,<name>=<value>,...)", each paired with a running index.
clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    call_prefix = "algorithms." + alg + "(G"
    keyword_suffixes = []
    if params is not None:
        names = list(params.keys())
        value_grid = itertools.product(*(list(params[name]) for name in names))
        keyword_suffixes = [
            "".join("," + name + "=" + str(value) for name, value in zip(names, values))
            for values in value_grid
        ]
    # An algorithm with no grid (or an empty one) is still listed once, with
    # no keyword arguments.
    for suffix in keyword_suffixes or [""]:
        expr = call_prefix + suffix + ")"
        clustering_enumeration.append((expr, count))
        count += 1
print(clustering_enumeration)

In [None]:
def _co_clustered_pairs(partition):
    """Yield every pair of nodes sharing a cluster, as an ordered (low, high) int tuple."""
    for cluster in partition:
        members = [int(node) for node in cluster]
        for first, second in itertools.combinations(members, 2):
            # Canonical orientation so (u, v) and (v, u) land in the same matrix cell.
            yield (first, second) if first <= second else (second, first)


# Lancichinetti, A., & Fortunato, S.
# Consensus clustering in complex networks.
# Scientific reports, 2(1), 1-7. (2012)
def lf_consensus(P_list):
    """Build a co-occurrence consensus graph from several partitions and cluster it.

    Parameters
    ----------
    P_list : list of dict
        Each entry provides "graph" (anything accepted by ``nx.Graph``) and
        "partition" (an iterable of clusters, each an iterable of node ids
        convertible to ``int``). Node ids are used directly as matrix indices,
        so they must lie in ``range(n)``, where ``n`` is the node count of the
        first entry's graph.

    Returns
    -------
    dict
        "graph": an ``nx.Graph`` on nodes ``0..n-1`` whose "weight" edge
        attribute is the number of times the two endpoints were placed in the
        same cluster across ``P_list``.
        "partition": the list of communities returned by Louvain (seed 123)
        on that weighted graph.

    Raises
    ------
    IndexError
        If ``P_list`` is empty.
    """
    if not P_list:
        raise IndexError("lf_consensus requires at least one input partition")
    n = nx.Graph(P_list[0]["graph"]).number_of_nodes()

    row = []
    col = []
    for entry in P_list:
        for low, high in _co_clustered_pairs(entry["partition"]):
            row.append(low)
            col.append(high)

    r = coo_array(([1] * len(row), (row, col)), shape=(n, n))
    # COO storage keeps repeated (row, col) entries separate, and networkx
    # turns each stored entry into an edge assignment, so without merging them
    # every edge would end up with weight 1 instead of its co-occurrence count.
    r.sum_duplicates()

    # NOTE(review): an earlier, disabled step restricted the consensus matrix to
    # edges present in the input graph; it is not applied, so every co-clustered
    # pair becomes an edge.
    Ga = nx.from_scipy_sparse_array(r)

    clust_lst = nx_comm.louvain_communities(Ga, weight="weight", seed=123)
    P_star = {"graph": nx.Graph(Ga), "partition": list(clust_lst)}
    return P_star

In [None]:
# Disabled earlier driver, kept only as a string literal: none of it is executed.
# It read one precomputed partition per algorithm name, ran lf_consensus once
# per graph and wrote the result with the ".lf" suffix.
"""
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse

algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
#mus = [2]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            P_list = []
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            print(graph_file)
            G = None
            
            with open(graph_file) as f:
                G = nx.from_scipy_sparse_matrix(spio.mmread(f), create_using=nx.Graph)
                for alg in algs:
                    clust_file = fileprefix + fname + "." + alg
                    partition = read_clust_lst(clust_file)
                    P_list.append({"graph": nx.Graph(G), "partition": list(partition)})
                P_star_lf = lf_consensus(P_list)
                write_clust_lst(P_star_lf["partition"], fileprefix + fname + ".lf")
                print("LF:", len(P_star_lf["partition"]))
"""

# Iterative LF runs

In [None]:
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4]
gammas = [30]
betas = [11]
n_iterations = 20  # consensus rounds per benchmark graph

# One record per (graph, iteration, clustering) plus one per consensus step.
stats = []

for mu, gamma, beta in itertools.product(mus, gammas, betas):
    fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
    graph_file = fileprefix + fname + ".mtx"
    print(graph_file)

    # Only the read needs the file handle; release it before the long loop.
    with open(graph_file) as f:
        G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)

    # Before any consensus step the "previous" matrix is all zeros, so the
    # initial norm is simply the Frobenius norm of the input adjacency matrix.
    old_adj_mat = nx.to_numpy_array(G)
    norm = np.linalg.norm(old_adj_mat)
    P_star = None

    for it in range(n_iterations):
        P_list = []
        for call_expr, index in clustering_enumeration:
            if it > 0:
                # Later rounds re-run every enumerated algorithm on the current
                # consensus graph. The expressions are generated in this notebook
                # from alg_params (not external input) and refer to the name `G`.
                try:
                    coms = eval(call_expr)
                    communities = list(coms.communities)
                except Exception as err:
                    # Best effort: an algorithm failing on this graph is skipped.
                    print("UNSUCCESSFUL", call_expr, err)
                    continue
            else:
                # The first round uses partitions precomputed on the input
                # graph, stored under the enumeration index as file suffix.
                clust_file = fileprefix + fname + "." + str(index)
                if not Path(clust_file).is_file():
                    continue
                communities = list(read_clust_lst(clust_file))

            print("mu:", mu, "it:", it, call_expr, len(communities))
            P_list.append({"graph": nx.Graph(G), "partition": communities})
            # `norm` here is still the value from the previous consensus step.
            stats.append({"mu": mu, "it": it, "norm": norm, "alg": call_expr, "ncluster": len(communities)})

        if not P_list:
            # Nothing to combine; stop here instead of failing inside
            # lf_consensus and losing the statistics gathered so far.
            print("mu:", mu, "it:", it, "no input partitions available, stopping iterations for", fname)
            break

        P_star = lf_consensus(P_list)
        new_adj_mat = nx.to_numpy_array(P_star["graph"])
        # Rebinding G makes the next round's eval'd calls cluster the consensus graph.
        G = nx.Graph(P_star["graph"])
        norm = np.linalg.norm(old_adj_mat - new_adj_mat)
        old_adj_mat = new_adj_mat
        stats.append({"mu": mu, "it": it, "norm": norm, "alg": "lf", "ncluster": len(P_star["partition"])})
        print("mu:", mu, "it:", it, "norm:", norm)

        # Overwritten every round, so the file ends up holding the latest consensus.
        write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)

df = pd.DataFrame(stats)
df.to_csv("benchmark-lf-convergence-multi-alg.csv", index=False)