In [1]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
from utils import *
from distances import *
from consensus import *
import math
import sklearn

Note: to be able to use all crisp methods, you need to install some additional packages:  {'karateclub', 'wurlitzer', 'graph_tool'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'karateclub', 'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer'}


In [15]:
import sys

def best_candidate_consensus(P_list, distance=split_joint_distance):
    """Select the input partition with the smallest total distance to all inputs.

    Every entry of ``P_list`` is compared against every entry (itself
    included) with ``distance``; the entry whose distances sum to the lowest
    value is returned as the consensus. Ties go to the earliest entry.

    Parameters
    ----------
    P_list : sequence of dict
        Candidate clusterings. Only the ``"partition"`` key of each entry is
        read.
    distance : callable, optional
        Called as ``distance(partition_a, partition_b)``; its results are
        summed with ``np.sum`` and compared with ``<``.

    Returns
    -------
    dict
        ``{"graph": None, "partition": [...]}`` where the list is a shallow
        copy of the winning entry's partition.

    Raises
    ------
    ValueError
        If ``P_list`` is empty, or if no candidate has a finite total
        distance (every total is NaN or infinite).
    """
    if len(P_list) == 0:
        raise ValueError("best_candidate_consensus needs at least one candidate partition")

    partitions = [entry["partition"] for entry in P_list]

    best_candidate = None
    # An infinite sentinel (rather than a large constant) guarantees that any
    # finite total is accepted, however large the distances are.
    best_dist_total = float("inf")
    for candidate in partitions:
        dist = np.array([distance(candidate, other) for other in partitions])
        dist_total = np.sum(dist)
        # Strict '<' keeps the first candidate among equal totals.
        if dist_total < best_dist_total:
            best_candidate = candidate
            best_dist_total = dist_total

    if best_candidate is None:
        # Only reachable when every comparison above was False, i.e. all
        # totals were NaN or infinite.
        raise ValueError("no candidate partition has a finite total distance")

    P_star = {"graph": None, "partition": list(best_candidate)}
    return P_star

In [17]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse

# --- Experiment configuration -------------------------------------------------
# Suffixes of the per-algorithm clustering files stored next to each graph.
algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]

# Single source of truth for the distances to evaluate: the key is used in the
# printed report and in the output file name, the value is the function passed
# to best_candidate_consensus.
distance_functions_by_name = {
    "split_joint_distance": split_joint_distance,
    "mirkin_distance": mirkin_distance,
    "variation_of_info_distance": variation_of_info_distance,
}
distance_metrics = list(distance_functions_by_name)

n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
# These values are spliced verbatim into the benchmark file names
# (e.g. mu=1 -> "_mu01", gamma=30 -> "_gamma30", beta=11 -> "_beta11").
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

# nx.from_scipy_sparse_matrix was removed in NetworkX 3.0; use its replacement
# when the installed version provides it and fall back to the old name otherwise.
graph_from_sparse = getattr(nx, "from_scipy_sparse_array", None) or nx.from_scipy_sparse_matrix

# --- Run ----------------------------------------------------------------------
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            graph_file = fileprefix + fname + ".mtx"
            print(graph_file)

            # NOTE(review): `spio` is not imported in the visible cells; presumably it
            # is scipy.io re-exported by one of the star imports — confirm.
            # Keep the file open only while the matrix is being read.
            with open(graph_file) as f:
                G = graph_from_sparse(spio.mmread(f), create_using=nx.Graph)

            # One entry per algorithm; each entry gets its own copy of the graph.
            P_list = []
            for alg in algs:
                clust_file = fileprefix + fname + "." + alg
                partition = read_clust_lst(clust_file)
                P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

            # Pick and store the best candidate under every distance.
            for distance_metric in distance_metrics:
                P_star_best_candidate = best_candidate_consensus(
                    P_list, distance_functions_by_name[distance_metric]
                )
                print("BEST CANDIDATE:", distance_metric, len(P_star_best_candidate["partition"]))
                write_clust_lst(
                    P_star_best_candidate["partition"],
                    fileprefix + fname + ".best_candidate." + distance_metric,
                )

LFR/n1000/LFR_n1000_mu01_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 38
BEST CANDIDATE: mirkin_distance 38
BEST CANDIDATE: variation_of_info_distance 38
LFR/n1000/LFR_n1000_mu02_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 38
BEST CANDIDATE: mirkin_distance 36
BEST CANDIDATE: variation_of_info_distance 38
LFR/n1000/LFR_n1000_mu03_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 38
BEST CANDIDATE: mirkin_distance 38
BEST CANDIDATE: variation_of_info_distance 38
LFR/n1000/LFR_n1000_mu04_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 35
BEST CANDIDATE: mirkin_distance 35
BEST CANDIDATE: variation_of_info_distance 35
LFR/n1000/LFR_n1000_mu05_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 1
BEST CANDIDATE: mirkin_distance 16
BEST CANDIDATE: variation_of_info_distance 1
LFR/n1000/LFR_n1000_mu06_gamma30_beta11.mtx
BEST CANDIDATE: split_joint_distance 1
BEST CANDIDATE: mirkin_distance 992
BEST CANDIDATE: variation_of_info_distance 1
LFR/n1000/LFR