## Benchmark network analysis

In [1]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
from utils import *
from distances import *
from consensus import *
import math
import sklearn

Note: to be able to use all crisp methods, you need to install some additional packages:  {'graph_tool', 'karateclub'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'ASLPAw', 'karateclub'}


In [2]:
# Label of the consensus method run by this notebook. The driver cell uses it
# as the file extension of the written consensus clustering and as the tag
# printed next to the resulting cluster count.
cons_name = "refined"

In [3]:
def consensus_refined(P_star, P_list, distance=mirkin_distance):
    """Return the common refinement of ``P_star`` with every partition in ``P_list``.

    Each cluster of the current solution is split along the cluster boundaries
    of the partitions in ``P_list``, one partition after another. Only the
    non-empty intersections are kept. With a single-element ``P_list`` the
    result is the pairwise intersection of the clusters of ``P_star`` and
    ``P_list[0]``, emitted in (cluster of ``P_star``, cluster of ``P_list[0]``)
    order.

    Parameters
    ----------
    P_star : dict
        Current solution. Must provide ``"graph"`` and ``"partition"``; the
        partition is an iterable of clusters, each an iterable of hashable
        node identifiers.
    P_list : sequence of dict
        Partitions to refine against; each must provide ``"partition"``.
        An empty sequence leaves the clusters of ``P_star`` unchanged.
    distance : callable, optional
        Not used here. It is accepted so the function can be passed as the
        ``batch_consensus`` callable of ``iterative_consensus``, which always
        forwards a ``distance`` keyword.

    Returns
    -------
    dict
        A new dict with ``"graph"`` (the same object as ``P_star["graph"]``)
        and ``"partition"`` (a list of clusters, each a list of nodes).
    """
    refined = P_star["partition"]

    for other in P_list:
        # Build the sets of the other partition once instead of once per
        # cluster of the current solution.
        other_sets = [set(b) for b in other["partition"]]
        pieces = []
        for a in refined:
            set_a = set(a)
            for set_b in other_sets:
                isec = set_a.intersection(set_b)
                if len(isec) != 0:
                    pieces.append(list(isec))
        refined = pieces

    if len(P_list) == 0:
        # Nothing to refine against: hand back an independent copy so the
        # result never shares cluster lists with the input solution.
        refined = [list(a) for a in refined if len(a) != 0]

    P = {}
    P["graph"] = P_star["graph"]
    P["partition"] = refined

    return P

In [4]:
import random
from datetime import datetime


def iterative_consensus(P_list, n_iter=10, batch_size=1, distance=split_joint_distance, batch_consensus=markov_consensus):
    """Iteratively merge the input partitions into a single consensus solution.

    A randomly chosen input partition seeds the current solution ``P_star``.
    In every iteration the distance from each input to ``P_star`` is measured,
    weights are derived from those distances, the inputs are grouped into
    batches by ``gen_batches`` and, after shuffling, ``batch_consensus`` is
    applied batch by batch to update ``P_star``.

    Parameters
    ----------
    P_list : list of dict
        Input partitions. Each item must provide ``"graph"`` (an object with
        ``is_directed()`` / ``to_directed()``) and ``"partition"``.
    n_iter : int, optional
        Number of passes over all inputs.
    batch_size : int, optional
        Forwarded to ``gen_batches`` (defined elsewhere; presumably the number
        of inputs per batch -- confirm against its implementation).
    distance : callable, optional
        Called as ``distance(partition_a, partition_b)``; the results are
        summed and averaged, so it must return a number.
    batch_consensus : callable, optional
        Called as ``batch_consensus(P_star, batch, distance=distance)``; must
        return the new solution as a dict containing ``"partition"``.

    Returns
    -------
    tuple
        ``(P_star, stats)`` where ``stats`` holds one dict per iteration with
        the keys ``"iteration"``, ``"dist_mean"``, ``"dist_std"``,
        ``"dist_med"`` and ``"ncluster"``.

    Raises
    ------
    IndexError
        If ``P_list`` is empty (raised by ``random.choice``).

    Notes
    -----
    The items of ``P_list`` are modified in place: undirected graphs are
    replaced by their directed version and every item receives the keys
    ``"dist"`` and ``"weight"``. Progress is reported with ``print``.
    """
    stats = []
    k = len(P_list)

    # Convert each input graph to a directed graph
    # Reason: Dynamics of directed graphs would be employed for consensus generation
    for item in P_list:
        if not item["graph"].is_directed():
            item["graph"] = item["graph"].to_directed()

    # No initial solution is given, so randomly pick one of the inputs.
    # A shallow copy is taken so that assigning P_star["weight"] below cannot
    # overwrite the weight computed for that input partition.
    random.seed(datetime.now().timestamp())
    P_star = dict(random.choice(P_list))

    P_prev = P_star

    for it in range(n_iter):
        print("***")
        print("Iteration:", it)
        print("***")

        P_list_working = []
        dist = []
        # Calculate the distance between current solution and all inputs
        for item in P_list:
            d = distance(item["partition"], P_star["partition"])
            item["dist"] = d
            dist.append(d)
            P_list_working.append(item)

        dist = np.array(dist)
        dist_total = np.sum(dist)
        dist_mean = np.mean(dist)
        dist_std = np.std(dist)
        dist_med = np.median(dist)

        stat = {
            "iteration": it,
            "dist_mean": dist_mean,
            "dist_std": dist_std,
            "dist_med": dist_med,
            "ncluster": len(P_star["partition"]),
        }

        # w_star equals 1 when all inputs are equally far from the current
        # solution (std == 0) and decays towards 1 / (k + 1) as the spread of
        # the distances grows; the inputs share the remaining mass.
        w_star = (1.0) / (k + 1) + (k * 1.0 / (k + 1)) * np.exp(-1 * dist_std)
        w_rest = 1.0 - w_star

        print("dist_total:", dist_total, ", dist_mean:", dist_mean, ", dist_std:", dist_std, ", w_star:", w_star, ", w_rest:", w_rest)

        for item in P_list_working:
            if dist_total > 0:
                item["weight"] = (item["dist"] / dist_total) * w_rest
            else:
                # All inputs coincide with the current solution (up to
                # floating-point noise); avoid dividing by zero.
                item["weight"] = 0.0

        batches = gen_batches(P_list_working, batch_size)
        print("Shuffle")
        random.shuffle(batches)
        # Iterate over the batches themselves: there are fewer batches than
        # inputs as soon as batch_size > 1.
        for b, P_list_batch in enumerate(batches):
            P_star["weight"] = w_star
            P_star = batch_consensus(P_star, P_list_batch, distance=distance)
            d = distance(P_star["partition"], P_prev["partition"])
            P_prev = P_star
            print("batch: ", b, ": distance to previous solution:", d, ", #cluster:", len(P_star["partition"]))
        print(stat)
        stats.append(stat)
    return P_star, stats

In [5]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
import pandas as pd

# Community-detection algorithms whose precomputed clusterings are combined;
# each name is also the file extension of the corresponding clustering file.
algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Values substituted into the "mu0<d>", "gamma<d>" and "beta<d>" parts of the
# benchmark file names (presumably the LFR parameters scaled by 10 -- confirm
# against the script that generated the files).
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

# One record per (benchmark, iteration): the benchmark parameters plus the
# statistics reported by iterative_consensus.
all_stats = []

# NetworkX 3.0 removed from_scipy_sparse_matrix in favour of
# from_scipy_sparse_array; use whichever the installed version provides.
graph_from_sparse = getattr(nx, "from_scipy_sparse_array", None) or nx.from_scipy_sparse_matrix

for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            print(graph_file)

            # mmread opens and closes the file itself, so no handle stays open
            # while the consensus is being computed.
            G = graph_from_sparse(spio.mmread(graph_file), create_using=nx.Graph)

            P_list = []
            for alg in algs:
                clust_file = fileprefix + fname + "." + alg
                partition = read_clust_lst(clust_file)
                # Every entry gets its own graph copy because
                # iterative_consensus replaces the graphs in place.
                P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

            P_star, stats = iterative_consensus(P_list, n_iter=1, batch_size=1, distance=variation_of_info_distance, batch_consensus=consensus_refined)
            for stat in stats:
                all_stats.append({"mu": mu, "gamma": gamma, "beta": beta, "consensus": cons_name, **stat})

            # The full solution dict (graph object plus every cluster) is not
            # printed; the clustering is written to disk and summarised below.
            write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)
            print(cons_name, len(P_star["partition"]))

LFR/n1000/LFR_n1000_mu01_gamma30_beta11.mtx
***
Iteration: 0
***
dist_total: 1.2852306365966797 , dist_mean: 0.16065382957458496 , dist_std: 0.22771344627300488 , w_star: 0.818979933243088 , w_rest: 0.18102006675691196
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: -4.76837158203125e-07 , #cluster: 38
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: -4.76837158203125e-07 , #cluster: 38
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.06707239151000977 , #cluster: 51
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: -4.76837158203125e-07 , #cluster: 51
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: -4.76837158203125e-07 , #cluster: 51
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: -4.76837158203125e-07 , #cluster: 51
dict_keys(['graph', 'pa

***
Iteration: 0
***
dist_total: 2.759697675704956 , dist_mean: 0.3449622094631195 , dist_std: 0.4933071990089218 , w_star: 0.6538699230319824 , w_rest: 0.34613007696801756
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 0.02359318733215332 , #cluster: 44
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: -9.5367431640625e-07 , #cluster: 44
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.14145541191101074 , #cluster: 72
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: -1.430511474609375e-06 , #cluster: 72
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: -1.430511474609375e-06 , #cluster: 72
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.011044740676879883 , #cluster: 75
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to pre

***
Iteration: 0
***
dist_total: 6.637460470199585 , dist_mean: 0.8296825587749481 , dist_std: 0.6282631562714546 , w_star: 0.5853490075704926 , w_rest: 0.4146509924295074
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 0.43065929412841797 , #cluster: 47
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.07092022895812988 , #cluster: 57
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.0 , #cluster: 57
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 0.6373922824859619 , #cluster: 211
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 0.0 , #cluster: 211
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.0 , #cluster: 211
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to previous solution: 0.0 , #cluster: 211
dict_keys(['graph',

***
Iteration: 0
***
dist_total: 12.1884183883667 , dist_mean: 1.5235522985458374 , dist_std: 1.2189958904563611 , w_star: 0.3738015637625396 , w_rest: 0.6261984362374604
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 2.824594497680664 , #cluster: 725
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.06006336212158203 , #cluster: 747
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 9.5367431640625e-07 , #cluster: 747
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 9.5367431640625e-07 , #cluster: 747
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 0.0013875961303710938 , #cluster: 748
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.006436347961425781 , #cluster: 751
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to previo

***
Iteration: 0
***
dist_total: 35.55923271179199 , dist_mean: 4.444904088973999 , dist_std: 2.0181826786998243 , w_star: 0.22924156610923296 , w_rest: 0.770758433890767
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 0.00415802001953125 , #cluster: 956
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.015572071075439453 , #cluster: 963
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.002773761749267578 , #cluster: 965
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 0.006590843200683594 , #cluster: 969
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: -9.5367431640625e-07 , #cluster: 969
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: -9.5367431640625e-07 , #cluster: 969
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to pr

***
Iteration: 0
***
dist_total: 23.757386207580566 , dist_mean: 2.969673275947571 , dist_std: 1.3291513774278723 , w_star: 0.34640159816313787 , w_rest: 0.6535984018368621
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 1.924675464630127 , #cluster: 109
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.0 , #cluster: 109
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 1.2810015678405762 , #cluster: 398
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 1.376844882965088 , #cluster: 997
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 9.5367431640625e-07 , #cluster: 997
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 9.5367431640625e-07 , #cluster: 997
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to previous solution: 9.53674

***
Iteration: 0
***
dist_total: 26.377325534820557 , dist_mean: 3.2971656918525696 , dist_std: 1.4507601451346597 , w_star: 0.3194595985628217 , w_rest: 0.6805404014371783
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 1.3775897026062012 , #cluster: 70
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.0 , #cluster: 70
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 1.5250117778778076 , #cluster: 331
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 0.9403877258300781 , #cluster: 713
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 0.505467414855957 , #cluster: 999
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.0 , #cluster: 999
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to previous solution: 0.0 , #cluster: 999
dict_

***
Iteration: 0
***
dist_total: 17.81906247138977 , dist_mean: 2.2273828089237213 , dist_std: 2.0226138342960023 , w_star: 0.22871926972537432 , w_rest: 0.7712807302746256
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 6.882802486419678 , #cluster: 982
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 0.008318424224853516 , #cluster: 988
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.004159450531005859 , #cluster: 991
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 0.0013861656188964844 , #cluster: 992
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 0.0 , #cluster: 992
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.006932735443115234 , #cluster: 997
dict_keys(['graph', 'partition', 'weight'])
batch:  6 : distance to previous solution:

refined 998
LFR/n1000/LFR_n1000_mu09_gamma30_beta11.mtx
***
Iteration: 0
***
dist_total: 24.606646299362183 , dist_mean: 3.075830787420273 , dist_std: 1.4825597102276702 , w_star: 0.3129384417627349 , w_rest: 0.6870615582372651
Shuffle
dict_keys(['graph', 'partition', 'dist', 'weight'])
batch:  0 : distance to previous solution: 1.7833597660064697 , #cluster: 62
dict_keys(['graph', 'partition', 'weight'])
batch:  1 : distance to previous solution: 3.0211000442504883 , #cluster: 998
dict_keys(['graph', 'partition', 'weight'])
batch:  2 : distance to previous solution: 0.0 , #cluster: 998
dict_keys(['graph', 'partition', 'weight'])
batch:  3 : distance to previous solution: 0.0013852119445800781 , #cluster: 999
dict_keys(['graph', 'partition', 'weight'])
batch:  4 : distance to previous solution: 0.0013856887817382812 , #cluster: 1000
dict_keys(['graph', 'partition', 'weight'])
batch:  5 : distance to previous solution: 0.0 , #cluster: 1000
dict_keys(['graph', 'partition', 'weight'])
bat

refined 1000
