In [84]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
from pathlib import Path

In [66]:
cons_name = "v1"

In [87]:
def v1_consensus(P_list, niter=10, verbose=True):
    G = nx.Graph(P_list[0]["graph"])
    n = len(list(G.nodes()))
    k = len(P_list)
    #print("Number of nodes", n)
    
    row = []
    col = []
    val = []
    for x in P_list:
        graph = x["graph"]
        partition = x["partition"]
        for cluster in partition:
            for i in range(len(cluster)):
                for j in range(i+1, len(cluster)):
                    item_1 = cluster[i]
                    item_2 = cluster[j]
                    row.append(int(item_1))
                    col.append(int(item_2))
                    val.append(int(1))
                    
    r = coo_array((val, (row, col)), shape=(n, n))
    r = r.tocsr()
                
    refined_partition = None
    for x in P_list:
        graph = x["graph"]
        partition = x["partition"]
        if not refined_partition:
            refined_partition = list(partition)
        new_partition = []
        for clusterA in refined_partition:
            for clusterB in partition:
                seta = set(clusterA)
                setb = set(clusterB)
                anb = seta.intersection(setb)
                if len(anb) != 0:
                    new_partition.append(list(anb))
        refined_partition = list(new_partition)
    
    refined_partition_map = clust_lst_to_map(refined_partition)
    
    if verbose:
        print("Number of core clusters", len(refined_partition_map))
    
    items = list(refined_partition_map.keys())
    count = 0
    it = 0
    while(it < niter):
        random.shuffle(items)
        flag = True
        for item in items:
            neighbors = []
            if isinstance(items[0], int):
                neighbors = [n for n in G.neighbors(int(item))]
            else:
                neighbors = [str(n) for n in G.neighbors(int(item))]
                
            if len(neighbors) > 0:
                opt_diff = 0
                opt_src_clust_no = refined_partition_map[item]
                opt_dst_clust_no = refined_partition_map[item]

                diff_src = 0
                for elem in refined_partition[opt_src_clust_no]:
                    i = min(int(item), int(elem))
                    j = max(int(item), int(elem))
                    diff_src = diff_src + (k - 2*r[i,j])
                
                for neighbor in neighbors:
                    if refined_partition_map[neighbor] != opt_src_clust_no:
                        dst_clust_no = refined_partition_map[neighbor]
                        diff_dst = 0

                        for elem in refined_partition[dst_clust_no]:
                            i = min(int(item), int(elem))
                            j = max(int(item), int(elem))
                            diff_dst = diff_dst + (k - 2*r[i,j])

                        if diff_dst - diff_src < opt_diff:
                            opt_diff = diff_dst - diff_src
                            opt_dst_clust_no = dst_clust_no
                            
                if opt_src_clust_no != opt_dst_clust_no:
                    refined_partition[opt_src_clust_no].remove(item)
                    refined_partition[opt_dst_clust_no].append(item)
                    
                    refined_partition_map[item] = opt_src_clust_no
                    refined_partition_map[item] = opt_dst_clust_no
                    
                    flag = False
                    count = count + 1
                    if verbose:
                        print("Move:", count, "|",
                              "Moving", item, 
                              "from cluster", 
                              opt_src_clust_no, "( size", len(refined_partition[opt_src_clust_no])+1, ")", 
                              "to", 
                              opt_dst_clust_no, "( size", len(refined_partition[opt_dst_clust_no])-1, ")",
                              "distance reduction:", opt_diff
                             )
            else:
                # No move necessary
                pass
        if verbose:
            print("---", it, "/", niter, "---")
        if flag:
            break
        it = it + 1
    
    empty_clusters = []
    for i in range(len(refined_partition)):
        if len(refined_partition[i]) == 0:
            empty_clusters.append(i)
            
    empty_clusters.sort(reverse=True)
    for e in empty_clusters:
        del refined_partition[e]
    
    return {"graph": nx.Graph(G), "partition": list(refined_partition)}

## Parameter configurations for clustering generation

In [2]:
n = 1000
expected_clusters = []
for i in range(4):
    expected_clusters.append(random.randint(int(n ** (1. / 3)),3*int(n ** (1. / 2))))
    
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

## Enumerate clusterings

In [7]:
clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    param_combinations = []
    param_names = []
    if params is not None:
        iterables = []
        param_names = []
        for param in params.keys():
            iterables.append(list(params[param]))
            param_names.append(param)
        param_combinations = list(itertools.product(*iterables))
    if len(param_combinations) > 0:
        for param_combination in param_combinations:
            expr = "algorithms."+alg+"(G"
            for i in range(len(param_names)):
                expr = expr + "," + param_names[i] + "=" + str(param_combination[i])
            expr = expr + ")"
            clustering_enumeration.append((expr,count))
            count = count + 1      
    else:
        expr = "algorithms."+alg+"(G)"
        clustering_enumeration.append((expr,count))
        count = count + 1
print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [89]:
import random

n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
#mus = [1, 2, 3, 4]
mus = [4]
gammas = [30]
betas = [11]
for mu in mus:
    for gamma in gammas:
        for beta in betas:
            P_list = []
            fname = "LFR_n" + str(n) + "_mu0" + str(mu) + "_gamma" + str(gamma) + "_beta" + str(beta)
            graph_file = fileprefix + fname + ".mtx"
            G = None
            with open(graph_file) as f:
                G = nx.from_scipy_sparse_array(spio.mmread(f), create_using=nx.Graph)
                coms = None
                for k in clustering_enumeration:
                    clust_file = fileprefix + fname + "." + str(k[1])
                    if Path(clust_file).is_file():
                        partition = read_clust_lst(clust_file)
                        P_list.append({"graph": nx.Graph(G), "partition": list(partition)})
                P_star = v1_consensus(P_list, niter=20, verbose=True)
                print("mu", mu, ", number of clusters", len(P_star["partition"]))
                write_clust_lst(P_star["partition"], fileprefix + fname + "." + cons_name)
                

Number of core clusters 1000
Move: 1 | Moving 363 from cluster 473 ( size 1 ) to 469 ( size 1 ) distance reduction: -58
Move: 2 | Moving 473 from cluster 71 ( size 1 ) to 0 ( size 1 ) distance reduction: -48
Move: 3 | Moving 611 from cluster 412 ( size 1 ) to 395 ( size 1 ) distance reduction: -56
Move: 4 | Moving 945 from cluster 950 ( size 1 ) to 940 ( size 1 ) distance reduction: -56
Move: 5 | Moving 814 from cluster 128 ( size 1 ) to 126 ( size 1 ) distance reduction: -60
Move: 6 | Moving 536 from cluster 609 ( size 1 ) to 593 ( size 1 ) distance reduction: -54
Move: 7 | Moving 108 from cluster 858 ( size 1 ) to 855 ( size 1 ) distance reduction: -62
Move: 8 | Moving 713 from cluster 93 ( size 1 ) to 98 ( size 1 ) distance reduction: -58
Move: 9 | Moving 599 from cluster 411 ( size 1 ) to 395 ( size 2 ) distance reduction: -54
Move: 10 | Moving 608 from cluster 990 ( size 1 ) to 979 ( size 1 ) distance reduction: -58
Move: 11 | Moving 260 from cluster 830 ( size 1 ) to 829 ( size 1

Move: 308 | Moving 375 from cluster 724 ( size 1 ) to 721 ( size 11 ) distance reduction: -206
Move: 309 | Moving 747 from cluster 717 ( size 1 ) to 721 ( size 12 ) distance reduction: -254
Move: 310 | Moving 96 from cluster 305 ( size 1 ) to 264 ( size 5 ) distance reduction: -90
Move: 311 | Moving 858 from cluster 652 ( size 1 ) to 641 ( size 7 ) distance reduction: -130
Move: 312 | Moving 210 from cluster 188 ( size 2 ) to 187 ( size 2 ) distance reduction: -34
Move: 313 | Moving 714 from cluster 56 ( size 1 ) to 34 ( size 6 ) distance reduction: -92
Move: 314 | Moving 268 from cluster 378 ( size 1 ) to 381 ( size 13 ) distance reduction: -302
Move: 315 | Moving 630 from cluster 760 ( size 1 ) to 740 ( size 5 ) distance reduction: -120
Move: 316 | Moving 50 from cluster 74 ( size 1 ) to 98 ( size 6 ) distance reduction: -122
Move: 317 | Moving 442 from cluster 295 ( size 2 ) to 264 ( size 6 ) distance reduction: -58
Move: 318 | Moving 553 from cluster 852 ( size 1 ) to 642 ( size 1 

Move: 457 | Moving 857 from cluster 586 ( size 1 ) to 570 ( size 7 ) distance reduction: -186
Move: 458 | Moving 529 from cluster 853 ( size 1 ) to 829 ( size 16 ) distance reduction: -92
Move: 459 | Moving 667 from cluster 968 ( size 1 ) to 961 ( size 8 ) distance reduction: -132
Move: 460 | Moving 201 from cluster 661 ( size 1 ) to 666 ( size 10 ) distance reduction: -180
Move: 461 | Moving 112 from cluster 893 ( size 1 ) to 890 ( size 13 ) distance reduction: -264
Move: 462 | Moving 44 from cluster 490 ( size 3 ) to 500 ( size 5 ) distance reduction: -88
Move: 463 | Moving 958 from cluster 999 ( size 1 ) to 979 ( size 10 ) distance reduction: -234
Move: 464 | Moving 273 from cluster 133 ( size 1 ) to 116 ( size 4 ) distance reduction: -72
Move: 465 | Moving 818 from cluster 554 ( size 1 ) to 535 ( size 19 ) distance reduction: -364
Move: 466 | Moving 504 from cluster 907 ( size 1 ) to 890 ( size 14 ) distance reduction: -318
Move: 467 | Moving 330 from cluster 298 ( size 1 ) to 264 

Move: 559 | Moving 182 from cluster 700 ( size 1 ) to 721 ( size 19 ) distance reduction: -398
Move: 560 | Moving 449 from cluster 5 ( size 2 ) to 7 ( size 9 ) distance reduction: -202
Move: 561 | Moving 286 from cluster 505 ( size 2 ) to 476 ( size 8 ) distance reduction: -104
Move: 562 | Moving 81 from cluster 462 ( size 1 ) to 13 ( size 1 ) distance reduction: -18
Move: 563 | Moving 93 from cluster 892 ( size 1 ) to 890 ( size 17 ) distance reduction: -380
Move: 564 | Moving 466 from cluster 871 ( size 1 ) to 862 ( size 16 ) distance reduction: -396
Move: 565 | Moving 793 from cluster 553 ( size 1 ) to 535 ( size 24 ) distance reduction: -468
Move: 566 | Moving 171 from cluster 524 ( size 1 ) to 535 ( size 25 ) distance reduction: -476
Move: 567 | Moving 489 from cluster 328 ( size 1 ) to 333 ( size 26 ) distance reduction: -528
Move: 568 | Moving 758 from cluster 296 ( size 1 ) to 264 ( size 13 ) distance reduction: -92
Move: 569 | Moving 282 from cluster 831 ( size 1 ) to 829 ( si

Move: 700 | Moving 403 from cluster 756 ( size 1 ) to 740 ( size 17 ) distance reduction: -306
Move: 701 | Moving 314 from cluster 283 ( size 1 ) to 289 ( size 7 ) distance reduction: -170
Move: 702 | Moving 35 from cluster 121 ( size 1 ) to 116 ( size 5 ) distance reduction: -60
Move: 703 | Moving 917 from cluster 917 ( size 1 ) to 890 ( size 22 ) distance reduction: -484
Move: 704 | Moving 850 from cluster 775 ( size 2 ) to 740 ( size 18 ) distance reduction: -322
Move: 705 | Moving 216 from cluster 77 ( size 1 ) to 98 ( size 23 ) distance reduction: -386
Move: 706 | Moving 461 from cluster 154 ( size 1 ) to 161 ( size 24 ) distance reduction: -464
Move: 707 | Moving 75 from cluster 924 ( size 1 ) to 935 ( size 11 ) distance reduction: -212
Move: 708 | Moving 101 from cluster 491 ( size 1 ) to 500 ( size 8 ) distance reduction: -184
Move: 709 | Moving 358 from cluster 900 ( size 1 ) to 890 ( size 23 ) distance reduction: -454
Move: 710 | Moving 811 from cluster 810 ( size 1 ) to 788 

Move: 802 | Moving 766 from cluster 632 ( size 1 ) to 641 ( size 26 ) distance reduction: -550
Move: 803 | Moving 120 from cluster 201 ( size 1 ) to 205 ( size 18 ) distance reduction: -360
Move: 804 | Moving 696 from cluster 241 ( size 1 ) to 233 ( size 19 ) distance reduction: -402
Move: 805 | Moving 328 from cluster 772 ( size 1 ) to 740 ( size 25 ) distance reduction: -428
Move: 806 | Moving 690 from cluster 580 ( size 1 ) to 570 ( size 18 ) distance reduction: -338
Move: 807 | Moving 197 from cluster 204 ( size 1 ) to 205 ( size 19 ) distance reduction: -342
Move: 808 | Moving 168 from cluster 896 ( size 1 ) to 890 ( size 28 ) distance reduction: -612
Move: 809 | Moving 578 from cluster 214 ( size 1 ) to 205 ( size 20 ) distance reduction: -344
Move: 810 | Moving 710 from cluster 366 ( size 1 ) to 333 ( size 35 ) distance reduction: -274
Move: 811 | Moving 508 from cluster 479 ( size 3 ) to 476 ( size 18 ) distance reduction: -346
Move: 812 | Moving 971 from cluster 883 ( size 1 )

Move: 907 | Moving 293 from cluster 787 ( size 1 ) to 788 ( size 30 ) distance reduction: -538
Move: 908 | Moving 706 from cluster 436 ( size 1 ) to 448 ( size 37 ) distance reduction: -686
Move: 909 | Moving 813 from cluster 66 ( size 1 ) to 7 ( size 15 ) distance reduction: -286
Move: 910 | Moving 490 from cluster 873 ( size 1 ) to 862 ( size 26 ) distance reduction: -622
Move: 911 | Moving 810 from cluster 439 ( size 1 ) to 448 ( size 38 ) distance reduction: -682
Move: 912 | Moving 900 from cluster 639 ( size 1 ) to 641 ( size 30 ) distance reduction: -446
Move: 913 | Moving 434 from cluster 85 ( size 1 ) to 98 ( size 31 ) distance reduction: -492
Move: 914 | Moving 379 from cluster 496 ( size 1 ) to 500 ( size 9 ) distance reduction: -206
Move: 915 | Moving 486 from cluster 456 ( size 1 ) to 448 ( size 39 ) distance reduction: -214
Move: 916 | Moving 654 from cluster 966 ( size 1 ) to 961 ( size 18 ) distance reduction: -338
Move: 917 | Moving 617 from cluster 123 ( size 1 ) to 10

Move: 999 | Moving 877 from cluster 486 ( size 2 ) to 642 ( size 1 ) distance reduction: -18
Move: 1000 | Moving 725 from cluster 284 ( size 1 ) to 59 ( size 2 ) distance reduction: -18
Move: 1001 | Moving 159 from cluster 306 ( size 1 ) to 775 ( size 1 ) distance reduction: -20
Move: 1002 | Moving 462 from cluster 314 ( size 2 ) to 264 ( size 34 ) distance reduction: -412
Move: 1003 | Moving 984 from cluster 646 ( size 2 ) to 641 ( size 33 ) distance reduction: -630
Move: 1004 | Moving 331 from cluster 111 ( size 4 ) to 113 ( size 3 ) distance reduction: -36
Move: 1005 | Moving 497 from cluster 566 ( size 1 ) to 570 ( size 23 ) distance reduction: -410
Move: 1006 | Moving 506 from cluster 638 ( size 7 ) to 641 ( size 34 ) distance reduction: -520
Move: 1007 | Moving 19 from cluster 113 ( size 4 ) to 111 ( size 3 ) distance reduction: -32
Move: 1008 | Moving 117 from cluster 574 ( size 3 ) to 570 ( size 24 ) distance reduction: -450
Move: 1009 | Moving 147 from cluster 395 ( size 3 ) t

Move: 1093 | Moving 718 from cluster 285 ( size 1 ) to 289 ( size 15 ) distance reduction: -350
Move: 1094 | Moving 572 from cluster 334 ( size 1 ) to 333 ( size 43 ) distance reduction: -124
Move: 1095 | Moving 743 from cluster 126 ( size 2 ) to 98 ( size 39 ) distance reduction: -512
Move: 1096 | Moving 100 from cluster 479 ( size 3 ) to 476 ( size 29 ) distance reduction: -440
Move: 1097 | Moving 354 from cluster 376 ( size 1 ) to 381 ( size 44 ) distance reduction: -776
Move: 1098 | Moving 604 from cluster 359 ( size 2 ) to 113 ( size 4 ) distance reduction: -46
Move: 1099 | Moving 916 from cluster 551 ( size 1 ) to 535 ( size 47 ) distance reduction: -928
Move: 1100 | Moving 81 from cluster 13 ( size 1 ) to 301 ( size 1 ) distance reduction: -16
Move: 1101 | Moving 378 from cluster 203 ( size 1 ) to 205 ( size 30 ) distance reduction: -562
Move: 1102 | Moving 664 from cluster 359 ( size 1 ) to 361 ( size 5 ) distance reduction: -56
Move: 1103 | Moving 344 from cluster 515 ( size 2

Move: 1180 | Moving 553 from cluster 641 ( size 44 ) to 642 ( size 2 ) distance reduction: -62
Move: 1181 | Moving 604 from cluster 113 ( size 9 ) to 361 ( size 4 ) distance reduction: -20
Move: 1182 | Moving 666 from cluster 151 ( size 3 ) to 775 ( size 1 ) distance reduction: -38
Move: 1183 | Moving 648 from cluster 361 ( size 5 ) to 356 ( size 5 ) distance reduction: -68
Move: 1184 | Moving 989 from cluster 361 ( size 4 ) to 356 ( size 6 ) distance reduction: -52
--- 4 / 20 ---
Move: 1185 | Moving 756 from cluster 505 ( size 3 ) to 187 ( size 7 ) distance reduction: -70
Move: 1186 | Moving 836 from cluster 187 ( size 8 ) to 273 ( size 1 ) distance reduction: -6
Move: 1187 | Moving 187 from cluster 151 ( size 2 ) to 775 ( size 2 ) distance reduction: -52
Move: 1188 | Moving 406 from cluster 361 ( size 3 ) to 356 ( size 7 ) distance reduction: -80
Move: 1189 | Moving 245 from cluster 47 ( size 12 ) to 59 ( size 3 ) distance reduction: -4
Move: 1190 | Moving 542 from cluster 356 ( size

Move: 1269 | Moving 604 from cluster 360 ( size 9 ) to 113 ( size 8 ) distance reduction: -76
Move: 1270 | Moving 406 from cluster 360 ( size 8 ) to 356 ( size 2 ) distance reduction: -34
Move: 1271 | Moving 666 from cluster 151 ( size 4 ) to 775 ( size 1 ) distance reduction: -78
Move: 1272 | Moving 245 from cluster 47 ( size 12 ) to 59 ( size 3 ) distance reduction: -4
Move: 1273 | Moving 553 from cluster 642 ( size 3 ) to 641 ( size 43 ) distance reduction: -10
--- 9 / 20 ---
Move: 1274 | Moving 982 from cluster 118 ( size 10 ) to 116 ( size 1 ) distance reduction: -54
Move: 1275 | Moving 604 from cluster 113 ( size 9 ) to 356 ( size 3 ) distance reduction: -28
Move: 1276 | Moving 788 from cluster 479 ( size 3 ) to 187 ( size 2 ) distance reduction: -30
Move: 1277 | Moving 424 from cluster 59 ( size 4 ) to 118 ( size 9 ) distance reduction: -8
Move: 1278 | Moving 406 from cluster 356 ( size 4 ) to 360 ( size 7 ) distance reduction: -40
Move: 1279 | Moving 474 from cluster 151 ( size

Move: 1356 | Moving 666 from cluster 775 ( size 4 ) to 479 ( size 3 ) distance reduction: -10
Move: 1357 | Moving 187 from cluster 775 ( size 3 ) to 479 ( size 4 ) distance reduction: -16
Move: 1358 | Moving 817 from cluster 479 ( size 5 ) to 273 ( size 6 ) distance reduction: -4
Move: 1359 | Moving 982 from cluster 116 ( size 2 ) to 118 ( size 9 ) distance reduction: -18
Move: 1360 | Moving 553 from cluster 641 ( size 44 ) to 642 ( size 2 ) distance reduction: -62
Move: 1361 | Moving 94 from cluster 98 ( size 41 ) to 187 ( size 4 ) distance reduction: -26
--- 16 / 20 ---
Move: 1362 | Moving 756 from cluster 187 ( size 5 ) to 273 ( size 7 ) distance reduction: -76
Move: 1363 | Moving 666 from cluster 479 ( size 4 ) to 775 ( size 2 ) distance reduction: -22
Move: 1364 | Moving 982 from cluster 118 ( size 10 ) to 116 ( size 1 ) distance reduction: -54
Move: 1365 | Moving 245 from cluster 47 ( size 12 ) to 59 ( size 3 ) distance reduction: -4
Move: 1366 | Moving 187 from cluster 479 ( siz

In [11]:
len(P_list)

37

In [None]:
P_star = v1_consensus(P_list)

In [62]:
print(len(P_star["partition"]))

38
