In [1]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy.sparse import coo_array
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
import sklearn
from utils import *
from distances import *
from consensus import *
import math
import itertools
import random
import time
from pathlib import Path

Note: to be able to use all crisp methods, you need to install some additional packages:  {'ASLPAw'}


In [2]:
# Build the list of clustering runs as (expression string, running index) pairs.
# Every expression is a call on the `algorithms` module applied to a graph
# named `G`; the strings are evaluated by a later cell once `G` is loaded.
n = 81747

# Dedicated, seeded generator: the randomly drawn cluster counts are identical
# on every Restart & Run All, and the global `random` state is left untouched.
CLUSTER_COUNT_SEED = 314159
_cluster_rng = random.Random(CLUSTER_COUNT_SEED)

# Candidate cluster counts are drawn between the cube root of n and three
# times the square root of n (both truncated to integers, bounds inclusive).
_lowest_k = int(n ** (1. / 3))
_highest_k = 3 * int(n ** (1. / 2))
expected_clusters = [_cluster_rng.randint(_lowest_k, _highest_k) for _ in range(4)]

# Algorithm name -> parameter grid. `None` means "call with the graph only";
# otherwise each key maps to the list of values to try and the full cartesian
# product of all lists is enumerated.
alg_params = {
    "label_propagation": None,
    "leiden": None,
    "significance_communities": None,
    "surprise_communities": None,
    "greedy_modularity": None,
    "paris": None,
    "louvain": {
        "resolution": [0.75, 1.0, 1.25, 1.5],
        "randomize": [314159, 2718]
    },
    "infomap": None,
    "walktrap": None,
    "markov_clustering": {
        "inflation": [1.2, 1.5, 2, 2.5],
        "pruning_threshold": [0.01, 0.001],
        "convergence_check_frequency": [100]
    },
    "em": {
        "k": list(expected_clusters)
    },
    "sbm_dl": None,
    "spinglass": {
        "spins": list(expected_clusters)
    },
    "ricci_community": {
        "alpha": [0.3, 0.5, 0.6, 0.75]
    }
}

clustering_enumeration = []
count = 0
for alg, params in alg_params.items():
    param_names = list(params) if params else []
    # product() over zero iterables yields one empty combination, which renders
    # as the bare "algorithms.<alg>(G)" call. The `or [()]` fallback keeps that
    # same bare call when some parameter list is empty.
    param_combinations = list(
        itertools.product(*(params[name] for name in param_names))
    ) or [()]
    for param_combination in param_combinations:
        # repr() prints ints and floats exactly like str() does, and would
        # additionally keep the quotes if a string-valued parameter were added.
        keyword_args = "".join(
            f",{name}={value!r}"
            for name, value in zip(param_names, param_combination)
        )
        clustering_enumeration.append((f"algorithms.{alg}(G{keyword_args})", count))
        count += 1

print(clustering_enumeration)

[('algorithms.label_propagation(G)', 0), ('algorithms.leiden(G)', 1), ('algorithms.significance_communities(G)', 2), ('algorithms.surprise_communities(G)', 3), ('algorithms.greedy_modularity(G)', 4), ('algorithms.paris(G)', 5), ('algorithms.louvain(G,resolution=0.75,randomize=314159)', 6), ('algorithms.louvain(G,resolution=0.75,randomize=2718)', 7), ('algorithms.louvain(G,resolution=1.0,randomize=314159)', 8), ('algorithms.louvain(G,resolution=1.0,randomize=2718)', 9), ('algorithms.louvain(G,resolution=1.25,randomize=314159)', 10), ('algorithms.louvain(G,resolution=1.25,randomize=2718)', 11), ('algorithms.louvain(G,resolution=1.5,randomize=314159)', 12), ('algorithms.louvain(G,resolution=1.5,randomize=2718)', 13), ('algorithms.infomap(G)', 14), ('algorithms.walktrap(G)', 15), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.01,convergence_check_frequency=100)', 16), ('algorithms.markov_clustering(G,inflation=1.2,pruning_threshold=0.001,convergence_check_frequency=100)

In [None]:
def partition_is_valid(communities, nodes):
    """Return True when `communities` assigns every node to exactly one community.

    Parameters
    ----------
    communities : iterable of sized iterables of hashable node ids.
    nodes : iterable of hashable node ids that must be covered.

    The summed community sizes must equal the number of nodes AND the union of
    the communities must equal the node set. Comparing only the sizes would
    accept an overlapping cover that happens to miss the same number of nodes.
    """
    node_set = set(nodes)
    assigned = 0
    covered = set()
    for community in communities:
        assigned += len(community)
        covered.update(community)
    return assigned == len(node_set) and covered == node_set


# Load the graph from a Matrix Market file, run every enumerated clustering on
# it, and write each result that is a valid partition to
# "<fileprefix><fname>.<running number>".
fileprefix = "/home/mth/Data/UNC DATASET/Metis Format/"
fname = "Levine13_dimNetworkMetis"
graph_file = fileprefix + fname + ".mtx"
G = None
print(graph_file)
if Path(graph_file).is_file():
    # Passing the path lets mmread open and close the file itself, so no file
    # handle stays open while the long-running loop below executes.
    # NOTE(review): `spio` is not imported by name in the visible import cell;
    # presumably it arrives through one of the star imports - confirm.
    G = nx.from_scipy_sparse_array(spio.mmread(graph_file), create_using=nx.Graph)
    coms = None
    count = 0  # numbers the output files; only valid partitions advance it
    for k in clustering_enumeration:
        try:
            t1 = time.perf_counter()
            # The expression strings are generated inside this notebook. They
            # are evaluated with only the two names they reference in scope.
            coms = eval(k[0], {"algorithms": algorithms, "G": G})
            t2 = time.perf_counter()
            print(k[0], len(coms.communities), "communities, ", t2-t1, "seconds")
            if partition_is_valid(coms.communities, G.nodes()):
                write_clust_lst(coms.communities, fileprefix + fname + "." + str(count))
                count = count + 1
                print("---")
            else:
                print("INVALID PARTITION")
                print("---")
        except Exception as e:
            # Deliberate best-effort: one failing algorithm must not abort the
            # remaining runs, so the failure is reported and the loop goes on.
            print("UNSUCCESSFUL", k[0], e)
else:
    print("File not found")

/home/mth/Data/UNC DATASET/Metis Format/Levine13_dimNetworkMetis.mtx
algorithms.label_propagation(G) 26 communities,  339.12851643562317 seconds
---
algorithms.leiden(G) 12 communities,  23.3561589717865 seconds
---
algorithms.significance_communities(G) 585 communities,  24.777318239212036 seconds
---
algorithms.surprise_communities(G) 322 communities,  24.452152729034424 seconds
---
algorithms.greedy_modularity(G) 5 communities,  6313.164267778397 seconds
---
algorithms.paris(G) 5 communities,  103.02369570732117 seconds
---
algorithms.louvain(G,resolution=0.75,randomize=314159) 14 communities,  790.4262328147888 seconds
---
algorithms.louvain(G,resolution=0.75,randomize=2718) 14 communities,  836.808762550354 seconds
---
algorithms.louvain(G,resolution=1.0,randomize=314159) 15 communities,  829.4173173904419 seconds
---
algorithms.louvain(G,resolution=1.0,randomize=2718) 14 communities,  1235.414052248001 seconds
---
algorithms.louvain(G,resolution=1.25,randomize=314159) 18 communit

In [None]:
def read_edge_triplets(path):
    """Read a whitespace-separated "row col value" edge list with a size header.

    The first line must hold two integers. The first is returned as `n`; the
    second, reduced by one, is the number of data lines that are read.
    NOTE(review): the reason for the "- 1" is not visible in this notebook -
    confirm it against the file format.

    Each data line holds a 1-based row index, a 1-based column index and a
    float value; the indices are converted to 0-based.

    Returns
    -------
    (n, m, row, col, val) : two ints followed by three lists of length m.

    Raises
    ------
    ValueError
        If the header or a data line has too few fields, or a field cannot be
        parsed as a number (this includes a file that ends early).
    """
    # Plain text mode already applies universal-newline handling. The explicit
    # "U" flag was removed in Python 3.11 and raises ValueError there.
    with open(path) as handle:
        header = handle.readline().split()
        if len(header) < 2:
            raise ValueError(f"{path}: header line must contain two integers")
        n_rows = int(header[0])
        n_entries = int(header[1]) - 1

        row, col, val = [], [], []
        for line_no in range(2, n_entries + 2):
            # split() without an argument tolerates repeated blanks and tabs.
            tokens = handle.readline().split()
            if len(tokens) < 3:
                raise ValueError(
                    f"{path}: line {line_no} should contain 'row col value', "
                    f"got {tokens!r}"
                )
            row.append(int(tokens[0]) - 1)
            col.append(int(tokens[1]) - 1)
            val.append(float(tokens[2]))
    return n_rows, n_entries, row, col, val


# Convert the plain-text edge list into a scipy COO array `r`.
fileprefix = "/home/mth/Data/UNC DATASET/Metis Format/"
fname = "Levine13_dimNetworkMetis"
#graph_file = fileprefix + fname + ".edgelist"
graph_file = fileprefix + fname
G = None
print(graph_file)
if Path(graph_file).is_file():
    print("File found")
    n, m, row, col, val = read_edge_triplets(graph_file)
    print(n, m)
    r = coo_array((val, (row, col)), shape=(n, n))
    #spio.mmwrite(graph_file+".mtx", r)
else:
    print("File not found")

In [10]:
def max_label_in_file(path):
    """Return the largest integer found in the second comma-separated column.

    Every non-blank line is inspected. The result is never below 0, which is
    the starting value of the running maximum. A first line whose second
    column is missing or not an integer is treated as a header and skipped.
    NOTE(review): whether the ground-truth file actually has a header line is
    not visible in this notebook - confirm.

    Raises
    ------
    ValueError
        If any line after the first has no integer in its second column.
    """
    largest = 0
    # Plain text mode already applies universal-newline handling. The explicit
    # "U" flag was removed in Python 3.11 and raises ValueError there.
    with open(path) as handle:
        # Iterate the file object only. Calling readline() inside a
        # `for line in handle` loop consumes two lines per iteration, so every
        # other line would be ignored.
        for line_no, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue
            tokens = line.split(",")
            try:
                label = int(tokens[1])
            except (IndexError, ValueError):
                if line_no == 1:
                    continue
                raise ValueError(
                    f"{path}: line {line_no} has no integer label in column 2: {line!r}"
                ) from None
            if label > largest:
                largest = label
    return largest


# Report the largest class label present in the ground-truth file.
fileprefix = "/home/mth/Data/UNC DATASET/"
fname = "Levine_13dim.fcs groundtruth"
full_file = fileprefix + fname
print(full_file)
max_label = 0
if Path(full_file).is_file():
    print("File found")
    max_label = max_label_in_file(full_file)
else:
    print("File not found")

print(max_label)

/home/mth/Data/UNC DATASET/Levine_13dim.fcs groundtruth
File found
24
