# Benchmark network analysis

In [None]:
import networkx as nx
import networkx.algorithms.community as nx_comm
from networkx.generators.community import LFR_benchmark_graph
from networkx.algorithms import bipartite
import numpy as np
import scipy as sp
from scipy import sparse
from cdlib import algorithms
from cdlib import evaluation
from utils import *
from distances import *
from consensus import *
import math
import sklearn

## Generate benchmark graphs

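- LFR benchmark paper (Lancichinetti, Fortunato, Radicchi, "Benchmark graphs for testing community detection algorithms"): https://arxiv.org/pdf/0805.4770.pdf
- NetworkX `LFR_benchmark_graph` reference: https://networkx.org/documentation/stable/reference/generated/networkx.generators.community.LFR_benchmark_graph.html
- Notes on tuning `LFR_benchmark_graph` parameters for large graphs: https://stackoverflow.com/questions/53608425/how-tune-lfr-benchmark-graph-method-in-networkx-for-generating-large-graph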

In [None]:
# Generate one LFR benchmark graph per (mu, gamma, beta) combination and store,
# for each graph, its adjacency matrix (Matrix Market ".mtx") and the community
# sets attached to its nodes (".gt").
import os

import scipy.io  # explicit: `import scipy as sp` alone does not guarantee `sp.io` is loaded

n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
# Parameters are kept as integers scaled by 10 so they can be embedded in file
# names; they are divided by 10 when handed to the generator.
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

# Every output below is written under `fileprefix`; create it so the cell also
# works on a fresh checkout.
os.makedirs(fileprefix, exist_ok=True)

for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            # Positional arguments after n are passed in the order
            # gamma / 10, beta / 10, mu / 10 (see the LFR_benchmark_graph docs
            # linked above for their meaning).
            G = LFR_benchmark_graph(
                n,
                gamma / 10,
                beta / 10,
                mu / 10,
                seed=10,
                min_degree=10,
                max_degree=50,
            )
            m = nx.to_scipy_sparse_array(G)
            scipy.io.mmwrite(fileprefix + fname + ".mtx", m)
            # Each node carries the set of nodes in its community; collecting
            # them as frozensets de-duplicates the communities.
            clust_lst = {frozenset(G.nodes[v]["community"]) for v in G}
            # write_clust_lst comes from a star import; presumably it writes
            # one community per record -- confirm against its definition.
            write_clust_lst(clust_lst, fileprefix + fname + ".gt")

## Run different algorithms on generated benchmark networks

In [None]:
# Run each selected community-detection algorithm on every stored benchmark
# graph and write the detected communities to "<graph prefix>.<algorithm>".
import scipy.io

# Maps the short name used in result-file suffixes to the function name in
# cdlib.algorithms. Lookup is by name so that only the algorithms actually
# selected in `algs` are resolved.
detector_names = {
    "infomap": "infomap",
    "markov": "markov_clustering",
    "louvain": "louvain",
    "leiden": "leiden",
    "label-prop": "label_propagation",
    "cnm": "greedy_modularity",
    "walktrap": "walktrap",
    "spinglass": "spinglass",
}

# "infomap" is supported above but left out of this run; add it to the list to
# include it.
# NOTE(review): later cells read a ".infomap" result file -- confirm it is
# produced by a separate run.
algs = ["louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            graph_file = fileprefix + fname + ".mtx"
            # mmread accepts a path directly, so no manual file handle is
            # needed. from_scipy_sparse_array replaces
            # from_scipy_sparse_matrix, which NetworkX 3.0 removed.
            G = nx.from_scipy_sparse_array(scipy.io.mmread(graph_file), create_using=nx.Graph)
            for alg in algs:
                # Fail loudly on an unknown name instead of silently reusing
                # the previous algorithm's result under the wrong file name.
                if alg not in detector_names:
                    raise ValueError(f"Unknown community detection algorithm: {alg!r}")
                print("Running", alg, "on", graph_file)
                coms = getattr(algorithms, detector_names[alg])(G)
                print(mu, alg, len(coms.communities))
                write_clust_lst(coms.communities, fileprefix + fname + "." + alg)

## Prepare data structure for consensus algorithm implementations and run consensus

In [None]:
# For every benchmark graph, collect the partitions produced by the individual
# algorithms and compute two consensus partitions, written to "<prefix>.lf"
# and "<prefix>.it".
import scipy.io

algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            graph_file = fileprefix + fname + ".mtx"
            print(graph_file)
            # mmread accepts a path directly. from_scipy_sparse_array replaces
            # from_scipy_sparse_matrix, which NetworkX 3.0 removed.
            G = nx.from_scipy_sparse_array(scipy.io.mmread(graph_file), create_using=nx.Graph)

            # One entry per algorithm: an independent copy of the graph plus
            # that algorithm's partition. P_list is rebuilt for every graph, so
            # after the loop it holds the entries of the last graph only.
            P_list = []
            for alg in algs:
                clust_file = fileprefix + fname + "." + alg
                partition = read_clust_lst(clust_file)
                P_list.append({"graph": nx.Graph(G), "partition": list(partition)})

            # lf_consensus / iterative_consensus come from star imports; both
            # return a mapping with a "partition" entry.
            P_star_lf = lf_consensus(P_list)
            write_clust_lst(P_star_lf["partition"], fileprefix + fname + ".lf")
            P_star_it = iterative_consensus(
                P_list,
                n_iter=10,
                batch_size=1,
                distance=earth_movers_distance,
                batch_consensus=markov_consensus,
            )
            write_clust_lst(P_star_it["partition"], fileprefix + fname + ".it")

In [None]:
# Show how many entries P_list holds (it is rebuilt per graph in the cell above,
# so this reflects the last graph processed).
len(P_list)

In [None]:
# Build one statistics record per (graph, algorithm): NMI between the ".gt"
# assignment and the algorithm's assignment, plus the earth mover's distance
# from the algorithm's partition to each of the two consensus partitions.
#
# `import sklearn` alone does not guarantee that the `sklearn.metrics`
# submodule is loaded, so the metric is imported explicitly.
from sklearn.metrics import normalized_mutual_info_score

stats = []
algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass", "lf", "it"]
n = 1000
fileprefix = "LFR/" + "n" + str(n) + "/"
mus = [1, 2, 3, 4, 5, 6, 7, 8, 9]
gammas = [30]
betas = [11]

for mu in mus:
    for gamma in gammas:
        for beta in betas:
            fname = f"LFR_n{n}_mu0{mu}_gamma{gamma}_beta{beta}"
            gt_clust_lst = read_clust_lst(fileprefix + fname + ".gt")
            # NOTE(review): NMI below assumes clust_lst_to_asn yields labels in
            # the same node order for every partition -- confirm in its definition.
            gt_clust_asn = clust_lst_to_asn(gt_clust_lst)
            lf_clust_lst = read_clust_lst(fileprefix + fname + ".lf")
            it_clust_lst = read_clust_lst(fileprefix + fname + ".it")
            for alg in algs:
                clust_file = fileprefix + fname + "." + alg
                clust_lst = read_clust_lst(clust_file)
                clust_asn = clust_lst_to_asn(clust_lst)
                stats.append(
                    {
                        "mu": float(mu) / 10,
                        "n": n,
                        "nmi": normalized_mutual_info_score(gt_clust_asn, clust_asn),
                        "alg": alg,
                        "emd_lf": earth_movers_distance(lf_clust_lst, clust_lst),
                        "emd_it": earth_movers_distance(it_clust_lst, clust_lst),
                    }
                )

In [None]:
# Display the collected statistics records as the cell output.
stats

In [None]:
# Tabulate the statistics records: one row per record, one column per key.
# pandas is imported here because no `import pandas as pd` is visible among the
# notebook's imports.
import pandas as pd

df = pd.DataFrame(stats)

In [None]:
# Display the statistics DataFrame as the cell output.
df

In [None]:
# Plot the "nmi" column of df against "mu", one line per algorithm, and save
# the figure to benchmark-nmi.pdf.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import matplotlib.colors as mcolors

fig = plt.figure(figsize=(6, 4))
gs = GridSpec(nrows=1, ncols=1)
ax = fig.add_subplot(gs[0, 0])

algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass", "lf", "it"]
starred = ("lf", "it")  # these two series are drawn with star markers
for alg in algs:
    rows = df[df["alg"] == alg]
    marker = "*" if alg in starred else "."
    ax.plot(rows["mu"], rows["nmi"], label=alg, marker=marker, linewidth=0.5)

ax.grid(linestyle='-')
ax.set_xlabel("Mixing parameter")
ax.set_ylabel("NMI")

# Make room for a legend below the axes by shrinking the axes height by 20%
# from the bottom:
# https://stackoverflow.com/questions/4700614/how-to-put-the-legend-outside-the-plot
old_box = ax.get_position()
print(old_box, old_box.width, old_box.height)
shrunk_bounds = [
    old_box.x0,
    old_box.y0 + old_box.height * 0.2,
    old_box.width,
    old_box.height * 0.8,
]
ax.set_position(shrunk_bounds)
new_box = ax.get_position()
print(new_box, new_box.width, new_box.height)
# Legend centred just below the (shrunken) axes, five entries per row.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.18), ncol=5)

plt.savefig("benchmark-nmi.pdf")

In [None]:
# Scatter each algorithm's "emd_lf" and "emd_it" values against "mu" and save
# the figure to benchmark-distance.pdf.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import matplotlib.colors as mcolors

fig = plt.figure(figsize=(6, 4))
gs = GridSpec(nrows=1, ncols=1)
ax = fig.add_subplot(gs[0, 0])

algs = ["infomap", "louvain", "leiden", "cnm", "label-prop", "markov", "walktrap", "spinglass"]

for alg in algs:
    rows = df[df["alg"] == alg]
    if alg in ("lf", "it"):
        # Nothing is drawn for these two names.
        continue
    # All algorithms share one colour/marker per distance, so only the last
    # algorithm's points carry a label; this yields exactly one legend entry
    # per distance.
    is_last = alg == algs[-1]
    lf_label = {"label": "lf"} if is_last else {}
    it_label = {"label": "it"} if is_last else {}
    ax.scatter(rows["mu"], rows["emd_lf"], marker="x", color="tab:blue", **lf_label)
    ax.scatter(rows["mu"], rows["emd_it"], marker="+", color="tab:orange", **it_label)

ax.grid(linestyle='-')
ax.set_xlabel("Mixing parameter")
ax.set_ylabel("EMD")

# Make room for a legend below the axes by shrinking the axes height by 20%
# from the bottom:
# https://stackoverflow.com/questions/4700614/how-to-put-the-legend-outside-the-plot
old_box = ax.get_position()
print(old_box, old_box.width, old_box.height)
shrunk_bounds = [
    old_box.x0,
    old_box.y0 + old_box.height * 0.2,
    old_box.width,
    old_box.height * 0.8,
]
ax.set_position(shrunk_bounds)
new_box = ax.get_position()
print(new_box, new_box.width, new_box.height)
# Legend centred below the (shrunken) axes, two entries per row.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), ncol=2)

plt.savefig("benchmark-distance.pdf")