# Plot LFR benchmark results

In this notebook plot the benchmark results, comparing the community detection methods under
consideration.

First, we import the necessary packages and set up global plotting parameters.

In [2]:
import os
from collections import namedtuple

import matplotlib.pyplot as plt

from src.lfr.benchmark_results import *
%pylab

plt.rcParams.update({'font.size': 15})
plt.rcParams.update({'text.usetex': True})
plt.rcParams.update({'font.family': 'sans-serif'})
plt.rcParams.update({'lines.linewidth': 2})
plt.rcParams.update({'lines.markersize': 8})
plt.rcParams.update({'lines.markeredgewidth': 2})
plt.rcParams.update({'axes.labelpad': 20})
plt.rcParams['text.latex.preamble'] = [
    r'\usepackage{amsmath,amssymb,amsfonts,amsthm}',
    r'\usepackage{siunitx}',  # i need upright \micro symbols, but you need...
    r'\sisetup{detect-all}',  # ...this to force siunitx to actually use your fonts
    r'\usepackage{helvet}',  # set the normal font here
    r'\usepackage{sansmath}',  # load up the sansmath so that math -> helvet
    r'\sansmath'  # <- tricky! -- gotta actually tell tex to use!
]

# output directory for storing generated figures
fig_dir = '../figures/'
os.makedirs(fig_dir, exist_ok=True)

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


## Plot benchmark AMI results for a fixed average degree and network size $n$ as a function of the mixing parameter $\mu$.

In [3]:
# plot setup
avg_degree = 25 # average node degree
n = 1200 # fixed network size of the benchmark series
plot_uncertainty = True # if True plot uncertainty as a margin of one std deviation
save_figure = True # if True, we save the figure as .pdf in ´fig_dir´

# assemble a list of results to plot (path + plot formatting) -> add to list what you want to plot
base_path = '../results/lfr/ami/'
path_suffix = f'/{avg_degree}deg/{n}n.pkl'
ResultInfo = namedtuple('ResultInfo', ['path', 'label', 'linestyle'])
info = []
info.append(ResultInfo(base_path + 'infomap' + path_suffix, 'Infomap', 'x:'))
info.append(ResultInfo(base_path + 'synwalk' + path_suffix, 'SynWalk', '^:'))
info.append(ResultInfo(base_path + 'walktrap' + path_suffix, 'Walktrap', 's:'))
# info.append(ResultInfo(base_path + 'label_propagation' + path_suffix, 'Label Propagation', 'o:'))

# plot the results
plt.close('all')
plt.figure(figsize=(12,9))

for entry in info:
    results = BenchmarkResults.load(entry.path)
    xdata = results.get_var_list()
    ydata = results.get_mean_scores()
    plt.plot(xdata, ydata, entry.linestyle, label=entry.label)

    if plot_uncertainty:
        upper = np.clip(ydata + results.get_score_std(), 0.0, 1.0)
        lower = np.clip(ydata - results.get_score_std(), 0.0, 1.0)
        plt.fill_between(xdata, upper, lower, alpha=0.25)

# plt.title(f'LFR benchmark\n{n} nodes with avg degree {avg_degree}')
plt.xlabel('Mixing parameter, $\mu$')
plt.ylabel(r'Adjusted Mutual Information, $\mathcal{I}^{adj}(\mathcal{Y}, \mathcal{Y}_{true})$')
plt.legend(loc=1)
plt.tight_layout()

# save figure as .pdf
if save_figure:
    fig_path = fig_dir + f'lfr_ami_vs_mu_{avg_degree}k_{n}n.pdf'
    plt.savefig(fig_path, dpi=600, format='pdf')


## Plot benchmark AMI results for a fixed average degree and mixing parameter $\mu$ as a function of the network size $n$.

In [108]:
# plot setup
avg_degree = 50 # average node degree
mu = 0.35 # fixed mixing parameter of the benchmark series
plot_uncertainty = True # if True plot uncertainty as a margin of one std deviation
save_figure = True # if True, we save the figure as .pdf in ´fig_dir´

# assemble a list of results to plot (path + plot formatting) -> add to list what you want to plot
base_path = '../results/lfr/ami/'
path_suffix = f'/{avg_degree}deg/{int(100*mu)}mu.pkl'
ResultInfo = namedtuple('ResultInfo', ['path', 'label', 'linestyle'])
info = []
info.append(ResultInfo(base_path + 'infomap' + path_suffix, 'Infomap', 'x:'))
info.append(ResultInfo(base_path + 'synwalk' + path_suffix, 'SynWalk', '^:'))
info.append(ResultInfo(base_path + 'walktrap' + path_suffix, 'Walktrap', 's:'))
# info.append(ResultInfo(base_path + 'label_propagation' + path_suffix, 'Label Propagation', 'o:'))

# plot the results
plt.close('all')
plt.figure(figsize=(12,9))

for entry in info:
    results = BenchmarkResults.load(entry.path)
    xdata = avg_degree / results.get_var_list()
    n = results.get_var_list()
    xdata = (avg_degree - 2 + 1.0 / n) / (n - 3 + 2.0 / n)
    print (xdata.shape)
    print(f'Xdata range = [{np.min(xdata), np.max(xdata)}]')
    # xdata = results.get_var_list()
    ydata = results.get_mean_scores()
    plt.plot(xdata, ydata, entry.linestyle, label=entry.label)

    if plot_uncertainty:
        upper = np.clip(ydata + results.get_score_std(), 0.0, 1.0)
        lower = np.clip(ydata - results.get_score_std(), 0.0, 1.0)
        plt.fill_between(xdata, upper, lower, alpha=0.25)

# plt.title(f'LFR benchmark\nmixing parameter {mu}, avg node degree {avg_degree}')
plt.xlabel(r'Network density, $\frac{k^{avg}}{n}\mid_{k^{avg} = %d}$'% avg_degree)
plt.ylabel(r'Adjusted Mutual Information, $\mathcal{I}^{adj}(\mathcal{Y}, \mathcal{Y}_{true})$')
plt.legend(loc=5)
plt.xlim([0.00075, 0.18])
plt.semilogx()

# save figure as .pdf
if save_figure:
    fig_path = fig_dir + f'lfr_ami_vs_nd_{avg_degree}k_{int(100*mu)}mu.pdf'
    plt.savefig(fig_path, dpi=600, format='pdf')


(7,)
Xdata range = [(0.0025003933855746053, 0.16162375704249063)]
(7,)
Xdata range = [(0.0025003933855746053, 0.16162375704249063)]
(7,)
Xdata range = [(0.0025003933855746053, 0.16162375704249063)]


[]

## Plot synwalk error results

Plot the relative deviation of the synwalk objective for a predicted clustering
and the ground truth clustering of a graph.

In [4]:
# plot setup
avg_degree = 15 # average node degree
n = 300 # fixed network size of the benchmark series
save_figure = False # if True, we save the figure as .pdf in ´fig_dir´

# assemble a list of results to plot (path + plot formatting) -> add to list what you want to plot
base_path = '../results/lfr/synwalk_error/'
path_suffix = f'/{avg_degree}deg/{n}n.pkl'
ResultInfo = namedtuple('ResultInfo', ['path', 'label', 'linestyle'])
info = []
info.append(ResultInfo(base_path + 'synwalk' + path_suffix, 'SynWalk', 'r+:'))

# plot the results
plt.close('all')
plt.figure(figsize=(8,6))

lines = []
labels = []
for entry in info:
    results = BenchmarkResults.load(entry.path)
    # plot mean deviation
    xdata = results.get_var_list()
    ydata = results.get_mean_scores()
    plt.plot([0.2,0.8], [0,0], 'k')
    ln, = plt.plot(xdata, ydata, entry.linestyle, label=entry.label)
    lines.append(ln)
    labels.append(entry.label + ' - Mean Deviation')
    # plot individual sample deviations
    for dp in results.datapoints:
        ln = plt.scatter([dp.var] * dp.num_samples, dp.scores, marker='x', c='b', alpha=0.3)

    lines.append(ln)
    labels.append(entry.label + ' - Objective Deviations')

plt.title(f'LFR benchmark\n{n} nodes with avg degree {avg_degree}')
plt.xlabel('Mixing parameter, $\mu$')
plt.ylabel(r'SynWalk Deviation, $\frac{J(\mathcal{Y}) - J(\mathcal{Y}_{true})}{J(\mathcal{Y}_{true})}$')
plt.legend(lines, labels, loc=0)

# save figure as .pdf
if save_figure:
    fig_path = fig_dir + f'lfr_synwalk_err_{avg_degree}k_{n}n.pdf'
    plt.savefig(fig_path, dpi=600, format='pdf')