# Run the LFR benchmark

In this notebook we run the community detection methods under
comparison on sets of generated LFR benchmark graphs.

First, we import the necessary packages.

In [2]:
%load_ext autoreload
%autoreload 2

import os

from src.lfr.run_benchmark import run_benchmark, evaluate_clustering_results


## Benchmarks with fixed average degree and network size

Run benchmarks for fixed average degree $k^{avg}$ and network size $n$ as a
function of the mixing parameter $\mu$.

In [None]:
# benchmark configuration
avg_degree_list = [15, 20, 25, 50]
n_list = [300, 600, 1200]
selected_methods = ['infomap', 'synwalk', 'walktrap']
overwrite_results = True

# One benchmark set per (average degree, network size) pair;
# the average degree is the slowest-varying index.
benchmark_sets = [(deg, size) for deg in avg_degree_list for size in n_list]
num_benchmark_sets = len(benchmark_sets)
num_methods = len(selected_methods)

for i, (avg_degree, n) in enumerate(benchmark_sets, start=1):
    print(f'Starting benchmark for benchmark set {i}/{num_benchmark_sets}...')
    # sub-path shared by the input graphs and the per-method results
    set_subdir = f'{avg_degree}deg/{n}n/'
    benchmark_dir = f'../data/lfr_benchmark/{set_subdir}'

    for j, method in enumerate(selected_methods):
        print(f'Testing method {j+1}/{num_methods} ({method})...')

        # create the results directory if it does not exist yet
        results_dir = f'../results/lfr/clustering/{method}/{set_subdir}'
        os.makedirs(results_dir, exist_ok=True)

        run_benchmark(benchmark_dir, results_dir, method, overwrite_results=overwrite_results)

Starting benchmark for benchmark set 1/12...
Testing method 1/3 (infomap)...
Completed 30/30 data points.
Testing method 2/3 (synwalk)...


Evaluate predicted clusterings for fixed average degree $k^{avg}$ and network size $n$ as a
function of the mixing parameter $\mu$.

In [None]:
# evaluation configuration
metric = 'ami'
avg_degree_list = [15, 20, 25, 50]
n_list = [300, 600, 1200]
selected_methods = ['infomap', 'synwalk', 'walktrap']

# One benchmark set per (average degree, network size) pair;
# the average degree is the slowest-varying index.
benchmark_sets = [(deg, size) for deg in avg_degree_list for size in n_list]
num_benchmark_sets = len(benchmark_sets)
num_methods = len(selected_methods)

for i, (avg_degree, n) in enumerate(benchmark_sets, start=1):
    print(f'Evaluating predicted clusterings for benchmark set {i}/{num_benchmark_sets}...')
    benchmark_dir = f'../data/lfr_benchmark/{avg_degree}deg/{n}n/'

    for j, method in enumerate(selected_methods):
        print(f'Evaluating method {j+1}/{num_methods} ({method})...')

        # score the stored predictions of this method as a function of mu
        pred_dir = f'../results/lfr/clustering/{method}/{avg_degree}deg/{n}n/'
        results = evaluate_clustering_results(benchmark_dir, pred_dir, metric, variable='mu')

        # one pickle per network size, grouped by metric / method / average degree
        results_dir = f'../results/lfr/{metric}/{method}/{avg_degree}deg/'
        os.makedirs(results_dir, exist_ok=True)
        results.save(f'{results_dir}{n}n.pkl')


## Benchmarks with fixed average degree and mixing parameter

Run benchmarks for fixed average degree $k^{avg}$ and mixing parameter $\mu$ as
a function of the network size $n$.

In [None]:
# benchmark setup
avg_degree_list = [15, 25, 50]
mu_list = [0.35, 0.45, 0.55]
selected_methods = ['infomap', 'synwalk', 'walktrap']
overwrite_results = True

i = 0
num_benchmark_sets = len(mu_list) * len(avg_degree_list)
for avg_degree in avg_degree_list:
    for mu in mu_list:
        i += 1
        print(f'Starting benchmark for benchmark set {i}/{num_benchmark_sets}...')

        # Directory names encode mu as an integer percentage. Use round() rather
        # than int(): int() truncates, and products such as 0.29 * 100 evaluate to
        # 28.999999999999996 in floating point, which would silently select the
        # '28mu' directory instead of '29mu'.
        mu_percent = round(mu * 100)
        benchmark_dir = f'../data/lfr_benchmark/{avg_degree}deg/{mu_percent}mu/'

        for j, method in enumerate(selected_methods):
            print(f'Testing method {j+1}/{len(selected_methods)} ({method})...')

            # create results directory if necessary
            results_dir = '../results/lfr/clustering/' + method + f'/{avg_degree}deg/{mu_percent}mu/'
            os.makedirs(results_dir, exist_ok=True)

            run_benchmark(benchmark_dir, results_dir, method, overwrite_results=overwrite_results)


Evaluate predicted clusterings for fixed average degree $k^{avg}$ and mixing parameter $\mu$ as
a function of the network size $n$.

In [None]:
# benchmark setup
metric = 'ami'
avg_degree_list = [15, 25, 50]
mu_list = [0.35, 0.45, 0.55]
selected_methods = ['infomap', 'synwalk', 'walktrap']

i = 0
num_benchmark_sets = len(mu_list) * len(avg_degree_list)
for avg_degree in avg_degree_list:
    for mu in mu_list:
        i += 1
        print(f'Evaluating predicted clusterings for benchmark set {i}/{num_benchmark_sets}...')

        # Directory and file names encode mu as an integer percentage. Use round()
        # rather than int(): int() truncates, and products such as 100 * 0.29
        # evaluate to 28.999999999999996 in floating point, which would silently
        # read from and write to '28mu' instead of '29mu'.
        mu_percent = round(100 * mu)
        benchmark_dir = f'../data/lfr_benchmark/{avg_degree}deg/{mu_percent}mu/'

        for j, method in enumerate(selected_methods):
            print(f'Evaluating method {j+1}/{len(selected_methods)} ({method})...')
            pred_dir = '../results/lfr/clustering/' + method + f'/{avg_degree}deg/{mu_percent}mu/'

            # evaluate the stored predictions as a function of the network size n
            results = evaluate_clustering_results(benchmark_dir, pred_dir, metric, variable='n')

            # one pickle per mixing parameter, grouped by metric / method / average degree
            results_dir = '../results/lfr/' + metric + '/' + method + f'/{avg_degree}deg/'
            os.makedirs(results_dir, exist_ok=True)
            results.save(results_dir + f'{mu_percent}mu.pkl')