In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import json
import glob
import os

# Switch matplotlib to its 'ggplot' style sheet for figures created after this point.
plt.style.use('ggplot')

In [2]:
# Glob patterns of the JSON result reports to merge. Order matters: when two
# reports contain the same record id, the later file's entry wins.
result_patterns = [
    "~/dev/ma/data/results/seq_louvain/*.json",
    "~/dev/ma/data/results/dlm_all/*.json",
    "~/dev/ma/data/results/plm_all/*.json",
]

files = []
for pattern in result_patterns:
    files += glob.glob(os.path.expanduser(pattern))

# data maps a record type name to {record id: fields}, merged over all reports.
data = {}
for path in files:
    # Close each report as soon as it is parsed; the previous
    # json.load(open(path)) left the handle open until garbage collection.
    with open(path) as report_file:
        report = json.load(report_file)
    for typename, items in report.items():
        data.setdefault(typename, {}).update(items)

# One DataFrame per record type; the record ids become the index.
frames = {
    typename: pd.DataFrame.from_dict(items, orient='index')
    for typename, items in data.items()
}

# Short display labels for the long algorithm names stored in the results.
# Names that are not listed here are left unchanged by replace().
algorithm_names = dict([
    ('thrill partitioned louvain', 'PLPLM'),
    ('thrill node based fully distributed local moving', 'PLSLM'),
])

# Relabel the 'algorithm' column of the algorithm_run table in place.
frames['algorithm_run'].replace(to_replace={'algorithm': algorithm_names}, inplace=True)

# Display names for the benchmark graphs, keyed by the input path pattern
# stored in the 'graph' column of the program_run table.
graph_names = {
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000-sorted-preprocessed-*.bin': 'LFR 100K',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_1000000-preprocessed-*.bin': 'LFR 1M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_10000000-preprocessed-*.bin': 'LFR 10M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000000-preprocessed-*.bin': 'LFR 100M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_200000000-sorted-preprocessed-*.bin': 'LFR 200M',
    '/home/kit/iti/kp0036/graphs/hypercubegraph23-preprocessed-*.bin': 'hypercube',
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon',
}

program_runs = frames['program_run']
# Keep the raw path pattern in 'graph_path' before 'graph' is relabelled.
program_runs['graph_path'] = program_runs['graph']
# Only the 'graph' column is rewritten; unknown paths stay as they are.
program_runs.replace(to_replace={'graph': graph_names}, inplace=True)

def calc_worker_label(x):
    """Build the worker-configuration label for one program run.

    Parameters
    ----------
    x : mapping or pandas.Series
        Record providing 'hosts' and, when 'hosts' is present,
        'workers_per_host'.

    Returns
    -------
    str
        'sequential' when 'hosts' is missing (NaN, None or pd.NA),
        otherwise '<hosts>x<workers_per_host>' with both counts as integers.
    """
    # pd.isna accepts None and pd.NA as well as NaN; np.isnan raises
    # TypeError when handed None.
    if pd.isna(x['hosts']):
        return 'sequential'
    return '{}x{}'.format(int(x['hosts']), int(x['workers_per_host']))
        
# Compute one label per program run (row-wise), then store it as a new column.
worker_labels = frames['program_run'].apply(calc_worker_label, axis=1)
frames['program_run']['worker_label'] = worker_labels

In [3]:
# Ground-truth modularity and cluster count per LFR graph, indexed by the
# graph's display name.
ground_truth_columns = ['graph', 'modularity', 'cluster_count']
ground_truth_rows = [
    ('LFR 10M', 0.59928, 4542),
    ('LFR 1M', 0.592803, 457),
    ('LFR 100K', 0.524983, 49),
    ('LFR 200M', 0.599963, 91636),
    ('LFR 100M', 0.599928, 45653),
]

frames['ground_truth'] = pd.DataFrame(
    ground_truth_rows, columns=ground_truth_columns
).set_index('graph')


In [5]:
# Attach to every clustering the algorithm run and program run it belongs to.
clusterings = (
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
)

# Aggregate per (algorithm, graph): first host count, mean runtime and modularity.
per_algorithm_graph = clusterings.groupby(['algorithm', 'graph']).agg(
    {'hosts': 'first', 'runtime': 'mean', 'modularity': 'mean'}
)

# Reshape to one row per graph with (algorithm, metric) column pairs.
all_data = per_algorithm_graph.stack().unstack(['algorithm']).unstack()

# The PLSLM host count becomes the single 'hosts' column of the table.
all_data[('', 'hosts')] = all_data[('PLSLM', 'hosts')].astype(int)
# Aligned on the graph index; graphs without ground truth get NaN.
all_data[('ground truth', 'modularity')] = frames['ground_truth']['modularity']

# Keep only the columns shown in the final table, in presentation order.
column_order = [
    ('ground truth', 'modularity'),
    ('sequential louvain', 'runtime'),
    ('sequential louvain', 'modularity'),
    ('', 'hosts'),
    ('PLPLM', 'runtime'),
    ('PLPLM', 'modularity'),
    ('PLSLM', 'runtime'),
    ('PLSLM', 'modularity'),
]
all_data = all_data[column_order]

def my_round(x):
    """Round a value to one decimal place, mapping falsy values to NaN.

    Parameters
    ----------
    x : number or None
        Value to round.

    Returns
    -------
    float
        ``round(x, 1)`` when ``x`` is truthy, otherwise NaN. Note that 0 and
        None are falsy and therefore become NaN, while NaN itself is truthy
        and passes through ``round`` unchanged.
    """
    if x:
        return round(x, 1)
    # np.nan rather than np.NaN: the capitalised alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    return np.nan
    
# Runtime columns that are rounded to one decimal place for the table.
runtime_columns = [
    ('sequential louvain', 'runtime'),
    ('PLPLM', 'runtime'),
    ('PLSLM', 'runtime'),
]


def _rounded_runtimes(row):
    """Return the row's three runtimes passed through my_round, in column order."""
    return pd.Series([my_round(row[column]) for column in runtime_columns])


all_data[runtime_columns] = all_data.apply(_rounded_runtimes, axis=1)

# Order the rows by the edge count recorded for each graph's program runs.
graphs_by_edge_count = (
    frames['program_run'].sort_values('edge_count')['graph'].dropna().unique()
)
all_data = all_data.loc[graphs_by_edge_count]

# Emit the LaTeX table: blank out missing cells and add the unit to 'runtime'.
latex_table = all_data.to_latex()
print(latex_table.replace('NaN', '   ').replace('runtime', 'runtime [s]'))

all_data

\begin{tabular}{lrrrrrrrr}
\toprule
algorithm & ground truth & \multicolumn{3}{l}{sequential louvain} & \multicolumn{2}{l}{PLPLM} & \multicolumn{2}{l}{PLSLM} \\
{} &   modularity &            runtime [s] & modularity & hosts & runtime [s] & modularity & runtime [s] & modularity \\
graph          &              &                    &            &       &         &            &         &            \\
\midrule
com-amazon     &              &                1.1 &   0.925990 &     2 &         &            &    35.5 &   0.925443 \\
com-youtube    &              &                6.3 &   0.719774 &     2 &   150.6 &   0.713472 &    41.2 &   0.720974 \\
LFR 100K       &     0.524983 &                1.5 &   0.521162 &     2 &         &            &     9.6 &   0.524983 \\
in-2004        &              &               14.6 &   0.980155 &     4 &    68.6 &   0.980307 &    98.2 &   0.980120 \\
com-lj         &              &              102.3 &   0.748221 &     8 &   222.1 &   0.744079 &   167.7

algorithm,ground truth,sequential louvain,sequential louvain,Unnamed: 4_level_0,PLPLM,PLPLM,PLSLM,PLSLM
Unnamed: 0_level_1,modularity,runtime,modularity,hosts,runtime,modularity,runtime,modularity
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
com-amazon,,1.1,0.92599,2,,,35.5,0.925443
com-youtube,,6.3,0.719774,2,150.6,0.713472,41.2,0.720974
LFR 100K,0.524983,1.5,0.521162,2,,,9.6,0.524983
in-2004,,14.6,0.980155,4,68.6,0.980307,98.2,0.98012
com-lj,,102.3,0.748221,8,222.1,0.744079,167.7,0.730967
hypercube,,1534.8,0.572928,8,,,115.4,0.539654
com-orkut,,173.7,0.667512,8,217.5,0.664707,132.8,0.657185
LFR 1M,0.592803,29.2,0.59149,4,49.1,0.592719,27.2,0.592534
uk-2002,,587.3,0.989749,8,324.4,0.989725,222.0,0.989648
LFR 10M,0.59928,1021.0,0.599284,8,225.2,0.599273,151.7,0.596983
