In [1]:
import numpy as np
import matplotlib as mpl
%matplotlib inline

import pandas as pd

import json
import glob
import os

from subprocess import call
from networkit import *

In [2]:
# Location of the JSON result files to load (absolute, machine-specific path).
RESULTS_GLOB = "/amd.home/home/i11/zeitz/ma/data/results/paper/all_real/*.json"


def load_results(pattern):
    """Merge every JSON result file matching ``pattern`` into a single dict.

    Each file must contain a JSON object mapping a type name to an object of
    items. Items that share a type name across files are merged with
    ``dict.update``, so when two files define the same item key the file that
    is read later wins.

    Args:
        pattern: glob pattern selecting the JSON files; a leading ``~`` is
            expanded to the user's home directory.

    Returns:
        dict mapping type name -> merged items. Empty when nothing matches.
    """
    merged = {}
    # glob returns paths in filesystem-dependent order; sorting makes the
    # outcome of key collisions (and the resulting row order) reproducible.
    for path in sorted(glob.glob(os.path.expanduser(pattern))):
        # The context manager closes the handle right after parsing instead of
        # leaving one open file per result file to the garbage collector.
        with open(path) as result_file:
            content = json.load(result_file)
        for typename, items in content.items():
            if typename in merged:
                merged[typename].update(items)
            else:
                merged[typename] = items
    return merged


data = load_results(RESULTS_GLOB)

# One DataFrame per type name; the item keys become the row index.
frames = { typename: pd.DataFrame.from_dict(items, orient='index') for typename, items in data.items() }

In [3]:
# Short display names for the algorithm identifiers found in the result files.
algo_name_mapping = {
    'synchronous local moving with map equation': 'Distr. MapEq',
    'synchronous local moving with modularity': 'Distr. Modularity'
}

# Suffix marking runs whose program run has switch_to_seq set to True.
seq_suffix = ' w. Seq.'

# Assign the result instead of mutating with inplace=True; only the
# 'algorithm' column is affected by the nested mapping.
frames['algorithm_run'] = frames['algorithm_run'].replace({ 'algorithm': algo_name_mapping })

# switch_to_seq is a column of program_run; the left join looks it up for each
# algorithm run via program_run_id (runs without a match get NaN).
switch_to_seq = frames['algorithm_run'].merge(
    frames['program_run'], left_on='program_run_id', right_index=True, how='left'
)['switch_to_seq']

# np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0 and
# raises AttributeError there. False and missing values get no suffix.
suffixes = switch_to_seq.map({ False: '', True: seq_suffix, np.nan: '' })

# Re-running this cell must not append the suffix a second time, so names that
# already carry it are left untouched.
already_suffixed = frames['algorithm_run']['algorithm'].str.endswith(seq_suffix, na=False)
frames['algorithm_run']['algorithm'] = (
    frames['algorithm_run']['algorithm'] + suffixes.mask(already_suffixed, '')
)

In [4]:
# Duration of each algorithm run: (done_ts - start_ts) scaled down by 1e6.
# NOTE(review): presumably microsecond timestamps converted to seconds, since
# the table printed later labels this column 'runtime [s]' — confirm.
frames['algorithm_run']['runtime'] = (
    frames['algorithm_run']['done_ts']
    .sub(frames['algorithm_run']['start_ts'])
    .div(1000000.0)
)

In [5]:
# Preserve the original graph path before 'graph' is shortened below. Guarded
# so that re-running this cell does not overwrite the stored paths with the
# already-shortened names.
if 'graph_path' not in frames['program_run'].columns:
    frames['program_run']['graph_path'] = frames['program_run']['graph']

# Graph path (as recorded in the results) -> short display name. The keys are
# matched as literal strings (replace defaults to regex=False), so the '*' is
# an ordinary character here, not a wildcard.
graph_names = {
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon',
    '/home/kit/iti/kp0036/graphs/europe.osm-preprocessed-*.bin': 'osm-europe',
}

# Assign the result instead of mutating with inplace=True; the nested mapping
# restricts the replacement to the 'graph' column, so 'graph_path' is untouched.
frames['program_run'] = frames['program_run'].replace({ 'graph': graph_names })

In [7]:
# Attach to every clustering row its algorithm run, and to that the program run.
joined_runs = frames['clustering'].merge(
    frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
joined_runs = joined_runs.merge(
    frames['program_run'], left_on='program_run_id', right_index=True)

# Per (algorithm, graph): first host count seen, mean of every other metric.
aggregations = {
    'hosts': 'first',
    'runtime': 'mean',
    'modularity': 'mean',
    'map_equation': 'mean',
    'cluster_count': 'mean',
}
per_algorithm_and_graph = joined_runs.groupby(['algorithm', 'graph']).agg(aggregations)

# Pivot: graphs become the rows, (algorithm, metric) pairs become the columns.
all_data = per_algorithm_and_graph.stack().unstack(['algorithm']).unstack()

# A single host-count column with an empty algorithm label, taken from the
# 'Distr. MapEq' runs and cast to int.
all_data[('', 'hosts')] = all_data[('Distr. MapEq', 'hosts')].astype(int)

# Column layout of the final table: hosts first, then the four metrics for
# each of the two algorithms shown.
shown_algorithms = ['Distr. Modularity w. Seq.', 'Distr. MapEq']
shown_metrics = ['runtime', 'modularity', 'map_equation', 'cluster_count']
column_order = [('', 'hosts')] + [
    (algorithm, metric)
    for algorithm in shown_algorithms
    for metric in shown_metrics
]
all_data = all_data[column_order]

# Rows follow the graphs' order of appearance after sorting program runs by
# ascending edge_count.
graph_order = frames['program_run'].sort_values('edge_count')['graph'].dropna().unique()
all_data = all_data.loc[graph_order]

# LaTeX export: blank out missing values and add the unit to the runtime header.
latex_table = all_data.to_latex()
latex_table = latex_table.replace('NaN', '   ')
latex_table = latex_table.replace('runtime', 'runtime [s]')
print(latex_table)

all_data

\begin{tabular}{lrrrrrrrrr}
\toprule
algorithm & \multicolumn{4}{l}{Distr. Modularity w. Seq.} & \multicolumn{4}{l}{Distr. MapEq} \\
{} & hosts &                   runtime [s] & modularity & map\_equation & cluster\_count &      runtime [s] & modularity & map\_equation & cluster\_count \\
graph          &       &                           &            &              &               &              &            &              &               \\
\midrule
com-amazon     &     2 &                  1.381012 &   0.926326 &     8.296293 &         235.0 &     5.911048 &   0.830689 &     5.309140 &       14212.0 \\
com-youtube    &     2 &                  6.555162 &   0.719162 &    10.266429 &        6194.0 &    15.259467 &   0.574181 &     8.544769 &       52727.0 \\
in-2004        &     4 &                  5.053232 &   0.980193 &     7.253690 &         929.0 &    12.258572 &   0.938075 &     6.296900 &       27198.0 \\
com-lj         &     8 &                 22.829539 &   0.749858 &    11.8

algorithm,Unnamed: 1_level_0,Distr. Modularity w. Seq.,Distr. Modularity w. Seq.,Distr. Modularity w. Seq.,Distr. Modularity w. Seq.,Distr. MapEq,Distr. MapEq,Distr. MapEq,Distr. MapEq
Unnamed: 0_level_1,hosts,runtime,modularity,map_equation,cluster_count,runtime,modularity,map_equation,cluster_count
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
com-amazon,2,1.381012,0.926326,8.296293,235.0,5.911048,0.830689,5.30914,14212.0
com-youtube,2,6.555162,0.719162,10.266429,6194.0,15.259467,0.574181,8.544769,52727.0
in-2004,4,5.053232,0.980193,7.25369,929.0,12.258572,0.938075,6.2969,27198.0
com-lj,8,22.829539,0.749858,11.831103,1568.0,55.600465,0.633798,9.978074,76662.0
osm-europe,8,158.296461,0.998935,9.843927,3048.0,198.753312,0.938616,4.350022,2188001.0
com-orkut,8,46.849471,0.656178,12.969572,30.0,102.769051,0.540202,11.89763,15281.0
uk-2002,8,40.686056,0.98974,8.21757,4967.0,70.592353,0.95949,6.46731,186717.0
com-friendster,16,1112.737257,0.611034,15.702329,79079.0,1315.601055,0.471586,14.781484,586610.0
uk-2007-05,16,238.920323,0.995823,8.9372,71212.0,318.782911,0.97235,8.05644,375345.0
