In [1]:
import numpy as np
import matplotlib as mpl
%matplotlib inline

import pandas as pd

import json
import glob
import os

from networkit import *

In [2]:
# Collect every result JSON from both result directories into one dict per
# record type. Each file is read as {record type name: {record id: fields}}.
data = {}

result_patterns = [
    "/amd.home/home/i11/zeitz/ma/data/results/paper/all_real2/*.json",
    "/amd.home/home/i11/zeitz/ma/data/results/paper/all_real_seq/*.json",
]

for pattern in result_patterns:
    # glob returns files in filesystem order; sorting makes the merge
    # deterministic, since later files overwrite earlier ones on duplicate ids.
    for path in sorted(glob.glob(os.path.expanduser(pattern))):
        # Close the handle right after parsing instead of leaking it.
        with open(path) as result_file:
            records_by_type = json.load(result_file)
        for typename, items in records_by_type.items():
            data.setdefault(typename, {}).update(items)

# One DataFrame per record type; rows are indexed by record id (orient='index').
frames = { typename: pd.DataFrame.from_dict(items, orient='index') for typename, items in data.items() }

In [3]:
# Display labels for the algorithms that appear in the result tables.
seq_louvain_label = 'Seq. Louvain'
seq_infomap_label = 'Seq. Infomap'
dlslm_label = 'DSLM-Mod'
dlslm_me_label = 'DSLM-Map'

# Suffixes appended to a base label to mark a configuration variant.
seq_postfix = ' w. Seq.'
no_contraction_postfix = ' w/o Contraction'
dlslm_ws_label = f'{dlslm_label}{seq_postfix}'
dlslm_nc_label = f'{dlslm_label}{no_contraction_postfix}'

# Raw algorithm names as stored in the result records -> display label.
algo_name_mapping = dict([
    ('synchronous local moving with map equation', dlslm_me_label),
    ('synchronous local moving with modularity', dlslm_label),
    ('sequential louvain', seq_louvain_label),
    ('sequential infomap', seq_infomap_label),
])

# Replace the raw algorithm names with their display labels.
frames['algorithm_run'] = frames['algorithm_run'].replace({ 'algorithm': algo_name_mapping })

# Look up each algorithm run's program-run settings once (left join keeps every
# algorithm run and its index, so the mapped suffixes align with the frame).
run_settings = frames['algorithm_run'].merge(frames['program_run'], left_on='program_run_id', right_index=True, how='left')

# Append a variant suffix depending on the program-run flags; a missing flag adds
# nothing. `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
# NOTE: this cell is not idempotent - re-running it appends the suffixes again.
frames['algorithm_run']['algorithm'] += run_settings['switch_to_seq'].map({ False: '', True: seq_postfix, np.nan: '' })
frames['algorithm_run']['algorithm'] += run_settings['contraction'].map({ False: no_contraction_postfix, True: '', np.nan: '' })

In [4]:
# Fill in missing runtimes from the start/done timestamps.
# Assign the result back to the column: calling fillna(inplace=True) on the
# selected column is chained assignment, which does not update the frame under
# pandas Copy-on-Write.
# NOTE(review): the 1000000.0 divisor presumably converts microseconds to seconds - confirm.
frames['algorithm_run']['runtime'] = frames['algorithm_run']['runtime'].fillna(
    (frames['algorithm_run']['done_ts'] - frames['algorithm_run']['start_ts']) / 1000000.0)

In [5]:
# Keep the original graph path in its own column before 'graph' is shortened.
# Only copy it the first time: on a re-run 'graph' already holds the short
# names, and copying again would overwrite the preserved paths.
if 'graph_path' not in frames['program_run']:
    frames['program_run']['graph_path'] = frames['program_run']['graph']

# Graph path as recorded in the results -> short graph name used in the tables.
# The keys are matched as literal strings (the '*' is part of the stored value).
graph_names = {
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon',
    '/home/kit/iti/kp0036/graphs/europe.osm-preprocessed-*.bin': 'osm-europe',
}

# Shorten the values of the 'graph' column only; 'graph_path' keeps the originals.
frames['program_run'] = frames['program_run'].replace({ 'graph': graph_names })

In [52]:
# Mean runtime per (graph, algorithm), rounded to one decimal and pivoted so
# that each selected algorithm becomes a column.
runtime_columns = [dlslm_label, dlslm_nc_label, dlslm_me_label, seq_louvain_label, seq_infomap_label]
all_data = (
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    .groupby(['graph', 'algorithm'])['runtime']
    .mean()
    .round(1)
    .to_frame()
    .unstack()
)["runtime"][runtime_columns]

# Order the rows by the edge count of the graphs.
graphs_by_edge_count = frames['program_run'].sort_values('edge_count')['graph'].dropna().unique()
all_data = all_data.loc[graphs_by_edge_count]

# Per-graph size and host count, taken from program runs that recorded 'hosts'.
graph_data = (
    frames['program_run']
    .dropna(subset=['hosts'])
    [['graph', 'node_count', 'edge_count', 'hosts']]
    .set_index('graph')
    .drop_duplicates(keep='first')
    .astype({'hosts': int})
    .rename(columns={'node_count': 'n', 'edge_count': 'm'})
)
res = graph_data.sort_values('m').merge(all_data, left_index=True, right_index=True)

# Export the combined table as LaTeX, then show it as the cell result.
with open("../../dist-thrill-cluster/plots/real_world_runtimes.tex", "w") as tex_file:
    tex_file.write(res.to_latex() + "\n")
res

Unnamed: 0_level_0,n,m,hosts,DSLM-Mod,DSLM-Mod w/o Contraction,DSLM-Map,Seq. Louvain,Seq. Infomap
graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
com-amazon,334863,925872,2,5.7,1.0,4.6,1.2,23.8
com-youtube,1134890,2987624,2,9.6,3.9,14.7,6.9,113.1
in-2004,1382867,13591473,4,13.4,4.0,11.7,16.5,131.2
com-lj,3997962,34681189,8,31.4,11.4,45.3,94.8,1093.8
osm-europe,50912018,54054660,8,156.8,45.9,164.1,1607.3,
com-orkut,3072441,117185083,8,45.3,34.3,83.5,164.8,2478.9
uk-2002,18483186,261787258,8,48.0,20.3,52.1,529.4,5614.0
com-friendster,65608366,1806067135,16,993.3,1093.1,1143.8,5499.1,
uk-2007-05,105153952,3301876564,16,162.5,108.8,220.3,7260.0,


In [7]:
# Aggregate per (algorithm, graph): first host count and the means of the
# runtime and quality columns, then pivot to one row per graph with
# (algorithm, metric) columns.
all_data = (
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    .groupby(['algorithm', 'graph'])
    .agg({ 'hosts': 'first', 'runtime': 'mean', 'modularity': 'mean', 'map_equation': 'mean', 'cluster_count': 'mean' })
    .stack()
    .unstack(['algorithm'])
    .unstack()
)

# A single shared 'hosts' column, taken from the DSLM-Map runs.
all_data[('', 'hosts')] = all_data[(dlslm_me_label, 'hosts')].astype(int)

# Column layout: hosts first, then the four metrics for each algorithm in turn.
table_algorithms = [seq_louvain_label, dlslm_nc_label, seq_infomap_label, dlslm_me_label]
table_metrics = ['runtime', 'modularity', 'map_equation', 'cluster_count']
table_columns = [('', 'hosts')] + [
    (algorithm, metric)
    for algorithm in table_algorithms
    for metric in table_metrics
]
all_data = all_data[table_columns]

# Order the rows by the edge count of the graphs.
graphs_by_edge_count = frames['program_run'].sort_values('edge_count')['graph'].dropna().unique()
all_data = all_data.loc[graphs_by_edge_count]

# Print the LaTeX source with blanked-out missing values and a unit on the runtime header.
latex_table = all_data.to_latex()
latex_table = latex_table.replace('NaN', '   ')
latex_table = latex_table.replace('runtime', 'runtime [s]')
print(latex_table)

all_data

\begin{tabular}{lrrrrrrrrrrrrrrrrr}
\toprule
algorithm & \multicolumn{4}{l}{Seq. Louvain} & \multicolumn{4}{l}{DSLM-Mod w/o Contraction} & \multicolumn{4}{l}{Seq. Infomap} & \multicolumn{4}{l}{DSLM-Map} \\
{} & hosts &      runtime [s] & modularity & map\_equation & cluster\_count &                  runtime [s] & modularity & map\_equation & cluster\_count &      runtime [s] & modularity & map\_equation & cluster\_count &      runtime [s] & modularity & map\_equation & cluster\_count \\
graph          &       &              &            &              &               &                          &            &              &               &              &            &              &               &              &            &              &               \\
\midrule
com-amazon     &     2 &      1.18867 &   0.926283 &      8.26193 &         244.0 &                 1.018540 &   0.662349 &     6.423891 &       58321.0 &      23.8371 &   0.833251 &      5.24015 &       15450.0 &     4.62511

algorithm,Unnamed: 1_level_0,Seq. Louvain,Seq. Louvain,Seq. Louvain,Seq. Louvain,DSLM-Mod w/o Contraction,DSLM-Mod w/o Contraction,DSLM-Mod w/o Contraction,DSLM-Mod w/o Contraction,Seq. Infomap,Seq. Infomap,Seq. Infomap,Seq. Infomap,DSLM-Map,DSLM-Map,DSLM-Map,DSLM-Map
Unnamed: 0_level_1,hosts,runtime,modularity,map_equation,cluster_count,runtime,modularity,map_equation,cluster_count,runtime,modularity,map_equation,cluster_count,runtime,modularity,map_equation,cluster_count
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
com-amazon,2,1.18867,0.926283,8.26193,244.0,1.01854,0.662349,6.423891,58321.0,23.8371,0.833251,5.24015,15450.0,4.625115,0.831016,5.308886,14195.0
com-youtube,2,6.93669,0.718357,10.2581,7153.0,3.884698,0.593793,9.459313,205037.0,113.097,0.581635,8.44782,59257.0,14.685556,0.575558,8.543954,52631.0
in-2004,4,16.4854,0.980143,7.2624,929.0,4.042109,0.878717,6.666957,104124.0,131.183,0.935296,6.28732,28653.0,11.697303,0.937861,6.298318,27122.0
com-lj,8,94.7533,0.752447,11.8418,2272.0,11.369681,0.571526,10.504783,334165.0,1093.8,0.642551,9.9004,84945.0,45.284255,0.632626,9.980078,76991.0
osm-europe,8,1607.29,0.998942,9.84196,3037.0,45.878178,0.485844,10.453295,22737159.0,,,,,164.115861,0.938606,4.349963,2188584.0
com-orkut,8,164.769,0.667288,12.9111,33.0,34.275958,0.537024,12.263079,27384.0,2478.87,0.558472,11.8249,14821.0,83.48925,0.540117,11.896035,15326.0
uk-2002,8,529.38,0.989801,8.23784,4952.0,20.346798,0.876513,7.068022,962802.0,5613.98,0.95854,6.45794,198601.0,52.091819,0.960123,6.468501,186116.0
com-friendster,16,5499.12,0.621828,15.6447,32442.0,1093.074086,0.575267,15.38197,1456400.0,,,,,1143.842553,0.472372,14.788328,585763.0
uk-2007-05,16,7260.0,0.996251,9.06702,20235.0,108.757773,0.907177,8.470246,2321666.0,,,,,220.287635,0.972456,8.056724,375587.0
