In [1]:
import numpy as np
import matplotlib as mpl
%matplotlib inline

import pandas as pd

import json
import glob
import os

from networkit import *

In [2]:
def load_result_files(paths):
    """Merge several JSON result files into one nested dictionary.

    Every file is expected to contain a mapping of the form
    ``{typename: {key: {field: value, ...}, ...}, ...}``.  Objects that
    share the same ``typename`` and ``key`` across files are combined
    field by field; on conflicting fields the file that comes later in
    ``paths`` wins.

    Parameters
    ----------
    paths : iterable of str
        Paths of the JSON files to read, in merge order.

    Returns
    -------
    dict
        ``{typename: {key: {field: value}}}`` combined over all files.
    """
    merged = {}
    for path in paths:
        # The context manager closes the handle as soon as the file is
        # parsed instead of leaving it to the garbage collector.
        with open(path, encoding="utf-8") as file:
            content = json.load(file)

        for typename, items in content.items():
            objects = merged.setdefault(typename, {})
            for key, object_data in items.items():
                if key in objects:
                    objects[key].update(object_data)
                else:
                    objects[key] = object_data
    return merged


# glob returns paths in arbitrary order; sorting makes the merge (and thus
# which value wins for a conflicting field) reproducible between runs.
data = load_result_files(sorted(glob.glob("../../data/results/all_real_cmp/*.json")))

# One DataFrame per object type, with one row per object key.
frames = {
    typename: pd.DataFrame.from_dict(items, orient='index')
    for typename, items in data.items()
}

In [3]:
# Display labels used for the algorithms in tables and plots.
dlslm_label = 'DSLM-Mod'
dlslm_me_label = 'DSLM-Map'
seq_postfix = ' w. Seq.'
no_contraction_postfix = ' w/o Contraction'
dlslm_ws_label = dlslm_label + seq_postfix
dlslm_nc_label = dlslm_label + no_contraction_postfix
seq_louvain_label = 'Seq. Louvain'
seq_infomap_label = 'Seq. InfoMap'
plm_label = 'PLM'
relax_map_label = 'RelaxMap'
gossip_map_label = 'GossipMap'

# Raw algorithm names as stored in the result files -> display labels.
algo_name_mapping = {
    'synchronous local moving with map equation': dlslm_me_label,
    'synchronous local moving with modularity': dlslm_label,
    'sequential louvain': seq_louvain_label,
    'sequential infomap': seq_infomap_label,
    'relax map': relax_map_label,
    'gossip map': gossip_map_label
}

frames['algorithm_run'] = frames['algorithm_run'].replace({ 'algorithm': algo_name_mapping })

# Attach the owning program run's options to every algorithm run. The join is
# computed once; both suffixes below read only columns coming from
# 'program_run', so they do not depend on the intermediate label change.
run_options = frames['algorithm_run'].merge(
    frames['program_run'], left_on='program_run_id', right_index=True, how='left'
)

# NOTE: this cell is not idempotent -- the suffixes are appended to the
# current label, so re-running it without reloading `frames` appends them again.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy >= 2.0.
frames['algorithm_run']['algorithm'] += run_options['switch_to_seq'].map(
    { False: '', True: seq_postfix, np.nan: '' }
)
frames['algorithm_run']['algorithm'] += run_options['contraction'].map(
    { False: no_contraction_postfix, True: '', np.nan: '' }
)

In [5]:
# Make sure every algorithm run has a 'runtime'. Runs without a reported value
# fall back to the difference of their end and start timestamps, scaled by 1e6
# (presumably microseconds -> seconds; TODO confirm the timestamp unit).
if 'runtime' not in frames['algorithm_run']:
    frames['algorithm_run']['runtime'] = np.nan

# Assign the filled column back explicitly: an in-place fillna on a column
# selection may act on a temporary copy and leave the frame unchanged.
frames['algorithm_run']['runtime'] = frames['algorithm_run']['runtime'].fillna(
    (frames['algorithm_run']['done_ts'] - frames['algorithm_run']['start_ts']) / 1000000.0
)

In [6]:
# Short display names for the input graphs, keyed by the value stored in the
# program run's 'graph' field. The replacement below matches whole values
# exactly, so the '*' is a literal character here, not a wildcard.
graph_names = {
    'data/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    'data/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    'data/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    'data/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    'data/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    'data/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    'data/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    'data/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon',
    'data/graphs/europe.osm-preprocessed-*.bin': 'osm-europe',
}

# Keep the original value in 'graph_path' before 'graph' is overwritten with
# the display name.
frames['program_run']['graph_path'] = frames['program_run']['graph']
frames['program_run']['graph'] = frames['program_run']['graph'].replace(graph_names)

In [8]:
# Join every clustering with its algorithm run and the owning program run so
# that runtime, algorithm label and graph name sit in one row.
runs_joined = frames['clustering'].merge(
    frames['algorithm_run'], left_on='algorithm_run_id', right_index=True
).merge(
    frames['program_run'], left_on='program_run_id', right_index=True
)

# Mean runtime per (graph, algorithm), rounded to one decimal, pivoted to one
# column per algorithm and restricted to the three DSLM variants.
mean_runtimes = runs_joined.groupby(['graph', 'algorithm'])['runtime'].mean().round(1)
shown_algorithms = [dlslm_label, dlslm_nc_label, dlslm_me_label]
all_data = mean_runtimes.to_frame().unstack()["runtime"][shown_algorithms]

# Order the rows by the graphs' edge counts (ascending).
graphs_by_size = frames['program_run'].sort_values('edge_count')['graph'].dropna().unique()
all_data = all_data.loc[graphs_by_size]

# Per-graph size columns: first recorded node/edge count and the largest host
# count, taken from program runs that have both 'hosts' and 'edge_count'.
graph_data = (
    frames['program_run']
    .dropna(subset=['hosts', 'edge_count'])
    .groupby('graph')
    .agg({ 'node_count': 'first', 'edge_count': 'first', 'hosts': 'max' })
)
graph_data['hosts'] = graph_data['hosts'].astype(int)
graph_data.columns = ['n', 'm', 'hosts']

res = graph_data.sort_values('m').merge(all_data, left_index=True, right_index=True)

# Writing the table to a file is currently disabled; it is printed instead.
# with open("../../dist-thrill-cluster/plots/real_world_runtimes.tex", "w") as file:
print(res.to_latex())
res

\begin{tabular}{lrrrrrr}
\toprule
{} &          n &           m &  hosts &  DSLM-Mod &  DSLM-Mod w/o Contraction &  DSLM-Map \\
graph          &            &             &        &           &                           &           \\
\midrule
com-amazon     &     334863 &      925872 &      2 &       8.8 &                       1.4 &       6.4 \\
com-youtube    &    1134890 &     2987624 &      2 &      13.0 &                       4.5 &      16.4 \\
in-2004        &    1382867 &    13591473 &      4 &      20.1 &                       5.6 &      15.7 \\
com-lj         &    3997962 &    34681189 &      8 &      45.5 &                      22.1 &      75.0 \\
osm-europe     &   50912018 &    54054660 &      8 &     521.6 &                      57.1 &     561.5 \\
com-orkut      &    3072441 &   117185083 &      8 &      56.5 &                      47.2 &     106.8 \\
uk-2002        &   18483186 &   261787258 &      8 &      68.2 &                      30.3 &      76.9 \\
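com-friendster &   65608366 &  1806067135 &     16 &    1403.9 &                     803.1 &    1395.5 \\
uk-2007-05     &  105153952 &  3301876564 &     16 &     230.7 &                     157.2 &     288.4 \\
\bottomrule
\end{tabular}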

Unnamed: 0_level_0,n,m,hosts,DSLM-Mod,DSLM-Mod w/o Contraction,DSLM-Map
graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
com-amazon,334863,925872,2,8.8,1.4,6.4
com-youtube,1134890,2987624,2,13.0,4.5,16.4
in-2004,1382867,13591473,4,20.1,5.6,15.7
com-lj,3997962,34681189,8,45.5,22.1,75.0
osm-europe,50912018,54054660,8,521.6,57.1,561.5
com-orkut,3072441,117185083,8,56.5,47.2,106.8
uk-2002,18483186,261787258,8,68.2,30.3,76.9
com-friendster,65608366,1806067135,16,1403.9,803.1,1395.5
uk-2007-05,105153952,3301876564,16,230.7,157.2,288.4
