In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import json
import glob
import os

plt.style.use('ggplot') # Make the graphs a bit prettier

In [11]:
def load_results(paths):
    """Merge JSON result files into one dict of records per record type.

    Every file is parsed as a JSON object whose top-level keys are type names
    and whose values are objects of records keyed by id. Records of the same
    type coming from different files are combined; if the same record id
    occurs in several files, the file that appears later in ``paths`` wins.

    Parameters
    ----------
    paths : iterable of str
        Paths of the JSON files to read, in merge order.

    Returns
    -------
    dict
        ``{typename: {record_id: record}}`` combined over all files; an empty
        dict when ``paths`` is empty.
    """
    merged = {}
    for path in paths:
        # Close each handle right after parsing instead of leaving it open
        # until the garbage collector gets around to it.
        with open(path) as result_file:
            content = json.load(result_file)
        for typename, items in content.items():
            merged.setdefault(typename, {}).update(items)
    return merged


# Result directories, merged in this order (later files override earlier ones
# on duplicate record ids).
RESULT_PATTERNS = [
    "~/dev/ma/data/results/seq_louvain/*.json",
    "~/dev/ma/data/results/dlm_all/*.json",
    "~/dev/ma/data/results/plm_all/*.json",
]

# glob yields matches in arbitrary order; sorting within each directory makes
# the merge order (and therefore the result on id collisions) reproducible.
files = [
    path
    for pattern in RESULT_PATTERNS
    for path in sorted(glob.glob(os.path.expanduser(pattern)))
]

data = load_results(files)

# One DataFrame per record type, indexed by record id.
frames = {
    typename: pd.DataFrame.from_dict(items, orient='index')
    for typename, items in data.items()
}

# Display names for the raw algorithm identifiers found in the result files.
# Identifiers without an entry here are left as they are.
algorithm_names = {
    'thrill partitioned louvain':
        'partitioned local moving',
    'thrill node based fully distributed local moving':
        'synchronous local moving',
}

# The nested mapping limits the substitution to the 'algorithm' column.
frames['algorithm_run'].replace(
    to_replace={'algorithm': algorithm_names},
    inplace=True,
)

# Keep the unmodified graph value in its own column; this must happen before
# the 'graph' column is relabelled at the end of this block.
frames['program_run']['graph_path'] = frames['program_run']['graph']

# Short display names for the graph values recorded in the result files.
# NOTE(review): DataFrame.replace is called without regex=True, so each key is
# compared as a whole string and the '*' is a literal character, not a
# wildcard -- confirm the result files store exactly these strings.
graph_names = {
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000-sorted-preprocessed-*.bin': 'LFR 100K', 
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_1000000-preprocessed-*.bin': 'LFR 1M', 
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_10000000-preprocessed-*.bin': 'LFR 10M', 
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000000-preprocessed-*.bin': 'LFR 100M', 
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_200000000-sorted-preprocessed-*.bin': 'LFR 200M', 
    '/home/kit/iti/kp0036/graphs/hypercubegraph23-preprocessed-*.bin': 'hypercube', 
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002', 
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05', 
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004', 
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster', 
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj', 
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut', 
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube', 
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon'
}

# The nested mapping restricts the replacement to the 'graph' column; values
# with no entry in graph_names stay unchanged. 'graph_path' still holds the
# original value copied above.
frames['program_run'].replace({ 'graph': graph_names }, inplace=True)

def calc_worker_label(x):
    """Build the worker-layout label for one program run.

    Parameters
    ----------
    x : mapping-like (e.g. one row of ``frames['program_run']``)
        Must provide ``'hosts'``. ``'workers_per_host'`` is only read when
        ``'hosts'`` holds a value.

    Returns
    -------
    str
        ``'sequential'`` when ``hosts`` is missing (NaN or None), otherwise
        ``'<hosts>x<workers_per_host>'`` with both counts rendered as
        integers, e.g. ``'16x16'``.
    """
    # pd.isnull treats NaN and None alike; np.isnan raises TypeError for None
    # and other non-float values (e.g. a JSON null in an object column).
    if pd.isnull(x['hosts']):
        return 'sequential'
    return '{}x{}'.format(int(x['hosts']), int(x['workers_per_host']))
        
# Label every program run with its worker layout; calc_worker_label receives
# one row at a time.
frames['program_run']['worker_label'] = frames['program_run'].apply(
    calc_worker_label,
    axis='columns',
)

In [16]:
# Mean runtime and modularity per (algorithm, worker layout, graph), reshaped
# so that rows are (graph, metric) and columns are (algorithm, worker layout).
# The bare expression is the cell's last statement, so it is what gets displayed.
(
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    .groupby(['algorithm', 'worker_label', 'graph'])[['runtime', 'modularity']]
    .mean()
    .stack()
    .unstack(['algorithm', 'worker_label'])
)

Unnamed: 0_level_0,algorithm,partitioned local moving,partitioned local moving,partitioned local moving,partitioned local moving,sequential louvain,synchronous local moving,synchronous local moving,synchronous local moving,synchronous local moving,synchronous local moving
Unnamed: 0_level_1,worker_label,16x16,2x16,4x16,8x16,sequential,16x16,2x16,32x16,4x16,8x16
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
LFR 100K,runtime,,,,,1.43868,,9.63083,,,
LFR 100K,modularity,,,,,0.521836,,0.524983,,,
LFR 100M,runtime,,,,,,640.933073,,,,
LFR 100M,modularity,,,,,,0.56462,,,,
LFR 10M,runtime,,,,225.192853,1018.98,,,,,151.685848
LFR 10M,modularity,,,,0.599273,0.599285,,,,,0.596983
LFR 1M,runtime,,,49.133987,,28.9294,,,,27.153824,
LFR 1M,modularity,,,0.592719,,0.591128,,,,0.592534,
LFR 200M,runtime,,,,,,,,860.819239,,
LFR 200M,modularity,,,,,,,,0.574363,,
