In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import json
import glob
import os

# Select matplotlib's 'ggplot' style sheet for the plots made after this call.
plt.style.use('ggplot') # Make the graphs a bit prettier

In [2]:
# Merge all result files into one dict per record type. Each file is a JSON
# object whose top-level keys are record type names and whose values are dicts
# of records; the inner keys become the DataFrame index below.
data = {}

results_glob = os.path.expanduser("~/dev/ma/data/results/pre_dlm1/*.json")
# glob returns matches in arbitrary order; sorting makes it deterministic which
# file wins when two files contain the same record key.
result_paths = sorted(glob.glob(results_glob))
if not result_paths:
  # Fail with a clear message instead of a KeyError on 'program_run' below.
  raise FileNotFoundError("no result files match " + results_glob)

for path in result_paths:
  # The context manager closes each file right after parsing instead of
  # leaving the handle open until it is garbage collected.
  with open(path, encoding="utf-8") as result_file:
    for typename, items in json.load(result_file).items():
      # Records from later files override earlier ones with the same key.
      data.setdefault(typename, {}).update(items)

# One DataFrame per record type, one row per record.
frames = { typename: pd.DataFrame.from_dict(items, orient='index') for typename, items in data.items() }

# Short display names for the graph path patterns stored in the 'graph' column
# of the program runs.
graph_names = {
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000-sorted-preprocessed-*.bin': 'LFR 100K',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_1000000-preprocessed-*.bin': 'LFR 1M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_10000000-preprocessed-*.bin': 'LFR 10M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000000-preprocessed-*.bin': 'LFR 100M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_200000000-sorted-preprocessed-*.bin': 'LFR 200M',
    '/home/kit/iti/kp0036/graphs/hypercubegraph23-preprocessed-*.bin': 'hypercube',
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon'
}


# Assign the replaced frame back instead of using inplace=True: same result,
# but the statement is a plain expression that is safe to re-run and to chain.
frames['program_run'] = frames['program_run'].replace({ 'graph': graph_names })

In [3]:
# Mean modularity and runtime per (graph, stopping criterion, node ratio).
# Each clustering row is joined to its algorithm run and that run's program
# run, which supplies the 'graph' column used for grouping.
(
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    .groupby(['graph', 'local_moving_stopping_criterion', 'local_moving_node_ratio'])
    # Columns must be selected with a list: the bare-tuple form
    # ['modularity', 'runtime'] is deprecated and raises in pandas 2.x.
    [['modularity', 'runtime']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,modularity,runtime
graph,local_moving_stopping_criterion,local_moving_node_ratio,Unnamed: 3_level_1,Unnamed: 4_level_1
LFR 10M,cluster_count,1,0.587117,144.988748
LFR 10M,cluster_count,2,0.595163,198.402892
LFR 10M,cluster_count,4,0.588008,283.782
LFR 10M,cluster_count,8,0.598188,912.742191
LFR 10M,cluster_count,dynamic,0.574811,204.410172
LFR 10M,moved_count,1,0.593185,201.965941
LFR 10M,moved_count,2,0.596367,174.213303
LFR 10M,moved_count,4,0.596907,190.03278
LFR 10M,moved_count,8,0.587475,202.820913
LFR 10M,moved_count,dynamic,0.584092,220.911456


In [4]:
# Same grouped means as a wide table: one row per (graph, metric) and one
# column per (stopping criterion, node ratio) configuration.
df = (
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    .groupby(['graph', 'local_moving_stopping_criterion', 'local_moving_node_ratio'])
    # Columns must be selected with a list: the bare-tuple form
    # ['modularity', 'runtime'] is deprecated and raises in pandas 2.x.
    [['modularity', 'runtime']]
    .mean()
    # stack() moves the two metrics into the row index ...
    .stack()
    # ... and unstack() moves the configuration levels into the columns.
    .unstack(['local_moving_stopping_criterion', 'local_moving_node_ratio'])
)

# Shorter names for the two column levels.
df.columns = df.columns.rename(['stopping criterion', 'ratio'])

df

Unnamed: 0_level_0,stopping criterion,cluster_count,cluster_count,cluster_count,cluster_count,cluster_count,moved_count,moved_count,moved_count,moved_count,moved_count
Unnamed: 0_level_1,ratio,1,2,4,8,dynamic,1,2,4,8,dynamic
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
LFR 10M,modularity,0.587117,0.595163,0.588008,0.598188,0.574811,0.593185,0.596367,0.596907,0.587475,0.584092
LFR 10M,runtime,144.988748,198.402892,283.782,912.742191,204.410172,201.965941,174.213303,190.03278,202.820913,220.911456
com-friendster,modularity,0.505257,0.574823,0.577839,0.59144,0.607118,,0.595428,0.611349,0.613112,0.624124
com-friendster,runtime,845.836941,948.078753,1319.555163,2920.162724,1413.715154,,1967.538978,2620.497621,2298.872828,2741.186756
uk-2002,modularity,0.348436,0.988985,0.989275,0.98955,0.98908,,0.989374,0.989609,0.98966,0.989579
uk-2002,runtime,85.180454,124.188546,158.990083,450.134352,70.951985,,149.45749,132.547036,114.781697,123.721434


In [5]:
# For every configuration column, sum its signed difference to each column of
# df (itself included, which contributes zero). A value missing in the other
# column is filled with the current column's value, so that pair adds zero.
deltas = df.copy()
for column in df.columns:
    current = df[column]
    deltas[column] = sum(current - df[other].fillna(current) for other in df.columns)
deltas

Unnamed: 0_level_0,stopping criterion,cluster_count,cluster_count,cluster_count,cluster_count,cluster_count,moved_count,moved_count,moved_count,moved_count,moved_count
Unnamed: 0_level_1,ratio,1,2,4,8,dynamic,1,2,4,8,dynamic
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
LFR 10M,modularity,-0.030143,0.050317,-0.021233,0.080567,-0.153203,0.030534,0.062354,0.067757,-0.026563,-0.060389
LFR 10M,runtime,-1284.382915,-750.241475,103.549605,6393.151511,-690.168675,-714.610985,-992.137365,-833.942599,-706.061262,-525.155839
com-friendster,modularity,-0.753175,-0.127081,-0.099943,0.022469,0.163574,,0.058361,0.201653,0.217517,0.316625
com-friendster,runtime,-9462.912448,-8542.73614,-5199.44845,9206.019603,-4352.008528,,632.405881,6509.033669,3614.410532,7595.235883
uk-2002,modularity,-5.127621,0.637314,0.639927,0.642405,0.638172,,0.640821,0.64293,0.643395,0.64266
uk-2002,runtime,-643.328989,-292.256161,20.957672,2641.256087,-771.385216,,-64.835668,-217.029751,-376.917805,-296.460169


In [8]:
# Show the table transposed: one row per configuration, one column per
# (graph, metric) pair.
df.T

Unnamed: 0_level_0,graph,LFR 10M,LFR 10M,com-friendster,com-friendster,uk-2002,uk-2002
Unnamed: 0_level_1,Unnamed: 1_level_1,modularity,runtime,modularity,runtime,modularity,runtime
stopping criterion,ratio,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
cluster_count,1,0.587117,144.988748,0.505257,845.836941,0.348436,85.180454
cluster_count,2,0.595163,198.402892,0.574823,948.078753,0.988985,124.188546
cluster_count,4,0.588008,283.782,0.577839,1319.555163,0.989275,158.990083
cluster_count,8,0.598188,912.742191,0.59144,2920.162724,0.98955,450.134352
cluster_count,dynamic,0.574811,204.410172,0.607118,1413.715154,0.98908,70.951985
moved_count,1,0.593185,201.965941,,,,
moved_count,2,0.596367,174.213303,0.595428,1967.538978,0.989374,149.45749
moved_count,4,0.596907,190.03278,0.611349,2620.497621,0.989609,132.547036
moved_count,8,0.587475,202.820913,0.613112,2298.872828,0.98966,114.781697
moved_count,dynamic,0.584092,220.911456,0.624124,2741.186756,0.989579,123.721434
