In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import json
import glob
import os

# Select matplotlib's 'ggplot' style sheet for every figure drawn after this point.
plt.style.use('ggplot') # Make the graphs a bit prettier

In [2]:
def load_results(pattern):
  """Merge every JSON result file matching ``pattern`` into one nested dict.

  Each file is expected to hold ``{typename: {item_id: record}}``. Records of
  the same ``typename`` coming from different files are combined with
  ``dict.update``, so a later file overrides an earlier one on a shared id.

  Parameters
  ----------
  pattern : str
      Glob pattern of the files to read; a leading ``~`` is expanded.

  Returns
  -------
  dict
      ``{typename: {item_id: record}}`` over all matched files (empty when
      nothing matches).
  """
  merged = {}
  # glob returns matches in arbitrary order; sorting makes it deterministic
  # which file wins when two files carry the same item id.
  for path in sorted(glob.glob(os.path.expanduser(pattern))):
    # Context manager so the handle is closed right after parsing.
    with open(path) as handle:
      contents = json.load(handle)
    for typename, items in contents.items():
      merged.setdefault(typename, {}).update(items)
  return merged


data = load_results("~/dev/ma/data/results/pre_dlm1/*.json")

# One DataFrame per record type, indexed by item id (one row per record).
frames = { typename: pd.DataFrame.from_dict(items, orient='index') for typename, items in data.items() }

# Short display label for each graph path pattern; used to relabel the
# 'graph' column of the program runs.
graph_names = {
    # LFR instances (mu_0.4 file names)
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000-sorted-preprocessed-*.bin': 'LFR 100K',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_1000000-preprocessed-*.bin': 'LFR 1M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_10000000-preprocessed-*.bin': 'LFR 10M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_100000000-preprocessed-*.bin': 'LFR 100M',
    '/home/kit/iti/kp0036/graphs/mu-04/graph_50_10000_mu_0.4_200000000-sorted-preprocessed-*.bin': 'LFR 200M',
    # Remaining instances
    '/home/kit/iti/kp0036/graphs/hypercubegraph23-preprocessed-*.bin': 'hypercube',
    '/home/kit/iti/kp0036/graphs/uk-2002.metis-preprocessed-*.bin': 'uk-2002',
    '/home/kit/iti/kp0036/graphs/uk-2007-05.metis-preprocessed-*.bin': 'uk-2007-05',
    '/home/kit/iti/kp0036/graphs/in-2004.metis-preprocessed-*.bin': 'in-2004',
    '/home/kit/iti/kp0036/graphs/com-friendster-preprocessed-*.bin': 'com-friendster',
    '/home/kit/iti/kp0036/graphs/com-lj.ungraph-preprocessed-*.bin': 'com-lj',
    '/home/kit/iti/kp0036/graphs/com-orkut.ungraph-preprocessed-*.bin': 'com-orkut',
    '/home/kit/iti/kp0036/graphs/com-youtube.ungraph-preprocessed-*.bin': 'com-youtube',
    '/home/kit/iti/kp0036/graphs/com-amazon.ungraph-preprocessed-*.bin': 'com-amazon',
}


# Replace the raw path patterns in the 'graph' column with their short labels.
# The result is bound back explicitly instead of using inplace=True, so the
# relabelling is a visible assignment rather than a hidden mutation.
frames['program_run'] = frames['program_run'].replace({ 'graph': graph_names })

In [3]:
# Mean modularity and runtime per (graph, stopping criterion, node ratio).
# Each clustering row is joined to its algorithm run and that run's program run.
(
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    # Several columns must be selected with a list: the bare-tuple form
    # ['modularity', 'runtime'] is rejected by current pandas.
    .groupby(['graph', 'local_moving_stopping_criterion', 'local_moving_node_ratio'])[['modularity', 'runtime']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,modularity,runtime
graph,local_moving_stopping_criterion,local_moving_node_ratio,Unnamed: 3_level_1,Unnamed: 4_level_1
LFR 10M,cluster_count,1,0.587054,124.740916
LFR 10M,cluster_count,2,0.595544,186.33922
LFR 10M,cluster_count,4,0.587416,289.71545
LFR 10M,cluster_count,8,0.597741,914.074681
LFR 10M,cluster_count,dynamic,0.578653,199.217159
LFR 10M,moved_count,1,0.591836,240.974209
LFR 10M,moved_count,2,0.596406,167.03077
LFR 10M,moved_count,4,0.597009,173.034491
LFR 10M,moved_count,8,0.589233,209.436321
LFR 10M,moved_count,dynamic,0.591084,206.705705


In [16]:
# Mean modularity / runtime per configuration, reshaped so that each
# (stopping criterion, ratio) pair is a column and each (graph, metric) a row.
df = (
    frames['clustering']
    .merge(frames['algorithm_run'], left_on='algorithm_run_id', right_index=True)
    .merge(frames['program_run'], left_on='program_run_id', right_index=True)
    # Several columns must be selected with a list: the bare-tuple form
    # ['modularity', 'runtime'] is rejected by current pandas.
    .groupby(['graph', 'local_moving_stopping_criterion', 'local_moving_node_ratio'])[['modularity', 'runtime']]
    .mean()
    .stack()  # move the two metric columns into the innermost row level
    .unstack(['local_moving_stopping_criterion', 'local_moving_node_ratio'])
)

# Shorter names for the two column levels.
df.columns = df.columns.rename(['stopping criterion', 'ratio'])

df

Unnamed: 0_level_0,stopping criterion,cluster_count,cluster_count,cluster_count,cluster_count,cluster_count,moved_count,moved_count,moved_count,moved_count,moved_count
Unnamed: 0_level_1,ratio,1,2,4,8,dynamic,1,2,4,8,dynamic
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
LFR 10M,modularity,0.587054,0.595544,0.587416,0.597741,0.578653,0.591836,0.596406,0.597009,0.589233,0.591084
LFR 10M,runtime,124.740916,186.33922,289.71545,914.074681,199.217159,240.974209,167.03077,173.034491,209.436321,206.705705
friendster,modularity,0.50316,0.562513,0.577368,0.592235,0.606859,,0.591695,0.6095,0.613189,0.625279
friendster,runtime,762.548029,999.474926,1315.349956,2911.561037,1461.522005,,1866.585136,2556.930383,2290.740927,2731.077903
uk-2002,modularity,0.351908,0.988926,0.989308,0.989551,0.988914,,0.989319,0.989561,0.98967,0.989531
uk-2002,runtime,85.96736,133.125686,154.490371,462.429317,66.893048,,128.487342,116.285455,113.849404,121.070345


In [17]:
# For each configuration (column), add up its differences to every column of
# df (itself included, which contributes zero). A missing comparison value is
# filled with the column's own value, so that pair also contributes zero; rows
# where the column itself is missing stay NaN.
deltas = df.copy()
for column in df.columns:
    own = df[column]
    deltas[column] = sum(own - df[other].fillna(own) for other in df.columns)
deltas

Unnamed: 0_level_0,stopping criterion,cluster_count,cluster_count,cluster_count,cluster_count,cluster_count,moved_count,moved_count,moved_count,moved_count,moved_count
Unnamed: 0_level_1,ratio,1,2,4,8,dynamic,1,2,4,8,dynamic
graph,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
LFR 10M,modularity,-0.041436,0.043464,-0.037816,0.065434,-0.125446,0.006384,0.052084,0.058114,-0.019646,-0.001136
LFR 10M,runtime,-1463.859762,-847.876722,185.885578,6429.477888,-719.097332,-301.526832,-1040.961222,-980.924012,-616.905712,-644.211872
friendster,modularity,-0.753358,-0.219181,-0.085486,0.048317,0.179933,,0.043457,0.203702,0.236903,0.345713
friendster,runtime,-10032.858041,-7900.515968,-5057.640698,9308.259031,-3742.092257,,-96.524078,6116.583145,3720.878041,7683.910825
uk-2002,modularity,-5.099516,0.633646,0.637084,0.639271,0.633538,,0.637183,0.639361,0.640342,0.639091
uk-2002,runtime,-608.892088,-184.467154,7.815011,2779.265525,-780.560896,,-226.21225,-336.029233,-357.953692,-292.965223
