In [48]:
import os

import numpy as np

import performance
from report_util import *
from run_data import extract


def create_row(data, y, tool, benchmark):
    """Summarise metric ``y`` for one (tool, benchmark) pair and compare it against Galette.

    :param data: run data accepted by ``select`` (imported from ``report_util``).
    :param y: name of the metric to summarise (e.g. ``'rss'``); used to index the
        selection returned by ``select``.
    :param tool: tool whose runs are summarised.
    :param benchmark: benchmark whose runs are summarised.
    :return: dict with the keys ``n`` (number of values for ``tool``), ``tool``,
        ``benchmark``, ``s`` (``np.std`` of the values), ``p`` (result of
        ``mann_whitney`` against the Galette values), ``sig`` (always ``False``
        here; never updated by this function), ``med`` (``np.median`` of the
        values) and ``a12`` (result of ``a12`` against the Galette values).
        Statistics that cannot be computed are left as NaN.
    """
    baseline = select(data, benchmark=benchmark, tool='galette')[y]
    values = select(data, benchmark=benchmark, tool=tool)[y]
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    # 'a12' gets a NaN default so every row carries the same set of keys, even when
    # no comparison against Galette is possible.
    row = dict(n=len(values), tool=tool, benchmark=benchmark, s=np.nan, p=np.nan, sig=False, med=np.nan,
               a12=np.nan)
    if len(values) > 0:
        row['med'] = np.median(values)
        row['s'] = np.std(values)
        # The comparison needs samples on both sides.
        if len(baseline) > 0:
            # NOTE(review): presumably a Mann-Whitney U p-value and a Vargha-Delaney A12
            # effect size; confirm against report_util.
            row['p'] = mann_whitney(values, baseline)
            row['a12'] = a12(values, baseline)
    return row


def create_table(data, y):
    """Build a DataFrame with one ``create_row`` summary per (benchmark, tool) pair.

    Benchmarks come from ``performance.BENCHMARKS`` and tools from
    ``performance.TOOLS``; rows are ordered benchmark-major, tool-minor.

    :param data: run data forwarded to ``create_row``.
    :param y: name of the metric to summarise.
    :return: ``pd.DataFrame`` built from the list of row dicts.
    """
    rows = []
    for benchmark in performance.BENCHMARKS:
        for tool in performance.TOOLS:
            rows.append(create_row(data, y, tool, benchmark))
    return pd.DataFrame(rows)


def mark_significant(p, sig_level):
    """Format a p-value to three decimals, prefixing it with ``*`` when significant.

    :param p: p-value to format; missing values (as judged by ``pd.isna``) are
        returned unchanged.
    :param sig_level: threshold; values strictly below it are marked with ``*``.
    :return: the formatted string, or ``p`` itself when it is missing.
    """
    if pd.isna(p):
        return p
    formatted = f'{p:.3f}'
    return f'*{formatted}' if p < sig_level else formatted


def format_table(table, sig_level=0.05 / 3):
    """Return a display-ready copy of a table produced by ``create_table``.

    The input is left untouched. On the copy:

    * ``p`` is rendered via ``mark_significant`` (three decimals, ``*`` prefix when
      below ``sig_level``);
    * ``sig`` is mapped to the CSS snippet ``'color: red;'`` for truthy values and
      ``''`` otherwise;
    * ``med`` and ``s`` are rounded and converted to nullable integers;
    * ``tool`` names are prettified: ``'none'`` becomes ``'base'``, the name is
      title-cased, and hyphens are removed (e.g. ``'mirror-taint'`` -> ``'MirrorTaint'``).

    :param table: DataFrame (or anything ``pd.DataFrame`` accepts) with the columns
        ``p``, ``sig``, ``med``, ``s`` and ``tool``.
    :param sig_level: significance threshold passed to ``mark_significant``. The
        default ``0.05 / 3`` is presumably a Bonferroni correction for three
        comparisons against Galette; confirm before changing the tool set.
    :return: the formatted DataFrame.
    """
    # Copy explicitly: pd.DataFrame(table) alone is not guaranteed to be independent of a
    # DataFrame input, and the assignments below must not leak into the caller's table
    # (a second call would otherwise try to re-format already formatted 'p' strings).
    result = pd.DataFrame(table).copy()
    result['p'] = result['p'].apply(lambda p: mark_significant(p, sig_level))
    result['sig'] = result['sig'].apply(lambda s: 'color: red;' if s else '')
    # Nullable Int64 keeps missing medians/deviations as <NA> instead of failing the cast.
    result['med'] = result['med'].round().astype(pd.Int64Dtype())
    result['s'] = result['s'].round().astype(pd.Int64Dtype())
    result['tool'] = result['tool'] \
        .apply(lambda x: x.replace('none', 'base')) \
        .apply(str.title) \
        .apply(lambda x: x.replace('-', ''))
    return result


def style_table(table, title):
    """Pivot a ``create_table`` result into a captioned, per-benchmark Styler.

    The table is formatted with ``format_table`` and pivoted so that each row is a
    benchmark and the columns form a two-level header: tool first, statistic
    second. Tools are ordered Galette, Base, MirrorTaint, Phosphor and statistics
    med, s, p, a12. Galette's own ``p`` and ``a12`` columns are dropped.

    :param table: DataFrame produced by ``create_table``.
    :param title: caption for the rendered table.
    :return: pandas ``Styler`` with three-decimal precision and ``'---'`` for
        missing values.
    """
    tool_order = ['Galette', 'Base', 'MirrorTaint', 'Phosphor']
    stat_order = ['med', 's', 'p', 'a12']

    pivoted = format_table(table).pivot(index=['benchmark'], values=['med', 's', 'p', 'a12'], columns=['tool'])
    # Put the tool level above the (unnamed) statistic level.
    pivoted = pivoted.reorder_levels(axis=1, order=['tool', None])
    pivoted = pivoted.sort_index(axis=1)
    pivoted = pivoted.sort_index(axis=0)
    f_table = pivoted.reindex(tool_order, axis=1, level=0)
    f_table = f_table.reindex(stat_order, axis=1, level=1)

    # Remove comparison of Galette against itself
    f_table = f_table.drop(columns=[('Galette', 'p'), ('Galette', 'a12')])

    # Blank out the axis names so no header labels are carried over from the pivot.
    f_table.index.names = [None] * len(f_table.index.names)
    f_table.columns.names = [None] * len(f_table.columns.names)

    styler = f_table.style.format(precision=3, na_rep='---')
    return styler.set_caption(title)


# Directory containing the run reports. Set GALETTE_REPORTS_DIR to point the notebook at
# another location; the default preserves the path originally hard-coded here.
reports_dir = os.environ.get('GALETTE_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1190773/')
# NOTE(review): the second argument is a CSV path inside the reports directory, presumably
# where extract() stores or caches the parsed results; confirm in run_data.
data = extract(reports_dir, os.path.join(reports_dir, 'performance.csv'))

Searching for runs in /home/katie/Downloads/galette/slurm-1190773/.
Found 84 runs.
Checking runs.
	Failed run 28 --- {'benchmark': 'h2o', 'tool': 'galette', 'status': 'TIMEOUT'}
	Failed run 73 --- {'benchmark': 'kafka', 'tool': 'phosphor', 'status': 'DACAPO_FAILURE'}
	Failed run 44 --- {'benchmark': 'biojava', 'tool': 'mirror-taint', 'status': 'TIMEOUT'}
	Failed run 76 --- {'benchmark': 'pmd', 'tool': 'phosphor', 'status': 'DACAPO_FAILURE'}
	Failed run 59 --- {'benchmark': 'tradebeans', 'tool': 'mirror-taint', 'status': 'DACAPO_FAILURE'}
	Failed run 43 --- {'benchmark': 'batik', 'tool': 'mirror-taint', 'status': 'TIMEOUT'}
	Failed run 47 --- {'benchmark': 'graphchi', 'tool': 'mirror-taint', 'status': 'TIMEOUT'}
	Failed run 79 --- {'benchmark': 'tomcat', 'tool': 'phosphor', 'status': 'DACAPO_FAILURE'}
	Failed run 83 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'DACAPO_FAILURE'}
	Failed run 61 --- {'benchmark': 'xalan', 'tool': 'mirror-taint', 'status': 'DACAPO_FAILURE'}
	Fai

In [49]:
# Summary table for the 'rss' metric across all benchmarks and tools.
style_table(table=create_table(data=data, y='rss'), title='Memory Usage (kB).')

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,842548,59593,125428,7203,*0.008,1.000,---,---,---,---,326744,4381,*0.008,1.000
batik,954088,53793,232188,7506,*0.008,1.000,---,---,---,---,---,---,---,---
biojava,1315984,164172,170824,2104,*0.008,1.000,---,---,---,---,380292,15004,*0.008,1.000
eclipse,1608860,214133,314216,1023,*0.008,1.000,---,---,---,---,---,---,---,---
fop,429924,10918,149128,1601,*0.008,1.000,---,---,---,---,466596,3734,*0.008,1.000
graphchi,---,---,400700,38567,---,---,---,---,---,---,544672,3607,---,---
h2,1067332,65717,323044,1427,*0.008,1.000,---,---,---,---,844212,6878,*0.008,1.000
h2o,---,---,391268,10876,---,---,---,---,---,---,---,---,---,---
jme,1718332,273180,246484,23111,*0.008,1.000,---,---,---,---,637364,31962,*0.008,1.000
jython,3467232,232135,407200,11836,*0.008,1.000,---,---,---,---,---,---,---,---


In [50]:
# Summary table for the 'elapsed_time' metric across all benchmarks and tools.
style_table(table=create_table(data=data, y='elapsed_time'), title='Execution Time (ms).')

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,12249,348,2111,268,*0.008,1.000,---,---,---,---,5598,339,*0.008,1.000
batik,4916,61,245,20,*0.008,1.000,---,---,---,---,---,---,---,---
biojava,7037,234,134,3,*0.008,1.000,---,---,---,---,2035,142,*0.008,1.000
eclipse,4031,351,307,88,*0.008,1.000,---,---,---,---,---,---,---,---
fop,724,93,93,12,*0.008,1.000,---,---,---,---,600,25,0.151,0.800
graphchi,---,---,525,30,---,---,---,---,---,---,7448,72,---,---
h2,2236,76,132,21,*0.008,1.000,---,---,---,---,1079,165,*0.008,1.000
h2o,---,---,536,32,---,---,---,---,---,---,---,---,---,---
jme,8386,572,946,194,*0.008,1.000,---,---,---,---,4413,292,*0.008,1.000
jython,6861,299,357,24,*0.008,1.000,---,---,---,---,---,---,---,---
