In [7]:
import os

import numpy as np

import performance
from report_util import *
from run_data import extract


def create_row(data, y, tool, benchmark):
    """Summarise one (benchmark, tool) sample of measurement ``y`` and compare it to Galette.

    Parameters
    ----------
    data
        Measurements, passed unchanged to ``select``.
    y
        Key used to index the result of ``select`` (the measurement to summarise).
    tool
        Tool whose sample is summarised.
    benchmark
        Benchmark whose sample is summarised.

    Returns
    -------
    dict
        Always contains the keys ``n``, ``tool``, ``benchmark``, ``s``, ``p``, ``sig``,
        ``med`` and ``a12``:

        * ``n``   -- number of values selected for ``tool``;
        * ``med`` -- ``np.median`` of those values (NaN when there are none);
        * ``s``   -- ``np.std`` of those values (NaN when there are none);
        * ``p``   -- ``mann_whitney(tool values, galette values)``; NaN unless both
          samples are non-empty;
        * ``a12`` -- ``a12(tool values, galette values)``; NaN unless both samples are
          non-empty;
        * ``sig`` -- always ``False``; this function never updates it.
    """
    galette_values = select(data, benchmark=benchmark, tool='galette')[y]
    tool_values = select(data, benchmark=benchmark, tool=tool)[y]
    # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
    # 'a12' is initialised up front so every row has the same keys, even when the
    # comparison below is skipped.
    row = dict(n=len(tool_values), tool=tool, benchmark=benchmark,
               s=np.nan, p=np.nan, sig=False, med=np.nan, a12=np.nan)
    if len(tool_values) > 0:
        row['med'] = np.median(tool_values)
        row['s'] = np.std(tool_values)
        # The comparison needs a non-empty Galette sample as well.
        if len(galette_values) > 0:
            row['p'] = mann_whitney(tool_values, galette_values)
            row['a12'] = a12(tool_values, galette_values)
    return row


def create_table(data, y):
    """Build the summary table for measurement ``y``.

    One row is produced by ``create_row`` for every pair drawn from
    ``performance.BENCHMARKS`` and ``performance.TOOLS``, with benchmarks as the
    outer loop. The rows are returned as a ``pd.DataFrame``.
    """
    records = []
    for benchmark in performance.BENCHMARKS:
        for tool in performance.TOOLS:
            records.append(create_row(data, y, tool, benchmark))
    return pd.DataFrame(records)


def mark_significant(p, sig_level):
    """Render a p-value as text, starring it when it is below ``sig_level``.

    A missing ``p`` (as judged by ``pd.isna``) is returned unchanged. Otherwise the
    value is formatted with three decimals and prefixed with ``*`` when
    ``p < sig_level``.
    """
    if pd.isna(p):
        return p
    star = '*' if p < sig_level else ''
    return f'{star}{p:.3f}'


def format_table(table, sig_level=0.05 / 3):
    """Return a display-ready copy of a summary table; the input is left unmodified.

    Parameters
    ----------
    table
        Summary table with at least the columns ``p``, ``sig``, ``med`` and ``s``.
    sig_level
        Threshold below which a p-value is starred by ``mark_significant``.
        Defaults to ``0.05 / 3``.
        NOTE(review): presumably a Bonferroni-style correction for the three
        comparisons against Galette -- confirm.

    Returns
    -------
    The result of ``format_tool_names`` applied to the reformatted copy, in which

    * ``p``   holds the output of ``mark_significant``;
    * ``sig`` holds ``'color: red;'`` for truthy entries and ``''`` otherwise;
    * ``med`` and ``s`` are rounded and stored as nullable ``Int64``.
    """
    # pd.DataFrame(df) does not copy DataFrame input by default, so without an
    # explicit copy the assignments below could overwrite the caller's columns.
    result = pd.DataFrame(table).copy()
    result['p'] = result['p'].apply(lambda p: mark_significant(p, sig_level=sig_level))
    result['sig'] = result['sig'].apply(lambda s: 'color: red;' if s else '')
    # Nullable Int64 lets missing statistics survive the integer conversion.
    for column in ('med', 's'):
        result[column] = result[column].round().astype(pd.Int64Dtype())
    return format_tool_names(result)


def style_table(table, title):
    """Pivot a summary table into a benchmark-by-tool layout and return it as a captioned Styler.

    The table is first passed through ``format_table``, then pivoted so that rows
    are benchmarks and columns are (tool, statistic) pairs. Tools are ordered
    Galette, Base, MirrorTaint, Phosphor and statistics med, s, p, a12. All
    index and column level names are cleared, missing cells are rendered as
    ``---``, and ``title`` becomes the caption.
    """
    tool_order = ['Galette', 'Base', 'MirrorTaint', 'Phosphor']
    stat_order = ['med', 's', 'p', 'a12']

    pivoted = format_table(table).pivot(index=['benchmark'], values=stat_order, columns=['tool'])
    # Put the 'tool' level first and the unnamed level second.
    pivoted = pivoted.reorder_levels(axis=1, order=['tool', None])
    pivoted = pivoted.sort_index(axis=1)
    pivoted = pivoted.sort_index(axis=0)
    pivoted = pivoted.reindex(tool_order, axis=1, level=0)
    pivoted = pivoted.reindex(stat_order, axis=1, level=1)

    # Remove comparison of Galette against itself
    f_table = pivoted.drop(columns=[('Galette', 'p'), ('Galette', 'a12')])

    # Clear the level names on both axes.
    f_table.index.names = [None] * len(f_table.index.names)
    f_table.columns.names = [None] * len(f_table.columns.names)

    styler = f_table.style.format(precision=3, na_rep='---')
    return styler.set_caption(title)


# Directory containing the run reports. The location is machine-specific, so it can be
# overridden with the GALETTE_REPORTS_DIR environment variable; the original path stays
# the default, so existing behaviour is unchanged when the variable is unset.
reports_dir = os.environ.get('GALETTE_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1191447/')
# extract receives the reports directory and the path of performance.csv inside it.
data = extract(reports_dir, os.path.join(reports_dir, 'performance.csv'))

Searching for runs in /home/katie/Downloads/galette/slurm-1191447/.
Found 1680 runs.
Checking runs.
	Failed run 557 --- {'benchmark': 'luindex', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 900 --- {'benchmark': 'tradesoap', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 1666 --- {'benchmark': 'h2o', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 313 --- {'benchmark': 'xalan', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 227 --- {'benchmark': 'tradebeans', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 664 --- {'benchmark': 'pmd', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 805 --- {'benchmark': 'h2o', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 305 --- {'benchmark': 'luindex', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 335 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 167 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'RU

In [13]:
# Count the non-null 'iteration' entries for every (benchmark, tool) pair.
(
    data.groupby(by=['benchmark', 'tool'])['iteration']
    .count()
    .reset_index()
)

Unnamed: 0,benchmark,tool,iteration
0,avrora,galette,100
1,avrora,mirror-taint,100
2,avrora,none,100
3,avrora,phosphor,100
4,batik,galette,100
...,...,...,...
57,xalan,none,100
58,xalan,phosphor,100
59,zxing,galette,100
60,zxing,mirror-taint,100


In [14]:
# Summarise the 'rss' measurements and render them under the memory-usage caption.
memory_summary = create_table(data, 'rss')
memory_table = style_table(memory_summary, 'Memory Usage (kB).')
memory_table

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,809920,173200,120864,24626,*0.000,1.0,2284304,238627,*0.000,1.000,326262,6972,*0.000,1.000
batik,951094,67068,222608,17281,*0.000,1.0,2431990,250269,*0.000,1.000,---,---,---,---
biojava,1117026,167526,172694,3552,*0.000,1.0,853112,49305,*0.000,0.971,394496,20668,*0.000,1.000
eclipse,1279780,329498,301030,56095,*0.000,1.0,---,---,---,---,---,---,---,---
fop,435308,20091,142594,12253,*0.000,1.0,---,---,---,---,466488,12380,*0.000,0.901
graphchi,2033442,310242,412470,34086,*0.000,1.0,---,---,---,---,542210,17985,*0.000,1.000
h2,1074124,80881,332658,8441,*0.000,1.0,829460,107817,*0.000,0.931,827916,34592,*0.000,0.999
h2o,1329412,64566,393566,33857,*0.000,1.0,---,---,---,---,---,---,---,---
jme,1500840,205919,261894,33856,*0.000,1.0,2514882,130129,*0.000,1.000,615138,46196,*0.000,1.000
jython,3602176,198823,421112,15975,*0.000,1.0,---,---,---,---,---,---,---,---


In [15]:
# Emit the memory table as LaTeX source.
memory_latex = memory_table.to_latex(
    multicol_align='c',
    hrules=True,
    multirow_align='t',
    convert_css=True,
)
print(memory_latex)

\begin{table}
\caption{Memory Usage (kB).}
\begin{tabular}{lllllllllllllll}
\toprule
 & \multicolumn{2}{c}{Galette} & \multicolumn{4}{c}{Base} & \multicolumn{4}{c}{MirrorTaint} & \multicolumn{4}{c}{Phosphor} \\
 & med & s & med & s & p & a12 & med & s & p & a12 & med & s & p & a12 \\
\midrule
avrora & 809920 & 173200 & 120864 & 24626 & *0.000 & 1.000 & 2284304 & 238627 & *0.000 & 1.000 & 326262 & 6972 & *0.000 & 1.000 \\
batik & 951094 & 67068 & 222608 & 17281 & *0.000 & 1.000 & 2431990 & 250269 & *0.000 & 1.000 & --- & --- & --- & --- \\
biojava & 1117026 & 167526 & 172694 & 3552 & *0.000 & 1.000 & 853112 & 49305 & *0.000 & 0.971 & 394496 & 20668 & *0.000 & 1.000 \\
eclipse & 1279780 & 329498 & 301030 & 56095 & *0.000 & 1.000 & --- & --- & --- & --- & --- & --- & --- & --- \\
fop & 435308 & 20091 & 142594 & 12253 & *0.000 & 1.000 & --- & --- & --- & --- & 466488 & 12380 & *0.000 & 0.901 \\
graphchi & 2033442 & 310242 & 412470 & 34086 & *0.000 & 1.000 & --- & --- & --- & --- & 542210 &

In [11]:
# Summarise the 'elapsed_time' measurements and render them under the execution-time caption.
time_summary = create_table(data, 'elapsed_time')
time_table = style_table(time_summary, 'Execution Time (ms).')
time_table

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,12983,1162,2432,359,*0.000,1.0,1410370,128023,*0.000,1.000,6370,622,*0.000,1.000
batik,4922,454,265,105,*0.000,1.0,812568,36961,*0.000,1.000,---,---,---,---
biojava,7443,661,154,25,*0.000,1.0,363983,21838,*0.000,1.000,2330,287,*0.000,1.000
eclipse,4544,19752,4283,18926,0.061,0.577,---,---,---,---,---,---,---,---
fop,915,225,118,108,*0.000,0.999,---,---,---,---,792,199,*0.000,0.687
graphchi,29015,3406,534,93,*0.000,1.0,---,---,---,---,7694,772,*0.000,1.000
h2,2616,404,148,79,*0.000,1.0,118315,5069,*0.000,1.000,1298,223,*0.000,1.000
h2o,17502,1390,604,98,*0.000,1.0,---,---,---,---,---,---,---,---
jme,9250,1123,1029,312,*0.000,1.0,3989700,29022,*0.000,1.000,5034,812,*0.000,0.999
jython,8497,981,392,39,*0.000,1.0,---,---,---,---,---,---,---,---


In [12]:
# Emit the execution-time table as LaTeX source.
time_latex = time_table.to_latex(
    multicol_align='c',
    hrules=True,
    multirow_align='t',
    convert_css=True,
)
print(time_latex)

\begin{table}
\caption{Execution Time (ms).}
\begin{tabular}{lllllllllllllll}
\toprule
 & \multicolumn{2}{c}{Galette} & \multicolumn{4}{c}{Base} & \multicolumn{4}{c}{MirrorTaint} & \multicolumn{4}{c}{Phosphor} \\
 & med & s & med & s & p & a12 & med & s & p & a12 & med & s & p & a12 \\
\midrule
avrora & 12983 & 1162 & 2432 & 359 & *0.000 & 1.000 & 1410370 & 128023 & *0.000 & 1.000 & 6370 & 622 & *0.000 & 1.000 \\
batik & 4922 & 454 & 265 & 105 & *0.000 & 1.000 & 812568 & 36961 & *0.000 & 1.000 & --- & --- & --- & --- \\
biojava & 7443 & 661 & 154 & 25 & *0.000 & 1.000 & 363983 & 21838 & *0.000 & 1.000 & 2330 & 287 & *0.000 & 1.000 \\
eclipse & 4544 & 19752 & 4283 & 18926 & 0.061 & 0.577 & --- & --- & --- & --- & --- & --- & --- & --- \\
fop & 915 & 225 & 118 & 108 & *0.000 & 0.999 & --- & --- & --- & --- & 792 & 199 & *0.000 & 0.687 \\
graphchi & 29015 & 3406 & 534 & 93 & *0.000 & 1.000 & --- & --- & --- & --- & 7694 & 772 & *0.000 & 1.000 \\
h2 & 2616 & 404 & 148 & 79 & *0.000 & 1.000