In [1]:
import os

import numpy as np
import pandas as pd

import performance
from report_util import *
from run_data import extract


def create_row(data, y, tool, benchmark):
    """Summarize metric ``y`` for one (tool, benchmark) pair and compare it to Galette.

    Args:
        data: Run data passed through to ``select``.
        y: Name of the metric to read from the selected runs (e.g. ``'rss'``).
        tool: Tool whose runs are summarized.
        benchmark: Benchmark whose runs are summarized.

    Returns:
        A dict with the keys ``n`` (number of values for ``tool``), ``tool``,
        ``benchmark``, ``med`` (median), ``s`` (``np.std``), ``p``
        (``mann_whitney`` result versus the Galette values), ``a12`` (``a12``
        result versus the Galette values) and ``sig``. Statistics that cannot
        be computed stay NaN; ``sig`` is initialized to False and is not
        updated by this function.
    """
    galette_values = select(data, benchmark=benchmark, tool='galette')[y]
    tool_values = select(data, benchmark=benchmark, tool=tool)[y]
    # Every key is present from the start (including 'a12') so all rows share
    # one schema and the resulting table always has an 'a12' column.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    row = dict(n=len(tool_values), tool=tool, benchmark=benchmark,
               s=np.nan, p=np.nan, sig=False, med=np.nan, a12=np.nan)
    if len(tool_values) > 0:
        row['med'] = np.median(tool_values)
        row['s'] = np.std(tool_values)
        # The comparison needs samples on both sides.
        if len(galette_values) > 0:
            row['p'] = mann_whitney(tool_values, galette_values)
            row['a12'] = a12(tool_values, galette_values)
    return row


def create_table(data, y):
    """Build the summary table for metric ``y``.

    One row is produced by ``create_row`` for every combination of
    ``performance.BENCHMARKS`` and ``performance.TOOLS``, iterating benchmarks
    in the outer loop and tools in the inner loop.

    Returns:
        A ``pd.DataFrame`` with one row per (benchmark, tool) pair.
    """
    records = []
    for benchmark in performance.BENCHMARKS:
        for tool in performance.TOOLS:
            records.append(create_row(data, y, tool, benchmark))
    return pd.DataFrame(records)


def mark_significant(p, sig_level):
    """Render a p-value as text, flagging values below ``sig_level``.

    Missing values (as judged by ``pd.isna``) are returned unchanged. Any
    other value is formatted with three decimals, prefixed with ``*`` when it
    is strictly less than ``sig_level``.
    """
    if pd.isna(p):
        return p
    marker = '*' if p < sig_level else ''
    return f'{marker}{p:.3f}'


def format_table(table, sig_level=0.05 / 3):
    """Return a display-ready copy of a summary table.

    The input is copied first, so the caller's table is never modified.

    Args:
        table: Summary table with at least the columns ``p``, ``sig``,
            ``med`` and ``s``.
        sig_level: Threshold passed to ``mark_significant``. The default,
            ``0.05 / 3``, is the value that was previously hard-coded here.
            NOTE(review): presumably a Bonferroni correction for three
            comparisons against Galette; confirm.

    Returns:
        The result of ``format_tool_names`` applied to the formatted copy:
        ``p`` rendered via ``mark_significant``, ``sig`` mapped to a CSS
        snippet (``'color: red;'`` or ``''``), and ``med`` / ``s`` rounded
        and stored as nullable integers.
    """
    # pd.DataFrame(table) alone may share data with `table`; the explicit copy
    # keeps the column assignments below from leaking into the caller's frame.
    result = pd.DataFrame(table).copy()
    result['p'] = result['p'].apply(lambda p: mark_significant(p, sig_level=sig_level))
    result['sig'] = result['sig'].apply(lambda s: 'color: red;' if s else '')
    # Nullable Int64 keeps missing medians/deviations as <NA> instead of
    # failing the integer conversion.
    result['med'] = result['med'].round().astype(pd.Int64Dtype())
    result['s'] = result['s'].round().astype(pd.Int64Dtype())
    return format_tool_names(result)


def style_table(table, title):
    """Pivot a summary table into a benchmark-by-tool layout and style it.

    The table is first passed through ``format_table``, then pivoted so that
    rows are benchmarks and columns are (tool, statistic) pairs, ordered
    Galette, Base, MirrorTaint, Phosphor and med, s, p, a12.

    Returns:
        A pandas Styler with three-digit precision, ``---`` for missing
        values and ``title`` as its caption.
    """
    tool_order = ['Galette', 'Base', 'MirrorTaint', 'Phosphor']
    stat_order = ['med', 's', 'p', 'a12']

    pivoted = format_table(table).pivot(index=['benchmark'], values=['med', 's', 'p', 'a12'], columns=['tool'])
    # Put the tool name on the outer column level, the statistic on the inner one.
    pivoted = pivoted.reorder_levels(axis=1, order=['tool', None])
    pivoted = pivoted.sort_index(axis=1).sort_index(axis=0)
    pivoted = pivoted.reindex(tool_order, axis=1, level=0)
    pivoted = pivoted.reindex(stat_order, axis=1, level=1)

    # Galette compared against itself carries no information.
    pivoted = pivoted.drop(columns=[('Galette', 'p'), ('Galette', 'a12')])

    # Blank out the axis names so they are not rendered as extra headers.
    pivoted.index.names = [None] * len(pivoted.index.names)
    pivoted.columns.names = [None] * len(pivoted.columns.names)

    styler = pivoted.style.format(precision=3, na_rep='---')
    return styler.set_caption(title)


# Directory holding the run reports. It can be overridden through the
# GALETTE_REPORTS_DIR environment variable so the notebook can be re-run on
# another machine; the default is the path originally hard-coded here.
reports_dir = os.environ.get('GALETTE_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1191363/')
# extract() receives the directory and the path of performance.csv inside it.
# NOTE(review): presumably the CSV is where the extracted runs are written or cached; confirm in run_data.
data = extract(reports_dir, os.path.join(reports_dir, 'performance.csv'))

Searching for runs in /home/katie/Downloads/galette/slurm-1191363/.
Found 84 runs.
Checking runs.
	Failed run 73 --- {'benchmark': 'kafka', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 44 --- {'benchmark': 'biojava', 'tool': 'mirror-taint', 'status': 'TIMEOUT'}
	Failed run 76 --- {'benchmark': 'pmd', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 59 --- {'benchmark': 'tradebeans', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 43 --- {'benchmark': 'batik', 'tool': 'mirror-taint', 'status': 'TIMEOUT'}
	Failed run 47 --- {'benchmark': 'graphchi', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 79 --- {'benchmark': 'tomcat', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 83 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 61 --- {'benchmark': 'xalan', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 66 --- {'benchmark': 'eclipse', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed r

In [2]:
# Summary of the 'rss' metric per benchmark and tool; p and a12 compare each tool's values against Galette's.
style_table(create_table(data, 'rss'), 'Memory Usage (kB).')

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,808460,68834,123252,21954,*0.008,1.0,---,---,---,---,312644,724,*0.008,1.000
batik,934400,26676,215960,12290,*0.008,1.0,---,---,---,---,---,---,---,---
biojava,1027544,257316,173464,3751,*0.008,1.0,---,---,---,---,367988,18902,*0.008,1.000
eclipse,1303928,109980,271864,2443,*0.008,1.0,---,---,---,---,---,---,---,---
fop,431520,23913,142616,1793,*0.008,1.0,---,---,---,---,482128,6329,0.151,0.800
graphchi,1828656,507535,416192,28747,*0.008,1.0,---,---,---,---,549216,13896,*0.008,1.000
h2,1093640,44395,313864,4189,*0.008,1.0,---,---,---,---,888884,13161,*0.008,1.000
h2o,1380540,42327,388856,14078,*0.008,1.0,---,---,---,---,---,---,---,---
jme,1415760,231931,246672,15128,*0.008,1.0,---,---,---,---,580160,14615,*0.008,1.000
jython,3625148,130035,409936,13023,*0.008,1.0,---,---,---,---,---,---,---,---


In [3]:
# Summary of the 'elapsed_time' metric per benchmark and tool; p and a12 compare each tool's values against Galette's.
style_table(create_table(data, 'elapsed_time'), 'Execution Time (ms).')

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,12345,128,2879,101,*0.008,1.0,---,---,---,---,6323,334,*0.008,1.000
batik,4397,183,242,48,*0.008,1.0,---,---,---,---,---,---,---,---
biojava,7014,247,139,7,*0.012,1.0,---,---,---,---,2392,265,*0.008,1.000
eclipse,3890,482,216,41,*0.008,1.0,---,---,---,---,---,---,---,---
fop,782,60,121,70,*0.008,1.0,---,---,---,---,815,93,0.222,0.760
graphchi,22973,360,475,33,*0.012,1.0,---,---,---,---,6290,322,*0.008,1.000
h2,2226,161,131,11,*0.008,1.0,---,---,---,---,1104,139,*0.008,1.000
h2o,15606,278,521,38,*0.008,1.0,---,---,---,---,---,---,---,---
jme,8663,762,628,188,*0.008,1.0,---,---,---,---,4803,399,*0.008,1.000
jython,7143,629,375,23,*0.008,1.0,---,---,---,---,---,---,---,---
