In [5]:
import os
import re

from report_util import *
from run_data import extract

STATUSES = ['tag', 'success', 'timeout', 'crash', 'abort', 'fail', 'vm-crash']


def get_benchmark_group(test_identifier):
    """Extract the benchmark group name from a test identifier.

    The group is the dot-free text found between a '.' and the following
    'ITCase' suffix, e.g. 'pkg.ArrayAccessITCase' yields 'ArrayAccess'.

    :param test_identifier: identifier string to search.
    :return: the captured group name.
    :raises ValueError: if the identifier contains no '.<Name>ITCase' segment.
    """
    m = re.search(r'\.([^\.]*?)ITCase', test_identifier)
    if m is None:
        # Fail with the offending identifier instead of an AttributeError on None
        raise ValueError(f'Cannot determine benchmark group for test identifier: {test_identifier!r}')
    return m.group(1)


def compute_status(fp, fn, status):
    """Refine a raw test status using the false positive/negative counts.

    Any status other than 'success' is returned unchanged. A 'success' is
    kept only when fp + fn equals zero; otherwise it is reported as 'tag'.
    """
    # Non-successful runs keep whatever status they already have
    if status != 'success':
        return status
    # A successful run with any reported fp/fn is downgraded to 'tag'
    return 'tag' if (fp + fn) != 0 else 'success'


def complete_cartesian_index(data, columns, fill_value=0, categories=None):
    """Reindex ``data`` so every combination of the key columns has a row.

    :param data: frame containing the key ``columns`` plus value columns.
    :param columns: names of the key columns.
    :param fill_value: value written into rows that were missing from ``data``.
    :param categories: optional list of value lists, one per key column; when
        given, the target index is their cross product. Otherwise the target
        index comes from ``compute_cartesian_index(data, columns)``.
    :return: frame with the key columns restored as regular columns.
    """
    if categories is not None:
        full_index = pd.MultiIndex.from_product(categories, names=columns)
    else:
        full_index = compute_cartesian_index(data, columns)
    # Key the rows by the selected columns, align them with the full index, then flatten again
    keyed = data.set_index(columns)
    completed = keyed.reindex(full_index, fill_value=fill_value)
    return completed.reset_index()


def compute_executions_counts(data):
    """Count the rows of ``data`` for each (group, tool, version) combination.

    The counts are stored in an 'executed' column and then passed through
    ``complete_cartesian_index`` with its default fill value of 0, so key
    combinations that it adds are reported with a count of zero.
    """
    keys = ['group', 'tool', 'version']
    # Number of records observed for each key combination
    group_sizes = data.groupby(keys)['status'].size()
    executed = group_sizes.rename('executed').reset_index()
    executed = executed.drop_duplicates().reset_index(drop=True)
    return complete_cartesian_index(executed, keys)


def create_count_table(data, executed):
    """Build the table of per-status test counts for each group, tool and version.

    :param data: frame with at least the columns 'fp', 'fn', 'status', 'group',
        'tool' and 'version'. It is not modified.
    :param executed: frame with at least the columns 'group', 'version' and
        'executed'.
    :return: frame with one row per (group, tool, version) and, in addition to
        those keys, one count column per entry of ``STATUSES`` plus 'total',
        'executed' and 'sem'. Rows whose 'executed' value is 0 are removed.
    """
    by = ['group', 'tool', 'version']
    # Compute the refined statuses on a new frame so the caller's data is left untouched
    refined = data.assign(status=[
        compute_status(fp, fn, status)
        for fp, fn, status in zip(data['fp'], data['fn'], data['status'])
    ])
    # Count the number of entries in each status for each group for each tool on each JDK
    counts = refined.groupby(by)['status'] \
        .value_counts() \
        .reset_index()
    # Fill in zeros for missing combinations
    # Pivot along the statuses to put the statuses in columns
    categories = [refined[c].unique() for c in by] + [STATUSES]
    counts = complete_cartesian_index(counts, by + ['status'], categories=categories) \
        .pivot(columns='status', index=by, values='count') \
        .fillna(0) \
        .astype('int64') \
        .reset_index()
    # Compute the total number of tests per group as the largest executed count seen for it
    totals = executed.groupby(['group'])['executed'] \
        .max() \
        .rename('total') \
        .reset_index() \
        .drop_duplicates()
    # Add totals to the table
    counts = counts.merge(totals, on=['group'], how='left')
    # Add the executed counts to the table; these are matched on group and version only
    executed_per_version = executed[['group', 'version', 'executed']].drop_duplicates()
    counts = counts.merge(executed_per_version, on=['group', 'version'], how='left')
    # Drop rows for groups where nothing was executed (because the minimum version was not satisfied)
    counts = counts[counts['executed'] != 0].copy()
    # Compute the total number of tests with statuses indicating a deviation from
    # the original program semantics
    counts['sem'] = counts['abort'] + counts['crash'] + counts['fail'] + counts['timeout'] + counts['vm-crash']
    return counts


def style_counts(counts):
    """Turn the count table into a styled wide table of 'Sem' and 'Tag' values.

    Rows are indexed by (group, total, version) and columns by (tool, variable).
    Values are formatted with zero decimal places and missing cells are shown
    as '---'.
    """
    # Long form: one row per (group, tool, version, total) and variable ('sem' or 'tag')
    long_form = counts.melt(id_vars=['group', 'tool', 'version', 'total'], value_vars=['sem', 'tag'])
    long_form = format_tool_names(long_form)
    long_form['variable'] = long_form['variable'].apply(str.title)
    wide = long_form.pivot(index=['group', 'total', 'version'], values=['value'], columns=['tool', 'variable'])
    # Move the unnamed values level to the end so it can be dropped after sorting
    wide = wide.reorder_levels(axis=1, order=['tool', 'variable', None])
    wide = wide.sort_index(axis=1).sort_index(axis=0)
    table = wide.droplevel(2, axis=1)
    # Title-case the row index names and blank out the column level names
    table.index.names = [label.title() for label in table.index.names]
    table.columns.names = [None] * len(table.columns.names)
    return table.style.format(precision=0, na_rep='---')


# Directory holding the run reports; set GALETTE_REPORTS_DIR to use a different location
reports_dir = os.environ.get('GALETTE_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1193160/')
data = extract(reports_dir, os.path.join(reports_dir, 'functional.csv'))
# Remove disabled tests; copy so the column assignments below act on an
# independent frame rather than on a slice of the extracted data
data = data[data['status'] != 'disabled'].copy()
# Convert test class names into benchmark groups
data['group'] = data['test'].apply(get_benchmark_group)
# Convert versions to numeric values
data['version'] = pd.to_numeric(data['version'])
executed = compute_executions_counts(data)

Searching for runs in /home/katie/Downloads/galette/slurm-1193160/.
Found 12 runs.
Checking runs.
12 runs were successful.
Creating combined run dataset CSV.
Wrote combined run dataset CSV to /home/katie/Downloads/galette/slurm-1193160/functional.csv.


In [9]:
# Build the count table, then display it styled with a caption
counts = create_count_table(data, executed)
(
    style_counts(counts)
    .set_caption('Semantics Preservation and Propagation Accuracy.')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Galette,Galette,MirrorTaint,MirrorTaint,Phosphor,Phosphor
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Sem,Tag,Sem,Tag,Sem,Tag
Group,Total,Version,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
ArrayAccess,74,8,0,0,0,55,0,0
ArrayAccess,74,11,0,0,0,55,0,0
ArrayAccess,74,17,0,0,0,55,74,0
ArrayAccess,74,21,0,0,0,55,74,0
ArrayLength,5,8,0,0,0,3,0,0
ArrayLength,5,11,0,0,0,3,0,0
ArrayLength,5,17,0,0,0,3,5,0
ArrayLength,5,21,0,0,0,3,5,0
ArrayReflection,75,8,0,0,0,65,0,7
ArrayReflection,75,11,0,0,0,65,0,7


In [7]:
# LaTeX command emitted on its own line before each row of a striped group (see format_group)
ZEBRA_STRIPE = r'\rowcolor{row-stripe}'
# Group-name cell placed on the last row of a group; format_group substitutes
# $0 with the number of version rows and $1 with the formatted group name
GROUP_ENTRY = r'\multirow[c]{-$0}{*}{\shortstack[c]{$1}}'
# Test-count cell placed on the last row of a group; format_group substitutes
# $0 with the number of version rows and $1 with the group's total
COUNT_ENTRY = r'\multirow[c]{-$0}{*}{$1}'
# Skeleton of the LaTeX table; create_table substitutes $0 with the rendered body rows
TABLE_TEMPLATE = r"""
\begin{tabular}{crr|rrrrrr}
    \toprule
    & & & \multicolumn{2}{c}{\galette} & \multicolumn{2}{c}{\mirrortaint} & \multicolumn{2}{c}{\phosphor} \\
    Group & \# & JDK & Sem & Tag & Sem & Tag & Sem & Tag \\
    \cmidrule(lr){1-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} \cmidrule(lr){8-9}
$0
    \bottomrule
\end{tabular}
"""


def format_group_name(name):
    r"""Format a group name for use in the LaTeX table.

    A ``\\`` line break is inserted before every uppercase character except
    the first character of the name, and any 'Java9' in the result is replaced
    with '9+'. An empty name is returned unchanged.

    :param name: group name, e.g. 'ArrayAccess'.
    :return: the formatted name, e.g. 'Array\\Access'.
    """
    pieces = [
        # Never break before the leading character, even when it is uppercase
        (r"\\" + ch) if (position > 0 and ch.isupper()) else ch
        for position, ch in enumerate(name)
    ]
    return "".join(pieces).replace('Java9', '9+')


def format_group(data, group, stripe):
    """Render the LaTeX table rows for one benchmark group.

    One row is produced per version of the group, each holding the 'sem' and
    'tag' values for galette, mirror-taint and phosphor in that order. The
    group name and total cells are placed on the last row. When ``stripe`` is
    truthy every row is preceded by a ``ZEBRA_STRIPE`` line.

    :return: the rows joined with LaTeX row terminators; the final row is left
        unterminated.
    """
    rows_for_group = select(data, group=group)
    long_form = rows_for_group.melt(id_vars=['tool', 'version'], value_vars=['sem', 'tag'])
    # Maps (tool, version, variable) to the corresponding count
    lookup = long_form.set_index(['tool', 'version', 'variable']).to_dict()['value']
    jdk_versions = sorted(list(rows_for_group['version'].unique()))
    total = rows_for_group['total'].to_list()[0]
    tool_order = ['galette', 'mirror-taint', 'phosphor']
    rows = []
    for jdk in jdk_versions:
        # The first two cells stay empty except on the last row of the group
        cells = ['', '', str(jdk)]
        for tool in tool_order:
            for metric in ['sem', 'tag']:
                cells.append(str(lookup[(tool, jdk, metric)]))
        rows.append(cells)
    span = str(len(jdk_versions))
    rows[-1][0] = GROUP_ENTRY.replace('$0', span).replace('$1', format_group_name(group))
    rows[-1][1] = COUNT_ENTRY.replace('$0', span).replace('$1', str(total))
    rendered = []
    for cells in rows:
        # Collapse runs of whitespace left behind by the empty leading cells
        text = '\t' + ' '.join(" & ".join(cells).strip().split())
        if stripe:
            text = '\t' + ZEBRA_STRIPE + '\n' + text
        rendered.append(text)
    return " \\\\\n".join(rendered)


def create_table(counts, groups, stripe):
    """Render a complete LaTeX table for the given groups.

    Groups are rendered in order with ``format_group``; striping starts with
    ``stripe`` for the first group and alternates for each following one. The
    rendered rows replace the ``$0`` placeholder of ``TABLE_TEMPLATE``.
    """
    rendered_groups = []
    shaded = stripe
    for name in groups:
        rendered_groups.append(format_group(counts, name, shaded))
        # Alternate shading between consecutive groups
        shaded = not shaded
    body = " \\\\\n".join(rendered_groups) + r' \\'
    return TABLE_TEMPLATE.replace('$0', body)


# Sort the group names and print the table for the first half, starting with a striped group
groups = sorted(counts['group'].unique())
i = len(groups) // 2
print(create_table(counts=counts, groups=groups[:i], stripe=True))


\begin{tabular}{crr|rrrrrr}
    \toprule
    & & & \multicolumn{2}{c}{\galette} & \multicolumn{2}{c}{\mirrortaint} & \multicolumn{2}{c}{\phosphor} \\
    Group & \# & JDK & Sem & Tag & Sem & Tag & Sem & Tag \\
    \cmidrule(lr){1-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} \cmidrule(lr){8-9}
	\rowcolor{row-stripe}
	& & 8 & 0 & 0 & 0 & 55 & 0 & 0 \\
	\rowcolor{row-stripe}
	& & 11 & 0 & 0 & 0 & 55 & 0 & 0 \\
	\rowcolor{row-stripe}
	& & 17 & 0 & 0 & 0 & 55 & 74 & 0 \\
	\rowcolor{row-stripe}
	\multirow[c]{-4}{*}{\shortstack[c]{Array\\Access}} & \multirow[c]{-4}{*}{74} & 21 & 0 & 0 & 0 & 55 & 74 & 0 \\
	& & 8 & 0 & 0 & 0 & 3 & 0 & 0 \\
	& & 11 & 0 & 0 & 0 & 3 & 0 & 0 \\
	& & 17 & 0 & 0 & 0 & 3 & 5 & 0 \\
	\multirow[c]{-4}{*}{\shortstack[c]{Array\\Length}} & \multirow[c]{-4}{*}{5} & 21 & 0 & 0 & 0 & 3 & 5 & 0 \\
	\rowcolor{row-stripe}
	& & 8 & 0 & 0 & 0 & 65 & 0 & 7 \\
	\rowcolor{row-stripe}
	& & 11 & 0 & 0 & 0 & 65 & 0 & 7 \\
	\rowcolor{row-stripe}
	& & 17 & 0 & 0 & 0 & 65 & 75 & 0 \\
	\rowcolo

In [8]:
# Print the table for the groups from index i onward, starting with an unstriped group
print(create_table(counts, groups[i:], False))


\begin{tabular}{crr|rrrrrr}
    \toprule
    & & & \multicolumn{2}{c}{\galette} & \multicolumn{2}{c}{\mirrortaint} & \multicolumn{2}{c}{\phosphor} \\
    Group & \# & JDK & Sem & Tag & Sem & Tag & Sem & Tag \\
    \cmidrule(lr){1-3}\cmidrule(lr){4-5}\cmidrule(lr){6-7} \cmidrule(lr){8-9}
	& & 8 & 0 & 0 & 0 & 1 & 0 & 0 \\
	& & 11 & 0 & 0 & 0 & 1 & 0 & 0 \\
	& & 17 & 0 & 0 & 0 & 1 & 4 & 0 \\
	\multirow[c]{-4}{*}{\shortstack[c]{Loop}} & \multirow[c]{-4}{*}{4} & 21 & 0 & 0 & 0 & 1 & 4 & 0 \\
	\rowcolor{row-stripe}
	& & 8 & 0 & 0 & 0 & 0 & 0 & 0 \\
	\rowcolor{row-stripe}
	& & 11 & 0 & 0 & 0 & 0 & 0 & 0 \\
	\rowcolor{row-stripe}
	& & 17 & 0 & 0 & 0 & 0 & 14 & 0 \\
	\rowcolor{row-stripe}
	\multirow[c]{-4}{*}{\shortstack[c]{Method\\Call}} & \multirow[c]{-4}{*}{14} & 21 & 0 & 0 & 0 & 0 & 14 & 0 \\
	& & 8 & 0 & 0 & 0 & 34 & 3 & 36 \\
	& & 11 & 0 & 0 & 0 & 34 & 3 & 36 \\
	& & 17 & 0 & 0 & 0 & 34 & 40 & 0 \\
	\multirow[c]{-4}{*}{\shortstack[c]{Method\\Handle}} & \multirow[c]{-4}{*}{40} & 21 & 0 & 