In [21]:
import json
import os
import re
from collections import defaultdict

import numpy as np

from data_util import *

# Names of the report and metadata files looked up inside each trial directory.
SEMANTIC_DATA_FILE_NAME = 'report.csv'
INFO_FILE_NAME = 'info.json'
# Statuses that receive a column in the count table.
STATUSES = [
    'propagation',
    'success',
    'timeout',
    'crash',
    'abort',
    'fail',
    'vm-crash',
]
# Minimum JDK version for each benchmark group; groups that are not listed default to 8.
MINIMUM_VERSIONS = defaultdict(
    lambda: 8,
    JdkUnsafe=9,
    MethodHandleJava9=9,
    RecordType=16,
    StringIndyConcat=9,
    VarHandle=9,
)


class SemanticTrial:
    """Represents the results of a taint tracking tool on the synthetic benchmarks.

    Each trial is backed by a directory that is expected to hold a report CSV
    and a JSON info file providing the 'version', 'tool' and 'vendor' of the trial.
    """

    def __init__(self, trial_dir):
        """Record the locations of the trial's files and, if present, load its info file.

        Args:
            trial_dir: Path of the directory holding the trial's files.

        Raises:
            KeyError: If the info file exists but lacks one of the expected keys.
            ValueError: If the info file is not valid JSON or its version is not an integer.
        """
        # Normalize first: os.path.basename returns '' for a path that ends with a separator
        self.id = os.path.basename(os.path.normpath(trial_dir))
        self.data_file = os.path.join(trial_dir, SEMANTIC_DATA_FILE_NAME)
        self.info_file = os.path.join(trial_dir, INFO_FILE_NAME)
        # A trial is only considered valid when both the report and the info file exist
        self.valid = all(os.path.isfile(f) for f in [self.data_file, self.info_file])
        self.vendor = self.version = self.tool = None
        if os.path.isfile(self.info_file):
            with open(self.info_file, 'r') as f:
                info = json.load(f)
            self.version = int(info['version'])
            self.tool = info['tool']
            self.vendor = info['vendor']

    def get_id(self):
        """Return a printable description of the trial: its id, tool, vendor and version."""
        return f"<id={self.id}, tool={self.tool}, vendor={self.vendor}, version={self.version}>"

    def get_data_frame(self):
        """Read the trial's report CSV into a frame.

        Whitespace surrounding the column names is stripped. The frame is then
        passed to ``set_columns`` together with the trial's version, vendor and tool
        (presumably to attach them as columns - verify against ``data_util``).
        """
        data = pd.read_csv(self.data_file) \
            .rename(columns=lambda x: x.strip())
        return set_columns(data, version=self.version, vendor=self.vendor, tool=self.tool)


def get_semantic_group(test_identifier):
    """Extract the benchmark group from a test identifier.

    The group is the run of non-dot characters that sits between a '.' and the
    following 'ITCase', e.g. '...prl.RecordTypeITCase]' yields 'RecordType'.

    Args:
        test_identifier: Identifier of a test, containing the name of its test class.

    Returns:
        The name of the benchmark group.

    Raises:
        ValueError: If the identifier does not contain a '.<group>ITCase' segment.
    """
    m = re.search(r'\.([^\.]*?)ITCase', test_identifier)
    if m is None:
        # Fail with a message naming the offending identifier instead of an AttributeError on None
        raise ValueError(f"Unable to determine benchmark group for test: {test_identifier!r}")
    return m.group(1)


def compute_status(fp, fn, status):
    """Refine a reported status using the false positive and false negative counts.

    Any status other than 'success' is returned unchanged. A 'success' is kept
    only when fp + fn is zero; otherwise it becomes 'propagation'.
    """
    # Anything that did not succeed keeps the status it was reported with
    if status != 'success':
        return status
    # A successful run with no false positives or false negatives stays a success
    if fp + fn == 0:
        return 'success'
    return "propagation"


def find_trials(input_dir):
    """Create a SemanticTrial for every sub-directory of the specified directory.

    Args:
        input_dir: Directory whose immediate sub-directories are treated as trials.

    Returns:
        A list of trials ordered by directory name.
    """
    print(f'Searching for trials in {input_dir}.')
    # os.listdir returns entries in arbitrary order; sort so that repeated runs
    # process the trials (and write the combined CSV) in the same order
    paths = (os.path.join(input_dir, name) for name in sorted(os.listdir(input_dir)))
    trials = [SemanticTrial(path) for path in paths if os.path.isdir(path)]
    print(f"\tFound {len(trials)} trials.")
    return trials


def check_trials(trials):
    """Return the trials whose ``valid`` flag is set, reporting every other trial.

    For each rejected trial a line containing its ``get_id()`` is printed.
    """
    print('Checking trials.')
    valid_trials = []
    for trial in trials:
        if trial.valid:
            valid_trials.append(trial)
            continue
        print(f"\tMissing required files for {trial.get_id()}.")
    print(f'\t{len(valid_trials)} trials were valid.')
    return valid_trials


def combine_semantic_data(trials, file):
    """Combine the reports of the specified trials into one frame and write it as a CSV.

    Rows whose status is 'disabled' are removed, and a 'group' column derived
    from the 'test' column is added.

    Args:
        trials: Trials whose ``get_data_frame()`` results are concatenated.
        file: Path of the CSV file to write.

    Returns:
        The combined frame that was written to ``file``.
    """
    print('Creating semantic experiment CSV.')
    combined = pd.concat([t.get_data_frame() for t in trials]) \
        .reset_index(drop=True)
    # Remove disabled tests; copy the selection so that the column added below is
    # written to an independent frame rather than to a slice of the combined one
    data = combined[combined['status'] != 'disabled'].copy()
    # Convert test class names into benchmark groups
    data['group'] = data['test'].apply(get_semantic_group)
    data.to_csv(file, index=False)
    print(f'\tWrote semantic experiment CSV to {file}.')
    return data


def complete_cartesian_index(data, columns, fill_value=0, categories=None):
    """Expand a frame so that it has a row for every combination of values in the specified columns.

    Args:
        data: Frame to expand.
        columns: Names of the columns whose combinations must all be present.
        fill_value: Value used for the remaining columns of rows that had to be added.
        categories: Values to combine for each column, in the order of ``columns``;
            when None, the unique values found in ``data`` are used.

    Returns:
        A frame with one row per combination, with ``columns`` restored as regular columns.
    """
    if categories is not None:
        levels = categories
    else:
        levels = [data[name].unique() for name in columns]
    # The cartesian product of the values of every selected column
    full_index = pd.MultiIndex.from_product(levels, names=columns)
    # Index the data by the selected columns, then align it with the complete index
    indexed = data.set_index(columns)
    aligned = indexed.reindex(full_index, fill_value=fill_value)
    return aligned.reset_index()


def compute_executions_counts(data):
    """Count the rows of ``data`` for each combination of group, tool, version and vendor.

    Combinations that do not occur in ``data`` are included with a count of 0.

    Returns:
        A frame with the columns 'group', 'tool', 'version', 'vendor' and 'executed'.
    """
    keys = ['group', 'tool', 'version', 'vendor']
    # Number of rows recorded for every combination that occurs in the data
    sizes = data.groupby(keys)['status'].size()
    executed = sizes.rename('executed').reset_index()
    executed = executed.drop_duplicates().reset_index(drop=True)
    # Add a zero entry for each combination without any rows
    return complete_cartesian_index(executed, keys)


def check_execution_counts(executed):
    """Check the execution counts of every group against the group's minimum version.

    Args:
        executed: Frame with the columns 'group', 'version' and 'executed'.

    Raises:
        AssertionError: If a group has executions on a version below its minimum
            version, or if its counts on the remaining versions are not all identical.
    """
    for group in list(executed['group'].unique()):
        min_version = MINIMUM_VERSIONS[group]
        selected = select(executed, group=group)
        # Assert that there are no executions for each group on JDKs lower than its minimum version
        disabled = selected[selected['version'] < min_version]['executed']
        assert set(disabled) == {0} or len(disabled) == 0, \
            f"Group {group!r} has executions on versions below its minimum version {min_version}"
        enabled = selected[selected['version'] >= min_version]['executed']
        # Assert that the total number tests executed in each group for each tool on each JDK is consistent
        distinct = set(enabled)
        assert len(distinct) == 1, \
            f"Group {group!r} does not have a single consistent execution count " \
            f"on versions >= {min_version}: {distinct}"


def assert_record_equality(frames):
    """Assert that all of the specified frames contain the same set of records.

    A record is the set of (column name, value) pairs of one row, so the
    comparison ignores the index, the order of rows and columns, and duplicate rows.
    Cell values must be hashable.

    Args:
        frames: Frames to compare; zero or one frame trivially passes.

    Raises:
        AssertionError: If any frame's records differ from those of the first frame.
    """
    if len(frames) <= 1:
        return
    # Iterating over a record dict yields only its keys (the column names), so
    # items() is used to make the values take part in the comparison as well
    record_sets = [
        {frozenset(record.items()) for record in frame.to_dict(orient='records')}
        for frame in frames
    ]
    expected = record_sets[0]
    for position, record_set in enumerate(record_sets[1:], start=1):
        assert record_set == expected, \
            f"Frame {position} differs from frame 0 in {len(record_set ^ expected)} record(s)"


def assert_vendor_matches(counts):
    """Assert that, for every version, the rows of all vendors are identical apart from the vendor.

    Args:
        counts: Frame with 'version' and 'vendor' columns.

    Raises:
        AssertionError: If two vendors have different records for the same version.
    """
    vendors = list(counts['vendor'].unique())
    for version in list(counts['version'].unique()):
        # The vendor column necessarily differs between the selections, so it is
        # dropped to compare only the remaining columns
        assert_record_equality([
            select(counts, version=version, vendor=vendor).drop(columns='vendor')
            for vendor in vendors
        ])


# Directory searched for trial sub-directories, and directory receiving the processed output.
# NOTE(review): both are absolute, machine-specific paths; adjust them before running elsewhere.
reports_dir = '/home/katie/Downloads/slurm-1189372/'
output_dir = '/home/katie/Downloads/processed-1189372/'
# Let pandas display up to 100 rows of a frame
pd.set_option('display.max_rows', 100)
os.makedirs(output_dir, exist_ok=True)
# Locate the trials and keep only those that have all of their required files
trials = check_trials(find_trials(reports_dir))
# Aggregate the individual CSVs
data = combine_semantic_data(trials, os.path.join(output_dir, 'semantic.csv'))
data.head()

Searching for trials in /home/katie/Downloads/slurm-1189372/.
	Found 24 trials.
Checking trials.
	24 trials were valid.
Creating semantic experiment CSV.
	Wrote semantic experiment CSV to /home/katie/Downloads/processed-1189372/semantic.csv.


Unnamed: 0,test,tp,fp,fn,status,version,vendor,tool,group
0,[engine:junit-jupiter]/[class:edu.neu.ccs.prl....,1,0,0,success,21,corretto,galette,RecordType
1,[engine:junit-jupiter]/[class:edu.neu.ccs.prl....,0,0,0,success,21,corretto,galette,RecordType
2,[engine:junit-jupiter]/[class:edu.neu.ccs.prl....,1,0,0,success,21,corretto,galette,RecordType
3,[engine:junit-jupiter]/[class:edu.neu.ccs.prl....,0,0,0,success,21,corretto,galette,RecordType
4,[engine:junit-jupiter]/[class:edu.neu.ccs.prl....,1,0,0,success,21,corretto,galette,RecordType


In [22]:
# Count the result rows for every combination of group, tool, version and vendor
executed = compute_executions_counts(data)
# Validate the counts against the minimum version of each group before relying on them
check_execution_counts(executed)
executed

Unnamed: 0,group,tool,version,vendor,executed
0,ArrayAccess,galette,8,corretto,74
1,ArrayAccess,galette,8,temurin,74
2,ArrayAccess,galette,11,corretto,74
3,ArrayAccess,galette,11,temurin,74
4,ArrayAccess,galette,17,corretto,74
...,...,...,...,...,...
643,VarHandle,phosphor,11,temurin,1932
644,VarHandle,phosphor,17,corretto,1932
645,VarHandle,phosphor,17,temurin,1932
646,VarHandle,phosphor,21,corretto,1932


In [27]:
def create_count_table(data, executed):
    """Count the tests in each status for each group for each tool on each JDK.

    Note that the 'status' column of ``data`` is rewritten in place using
    ``compute_status`` before counting.

    Args:
        data: Frame with the columns 'group', 'tool', 'version', 'vendor', 'fp', 'fn' and 'status'.
        executed: Frame with the columns 'group', 'version' and 'executed'.

    Returns:
        A frame with one row per combination of group, tool, version and vendor,
        one integer column per entry of ``STATUSES``, the 'total' of the group
        (its largest execution count) and the 'executed' count of the group on that version.
    """
    # Compute statuses
    data['status'] = data[['fp', 'fn', 'status']] \
        .apply(lambda x: compute_status(*x), axis=1)
    by = ['group', 'tool', 'version', 'vendor']
    # Count the number of entries in each status for each group for each tool on each JDK.
    # The counts are named explicitly because the default name of the value_counts
    # result is 'count' only in pandas 2.0 and later.
    counts = data.groupby(by)['status'] \
        .value_counts() \
        .rename('count') \
        .reset_index()
    # Fill in zeros for missing combinations
    # Pivot along the statuses to put the statuses in columns
    categories = [data[c].unique() for c in by] + [STATUSES]
    counts = complete_cartesian_index(counts, by + ['status'], categories=categories) \
        .pivot(columns='status', index=by, values='count') \
        .fillna(0) \
        .astype('int64') \
        .reset_index()
    # Compute the total number of tests per group
    totals = executed.groupby(['group'])['executed'] \
        .max() \
        .rename('total') \
        .reset_index() \
        .drop_duplicates()
    # Add totals to the table
    counts = counts.merge(totals, on=['group'], how='left')
    # Keep one execution count per group and version
    executed = executed[['group', 'version', 'executed']].drop_duplicates()
    # Add the executed counts to the table
    return counts.merge(executed, on=['group', 'version'], how='left')


# Build the per-status count table; this call also rewrites data['status'] in place
counts = create_count_table(data, executed)
# Confirm that each tool performed the same on each JDK version for all vendors
assert_vendor_matches(counts)
# Since the results for different vendors are identical, select a single vendor
counts = select(counts, vendor='temurin') \
    .drop(columns='vendor') \
    .reset_index(drop=True)
counts

Unnamed: 0,group,tool,version,abort,crash,fail,propagation,success,timeout,vm-crash,total,executed
0,ArrayAccess,galette,8,0,0,0,0,74,0,0,74,74
1,ArrayAccess,galette,11,0,0,0,0,74,0,0,74,74
2,ArrayAccess,galette,17,0,0,0,0,74,0,0,74,74
3,ArrayAccess,galette,21,0,0,0,0,74,0,0,74,74
4,ArrayAccess,mirror-taint,8,0,0,0,55,19,0,0,74,74
...,...,...,...,...,...,...,...,...,...,...,...,...
319,VarHandle,mirror-taint,21,0,0,0,1353,579,0,0,1932,1932
320,VarHandle,phosphor,8,0,0,0,0,0,0,0,1932,0
321,VarHandle,phosphor,11,0,0,190,726,1016,0,0,1932,1932
322,VarHandle,phosphor,17,0,0,0,0,0,0,1932,1932,1932


In [28]:
# List failures due to the JVM immediately crashing
# (rows of the count table with a non-zero 'vm-crash' count)
display(counts[counts['vm-crash'] != 0].style)

Unnamed: 0,group,tool,version,abort,crash,fail,propagation,success,timeout,vm-crash,total,executed
10,ArrayAccess,phosphor,17,0,0,0,0,0,0,74,74,74
11,ArrayAccess,phosphor,21,0,0,0,0,0,0,74,74,74
22,ArrayLength,phosphor,17,0,0,0,0,0,0,5,5,5
23,ArrayLength,phosphor,21,0,0,0,0,0,0,5,5,5
34,ArrayReflection,phosphor,17,0,0,0,0,0,0,75,75,75
35,ArrayReflection,phosphor,21,0,0,0,0,0,0,75,75,75
46,Assignment,phosphor,17,0,0,0,0,0,0,12,12,12
47,Assignment,phosphor,21,0,0,0,0,0,0,12,12,12
58,BoxedType,phosphor,17,0,0,0,0,0,0,8,8,8
59,BoxedType,phosphor,21,0,0,0,0,0,0,8,8,8


In [33]:
# List other behavioral failures
# (rows with at least one abort, crash, fail or timeout)
counts['behavioral'] = counts['abort'] + counts['crash'] + counts['fail'] + counts['timeout']
# NOTE(review): the saved output also shows a misspelled 'behaviorial' column that this cell
# does not create - presumably left over from an earlier execution of an older version of the
# cell; restart the kernel and run all cells to confirm that it disappears.
display(counts[counts['behavioral'] != 0].style)

Unnamed: 0,group,tool,version,abort,crash,fail,propagation,success,timeout,vm-crash,total,executed,behaviorial,behavioral
104,ConstructorReflection,phosphor,8,0,0,18,0,70,0,0,88,88,18,18
105,ConstructorReflection,phosphor,11,0,0,18,0,70,0,0,88,88,18,18
188,MethodHandle,phosphor,8,0,0,3,36,1,0,0,40,40,3,3
189,MethodHandle,phosphor,11,0,0,3,36,1,0,0,40,40,3,3
201,MethodHandleJava9,phosphor,11,0,0,4,7,2,0,0,13,13,4,4
212,MethodReflection,phosphor,8,0,0,38,1,174,0,0,213,213,38,38
213,MethodReflection,phosphor,11,0,0,38,1,174,0,0,213,213,38,38
277,StringIndyConcat,galette,11,0,0,0,0,36,1,0,37,37,1,1
285,StringIndyConcat,phosphor,11,0,0,0,18,18,1,0,37,37,1,1
321,VarHandle,phosphor,11,0,0,190,726,1016,0,0,1932,1932,190,190


In [35]:
def compute_table_entry(row):
    """Compute the table entry for one row of the count table.

    Args:
        row: Mapping with the keys 'executed', 'success', 'behavioral',
            'propagation' and 'vm-crash'.

    Returns:
        NaN if nothing was executed. Otherwise the number of executed tests that
        did not succeed, as a string, prefixed with '*' when any of them were
        behavioral failures or VM crashes.

    Raises:
        AssertionError: If the failures are not fully accounted for by the
            behavioral, propagation and VM crash counts.
    """
    if row['executed'] == 0:
        # np.NaN was removed in NumPy 2.0; np.nan exists in every NumPy version
        return np.nan
    failures = row['executed'] - row['success']
    accounted = row['behavioral'] + row['propagation'] + row['vm-crash']
    assert failures == accounted, \
        f"{failures} failures, but only {accounted} are accounted for by a failure status"
    # Mark entries that include failures other than incorrect propagation
    starred = row['behavioral'] != 0 or row['vm-crash'] != 0
    return "*" + str(failures) if starred else str(failures)


# Derive the entry shown in the final table from each row of the count table
counts['entry'] = counts.apply(compute_table_entry, axis=1)
counts

Unnamed: 0,group,tool,version,abort,crash,fail,propagation,success,timeout,vm-crash,total,executed,behaviorial,behavioral,entry
0,ArrayAccess,galette,8,0,0,0,0,74,0,0,74,74,0,0,0
1,ArrayAccess,galette,11,0,0,0,0,74,0,0,74,74,0,0,0
2,ArrayAccess,galette,17,0,0,0,0,74,0,0,74,74,0,0,0
3,ArrayAccess,galette,21,0,0,0,0,74,0,0,74,74,0,0,0
4,ArrayAccess,mirror-taint,8,0,0,0,55,19,0,0,74,74,0,0,55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,VarHandle,mirror-taint,21,0,0,0,1353,579,0,0,1932,1932,0,0,1353
320,VarHandle,phosphor,8,0,0,0,0,0,0,0,1932,0,0,0,
321,VarHandle,phosphor,11,0,0,190,726,1016,0,0,1932,1932,190,190,*916
322,VarHandle,phosphor,17,0,0,0,0,0,0,1932,1932,1932,0,0,*1932


In [38]:
# Reshape the entries into one row per (group, total) and one column per (tool, version).
# Pivoting with a list of values adds an unnamed column level holding 'entry'; it is moved
# to the innermost position, the axes are sorted, and the level is then dropped.
table = counts.pivot(index=['group', 'total'], values=['entry'], columns=['tool', 'version']) \
    .reorder_levels(axis=1, order=['tool', 'version', None]) \
    .sort_index(axis=1) \
    .sort_index(axis=0) \
    .droplevel(2, axis=1)
# Title-case the index names for presentation
table.index.names = [x.title() for x in table.index.names]
# Title-case the tool names and remove their hyphens; versions are left as they are
table.columns = table.columns.map(lambda l: (l[0].title().replace('-', ''), l[1]))
# Remove the names of the column levels
table.columns.names = [None for _ in table.columns.names]
# Render missing entries as '---'
styler = table.style.format(precision=0, na_rep='---')
display(styler)

Unnamed: 0_level_0,Unnamed: 1_level_0,Galette,Galette,Galette,Galette,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,Unnamed: 1_level_1,8,11,17,21,8,11,17,21,8,11,17,21
Group,Total,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
ArrayAccess,74,0,0,0,0,55,55,55,55,0,0,*74,*74
ArrayLength,5,0,0,0,0,3,3,3,3,0,0,*5,*5
ArrayReflection,75,0,0,0,0,65,65,65,65,7,7,*75,*75
Assignment,12,0,0,0,0,0,0,0,0,0,0,*12,*12
BoxedType,8,0,0,0,0,0,0,0,0,0,0,*8,*8
ClassReflection,7,0,0,0,0,0,0,0,0,0,0,*7,*7
Collection,6,0,0,0,0,2,2,2,2,0,0,*6,*6
Conditional,4,0,0,0,0,0,0,0,0,0,0,*4,*4
ConstructorReflection,88,0,0,0,0,12,12,12,12,*18,*18,*88,*88
Field,6,0,0,0,0,2,2,2,2,0,0,*6,*6
