In [3]:
# reload modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import os

# os.chdir("") # set the root directory of the project
import numpy as np
import pandas as pd
from sra.dataloader import (
    get_covs_runs,
    get_run_paths,
    get_trial_paths,
    get_covs_trial,
)
from sra.cfggen import parse_ll, parse_dotfiles, gen_cfg_inter
from sra.mapper import get_bbs_fuzz, get_mapping, fuzzdat_to_obs
from sra.estimator import Graph
from sra.estimator_chunk import structure_estimation


In [5]:
def get_statistics(num_arr: np.ndarray):
    mean_val = np.mean(num_arr)
    std_val = np.std(num_arr)
    min_val = np.min(num_arr)
    max_val = np.max(num_arr)
    quantiles = pd.DataFrame(num_arr).quantile(
        [0.01] + [1 / 20 * i for i in range(1, 20)] + [0.99]
    )
    print(f"mean: {mean_val}, std: {std_val}, min: {min_val}, max: {max_val}")
    print(f"quantiles:\n{quantiles}")


In [6]:
import threading

from functools import wraps
import errno
import os
import signal


class TimeoutError(Exception):
    pass


def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
    def decorator(func):
        def _handle_timeout(signum, frame):
            raise TimeoutError(error_message)

        def wrapper(*args, **kwargs):
            signal.signal(signal.SIGALRM, _handle_timeout)
            signal.alarm(seconds)
            try:
                result = func(*args, **kwargs)
            finally:
                signal.alarm(0)
            return result

        return wraps(func)(wrapper)

    return decorator


@timeout(5)
def print_result(observable_nodes, graph, target_node):
    print(
        f"{target_node}: {structure_estimation(np.zeros((1, len(observable_nodes))),graph,target_node,observable_nodes,2)}"
    )

# Tcas

In [7]:
dotfiles = [
    fname
    for fname in os.listdir("fuzz-data/ft_data/source_code/tcas")
    if fname.startswith(".") and fname.endswith(".dot")
]
functions = sorted([fname.split(".")[1] for fname in dotfiles])

bbs_fuzz_path = "fuzz-data/tcas_exp_1/tcas_01/tcas_aflpp_run_01/ft_tcas.json"
bbs_fuzz = get_bbs_fuzz(bbs_fuzz_path)

debug_info_dict, blocks_dict = parse_ll("fuzz-data/ft_data/source_code/tcas/tcas.ll")
cfgs_intra, node_to_bb = parse_dotfiles(
    dotfiles,
    "fuzz-data/ft_data/source_code/tcas",
    debug_info_dict,
    blocks_dict,
    non_overlap_lineidx=False,
)

cfg_inter = gen_cfg_inter(cfgs_intra)
graph = Graph(cfg_inter)

map_fuzz_to_obs_node, map_obs_to_fuzz_node = get_mapping(
    functions, bbs_fuzz, node_to_bb, cfg_inter, debug=True
)

[ALIM]
bb_fuzz_idxs=       1 node_obs=Node0x11de3a780      ('ALIM', 0)                         obs_node_lines={59, 61} vs fuzz_node_lines=[61]

[Inhibit_Biased_Climb]
bb_fuzz_idxs=       2 node_obs=Node0x11de3af90      ('Inhibit_Biased_Climb', 0)         obs_node_lines={64, 66} vs fuzz_node_lines=[66]
bb_fuzz_idxs=       3 node_obs=Node0x11de3b1f0      ('Inhibit_Biased_Climb', 1)         obs_node_lines={66} vs fuzz_node_lines=[66]
bb_fuzz_idxs=       4 node_obs=Node0x11de3b270      ('Inhibit_Biased_Climb', 2)         obs_node_lines={66} vs fuzz_node_lines=[66]
bb_fuzz_idxs=       5 node_obs=Node0x11de3b6e0      ('Inhibit_Biased_Climb', 3)         obs_node_lines={66} vs fuzz_node_lines=[66]

[Non_Crossing_Biased_Climb]
bb_fuzz_idxs=       6 node_obs=Node0x11de3bd40-0    ('Non_Crossing_Biased_Climb', 0)    obs_node_lines={69, 71, 72, 73, 75} vs fuzz_node_lines=[71, 72, 73, 75, 76]
bb_fuzz_idxs=       6 node_obs=Node0x11de3bd40-1    ('Non_Crossing_Biased_Climb', 0)    obs_node_lines={75, 

In [8]:
projname = "tcas"
runs_path = "fuzz-data/tcas_exp_1"
run_paths = get_run_paths(runs_path, projname)
covs_trials_f = []
for run_path in run_paths:
    for trial_path in get_trial_paths(run_path):
        covs_trial = get_covs_trial(trial_path)
        covs = [cov for cov in covs_trial if max(cov) > 0]
        covs_trials_f.append(covs)
print(f"number of trials: {len(covs_trials_f)}")

number of trials: 990


In [9]:
covs_f = []
for covs in covs_trials_f:
    covs_f.extend(covs)
covs_f = np.array(covs_f)
print(f"{covs_f.shape=}")
cov_maxs = np.max(covs_f, axis=1)
print(f"{cov_maxs.shape=}")
get_statistics(cov_maxs)


covs_f.shape=(357377, 63)
cov_maxs.shape=(357377,)
mean: 226.89232659068713, std: 153.19242703306415, min: 4, max: 1132
quantiles:
          0
0.01   79.0
0.05   90.0
0.10   92.0
0.15   99.0
0.20  108.0
0.25  111.0
0.30  123.0
0.35  125.0
0.40  140.0
0.45  143.0
0.50  165.0
0.55  170.0
0.60  199.0
0.65  232.0
0.70  248.0
0.75  320.0
0.80  336.0
0.85  428.0
0.90  453.0
0.95  580.0
0.99  640.0


In [10]:
in_between_range = [90 <= cov_max_f <= 580 for cov_max_f in cov_maxs]
cov_data_f = covs_f[in_between_range]
print(f"{cov_data_f.shape=}")
sum_data_f = np.sum(cov_data_f, axis=0)
print(f"{sum_data_f.shape=}")
min_obs = np.min(sum_data_f[sum_data_f > 0])
print(f"{min_obs=}")
num_obss = cov_maxs[in_between_range]
total_obs = np.sum(num_obss)
print(f"{total_obs=}")


cov_data_f.shape=(325346, 63)
sum_data_f.shape=(63,)
min_obs=2442916
total_obs=68911068


In [11]:
nodenames = list(cfg_inter["nodes"].keys())
cov_data_o = fuzzdat_to_obs(
    cov_data_f, cfg_inter, bbs_fuzz, map_fuzz_to_obs_node, nodenames
)
print(f"{cov_data_o.shape=}")
sum_data_o = np.sum(cov_data_o, axis=0)
print(f"{sum_data_o.shape=}")


cov_data_o.shape=(325346, 152)
sum_data_o.shape=(152,)


In [12]:
def order_found(
    covs: np.ndarray, observable_ids_f, consider_remaining: bool = True
):
    founded = set()
    ret = {idx: np.nan for idx in range(len(covs[0]))}
    last_found = {}
    for cov in covs:
        found = {idx for idx in range(len(cov)) if cov[idx] > 0}
        new_found = found - founded
        founded = founded | found
        for idx in new_found:
            ret[idx] = len(founded)
        if len(new_found):
            last_found = new_found
    if consider_remaining:
        remaining = observable_ids_f - founded
        if len(remaining):
            founded = founded | remaining
            for idx in remaining:
                ret[idx] = len(founded)
            last_found = remaining
    return ret, last_found

In [13]:
observable_ids_f = {i for i in range(len(sum_data_f)) if sum_data_f[i] > 0}


order_data = {idx: [] for idx in range(len(covs_trials_f[0][0]))}
last_found_cnt = {}
for covs_trial in covs_trials_f:
    order_trial, last_found = order_found(
        covs_trial, observable_ids_f, consider_remaining=False
    )
    for idx in order_trial:
        if np.isnan(order_trial[idx]):
            continue
        order_data[idx].append(order_trial[idx])
    for idx in last_found:
        if idx not in last_found_cnt:
            last_found_cnt[idx] = 0
        last_found_cnt[idx] += 1
for idx, orders in order_data.items():
    if idx <= 10:
        print(f"{idx}: {np.nanmean(orders):.2f} ({orders[:5]}...)")
    order_data[idx] = np.nanmean(order_data[idx])
    if np.isnan(order_data[idx]):
        order_data[idx] = 0

order_df = pd.DataFrame(order_data, index=[0]).T
display(order_df.sort_values(by=0, ascending=False).head(10))

last_found_df = pd.DataFrame(last_found_cnt, index=[0])
display(last_found_df.T.sort_values(by=0, ascending=False).head(10))


0: 25.85 ([37, 37, 14, 36, 12]...)
1: 48.76 ([49, 49, 44, 52, 57]...)
2: 44.12 ([37, 37, 44, 36, 44]...)
3: 49.83 ([49, 37, 44, 52, 44]...)
4: 44.30 ([37, 49, 44, 36, 44]...)
5: 44.12 ([37, 37, 44, 36, 44]...)
6: 44.12 ([37, 37, 44, 36, 44]...)
7: 50.52 ([49, 49, 50, 52, 44]...)
8: 55.78 ([49, 49, 55, 52, 57]...)
9: 55.78 ([49, 49, 55, 52, 57]...)
10: 55.78 ([49, 49, 55, 52, 57]...)


  order_data[idx] = np.nanmean(order_data[idx])


Unnamed: 0,0
37,58.711066
47,57.471717
44,57.471717
22,55.782828
21,55.782828
8,55.782828
9,55.782828
10,55.782828
49,55.250505
11,50.520202


Unnamed: 0,0
37,894
44,65
47,65
49,36
8,8
9,8
10,8
21,8
22,8


In [14]:
ids_of_interest_f = [37]
for idx_f in ids_of_interest_f:
    print(
        f"map_fuzz_to_obs_node[{idx_f}]={map_fuzz_to_obs_node[idx_f]}"
    )


map_fuzz_to_obs_node[37]={'Node0x11de49030'}


# Printtokens2

In [None]:
source_dir = "fuzz-data/ft_data/source_code/printtokens2_2"
source_name = "printtokens2_2_fuzz"

dotfiles = [
    fname
    for fname in os.listdir(source_dir)
    if fname.startswith(".") and fname.endswith(".dot")
]
functions = sorted([fname.split(".")[1] for fname in dotfiles])

bbs_fuzz_path = "fuzz-data/printtokens2_2_exp_1/printtokens2_2_01/printtokens2_2_aflpp_run_01/ft_printtokens2_2.json"
bbs_fuzz = get_bbs_fuzz(bbs_fuzz_path)

debug_info_dict, blocks_dict = parse_ll(
    os.path.join(source_dir, f"{source_name}.ll")
)
cfgs_intra, node_to_bb = parse_dotfiles(
    dotfiles,
    source_dir,
    debug_info_dict,
    blocks_dict,
    non_overlap_lineidx=False,
)

cfg_inter = gen_cfg_inter(cfgs_intra)
graph = Graph(cfg_inter)

map_fuzz_to_obs_node, map_obs_to_fuzz_node = get_mapping(
    functions, bbs_fuzz, node_to_bb, cfg_inter, debug=True
)


In [None]:
projname = "printtokens2_2"
runs_path = "fuzz-data/printtokens2_2_exp_1"
run_paths = get_run_paths(runs_path, projname)
covs_trials_f = []
for run_path in run_paths:
    for trial_path in get_trial_paths(run_path):
        covs_trial = get_covs_trial(trial_path)
        covs = [cov for cov in covs_trial if max(cov) > 0]
        covs_trials_f.append(covs)
print(f"number of trials: {len(covs_trials_f)}")

In [None]:
covs_f = []
for covs in covs_trials_f:
    covs_f.extend(covs)
covs_f = np.array(covs_f)
print(f"{covs_f.shape=}")
cov_maxs = np.max(covs_f, axis=1)
print(f"{cov_maxs.shape=}")
get_statistics(cov_maxs)


In [None]:
in_between_range = [89 <= cov_max_f <= 563 for cov_max_f in cov_maxs]
cov_data_f = covs_f[in_between_range]
print(f"{cov_data_f.shape=}")
sum_data_f = np.sum(cov_data_f, axis=0)
print(f"{sum_data_f.shape=}")
min_obs = np.min(sum_data_f[sum_data_f > 0])
print(f"{min_obs=}")
num_obss = cov_maxs[in_between_range]
total_obs = np.sum(num_obss)
print(f"{total_obs=}")


In [None]:
nodenames = list(cfg_inter["nodes"].keys())
cov_data_o = fuzzdat_to_obs(
    cov_data_f, cfg_inter, bbs_fuzz, map_fuzz_to_obs_node, nodenames
)
print(f"{cov_data_o.shape=}")
sum_data_o = np.sum(cov_data_o, axis=0)
print(f"{sum_data_o.shape=}")


In [None]:
order_data = {idx: [] for idx in range(len(covs_trials_f[0][0]))}
last_found_cnt = {}
for covs_trial in covs_trials_f:
    order_trial, last_found = order_found(covs_trial, observable_ids_f, consider_remaining=False)
    for idx in order_trial:
        if np.isnan(order_trial[idx]):
            continue
        order_data[idx].append(order_trial[idx])
    for idx in last_found:
        if idx not in last_found_cnt:
            last_found_cnt[idx] = 0
        last_found_cnt[idx] += 1
for idx, orders in order_data.items():
    if idx <= 10:
        print(f"{idx}: {np.nanmean(orders):.2f} ({orders[:5]}...)")
    order_data[idx] = np.nanmean(order_data[idx])
    if np.isnan(order_data[idx]):
        order_data[idx] = 0

order_df = pd.DataFrame(order_data, index=[0]).T
display(order_df.sort_values(by=0, ascending=False).head(10))

last_found_df = pd.DataFrame(last_found_cnt, index=[0])
display(last_found_df.T.sort_values(by=0, ascending=False).head(10))

In [None]:
ids_of_interest_f = [118, 95]
for idx_f in ids_of_interest_f:
    print(
        f"map_fuzz_to_obs_node[{idx_f}]={map_fuzz_to_obs_node[idx_f]}"
    )


# Replace

In [None]:
source_dir = "fuzz-data/ft_data/source_code/replace"
source_name = "replace_fuzz"

dotfiles = [
    fname
    for fname in os.listdir(source_dir)
    if fname.startswith(".") and fname.endswith(".dot")
]
functions = sorted([fname.split(".")[1] for fname in dotfiles])

bbs_fuzz_path = "fuzz-data/replace_exp_2/replace_01/replace_aflpp_run_01/ft_replace.json"
bbs_fuzz = get_bbs_fuzz(bbs_fuzz_path)

debug_info_dict, blocks_dict = parse_ll(
    os.path.join(source_dir, f"{source_name}.ll")
)
cfgs_intra, node_to_bb = parse_dotfiles(
    dotfiles,
    source_dir,
    debug_info_dict,
    blocks_dict,
    non_overlap_lineidx=False,
)

cfg_inter = gen_cfg_inter(cfgs_intra)
graph = Graph(cfg_inter)

map_fuzz_to_obs_node, map_obs_to_fuzz_node = get_mapping(
    functions, bbs_fuzz, node_to_bb, cfg_inter, debug=True
)


In [None]:
projname = "replace"
runs_path = "fuzz-data/replace_exp_2"
run_paths = get_run_paths(runs_path, projname)
covs_trials_f = []
for run_path in run_paths:
    for trial_path in get_trial_paths(run_path):
        covs_trial = get_covs_trial(trial_path)
        covs = [cov for cov in covs_trial if max(cov) > 0]
        covs_trials_f.append(covs)
print(f"number of trials: {len(covs_trials_f)}")

In [None]:
covs_f = []
for covs in covs_trials_f:
    covs_f.extend(covs)
covs_f = np.array(covs_f)
print(f"{covs_f.shape=}")
cov_maxs = np.max(covs_f, axis=1)
print(f"{cov_maxs.shape=}")
get_statistics(cov_maxs)


In [None]:
in_between_range = [90.0 <= cov_max_f <= 609.0 for cov_max_f in cov_maxs]
cov_data_f = covs_f[in_between_range]
print(f"{cov_data_f.shape=}")
sum_data_f = np.sum(cov_data_f, axis=0)
print(f"{sum_data_f.shape=}")
min_obs = np.min(sum_data_f[sum_data_f > 0])
print(f"{min_obs=}")
num_obss = cov_maxs[in_between_range]
total_obs = np.sum(num_obss)
print(f"{total_obs=}")


In [None]:
nodenames = list(cfg_inter["nodes"].keys())
cov_data_o = fuzzdat_to_obs(
    cov_data_f, cfg_inter, bbs_fuzz, map_fuzz_to_obs_node, nodenames
)
print(f"{cov_data_o.shape=}")
sum_data_o = np.sum(cov_data_o, axis=0)
print(f"{sum_data_o.shape=}")


In [None]:
observable_ids_f = {i for i in range(len(sum_data_f)) if sum_data_f[i] > 0}

order_data = {idx: [] for idx in range(len(covs_trials_f[0][0]))}
last_found_cnt = {}
for covs_trial in covs_trials_f:
    order_trial, last_found = order_found(covs_trial, observable_ids_f, consider_remaining=False)
    for idx in order_trial:
        if np.isnan(order_trial[idx]):
            continue
        order_data[idx].append(order_trial[idx])
    for idx in last_found:
        if idx not in last_found_cnt:
            last_found_cnt[idx] = 0
        last_found_cnt[idx] += 1
for idx, orders in order_data.items():
    if idx <= 10:
        print(f"{idx}: {np.nanmean(orders):.2f} ({orders[:5]}...)")
    order_data[idx] = np.nanmean(order_data[idx])
    if np.isnan(order_data[idx]):
        order_data[idx] = 0

order_df = pd.DataFrame(order_data, index=[0]).T
display(order_df.sort_values(by=0, ascending=False).head(10))

last_found_df = pd.DataFrame(last_found_cnt, index=[0])
display(last_found_df.T.sort_values(by=0, ascending=False).head(10))

In [None]:
ids_of_interest_f = [31, 32, 29, 30, 33, 28, 12, 10, 23, 24]
for idx_f in ids_of_interest_f:
    print(
        f"map_fuzz_to_obs_node[{idx_f}]={map_fuzz_to_obs_node[idx_f]}"
    )
