# Compute time taken

In [1]:
# Per-project sample counts for the three columns reported later as "Lap",
# "Gotu" and "Struct". The aggregation cell multiplies each count by the
# measured average time per execution.
num_sample_needed_lap = dict(
    tcas=34,
    schedule2=47,
    totinfo=18,
    printtokens2=1,
    replace=70,
    gif2png=64,
    jsoncpp=5,
    jasper=77,
    readelf=100630,
    freetype2=242449,
)

# -1 is a sentinel: the aggregation cell reports NaN for any project whose
# entry here is not positive.
num_sample_needed_gotu = dict(
    tcas=134,
    schedule2=88,
    totinfo=-1,
    printtokens2=-1,
    replace=301,
    gif2png=170,
    jsoncpp=-1,
    jasper=-1,
    readelf=-1,
    freetype2=-1,
)

num_sample_needed_struct = dict(
    tcas=8,
    schedule2=9,
    totinfo=14,
    printtokens2=1,
    replace=1,
    gif2png=1,
    jsoncpp=1,
    jasper=32,
    readelf=100630,
    freetype2=15154,
)

# Per-project estimation overhead, added on top of the "Struct" sampling time
# and reported as the "EstiTime" column. Presumably in the same unit as the
# coverage-file timestamps (seconds?) -- TODO confirm.
time_for_estimation = dict(
    tcas=2.14e-04,
    gif2png=6.06e-03,
    schedule2=2.43e-04,
    jsoncpp=7.30e-04,
    totinfo=9.20e-04,
    jasper=2.48e-04,
    printtokens2=1.28e-04,
    readelf=8.56e-02,
    replace=2.74e-04,
    freetype2=2.77e-02,
)


In [2]:
# Prerequisite: the archived fuzzing data must already be unzipped into the
# fuzz-data directory.

# Project name -> directory (relative to the notebook) holding its runs.
data_dir = dict(
    tcas="fuzz-data/tcas_exp_1",
    schedule2="fuzz-data/schedule2_exp_1",
    totinfo="fuzz-data/totinfo_exp_1",
    printtokens2="fuzz-data/printtokens2_2_exp_1",
    replace="fuzz-data/replace_exp_2",
    gif2png="fuzz-data/gif2png-fuzzruns",
    jsoncpp="fuzz-data/jsoncpp-fuzzruns",
    jasper="fuzz-data/jasper-fuzzruns",
    readelf="fuzz-data/readelf-fuzzruns",
    freetype2="fuzz-data/freetype2-fuzzruns",
)



In [3]:
from sra.dataloader import (
    get_run_paths,
    get_trial_paths,
    get_covs_trial,
    get_cov_cum,
)
import numpy as np
import os
from typing import Tuple


def get_time_data(
    trial_path: str, expected_time_interval: Tuple[int, int] = (0, 2)
) -> Tuple[float, int]:
    """Return the covered time span and the peak cumulative value of a trial.

    Files in ``<trial_path>/coverage`` are ordered by the number encoded in
    their names: the text after the last ``_`` with its final five characters
    removed (presumably a five-character extension such as ``.json`` --
    TODO confirm against the data layout).

    Args:
        trial_path: Directory of a single trial; it must contain a
            ``coverage`` sub-directory.
        expected_time_interval: Not used by this function. Kept only so that
            existing callers passing it continue to work.

    Returns:
        ``(interval, max_coverage)``. ``interval`` is the largest file
        timestamp minus the smallest. ``max_coverage`` is the maximum of the
        values ``get_cov_cum`` yields for the file with the largest timestamp
        (presumably an integer count, since the notebook prints integer
        totals -- confirm against ``get_cov_cum``).

    Raises:
        ValueError: If the coverage directory holds no files, or a file name
            does not contain a parsable timestamp.
    """
    covs_path = os.path.join(trial_path, "coverage")

    # Parse every timestamp exactly once; dicts keep os.listdir order, so the
    # stable sort below orders ties exactly as sorting the listing would.
    timestamps = {
        fname: float(fname.split("_")[-1][:-5])
        for fname in os.listdir(covs_path)
    }
    if not timestamps:
        # Fail with a message that names the directory instead of the bare
        # "max() arg is an empty sequence" the built-ins would produce.
        raise ValueError(f"no coverage files found in {covs_path!r}")

    cov_files = sorted(timestamps, key=timestamps.get)
    earliest, latest = cov_files[0], cov_files[-1]
    interval = timestamps[latest] - timestamps[earliest]

    last_coverage = get_cov_cum(os.path.join(covs_path, latest))
    max_coverage = max(last_coverage)
    return interval, max_coverage
    

In [4]:
import pandas as pd

def _sampling_time(num_samples, total_time, total_execs):
    """Scale the average time per execution by ``num_samples``.

    Evaluated as ``num_samples * total_time / total_execs`` (multiply first,
    then divide) so the table values match the inline formula exactly.
    """
    return num_samples * total_time / total_execs


data = []

for projname, proj_dir in data_dir.items():
    # Sum the time span and the per-trial count over every trial of every run.
    # The second value returned by get_time_data is treated here as the
    # number of executions of that trial.
    total_time_interval = 0
    total_execution = 0
    for run_path in get_run_paths(proj_dir, projname):
        for trial_path in get_trial_paths(run_path):
            time_interval, num_execution = get_time_data(trial_path)
            total_time_interval += time_interval
            total_execution += num_execution

    # Every figure below divides by total_execution; stop with a message that
    # names the project rather than failing on (or propagating) a division
    # by zero when no trial data was found.
    if total_execution == 0:
        raise ValueError(
            f"no executions recorded for {projname!r} in {proj_dir!r}; "
            "has the fuzz data been unzipped?"
        )

    print(f"[{projname}]")
    print(f"{total_time_interval=}")
    print(f"{total_execution=}")
    print(f"time_per_execution={total_time_interval / total_execution}")
    print()

    lap_time = _sampling_time(
        num_sample_needed_lap[projname], total_time_interval, total_execution
    )

    # Non-positive Gotu counts (the -1 sentinel) mean "no value": report NaN.
    gotu_samples = num_sample_needed_gotu[projname]
    if gotu_samples > 0:
        gotu_time = _sampling_time(
            gotu_samples, total_time_interval, total_execution
        )
    else:
        gotu_time = np.nan

    # Struct additionally pays the per-project estimation overhead.
    struct_time = (
        _sampling_time(
            num_sample_needed_struct[projname],
            total_time_interval,
            total_execution,
        )
        + time_for_estimation[projname]
    )

    data.append(
        [
            projname,
            time_for_estimation[projname],
            lap_time,
            gotu_time,
            struct_time,
        ]
    )

df = pd.DataFrame(
    data, columns=["Project", "EstiTime", "Lap", "Gotu", "Struct"]
)
display(df)


[tcas]
total_time_interval=365411.93107008934
total_execution=81086033
time_per_execution=0.004506471923100361

[schedule2]
total_time_interval=365406.4824564457
total_execution=67706371
time_per_execution=0.005396929078600974

[totinfo]
total_time_interval=365406.4659512043
total_execution=55564748
time_per_execution=0.006576228258089181

[printtokens2]
total_time_interval=365406.7864882946
total_execution=80464765
time_per_execution=0.004541202431751271

[replace]
total_time_interval=365405.4494802952
total_execution=83430635
time_per_execution=0.004379751508307412

[gif2png]
total_time_interval=13824202.0
total_execution=2196042978
time_per_execution=0.0062950507519621045

[jsoncpp]
total_time_interval=6912100.0
total_execution=1889499313
time_per_execution=0.0036581648653925707

[jasper]
total_time_interval=13824202.0
total_execution=3154399753
time_per_execution=0.004382514291935402

[readelf]
total_time_interval=13824201.0
total_execution=2883400246
time_per_execution=0.004794409

Unnamed: 0,Project,EstiTime,Lap,Gotu,Struct
0,tcas,0.000214,0.15322,0.603867,0.036266
1,schedule2,0.000243,0.253656,0.47493,0.048815
2,totinfo,0.00092,0.118372,,0.092987
3,printtokens2,0.000128,0.004541,,0.004669
4,replace,0.000274,0.306583,1.318305,0.004654
5,gif2png,0.00606,0.402883,1.070159,0.012355
6,jsoncpp,0.00073,0.018291,,0.004388
7,jasper,0.000248,0.337454,,0.140488
8,readelf,0.0856,482.461409,,482.547009
9,freetype2,0.0277,704.438009,,44.057799


In [5]:
# exponential representation
# option_context applies the float format only while the table is rendered
# and then restores whatever format was active before -- even if display()
# raises -- instead of resetting the option to the pandas default.
with pd.option_context("display.float_format", "{:.2e}".format):
    display(df)

Unnamed: 0,Project,EstiTime,Lap,Gotu,Struct
0,tcas,0.000214,0.153,0.604,0.0363
1,schedule2,0.000243,0.254,0.475,0.0488
2,totinfo,0.00092,0.118,,0.093
3,printtokens2,0.000128,0.00454,,0.00467
4,replace,0.000274,0.307,1.32,0.00465
5,gif2png,0.00606,0.403,1.07,0.0124
6,jsoncpp,0.00073,0.0183,,0.00439
7,jasper,0.000248,0.337,,0.14
8,readelf,0.0856,482.0,,483.0
9,freetype2,0.0277,704.0,,44.1


In [6]:
# Mean and median of the per-project estimation overhead ("EstiTime" column).
df["EstiTime"].mean(), df["EstiTime"].median()

(0.0122117, 0.000502)