# Parse Raw

# In [12]:
# %matplotlib inline
import pandas as pd
from glob import glob
from pdb import set_trace
from pathlib import Path
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# --- Configure seaborn ---
# These calls change global plotting state, so they affect every figure drawn
# after this point.
# "ticks" style: axes drawn with tick marks.
sns.set_style("ticks")
# "poster" context with an extra font scale and thicker lines.
sns.set_context("poster", font_scale=1.75, rc={"lines.linewidth": 4.5})
# Color names from the xkcd color survey; the palette assigns them to the hue
# levels of a plot in this order.
colors = ["pale red", "windows blue", "green grey", "dusty purple", "amber", "faded green"]
sns.set_palette(sns.xkcd_palette(colors))


def neuzz(path):
    """Build a coverage-over-time table from the raw ``neuzz`` log in *path*.

    The log is scanned line by line, keyed on the first space-separated token:

    * the first line starting with ``dry`` supplies the baseline: its last
      token, stripped of trailing whitespace and of one final character,
      parsed as an integer (later ``dry`` lines are ignored);
    * every line starting with ``edge`` supplies one sample: its last token
      parsed as an integer, minus the baseline.

    The samples are sorted in ascending order and spread evenly over
    ``[0, 24)`` on the ``Timestamp`` axis (presumably hours of a 24-hour
    run; no time information is read from the log itself).

    Args:
        path: ``pathlib.Path`` of the directory that contains the ``neuzz``
            file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Timestamp``, ``fuzzer``
        (always ``"Neuzz"``) and ``Coverage``. The frame is empty when the
        log contains no ``edge`` lines.

    Raises:
        ValueError: if an ``edge`` line is found before any ``dry`` line, so
            there is no baseline to subtract.
    """
    baseline = None
    samples = []
    with open(path.joinpath("neuzz")) as log:
        for line in log:
            tokens = line.split(" ")
            tag = tokens[0]
            if tag == "dry" and baseline is None:
                # The value carries one trailing character (e.g. punctuation)
                # after the number; drop it before converting.
                baseline = int(tokens[-1].rstrip()[:-1])
            elif tag == "edge":
                if baseline is None:
                    raise ValueError(
                        "'edge' line found before any 'dry' line in neuzz log"
                    )
                samples.append(int(tokens[-1]) - baseline)

    samples.sort()
    count = len(samples)
    # Guard the division: a log without 'edge' lines yields an empty frame
    # instead of a ZeroDivisionError.
    step = 24 / count if count else 0.0
    return pd.DataFrame({
        "Timestamp": [round(i * step, 2) for i in range(count)],
        "fuzzer": ["Neuzz"] * count,
        "Coverage": samples,
    })


def nabla(path):
    """Build a coverage-over-time table from the raw ``neuzz_pp`` log in *path*.

    The log is scanned line by line, keyed on the first space-separated token:

    * the first line starting with ``dry`` supplies the baseline: its last
      token, stripped of trailing whitespace and of one final character,
      parsed as an integer (later ``dry`` lines are ignored);
    * every line starting with ``edge`` supplies one sample: its last token
      parsed as an integer, minus the baseline.

    The samples are sorted in ascending order and spread evenly over
    ``[0, 24)`` on the ``Timestamp`` axis (presumably hours of a 24-hour
    run; no time information is read from the log itself).

    Args:
        path: ``pathlib.Path`` of the directory that contains the
            ``neuzz_pp`` file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Timestamp``, ``fuzzer``
        (always ``"Nabla"``) and ``Coverage``. The frame is empty when the
        log contains no ``edge`` lines.

    Raises:
        ValueError: if an ``edge`` line is found before any ``dry`` line, so
            there is no baseline to subtract.
    """
    baseline = None
    samples = []
    with open(path.joinpath("neuzz_pp")) as log:
        for line in log:
            tokens = line.split(" ")
            tag = tokens[0]
            if tag == "dry" and baseline is None:
                # The value carries one trailing character (e.g. punctuation)
                # after the number; drop it before converting.
                baseline = int(tokens[-1].rstrip()[:-1])
            elif tag == "edge":
                if baseline is None:
                    raise ValueError(
                        "'edge' line found before any 'dry' line in neuzz_pp log"
                    )
                samples.append(int(tokens[-1]) - baseline)

    samples.sort()
    count = len(samples)
    # Guard the division: a log without 'edge' lines yields an empty frame
    # instead of a ZeroDivisionError.
    step = 24 / count if count else 0.0
    return pd.DataFrame({
        "Timestamp": [round(i * step, 2) for i in range(count)],
        "fuzzer": ["Nabla"] * count,
        "Coverage": samples,
    })
        

def afl(path, prefix):
    """Build a coverage-over-time table from an AFL-style ``plot_data`` CSV.

    The file ``<prefix>plot_data`` inside *path* is read with
    ``pandas.read_csv``. Each cell of its `` map_size`` column (the header
    name includes the leading space) has its first and last character
    removed, is parsed as a float, divided by 100 and scaled by ``2**16``
    (presumably a percentage such as `` 1.50%`` of a 65536-entry map;
    confirm against the data). The first row's value is subtracted from every
    row, the results are sorted in ascending order and spread evenly over
    ``[0, 24)`` on the ``Timestamp`` axis.

    Args:
        path: ``pathlib.Path`` of the directory that contains the CSV.
        prefix: file-name prefix placed before ``plot_data``. If it contains
            the substring ``"fast"`` the rows are labelled ``"AFLFast"``,
            otherwise ``"AFL"``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Timestamp``, ``fuzzer`` and
        ``Coverage``. The frame is empty when the CSV has no data rows.
    """
    table = pd.read_csv(path.joinpath(prefix + "plot_data"))
    scaled = [
        int(float(cell[1:-1]) / 100 * 2**16)
        for cell in table[" map_size"].tolist()
    ]
    # Express every sample relative to the first recorded value.
    first = scaled[0] if scaled else 0
    samples = sorted(value - first for value in scaled)

    # The label depends only on the prefix, so decide it once.
    label = "AFLFast" if "fast" in prefix else "AFL"

    count = len(samples)
    # Guard the division: a header-only CSV yields an empty frame instead of
    # a ZeroDivisionError.
    step = 24 / count if count else 0.0
    return pd.DataFrame({
        "Timestamp": [round(i * step, 2) for i in range(count)],
        "fuzzer": [label] * count,
        "Coverage": samples,
    })


# ===============================================================================================================
def libjpeg():
    """Plot the libjpeg coverage curves and save them to ``libjpeg.pdf``.

    Loads the Nabla and Neuzz results from the ``libjpeg`` directory under
    the current working directory, draws one line per fuzzer (Coverage
    against Timestamp), removes the legend, writes the figure to
    ``libjpeg.pdf`` and finally clears the current figure.
    """
    path_to_data = Path.cwd() / "libjpeg"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat([cov_nabla, cov_neuzz], axis=0, ignore_index=True)

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", style_order=[], data=all_cov)
    ax.legend_.remove()
    ax.set_xlim(0, 24.5)
    # Major ticks every 6 units on x and every 500 units on y.
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(500))
    sns.despine(offset=10, trim=True)
    ax.set_title("libjpeg")
    plt.savefig("libjpeg.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()

def nm():
    """Plot the nm coverage curves and save them to ``nm.pdf``.

    Loads the Nabla, Neuzz, AFL (``nm_fuzz_plot_data``) and AFLFast
    (``nm_fast_plot_data``) results from the ``nm`` directory under the
    current working directory, draws one line per fuzzer (Coverage against
    Timestamp), removes the legend, writes the figure to ``nm.pdf`` and
    finally clears the current figure.
    """
    path_to_data = Path.cwd() / "nm"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)
    cov_afl = afl(path_to_data, "nm_fuzz_")
    cov_afl_fast = afl(path_to_data, "nm_fast_")

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat(
        [cov_nabla, cov_neuzz, cov_afl, cov_afl_fast], axis=0, ignore_index=True
    )

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", data=all_cov)
    ax.legend_.remove()
    # Reduce only the Coverage column; the y-axis gets 500 units of headroom
    # above the largest value.
    coverage = all_cov["Coverage"]
    ax.set_ylim(coverage.min(), coverage.max() + 500)
    ax.set_xlim(0, 24.5)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    sns.despine(offset=10, trim=True)
    ax.set_title("nm")
    plt.savefig("nm.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()

def libxml():
    """Plot the libxml coverage curves and save them to ``libxml.pdf``.

    Loads the Nabla, Neuzz, AFL (``libxml_fuzz_plot_data``) and AFLFast
    (``libxml_fast_plot_data``) results from the ``libxml`` directory under
    the current working directory, draws one line per fuzzer (Coverage
    against Timestamp), removes the legend, writes the figure to
    ``libxml.pdf`` and finally clears the current figure.
    """
    path_to_data = Path.cwd() / "libxml"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)
    cov_afl = afl(path_to_data, "libxml_fuzz_")
    cov_afl_fast = afl(path_to_data, "libxml_fast_")

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat(
        [cov_nabla, cov_neuzz, cov_afl, cov_afl_fast], axis=0, ignore_index=True
    )

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", data=all_cov)
    ax.legend_.remove()
    # Lower bound follows the data (Coverage column only); upper bound is
    # fixed at 1500.
    ax.set_ylim(all_cov["Coverage"].min(), 1500)
    ax.set_xlim(0, 24.5)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(500))
    sns.despine(offset=10, trim=True)
    ax.set_title("libxml")
    plt.savefig("libxml.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()

def strip():
    """Plot the strip coverage curves and save them to ``strip.pdf``.

    Loads the Nabla, Neuzz, AFL (``strip_fuzz_plot_data``) and AFLFast
    (``strip_fast_plot_data``) results from the ``strip`` directory under the
    current working directory, draws one line per fuzzer (Coverage against
    Timestamp), removes the legend, writes the figure to ``strip.pdf`` and
    finally clears the current figure.
    """
    path_to_data = Path.cwd() / "strip"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)
    cov_afl = afl(path_to_data, "strip_fuzz_")
    cov_afl_fast = afl(path_to_data, "strip_fast_")

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat(
        [cov_nabla, cov_neuzz, cov_afl, cov_afl_fast], axis=0, ignore_index=True
    )

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", data=all_cov)
    ax.legend_.remove()
    # Lower bound follows the data (Coverage column only); upper bound is
    # fixed at 4000.
    ax.set_ylim(all_cov["Coverage"].min(), 4000)
    ax.set_xlim(0, 24.5)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1000))
    sns.despine(offset=10, trim=True)
    ax.set_title("strip")
    plt.savefig("strip.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()

def objdump():
    """Plot the objdump coverage curves and save them to ``objdump.pdf``.

    Loads the Nabla, Neuzz, AFL (``objdump_fuzz_plot_data``) and AFLFast
    (``objdump_fast_plot_data``) results from the ``objdump`` directory under
    the current working directory, draws one line per fuzzer (Coverage
    against Timestamp), removes the legend, writes the figure to
    ``objdump.pdf`` and finally clears the current figure.
    """
    path_to_data = Path.cwd() / "objdump"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)
    cov_afl = afl(path_to_data, "objdump_fuzz_")
    cov_afl_fast = afl(path_to_data, "objdump_fast_")

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat(
        [cov_nabla, cov_neuzz, cov_afl, cov_afl_fast], axis=0, ignore_index=True
    )

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", data=all_cov)
    ax.legend_.remove()
    # Lower bound follows the data (Coverage column only); upper bound is
    # fixed at 3000.
    ax.set_ylim(all_cov["Coverage"].min(), 3000)
    ax.set_xlim(0, 24.5)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1000))
    sns.despine(offset=10, trim=True)
    ax.set_title("objdump")
    plt.savefig("objdump.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()

def size():
    """Plot the size coverage curves and save them to ``size.pdf``.

    Loads the Nabla, Neuzz, AFL (``size_fuzz_plot_data``) and AFLFast
    (``size_fast_plot_data``) results from the ``size`` directory under the
    current working directory, draws one line per fuzzer (Coverage against
    Timestamp), removes the legend, writes the figure to ``size.pdf`` and
    finally clears the current figure.
    """
    path_to_data = Path.cwd() / "size"
    cov_neuzz = neuzz(path_to_data)
    cov_nabla = nabla(path_to_data)
    cov_afl = afl(path_to_data, "size_fuzz_")
    cov_afl_fast = afl(path_to_data, "size_fast_")

    # Each per-fuzzer frame is indexed from 0; renumber the rows so the
    # combined long-form frame has no duplicate index labels, which some
    # seaborn releases cannot plot.
    all_cov = pd.concat(
        [cov_nabla, cov_neuzz, cov_afl, cov_afl_fast], axis=0, ignore_index=True
    )

    ax = sns.lineplot(x="Timestamp", y="Coverage", hue="fuzzer", data=all_cov)
    ax.legend_.remove()
    # Lower bound follows the data (Coverage column only); upper bound is
    # fixed at 2500.
    ax.set_ylim(all_cov["Coverage"].min(), 2500)
    ax.set_xlim(0, 24.5)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(6))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(600))
    sns.despine(offset=10, trim=True)
    ax.set_title("size")
    plt.savefig("size.pdf", dpi=300, bbox_inches='tight', pad_inches=0)
    # Clear the figure so a later plot does not draw on top of this one.
    plt.clf()



# Script entry point: render the figure for one target per run.
if __name__ == "__main__":
    # Only the libxml figure is generated as written. The commented-out calls
    # are the other available targets; uncomment one to produce its PDF.
#     libjpeg()
    libxml()
#     nm()
#     strip()
#     size()
#     objdump()

# Out: <Figure size 432x288 with 0 Axes>