This Notebook Contains Routines for Loading and Parsing Autofz log files

In [1]:
# Load required module(s)
import json
import pandas as pd
from io import StringIO
import os
import jinja2

# Create a dataframe for Unique Bugs metric
ub_df = pd.DataFrame([], columns=('benchmark', 'algorithm', 'discrim', 'unique_bugs', 'run_time'))

# Where do we want to start looking for Autofz Logs?
top_dir = "amd64"

# Create a list of files to parse (ref: 
# https://stackoverflow.com/questions/16953842/using-os-walk-to-recursively-traverse-directories-in-python and
# https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
#
jsons = []
for root, dirs, files in os.walk(top_dir):
   for file in files:
    if (str(file))[-4:] == "json":
        rel_dir = os.path.relpath(root, '.')
        rel_file = os.path.join(rel_dir, file)
        jsons.append('./'+rel_file)

print(jsons)

['./amd64/target_infocap/ub0/infotocap_2024-04-22-19-41-39.json', './amd64/target_infocap/bitmap-ub0/infotocap_2024-04-22-19-41-12.json', './amd64/target_infocap/0/infotocap_2024-04-12-07-33-39.json', './amd64/target_infocap/7/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/6/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/1/infotocap_2024-04-12-07-33-54.json', './amd64/target_infocap/8/infotocap_2024-04-12-07-34-18.json', './amd64/target_infocap/ub-bitmap0/infotocap_2024-04-22-19-41-36.json', './amd64/target_infocap/draw/infotocap_2024-04-12-07-34-18.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-36.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-39.json', './amd64/target_infocap/draw/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-12.json', './amd64/target_infocap/4/infotocap_2024-04-12-07-34-03.json', './amd64/target_infocap/3/infotocap_2024-04-12-07-34-03.json', './amd64/target_inf

In [3]:
# Read in Json file(s)

for log in jsons:
    print("Processing: "+log)
    with open(log, "r") as file:
        autofz_log = json.load(file)
    
    benchmark = autofz_log['autofz_args']['target']
    algorithm = autofz_log['algorithm']
    
    if 'discriminator' in autofz_log['autofz_args']:
        discrim = autofz_log['autofz_args']['discriminator']
    else:
        discrim = 'bitmap'
        
    unique_bugs = autofz_log['log'][-1]['global_unique_bugs']['unique_bugs']
    run_time = float(autofz_log['end_time']) - float(autofz_log['start_time'])
    #print("run_time is: ")

    ub_df = pd.concat([ub_df, pd.DataFrame({'benchmark': [benchmark], 
                                            'algorithm': [algorithm], 
                                            'discrim': [discrim], 
                                            'unique_bugs': [unique_bugs], 
                                            'run_time': [run_time]})])

ub_df.describe()

Processing: ./amd64/target_infocap/ub0/infotocap_2024-04-22-19-41-39.json
Processing: ./amd64/target_infocap/bitmap-ub0/infotocap_2024-04-22-19-41-12.json
Processing: ./amd64/target_infocap/0/infotocap_2024-04-12-07-33-39.json
Processing: ./amd64/target_infocap/7/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/6/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/1/infotocap_2024-04-12-07-33-54.json
Processing: ./amd64/target_infocap/8/infotocap_2024-04-12-07-34-18.json
Processing: ./amd64/target_infocap/ub-bitmap0/infotocap_2024-04-22-19-41-36.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-12-07-34-18.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-36.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-39.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-12.json
Processing: ./amd64/target_in

Unnamed: 0,run_time
count,168.0
mean,83989.471157
std,12105.699397
min,15559.088649
25%,86716.495398
50%,86735.248688
75%,86751.772812
max,86768.035581


In [4]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

# We also only care about cases where discriminator is bitmap (for now).
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['discrim'] == 'bitmap'].groupby(['benchmark', 'algorithm'])['unique_bugs'].mean()

benchmarks = ub_df['benchmark'].unique()
algorithms = ub_df['algorithm'].unique()

t_df = pd.DataFrame([], columns=(algorithms))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for a in algorithms:
        try:
            t_df.at[b, a] = av_ub_series[b, a]
        except:
            pass
t_df

Unnamed: 0,autofz,redqueen,afl,lafintel,angora,aflfast,learnafl,mopt,radamsa,fairfuzz
infotocap,4.090909,,,,,,,,,
mp3gain,8.666667,,,,,,,,,
tcpdump,0.153846,,,,,,,,,
exiv2,10.0,14.0,4.0,3.5,10.0,4.0,4.0,4.5,0.0,5.0


In [5]:
# Return it in LaTex format
t_df.to_latex()

'\\begin{tabular}{lllllllllll}\n\\toprule\n & autofz & redqueen & afl & lafintel & angora & aflfast & learnafl & mopt & radamsa & fairfuzz \\\\\n\\midrule\ninfotocap & 4.090909 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\\\\nmp3gain & 8.666667 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\\\\ntcpdump & 0.153846 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\\\\nexiv2 & 10.000000 & 14.000000 & 4.000000 & 3.500000 & 10.000000 & 4.000000 & 4.000000 & 4.500000 & 0.000000 & 5.000000 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [6]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

#
# Now, let's look at other discriminators
#
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['algorithm'] == 'autofz'].groupby(['benchmark', 'discrim'])['unique_bugs'].mean()

benchmarks = ub_df['benchmark'].unique()
discrims = ub_df['discrim'].unique()

t_df = pd.DataFrame([], columns=(discrims))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for d in discrims:
        try:
            t_df.at[b, d] = av_ub_series[b, d]
        except:
            pass
t_df

Unnamed: 0,ub,bitmap-ub,bitmap,ub-bitmap
infotocap,4.0,4.0,4.090909,5.0
mp3gain,9.5,9.833333,8.666667,10.0
tcpdump,0.0,0.0,0.153846,1.0
exiv2,,,10.0,


In [7]:
# Return it in LaTex format
t_df.to_latex()

'\\begin{tabular}{lllll}\n\\toprule\n & ub & bitmap-ub & bitmap & ub-bitmap \\\\\n\\midrule\ninfotocap & 4.000000 & 4.000000 & 4.090909 & 5.000000 \\\\\nmp3gain & 9.500000 & 9.833333 & 8.666667 & 10.000000 \\\\\ntcpdump & 0.000000 & 0.000000 & 0.153846 & 1.000000 \\\\\nexiv2 & NaN & NaN & 10.000000 & NaN \\\\\n\\bottomrule\n\\end{tabular}\n'