This Notebook Contains Routines for Loading and Parsing Autofz log files

In [1]:
# Load required module(s)
import json
import pandas as pd
from io import StringIO
import os
import jinja2

# Create a dataframe for Unique Bugs metric
ub_df = pd.DataFrame([], columns=('benchmark', 'algorithm', 'discrim', 'unique_bugs', 'run_time'))

# Where do we want to start looking for Autofz Logs?
top_dir = "amd64"

# Create a list of files to parse (ref: 
# https://stackoverflow.com/questions/16953842/using-os-walk-to-recursively-traverse-directories-in-python and
# https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
#
jsons = []
for root, dirs, files in os.walk(top_dir):
   for file in files:
    if (str(file))[-4:] == "json":
        rel_dir = os.path.relpath(root, '.')
        rel_file = os.path.join(rel_dir, file)
        jsons.append('./'+rel_file)

print(jsons)

['./amd64/target_infocap/ub0/infotocap_2024-04-22-19-41-39.json', './amd64/target_infocap/bitmap-ub0/infotocap_2024-04-22-19-41-12.json', './amd64/target_infocap/0/infotocap_2024-04-12-07-33-39.json', './amd64/target_infocap/7/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/6/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/1/infotocap_2024-04-12-07-33-54.json', './amd64/target_infocap/8/infotocap_2024-04-12-07-34-18.json', './amd64/target_infocap/ub-bitmap0/infotocap_2024-04-22-19-41-36.json', './amd64/target_infocap/draw/infotocap_2024-04-12-07-34-18.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-36.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-39.json', './amd64/target_infocap/draw/infotocap_2024-04-12-07-34-06.json', './amd64/target_infocap/draw/infotocap_2024-04-22-19-41-12.json', './amd64/target_infocap/4/infotocap_2024-04-12-07-34-03.json', './amd64/target_infocap/3/infotocap_2024-04-12-07-34-03.json', './amd64/target_inf

In [2]:
# Read in Json file(s)

for log in jsons:
    print("Processing: "+log)
    with open(log, "r") as file:
        autofz_log = json.load(file)
    
    benchmark = autofz_log['autofz_args']['target']
    algorithm = autofz_log['algorithm']
    
    if 'discriminator' in autofz_log['autofz_args']:
        discrim = autofz_log['autofz_args']['discriminator']
    else:
        discrim = 'bitmap'
        
    unique_bugs = autofz_log['log'][-1]['global_unique_bugs']['unique_bugs']
    
    try: # We need to test this in case there are incomplete fuzzing logs
        run_time = float(autofz_log['end_time']) - float(autofz_log['start_time'])
        #print("run_time is: ")

        ub_df = pd.concat([ub_df, pd.DataFrame({'benchmark': [benchmark], 
                                                'algorithm': [algorithm], 
                                                'discrim': [discrim], 
                                                'unique_bugs': [unique_bugs], 
                                                'run_time': [run_time]})])
    except:
        pass

ub_df.describe()

Processing: ./amd64/target_infocap/ub0/infotocap_2024-04-22-19-41-39.json
Processing: ./amd64/target_infocap/bitmap-ub0/infotocap_2024-04-22-19-41-12.json
Processing: ./amd64/target_infocap/0/infotocap_2024-04-12-07-33-39.json
Processing: ./amd64/target_infocap/7/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/6/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/1/infotocap_2024-04-12-07-33-54.json
Processing: ./amd64/target_infocap/8/infotocap_2024-04-12-07-34-18.json
Processing: ./amd64/target_infocap/ub-bitmap0/infotocap_2024-04-22-19-41-36.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-12-07-34-18.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-36.json


  ub_df = pd.concat([ub_df, pd.DataFrame({'benchmark': [benchmark],


Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-39.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-12-07-34-06.json
Processing: ./amd64/target_infocap/draw/infotocap_2024-04-22-19-41-12.json
Processing: ./amd64/target_infocap/4/infotocap_2024-04-12-07-34-03.json
Processing: ./amd64/target_infocap/3/infotocap_2024-04-12-07-34-03.json
Processing: ./amd64/target_infocap/2/infotocap_2024-04-12-07-33-58.json
Processing: ./amd64/target_infocap/5/infotocap_2024-04-12-07-34-05.json
Processing: ./amd64/target_mp3gain/output_algo_autofz-ub/9/mp3gain_2024-05-02-00-34-08.json
Processing: ./amd64/target_mp3gain/output_algo_autofz-ub/7/mp3gain_2024-04-30-00-22-22.json
Processing: ./amd64/target_mp3gain/output_algo_autofz-ub/6/mp3gain_2024-04-29-00-16-10.json
Processing: ./amd64/target_mp3gain/output_algo_autofz-ub/8/mp3gain_2024-05-01-00-28-05.json
Processing: ./amd64/target_mp3gain/output_algo_autofz-ub/4/mp3gain_2024-04-27-00-03-59.json
Processing: ./amd64/target_

Unnamed: 0,run_time
count,97.0
mean,83467.199795
std,14176.665446
min,365.196688
25%,86716.375062
50%,86732.394901
75%,86751.646265
max,86779.601206


In [29]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

# We also only care about cases where discriminator is bitmap (for now).
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['discrim'] == 'bitmap'].groupby(['benchmark', 'algorithm'])['unique_bugs'].mean()
count_series = ub_df.loc[ub_df['discrim'] == 'bitmap'].groupby(['benchmark', 'algorithm'])['unique_bugs'].sum()

benchmarks = ub_df['benchmark'].unique()
algorithms = ub_df['algorithm'].unique()

t_df = pd.DataFrame([], columns=(algorithms))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for a in algorithms:
        try:
            t_df.at[b, a] = av_ub_series[b, a]
        except:
            pass
t_df

Unnamed: 0,autofz,redqueen,afl,lafintel,angora,aflfast,learnafl,mopt,radamsa,fairfuzz
infotocap,4.090909,,,,,,,,,
mp3gain,8.375,,,,,,,,,
tcpdump,0.153846,,,,,,,,,
exiv2,10.666667,14.0,4.0,3.5,10.0,4.0,4.0,4.5,0.0,5.0


In [4]:
# Return it in LaTex format
print(t_df.to_latex())

\begin{tabular}{lllllllllll}
\toprule
 & autofz & redqueen & afl & lafintel & angora & aflfast & learnafl & mopt & radamsa & fairfuzz \\
\midrule
infotocap & 4.090909 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
mp3gain & 8.375000 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
tcpdump & 0.153846 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
exiv2 & 10.666667 & 14.000000 & 4.000000 & 3.500000 & 10.000000 & 4.000000 & 4.000000 & 4.500000 & 0.000000 & 5.000000 \\
\bottomrule
\end{tabular}



In [26]:
count_series = ub_df.loc[ub_df['discrim'] == 'bitmap'].groupby(['benchmark', 'algorithm'])['unique_bugs'].sum()

benchmarks = ub_df['benchmark'].unique()
algorithms = ub_df['algorithm'].unique()

t_df = pd.DataFrame([], columns=(algorithms))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for a in algorithms:
        try:
            t_df.at[b, a] = count_series[b, a]
        except:
            pass
t_df

Unnamed: 0,autofz,redqueen,afl,lafintel,angora,aflfast,learnafl,mopt,radamsa,fairfuzz
infotocap,45,,,,,,,,,
mp3gain,67,,,,,,,,,
tcpdump,2,,,,,,,,,
exiv2,32,14.0,4.0,7.0,10.0,4.0,8.0,9.0,0.0,5.0


In [6]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

#
# Now, let's look at other discriminators
#
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['algorithm'] == 'autofz'].groupby(['benchmark', 'discrim'])['unique_bugs'].mean()

benchmarks = ub_df['benchmark'].unique()
discrims = ub_df['discrim'].unique()

t_df = pd.DataFrame([], columns=(discrims))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for d in discrims:
        try:
            t_df.at[b, d] = av_ub_series[b, d]
        except:
            pass
t_df

Unnamed: 0,ub,bitmap-ub,bitmap,ub-bitmap
infotocap,4.0,4.0,4.090909,5.0
mp3gain,10.0,9.625,8.375,9.375
tcpdump,0.0,0.0,0.153846,1.0
exiv2,7.0,11.0,10.666667,35.0


In [7]:
# Return it in LaTex format
print(t_df.to_latex())

\begin{tabular}{lllll}
\toprule
 & ub & bitmap-ub & bitmap & ub-bitmap \\
\midrule
infotocap & 4.000000 & 4.000000 & 4.090909 & 5.000000 \\
mp3gain & 10.000000 & 9.625000 & 8.375000 & 9.375000 \\
tcpdump & 0.000000 & 0.000000 & 0.153846 & 1.000000 \\
exiv2 & 7.000000 & 11.000000 & 10.666667 & 35.000000 \\
\bottomrule
\end{tabular}



In [27]:
count_series = ub_df.loc[ub_df['algorithm'] == 'autofz'].groupby(['benchmark', 'discrim'])['unique_bugs'].sum()

benchmarks = ub_df['benchmark'].unique()
discrims = ub_df['discrim'].unique()

t_df = pd.DataFrame([], columns=(discrims))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for d in discrims:
        try:
            t_df.at[b, d] = count_series[b, d]
        except:
            pass
t_df

Unnamed: 0,ub,bitmap-ub,bitmap,ub-bitmap
infotocap,8,8,45,10
mp3gain,80,77,67,75
tcpdump,0,0,2,3
exiv2,7,11,32,35
