This Notebook Contains Routines for Loading and Parsing Autofz log files

In [14]:
# Load required module(s)
import json
import pandas as pd
from io import StringIO
import os
import jinja2

# Create a dataframe for Unique Bugs metric
ub_df = pd.DataFrame([], columns=('benchmark', 'algorithm', 'discrim', 'unique_bugs', 'run_time'))

# Where do we want to start looking for Autofz Logs?
top_dir = "arm64"

# Create a list of files to parse (ref: 
# https://stackoverflow.com/questions/16953842/using-os-walk-to-recursively-traverse-directories-in-python and
# https://stackoverflow.com/questions/1192978/python-get-relative-path-of-all-files-and-subfolders-in-a-directory
#
jsons = []
for root, dirs, files in os.walk(top_dir):
   for file in files:
    if (str(file))[-4:] == "json":
        rel_dir = os.path.relpath(root, '.')
        rel_file = os.path.join(rel_dir, file)
        jsons.append('./'+rel_file)

print(jsons)

['./arm64/target_mp3gain/output_algo_autofz-ub/7/mp3gain_2024-05-01-06-47-43.json', './arm64/target_mp3gain/output_algo_autofz-ub/6/mp3gain_2024-04-30-08-37-13.json', './arm64/target_mp3gain/output_algo_autofz-ub/8/mp3gain_2024-05-02-06-53-07.json', './arm64/target_mp3gain/output_algo_autofz-ub/4/mp3gain_2024-04-27-22-16-14.json', './arm64/target_mp3gain/output_algo_autofz-ub/3/mp3gain_2024-04-26-22-10-16.json', './arm64/target_mp3gain/output_algo_autofz-ub/2/mp3gain_2024-04-25-22-04-22.json', './arm64/target_mp3gain/output_algo_autofz-ub/5/mp3gain_2024-04-28-22-22-14.json', './arm64/target_mp3gain/output_algo_autofz-ub-bitmap/7/mp3gain_2024-05-01-06-49-37.json', './arm64/target_mp3gain/output_algo_autofz-ub-bitmap/6/mp3gain_2024-04-30-08-36-26.json', './arm64/target_mp3gain/output_algo_autofz-ub-bitmap/8/mp3gain_2024-05-02-06-54-59.json', './arm64/target_mp3gain/output_algo_autofz-ub-bitmap/4/mp3gain_2024-04-27-22-17-21.json', './arm64/target_mp3gain/output_algo_autofz-ub-bitmap/3/mp3

In [15]:
# Read in Json file(s)

for log in jsons:
    print("Processing: "+log)
    with open(log, "r") as file:
        autofz_log = json.load(file)
    
    benchmark = autofz_log['autofz_args']['target']
    algorithm = autofz_log['algorithm']
    
    if 'discriminator' in autofz_log['autofz_args']:
        discrim = autofz_log['autofz_args']['discriminator']
    else:
        discrim = 'bitmap'
        
    unique_bugs = autofz_log['log'][-1]['global_unique_bugs']['unique_bugs']
    
    try: # We need to test this in case there are incomplete fuzzing logs
        run_time = float(autofz_log['end_time']) - float(autofz_log['start_time'])
        #print("run_time is: ")

        ub_df = pd.concat([ub_df, pd.DataFrame({'benchmark': [benchmark], 
                                                'algorithm': [algorithm], 
                                                'discrim': [discrim], 
                                                'unique_bugs': [unique_bugs], 
                                                'run_time': [run_time]})])
    except:
        pass

ub_df.describe()

Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/7/mp3gain_2024-05-01-06-47-43.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/6/mp3gain_2024-04-30-08-37-13.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/8/mp3gain_2024-05-02-06-53-07.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/4/mp3gain_2024-04-27-22-16-14.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/3/mp3gain_2024-04-26-22-10-16.json


  ub_df = pd.concat([ub_df, pd.DataFrame({'benchmark': [benchmark],


Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/2/mp3gain_2024-04-25-22-04-22.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub/5/mp3gain_2024-04-28-22-22-14.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/7/mp3gain_2024-05-01-06-49-37.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/6/mp3gain_2024-04-30-08-36-26.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/8/mp3gain_2024-05-02-06-54-59.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/4/mp3gain_2024-04-27-22-17-21.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/3/mp3gain_2024-04-26-22-11-24.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/2/mp3gain_2024-04-25-22-05-32.json
Processing: ./arm64/target_mp3gain/output_algo_autofz-ub-bitmap/5/mp3gain_2024-04-28-22-23-18.json
Processing: ./arm64/target_mp3gain/1/output_algo_autofz-bitmap-ub/mp3gain_2024-04-24-19-43-36.json
Processing: ./arm64/targ

Unnamed: 0,run_time
count,101.0
mean,79477.259156
std,22347.813375
min,3203.768204
25%,86710.286309
50%,86733.30211
75%,86748.653162
max,86765.539336


In [16]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

# We also only care about cases where discriminator is bitmap (for now).
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['discrim'] == 'bitmap'].groupby(['benchmark', 'algorithm'])['unique_bugs'].mean()

benchmarks = ub_df['benchmark'].unique()
algorithms = ub_df['algorithm'].unique()

t_df = pd.DataFrame([], columns=(algorithms))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for a in algorithms:
        try:
            t_df.at[b, a] = av_ub_series[b, a]
        except:
            pass
t_df

Unnamed: 0,autofz,mopt,lafintel,radamsa,fairfuzz,redqueen,aflfast
mp3gain,8.0,,,,,,
infotocap,4.769231,,,,,,
tcpdump,0.375,,,,,,
exiv2,33.5,4.5,4.0,0.5,3.5,18.5,2.0


In [17]:
# Return it in LaTex format
print(t_df.to_latex())

\begin{tabular}{llllllll}
\toprule
 & autofz & mopt & lafintel & radamsa & fairfuzz & redqueen & aflfast \\
\midrule
mp3gain & 8.000000 & NaN & NaN & NaN & NaN & NaN & NaN \\
infotocap & 4.769231 & NaN & NaN & NaN & NaN & NaN & NaN \\
tcpdump & 0.375000 & NaN & NaN & NaN & NaN & NaN & NaN \\
exiv2 & 33.500000 & 4.500000 & 4.000000 & 0.500000 & 3.500000 & 18.500000 & 2.000000 \\
\bottomrule
\end{tabular}



In [18]:
# We only care about runs that took 24 hours to run
ub_df = ub_df.loc[ub_df['run_time'] >= 86400]

#
# Now, let's look at other discriminators
#
# Get the average number of unique bugs found

av_ub_series = ub_df.loc[ub_df['algorithm'] == 'autofz'].groupby(['benchmark', 'discrim'])['unique_bugs'].mean()

benchmarks = ub_df['benchmark'].unique()
discrims = ub_df['discrim'].unique()

t_df = pd.DataFrame([], columns=(discrims))

# Reference: https://stackoverflow.com/questions/13842088/set-value-for-particular-cell-in-pandas-dataframe-using-index
for b in benchmarks:
    t_df = pd.concat([t_df, pd.DataFrame({}, index=[b])])
    for d in discrims:
        try:
            t_df.at[b, d] = av_ub_series[b, d]
        except:
            pass
t_df

Unnamed: 0,ub,ub-bitmap,bitmap-ub,bitmap
mp3gain,7.333333,8.833333,8.0,8.0
infotocap,4.0,3.0,1.0,4.769231
tcpdump,0.0,0.0,0.0,0.375
exiv2,34.0,44.0,27.0,33.5


In [19]:
# Return it in LaTex format
print(t_df.to_latex())

\begin{tabular}{lllll}
\toprule
 & ub & ub-bitmap & bitmap-ub & bitmap \\
\midrule
mp3gain & 7.333333 & 8.833333 & 8.000000 & 8.000000 \\
infotocap & 4.000000 & 3.000000 & 1.000000 & 4.769231 \\
tcpdump & 0.000000 & 0.000000 & 0.000000 & 0.375000 \\
exiv2 & 34.000000 & 44.000000 & 27.000000 & 33.500000 \\
\bottomrule
\end{tabular}

