## Evaluating ConcolicGrammarFuzzer

In this chapter, we will evaluate `ConcolicGrammarFuzzer`, which is our concolic fuzzer that uses grammars.

Our target will be a basic program that mimics an SQL database. We will fuzz this program with the `INVENTORY_GRAMMAR`, using both `ConcolicGrammarFuzzer` and `GrammarCoverageFuzzer`, and then compare how well the two fuzzers perform. Specifically, we will consider the **line coverage**, **branch coverage** and **program depth** reached.

In [None]:
import fuzzingbook_utils

In [None]:
from ConcolicFuzzer import ConcolicGrammarFuzzer, ConcolicTracer
from GrammarCoverageFuzzer import GrammarCoverageFuzzer
from Coverage import Coverage, BranchCoverage

## Target Program

In [None]:
from InformationFlow import INVENTORY_GRAMMAR, SQLException
from ConcolicFuzzer import ConcolicDB

# Seed contents of the database under test.
# The value is a list of (table name, (column declarations, rows)) tuples:
# the column declarations map each column name to its Python type, and
# each row is a dict keyed by column name.
INITIAL_DB = [
    (
        'vehicles',
        (
            {'year': int, 'kind': str, 'company': str, 'model': str},
            [
                {'year': 1997, 'kind': 'van', 'company': 'Ford', 'model': 'E350'},
                {'year': 2000, 'kind': 'car', 'company': 'Mercury', 'model': 'Cougar'},
                {'year': 1999, 'kind': 'car', 'company': 'Chevy', 'model': 'Venture'}
            ]
        )
    ),
    (
        'months',
        (
            {'month': int, 'name': str},
            [
                {'month': 1, 'name': 'jan'},
                {'month': 2, 'name': 'feb'},
                {'month': 3, 'name': 'mar'},
                {'month': 4, 'name': 'apr'},
                {'month': 5, 'name': 'may'},
                {'month': 6, 'name': 'jun'},
                {'month': 7, 'name': 'jul'},
                {'month': 8, 'name': 'aug'},
                {'month': 9, 'name': 'sep'},
                {'month': 10, 'name': 'oct'},
                {'month': 11, 'name': 'nov'},
                {'month': 12, 'name': 'dec'}
            ]
        )
    )
]

def db_select(query):
    """Run `query` against a fresh copy of the seed database.

    A new `ConcolicDB` is created for every call and populated with newly
    built containers mirroring `INITIAL_DB` (same list-of-tuples shape), so
    the module-level seed data is never handed to the database object
    itself. This keeps fuzz runs independent of each other: if a query
    modifies table contents in place, only this call's copy is affected,
    and both fuzzers under comparison start every run from the same state.

    Args:
        query: The query string passed on to `ConcolicDB.sql`.

    Returns:
        Whatever `ConcolicDB.sql` returns for `query`.

    Raises:
        Any exception raised by `ConcolicDB.sql` is propagated unchanged.
    """
    concolic_db = ConcolicDB()
    # Rebuild the declaration dict, the row list and every row dict; the
    # leaf values (ints, strs, types) are immutable and can be shared.
    concolic_db.db = [
        (table_name, (dict(declarations), [dict(row) for row in rows]))
        for table_name, (declarations, rows) in INITIAL_DB
    ]
    return concolic_db.sql(query)

## Fuzzer Runners

In [None]:
def run_grammar_coverage_fuzzer(target, grammar, max_nonterminals, n_fuzz):
    """Fuzz `target` with a `GrammarCoverageFuzzer` and collect the results.

    Args:
        target: Callable invoked with each generated input.
        grammar: Grammar handed to the fuzzer.
        max_nonterminals: Passed to the fuzzer as `max_nonterminals`.
        n_fuzz: Number of inputs to generate and run.

    Returns:
        A 4-tuple `(coverage, branch_coverage, sql_exceptions,
        other_exceptions)`: the union of the line coverage and of the branch
        coverage over all runs, followed by the lists of caught
        `SQLException`s and of all other caught exceptions.
    """
    fuzzer = GrammarCoverageFuzzer(grammar=grammar,
                                   max_nonterminals=max_nonterminals)
    lines_seen, branches_seen = set(), set()
    sql_failures, other_failures = [], []

    for _run in range(n_fuzz):
        fuzz_input = fuzzer.fuzz()
        with Coverage() as line_tracker, BranchCoverage() as branch_tracker:
            # Exceptions are recorded rather than propagated so that one
            # failing input does not end the campaign.
            try:
                target(fuzz_input)
            except SQLException as error:
                sql_failures.append(error)
            except Exception as error:
                other_failures.append(error)

        lines_seen |= line_tracker.coverage()
        branches_seen |= branch_tracker.coverage()

    return lines_seen, branches_seen, sql_failures, other_failures

In [None]:
def run_concolic_grammar_fuzzer(target, grammar, max_nonterminals, n_fuzz):
    """Fuzz `target` with a `ConcolicGrammarFuzzer` and collect the results.

    After every run the fuzzer's grammar is updated from the concolic trace
    of that run.

    Args:
        target: Callable invoked (through the concolic tracer) with each
            generated input.
        grammar: Grammar handed to the fuzzer.
        max_nonterminals: Passed to the fuzzer as `max_nonterminals`.
        n_fuzz: Number of inputs to generate and run.

    Returns:
        A 4-tuple `(coverage, branch_coverage, sql_exceptions,
        other_exceptions)`: the union of the line coverage and of the branch
        coverage over all runs, followed by the lists of caught
        `SQLException`s and of all other caught exceptions.
    """
    fuzzer = ConcolicGrammarFuzzer(grammar=grammar,
                                   max_nonterminals=max_nonterminals)
    fuzzer.prune_tokens([
        '<value>', '<table>', '<column>',
        '<literals>', '<exprs>', '<bexpr>',
    ])

    lines_seen, branches_seen = set(), set()
    sql_failures, other_failures = [], []

    for _run in range(n_fuzz):
        fuzz_input = fuzzer.fuzz()
        with ConcolicTracer() as tracer:
            with Coverage() as line_tracker, BranchCoverage() as branch_tracker:
                # Exceptions are recorded rather than propagated so that one
                # failing input does not end the campaign.
                try:
                    tracer[target](fuzz_input)
                except SQLException as error:
                    sql_failures.append(error)
                except Exception as error:
                    other_failures.append(error)

        # Feed the trace of this run back into the fuzzer before the next one.
        fuzzer.update_grammar(tracer)

        lines_seen |= line_tracker.coverage()
        branches_seen |= branch_tracker.coverage()

    return lines_seen, branches_seen, sql_failures, other_failures

## Reporters

In [None]:
from collections import defaultdict
    
def report_line_coverage(coverage, coverage_functions):
    """Print a table of how many distinct lines were covered per function.

    Only entries whose function name is in `coverage_functions` are counted.
    The first row, 'all functions', is the total over those functions. The
    remaining rows are ordered by descending line count; functions with the
    same count are ordered by name so the report is identical from run to
    run (set iteration order alone would not guarantee that).

    Args:
        coverage: Iterable of `(function name, line number)` pairs.
        coverage_functions: Container of the function names to report on.

    Returns:
        None. The table is written to standard output.
    """
    row_format = ' {0: <40} {1}'
    rule = '-' * 80

    # Building a set first removes duplicate (function, line) pairs.
    relevant = {(function, line) for (function, line) in coverage
                if function in coverage_functions}

    lines_per_function = defaultdict(int)
    for func_name, _line in relevant:
        lines_per_function[func_name] += 1

    # Highest count first; the name breaks ties deterministically.
    ranked = sorted(lines_per_function.items(),
                    key=lambda item: (-item[1], item[0]))

    print(rule)
    print(row_format.format('Function Name', 'Number of Lines Covered'))
    print(rule)

    print(row_format.format('all functions', len(relevant)))
    for func_name, n_lines in ranked:
        print(row_format.format(func_name, n_lines))

    print(rule + '\n')

In [None]:
def report_exceptions(sql_exceptions, other_exceptions):
    """Print a two-row table with the number of exceptions of each kind.

    Args:
        sql_exceptions: Collection of the caught SQL exceptions.
        other_exceptions: Collection of all other caught exceptions.

    Returns:
        None. The table is written to standard output.
    """
    row_format = ' {0: <40} {1}'
    rule = '-' * 80
    rows = (
        ('SQLException', len(sql_exceptions)),
        ('Other', len(other_exceptions)),
    )

    print(rule)
    print(row_format.format('Exception Type', 'Number of Occurrences'))
    print(rule)
    for label, count in rows:
        print(row_format.format(label, count))
    print(rule + '\n')

In [None]:
class CoverageHelper:
    """Restrict coverage data to a set of functions and summarise it.

    `coverage` is an iterable of `(function name, line)` pairs and
    `branch_coverage` an iterable of branches, where each branch is indexed
    as `branch[0]` (source) and `branch[1]` (destination) and each endpoint
    has the function name at index 0.
    """

    def __init__(self, coverage, branch_coverage, functions):
        """Store the raw data and pre-compute the filtered views.

        A line is kept when its function is in `functions`; a branch is kept
        when the functions of both of its endpoints are in `functions`.
        """
        self._coverage = coverage
        self._branch_coverage = branch_coverage
        self._functions = functions

        def both_ends_tracked(branch):
            source, destination = branch[0], branch[1]
            return source[0] in functions and destination[0] in functions

        self._filtered_coverage = {
            (name, lineno) for name, lineno in coverage if name in functions
        }
        self._filtered_branch_coverage = set(
            filter(both_ends_tracked, branch_coverage)
        )

    def get_functions(self):
        """Return the function names this helper was created with."""
        return self._functions

    def get_line_coverage(self):
        """Return `(lines per function, total lines)` for the kept lines.

        The first element is a `defaultdict(int)`, so functions without any
        covered line read as 0.
        """
        per_function = defaultdict(int)
        for name, _lineno in self._filtered_coverage:
            per_function[name] += 1
        return per_function, len(self._filtered_coverage)

    def get_branch_coverage(self):
        """Return `(branches per function, total branches)` for kept branches.

        Each branch is counted for the function of its source endpoint. The
        first element is a `defaultdict(int)`, so functions without any
        covered branch read as 0.
        """
        per_function = defaultdict(int)
        for branch in self._filtered_branch_coverage:
            source_function = branch[0][0]
            per_function[source_function] += 1
        return per_function, len(self._filtered_branch_coverage)


## Fuzzing Configuration

In [None]:
# Shared settings for the evaluation cells.
# NOTE(review): N_FUZZ is not referenced by the cells visible in this
# notebook, which pass `n_fuzz` as a literal instead -- confirm which value
# is intended.
N_FUZZ = 10
# Function that receives every generated query.
TARGET = db_select
# Grammar used by both fuzzers.
GRAMMAR = INVENTORY_GRAMMAR
MAX_NONTERMINALS = 10
# Names of the functions whose coverage is reported; coverage recorded for
# any other function is filtered out by the reporters.
COVERAGE_FUNCTIONS = {
    'sql', 
    'table',
    'do_update', 
    'do_insert', 
    'do_delete', 
    'db_select'
}

## Evaluation

In [None]:
from collections import defaultdict
import matplotlib.pyplot as plt

def compare_coverage(ch1, name1, ch2, name2, functions):
    """Print and plot line and branch coverage of two fuzzers side by side.

    For line coverage and then for branch coverage, a text table is printed
    and a grouped bar chart is shown. Both contain one entry for
    'all functions' followed by one entry per element of `functions`.

    Args:
        ch1: `CoverageHelper` holding the results of the first fuzzer.
        name1: Display name of the first fuzzer.
        ch2: `CoverageHelper` holding the results of the second fuzzer.
        name2: Display name of the second fuzzer.
        functions: Collection of function names to list. It is iterated
            several times and its length is taken.

    Returns:
        None.
    """
    row_format = '{0: <30} {1: <25} {2: <25}'

    def show(counts1, total1, counts2, total2, title):
        # Text table.
        print(row_format.format(title, name1, name2))
        print('-' * 80)
        print(row_format.format('all functions', total1, total2))
        for func in functions:
            print(row_format.format(func, counts1[func], counts2[func]))

        # Grouped bar chart: slot 0 is the total, the rest follow `functions`.
        positions = list(range(len(functions) + 1))
        labels = ['all functions'] + list(functions)

        plt.title(title)
        series = (
            (-0.1, total1, counts1, name1),
            (0.1, total2, counts2, name2),
        )
        for offset, total, counts, label in series:
            plt.bar([pos + offset for pos in positions],
                    [total] + [counts[func] for func in functions],
                    width=0.2, align='center', label=label)

        plt.legend(loc='upper right')
        plt.xticks(positions, labels, rotation=20)
        plt.show()

    show(*ch1.get_line_coverage(), *ch2.get_line_coverage(),
         'Line Coverage')
    show(*ch1.get_branch_coverage(), *ch2.get_branch_coverage(),
         'Branch Coverage')

def compare_exceptions(sql1, other1, name1, sql2, other2, name2):
    """Print a table comparing the exception counts of two fuzzers.

    Args:
        sql1: Collection of SQL exceptions raised under the first fuzzer.
        other1: Collection of other exceptions raised under the first fuzzer.
        name1: Display name of the first fuzzer.
        sql2: Collection of SQL exceptions raised under the second fuzzer.
        other2: Collection of other exceptions raised under the second fuzzer.
        name2: Display name of the second fuzzer.

    Returns:
        None. The table is written to standard output after a blank line.
    """
    row_format = '{0: <30} {1: <25} {2: <25}'
    rows = (
        ('sql', len(sql1), len(sql2)),
        ('other', len(other1), len(other2)),
    )

    print()
    print(row_format.format('Exception Type', name1, name2))
    print('-' * 80)
    for row in rows:
        print(row_format.format(*row))

        
def comperative_run(target, grammar, max_nonterminals, n_fuzz, functions):
    """Run both fuzzers with identical settings and report the comparison.

    The concolic grammar fuzzer is run first and the grammar coverage fuzzer
    second. Their line/branch coverage (restricted to `functions`) and their
    exception counts are then printed and plotted side by side.

    Args:
        target: Callable that receives every generated input.
        grammar: Grammar given to both fuzzers.
        max_nonterminals: Passed to both fuzzers as `max_nonterminals`.
        n_fuzz: Number of inputs each fuzzer generates.
        functions: Names of the functions whose coverage is compared.

    Returns:
        None.
    """
    concolic_name = 'ConcolicGrammarFuzzer'
    baseline_name = 'GrammarCoverageFuzzer'
    fuzz_settings = {
        'target': target,
        'grammar': grammar,
        'max_nonterminals': max_nonterminals,
        'n_fuzz': n_fuzz,
    }

    (concolic_lines, concolic_branches,
     concolic_sql, concolic_other) = run_concolic_grammar_fuzzer(**fuzz_settings)
    (baseline_lines, baseline_branches,
     baseline_sql, baseline_other) = run_grammar_coverage_fuzzer(**fuzz_settings)

    concolic_helper = CoverageHelper(concolic_lines, concolic_branches,
                                     functions)
    baseline_helper = CoverageHelper(baseline_lines, baseline_branches,
                                     functions)

    compare_coverage(concolic_helper, concolic_name,
                     baseline_helper, baseline_name,
                     functions)
    compare_exceptions(concolic_sql, concolic_other, concolic_name,
                       baseline_sql, baseline_other, baseline_name)

In [None]:
# Run both fuzzers against the configured target and print/plot the
# side-by-side comparison. Each fuzzer generates 100 inputs; the value is
# given literally here rather than taken from N_FUZZ.
comperative_run(
    target=TARGET, 
    grammar=GRAMMAR, 
    max_nonterminals=MAX_NONTERMINALS, 
    n_fuzz=100,
    functions=COVERAGE_FUNCTIONS
)

In [None]:
# Stand-alone report for the concolic grammar fuzzer.
# The runner returns four values (line coverage, branch coverage, SQL
# exceptions, other exceptions); unpacking only three raises ValueError.
# Branch coverage is not used by the reporters below, so it is discarded.
coverage, _branch_coverage, sql_exceptions, other_exceptions = \
    run_concolic_grammar_fuzzer(
        target=TARGET,
        grammar=GRAMMAR,
        max_nonterminals=MAX_NONTERMINALS,
        n_fuzz=100
    )

report_line_coverage(coverage, COVERAGE_FUNCTIONS)
report_exceptions(sql_exceptions, other_exceptions)

In [None]:
# Stand-alone report for the grammar coverage fuzzer.
# The runner returns four values (line coverage, branch coverage, SQL
# exceptions, other exceptions); unpacking only three raises ValueError.
# Branch coverage is not used by the reporters below, so it is discarded.
coverage, _branch_coverage, sql_exceptions, other_exceptions = \
    run_grammar_coverage_fuzzer(
        target=TARGET,
        grammar=GRAMMAR,
        max_nonterminals=MAX_NONTERMINALS,
        n_fuzz=1
    )

report_line_coverage(coverage, COVERAGE_FUNCTIONS)
report_exceptions(sql_exceptions, other_exceptions)