Skip to content

Commit

Permalink
Add grammar analyzer functionality to grammarinator-process
Browse files Browse the repository at this point in the history
This new functionality helps identify cycles in the graph and
looks for unavailable rules from a user-defined start rule.
Furthermore, it finds the farthest rule from a starting rule
- this can be predefined or the first parser rule. This information
can be useful during generation when parametrizing the max-depth argument.
The node classes representing the grammar tree are extended with
__str__ and print_tree methods.
  • Loading branch information
renatahodovan committed Mar 18, 2023
1 parent a4fff1d commit 490dc1e
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 26 deletions.
2 changes: 1 addition & 1 deletion examples/fuzzer/HTMLGenerator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Grammarinator 19.3.post125+g9f16403.d20230306
# Generated by Grammarinator 19.3.post125+geecdcb7.d20230311

import itertools

Expand Down
153 changes: 129 additions & 24 deletions grammarinator/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,26 @@ def __init__(self, id=None):
self.out_neighbours = []
self.min_depth = inf

def print_tree(self):
def _walk(node):
nonlocal indent
print(f'{" " * indent}{str(node)}{"" if node not in visited else " (...recursion)"}')
if node in visited:
return

visited.add(node)
indent += 1
for child in node.out_neighbours:
_walk(child)
indent -= 1

visited = set()
indent = 0
_walk(self)

def __str__(self):
return f'cls: {self.__class__.__name__}'


class RuleNode(Node):

Expand All @@ -50,6 +70,9 @@ def __init__(self, name, label, type):
self.type = type
self.has_var = False

def __str__(self):
    """Extend the base description with the rule name and variable flag."""
    base = super().__str__()
    return f'{base}; name: {self.name}; var: {self.has_var}'


class UnlexerRuleNode(RuleNode):

Expand All @@ -76,14 +99,24 @@ def __init__(self, src):
super().__init__()
self.src = src

def __str__(self):
    """Extend the base description with the stored source text."""
    base = super().__str__()
    return f'{base}; src: {self.src}'


class CharsetNode(Node):
    """Node referencing a character set used by a lexer rule."""

    def __init__(self, rule_id, idx, charset):
        super().__init__()
        # Identifier of the container rule.
        self.rule_id = rule_id
        # Index of the charset inside the current rule.
        self.idx = idx
        # Global identifier of the charset.
        self.charset = charset

    def __str__(self):
        return f'{super().__str__()}; idx: {self.idx}; charset: {self.charset}'


class LambdaNode(Node):

Expand All @@ -93,8 +126,11 @@ def __init__(self):

class AlternationNode(Node):

def __init__(self, rule_id, idx, conditions):
    """Create an alternation node.

    :param rule_id: Identifier of the container rule.
    :param idx: Index of the alternation in the container rule.
    :param conditions: Conditions (one per alternative) guarding the choice.
    """
    super().__init__()
    # Identifier of the container rule.
    self.rule_id = rule_id
    # Index of the alternation in the container rule.
    self.idx = idx
    self.conditions = conditions

Expand Down Expand Up @@ -122,35 +158,59 @@ def simple_alternatives(self):

return simple_lits, simple_rules

def __str__(self):
    """Extend the base description with the alternation index and conditions."""
    cond = ', '.join(self.conditions)
    return f'{super().__str__()}; idx: {self.idx}; cond: {cond}'


class AlternativeNode(Node):
    """Node representing a single alternative inside an alternation."""

    def __init__(self, rule_id, alt_id, idx):
        super().__init__()
        # Identifier of the container rule.
        self.rule_id = rule_id
        # Identifier of the container alternation.
        self.alt_id = alt_id
        # Index of the alternative in the container alternation.
        self.idx = idx

    def __str__(self):
        return f'{super().__str__()}; idx: {self.idx}'


class QuantifierNode(Node):
    """Node representing a quantified subexpression (``?``, ``*``, ``+``)."""

    def __init__(self, rule_id, idx, min, max):
        super().__init__()
        # Identifier of the container rule.
        self.rule_id = rule_id
        # Index of the quantifier in the container alternation.
        self.idx = idx
        # Minimum number of repetitions.
        self.min = min
        # Maximum number of repetitions (may be 'inf').
        self.max = max

    def __str__(self):
        return f'{super().__str__()}; idx: {self.idx}; min: {self.min}; max: {self.max}'


class ActionNode(Node):
    """Node holding a chunk of inline action source code."""

    def __init__(self, src):
        super().__init__()
        # Raw source text of the action.
        self.src = src

    def __str__(self):
        base = super().__str__()
        return f'{base}; src: {self.src}'


class VariableNode(Node):
    """Node referencing a named variable of the grammar."""

    def __init__(self, name):
        super().__init__()
        # Name of the variable.
        self.name = name

    def __str__(self):
        base = super().__str__()
        return f'{base}; name: {self.name}'


def printable_ranges(lower_bound, upper_bound):
ranges = []
Expand Down Expand Up @@ -229,6 +289,10 @@ def rules(self):
def imag_rules(self):
    """Iterate over the ImagRuleNode vertices of the graph."""
    for vertex in self.vertices.values():
        if isinstance(vertex, ImagRuleNode):
            yield vertex

def print_tree(self, root=None):
    """Print the grammar tree starting from ``root`` (or from the default rule)."""
    assert root or self.default_rule, 'Either `root` must be defined or `print` should be called after `default_rule` is set.'
    start = root if root else self.vertices[self.default_rule]
    start.print_tree()

def add_node(self, node):
    """Register ``node`` in the vertex map and return its identifier."""
    ident = node.id
    self.vertices[ident] = node
    return ident
Expand All @@ -244,33 +308,68 @@ def calc_min_depths(self):

while changed:
changed = False
for ident in self.vertices:
selector = min if isinstance(self.vertices[ident], AlternationNode) else max
min_depth = selector((min_depths[node.id] + int(isinstance(self.vertices[node.id], RuleNode))
for node in self.vertices[ident].out_neighbours if not isinstance(node, QuantifierNode) or node.min >= 1), default=0)
for ident, node in self.vertices.items():
selector = min if isinstance(node, AlternationNode) else max
min_depth = selector((min_depths[out_node.id] + int(isinstance(out_node, RuleNode))
for out_node in node.out_neighbours if not isinstance(out_node, QuantifierNode) or out_node.min >= 1), default=0)

if min_depth < min_depths[ident]:
min_depths[ident] = min_depth
changed = True

# Lift the minimal depths of the alternatives to the alternations, where the decision will happen.
inf_alt = []
for ident in min_depths:
if isinstance(self.vertices[ident], AlternationNode):
assert all(min_depths[node.id] < inf for node in self.vertices[ident].out_neighbours), f'{ident!r} has an alternative that is not reachable.'
for node in self.vertices[ident].out_neighbours:
if min_depths[node.id] == inf:
# Generate human-readable description for an alternative in the graph. The output is a
# (rule node, alternation node, alternative node) string, where `rule` defines the container
# rule and the (alternation node, alternative node) sequence defines a derivation reaching the alternative.
inf_alt.append(', '.join(list(map(str, [self.vertices[node.rule_id], self.vertices[node.alt_id], node]))))
min_depths[ident] = [min_depths[node.id] for node in self.vertices[ident].out_neighbours]
if inf_alt:
logger.warning('Alternative(s) with infinite derivation (rule node, alternation node, alternative node):\n\t%s', ',\n\t'.join(inf_alt))

# Remove the lifted Alternatives and check for infinite derivations.
inf_rule = []
for ident in list(min_depths.keys()):
if isinstance(self.vertices[ident], AlternativeNode):
del min_depths[ident]
else:
assert min_depths[ident] != inf, f'Rule with infinite derivation {ident!r}'
elif min_depths[ident] == inf and isinstance(self.vertices[ident], RuleNode):
inf_rule.append(ident)
if inf_rule:
logger.warning('Rule(s) with infinite derivation (possible cycles): %s', ', '.join(inf_rule))

for ident, min_depth in min_depths.items():
self.vertices[ident].min_depth = min_depth

# Calculates the distance of every rule node from the start node. As a result, it can
# point out rules that are not reachable from there; furthermore, it can give a hint
# about the farthest node/rule to help determine a max_depth that has a chance to
# reach every rule.
def analyze(self):
    """Compute the distance of every rule from ``default_rule`` and log the results.

    Logs the farthest reachable rule (a hint for ``max_depth``) and warns
    about rule nodes that are unreachable from the default rule.
    """
    min_distances = defaultdict(lambda: inf)
    min_distances[self.default_rule] = 0

    # Breadth-first relaxation over the graph. An index cursor is used instead
    # of ``list.pop(0)``, which costs O(n) per removal.
    work_list = [self.default_rule]
    head = 0
    while head < len(work_list):
        v = work_list[head]
        head += 1
        for out_v in self.vertices[v].out_neighbours:
            # Only stepping into a rule node increases the distance.
            d = min_distances[v] + int(isinstance(out_v, RuleNode))
            if d < min_distances[out_v.id]:
                min_distances[out_v.id] = d
                work_list.append(out_v.id)

    farthest_ident, max_distance = max(((v, d) for v, d in min_distances.items() if isinstance(self.vertices[v], RuleNode)), key=lambda item: item[1])
    logger.info('\tDistance to the farthest rule (%s): %d', farthest_ident, max_distance)

    unreachable_rules = [v_id for v_id, v in self.vertices.items() if isinstance(v, RuleNode) and min_distances[v_id] == inf]
    if unreachable_rules:
        logger.warning('\t%d rules unreachable from %s: %s', len(unreachable_rules), self.default_rule, ', '.join(unreachable_rules))


def build_graph(actions, lexer_root, parser_root, default_rule):

def find_conditions(node):
if not actions:
Expand Down Expand Up @@ -440,12 +539,12 @@ def build_expr(node, parent_id):
return

nonlocal alt_idx
alt_id = graph.add_node(AlternationNode(idx=alt_idx, conditions=[find_conditions(child) for child in children]))
alt_id = graph.add_node(AlternationNode(idx=alt_idx, conditions=[find_conditions(child) for child in children], rule_id=rule.name))
alt_idx += 1
graph.add_edge(frm=parent_id, to=alt_id)

for child in children:
alternative_id = graph.add_node(AlternativeNode())
for i, child in enumerate(children):
alternative_id = graph.add_node(AlternativeNode(rule_id=rule.name, alt_id=graph.vertices[alt_id].idx, idx=i))
graph.add_edge(frm=alt_id, to=alternative_id)
build_expr(child, alternative_id)

Expand Down Expand Up @@ -488,7 +587,7 @@ def build_expr(node, parent_id):
nonlocal quant_idx
suffix = str(suffix.children[0])
quant_ranges = {'?': {'min': 0, 'max': 1}, '*': {'min': 0, 'max': 'inf'}, '+': {'min': 1, 'max': 'inf'}}
quant_id = graph.add_node(QuantifierNode(idx=quant_idx, **quant_ranges[suffix]))
quant_id = graph.add_node(QuantifierNode(rule_id=rule.name, idx=quant_idx, **quant_ranges[suffix]))
quant_idx += 1
graph.add_edge(frm=parent_id, to=quant_id)
build_expr(node.children[0], quant_id)
Expand All @@ -507,7 +606,7 @@ def build_expr(node, parent_id):
nonlocal chr_idx

if node.DOT():
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(idx=chr_idx, charset=dot_charset.id)))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.name, idx=chr_idx, charset=dot_charset.id)))
chr_idx += 1

elif node.notSet():
Expand All @@ -520,7 +619,7 @@ def build_expr(node, parent_id):

charset = Charset(multirange_diff(dot_charset.ranges, sorted(options, key=lambda x: x[0])))
graph.charsets.append(charset)
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(idx=chr_idx, charset=charset.id)))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.name, idx=chr_idx, charset=charset.id)))
chr_idx += 1

elif isinstance(node, ANTLRv4Parser.LexerAtomContext) and node.characterRange():
Expand All @@ -530,7 +629,7 @@ def build_expr(node, parent_id):

charset = Charset([(start, end)])
graph.charsets.append(charset)
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(idx=chr_idx, charset=charset.id)))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.name, idx=chr_idx, charset=charset.id)))
chr_idx += 1

elif isinstance(node, ANTLRv4Parser.LexerAtomContext) and node.LEXER_CHAR_SET():
Expand All @@ -540,7 +639,7 @@ def build_expr(node, parent_id):

charset = Charset(sorted(ranges, key=lambda x: x[0]))
graph.charsets.append(charset)
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(idx=chr_idx, charset=charset.id)))
graph.add_edge(frm=parent_id, to=graph.add_node(CharsetNode(rule_id=rule.name, idx=chr_idx, charset=charset.id)))
chr_idx += 1

for child in node.children:
Expand Down Expand Up @@ -632,7 +731,9 @@ def build_rules(node):
for rule_args in generator_rules:
build_rule(*rule_args)

if node.grammarDecl().grammarType().PARSER() or not (node.grammarDecl().grammarType().LEXER() or node.grammarDecl().grammarType().PARSER()):
if default_rule:
graph.default_rule = default_rule
elif node.grammarDecl().grammarType().PARSER() or not (node.grammarDecl().grammarType().LEXER() or node.grammarDecl().grammarType().PARSER()):
graph.default_rule = generator_rules[0][0].name

graph = GrammarGraph()
Expand Down Expand Up @@ -704,12 +805,13 @@ def __init__(self, lang, work_dir=None):
self.template = env.from_string(get_data(__package__, 'resources/codegen/GeneratorTemplate.' + lang + '.jinja').decode('utf-8'))
self.work_dir = work_dir or getcwd()

def generate_fuzzer(self, grammars, *, options=None, encoding='utf-8', lib_dir=None, actions=True, pep8=False):
def generate_fuzzer(self, grammars, *, options=None, default_rule=None, encoding='utf-8', lib_dir=None, actions=True, pep8=False):
"""
Generates fuzzers from grammars.
:param grammars: List of grammar files to generate from.
:param options: Dictionary of options that override/extend those set in the grammar.
:param default_rule: Name of the default rule to start generation from.
:param encoding: Grammar file encoding.
:param lib_dir: Alternative directory to look for imports.
:param actions: Boolean to enable or disable grammar actions.
Expand All @@ -728,8 +830,9 @@ def generate_fuzzer(self, grammars, *, options=None, encoding='utf-8', lib_dir=N
else:
copy(grammar, self.work_dir)

graph = build_graph(actions, lexer_root, parser_root)
graph = build_graph(actions, lexer_root, parser_root, default_rule)
graph.options.update(options or {})
graph.analyze()

src = self.template.render(graph=graph, version=__version__).lstrip()
with open(join(self.work_dir, graph.name + '.' + self.lang), 'w') as f:
Expand Down Expand Up @@ -790,6 +893,8 @@ def execute():
help='language of the generated code (choices: %(choices)s; default: %(default)s)')
parser.add_argument('--no-actions', dest='actions', default=True, action='store_false',
help='do not process inline actions.')
parser.add_argument('--rule', '-r', metavar='NAME',
help='default rule to start generation from (default: the first parser rule)')
parser.add_argument('--encoding', metavar='ENC', default='utf-8',
help='grammar file encoding (default: %(default)s).')
parser.add_argument('--lib', metavar='DIR',
Expand Down Expand Up @@ -818,7 +923,7 @@ def execute():
init_logging()
process_log_level_argument(args, logger)

FuzzerFactory(args.language, args.out).generate_fuzzer(args.grammar, options=options, encoding=args.encoding, lib_dir=args.lib, actions=args.actions, pep8=args.pep8)
FuzzerFactory(args.language, args.out).generate_fuzzer(args.grammar, options=options, default_rule=args.rule, encoding=args.encoding, lib_dir=args.lib, actions=args.actions, pep8=args.pep8)


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion grammarinator/resources/codegen/GeneratorTemplate.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ with AlternationContext(self, [{{ node.min_depth | join(', ') }}], [{{ node.cond
[{% for rule in simple_rules %}self.{{ rule }}{% if not loop.last %}, {% endif %}{% endfor %}][choice{{ node.idx }}](parent=current)
{% else %}
{% for child in node.out_neighbours %}
{{ 'if' if loop.index0 == 0 else 'elif' }} choice{{ node.idx }} == {{ loop.index0 }}:
{{ 'if' if loop.index0 == 0 else 'elif' }} choice{{ node.idx }} == {{ child.idx }}:
{{ processNode(child) | indent | indent -}}
{% endfor %}
{% endif %}
Expand Down

0 comments on commit 490dc1e

Please sign in to comment.