Skip to content

Commit

Permalink
Add serializer functionality to the generator
Browse files Browse the repository at this point in the history
  • Loading branch information
renatahodovan committed Mar 5, 2020
1 parent 1915fe8 commit ecc7d72
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 77 deletions.
15 changes: 11 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,17 @@ language that can be placed basically anywhere without breaking the syntax. The
most common examples are comments or whitespaces. However, when using these
grammars - which don't define explicitly where whitespace may or may not appear
in rules - to generate test cases, we have to insert the missing spaces
manually. This can be done by applying various transformers (with the ``-t``
option) to the tree representation of the output tests. A simple transformer -
manually. This can be done by applying a serializer (with the ``-s``
option) to the tree representation of the output tests. A simple serializer -
that inserts a space after every unparser rule - is provided by grammarinator
(``grammarinator.runtime.simple_space_transformer``).
(``grammarinator.runtime.simple_space_serializer``).

In some cases, we may want to postprocess the output tree itself (without
serializing it), for example, to enforce some logic that cannot be
expressed by a context-free grammar. For this purpose, the transformer mechanism
can be used (with the ``-t`` option). Similarly to a serializer, a transformer
takes a tree as input, but instead of creating a string representation, it is
expected to return the modified (transformed) tree object.

As a final thought, one must not forget that the original purpose of grammars
is the syntax-wise validation of various inputs. As a consequence, these
Expand All @@ -123,7 +130,7 @@ a try, run the processor first::
Then, use the generator to produce test cases::

grammarinator-generate HTMLCustomGenerator.HTMLCustomGenerator -r htmlDocument \
-o examples/tests/test_%d.html -t HTMLGenerator.html_space_transformer -n 100 -d 20 --sys-path examples/fuzzer/
-o examples/tests/test_%d.html -s HTMLGenerator.html_space_serializer -n 100 -d 20 --sys-path examples/fuzzer/

.. _example: examples/

Expand Down
32 changes: 17 additions & 15 deletions examples/fuzzer/HTMLGenerator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Grammarinator 19.3+21.gdbf52d7
# Generated by Grammarinator 19.3+28.g12d4f7f

from itertools import chain
from math import inf
Expand All @@ -16,22 +16,24 @@
charset_9 = list(chain(*multirange_diff(printable_unicode_ranges, [(34, 35), (60, 61)])))
charset_10 = list(chain(*multirange_diff(printable_unicode_ranges, [(39, 40), (60, 61)])))

def html_space_transformer(node):
def html_space_serializer(root):

for child in node.children:
html_space_transformer(child)

if isinstance(node, UnparserRule):
new_children = []
def _walk(node):
nonlocal src
for child in node.children:
new_children.append(child)
if child.name == 'htmlTagName' and child.right_sibling and child.right_sibling.name == 'htmlAttribute' \
or child.name == 'htmlAttribute' \
or isinstance(child, UnlexerRule) and child.src and child.src.endswith(('<script', '<style', '<?xml')):
new_children.append(UnlexerRule(src=' '))
node.children = new_children

return node
_walk(child)

if isinstance(node, UnlexerRule) and node.src:
src += node.src

if (isinstance(node, UnparserRule) and
node.name == 'htmlTagName' and node.right_sibling and node.right_sibling.name == 'htmlAttribute' or node.name == 'htmlAttribute') \
or isinstance(node, UnlexerRule) and node.src and node.src.endswith(('<script', '<style', '<?xml')):
src += ' '

src = ''
_walk(root)
return src



Expand Down
34 changes: 18 additions & 16 deletions examples/grammars/HTMLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,33 @@
*/

// TEST-PROCESS: {grammar}Parser.g4 {grammar}Lexer.g4 -o {tmpdir}
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r htmlDocument -t {grammar}Generator.html_space_transformer -n 5 -o {tmpdir}/{grammar}G%d.html
// TEST-GENERATE: {grammar}CustomGenerator.{grammar}CustomGenerator -r htmlDocument -t {grammar}Generator.html_space_transformer -n 5 -o {tmpdir}/{grammar}C%d.html --sys-path ../fuzzer/
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r htmlDocument -s {grammar}Generator.html_space_serializer -n 5 -o {tmpdir}/{grammar}G%d.html
// TEST-GENERATE: {grammar}CustomGenerator.{grammar}CustomGenerator -r htmlDocument -s {grammar}Generator.html_space_serializer -n 5 -o {tmpdir}/{grammar}C%d.html --sys-path ../fuzzer/

parser grammar HTMLParser;

options { tokenVocab=HTMLLexer;
dot=any_unicode_char;}

@header {
def html_space_transformer(node):
def html_space_serializer(root):
for child in node.children:
html_space_transformer(child)
if isinstance(node, UnparserRule):
new_children = []
def _walk(node):
nonlocal src
for child in node.children:
new_children.append(child)
if child.name == 'htmlTagName' and child.right_sibling and child.right_sibling.name == 'htmlAttribute' \
or child.name == 'htmlAttribute' \
or isinstance(child, UnlexerRule) and child.src and child.src.endswith(('<script', '<style', '<?xml')):
new_children.append(UnlexerRule(src=' '))
node.children = new_children
return node
_walk(child)
if isinstance(node, UnlexerRule) and node.src:
src += node.src
if (isinstance(node, UnparserRule) and
node.name == 'htmlTagName' and node.right_sibling and node.right_sibling.name == 'htmlAttribute' or node.name == 'htmlAttribute') \
or isinstance(node, UnlexerRule) and node.src and node.src.endswith(('<script', '<style', '<?xml')):
src += ' '
src = ''
_walk(root)
return src
}

Expand Down
26 changes: 9 additions & 17 deletions grammarinator/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Generator(object):
def __init__(self, generator, rule, out_format,
model=None, max_depth=inf, cooldown=1.0,
population=None, generate=True, mutate=True, recombine=True, keep_trees=False,
tree_transformers=None, test_transformers=None,
transformers=None, serializer=None,
cleanup=True, encoding='utf-8'):

def import_entity(name):
Expand All @@ -67,6 +67,8 @@ def get_boolean(value):

self.generator_cls = import_entity(generator)
self.model_cls = import_entity(model or 'grammarinator.model.DefaultModel')
self.transformers = import_list(transformers)
self.serializer = import_entity(serializer) if serializer else str
self.rule = rule or self.generator_cls.default_rule.__name__

out_dir = abspath(dirname(out_format))
Expand All @@ -89,9 +91,6 @@ def get_boolean(value):
self.cleanup = get_boolean(cleanup)
self.encoding = encoding

self.tree_transformers = import_list(tree_transformers)
self.test_transformers = import_list(test_transformers)

def __enter__(self):
return self

Expand Down Expand Up @@ -122,7 +121,7 @@ def create_new_test(self, index):
return self.create_new_test(index)

test_fn = self.out_format % index
tree.root = Generator.transform(tree.root, self.tree_transformers)
tree.root = Generator.transform(tree.root, self.transformers)

tree_fn = None
if self.keep_trees:
Expand All @@ -131,15 +130,10 @@ def create_new_test(self, index):
tree.save(tree_fn)

with codecs.open(test_fn, 'w', self.encoding) as f:
f.write(str(Generator.transform(tree.root, self.test_transformers)))
f.write(self.serializer(tree.root))

return test_fn, tree_fn

def serialize(self, tree):
tree.root = Generator.transform(tree.root, self.tree_transformers)
tree.root = Generator.transform(tree.root, self.test_transformers)
return str(tree.root)

@staticmethod
def transform(root, transformers):
for transformer in transformers:
Expand Down Expand Up @@ -220,13 +214,11 @@ def restricted_float(value):
help='name of the rule to start generation from (default: first parser rule).')
parser.add_argument('-m', '--model', metavar='NAME', default='grammarinator.model.DefaultModel',
help='reference to the decision model (in package.module.class format) (default: %(default)s).')
parser.add_argument('-t', '--tree-transformer', metavar='NAME', action='append', default=[],
parser.add_argument('-t', '--transformer', metavar='NAME', action='append', default=[],
help='reference to a transformer (in package.module.function format) to postprocess the generated tree '
'(the result of these transformers will be saved into the serialized tree, e.g., variable matching).')
parser.add_argument('--test-transformer', metavar='NAME', action='append', default=[],
help='reference to a transformer (in package.module.function format) to postprocess the generated tree '
'(the result of these transformers will only affect test serialization but won\'t be saved to the '
'tree representation, e.g., space insertion).')
parser.add_argument('-s', '--serializer', metavar='NAME',
help='reference to a seralizer (in package.module.function format) that takes a tree and produces a string from it.')
parser.add_argument('-d', '--max-depth', default=inf, type=int, metavar='NUM',
help='maximum recursion depth during generation (default: %(default)f).')
parser.add_argument('-c', '--cooldown', default=1.0, type=restricted_float, metavar='NUM',
Expand Down Expand Up @@ -276,7 +268,7 @@ def restricted_float(value):
with Generator(generator=args.generator, rule=args.rule, out_format=args.out,
model=args.model, max_depth=args.max_depth, cooldown=args.cooldown,
population=args.population, generate=args.generate, mutate=args.mutate, recombine=args.recombine, keep_trees=args.keep_trees,
tree_transformers=args.tree_transformer, test_transformers=args.test_transformer,
transformers=args.transformer, serializer=args.serializer,
cleanup=False, encoding=args.encoding) as generator:
if args.jobs > 1:
with Pool(args.jobs) as pool:
Expand Down
4 changes: 2 additions & 2 deletions grammarinator/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# according to those terms.

from .generator import depthcontrol, Generator, multirange_diff, printable_ascii_ranges, printable_unicode_ranges
from .transformer import *
from .serializer import *
from .tree import BaseRule, Tree, UnlexerRule, UnparserRule

__all__ = [
Expand All @@ -16,7 +16,7 @@
'multirange_diff',
'printable_ascii_ranges',
'printable_unicode_ranges',
'simple_space_transformer',
'simple_space_serializer',
'Tree',
'UnlexerRule',
'UnparserRule',
Expand Down
26 changes: 26 additions & 0 deletions grammarinator/runtime/serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2017-2020 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

from .tree import *


def simple_space_serializer(root):
    """
    Serialize the tree rooted at ``root`` into a string, emitting a single
    space after every unparser rule.

    The tree is visited depth-first. For each node, the text of all of its
    descendants is emitted first; then a space is added if the node is an
    ``UnparserRule``, and the node's own ``src`` is added if it is an
    ``UnlexerRule`` with a non-empty ``src``.

    :param root: root node of the (sub)tree to serialize; every visited node
        is expected to expose a ``children`` iterable.
    :return: the concatenated string representation of the tree.
    """
    fragments = []

    def _collect(current):
        # Descendants contribute their text before the node itself does.
        for descendant in current.children:
            _collect(descendant)

        if isinstance(current, UnparserRule):
            fragments.append(' ')

        if isinstance(current, UnlexerRule) and current.src:
            fragments.append(current.src)

    _collect(root)
    return ''.join(fragments)
22 changes: 0 additions & 22 deletions grammarinator/runtime/transformer.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/grammars/Whitespace.g4
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

// TEST-PROCESS: {grammar}.g4 -o {tmpdir}
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -t grammarinator.runtime.simple_space_transformer -o {tmpdir}/{grammar}%d.txt
// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -s grammarinator.runtime.simple_space_serializer -o {tmpdir}/{grammar}%d.txt
// TEST-ANTLR: {grammar}.g4 -o {tmpdir}
// TEST-PARSE: -p {grammar}Parser -l {grammar}Lexer -r start {tmpdir}/{grammar}%d.txt

Expand Down

0 comments on commit ecc7d72

Please sign in to comment.