Add serializer functionality to the generator

renatahodovan · Mar 5, 2020 · ecc7d72 · ecc7d72
1 parent 1915fe8
commit ecc7d72
Show file tree

Hide file tree

Showing 8 changed files with 84 additions and 77 deletions.
diff --git a/README.rst b/README.rst
@@ -95,10 +95,17 @@ language that can be placed basically anywhere without breaking the syntax. The
 most common examples are comments or whitespaces. However, when using these
 grammars - which don't define explicitly where whitespace may or may not appear
 in rules - to generate test cases, we have to insert the missing spaces
-manually. This can be done by applying various transformers (with the ``-t``
-option) to the tree representation of the output tests. A simple transformer -
+manually. This can be done by applying a serializer (with the ``-s``
+option) to the tree representation of the output tests. A simple serializer -
 that inserts a space after every unparser rule - is provided by grammarinator
-(``grammarinator.runtime.simple_space_transformer``).
+(``grammarinator.runtime.simple_space_serializer``).
+
+In some cases, we may want to postprocess the output tree itself (without
+serializing it), for example, to enforce some logic that cannot be
+expressed by a context-free grammar. For this purpose, the transformer mechanism
+can be used (with the ``-t`` option). Similarly to a serializer, a transformer
+takes a tree as input, but instead of creating a string representation, it is
+expected to return the modified (transformed) tree object.
 
 As a final thought, one must not forget that the original purpose of grammars
 is the syntax-wise validation of various inputs. As a consequence, these
@@ -123,7 +130,7 @@ a try, run the processor first::
 Then, use the generator to produce test cases::
 
     grammarinator-generate HTMLCustomGenerator.HTMLCustomGenerator -r htmlDocument \
-    -o examples/tests/test_%d.html -t HTMLGenerator.html_space_transformer -n 100 -d 20 --sys-path examples/fuzzer/
+    -o examples/tests/test_%d.html -s HTMLGenerator.html_space_serializer -n 100 -d 20 --sys-path examples/fuzzer/
 
 .. _example: examples/
 

diff --git a/examples/fuzzer/HTMLGenerator.py b/examples/fuzzer/HTMLGenerator.py
@@ -1,4 +1,4 @@
-# Generated by Grammarinator 19.3+21.gdbf52d7
+# Generated by Grammarinator 19.3+28.g12d4f7f
 
 from itertools import chain
 from math import inf
@@ -16,22 +16,24 @@
 charset_9 = list(chain(*multirange_diff(printable_unicode_ranges, [(34, 35), (60, 61)])))
 charset_10 = list(chain(*multirange_diff(printable_unicode_ranges, [(39, 40), (60, 61)])))
 
-def html_space_transformer(node):
+def html_space_serializer(root):
 
-    for child in node.children:
-        html_space_transformer(child)
-
-    if isinstance(node, UnparserRule):
-        new_children = []
+    def _walk(node):
+        nonlocal src
         for child in node.children:
-            new_children.append(child)
-            if child.name == 'htmlTagName' and child.right_sibling and child.right_sibling.name == 'htmlAttribute' \
-                    or child.name == 'htmlAttribute' \
-                    or isinstance(child, UnlexerRule) and child.src and child.src.endswith(('<script', '<style', '<?xml')):
-                new_children.append(UnlexerRule(src=' '))
-        node.children = new_children
-
-    return node
+            _walk(child)
+
+        if isinstance(node, UnlexerRule) and node.src:
+            src += node.src
+
+        if (isinstance(node, UnparserRule) and
+            node.name == 'htmlTagName' and node.right_sibling and node.right_sibling.name == 'htmlAttribute' or node.name == 'htmlAttribute') \
+                or isinstance(node, UnlexerRule) and node.src and node.src.endswith(('<script', '<style', '<?xml')):
+            src += ' '
+
+    src = ''
+    _walk(root)
+    return src
 
 
 

diff --git a/examples/grammars/HTMLParser.g4 b/examples/grammars/HTMLParser.g4
@@ -27,31 +27,33 @@
 */
 
 // TEST-PROCESS: {grammar}Parser.g4 {grammar}Lexer.g4 -o {tmpdir}
-// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r htmlDocument -t {grammar}Generator.html_space_transformer -n 5 -o {tmpdir}/{grammar}G%d.html
-// TEST-GENERATE: {grammar}CustomGenerator.{grammar}CustomGenerator -r htmlDocument -t {grammar}Generator.html_space_transformer -n 5 -o {tmpdir}/{grammar}C%d.html --sys-path ../fuzzer/
+// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r htmlDocument -s {grammar}Generator.html_space_serializer -n 5 -o {tmpdir}/{grammar}G%d.html
+// TEST-GENERATE: {grammar}CustomGenerator.{grammar}CustomGenerator -r htmlDocument -s {grammar}Generator.html_space_serializer -n 5 -o {tmpdir}/{grammar}C%d.html --sys-path ../fuzzer/
 
 parser grammar HTMLParser;
 
 options { tokenVocab=HTMLLexer;
           dot=any_unicode_char;}
 
 @header {
-def html_space_transformer(node):
+def html_space_serializer(root):
 
-    for child in node.children:
-        html_space_transformer(child)
-
-    if isinstance(node, UnparserRule):
-        new_children = []
+    def _walk(node):
+        nonlocal src
         for child in node.children:
-            new_children.append(child)
-            if child.name == 'htmlTagName' and child.right_sibling and child.right_sibling.name == 'htmlAttribute' \
-                    or child.name == 'htmlAttribute' \
-                    or isinstance(child, UnlexerRule) and child.src and child.src.endswith(('<script', '<style', '<?xml')):
-                new_children.append(UnlexerRule(src=' '))
-        node.children = new_children
-
-    return node
+            _walk(child)
+
+        if isinstance(node, UnlexerRule) and node.src:
+            src += node.src
+
+        if (isinstance(node, UnparserRule) and
+            node.name == 'htmlTagName' and node.right_sibling and node.right_sibling.name == 'htmlAttribute' or node.name == 'htmlAttribute') \
+                or isinstance(node, UnlexerRule) and node.src and node.src.endswith(('<script', '<style', '<?xml')):
+            src += ' '
+
+    src = ''
+    _walk(root)
+    return src
 
 }
 

diff --git a/grammarinator/generate.py b/grammarinator/generate.py
@@ -47,7 +47,7 @@ class Generator(object):
     def __init__(self, generator, rule, out_format,
                  model=None, max_depth=inf, cooldown=1.0,
                  population=None, generate=True, mutate=True, recombine=True, keep_trees=False,
-                 tree_transformers=None, test_transformers=None,
+                 transformers=None, serializer=None,
                  cleanup=True, encoding='utf-8'):
 
         def import_entity(name):
@@ -67,6 +67,8 @@ def get_boolean(value):
 
         self.generator_cls = import_entity(generator)
         self.model_cls = import_entity(model or 'grammarinator.model.DefaultModel')
+        self.transformers = import_list(transformers)
+        self.serializer = import_entity(serializer) if serializer else str
         self.rule = rule or self.generator_cls.default_rule.__name__
 
         out_dir = abspath(dirname(out_format))
@@ -89,9 +91,6 @@ def get_boolean(value):
         self.cleanup = get_boolean(cleanup)
         self.encoding = encoding
 
-        self.tree_transformers = import_list(tree_transformers)
-        self.test_transformers = import_list(test_transformers)
-
     def __enter__(self):
         return self
 
@@ -122,7 +121,7 @@ def create_new_test(self, index):
             return self.create_new_test(index)
 
         test_fn = self.out_format % index
-        tree.root = Generator.transform(tree.root, self.tree_transformers)
+        tree.root = Generator.transform(tree.root, self.transformers)
 
         tree_fn = None
         if self.keep_trees:
@@ -131,15 +130,10 @@ def create_new_test(self, index):
             tree.save(tree_fn)
 
         with codecs.open(test_fn, 'w', self.encoding) as f:
-            f.write(str(Generator.transform(tree.root, self.test_transformers)))
+            f.write(self.serializer(tree.root))
 
         return test_fn, tree_fn
 
-    def serialize(self, tree):
-        tree.root = Generator.transform(tree.root, self.tree_transformers)
-        tree.root = Generator.transform(tree.root, self.test_transformers)
-        return str(tree.root)
-
     @staticmethod
     def transform(root, transformers):
         for transformer in transformers:
@@ -220,13 +214,11 @@ def restricted_float(value):
                         help='name of the rule to start generation from (default: first parser rule).')
     parser.add_argument('-m', '--model', metavar='NAME', default='grammarinator.model.DefaultModel',
                         help='reference to the decision model (in package.module.class format) (default: %(default)s).')
-    parser.add_argument('-t', '--tree-transformer', metavar='NAME', action='append', default=[],
+    parser.add_argument('-t', '--transformer', metavar='NAME', action='append', default=[],
                         help='reference to a transformer (in package.module.function format) to postprocess the generated tree '
                              '(the result of these transformers will be saved into the serialized tree, e.g., variable matching).')
-    parser.add_argument('--test-transformer', metavar='NAME', action='append', default=[],
-                        help='reference to a transformer (in package.module.function format) to postprocess the generated tree '
-                             '(the result of these transformers will only affect test serialization but won\'t be saved to the '
-                             'tree representation, e.g., space insertion).')
+    parser.add_argument('-s', '--serializer', metavar='NAME',
+                        help='reference to a serializer (in package.module.function format) that takes a tree and produces a string from it.')
     parser.add_argument('-d', '--max-depth', default=inf, type=int, metavar='NUM',
                         help='maximum recursion depth during generation (default: %(default)f).')
     parser.add_argument('-c', '--cooldown', default=1.0, type=restricted_float, metavar='NUM',
@@ -276,7 +268,7 @@ def restricted_float(value):
     with Generator(generator=args.generator, rule=args.rule, out_format=args.out,
                    model=args.model, max_depth=args.max_depth, cooldown=args.cooldown,
                    population=args.population, generate=args.generate, mutate=args.mutate, recombine=args.recombine, keep_trees=args.keep_trees,
-                   tree_transformers=args.tree_transformer, test_transformers=args.test_transformer,
+                   transformers=args.transformer, serializer=args.serializer,
                    cleanup=False, encoding=args.encoding) as generator:
         if args.jobs > 1:
             with Pool(args.jobs) as pool:

diff --git a/grammarinator/runtime/__init__.py b/grammarinator/runtime/__init__.py
@@ -6,7 +6,7 @@
 # according to those terms.
 
 from .generator import depthcontrol, Generator, multirange_diff, printable_ascii_ranges, printable_unicode_ranges
-from .transformer import *
+from .serializer import *
 from .tree import BaseRule, Tree, UnlexerRule, UnparserRule
 
 __all__ = [
@@ -16,7 +16,7 @@
     'multirange_diff',
     'printable_ascii_ranges',
     'printable_unicode_ranges',
-    'simple_space_transformer',
+    'simple_space_serializer',
     'Tree',
     'UnlexerRule',
     'UnparserRule',

diff --git a/grammarinator/runtime/serializer.py b/grammarinator/runtime/serializer.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2017-2020 Renata Hodovan, Akos Kiss.
+#
+# Licensed under the BSD 3-Clause License
+# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
+# This file may not be copied, modified, or distributed except
+# according to those terms.
+
+from .tree import *
+
+
+def simple_space_serializer(root):
+
+    def _walk(node):
+        nonlocal src
+        for child in node.children:
+            _walk(child)
+
+            if isinstance(node, UnparserRule):
+                src += ' '
+
+        if isinstance(node, UnlexerRule) and node.src:
+            src += node.src
+
+    src = ''
+    _walk(root)
+    return src
diff --git a/grammarinator/runtime/transformer.py b/grammarinator/runtime/transformer.py
diff --git a/tests/grammars/Whitespace.g4 b/tests/grammars/Whitespace.g4
@@ -15,7 +15,7 @@
  */
 
 // TEST-PROCESS: {grammar}.g4 -o {tmpdir}
-// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -t grammarinator.runtime.simple_space_transformer -o {tmpdir}/{grammar}%d.txt
+// TEST-GENERATE: {grammar}Generator.{grammar}Generator -r start -s grammarinator.runtime.simple_space_serializer -o {tmpdir}/{grammar}%d.txt
 // TEST-ANTLR: {grammar}.g4 -o {tmpdir}
 // TEST-PARSE: -p {grammar}Parser -l {grammar}Lexer -r start {tmpdir}/{grammar}%d.txt