Skip to content

Commit

Permalink
Add option to control recursion depth.
Browse files Browse the repository at this point in the history
Until now, grammarinator hasn't supported any way to control the
recursion limit of generation (except for putting weights on the
non-recursive alternatives). This patch makes it possible to guide
the generation by a predefined maximum recursion limit.

Fixes #1.
  • Loading branch information
renatahodovan committed Jun 28, 2017
1 parent 43a95ba commit 85ca911
Show file tree
Hide file tree
Showing 6 changed files with 442 additions and 315 deletions.
153 changes: 77 additions & 76 deletions examples/fuzzer/HTMLUnlexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,181 +23,180 @@ def __init__(self):
self.lexer = self
self.set_options()

def EOF(self):
def EOF(self, *args, **kwargs):
pass



def style_sheet(self):
def style_sheet(self, *args, **kwargs):
return UnlexerRule(src='')

def HTML_COMMENT(self):
def HTML_COMMENT(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='HTML_COMMENT'))
current += self.create_node(UnlexerRule(src='<!--'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='-->'))
return current

def HTML_CONDITIONAL_COMMENT(self):
def HTML_CONDITIONAL_COMMENT(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='HTML_CONDITIONAL_COMMENT'))
current += self.create_node(UnlexerRule(src='<!['))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src=']>'))
return current

def XML_DECLARATION(self):
def XML_DECLARATION(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='XML_DECLARATION'))
current += self.create_node(UnlexerRule(src='<?xml'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='>'))
return current

def CDATA(self):
def CDATA(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='CDATA'))
current += self.create_node(UnlexerRule(src='<![CDATA['))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src=']]>'))
return current

def DTD(self):
def DTD(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='DTD'))
current += self.create_node(UnlexerRule(src='<!'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='>'))
return current

def SCRIPTLET(self):
def SCRIPTLET(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SCRIPTLET'))
weights = [1, 1]
weights = self.depth_limited_weights([1, 1], self.min_depths['alt_0'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.create_node(UnlexerRule(src='<?'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='?>'))
elif choice == 1:
current += self.create_node(UnlexerRule(src='<%'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='%>'))
return current

def SEA_WS(self):
def SEA_WS(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SEA_WS'))
for _ in self.one_or_more():
weights = [1, 1, 1]
for _ in self.one_or_more(max_depth=max_depth):
weights = self.depth_limited_weights([1, 1, 1], self.min_depths['alt_1'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.create_node(UnlexerRule(src=' '))
elif choice == 1:
current += self.create_node(UnlexerRule(src='\t'))
elif choice == 2:
for _ in self.zero_or_one():
for _ in self.zero_or_one(max_depth=max_depth):
current += self.create_node(UnlexerRule(src='\r'))

current += self.create_node(UnlexerRule(src='\n'))

return current

def SCRIPT_OPEN(self):
def SCRIPT_OPEN(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SCRIPT_OPEN'))
current += self.create_node(UnlexerRule(src='<script'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='>'))
return current

def STYLE_OPEN(self):
def STYLE_OPEN(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='STYLE_OPEN'))
current += self.create_node(UnlexerRule(src='<style'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='>'))
return current

def TAG_OPEN(self):
def TAG_OPEN(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_OPEN'))
current += self.create_node(UnlexerRule(src='<'))
return current

def HTML_TEXT(self):
def HTML_TEXT(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='HTML_TEXT'))
for _ in self.one_or_more():
for _ in self.one_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.char_from_list(charset_0))

return current

def TAG_CLOSE(self):
def TAG_CLOSE(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_CLOSE'))
current += self.create_node(UnlexerRule(src='>'))
return current

def TAG_SLASH_CLOSE(self):
def TAG_SLASH_CLOSE(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_SLASH_CLOSE'))
current += self.create_node(UnlexerRule(src='/>'))
return current

def TAG_SLASH(self):
def TAG_SLASH(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_SLASH'))
current += self.create_node(UnlexerRule(src='/'))
return current

def TAG_EQUALS(self):
def TAG_EQUALS(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_EQUALS'))
current += self.create_node(UnlexerRule(src='='))
return current

def TAG_NAME(self):
def TAG_NAME(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_NAME'))
current += self.lexer.TAG_NameStartChar()
for _ in self.zero_or_more():
current += self.lexer.TAG_NameChar()
current += self.lexer.TAG_NameStartChar(max_depth=max_depth - 1)
for _ in self.zero_or_more(max_depth=max_depth):
current += self.lexer.TAG_NameChar(max_depth=max_depth - 1)

return current

def TAG_WHITESPACE(self):
def TAG_WHITESPACE(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_WHITESPACE'))
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_1)))
return current

def HEXDIGIT(self):
def HEXDIGIT(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='HEXDIGIT'))
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_2)))
return current

def DIGIT(self):
def DIGIT(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='DIGIT'))
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_3)))
return current

def TAG_NameChar(self):
def TAG_NameChar(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_NameChar'))
weights = [1, 1, 1, 1, 1, 1, 1, 1]
weights = self.depth_limited_weights([1, 1, 1, 1, 1, 1, 1, 1], self.min_depths['alt_2'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.lexer.TAG_NameStartChar()
current += self.lexer.TAG_NameStartChar(max_depth=max_depth - 1)
elif choice == 1:
current += self.create_node(UnlexerRule(src='-'))
elif choice == 2:
current += self.create_node(UnlexerRule(src='_'))
elif choice == 3:
current += self.create_node(UnlexerRule(src='.'))
elif choice == 4:
current += self.lexer.DIGIT()
current += self.lexer.DIGIT(max_depth=max_depth - 1)
elif choice == 5:
current += self.create_node(UnlexerRule(src='\u00B7'))
elif choice == 6:
Expand All @@ -206,9 +205,9 @@ def TAG_NameChar(self):
current += self.create_node(UnlexerRule(src=self.char_from_list(range(8255, 8256))))
return current

def TAG_NameStartChar(self):
def TAG_NameStartChar(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='TAG_NameStartChar'))
weights = [1, 1, 1, 1, 1, 1]
weights = self.depth_limited_weights([1, 1, 1, 1, 1, 1], self.min_depths['alt_3'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_4)))
Expand All @@ -224,61 +223,61 @@ def TAG_NameStartChar(self):
current += self.create_node(UnlexerRule(src=self.char_from_list(range(65008, 65533))))
return current

def SCRIPT_BODY(self):
def SCRIPT_BODY(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SCRIPT_BODY'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='</script>'))
return current

def SCRIPT_SHORT_BODY(self):
def SCRIPT_SHORT_BODY(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SCRIPT_SHORT_BODY'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.any_char())

current += self.create_node(UnlexerRule(src='</>'))
return current

def STYLE_BODY(self):
def STYLE_BODY(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='STYLE_BODY'))
current += self.style_sheet()
current += self.create_node(UnlexerRule(src='</style>'))
return current

def STYLE_SHORT_BODY(self):
def STYLE_SHORT_BODY(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='STYLE_SHORT_BODY'))
current += self.style_sheet()
current += self.create_node(UnlexerRule(src='</>'))
return current

def ATTVALUE_VALUE(self):
def ATTVALUE_VALUE(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='ATTVALUE_VALUE'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_5)))

current += self.lexer.ATTRIBUTE()
current += self.lexer.ATTRIBUTE(max_depth=max_depth - 1)
return current

def ATTRIBUTE(self):
def ATTRIBUTE(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='ATTRIBUTE'))
weights = [1, 1, 1, 1, 1]
weights = self.depth_limited_weights([1, 1, 1, 1, 1], self.min_depths['alt_4'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.lexer.DOUBLE_QUOTE_STRING()
current += self.lexer.DOUBLE_QUOTE_STRING(max_depth=max_depth - 1)
elif choice == 1:
current += self.lexer.SINGLE_QUOTE_STRING()
current += self.lexer.SINGLE_QUOTE_STRING(max_depth=max_depth - 1)
elif choice == 2:
current += self.lexer.ATTCHARS()
current += self.lexer.ATTCHARS(max_depth=max_depth - 1)
elif choice == 3:
current += self.lexer.HEXCHARS()
current += self.lexer.HEXCHARS(max_depth=max_depth - 1)
elif choice == 4:
current += self.lexer.DECCHARS()
current += self.lexer.DECCHARS(max_depth=max_depth - 1)
return current

def ATTCHAR(self):
def ATTCHAR(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='ATTCHAR'))
weights = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
weights = self.depth_limited_weights([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], self.min_depths['alt_5'], max_depth)
choice = self.choice(weights)
if choice == 0:
current += self.create_node(UnlexerRule(src='-'))
Expand Down Expand Up @@ -306,47 +305,47 @@ def ATTCHAR(self):
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_6)))
return current

def ATTCHARS(self):
def ATTCHARS(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='ATTCHARS'))
for _ in self.one_or_more():
current += self.lexer.ATTCHAR()
for _ in self.one_or_more(max_depth=max_depth):
current += self.lexer.ATTCHAR(max_depth=max_depth - 1)

for _ in self.zero_or_one():
for _ in self.zero_or_one(max_depth=max_depth):
current += self.create_node(UnlexerRule(src=' '))

return current

def HEXCHARS(self):
def HEXCHARS(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='HEXCHARS'))
current += self.create_node(UnlexerRule(src='#'))
for _ in self.one_or_more():
for _ in self.one_or_more(max_depth=max_depth):
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_7)))

return current

def DECCHARS(self):
def DECCHARS(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='DECCHARS'))
for _ in self.one_or_more():
for _ in self.one_or_more(max_depth=max_depth):
current += self.create_node(UnlexerRule(src=self.char_from_list(charset_8)))

for _ in self.zero_or_one():
for _ in self.zero_or_one(max_depth=max_depth):
current += self.create_node(UnlexerRule(src='%'))

return current

def DOUBLE_QUOTE_STRING(self):
def DOUBLE_QUOTE_STRING(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='DOUBLE_QUOTE_STRING'))
current += self.create_node(UnlexerRule(src='"'))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.char_from_list(charset_9))

current += self.create_node(UnlexerRule(src='"'))
return current

def SINGLE_QUOTE_STRING(self):
def SINGLE_QUOTE_STRING(self, *, max_depth=float('inf')):
current = self.create_node(UnlexerRule(name='SINGLE_QUOTE_STRING'))
current += self.create_node(UnlexerRule(src='\''))
for _ in self.zero_or_more():
for _ in self.zero_or_more(max_depth=max_depth):
current += UnlexerRule(src=self.char_from_list(charset_10))

current += self.create_node(UnlexerRule(src='\''))
Expand All @@ -355,3 +354,5 @@ def SINGLE_QUOTE_STRING(self):
def set_options(self):
self.options = dict(tokenVocab="HTMLLexer", dot="any_unicode_char")

min_depths = {'SCRIPT_SHORT_BODY': 0, 'TAG_CLOSE': 0, 'alt_2': [1, 0, 0, 0, 1, 0, 0, 0], 'HEXCHARS': 0, 'TAG_EQUALS': 0, 'TAG_NAME': 1, 'STYLE_BODY': 0, 'DOUBLE_QUOTE_STRING': 0, 'alt_4': [1, 1, 2, 1, 1], 'SEA_WS': 0, 'TAG_WHITESPACE': 0, 'STYLE_OPEN': 0, 'HTML_TEXT': 0, 'DECCHARS': 0, 'ATTCHAR': 0, 'TAG_SLASH': 0, 'SCRIPT_BODY': 0, 'XML_DECLARATION': 0, 'DTD': 0, 'ATTRIBUTE': 1, 'ATTCHARS': 1, 'ATTVALUE_VALUE': 2, 'TAG_SLASH_CLOSE': 0, 'STYLE_SHORT_BODY': 0, 'alt_5': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'TAG_NameStartChar': 0, 'HTML_COMMENT': 0, 'SINGLE_QUOTE_STRING': 0, 'SCRIPT_OPEN': 0, 'HTML_CONDITIONAL_COMMENT': 0, 'TAG_NameChar': 0, 'SCRIPTLET': 0, 'CDATA': 0, 'alt_3': [0, 0, 0, 0, 0, 0], 'HEXDIGIT': 0, 'DIGIT': 0, 'alt_0': [0, 0], 'TAG_OPEN': 0, 'alt_1': [0, 0, 0]}

Loading

0 comments on commit 85ca911

Please sign in to comment.