Skip to content
This repository has been archived by the owner on Jan 30, 2024. It is now read-only.

Commit

Permalink
nfa greediness
Browse files: browse the repository at this point in the history
  • Loading branch information
nitely committed Jul 10, 2017
1 parent b6e6f0c commit 5beab29
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
16 changes: 13 additions & 3 deletions regexy/compile/nfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,19 @@ def nfa(nodes: Iterator[Node]) -> Node:

if node.char == Symbols.ZERO_OR_ONE:
state = states.pop()
node.out = [state, EOF]
states.append(node)
continue

# todo: refactor the whole thing. Move repetition range to parse.py
# todo: add non-greedy symbols
if state.char in (
Symbols.ZERO_OR_MORE,
Symbols.ZERO_OR_ONE):
state.out = list(reversed(state.out))
states.append(state)
continue
else:
node.out = [state, EOF]
states.append(node)
continue

if node.char == Symbols.GROUP_START:
state = states.pop()
Expand Down
20 changes: 20 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,23 @@ def test_non_capturing_groups(self):

# self.assertIsNone(match(r'((a)*n?(asd))*', 'aaanasdnasd')) # fixme
# should be equal to r'((a)*n?(asd)*)*' (see last capture)

def test_greediness(self):
# Pins down which capture group receives the repeated 'a's when several
# quantified groups compete for the same input.
# NOTE(review): `*?` and `??` are presumably the non-greedy forms of `*`
# and `?` -- confirm against the parser/NFA builder.

# Every group uses plain `*`: the first group is expected to capture all
# three 'a's, leaving the other two groups as None.
self.assertEqual(
match(r'(a)*(a)*(a)*', 'aaa'),
(('a', 'a', 'a'), None, None))
# Only the middle group uses plain `*`: it is expected to capture everything.
self.assertEqual(
match(r'(a)*?(a)*(a)*?', 'aaa'),
(None, ('a', 'a', 'a'), None))
# Only the last group uses plain `*`: it is expected to capture everything.
self.assertEqual(
match(r'(a)*?(a)*?(a)*', 'aaa'),
(None, None, ('a', 'a', 'a')))
# Every group uses `*?`: the expected result still has the last group
# capturing all three 'a's and the earlier groups as None.
self.assertEqual(
match(r'(a)*?(a)*?(a)*?', 'aaa'),
(None, None, ('a', 'a', 'a')))

# Plain `?` on the first group: it is expected to capture the first 'a',
# so the second group only gets the remaining one.
self.assertEqual(match(r'(a)?(aa?)', 'aa'), ('a', 'a'))
# `??` on the first group, but the second group matches a single 'a':
# both groups are expected to capture one 'a' each.
# NOTE(review): presumably because match() has to consume the whole
# input -- confirm.
self.assertEqual(match(r'(a)??(a)', 'aa'), ('a', 'a'))
# `??` on the first group and the second group able to take two 'a's:
# the first group is expected to be None and the second to capture 'aa'.
self.assertEqual(match(r'(a)??(aa?)', 'aa'), (None, 'aa'))

# print(to_nfa_str('(a)*?(a)*(a)*'))

0 comments on commit 5beab29

Please sign in to comment.