Skip to content

Commit

Permalink
gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612)
Browse files Browse the repository at this point in the history

Restore the global Input Stream pointer after trying to match a sub-pattern.

Co-authored-by: Ma Lin <animalize@users.noreply.github.com>
  • Loading branch information
uyw4687 and Ma Lin committed Aug 16, 2023
1 parent a86df29 commit abd9cc5
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 10 deletions.
7 changes: 0 additions & 7 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
emit(ANY_ALL)
else:
emit(ANY)
elif op is POSSESSIVE_REPEAT:
# gh-106052: Possessive quantifiers do not work when the
# subpattern contains backtracking, i.e. "(?:ab?c)*+".
# Implement it as equivalent greedy qualifier in atomic group.
p = [(MAX_REPEAT, av)]
p = [(ATOMIC_GROUP, p)]
_compile(code, p, flags)
elif op in REPEATING_CODES:
if _simple(av[2]):
emit(REPEATING_CODES[op][2])
Expand Down
14 changes: 11 additions & 3 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2342,7 +2342,17 @@ def test_bug_gh91616(self):
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))

def test_bug_gh106052(self):
def test_bug_gh100061(self):
# gh-100061
self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
# gh-106052
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
Expand Down Expand Up @@ -2451,7 +2461,6 @@ def test_atomic_group(self):
17: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat_one(self):
self.assertEqual(get_debug_out(r'a?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand All @@ -2464,7 +2473,6 @@ def test_possesive_repeat_one(self):
12: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat(self):
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix a bug that causes wrong matches for regular expressions with possessive
quantifiers.
4 changes: 4 additions & 0 deletions Modules/_sre/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();

/* Restore the global Input Stream pointer
since it can change after jumps. */
state->ptr = ptr;

/* We have sufficient matches, so exit loop. */
break;
}
Expand Down

0 comments on commit abd9cc5

Please sign in to comment.