From 63af7b3b11d93b6636afb83c91d7eb7d2558ba20 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Apr 2022 07:35:21 -0700 Subject: [PATCH] Add more tests for group names and refs in RE (GH-91695) (cherry picked from commit 74070085da5322ac83c954f101f2caa150655be2) Co-authored-by: Serhiy Storchaka --- Lib/test/test_re.py | 56 +++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 48a609b5a00317..56e98b7aedce7c 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -217,6 +217,16 @@ def test_symbolic_groups(self): re.compile(r'(?Px)(?P=a)(?(a)y)') re.compile(r'(?Px)(?P=a1)(?(a1)y)') re.compile(r'(?Px)\1(?(1)y)') + re.compile(b'(?Px)(?P=a1)(?(a1)y)') + # New valid identifiers in Python 3 + re.compile('(?P<ยต>x)(?P=ยต)(?(ยต)y)') + re.compile('(?P<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>x)(?P=๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข)(?(๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข)y)') + # Support > 100 groups. + pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?(200)z|t)' % pat + self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + + def test_symbolic_groups_errors(self): self.checkPatternError(r'(?P)(?P)', "redefinition of group name 'a' as group 2; " "was group 1") @@ -242,16 +252,22 @@ def test_symbolic_groups(self): self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) - # New valid/invalid identifiers in Python 3 - re.compile('(?P<ยต>x)(?P=ยต)(?(ยต)y)') - re.compile('(?P<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>x)(?P=๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข)(?(๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข)y)') self.checkPatternError('(?P<ยฉ>x)', "bad character in group name 'ยฉ'", 4) + self.checkPatternError('(?P=ยฉ)', "bad character in group name 'ยฉ'", 4) + self.checkPatternError('(?(ยฉ)y)', "bad character in group name 'ยฉ'", 3) + + def test_symbolic_refs(self): + self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') + self.assertEqual(re.sub(b'(?Px)', br'\g', b'xx'), b'xx') + # New valid identifiers in Python 3 + self.assertEqual(re.sub('(?P<ยต>x)', r'\g<ยต>', 'xx'), 'xx') + self.assertEqual(re.sub('(?P<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>x)', r'\g<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>', 'xx'), 'xx') # Support > 100 groups. pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - pat = '(?:%s)(?(200)z|t)' % pat - self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') - def test_symbolic_refs(self): + def test_symbolic_refs_errors(self): self.checkTemplateError('(?Px)', r'\g, unterminated name', 3) self.checkTemplateError('(?Px)', r'\g<', 'xx', @@ -269,18 +285,14 @@ def test_symbolic_refs(self): 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?Px)', r'\g', 'xx') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') self.checkTemplateError('(?Px)', r'\g<-1>', 'xx', "bad character in group name '-1'", 3) - # New valid/invalid identifiers in Python 3 - self.assertEqual(re.sub('(?P<ยต>x)', r'\g<ยต>', 'xx'), 'xx') - self.assertEqual(re.sub('(?P<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>x)', r'\g<๐”˜๐”ซ๐”ฆ๐” ๐”ฌ๐”ก๐”ข>', 'xx'), 'xx') self.checkTemplateError('(?Px)', r'\g<ยฉ>', 'xx', "bad character in group name 'ยฉ'", 3) - # Support > 100 groups. - pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') + self.checkTemplateError('(?Px)', r'\g<ใŠ€>', 'xx', + "bad character in group name 'ใŠ€'", 3) + self.checkTemplateError('(?Px)', r'\g<ยน>', 'xx', + "bad character in group name 'ยน'", 3) def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -542,9 +554,23 @@ def test_re_groupref_exists(self): pat = '(?:%s)(?(200)z)' % pat self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) - self.checkPatternError(r'(?P)(?(0))', 'bad group number', 10) + def test_re_groupref_exists_errors(self): + self.checkPatternError(r'(?P)(?(0)a|b)', 'bad group number', 10) + self.checkPatternError(r'()(?(-1)a|b)', + "bad character in group name '-1'", 5) + self.checkPatternError(r'()(?(ใŠ€)a|b)', + "bad character in group name 'ใŠ€'", 5) + self.checkPatternError(r'()(?(ยน)a|b)', + "bad character in group name 'ยน'", 5) + self.checkPatternError(r'()(?(1', + "missing ), unterminated name", 5) + self.checkPatternError(r'()(?(1)a', + "missing ), unterminated subpattern", 2) self.checkPatternError(r'()(?(1)a|b', 'missing ), unterminated subpattern', 2) + self.checkPatternError(r'()(?(1)a|b|c', + 'conditional backref with more than ' + 'two branches', 10) self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two branches', 10)