From 2e3272be48e32367cb1110dba5329ace06312017 Mon Sep 17 00:00:00 2001 From: Matthew Barnett Date: Sun, 28 Apr 2024 20:11:20 +0100 Subject: [PATCH] Git issue 527: `VERBOSE`/`X` flag breaks `\N` escapes --- .github/workflows/main.yml | 4 ++-- changelog.txt | 4 ++++ regex_3/_regex_core.py | 6 +++--- regex_3/regex.py | 2 +- regex_3/test_regex.py | 4 ++++ setup.py | 5 ++--- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 11ca26b..c0de7fe 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ on: env: PYTHON_VER: '3.10' # Python to run test/cibuildwheel - CIBW_BUILD: cp37-* cp38-* cp39-* cp310-* cp311-* cp312-* + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* CIBW_TEST_COMMAND: python -m unittest regex.test_regex jobs: @@ -94,7 +94,7 @@ jobs: # manylinux2014 >=19.3 3.7.8+, 3.8.4+, 3.9.0+ 2.17 (2012-12-25) # manylinux_x_y >=20.3 3.8.10+, 3.9.5+, 3.10.0+ x.y # manylinux2010 images EOL on 2022-08-01, it doesn't support cp311. - CIBW_BUILD: cp37-* cp38-* cp39-* cp310-* + CIBW_BUILD: cp38-* cp39-* cp310-* CIBW_MANYLINUX_X86_64_IMAGE: manylinux2010 CIBW_ARCHS_LINUX: x86_64 diff --git a/changelog.txt b/changelog.txt index 7ab3202..8193bd7 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +Version: 2024.4.28 + + Git issue 527: `VERBOSE`/`X` flag breaks `\N` escapes + Version: 2024.4.16 Git issue 525: segfault when fuzzy matching empty list diff --git a/regex_3/_regex_core.py b/regex_3/_regex_core.py index 550e922..b2ffeae 100644 --- a/regex_3/_regex_core.py +++ b/regex_3/_regex_core.py @@ -1359,7 +1359,7 @@ def parse_named_char(source, info, in_set): "Parses a named character." saved_pos = source.pos if source.match("{"): - name = source.get_while(NAMED_CHAR_PART) + name = source.get_while(NAMED_CHAR_PART, keep_spaces=True) if source.match("}"): try: value = unicodedata.lookup(name) @@ -4067,11 +4067,11 @@ def get_many(self, count=1): self.pos = len(string) return "".join(substring) - def get_while(self, test_set, include=True): + def get_while(self, test_set, include=True, keep_spaces=False): string = self.string pos = self.pos - if self.ignore_space: + if self.ignore_space and not keep_spaces: try: substring = [] diff --git a/regex_3/regex.py b/regex_3/regex.py index e467685..428a159 100644 --- a/regex_3/regex.py +++ b/regex_3/regex.py @@ -241,7 +241,7 @@ "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__", "__doc__", "RegexFlag"] -__version__ = "2.5.141" +__version__ = "2.5.142" # -------------------------------------------------------------------- # Public interface. diff --git a/regex_3/test_regex.py b/regex_3/test_regex.py index 9f902c0..8a02751 100644 --- a/regex_3/test_regex.py +++ b/regex_3/test_regex.py @@ -4329,6 +4329,10 @@ def test_hg_bugs(self): # Git issue 525: segfault when fuzzy matching empty list self.assertEqual(regex.match(r"(\L){e<=5}", "blah", foo=[]).span(), (0, 0)) + # Git issue 527: `VERBOSE`/`X` flag breaks `\N` escapes + self.assertEqual(regex.compile(r'\N{LATIN SMALL LETTER A}').match('a').span(), (0, 1)) + self.assertEqual(regex.compile(r'\N{LATIN SMALL LETTER A}', flags=regex.X).match('a').span(), (0, 1)) + def test_fuzzy_ext(self): self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')), True) diff --git a/setup.py b/setup.py index 45e8140..57188fd 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='regex', - version='2024.4.16', + version='2024.4.28', description='Alternative regular expression module, to replace re.', long_description=long_description, long_description_content_type='text/x-rst', @@ -21,7 +21,6 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', @@ -32,7 +31,7 @@ 'Topic :: Text Processing', 'Topic :: Text Processing :: General', ], - python_requires='>=3.7', + python_requires='>=3.8', package_dir={'regex': 'regex_3'}, py_modules=['regex.__init__', 'regex.regex', 'regex._regex_core',