diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f4b84bc8..99491da7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,9 @@ Changelog *unreleased* ~~~~~~~~~~~~ -No unreleased changes. +* Prevent certain non-ASCII letters from being accepted as a part of the + local version segment (:issue:`469`). +* Document that ``VERSION_PATTERN`` requires using the ``re.ASCII`` flag. 21.3 - 2021-11-17 ~~~~~~~~~~~~~~~~~ diff --git a/docs/version.rst b/docs/version.rst index a43cf786..33e14647 100644 --- a/docs/version.rst +++ b/docs/version.rst @@ -284,7 +284,7 @@ Reference The pattern is not anchored at either end, and is intended for embedding in larger expressions (for example, matching a version number as part of a file name). The regular expression should be compiled with the - ``re.VERBOSE`` and ``re.IGNORECASE`` flags set. + ``re.VERBOSE``, ``re.IGNORECASE`` and ``re.ASCII`` flags set. .. _PEP 440: https://www.python.org/dev/peps/pep-0440/ diff --git a/packaging/specifiers.py b/packaging/specifiers.py index 0e218a6f..f6b66862 100644 --- a/packaging/specifiers.py +++ b/packaging/specifiers.py @@ -393,6 +393,23 @@ class Specifier(_IndividualSpecifier): _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + # Note: an additional check, based on the following regular + # expression, is necessary because without it the 'a-z' + # character ranges in the above regular expression, in + # conjunction with re.IGNORECASE, would cause erroneous + # acceptance of non-ASCII letters in the local version segment + # (see: https://docs.python.org/library/re.html#re.IGNORECASE). + _supplementary_restriction_regex = re.compile( + r""" + \s*===.* # No restriction in the identity operator case. + | + [\s\0-\177]* # In all other cases only whitespace characters + # and ASCII-only non-whitespace characters are + # allowed. 
+ """, + re.VERBOSE, + ) + _operators = { "~=": "compatible", "==": "equal", @@ -404,6 +421,13 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + match = self._supplementary_restriction_regex.fullmatch(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + @_require_version_compare def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: diff --git a/packaging/version.py b/packaging/version.py index de9a09a4..ec2fce25 100644 --- a/packaging/version.py +++ b/packaging/version.py @@ -256,12 +256,20 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey: class Version(_BaseVersion): - _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + VERSION_PATTERN, + # Note: the re.ASCII flag is necessary because without it the + # 'a-z' character ranges in VERSION_PATTERN, in conjunction + # with re.IGNORECASE, would cause erroneous acceptance of + # non-ASCII letters in the local version segment (see: + # https://docs.python.org/library/re.html#re.IGNORECASE). 
+ re.VERBOSE | re.IGNORECASE | re.ASCII, + ) def __init__(self, version: str) -> None: # Validate the version and parse it into pieces - match = self._regex.search(version) + match = self._regex.fullmatch(version.strip()) if not match: raise InvalidVersion(f"Invalid version: '{version}'") diff --git a/tests/test_specifiers.py b/tests/test_specifiers.py index ca21fa1d..89c00290 100644 --- a/tests/test_specifiers.py +++ b/tests/test_specifiers.py @@ -81,6 +81,9 @@ def test_specifiers_valid(self, specifier): # Cannot use a prefix matching after a .devN version "==1.0.dev1.*", "!=1.0.dev1.*", + # Local version which includes a non-ASCII letter that matches + # regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not + "==1.0+\u0130", ], ) def test_specifiers_invalid(self, specifier): @@ -197,6 +200,7 @@ def test_specifiers_invalid(self, specifier): # Various other normalizations "v1.0", " \r \f \v v1.0\t\n", + " \r\N{NARROW NO-BREAK SPACE}\v v1.0\N{PARAGRAPH SEPARATOR}", ], ) def test_specifiers_normalized(self, version): @@ -221,6 +225,7 @@ def test_specifiers_normalized(self, version): ("~=2.0", "~=2.0"), # Spaces should be removed ("< 2", "<2"), + ("<\N{HAIR SPACE}2", "<2"), ], ) def test_specifiers_str_and_repr(self, specifier, expected): diff --git a/tests/test_version.py b/tests/test_version.py index 5f2251e1..2e96bed5 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -96,6 +96,9 @@ def test_valid_versions(self, version): "1.0+_foobar", "1.0+foo&asd", "1.0+1+1", + # Local version which includes a non-ASCII letter that matches + # regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not + "1.0+\u0130", ], ) def test_invalid_versions(self, version): @@ -218,6 +221,7 @@ def test_invalid_versions(self, version): # Various other normalizations ("v1.0", "1.0"), (" v1.0\t\n", "1.0"), + ("\N{NARROW NO-BREAK SPACE}1.0\t\N{PARAGRAPH SEPARATOR}\n ", "1.0"), ], ) def test_normalized_versions(self, version, normalized):