Skip to content

Commit

Permalink
Fix erroneous non-ASCII in local version (#469)
Browse files Browse the repository at this point in the history
* Fix validation in the packaging.version.Version's constructor

* Fix validation in the packaging.specifiers.Specifier's constructor

* Fix docs of packaging.version.VERSION_PATTERN by mentioning the necessity
  of the re.ASCII flag
  • Loading branch information
zuo committed Oct 24, 2021
1 parent 42e1396 commit 3a92170
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 4 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ Changelog
*unreleased*
~~~~~~~~~~~~

No unreleased changes.
* Fix parsing of ``Version`` and ``Specifier``, to prevent certain
non-ASCII letters from being accepted as a part of the local version
segment (:issue:`469`); also, fix the docs of ``VERSION_PATTERN``, to
mention the necessity of the ``re.ASCII`` flag

21.0 - 2021-07-03
~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion docs/version.rst
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ Reference
The pattern is not anchored at either end, and is intended for embedding
in larger expressions (for example, matching a version number as part of
a file name). The regular expression should be compiled with the
``re.VERBOSE`` and ``re.IGNORECASE`` flags set.
``re.VERBOSE``, ``re.IGNORECASE`` and ``re.ASCII`` flags set.


.. _PEP 440: https://www.python.org/dev/peps/pep-0440/
Expand Down
21 changes: 21 additions & 0 deletions packaging/specifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,20 @@ class Specifier(_IndividualSpecifier):

_regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)

# Note: an additional check, based on the following regular
# expression, is necessary because without it the 'a-z'
# character ranges in the above regular expression, in
# conjunction with re.IGNORECASE, would cause erroneous
# acceptance of non-ASCII letters in the local version segment
# (see: https://docs.python.org/library/re.html#re.IGNORECASE).
_supplementary_restriction_regex = re.compile(r"""
\s*===.* # No restriction in the identity operator case.
|
[\s\0-\177]* # In all other cases only whitespace characters
# and ASCII-only non-whitespace characters are
# allowed.
""", re.VERBOSE)

_operators = {
"~=": "compatible",
"==": "equal",
Expand All @@ -422,6 +436,13 @@ class Specifier(_IndividualSpecifier):
"===": "arbitrary",
}

def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
# Run the parent constructor on the same arguments first; this override only
# adds one further check on top of it.
super().__init__(spec, prereleases)

# The whole raw `spec` string must match the supplementary pattern: either it
# begins (after optional whitespace) with `===`, in which case nothing else is
# restricted, or it consists solely of whitespace and characters in the
# \0-\177 range. Any other input is rejected with InvalidSpecifier, which
# keeps non-ASCII letters out of the local version segment.
match = self._supplementary_restriction_regex.fullmatch(spec)
if not match:
raise InvalidSpecifier(f"Invalid specifier: '{spec}'")

@_require_version_compare
def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool:

Expand Down
13 changes: 11 additions & 2 deletions packaging/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,21 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:

class Version(_BaseVersion):

_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_regex = re.compile(
VERSION_PATTERN,

# Note: the re.ASCII flag is necessary because without it the
# 'a-z' character ranges in VERSION_PATTERN, in conjunction
# with re.IGNORECASE, would cause erroneous acceptance of
# non-ASCII letters in the local version segment (see:
# https://docs.python.org/library/re.html#re.IGNORECASE).
re.VERBOSE | re.IGNORECASE | re.ASCII,
)

def __init__(self, version: str) -> None:

# Validate the version and parse it into pieces
match = self._regex.search(version)
match = self._regex.fullmatch(version.strip())
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")

Expand Down
4 changes: 4 additions & 0 deletions tests/test_specifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def test_specifiers_valid(self, specifier):
# Cannot use a prefix matching after a .devN version
"==1.0.dev1.*",
"!=1.0.dev1.*",
# Local version which includes a non-ASCII letter that
# matches regex '[a-z]' when re.IGNORECASE is in force in
# conjunction with implicit re.UNICODE (i.e., without re.ASCII)
"==1.0+\u0130",
],
)
def test_specifiers_invalid(self, specifier):
Expand Down
4 changes: 4 additions & 0 deletions tests/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ def test_valid_versions(self, version):
"1.0+_foobar",
"1.0+foo&asd",
"1.0+1+1",
# Local version which includes a non-ASCII letter that
# matches regex '[a-z]' when re.IGNORECASE is in force in
# conjunction with implicit re.UNICODE (i.e., without re.ASCII)
"1.0+\u0130",
],
)
def test_invalid_versions(self, version):
Expand Down

0 comments on commit 3a92170

Please sign in to comment.