Skip to content

Commit

Permalink
Merge pull request #5875 from uranusjr/egg-info-canonical
Browse files Browse the repository at this point in the history
Rewrite egg_info_matches with canonicalize_name
  • Loading branch information
cjerdonek committed Oct 30, 2018
2 parents 4ccea7e + 033a47c commit 5c93250
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 40 deletions.
1 change: 1 addition & 0 deletions news/5870.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Canonicalize sdist file names so they can be matched to a canonicalized package name passed to ``pip install``.
59 changes: 36 additions & 23 deletions src/pip/_internal/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,8 +815,8 @@ def _link_package_versions(self, link, search):
return

if not version:
version = egg_info_matches(egg_info, search.supplied, link)
if version is None:
version = _egg_info_matches(egg_info, search.canonical)
if not version:
self._log_skipped_link(
link, 'Missing project version for %s' % search.supplied)
return
Expand Down Expand Up @@ -846,33 +846,46 @@ def _link_package_versions(self, link, search):
return InstallationCandidate(search.supplied, version, link)


def egg_info_matches(
egg_info, search_name, link,
_egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)):
def _find_name_version_sep(egg_info, canonical_name):
"""Find the separator's index based on the package's canonical name.
`egg_info` must be an egg info string for the given package, and
`canonical_name` must be the package's canonical name.
This function is needed since the canonicalized name does not necessarily
have the same length as the egg info's name part. An example::
>>> egg_info = 'foo__bar-1.0'
>>> canonical_name = 'foo-bar'
>>> _find_name_version_sep(egg_info, canonical_name)
8
"""
# Project name and version must be separated by one single dash. Find all
# occurrences of dashes; if the string in front of it matches the canonical
# name, this is the one separating the name and version parts.
for i, c in enumerate(egg_info):
if c != "-":
continue
if canonicalize_name(egg_info[:i]) == canonical_name:
return i
raise ValueError("{} does not match {}".format(egg_info, canonical_name))


def _egg_info_matches(egg_info, canonical_name):
"""Pull the version part out of a string.
:param egg_info: The string to parse. E.g. foo-2.1
:param search_name: The name of the package this belongs to. None to
infer the name. Note that this cannot unambiguously parse strings
like foo-2-2 which might be foo, 2-2 or foo-2, 2.
:param link: The link the string came from, for logging on failure.
:param canonical_name: The canonicalized name of the package this
belongs to.
"""
match = _egg_info_re.search(egg_info)
if not match:
logger.debug('Could not parse version from link: %s', link)
try:
version_start = _find_name_version_sep(egg_info, canonical_name) + 1
except ValueError:
return None
if search_name is None:
full_match = match.group(0)
return full_match.split('-', 1)[-1]
name = match.group(0).lower()
# To match the "safe" name that pkg_resources creates:
name = name.replace('_', '-')
# project name and version must be separated by a dash
look_for = search_name.lower() + "-"
if name.startswith(look_for):
return match.group(0)[len(look_for):]
else:
version = egg_info[version_start:]
if not version:
return None
return version


def _determine_base_url(document, page_url):
Expand Down
12 changes: 10 additions & 2 deletions src/pip/_internal/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,15 @@ def supported(self, tags=None):
return bool(set(tags).intersection(self.file_tags))


def _contains_egg_info(
s, _egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)):
"""Determine whether the string looks like an egg_info.
:param s: The string to parse. E.g. foo-2.1
"""
return bool(_egg_info_re.search(s))


class WheelBuilder(object):
"""Build wheels from a RequirementSet."""

Expand Down Expand Up @@ -733,7 +742,6 @@ def build(self, requirements, session, autobuilding=False):
newly built wheel, in preparation for installation.
:return: True if all the wheels built correctly.
"""
from pip._internal import index
from pip._internal.models.link import Link

building_is_possible = self._wheel_dir or (
Expand Down Expand Up @@ -763,7 +771,7 @@ def build(self, requirements, session, autobuilding=False):
if autobuilding:
link = req.link
base, ext = link.splitext()
if index.egg_info_matches(base, None, link) is None:
if not _contains_egg_info(base):
# E.g. local directory. Build wheel just for this run.
ephem_cache = True
if "binary" not in format_control.get_allowed_formats(
Expand Down
91 changes: 76 additions & 15 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from pip._internal.download import PipSession
from pip._internal.index import (
Link, PackageFinder, _determine_base_url, _get_html_page, egg_info_matches,
Link, PackageFinder, _determine_base_url, _egg_info_matches,
_find_name_version_sep, _get_html_page,
)


Expand Down Expand Up @@ -167,32 +168,92 @@ def test_get_formatted_locations_basic_auth():


@pytest.mark.parametrize(
("egg_info", "search_name", "expected"),
("egg_info", "canonical_name", "expected"),
[
# Trivial.
("pip-18.0", "pip", 3),
("zope-interface-4.5.0", "zope-interface", 14),
# Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
("Jinja2-2.10", "jinja2", 6),
("zope.interface-4.5.0", "zope-interface", 14),
("zope_interface-4.5.0", "zope-interface", 14),
# Should be smart enough to parse ambiguous names from the provided
# package name.
("foo-2-2", "foo", 3),
("foo-2-2", "foo-2", 5),
# Should be able to detect collapsed characters in the egg info.
("foo--bar-1.0", "foo-bar", 8),
("foo-_bar-1.0", "foo-bar", 8),
# The package name must not ends with a dash (PEP 508), so the first
# dash would be the separator, not the second.
("zope.interface--4.5.0", "zope-interface", 14),
("zope.interface--", "zope-interface", 14),
# The version part is missing, but the split function does not care.
("zope.interface-", "zope-interface", 14),
],
)
def test_find_name_version_sep(egg_info, canonical_name, expected):
index = _find_name_version_sep(egg_info, canonical_name)
assert index == expected


@pytest.mark.parametrize(
("egg_info", "canonical_name"),
[
# A dash must follow the package name.
("zope.interface4.5.0", "zope-interface"),
("zope.interface.4.5.0", "zope-interface"),
("zope.interface.-4.5.0", "zope-interface"),
("zope.interface", "zope-interface"),
],
)
def test_find_name_version_sep_failure(egg_info, canonical_name):
with pytest.raises(ValueError) as ctx:
_find_name_version_sep(egg_info, canonical_name)
message = "{} does not match {}".format(egg_info, canonical_name)
assert str(ctx.value) == message


@pytest.mark.parametrize(
("egg_info", "canonical_name", "expected"),
[
# Trivial.
("pip-18.0", "pip", "18.0"),
("pip-18.0", None, "18.0"),
("zope-interface-4.5.0", "zope-interface", "4.5.0"),
# Non-canonical names.
# Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
("Jinja2-2.10", "jinja2", "2.10"),
("jinja2-2.10", "Jinja2", "2.10"),
("zope.interface-4.5.0", "zope-interface", "4.5.0"),
("zope_interface-4.5.0", "zope-interface", "4.5.0"),
# Ambiguous names. Should be smart enough if the package name is
# provided, otherwise make a guess.
# Should be smart enough to parse ambiguous names from the provided
# package name.
("foo-2-2", "foo", "2-2"),
("foo-2-2", "foo-2", "2"),
("foo-2-2", None, "2-2"),
("im-valid", None, "valid"),
("zope.interface--4.5.0", "zope-interface", "-4.5.0"),
("zope.interface--", "zope-interface", "-"),
# Should be able to detect collapsed characters in the egg info.
("foo--bar-1.0", "foo-bar", "1.0"),
("foo-_bar-1.0", "foo-bar", "1.0"),
# Invalid names.
("invalid", None, None),
("im_invalid", None, None),
# Invalid.
("the-package-name-8.19", "does-not-match", None),
("zope.interface.-4.5.0", "zope.interface", None),
("zope.interface-", "zope-interface", None),
("zope.interface4.5.0", "zope-interface", None),
("zope.interface.4.5.0", "zope-interface", None),
("zope.interface.-4.5.0", "zope-interface", None),
("zope.interface", "zope-interface", None),
],
)
def test_egg_info_matches(egg_info, search_name, expected):
link = None # Only used for reporting.
version = egg_info_matches(egg_info, search_name, link)
def test_egg_info_matches(egg_info, canonical_name, expected):
version = _egg_info_matches(egg_info, canonical_name)
assert version == expected


Expand Down
20 changes: 20 additions & 0 deletions tests/unit/test_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@
from tests.lib import DATA_DIR


@pytest.mark.parametrize(
"s, expected",
[
# Trivial.
("pip-18.0", True),
# Ambiguous.
("foo-2-2", True),
("im-valid", True),
# Invalid.
("invalid", False),
("im_invalid", False),
],
)
def test_contains_egg_info(s, expected):
result = wheel._contains_egg_info(s)
assert result == expected


@pytest.mark.parametrize("console_scripts",
["pip = pip._internal.main:pip",
"pip:pip = pip._internal.main:pip"])
Expand Down

0 comments on commit 5c93250

Please sign in to comment.