Skip to content

Commit

Permalink
Handle locking all direct reference URL forms. (#2060)
Browse files Browse the repository at this point in the history
Previously, it was assumed that direct reference URLs were either VCS
URLs or else wheel or sdist URLs. This neglected two remaining cases,
both of which failed to lock with better or worse error messages. The
two missed cases were:
1. URLs of source archives not conforming to the sdist quasi-standard
   naming convention of `<project name>-<version>{.tar.gz,.zip}`.
2. Local `file://` URLs pointing at project directories.

A notable example of the 1st case is the project archives provided by GitHub.
A notable need for the 2nd case comes from Pants, where Pip-proprietary
requirement strings (e.g.: `/path/to/project`) are not handled and a
direct reference URL must be used instead (e.g.: `projectname @
file:///path/to/project`).

Fixes #2057
  • Loading branch information
jsirois committed Feb 23, 2023
1 parent 660b66d commit 93e904a
Show file tree
Hide file tree
Showing 13 changed files with 513 additions and 243 deletions.
5 changes: 3 additions & 2 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,9 @@ def _chmod(self, info, path):
# This magic works to extract perm bits from the 32 bit external file attributes field for
# unix-created zip files, for the layout, see:
# https://www.forensicswiki.org/wiki/ZIP#External_file_attributes
attr = info.external_attr >> 16
os.chmod(path, attr)
if info.external_attr > 0xFFFF:
attr = info.external_attr >> 16
os.chmod(path, attr)


@contextlib.contextmanager
Expand Down
6 changes: 4 additions & 2 deletions pex/dist_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,10 @@ def from_filename(cls, path):
# characters and dashes.
fname = _strip_sdist_path(path)
if fname is not None:
project_name, version = fname.rsplit("-", 1)
return cls(project_name=project_name, version=version)
components = fname.rsplit("-", 1)
if len(components) == 2:
project_name, version = components
return cls(project_name=project_name, version=version)

raise UnrecognizedDistributionFormat(
"The distribution at path {!r} does not have a file name matching known sdist or wheel "
Expand Down
31 changes: 5 additions & 26 deletions pex/pip/download_observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from __future__ import absolute_import

from pex.pip.log_analyzer import LogAnalyzer
from pex.typing import TYPE_CHECKING, Generic
from pex.typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Iterable, Mapping, Optional, Text
Expand All @@ -21,28 +21,7 @@ class Patch(object):
env = attr.ib(factory=dict) # type: Mapping[str, str]


if TYPE_CHECKING:
from typing import TypeVar

_L = TypeVar("_L", bound=LogAnalyzer)


class DownloadObserver(Generic["_L"]):
def __init__(
self,
analyzer, # type: _L
patch=Patch(), # type: Patch
):
# type: (...) -> None
self._analyzer = analyzer
self._patch = patch

@property
def analyzer(self):
# type: () -> _L
return self._analyzer

@property
def patch(self):
# type: () -> Patch
return self._patch
@attr.s(frozen=True)
class DownloadObserver(object):
analyzer = attr.ib() # type: Optional[LogAnalyzer]
patch = attr.ib() # type: Patch
3 changes: 2 additions & 1 deletion pex/pip/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ def spawn_download_distributions(
code = None # type: Optional[Text]
for obs in (foreign_platform_observer, observer):
if obs:
log_analyzers.append(obs.analyzer)
if obs.analyzer:
log_analyzers.append(obs.analyzer)
download_cmd.extend(obs.patch.args)
extra_env.update(obs.patch.env)
code = code or obs.patch.code
Expand Down
10 changes: 5 additions & 5 deletions pex/pip/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

from __future__ import absolute_import

import hashlib
import os
import re

from pex import hashing
from pex.common import filter_pyc_dirs, filter_pyc_files, open_zip, temporary_dir
from pex.hashing import Sha256
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.requirements import VCS
Expand All @@ -18,7 +18,7 @@
from pex.typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Union
from typing import Tuple, Union

from pex.hashing import HintedDigest

Expand Down Expand Up @@ -61,16 +61,16 @@ def fingerprint_downloaded_vcs_archive(
version, # type: str
vcs, # type: VCS.Value
):
# type: (...) -> Fingerprint
# type: (...) -> Tuple[Fingerprint, str]

archive_path = try_(
_find_built_source_dist(
build_dir=download_dir, project_name=ProjectName(project_name), version=Version(version)
)
)
digest = hashlib.sha256()
digest = Sha256()
digest_vcs_archive(archive_path=archive_path, vcs=vcs, digest=digest)
return Fingerprint(algorithm=digest.name, hash=digest.hexdigest())
return Fingerprint.from_digest(digest), archive_path


def digest_vcs_archive(
Expand Down
24 changes: 6 additions & 18 deletions pex/resolve/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
from pex.atomic_directory import atomic_directory
from pex.common import safe_mkdir, safe_mkdtemp
from pex.compatibility import unquote, urlparse
from pex.fetcher import URLFetcher
from pex.hashing import Sha256
from pex.jobs import Job, Raise, SpawnedJob, execute_parallel
from pex.pip.download_observer import DownloadObserver
from pex.pip.installation import get_pip
from pex.pip.tool import PackageIndexConfiguration, Pip
from pex.requirements import parse_requirement_string
from pex.resolve import locker
from pex.resolve.locked_resolve import Artifact, FileArtifact, LockConfiguration, LockStyle
from pex.resolve.pep_691.fingerprint_service import FingerprintService
from pex.resolve.resolved_requirement import Fingerprint, PartialArtifact
from pex.resolve.resolvers import Resolver
from pex.result import Error
Expand Down Expand Up @@ -75,7 +73,7 @@ def _fingerprint_and_move(path):
with atomic_directory(target_dir) as atomic_dir:
if not atomic_dir.is_finalized():
shutil.move(path, os.path.join(atomic_dir.work_dir, os.path.basename(path)))
return Fingerprint(algorithm=fingerprint.algorithm, hash=fingerprint)
return Fingerprint.from_hashing_fingerprint(fingerprint)

@staticmethod
def _create_file_artifact(
Expand Down Expand Up @@ -110,19 +108,9 @@ def _download(
# care about wheel tags, environment markers or Requires-Python. The locker's download
# observer does just this for universal locks with no target system or requires python
# restrictions.
download_observer = locker.patch(
root_requirements=[parse_requirement_string(url)],
pip_version=self.package_index_configuration.pip_version,
resolver=self.resolver,
lock_configuration=LockConfiguration(style=LockStyle.UNIVERSAL),
download_dir=download_dir,
fingerprint_service=FingerprintService.create(
url_fetcher=URLFetcher(
network_configuration=self.package_index_configuration.network_configuration,
password_entries=self.package_index_configuration.password_entries,
),
max_parallel_jobs=self.max_parallel_jobs,
),
download_observer = DownloadObserver(
analyzer=None,
patch=locker.patch(lock_configuration=LockConfiguration(style=LockStyle.UNIVERSAL)),
)
return self.pip.spawn_download_distributions(
download_dir=download_dir,
Expand Down Expand Up @@ -158,7 +146,7 @@ def _download_and_fingerprint(self, url):

def _to_file_artifact(self, artifact):
# type: (PartialArtifact) -> SpawnedJob[FileArtifact]
url = artifact.url
url = artifact.url.normalized_url
fingerprint = artifact.fingerprint
if fingerprint:
return SpawnedJob.completed(
Expand Down
80 changes: 64 additions & 16 deletions pex/resolve/locked_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@
from pex.pep_503 import ProjectName
from pex.rank import Rank
from pex.requirements import VCS, VCSScheme, parse_scheme
from pex.resolve.resolved_requirement import Fingerprint, PartialArtifact, Pin, ResolvedRequirement
from pex.resolve.resolved_requirement import (
ArtifactURL,
Fingerprint,
PartialArtifact,
Pin,
ResolvedRequirement,
)
from pex.result import Error
from pex.sorted_tuple import SortedTuple
from pex.targets import LocalInterpreter, Target
Expand Down Expand Up @@ -95,29 +101,48 @@ def _validate_only_set_for_universal(
@attr.s(frozen=True)
class Artifact(object):
@classmethod
def from_url(
def from_artifact_url(
cls,
url, # type: str
artifact_url, # type: ArtifactURL
fingerprint, # type: Fingerprint
verified=False, # type: bool
):
# type: (...) -> Union[FileArtifact, LocalProjectArtifact, VCSArtifact]
url_info = urlparse.urlparse(url)
parsed_scheme = parse_scheme(url_info.scheme)
if isinstance(parsed_scheme, VCSScheme):
return VCSArtifact(
url=url, fingerprint=fingerprint, verified=verified, vcs=parsed_scheme.vcs
if isinstance(artifact_url.scheme, VCSScheme):
return VCSArtifact.from_artifact_url(
artifact_url=artifact_url,
fingerprint=fingerprint,
verified=verified,
)

path = unquote(url_info.path)
if "file" == parsed_scheme and os.path.isdir(path):
directory = os.path.normpath(path)
if "file" == artifact_url.scheme and os.path.isdir(artifact_url.path):
directory = os.path.normpath(artifact_url.path)
return LocalProjectArtifact(
url=url, fingerprint=fingerprint, verified=verified, directory=directory
url=artifact_url.normalized_url,
fingerprint=fingerprint,
verified=verified,
directory=directory,
)

filename = os.path.basename(path)
return FileArtifact(url=url, fingerprint=fingerprint, verified=verified, filename=filename)
filename = os.path.basename(artifact_url.path)
return FileArtifact(
url=artifact_url.normalized_url,
fingerprint=fingerprint,
verified=verified,
filename=filename,
)

@classmethod
def from_url(
cls,
url, # type: str
fingerprint, # type: Fingerprint
verified=False, # type: bool
):
# type: (...) -> Union[FileArtifact, LocalProjectArtifact, VCSArtifact]
return cls.from_artifact_url(
artifact_url=ArtifactURL.parse(url), fingerprint=fingerprint, verified=verified
)

url = attr.ib() # type: str
fingerprint = attr.ib() # type: Fingerprint
Expand Down Expand Up @@ -152,6 +177,29 @@ def is_source(self):

@attr.s(frozen=True)
class VCSArtifact(Artifact):
@classmethod
def from_artifact_url(
cls,
artifact_url, # type: ArtifactURL
fingerprint, # type: Fingerprint
verified=False, # type: bool
):
# type: (...) -> VCSArtifact
if not isinstance(artifact_url.scheme, VCSScheme):
raise ValueError(
"The given artifact URL is not that of a VCS artifact: {url}".format(
url=artifact_url.raw_url
)
)
return cls(
# N.B.: We need the raw URL in order to have access to the fragment needed for
# `as_unparsed_requirement`.
url=artifact_url.raw_url,
fingerprint=fingerprint,
verified=verified,
vcs=artifact_url.scheme.vcs,
)

vcs = attr.ib() # type: VCS.Value

@property
Expand Down Expand Up @@ -447,8 +495,8 @@ def resolve_fingerprint(partial_artifact):
to_map="\n".join(map(str, artifacts_to_fingerprint)),
)
)
return Artifact.from_url(
url=partial_artifact.url,
return Artifact.from_artifact_url(
artifact_url=partial_artifact.url,
fingerprint=partial_artifact.fingerprint,
verified=partial_artifact.verified,
)
Expand Down
Loading

0 comments on commit 93e904a

Please sign in to comment.