diff --git a/pex/common.py b/pex/common.py index 5df9a3192..8031243c7 100644 --- a/pex/common.py +++ b/pex/common.py @@ -213,8 +213,9 @@ def _chmod(self, info, path): # This magic works to extract perm bits from the 32 bit external file attributes field for # unix-created zip files, for the layout, see: # https://www.forensicswiki.org/wiki/ZIP#External_file_attributes - attr = info.external_attr >> 16 - os.chmod(path, attr) + if info.external_attr > 0xFFFF: + attr = info.external_attr >> 16 + os.chmod(path, attr) @contextlib.contextmanager diff --git a/pex/dist_metadata.py b/pex/dist_metadata.py index a8826f7b7..5f0cd071b 100644 --- a/pex/dist_metadata.py +++ b/pex/dist_metadata.py @@ -291,8 +291,10 @@ def from_filename(cls, path): # characters and dashes. fname = _strip_sdist_path(path) if fname is not None: - project_name, version = fname.rsplit("-", 1) - return cls(project_name=project_name, version=version) + components = fname.rsplit("-", 1) + if len(components) == 2: + project_name, version = components + return cls(project_name=project_name, version=version) raise UnrecognizedDistributionFormat( "The distribution at path {!r} does not have a file name matching known sdist or wheel " diff --git a/pex/pip/download_observer.py b/pex/pip/download_observer.py index dea9df414..67bbaebcb 100644 --- a/pex/pip/download_observer.py +++ b/pex/pip/download_observer.py @@ -4,7 +4,7 @@ from __future__ import absolute_import from pex.pip.log_analyzer import LogAnalyzer -from pex.typing import TYPE_CHECKING, Generic +from pex.typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Iterable, Mapping, Optional, Text @@ -21,28 +21,7 @@ class Patch(object): env = attr.ib(factory=dict) # type: Mapping[str, str] -if TYPE_CHECKING: - from typing import TypeVar - - _L = TypeVar("_L", bound=LogAnalyzer) - - -class DownloadObserver(Generic["_L"]): - def __init__( - self, - analyzer, # type: _L - patch=Patch(), # type: Patch - ): - # type: (...) -> None - self._analyzer = analyzer - self._patch = patch - - @property - def analyzer(self): - # type: () -> _L - return self._analyzer - - @property - def patch(self): - # type: () -> Patch - return self._patch +@attr.s(frozen=True) +class DownloadObserver(object): + analyzer = attr.ib() # type: Optional[LogAnalyzer] + patch = attr.ib() # type: Patch diff --git a/pex/pip/tool.py b/pex/pip/tool.py index b9f153248..f07d9cc4e 100644 --- a/pex/pip/tool.py +++ b/pex/pip/tool.py @@ -475,7 +475,8 @@ def spawn_download_distributions( code = None # type: Optional[Text] for obs in (foreign_platform_observer, observer): if obs: - log_analyzers.append(obs.analyzer) + if obs.analyzer: + log_analyzers.append(obs.analyzer) download_cmd.extend(obs.patch.args) extra_env.update(obs.patch.env) code = code or obs.patch.code diff --git a/pex/pip/vcs.py b/pex/pip/vcs.py index 10770a4be..387f77790 100644 --- a/pex/pip/vcs.py +++ b/pex/pip/vcs.py @@ -3,12 +3,12 @@ from __future__ import absolute_import -import hashlib import os import re from pex import hashing from pex.common import filter_pyc_dirs, filter_pyc_files, open_zip, temporary_dir +from pex.hashing import Sha256 from pex.pep_440 import Version from pex.pep_503 import ProjectName from pex.requirements import VCS @@ -18,7 +18,7 @@ from pex.typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Union + from typing import Tuple, Union from pex.hashing import HintedDigest @@ -61,16 +61,16 @@ def fingerprint_downloaded_vcs_archive( version, # type: str vcs, # type: VCS.Value ): - # type: (...) -> Fingerprint + # type: (...) -> Tuple[Fingerprint, str] archive_path = try_( _find_built_source_dist( build_dir=download_dir, project_name=ProjectName(project_name), version=Version(version) ) ) - digest = hashlib.sha256() + digest = Sha256() digest_vcs_archive(archive_path=archive_path, vcs=vcs, digest=digest) - return Fingerprint(algorithm=digest.name, hash=digest.hexdigest()) + return Fingerprint.from_digest(digest), archive_path def digest_vcs_archive( diff --git a/pex/resolve/downloads.py b/pex/resolve/downloads.py index 57cc0b911..cd2b21f5d 100644 --- a/pex/resolve/downloads.py +++ b/pex/resolve/downloads.py @@ -7,15 +7,13 @@ from pex.atomic_directory import atomic_directory from pex.common import safe_mkdir, safe_mkdtemp from pex.compatibility import unquote, urlparse -from pex.fetcher import URLFetcher from pex.hashing import Sha256 from pex.jobs import Job, Raise, SpawnedJob, execute_parallel +from pex.pip.download_observer import DownloadObserver from pex.pip.installation import get_pip from pex.pip.tool import PackageIndexConfiguration, Pip -from pex.requirements import parse_requirement_string from pex.resolve import locker from pex.resolve.locked_resolve import Artifact, FileArtifact, LockConfiguration, LockStyle -from pex.resolve.pep_691.fingerprint_service import FingerprintService from pex.resolve.resolved_requirement import Fingerprint, PartialArtifact from pex.resolve.resolvers import Resolver from pex.result import Error @@ -75,7 +73,7 @@ def _fingerprint_and_move(path): with atomic_directory(target_dir) as atomic_dir: if not atomic_dir.is_finalized(): shutil.move(path, os.path.join(atomic_dir.work_dir, os.path.basename(path))) - return Fingerprint(algorithm=fingerprint.algorithm, hash=fingerprint) + return Fingerprint.from_hashing_fingerprint(fingerprint) @staticmethod def _create_file_artifact( @@ -110,19 +108,9 @@ def _download( # care about wheel tags, environment markers or Requires-Python. The locker's download # observer does just this for universal locks with no target system or requires python # restrictions. - download_observer = locker.patch( - root_requirements=[parse_requirement_string(url)], - pip_version=self.package_index_configuration.pip_version, - resolver=self.resolver, - lock_configuration=LockConfiguration(style=LockStyle.UNIVERSAL), - download_dir=download_dir, - fingerprint_service=FingerprintService.create( - url_fetcher=URLFetcher( - network_configuration=self.package_index_configuration.network_configuration, - password_entries=self.package_index_configuration.password_entries, - ), - max_parallel_jobs=self.max_parallel_jobs, - ), + download_observer = DownloadObserver( + analyzer=None, + patch=locker.patch(lock_configuration=LockConfiguration(style=LockStyle.UNIVERSAL)), ) return self.pip.spawn_download_distributions( download_dir=download_dir, @@ -158,7 +146,7 @@ def _download_and_fingerprint(self, url): def _to_file_artifact(self, artifact): # type: (PartialArtifact) -> SpawnedJob[FileArtifact] - url = artifact.url + url = artifact.url.normalized_url fingerprint = artifact.fingerprint if fingerprint: return SpawnedJob.completed( diff --git a/pex/resolve/locked_resolve.py b/pex/resolve/locked_resolve.py index 2cf74bd34..9141c8501 100644 --- a/pex/resolve/locked_resolve.py +++ b/pex/resolve/locked_resolve.py @@ -16,7 +16,13 @@ from pex.pep_503 import ProjectName from pex.rank import Rank from pex.requirements import VCS, VCSScheme, parse_scheme -from pex.resolve.resolved_requirement import Fingerprint, PartialArtifact, Pin, ResolvedRequirement +from pex.resolve.resolved_requirement import ( + ArtifactURL, + Fingerprint, + PartialArtifact, + Pin, + ResolvedRequirement, +) from pex.result import Error from pex.sorted_tuple import SortedTuple from pex.targets import LocalInterpreter, Target @@ -95,29 +101,48 @@ def _validate_only_set_for_universal( @attr.s(frozen=True) class Artifact(object): @classmethod - def from_url( + def from_artifact_url( cls, - url, # type: str + artifact_url, # type: ArtifactURL fingerprint, # type: Fingerprint verified=False, # type: bool ): # type: (...) -> Union[FileArtifact, LocalProjectArtifact, VCSArtifact] - url_info = urlparse.urlparse(url) - parsed_scheme = parse_scheme(url_info.scheme) - if isinstance(parsed_scheme, VCSScheme): - return VCSArtifact( - url=url, fingerprint=fingerprint, verified=verified, vcs=parsed_scheme.vcs + if isinstance(artifact_url.scheme, VCSScheme): + return VCSArtifact.from_artifact_url( + artifact_url=artifact_url, + fingerprint=fingerprint, + verified=verified, ) - path = unquote(url_info.path) - if "file" == parsed_scheme and os.path.isdir(path): - directory = os.path.normpath(path) + if "file" == artifact_url.scheme and os.path.isdir(artifact_url.path): + directory = os.path.normpath(artifact_url.path) return LocalProjectArtifact( - url=url, fingerprint=fingerprint, verified=verified, directory=directory + url=artifact_url.normalized_url, + fingerprint=fingerprint, + verified=verified, + directory=directory, ) - filename = os.path.basename(path) - return FileArtifact(url=url, fingerprint=fingerprint, verified=verified, filename=filename) + filename = os.path.basename(artifact_url.path) + return FileArtifact( + url=artifact_url.normalized_url, + fingerprint=fingerprint, + verified=verified, + filename=filename, + ) + + @classmethod + def from_url( + cls, + url, # type: str + fingerprint, # type: Fingerprint + verified=False, # type: bool + ): + # type: (...) -> Union[FileArtifact, LocalProjectArtifact, VCSArtifact] + return cls.from_artifact_url( + artifact_url=ArtifactURL.parse(url), fingerprint=fingerprint, verified=verified + ) url = attr.ib() # type: str fingerprint = attr.ib() # type: Fingerprint @@ -152,6 +177,29 @@ def is_source(self): @attr.s(frozen=True) class VCSArtifact(Artifact): + @classmethod + def from_artifact_url( + cls, + artifact_url, # type: ArtifactURL + fingerprint, # type: Fingerprint + verified=False, # type: bool + ): + # type: (...) -> VCSArtifact + if not isinstance(artifact_url.scheme, VCSScheme): + raise ValueError( + "The given artifact URL is not that of a VCS artifact: {url}".format( + url=artifact_url.raw_url + ) + ) + return cls( + # N.B.: We need the raw URL in order to have access to the fragment needed for + # `as_unparsed_requirement`. + url=artifact_url.raw_url, + fingerprint=fingerprint, + verified=verified, + vcs=artifact_url.scheme.vcs, + ) + vcs = attr.ib() # type: VCS.Value @property @@ -447,8 +495,8 @@ def resolve_fingerprint(partial_artifact): to_map="\n".join(map(str, artifacts_to_fingerprint)), ) ) - return Artifact.from_url( - url=partial_artifact.url, + return Artifact.from_artifact_url( + artifact_url=partial_artifact.url, fingerprint=partial_artifact.fingerprint, verified=partial_artifact.verified, ) diff --git a/pex/resolve/locker.py b/pex/resolve/locker.py index fb0af62c2..e7cb51a54 100644 --- a/pex/resolve/locker.py +++ b/pex/resolve/locker.py @@ -8,25 +8,32 @@ import os import pkgutil import re -import sys -from collections import defaultdict +from collections import OrderedDict, defaultdict +from pex import hashing from pex.common import safe_mkdtemp from pex.compatibility import unquote, urlparse from pex.dist_metadata import ProjectNameAndVersion, Requirement +from pex.hashing import Sha256 from pex.interpreter_constraints import iter_compatible_versions from pex.orderedset import OrderedSet from pex.pep_440 import Version -from pex.pip.download_observer import DownloadObserver, Patch -from pex.pip.local_project import fingerprint_local_project +from pex.pip.download_observer import Patch +from pex.pip.local_project import digest_local_project from pex.pip.log_analyzer import LogAnalyzer from pex.pip.vcs import fingerprint_downloaded_vcs_archive from pex.pip.version import PipVersionValue -from pex.requirements import VCS, VCSRequirement, VCSScheme, parse_scheme +from pex.requirements import ArchiveScheme, VCSRequirement, VCSScheme, parse_scheme from pex.resolve.locked_resolve import LockConfiguration, LockStyle, TargetSystem from pex.resolve.pep_691.fingerprint_service import FingerprintService from pex.resolve.pep_691.model import Endpoint -from pex.resolve.resolved_requirement import Fingerprint, PartialArtifact, Pin, ResolvedRequirement +from pex.resolve.resolved_requirement import ( + ArtifactURL, + Fingerprint, + PartialArtifact, + Pin, + ResolvedRequirement, +) from pex.resolve.resolvers import Resolver from pex.tracer import TRACER from pex.typing import TYPE_CHECKING @@ -52,12 +59,6 @@ from pex.third_party import attr -@attr.s(frozen=True) -class _VCSPartialInfo(object): - vcs = attr.ib() # type: VCS.Value - via = attr.ib() # type: Tuple[str, ...] - - @attr.s(frozen=True) class LockResult(object): resolved_requirements = attr.ib() # type: Tuple[ResolvedRequirement, ...] @@ -204,6 +205,47 @@ def normalize_url(self, url): return str(credentialed_url) +class AnalyzeError(Exception): + """Indicates an error analyzing lock data.""" + + +@attr.s(frozen=True) +class ArtifactBuildResult(object): + url = attr.ib() # type: ArtifactURL + pin = attr.ib() # type: Pin + requirement = attr.ib() # type: Requirement + + +@attr.s(frozen=True) +class ArtifactBuildObserver(object): + _done_building_pattern = attr.ib() # type: Pattern + _artifact_url = attr.ib() # type: ArtifactURL + + def is_done_building(self, line): + # type: (str) -> bool + return self._done_building_pattern.search(line) is not None + + def build_result(self, line): + # type: (str) -> Optional[ArtifactBuildResult] + + match = re.search( + r"Source in .+ has version (?P[^\s]+), which satisfies requirement " + r"(?P.+) .*from {url}".format(url=re.escape(self._artifact_url.raw_url)), + line, + ) + if not match: + return None + + version = Version(match.group("version")) + requirement = Requirement.parse(match.group("requirement")) + pin = Pin(project_name=requirement.project_name, version=version) + return ArtifactBuildResult( + url=self._artifact_url, + pin=pin, + requirement=requirement, + ) + + class Locker(LogAnalyzer): def __init__( self, @@ -224,14 +266,15 @@ def __init__( self._fingerprint_service = fingerprint_service or FingerprintService() self._saved = set() # type: Set[Pin] + self._selected_path_to_pin = {} # type: Dict[str, Pin] self._resolved_requirements = [] # type: List[ResolvedRequirement] self._pep_691_endpoints = set() # type: Set[Endpoint] - self._links = defaultdict(OrderedSet) # type: DefaultDict[Pin, OrderedSet[PartialArtifact]] - self._done_building_re = None # type: Optional[Pattern] - self._source_built_re = None # type: Optional[Pattern] + self._links = defaultdict( + OrderedDict + ) # type: DefaultDict[Pin, OrderedDict[ArtifactURL, PartialArtifact]] + self._artifact_build_observer = None # type: Optional[ArtifactBuildObserver] self._local_projects = OrderedSet() # type: OrderedSet[str] - self._vcs_partial_info = None # type: Optional[_VCSPartialInfo] self._lock_result = None # type: Optional[LockResult] @property @@ -249,21 +292,20 @@ def should_collect(self, returncode): return returncode == 0 @staticmethod - def _extract_resolve_data(url): - # type: (str) -> Tuple[Pin, PartialArtifact] + def _extract_resolve_data(artifact_url): + # type: (ArtifactURL) -> Tuple[Pin, PartialArtifact] fingerprint = None # type: Optional[Fingerprint] - fingerprint_match = re.search(r"(?P[^#]+)#(?P[^=]+)=(?P.*)$", url) + fingerprint_match = re.search( + r"[^#]+#(?P[^=]+)=(?P.*)$", artifact_url.raw_url + ) if fingerprint_match: - url = fingerprint_match.group("url") algorithm = fingerprint_match.group("algorithm") hash_ = fingerprint_match.group("hash") fingerprint = Fingerprint(algorithm=algorithm, hash=hash_) - pin = Pin.canonicalize( - ProjectNameAndVersion.from_filename(unquote(urlparse.urlparse(url).path)) - ) - partial_artifact = PartialArtifact(url, fingerprint) + pin = Pin.canonicalize(ProjectNameAndVersion.from_filename(artifact_url.path)) + partial_artifact = PartialArtifact(artifact_url, fingerprint) return pin, partial_artifact def analyze(self, line): @@ -272,15 +314,16 @@ def analyze(self, line): # The log sequence for processing a resolved requirement is as follows (log lines irrelevant # to our purposes omitted): # - # 1.) "... Found link ..." + # 1.) "... Found link ..." # ... - # 1.) "... Found link ..." - # 2.) "... Added to build tracker ..." - # 3.) Lines related to extracting metadata from 's artifact - # * 4.) "... Source in has version , which satisfies requirement " - # " from ..." - # 5.) "... Removed from ... from build tracker ..." - # 6.) "... Saved / + # 1.) "... Found link ..." + # 2.) "... Added to build tracker ..." + # * 3.) Lines related to extracting metadata from if the selected + # distribution is an sdist in any form (VCS, local directory, source archive). + # * 3.5.) "... Source in has version , which satisfies requirement " + # " from ..." + # 4.) "... Removed from ... from build tracker ..." + # 5.) "... Saved / # The lines in section 3 can contain this same pattern of lines if the metadata extraction # proceeds via PEP-517 which recursively uses Pip to resolve build dependencies. We want to @@ -292,80 +335,75 @@ def analyze(self, line): # The section 4 line will be present for requirements that represent either local source # directories or VCS requirements and can be used to learn their version. - if self._done_building_re: - if self._done_building_re.search(line): - self._done_building_re = None - elif self._vcs_partial_info is not None: - match = re.search( - r"Source in .+ has version (?P[^\s]+), which satisfies requirement " - r"(?P.+) from (?P[^\s]+)(?: \(from .+)?$", - line, - ) - if match: - vcs_partial_info = self._vcs_partial_info - self._vcs_partial_info = None - - raw_requirement = match.group("requirement") - requirement = Requirement.parse(raw_requirement) - version = match.group("version") - - # VCS requirements are satisfied by a singular source; so we need not consult - # links collected in this round. - self._resolved_requirements.append( - ResolvedRequirement( - requirement=requirement, - pin=Pin( - project_name=requirement.project_name, version=Version(version) - ), - artifact=PartialArtifact( - url=self._vcs_url_manager.normalize_url(match.group("url")), - fingerprint=fingerprint_downloaded_vcs_archive( - download_dir=self._download_dir, - project_name=str(requirement.project_name), - version=version, - vcs=vcs_partial_info.vcs, - ), - verified=True, - ), - via=vcs_partial_info.via, - ) - ) - return self.Continue() + if self._artifact_build_observer: + if self._artifact_build_observer.is_done_building(line): + self._artifact_build_observer = None + return self.Continue() - if self._source_built_re: - match = self._source_built_re.search(line) - if match: - raw_requirement = match.group("requirement") - file_url = match.group("file_url") - self._done_building_re = re.compile( - r"Removed {requirement} from {file_url} (?:.* )?from build tracker".format( - requirement=re.escape(raw_requirement), file_url=re.escape(file_url) + build_result = self._artifact_build_observer.build_result(line) + if build_result: + artifact_url = build_result.url + if isinstance(artifact_url.scheme, VCSScheme): + source_fingerprint, archive_path = fingerprint_downloaded_vcs_archive( + download_dir=self._download_dir, + project_name=str(build_result.pin.project_name), + version=str(build_result.pin.version), + vcs=artifact_url.scheme.vcs, + ) + selected_path = os.path.basename(archive_path) + artifact_url = ArtifactURL.parse( + self._vcs_url_manager.normalize_url(artifact_url.raw_url) + ) + self._selected_path_to_pin[selected_path] = build_result.pin + elif isinstance(artifact_url.scheme, ArchiveScheme.Value): + selected_path = os.path.basename(artifact_url.path) + source_archive_path = os.path.join(self._download_dir, selected_path) + if not os.path.isfile(source_archive_path): + raise AnalyzeError( + "Failed to lock {artifact}. Could not obtain its content for " + "analysis.".format(artifact=artifact_url) + ) + digest = Sha256() + hashing.file_hash(source_archive_path, digest) + source_fingerprint = Fingerprint.from_digest(digest) + self._selected_path_to_pin[selected_path] = build_result.pin + elif "file" == artifact_url.scheme: + digest = Sha256() + if os.path.isfile(artifact_url.path): + hashing.file_hash(artifact_url.path, digest) + self._selected_path_to_pin[ + os.path.basename(artifact_url.path) + ] = build_result.pin + else: + digest_local_project( + directory=artifact_url.path, + digest=digest, + pip_version=self._pip_version, + resolver=self._resolver, + ) + self._local_projects.add(artifact_url.path) + self._saved.add(build_result.pin) + source_fingerprint = Fingerprint.from_digest(digest) + else: + raise AnalyzeError( + "Unexpected scheme {scheme!r} for artifact at {url}".format( + scheme=artifact_url.scheme, url=artifact_url + ) ) - ) - self._source_built_re = None - - requirement = Requirement.parse(raw_requirement) - version = match.group("version") - pin = Pin(project_name=requirement.project_name, version=Version(version)) + additional_artifacts = self._links[build_result.pin] + additional_artifacts.pop(artifact_url, None) - local_project_path = urlparse.urlparse(file_url).path - digest = fingerprint_local_project( - local_project_path, self._pip_version, self._resolver - ) - self._local_projects.add(local_project_path) self._resolved_requirements.append( ResolvedRequirement( - requirement=requirement, - pin=pin, + requirement=build_result.requirement, + pin=build_result.pin, artifact=PartialArtifact( - url=file_url, - fingerprint=Fingerprint(algorithm=digest.algorithm, hash=digest), - verified=True, + url=artifact_url, fingerprint=source_fingerprint, verified=True ), + additional_artifacts=tuple(additional_artifacts.values()), ) ) - self._saved.add(pin) return self.Continue() match = re.search( @@ -383,75 +421,63 @@ def analyze(self, line): return self.Continue() match = re.search( - r"Added (?P.+) from (?P[^\s]+) (?:\(from (?P.*)\) )?to build " - r"tracker", + r"Added (?P.+) from (?P[^\s]+) .*to build tracker", line, ) if match: raw_requirement = match.group("requirement") - url = match.group("url") - self._done_building_re = re.compile( - r"Removed {requirement} from {url} (?:.* )?from build tracker".format( - requirement=re.escape(raw_requirement), url=re.escape(url) - ) - ) - - from_ = match.group("from") - if from_: - via = tuple(from_.split("->")) - else: - via = () - - parsed_scheme = parse_scheme(urlparse.urlparse(url).scheme) - if isinstance(parsed_scheme, VCSScheme): - # We'll get the remaining information we need to record the resolved VCS requirement - # in a later log line; so just save what we have so far. - self._vcs_partial_info = _VCSPartialInfo(vcs=parsed_scheme.vcs, via=via) - else: + url = ArtifactURL.parse(match.group("url")) + if url.is_wheel: requirement = Requirement.parse(raw_requirement) - project_name_and_version, partial_artifact = self._extract_resolve_data(url) - - additional_artifacts = self._links[project_name_and_version] - additional_artifacts.discard(partial_artifact) + pin, partial_artifact = self._extract_resolve_data(url) + additional_artifacts = self._links[pin] + additional_artifacts.pop(url, None) self._resolved_requirements.append( ResolvedRequirement( requirement=requirement, - pin=project_name_and_version, + pin=pin, artifact=partial_artifact, - additional_artifacts=tuple(additional_artifacts), - via=via, + additional_artifacts=tuple(additional_artifacts.values()), ) ) + self._selected_path_to_pin[os.path.basename(url.path)] = pin + else: + self._artifact_build_observer = ArtifactBuildObserver( + done_building_pattern=re.compile( + r"Removed {requirement} from {url} (?:.* )?from build tracker".format( + requirement=re.escape(raw_requirement), url=re.escape(url.raw_url) + ) + ), + artifact_url=url, + ) return self.Continue() match = re.search(r"Added (?Pfile:.+) to build tracker", line) if match: file_url = match.group("file_url") - self._source_built_re = re.compile( - r"Source in .+ has version (?P.+), which satisfies requirement " - r"(?P.+) from (?P{file_url})".format( - file_url=re.escape(file_url) - ) + self._artifact_build_observer = ArtifactBuildObserver( + done_building_pattern=re.compile( + r"Removed .+ from {file_url} from build tracker".format( + file_url=re.escape(file_url) + ) + ), + artifact_url=ArtifactURL.parse(file_url), ) return self.Continue() match = re.search(r"Saved (?P.+)$", line) if match: - self._saved.add( - Pin.canonicalize( - ProjectNameAndVersion.from_filename(os.path.basename(match.group("file_path"))) - ) - ) + saved_path = match.group("file_path") + self._saved.add(self._selected_path_to_pin[os.path.basename(saved_path)]) return self.Continue() if self.style in (LockStyle.SOURCES, LockStyle.UNIVERSAL): match = re.search(r"Found link (?P[^\s]+)(?: \(from .*\))?, version: ", line) if match: - project_name_and_version, partial_artifact = self._extract_resolve_data( - match.group("url") - ) - self._links[project_name_and_version].add(partial_artifact) + url = ArtifactURL.parse(match.group("url")) + pin, partial_artifact = self._extract_resolve_data(url) + self._links[pin][url] = partial_artifact return self.Continue() return self.Continue() @@ -529,15 +555,8 @@ def lock_result(self): } -def patch( - root_requirements, # type: Iterable[ParsedRequirement] - pip_version, # type: PipVersionValue - resolver, # type: Resolver - lock_configuration, # type: LockConfiguration - download_dir, # type: str - fingerprint_service=None, # type: Optional[FingerprintService] -): - # type: (...) -> DownloadObserver[Locker] +def patch(lock_configuration): + # type: (LockConfiguration) -> Patch code = None # type: Optional[Text] env = {} # type: Dict[str, str] @@ -591,14 +610,4 @@ def patch( json.dump(target_systems, fp) env.update(_PEX_TARGET_SYSTEMS_FILE=fp.name) - return DownloadObserver( - analyzer=Locker( - root_requirements=root_requirements, - pip_version=pip_version, - resolver=resolver, - lock_configuration=lock_configuration, - download_dir=download_dir, - fingerprint_service=fingerprint_service, - ), - patch=Patch(code=code, env=env), - ) + return Patch(code=code, env=env) diff --git a/pex/resolve/lockfile/create.py b/pex/resolve/lockfile/create.py index 6806f7d44..9ca0dc356 100644 --- a/pex/resolve/lockfile/create.py +++ b/pex/resolve/lockfile/create.py @@ -55,6 +55,8 @@ from pex.hashing import HintedDigest from pex.requirements import ParsedRequirement + + AnyArtifact = Union[FileArtifact, LocalProjectArtifact, VCSArtifact] else: from pex.third_party import attr @@ -69,27 +71,34 @@ def create( ): # type: (...) -> CreateLockDownloadManager - file_artifacts_by_filename = {} # type: Dict[str, FileArtifact] - source_artifacts_by_pin = {} # type: Dict[Pin, Union[LocalProjectArtifact, VCSArtifact]] + file_artifacts_by_filename = {} # type: Dict[str, Tuple[FileArtifact, ProjectName]] + source_artifacts_by_pin = ( + {} + ) # type: Dict[Pin, Tuple[Union[LocalProjectArtifact, VCSArtifact], ProjectName]] for locked_resolve in locked_resolves: for locked_requirement in locked_resolve.locked_requirements: + pin = locked_requirement.pin + project_name = pin.project_name for artifact in locked_requirement.iter_artifacts(): if isinstance(artifact, FileArtifact): - file_artifacts_by_filename[artifact.filename] = artifact + file_artifacts_by_filename[artifact.filename] = (artifact, project_name) else: # N.B.: We know there is only ever one local project artifact for a given # locked local project requirement and likewise only one VCS artifact for a # given locked VCS requirement. - source_artifacts_by_pin[locked_requirement.pin] = artifact + source_artifacts_by_pin[locked_requirement.pin] = (artifact, project_name) path_by_artifact_and_project_name = {} # type: Dict[Tuple[Artifact, ProjectName], str] for root, _, files in os.walk(download_dir): for f in files: - pin = Pin.canonicalize(ProjectNameAndVersion.from_filename(f)) - artifact = file_artifacts_by_filename.get(f) or source_artifacts_by_pin[pin] - path_by_artifact_and_project_name[(artifact, pin.project_name)] = os.path.join( - root, f - ) + artifact_and_project_name = file_artifacts_by_filename.get( + f + ) # type: Optional[Tuple[AnyArtifact, ProjectName]] + if not artifact_and_project_name: + project_name_and_version = ProjectNameAndVersion.from_filename(f) + pin = Pin.canonicalize(project_name_and_version) + artifact_and_project_name = source_artifacts_by_pin[pin] + path_by_artifact_and_project_name[artifact_and_project_name] = os.path.join(root, f) return cls( path_by_artifact_and_project_name=path_by_artifact_and_project_name, pex_root=pex_root @@ -181,7 +190,7 @@ def observe_download( download_dir, # type: str ): # type: (...) -> DownloadObserver - patch = locker.patch( + analyzer = Locker( root_requirements=self.root_requirements, pip_version=self.package_index_configuration.pip_version, resolver=self.resolver, @@ -195,10 +204,12 @@ def observe_download( max_parallel_jobs=self.max_parallel_jobs, ), ) + patch = locker.patch(lock_configuration=self.lock_configuration) + observer = DownloadObserver(analyzer=analyzer, patch=patch) self._analysis.add( - _LockAnalysis(target=target, analyzer=patch.analyzer, download_dir=download_dir) + _LockAnalysis(target=target, analyzer=analyzer, download_dir=download_dir) ) - return patch + return observer def lock(self, downloaded): # type: (Downloaded) -> Tuple[LockedResolve, ...] diff --git a/pex/resolve/pep_691/fingerprint_service.py b/pex/resolve/pep_691/fingerprint_service.py index 4fa3c2716..90bac7b8e 100644 --- a/pex/resolve/pep_691/fingerprint_service.py +++ b/pex/resolve/pep_691/fingerprint_service.py @@ -150,7 +150,7 @@ def fingerprint( if artifact.fingerprint: yield artifact else: - artifacts_to_fingerprint[artifact.url] = artifact + artifacts_to_fingerprint[artifact.url.normalized_url] = artifact if not artifacts_to_fingerprint: return diff --git a/pex/resolve/resolved_requirement.py b/pex/resolve/resolved_requirement.py index 49a971777..cca7626f4 100644 --- a/pex/resolve/resolved_requirement.py +++ b/pex/resolve/resolved_requirement.py @@ -6,13 +6,16 @@ import hashlib from pex import hashing +from pex.compatibility import unquote, urlparse from pex.dist_metadata import ProjectNameAndVersion, Requirement +from pex.hashing import HashlibHasher from pex.pep_440 import Version from pex.pep_503 import ProjectName +from pex.requirements import ArchiveScheme, VCSScheme, parse_scheme from pex.typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import BinaryIO, Iterator, Optional, Tuple + from typing import BinaryIO, Iterator, Optional, Tuple, Union import attr # vendor:skip else: @@ -58,13 +61,56 @@ def from_stream( hashing.update_hash(filelike=stream, digest=digest) return cls(algorithm=algorithm, hash=digest.hexdigest()) + @classmethod + def from_digest(cls, digest): + # type: (HashlibHasher) -> Fingerprint + return cls.from_hashing_fingerprint(digest.hexdigest()) + + @classmethod + def from_hashing_fingerprint(cls, fingerprint): + # type: (hashing.Fingerprint) -> Fingerprint + return cls(algorithm=fingerprint.algorithm, hash=fingerprint) + algorithm = attr.ib() # type: str hash = attr.ib() # type: str +@attr.s(frozen=True) +class ArtifactURL(object): + @classmethod + def parse(cls, url): + # type: (str) -> ArtifactURL + url_info = urlparse.urlparse(url) + normalized_url = urlparse.urlunparse( + (url_info.scheme, url_info.netloc, url_info.path, "", "", "") + ) + return cls( + raw_url=url, + normalized_url=normalized_url, + scheme=parse_scheme(url_info.scheme) if url_info.scheme else None, + path=unquote(url_info.path), + ) + + raw_url = attr.ib(eq=False) # type: str + normalized_url = attr.ib() # type: str + scheme = attr.ib() # type: Optional[Union[str, ArchiveScheme.Value, VCSScheme]] + path = attr.ib(eq=False) # type: str + + @property + def is_wheel(self): + return self.path.endswith(".whl") + + +def _convert_url(value): + # type: (Union[str, ArtifactURL]) -> ArtifactURL + if isinstance(value, ArtifactURL): + return value + return ArtifactURL.parse(value) + + @attr.s(frozen=True) class PartialArtifact(object): - url = attr.ib() # type: str + url = attr.ib(converter=_convert_url) # type: ArtifactURL fingerprint = attr.ib(default=None) # type: Optional[Fingerprint] verified = attr.ib(default=False) # type: bool diff --git a/tests/integration/cli/commands/test_issue_2057.py b/tests/integration/cli/commands/test_issue_2057.py new file mode 100644 index 000000000..a2b0e523c --- /dev/null +++ b/tests/integration/cli/commands/test_issue_2057.py @@ -0,0 +1,184 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import os.path +import shutil +import subprocess +import tempfile +from textwrap import dedent + +import colors + +from pex.cli.testing import run_pex3 +from pex.resolve.lockfile import json_codec +from pex.testing import run_pex_command +from pex.typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + +def test_pex_archive_direct_reference(tmpdir): + # type: (Any) -> None + + result = run_pex_command( + args=[ + "cowsay @ https://github.com/VaasuDevanS/cowsay-python/archive/v5.0.zip", + "-c", + "cowsay", + "--", + "Moo!", + ] + ) + result.assert_success() + assert "Moo!" in result.output + + +def test_lock_create_archive_direct_reference(tmpdir): + # type: (Any) -> None + + pex_root = os.path.join(str(tmpdir), "pex_root") + lock = os.path.join(str(tmpdir), "lock.json") + run_pex3( + "lock", + "create", + "--pex-root", + pex_root, + "cowsay @ https://github.com/VaasuDevanS/cowsay-python/archive/v5.0.zip", + "--indent", + "2", + "-o", + lock, + ).assert_success() + + def assert_create_and_run_pex_from_lock(): + # type: () -> None + result = run_pex_command( + args=[ + "--pex-root", + pex_root, + "--runtime-pex-root", + pex_root, + "--lock", + lock, + "-c", + "cowsay", + "--", + "Moo!", + ] + ) + result.assert_success() + assert "Moo!" in result.output + + assert_create_and_run_pex_from_lock() + shutil.rmtree(pex_root) + assert_create_and_run_pex_from_lock() + + +def test_lock_create_local_project_direct_reference(tmpdir): + # type: (Any) -> None + + clone_dir = os.path.join(str(tmpdir), "ansicolors") + subprocess.check_call(args=["git", "init", clone_dir]) + + ansicolors_1_1_8_sha = "c965f5b9103c5bd32a1572adb8024ebe83278fb0" + subprocess.check_call( + args=[ + "git", + "fetch", + "--depth", + "1", + "https://github.com/jonathaneunice/colors", + ansicolors_1_1_8_sha, + ], + cwd=clone_dir, + ) + subprocess.check_call(args=["git", "reset", "--hard", ansicolors_1_1_8_sha], cwd=clone_dir) + + pex_root = os.path.join(str(tmpdir), "pex_root") + lock = os.path.join(str(tmpdir), "lock.json") + run_pex3( + "lock", + "create", + "--pex-root", + pex_root, + "ansicolors @ file://{}".format(clone_dir), + "--indent", + "2", + "-o", + lock, + ).assert_success() + + def assert_create_and_run_pex_from_lock(): + # type: () -> None + result = run_pex_command( + args=[ + "--pex-root", + pex_root, + "--runtime-pex-root", + pex_root, + "--lock", + lock, + "--", + "-c", + "import colors; print(colors.yellow('Vogon Constructor Fleet!'))", + ] + ) + result.assert_success() + assert colors.yellow("Vogon Constructor Fleet!") == result.output.strip() + + assert_create_and_run_pex_from_lock() + shutil.rmtree(pex_root) + assert_create_and_run_pex_from_lock() + + with tempfile.NamedTemporaryFile() as fp: + fp.write( + dedent( + """\ + diff --git a/setup.py b/setup.py + index 0b58889..bdb7c90 100755 + --- a/setup.py + +++ b/setup.py + @@ -42,3 +42,4 @@ setup( + 'Topic :: Software Development :: Libraries :: Python Modules' + ] + ) + +# Changed + """ + ).encode("utf-8") + ) + fp.flush() + subprocess.check_call(args=["git", "apply", fp.name], cwd=clone_dir) + + # We patched the source but have a cached wheel built from it before the patch in + # ~/.pex/installed_wheels; so no "download" is performed. + assert_create_and_run_pex_from_lock() + + # But now we do need to "download" the project, build a wheel and install it. The hash check + # should fail. + shutil.rmtree(pex_root) + result = run_pex_command( + args=["--pex-root", pex_root, "--runtime-pex-root", pex_root, "--lock", lock] + ) + result.assert_failure() + + lockfile = json_codec.load(lockfile_path=lock) + assert 1 == len(lockfile.locked_resolves) + locked_resolve = lockfile.locked_resolves[0] + assert 1 == len(locked_resolve.locked_requirements) + locked_requirement = locked_resolve.locked_requirements[0] + assert ( + dedent( + """\ + There was 1 error downloading required artifacts: + 1. ansicolors 1.1.8 from file://{clone_dir} + Expected sha256 hash of {expected} when downloading ansicolors but hashed to + """ + ) + .format( + clone_dir=clone_dir, + expected=locked_requirement.artifact.fingerprint.hash, + ) + .strip() + in result.error + ), result.error diff --git a/tests/integration/test_downloads.py b/tests/integration/test_downloads.py index 3190da9b5..09cf69006 100644 --- a/tests/integration/test_downloads.py +++ b/tests/integration/test_downloads.py @@ -5,6 +5,7 @@ import pytest +from pex.pep_503 import ProjectName from pex.resolve.configured_resolver import ConfiguredResolver from pex.resolve.downloads import ArtifactDownloader from pex.resolve.locked_resolve import Artifact, FileArtifact