diff --git a/src/python/pants/backend/python/util_rules/pex_environment.py b/src/python/pants/backend/python/util_rules/pex_environment.py index 41781eca545..fa0eca77a76 100644 --- a/src/python/pants/backend/python/util_rules/pex_environment.py +++ b/src/python/pants/backend/python/util_rules/pex_environment.py @@ -3,6 +3,7 @@ import os from dataclasses import dataclass +from textwrap import dedent from typing import Iterable, Mapping, Optional, Tuple, cast from pants.core.util_rules import subprocess_environment @@ -132,7 +133,7 @@ async def find_pex_python( pex_runtime_env: PexRuntimeEnvironment, subprocess_env_vars: SubprocessEnvironmentVars, ) -> PexEnvironment: - # PEX files are compatible with bootstrapping via python2.7 or python 3.5+. The bootstrap + # PEX files are compatible with bootstrapping via Python 2.7 or Python 3.5+. The bootstrap # code will then re-exec itself if the underlying PEX user code needs a more specific python # interpreter. As such, we look for many Pythons usable by the PEX bootstrap code here for # maximum flexibility. @@ -141,7 +142,39 @@ async def find_pex_python( Get( BinaryPaths, BinaryPathRequest( - search_path=python_setup.interpreter_search_paths, binary_name=binary_name + search_path=python_setup.interpreter_search_paths, + binary_name=binary_name, + test_args=[ + "-c", + # N.B.: The following code snippet must be compatible with Python 2.7 and + # Python 3.5+. + dedent( + """\ + import sys + + major, minor = sys.version_info[:2] + if (major, minor) == (2, 7) or (major == 3 and minor >= 5): + # Here we hash the underlying python interpreter executable to + # ensure we detect changes in the real interpreter that might + # otherwise be masked by pyenv shim scripts found on the search + # path. Naively, just printing out the full version_info would be + # enough, but that does not account for supported abi changes (e.g.: + # a pyenv switch from a py27mu interpreter to a py27m interpreter. + import hashlib + hasher = hashlib.sha256() + with open(sys.executable, "rb") as fp: + # We pick 8192 for efficiency of reads and fingerprint updates + # (writes) since it's a common OS buffer size and an even + # multiple of the hash block size. + for chunk in iter(lambda: fp.read(8192), b""): + hasher.update(chunk) + sys.stdout.write(hasher.hexdigest()) + sys.exit(0) + else: + sys.exit(1) + """ + ), + ], ), ) for binary_name in pex_runtime_env.bootstrap_interpreter_names @@ -151,7 +184,7 @@ async def find_pex_python( def first_python_binary() -> Optional[str]: for binary_paths in all_python_binary_paths: if binary_paths.first_path: - return binary_paths.first_path + return binary_paths.first_path.path return None return PexEnvironment( diff --git a/src/python/pants/engine/desktop.py b/src/python/pants/engine/desktop.py index 88311f1f114..fc82e88d0cf 100644 --- a/src/python/pants/engine/desktop.py +++ b/src/python/pants/engine/desktop.py @@ -50,13 +50,15 @@ async def find_open_program(request: OpenFilesRequest, plat: Platform) -> OpenFi if plat == Platform.darwin: processes = [ InteractiveProcess( - argv=(open_program_paths.first_path, *(str(f) for f in request.files)), + argv=(open_program_paths.first_path.path, *(str(f) for f in request.files)), run_in_workspace=True, ) ] else: processes = [ - InteractiveProcess(argv=(open_program_paths.first_path, str(f)), run_in_workspace=True) + InteractiveProcess( + argv=(open_program_paths.first_path.path, str(f)), run_in_workspace=True + ) for f in request.files ] diff --git a/src/python/pants/engine/process.py b/src/python/pants/engine/process.py index cdea5e870e9..57330036b9d 100644 --- a/src/python/pants/engine/process.py +++ b/src/python/pants/engine/process.py @@ -2,6 +2,7 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import dataclasses +import hashlib import logging from dataclasses import dataclass from textwrap import dedent @@ -11,6 +12,7 @@ from pants.base.exception_sink import ExceptionSink from pants.engine.engine_aware import EngineAwareReturnType from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent +from pants.engine.internals.selectors import MultiGet from pants.engine.internals.uuid import UUIDRequest from pants.engine.platform import Platform, PlatformConstraint from pants.engine.rules import Get, collect_rules, rule, side_effecting @@ -314,23 +316,64 @@ def run(self, request: InteractiveProcess) -> InteractiveProcessResult: @frozen_after_init @dataclass(unsafe_hash=True) class BinaryPathRequest: + """Request to find a binary of a given name. + + If `test_args` are specified all binaries that are found will be executed with these args and + only those binaries whose test executions exit with return code 0 will be retained. + Additionally, if test execution includes stdout content, that will be used to fingerprint the + binary path so that upgrades and downgrades can be detected. A reasonable test for many programs + might be to pass `["--version"]` for `test_args` since it will both ensure the program runs and + also produce stdout text that changes upon upgrade or downgrade of the binary at the discovered + path. + """ + search_path: Tuple[str, ...] binary_name: str + test_args: Optional[Tuple[str, ...]] - def __init__(self, *, search_path: Iterable[str], binary_name: str) -> None: + def __init__( + self, + *, + search_path: Iterable[str], + binary_name: str, + test_args: Optional[Iterable[str]] = None, + ) -> None: self.search_path = tuple(OrderedSet(search_path)) self.binary_name = binary_name + self.test_args = tuple(test_args or ()) + + +@frozen_after_init +@dataclass(unsafe_hash=True) +class BinaryPath: + path: str + fingerprint: str + + def __init__(self, path: str, fingerprint: Optional[str] = None) -> None: + self.path = path + self.fingerprint = self._fingerprint() if fingerprint is None else fingerprint + + @staticmethod + def _fingerprint(content: Optional[Union[bytes, bytearray, memoryview]] = None) -> str: + hasher = hashlib.sha256() if content is None else hashlib.sha256(content) + return hasher.hexdigest() + + @classmethod + def fingerprinted( + cls, path: str, representative_content: Union[bytes, bytearray, memoryview] + ) -> "BinaryPath": + return cls(path, fingerprint=cls._fingerprint(representative_content)) @frozen_after_init @dataclass(unsafe_hash=True) class BinaryPaths(EngineAwareReturnType): binary_name: str - paths: Tuple[str, ...] + paths: Tuple[BinaryPath, ...] - def __init__(self, binary_name: str, paths: Iterable[str]): + def __init__(self, binary_name: str, paths: Optional[Iterable[BinaryPath]] = None): self.binary_name = binary_name - self.paths = tuple(OrderedSet(paths)) + self.paths = tuple(OrderedSet(paths) if paths else ()) def message(self) -> str: if not self.paths: @@ -341,7 +384,7 @@ def message(self) -> str: return found_msg @property - def first_path(self) -> Optional[str]: + def first_path(self) -> Optional[BinaryPath]: """Return the first path to the binary that was discovered, if any.""" return next(iter(self.paths), None) @@ -398,7 +441,6 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths: CreateDigest([FileContent(script_path, script_content.encode(), is_executable=True)]), ) - paths = [] search_path = create_path_env_var(request.search_path) result = await Get( FallibleProcessResult, @@ -416,10 +458,32 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths: ) ), ) - if result.exit_code == 0: - paths.extend(result.stdout.decode().splitlines()) - return BinaryPaths(binary_name=request.binary_name, paths=paths) + binary_paths = BinaryPaths(binary_name=request.binary_name) + if result.exit_code != 0: + return binary_paths + + found_paths = result.stdout.decode().splitlines() + if not request.test_args: + return dataclasses.replace(binary_paths, paths=[BinaryPath(path) for path in found_paths]) + + results = await MultiGet( + Get( + FallibleProcessResult, + UncacheableProcess( + Process(argv=[path, *request.test_args], description=f"Test binary {path}.") + ), + ) + for path in found_paths + ) + return dataclasses.replace( + binary_paths, + paths=[ + BinaryPath.fingerprinted(path, result.stdout) + for path, result in zip(found_paths, results) + if result.exit_code == 0 + ], + ) def rules():