From 73c3c787abbe1ca71b900f11bb880e1de09c6d2a Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 13 Sep 2020 11:37:42 -0700 Subject: [PATCH] Support tests for BinaryPaths. (#10770) A BinaryPathRequest can now include a test to run against each found binary to validate it works and optionally fingerprint it. Eventually fingerprinting the binary contents should be performed automatically and the optional test fingerprint mixed in with it when present as tracked by #10769. We leverage testing found binaries in PexEnvironment to ensure we find only interpreter paths compatible with Pex bootstrapping. This plugs an existing hole not yet encountered in the wild where a Python 2.6 binary (for example) could be chosen and then PEX file bootstrapping fail as a result. We additionally fingerprint interpreters passing the version range test to ensure we detect interpreter upgrades and pyenv shim switches. Even with the automatic hashing of binaries tracked in #10769 working, we'd still need to do this in the pyenv shim case since the same shim script can redirect to different interpreters depending on configuration external to the shim script. --- .../python/util_rules/pex_environment.py | 39 ++++++++- src/python/pants/engine/desktop.py | 6 +- src/python/pants/engine/process.py | 82 +++++++++++++++++-- 3 files changed, 113 insertions(+), 14 deletions(-) diff --git a/src/python/pants/backend/python/util_rules/pex_environment.py b/src/python/pants/backend/python/util_rules/pex_environment.py index 41781eca545..fa0eca77a76 100644 --- a/src/python/pants/backend/python/util_rules/pex_environment.py +++ b/src/python/pants/backend/python/util_rules/pex_environment.py @@ -3,6 +3,7 @@ import os from dataclasses import dataclass +from textwrap import dedent from typing import Iterable, Mapping, Optional, Tuple, cast from pants.core.util_rules import subprocess_environment @@ -132,7 +133,7 @@ async def find_pex_python( pex_runtime_env: PexRuntimeEnvironment, subprocess_env_vars: SubprocessEnvironmentVars, ) -> PexEnvironment: - # PEX files are compatible with bootstrapping via python2.7 or python 3.5+. The bootstrap + # PEX files are compatible with bootstrapping via Python 2.7 or Python 3.5+. The bootstrap # code will then re-exec itself if the underlying PEX user code needs a more specific python # interpreter. As such, we look for many Pythons usable by the PEX bootstrap code here for # maximum flexibility. @@ -141,7 +142,39 @@ async def find_pex_python( Get( BinaryPaths, BinaryPathRequest( - search_path=python_setup.interpreter_search_paths, binary_name=binary_name + search_path=python_setup.interpreter_search_paths, + binary_name=binary_name, + test_args=[ + "-c", + # N.B.: The following code snippet must be compatible with Python 2.7 and + # Python 3.5+. + dedent( + """\ + import sys + + major, minor = sys.version_info[:2] + if (major, minor) == (2, 7) or (major == 3 and minor >= 5): + # Here we hash the underlying python interpreter executable to + # ensure we detect changes in the real interpreter that might + # otherwise be masked by pyenv shim scripts found on the search + # path. Naively, just printing out the full version_info would be + # enough, but that does not account for supported abi changes (e.g.: + # a pyenv switch from a py27mu interpreter to a py27m interpreter. + import hashlib + hasher = hashlib.sha256() + with open(sys.executable, "rb") as fp: + # We pick 8192 for efficiency of reads and fingerprint updates + # (writes) since it's a common OS buffer size and an even + # multiple of the hash block size. + for chunk in iter(lambda: fp.read(8192), b""): + hasher.update(chunk) + sys.stdout.write(hasher.hexdigest()) + sys.exit(0) + else: + sys.exit(1) + """ + ), + ], ), ) for binary_name in pex_runtime_env.bootstrap_interpreter_names @@ -151,7 +184,7 @@ async def find_pex_python( def first_python_binary() -> Optional[str]: for binary_paths in all_python_binary_paths: if binary_paths.first_path: - return binary_paths.first_path + return binary_paths.first_path.path return None return PexEnvironment( diff --git a/src/python/pants/engine/desktop.py b/src/python/pants/engine/desktop.py index 88311f1f114..fc82e88d0cf 100644 --- a/src/python/pants/engine/desktop.py +++ b/src/python/pants/engine/desktop.py @@ -50,13 +50,15 @@ async def find_open_program(request: OpenFilesRequest, plat: Platform) -> OpenFi if plat == Platform.darwin: processes = [ InteractiveProcess( - argv=(open_program_paths.first_path, *(str(f) for f in request.files)), + argv=(open_program_paths.first_path.path, *(str(f) for f in request.files)), run_in_workspace=True, ) ] else: processes = [ - InteractiveProcess(argv=(open_program_paths.first_path, str(f)), run_in_workspace=True) + InteractiveProcess( + argv=(open_program_paths.first_path.path, str(f)), run_in_workspace=True + ) for f in request.files ] diff --git a/src/python/pants/engine/process.py b/src/python/pants/engine/process.py index cdea5e870e9..57330036b9d 100644 --- a/src/python/pants/engine/process.py +++ b/src/python/pants/engine/process.py @@ -2,6 +2,7 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import dataclasses +import hashlib import logging from dataclasses import dataclass from textwrap import dedent @@ -11,6 +12,7 @@ from pants.base.exception_sink import ExceptionSink from pants.engine.engine_aware import EngineAwareReturnType from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent +from pants.engine.internals.selectors import MultiGet from pants.engine.internals.uuid import UUIDRequest from pants.engine.platform import Platform, PlatformConstraint from pants.engine.rules import Get, collect_rules, rule, side_effecting @@ -314,23 +316,64 @@ def run(self, request: InteractiveProcess) -> InteractiveProcessResult: @frozen_after_init @dataclass(unsafe_hash=True) class BinaryPathRequest: + """Request to find a binary of a given name. + + If `test_args` are specified all binaries that are found will be executed with these args and + only those binaries whose test executions exit with return code 0 will be retained. + Additionally, if test execution includes stdout content, that will be used to fingerprint the + binary path so that upgrades and downgrades can be detected. A reasonable test for many programs + might be to pass `["--version"]` for `test_args` since it will both ensure the program runs and + also produce stdout text that changes upon upgrade or downgrade of the binary at the discovered + path. + """ + search_path: Tuple[str, ...] binary_name: str + test_args: Optional[Tuple[str, ...]] - def __init__(self, *, search_path: Iterable[str], binary_name: str) -> None: + def __init__( + self, + *, + search_path: Iterable[str], + binary_name: str, + test_args: Optional[Iterable[str]] = None, + ) -> None: self.search_path = tuple(OrderedSet(search_path)) self.binary_name = binary_name + self.test_args = tuple(test_args or ()) + + +@frozen_after_init +@dataclass(unsafe_hash=True) +class BinaryPath: + path: str + fingerprint: str + + def __init__(self, path: str, fingerprint: Optional[str] = None) -> None: + self.path = path + self.fingerprint = self._fingerprint() if fingerprint is None else fingerprint + + @staticmethod + def _fingerprint(content: Optional[Union[bytes, bytearray, memoryview]] = None) -> str: + hasher = hashlib.sha256() if content is None else hashlib.sha256(content) + return hasher.hexdigest() + + @classmethod + def fingerprinted( + cls, path: str, representative_content: Union[bytes, bytearray, memoryview] + ) -> "BinaryPath": + return cls(path, fingerprint=cls._fingerprint(representative_content)) @frozen_after_init @dataclass(unsafe_hash=True) class BinaryPaths(EngineAwareReturnType): binary_name: str - paths: Tuple[str, ...] + paths: Tuple[BinaryPath, ...] - def __init__(self, binary_name: str, paths: Iterable[str]): + def __init__(self, binary_name: str, paths: Optional[Iterable[BinaryPath]] = None): self.binary_name = binary_name - self.paths = tuple(OrderedSet(paths)) + self.paths = tuple(OrderedSet(paths) if paths else ()) def message(self) -> str: if not self.paths: @@ -341,7 +384,7 @@ def message(self) -> str: return found_msg @property - def first_path(self) -> Optional[str]: + def first_path(self) -> Optional[BinaryPath]: """Return the first path to the binary that was discovered, if any.""" return next(iter(self.paths), None) @@ -398,7 +441,6 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths: CreateDigest([FileContent(script_path, script_content.encode(), is_executable=True)]), ) - paths = [] search_path = create_path_env_var(request.search_path) result = await Get( FallibleProcessResult, @@ -416,10 +458,32 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths: ) ), ) - if result.exit_code == 0: - paths.extend(result.stdout.decode().splitlines()) - return BinaryPaths(binary_name=request.binary_name, paths=paths) + binary_paths = BinaryPaths(binary_name=request.binary_name) + if result.exit_code != 0: + return binary_paths + + found_paths = result.stdout.decode().splitlines() + if not request.test_args: + return dataclasses.replace(binary_paths, paths=[BinaryPath(path) for path in found_paths]) + + results = await MultiGet( + Get( + FallibleProcessResult, + UncacheableProcess( + Process(argv=[path, *request.test_args], description=f"Test binary {path}.") + ), + ) + for path in found_paths + ) + return dataclasses.replace( + binary_paths, + paths=[ + BinaryPath.fingerprinted(path, result.stdout) + for path, result in zip(found_paths, results) + if result.exit_code == 0 + ], + ) def rules():