Skip to content

Commit

Permalink
Support tests for BinaryPaths. (#10770)
Browse files Browse the repository at this point in the history
A BinaryPathRequest can now include a test to run against each found
binary to validate it works and optionally fingerprint it. Eventually
fingerprinting the binary contents should be performed automatically
and the optional test fingerprint mixed in with it when present as
tracked by #10769.

We leverage testing found binaries in PexEnvironment to ensure we find
only interpreter paths compatible with Pex bootstrapping. This plugs an
existing hole not yet encountered in the wild where a Python 2.6 binary
(for example) could be chosen and then PEX file bootstrapping fail as a
result. We additionally fingerprint interpreters passing the version
range test to ensure we detect interpreter upgrades and pyenv shim
switches. Even with the automatic hashing of binaries tracked in #10769
working, we'd still need to do this in the pyenv shim case since the
same shim script can redirect to different interpreters depending on
configuration external to the shim script.
  • Loading branch information
jsirois committed Sep 13, 2020
1 parent 9d93bdc commit 73c3c78
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 14 deletions.
39 changes: 36 additions & 3 deletions src/python/pants/backend/python/util_rules/pex_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import os
from dataclasses import dataclass
from textwrap import dedent
from typing import Iterable, Mapping, Optional, Tuple, cast

from pants.core.util_rules import subprocess_environment
Expand Down Expand Up @@ -132,7 +133,7 @@ async def find_pex_python(
pex_runtime_env: PexRuntimeEnvironment,
subprocess_env_vars: SubprocessEnvironmentVars,
) -> PexEnvironment:
# PEX files are compatible with bootstrapping via python2.7 or python 3.5+. The bootstrap
# PEX files are compatible with bootstrapping via Python 2.7 or Python 3.5+. The bootstrap
# code will then re-exec itself if the underlying PEX user code needs a more specific python
# interpreter. As such, we look for many Pythons usable by the PEX bootstrap code here for
# maximum flexibility.
Expand All @@ -141,7 +142,39 @@ async def find_pex_python(
Get(
BinaryPaths,
BinaryPathRequest(
search_path=python_setup.interpreter_search_paths, binary_name=binary_name
search_path=python_setup.interpreter_search_paths,
binary_name=binary_name,
test_args=[
"-c",
# N.B.: The following code snippet must be compatible with Python 2.7 and
# Python 3.5+.
dedent(
"""\
import sys
major, minor = sys.version_info[:2]
if (major, minor) == (2, 7) or (major == 3 and minor >= 5):
# Here we hash the underlying python interpreter executable to
# ensure we detect changes in the real interpreter that might
# otherwise be masked by pyenv shim scripts found on the search
# path. Naively, just printing out the full version_info would be
# enough, but that does not account for supported abi changes (e.g.:
# a pyenv switch from a py27mu interpreter to a py27m interpreter.
import hashlib
hasher = hashlib.sha256()
with open(sys.executable, "rb") as fp:
# We pick 8192 for efficiency of reads and fingerprint updates
# (writes) since it's a common OS buffer size and an even
# multiple of the hash block size.
for chunk in iter(lambda: fp.read(8192), b""):
hasher.update(chunk)
sys.stdout.write(hasher.hexdigest())
sys.exit(0)
else:
sys.exit(1)
"""
),
],
),
)
for binary_name in pex_runtime_env.bootstrap_interpreter_names
Expand All @@ -151,7 +184,7 @@ async def find_pex_python(
def first_python_binary() -> Optional[str]:
for binary_paths in all_python_binary_paths:
if binary_paths.first_path:
return binary_paths.first_path
return binary_paths.first_path.path
return None

return PexEnvironment(
Expand Down
6 changes: 4 additions & 2 deletions src/python/pants/engine/desktop.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@ async def find_open_program(request: OpenFilesRequest, plat: Platform) -> OpenFi
if plat == Platform.darwin:
processes = [
InteractiveProcess(
argv=(open_program_paths.first_path, *(str(f) for f in request.files)),
argv=(open_program_paths.first_path.path, *(str(f) for f in request.files)),
run_in_workspace=True,
)
]
else:
processes = [
InteractiveProcess(argv=(open_program_paths.first_path, str(f)), run_in_workspace=True)
InteractiveProcess(
argv=(open_program_paths.first_path.path, str(f)), run_in_workspace=True
)
for f in request.files
]

Expand Down
82 changes: 73 additions & 9 deletions src/python/pants/engine/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import dataclasses
import hashlib
import logging
from dataclasses import dataclass
from textwrap import dedent
Expand All @@ -11,6 +12,7 @@
from pants.base.exception_sink import ExceptionSink
from pants.engine.engine_aware import EngineAwareReturnType
from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent
from pants.engine.internals.selectors import MultiGet
from pants.engine.internals.uuid import UUIDRequest
from pants.engine.platform import Platform, PlatformConstraint
from pants.engine.rules import Get, collect_rules, rule, side_effecting
Expand Down Expand Up @@ -314,23 +316,64 @@ def run(self, request: InteractiveProcess) -> InteractiveProcessResult:
@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPathRequest:
"""Request to find a binary of a given name.
If `test_args` are specified all binaries that are found will be executed with these args and
only those binaries whose test executions exit with return code 0 will be retained.
Additionally, if test execution includes stdout content, that will be used to fingerprint the
binary path so that upgrades and downgrades can be detected. A reasonable test for many programs
might be to pass `["--version"]` for `test_args` since it will both ensure the program runs and
also produce stdout text that changes upon upgrade or downgrade of the binary at the discovered
path.
"""

search_path: Tuple[str, ...]
binary_name: str
test_args: Optional[Tuple[str, ...]]

def __init__(self, *, search_path: Iterable[str], binary_name: str) -> None:
def __init__(
self,
*,
search_path: Iterable[str],
binary_name: str,
test_args: Optional[Iterable[str]] = None,
) -> None:
self.search_path = tuple(OrderedSet(search_path))
self.binary_name = binary_name
self.test_args = tuple(test_args or ())


@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPath:
path: str
fingerprint: str

def __init__(self, path: str, fingerprint: Optional[str] = None) -> None:
self.path = path
self.fingerprint = self._fingerprint() if fingerprint is None else fingerprint

@staticmethod
def _fingerprint(content: Optional[Union[bytes, bytearray, memoryview]] = None) -> str:
hasher = hashlib.sha256() if content is None else hashlib.sha256(content)
return hasher.hexdigest()

@classmethod
def fingerprinted(
cls, path: str, representative_content: Union[bytes, bytearray, memoryview]
) -> "BinaryPath":
return cls(path, fingerprint=cls._fingerprint(representative_content))


@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPaths(EngineAwareReturnType):
binary_name: str
paths: Tuple[str, ...]
paths: Tuple[BinaryPath, ...]

def __init__(self, binary_name: str, paths: Iterable[str]):
def __init__(self, binary_name: str, paths: Optional[Iterable[BinaryPath]] = None):
self.binary_name = binary_name
self.paths = tuple(OrderedSet(paths))
self.paths = tuple(OrderedSet(paths) if paths else ())

def message(self) -> str:
if not self.paths:
Expand All @@ -341,7 +384,7 @@ def message(self) -> str:
return found_msg

@property
def first_path(self) -> Optional[str]:
def first_path(self) -> Optional[BinaryPath]:
"""Return the first path to the binary that was discovered, if any."""
return next(iter(self.paths), None)

Expand Down Expand Up @@ -398,7 +441,6 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
CreateDigest([FileContent(script_path, script_content.encode(), is_executable=True)]),
)

paths = []
search_path = create_path_env_var(request.search_path)
result = await Get(
FallibleProcessResult,
Expand All @@ -416,10 +458,32 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
)
),
)
if result.exit_code == 0:
paths.extend(result.stdout.decode().splitlines())

return BinaryPaths(binary_name=request.binary_name, paths=paths)
binary_paths = BinaryPaths(binary_name=request.binary_name)
if result.exit_code != 0:
return binary_paths

found_paths = result.stdout.decode().splitlines()
if not request.test_args:
return dataclasses.replace(binary_paths, paths=[BinaryPath(path) for path in found_paths])

results = await MultiGet(
Get(
FallibleProcessResult,
UncacheableProcess(
Process(argv=[path, *request.test_args], description=f"Test binary {path}.")
),
)
for path in found_paths
)
return dataclasses.replace(
binary_paths,
paths=[
BinaryPath.fingerprinted(path, result.stdout)
for path, result in zip(found_paths, results)
if result.exit_code == 0
],
)


def rules():
Expand Down

0 comments on commit 73c3c78

Please sign in to comment.