From aa0f3a31915577913902af640d5dbd77810a128d Mon Sep 17 00:00:00 2001 From: John Sirois Date: Thu, 10 Dec 2020 23:20:49 -0800 Subject: [PATCH] Escape venvs unless PEX_INHERIT_PATH is requested. Virtual environments created with --system-site-packages can break PEX sys.path scrubbing. Avoid this class of environment leaks by breaking out of venvs unless PEX_INHERIT_PATH has been requested. A test is added that fails for virtualenv-16.7.10 that fails without the fix. Fixes #1031 --- pex/interpreter_constraints.py | 14 ++- pex/pex_bootstrapper.py | 185 +++++++++++++++++---------------- pex/testing.py | 16 ++- tests/test_integration.py | 127 ++++++++++++++++++---- 4 files changed, 226 insertions(+), 116 deletions(-) diff --git a/pex/interpreter_constraints.py b/pex/interpreter_constraints.py index 1e558592a..897f0aa8a 100644 --- a/pex/interpreter_constraints.py +++ b/pex/interpreter_constraints.py @@ -5,8 +5,6 @@ from __future__ import absolute_import -import os - from pex.common import die from pex.interpreter import PythonIdentity, PythonInterpreter from pex.typing import TYPE_CHECKING @@ -37,17 +35,27 @@ def __init__( constraints, # type: Iterable[str] candidates, # type: Iterable[PythonInterpreter] failures, # type: Iterable[InterpreterIdentificationError] + preamble=None, # type: Optional[str] ): # type: (...) -> None """ :param constraints: The constraints that could not be satisfied. :param candidates: The python interpreters that were compared against the constraints. :param failures: Descriptions of the python interpreters that were unidentifiable. + :param preamble: An optional preamble for the exception message. """ self.constraints = tuple(constraints) self.candidates = tuple(candidates) self.failures = tuple(failures) - super(UnsatisfiableInterpreterConstraintsError, self).__init__(self.create_message()) + super(UnsatisfiableInterpreterConstraintsError, self).__init__( + self.create_message(preamble=preamble) + ) + + def with_preamble(self, preamble): + # type: (str) -> UnsatisfiableInterpreterConstraintsError + return UnsatisfiableInterpreterConstraintsError( + self.constraints, self.candidates, self.failures, preamble=preamble + ) def create_message(self, preamble=None): # type: (Optional[str]) -> str diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py index 8dff38492..1a7c695db 100644 --- a/pex/pex_bootstrapper.py +++ b/pex/pex_bootstrapper.py @@ -8,6 +8,7 @@ from pex import pex_warnings from pex.common import die +from pex.inherit_path import InheritPath from pex.interpreter import PythonInterpreter from pex.interpreter_constraints import UnsatisfiableInterpreterConstraintsError from pex.orderedset import OrderedSet @@ -157,13 +158,13 @@ def _valid_interpreter(interp_or_error): def _select_path_interpreter( path=None, # type: Optional[str] valid_basenames=None, # type: Optional[Tuple[str, ...]] - compatibility_constraints=None, # type: Optional[Iterable[str]] + interpreter_constraints=None, # type: Optional[Iterable[str]] ): # type: (...) -> Optional[PythonInterpreter] candidate_interpreters_iter = iter_compatible_interpreters( path=path, valid_basenames=valid_basenames, - interpreter_constraints=compatibility_constraints, + interpreter_constraints=interpreter_constraints, ) current_interpreter = PythonInterpreter.get() # type: PythonInterpreter candidate_interpreters = [] @@ -182,46 +183,10 @@ def _select_path_interpreter( return PythonInterpreter.latest_release_of_min_compatible_version(candidate_interpreters) -def maybe_reexec_pex(compatibility_constraints=None): - # type: (Optional[Iterable[str]]) -> Union[None, NoReturn] - """Handle environment overrides for the Python interpreter to use when executing this pex. - - This function supports interpreter filtering based on interpreter constraints stored in PEX-INFO - metadata. If PEX_PYTHON is set it attempts to obtain the binary location of the interpreter - specified by PEX_PYTHON. If PEX_PYTHON_PATH is set, it attempts to search the path for a matching - interpreter in accordance with the interpreter constraints. If both variables are present, this - function gives precedence to PEX_PYTHON_PATH and errors out if no compatible interpreters can be - found on said path. - - If neither variable is set, we fall back to plain PEX execution using PATH searching or the - currently executing interpreter. If compatibility constraints are used, we match those constraints - against these interpreters. - - :param compatibility_constraints: optional list of requirements-style strings that constrain the - Python interpreter to re-exec this pex with. - """ - +def find_compatible_interpreter(interpreter_constraints=None): + # type: (Optional[Iterable[str]]) -> PythonInterpreter current_interpreter = PythonInterpreter.get() - target = None # type: Optional[PythonInterpreter] - - # NB: Used only for tests. - if "_PEX_EXEC_CHAIN" in os.environ: - flag_or_chain = os.environ.pop("_PEX_EXEC_CHAIN") - pex_exec_chain = [] if flag_or_chain == "1" else flag_or_chain.split(os.pathsep) - pex_exec_chain.append(current_interpreter.binary) - os.environ["_PEX_EXEC_CHAIN"] = os.pathsep.join(pex_exec_chain) - - current_interpreter_blessed_env_var = "_PEX_SHOULD_EXIT_BOOTSTRAP_REEXEC" - if os.environ.pop(current_interpreter_blessed_env_var, None): - # We've already been here and selected an interpreter. Continue to execution. - return None - - from . import pex - - pythonpath = pex.PEX.stash_pythonpath() - if pythonpath is not None: - TRACER.log("Stashed PYTHONPATH of {}".format(pythonpath), V=2) - + target = current_interpreter # type: Optional[PythonInterpreter] with TRACER.timed("Selecting runtime interpreter", V=3): if ENV.PEX_PYTHON and not ENV.PEX_PYTHON_PATH: # preserve PEX_PYTHON re-exec for backwards compatibility @@ -229,7 +194,7 @@ def maybe_reexec_pex(compatibility_constraints=None): # https://github.com/pantsbuild/pex/issues/431 TRACER.log( "Using PEX_PYTHON={} constrained by {}".format( - ENV.PEX_PYTHON, compatibility_constraints + ENV.PEX_PYTHON, interpreter_constraints ), V=3, ) @@ -237,78 +202,124 @@ def maybe_reexec_pex(compatibility_constraints=None): if os.path.isabs(ENV.PEX_PYTHON): target = _select_path_interpreter( path=ENV.PEX_PYTHON, - compatibility_constraints=compatibility_constraints, + interpreter_constraints=interpreter_constraints, ) else: target = _select_path_interpreter( valid_basenames=(os.path.basename(ENV.PEX_PYTHON),), - compatibility_constraints=compatibility_constraints, + interpreter_constraints=interpreter_constraints, ) except UnsatisfiableInterpreterConstraintsError as e: - die( - e.create_message( - "Failed to find a compatible PEX_PYTHON={pex_python}.".format( - pex_python=ENV.PEX_PYTHON - ) + raise e.with_preamble( + "Failed to find a compatible PEX_PYTHON={pex_python}.".format( + pex_python=ENV.PEX_PYTHON ) ) - elif ENV.PEX_PYTHON_PATH or compatibility_constraints: + elif ENV.PEX_PYTHON_PATH or interpreter_constraints: TRACER.log( "Using {path} constrained by {constraints}".format( path="PEX_PYTHON_PATH={}".format(ENV.PEX_PYTHON_PATH) if ENV.PEX_PYTHON_PATH else "$PATH", - constraints=compatibility_constraints, + constraints=interpreter_constraints, ), V=3, ) try: target = _select_path_interpreter( - path=ENV.PEX_PYTHON_PATH, compatibility_constraints=compatibility_constraints + path=ENV.PEX_PYTHON_PATH, interpreter_constraints=interpreter_constraints ) except UnsatisfiableInterpreterConstraintsError as e: - die( - e.create_message( - "Failed to find compatible interpreter on path {path}.".format( - path=ENV.PEX_PYTHON_PATH or os.getenv("PATH") - ) + raise e.with_preamble( + "Failed to find compatible interpreter on path {path}.".format( + path=ENV.PEX_PYTHON_PATH or os.getenv("PATH") ) ) - elif pythonpath is None: - TRACER.log( - "Using the current interpreter {} since no constraints have been specified and " - "PYTHONPATH is not set.".format(sys.executable), - V=3, - ) - return None - else: - target = current_interpreter - - if not target: - # N.B.: This can only happen when PEX_PYTHON_PATH is set and compatibility_constraints is - # not set, but we handle all constraints generally for sanity sake. - constraints = [] - if ENV.PEX_PYTHON: - constraints.append("PEX_PYTHON={}".format(ENV.PEX_PYTHON)) - if ENV.PEX_PYTHON_PATH: - constraints.append("PEX_PYTHON_PATH={}".format(ENV.PEX_PYTHON_PATH)) - if compatibility_constraints: - constraints.extend( - "--interpreter-constraint={}".format(compatibility_constraint) - for compatibility_constraint in compatibility_constraints + + if target is None: + # N.B.: This can only happen when PEX_PYTHON_PATH is set and interpreter_constraints + # is empty, but we handle all constraints generally for sanity sake. + constraints = [] + if ENV.PEX_PYTHON: + constraints.append("PEX_PYTHON={}".format(ENV.PEX_PYTHON)) + if ENV.PEX_PYTHON_PATH: + constraints.append("PEX_PYTHON_PATH={}".format(ENV.PEX_PYTHON_PATH)) + if interpreter_constraints: + constraints.append( + "Version matches {}".format(" or ".join(interpreter_constraints)) + ) + raise UnsatisfiableInterpreterConstraintsError( + constraints=constraints, + candidates=[current_interpreter], + failures=[], + preamble="Could not find a compatible interpreter.", ) + return target - die( - "Failed to find an appropriate Python interpreter.\n" - "\n" - "Although the current interpreter is {python}, the following constraints exclude it:\n" - " {constraints}".format(python=sys.executable, constraints="\n ".join(constraints)) + +def maybe_reexec_pex(interpreter_constraints=None): + # type: (Optional[Iterable[str]]) -> Union[None, NoReturn] + """Handle environment overrides for the Python interpreter to use when executing this pex. + + This function supports interpreter filtering based on interpreter constraints stored in PEX-INFO + metadata. If PEX_PYTHON is set it attempts to obtain the binary location of the interpreter + specified by PEX_PYTHON. If PEX_PYTHON_PATH is set, it attempts to search the path for a + matching interpreter in accordance with the interpreter constraints. If both variables are + present, this function gives precedence to PEX_PYTHON_PATH and errors out if no compatible + interpreters can be found on said path. + + If neither variable is set, we fall back to plain PEX execution using PATH searching or the + currently executing interpreter. If compatibility constraints are used, we match those + constraints against these interpreters. + + :param interpreter_constraints: Optional list of requirements-style strings that constrain the + Python interpreter to re-exec this pex with. + """ + + current_interpreter = PythonInterpreter.get() + + # NB: Used only for tests. + if "_PEX_EXEC_CHAIN" in os.environ: + flag_or_chain = os.environ.pop("_PEX_EXEC_CHAIN") + pex_exec_chain = [] if flag_or_chain == "1" else flag_or_chain.split(os.pathsep) + pex_exec_chain.append(current_interpreter.binary) + os.environ["_PEX_EXEC_CHAIN"] = os.pathsep.join(pex_exec_chain) + + current_interpreter_blessed_env_var = "_PEX_SHOULD_EXIT_BOOTSTRAP_REEXEC" + if os.environ.pop(current_interpreter_blessed_env_var, None): + # We've already been here and selected an interpreter. Continue to execution. + return None + + try: + target = find_compatible_interpreter( + interpreter_constraints=interpreter_constraints, ) + except UnsatisfiableInterpreterConstraintsError as e: + die(str(e)) os.environ.pop("PEX_PYTHON", None) os.environ.pop("PEX_PYTHON_PATH", None) - if pythonpath is None and target == current_interpreter: + if ENV.PEX_INHERIT_PATH == InheritPath.FALSE: + # Now that we've found a compatible Python interpreter, make sure we resolve out of any + # virtual environments it may be contained in since virtual environments created with + # `--system-site-packages` foil PEX attempts to scrub the sys.path. + resolved = target.resolve_base_interpreter() + if resolved != target: + TRACER.log( + "Resolved base interpreter of {} from virtual environment at {}".format( + resolved, target.prefix + ), + V=3, + ) + target = resolved + + from . import pex + + pythonpath = pex.PEX.stash_pythonpath() + if pythonpath is not None: + TRACER.log("Stashed PYTHONPATH of {}".format(pythonpath), V=2) + elif target == current_interpreter: TRACER.log( "Using the current interpreter {} since it matches constraints and " "PYTHONPATH is not set.".format(sys.executable) @@ -323,13 +334,13 @@ def maybe_reexec_pex(compatibility_constraints=None): "sys.executable={python!r}, " "PEX_PYTHON={pex_python!r}, " "PEX_PYTHON_PATH={pex_python_path!r}, " - "COMPATIBILITY_CONSTRAINTS={compatibility_constraints!r}" + "interpreter_constraints={interpreter_constraints!r}" "{pythonpath}".format( cmdline=" ".join(cmdline), python=sys.executable, pex_python=ENV.PEX_PYTHON, pex_python_path=ENV.PEX_PYTHON_PATH, - compatibility_constraints=compatibility_constraints, + interpreter_constraints=interpreter_constraints, pythonpath=', (stashed) PYTHONPATH="{}"'.format(pythonpath) if pythonpath is not None else "", diff --git a/pex/testing.py b/pex/testing.py index f1ab10f29..f95150a40 100644 --- a/pex/testing.py +++ b/pex/testing.py @@ -479,17 +479,23 @@ def run_pyenv(args): return python, pip, run_pyenv -def ensure_python_venv(version, latest_pip=True): +def ensure_python_venv(version, latest_pip=True, system_site_packages=False): python, pip, _ = ensure_python_distribution(version) venv = safe_mkdtemp() if version in _ALL_PY3_VERSIONS: - subprocess.check_call([python, "-m", "venv", venv]) + args = [python, "-m", "venv", venv] + if system_site_packages: + args.append("--system-site-packages") + subprocess.check_call(args=args) else: - subprocess.check_call([pip, "install", "virtualenv==16.7.10"]) - subprocess.check_call([python, "-m", "virtualenv", venv]) + subprocess.check_call(args=[pip, "install", "virtualenv==16.7.10"]) + args = [python, "-m", "virtualenv", venv, "-q"] + if system_site_packages: + args.append("--system-site-packages") + subprocess.check_call(args=args) python, pip = tuple(os.path.join(venv, "bin", exe) for exe in ("python", "pip")) if latest_pip: - subprocess.check_call([pip, "install", "-U", "pip"]) + subprocess.check_call(args=[pip, "install", "-U", "pip"]) return python, pip diff --git a/tests/test_integration.py b/tests/test_integration.py index 81cb189e2..c635be63d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,6 +1,8 @@ # Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). +from __future__ import absolute_import, print_function + import errno import filecmp import functools @@ -33,15 +35,10 @@ from pex.executor import Executor from pex.interpreter import PythonInterpreter from pex.network_configuration import NetworkConfiguration +from pex.orderedset import OrderedSet from pex.pex_info import PexInfo from pex.pip import get_pip -from pex.requirements import ( - LogicalLine, - ReqInfo, - URLFetcher, - parse_requirement_file, - parse_requirements, -) +from pex.requirements import LogicalLine, ReqInfo, URLFetcher, parse_requirement_file from pex.testing import ( IS_PYPY, NOT_CPYTHON27, @@ -75,6 +72,7 @@ Iterable, Iterator, List, + MutableSet, Optional, Tuple, ) @@ -639,16 +637,16 @@ def test_plain_pex_exec_no_ppp_no_pp_no_constraints(): # type: () -> None with temporary_dir() as td: pex_out_path = os.path.join(td, "pex.pex") - env = make_env( - PEX_IGNORE_RCFILES="1", PATH=os.path.dirname(os.path.realpath(sys.executable)) - ) + env = make_env(PEX_IGNORE_RCFILES="1") res = run_pex_command(["--disable-cache", "-o", pex_out_path], env=env) res.assert_success() - stdin_payload = b"import os, sys; print(os.path.realpath(sys.executable)); sys.exit(0)" + stdin_payload = b"import os, sys; print(sys.executable); sys.exit(0)" stdout, rc = run_simple_pex(pex_out_path, stdin=stdin_payload, env=env) assert rc == 0 - assert os.path.realpath(sys.executable).encode() in stdout + assert ( + PythonInterpreter.get().resolve_base_interpreter().binary.encode() in stdout + ), "Expected the current interpreter to be used when no constraints were supplied." def test_pex_exec_with_pex_python_path_only(): @@ -1907,9 +1905,10 @@ def add_to_path(entry): PYTHONPATH=os.pathsep.join(pythonpath) if pythonpath else None, ) + initial_interpreter = PythonInterpreter.get() output = subprocess.check_output( [ - sys.executable, + initial_interpreter.binary, test_pex, "-c", "import json, os; print(json.dumps(os.environ.copy()))", @@ -1922,10 +1921,25 @@ def add_to_path(entry): assert "PEX_PYTHON_PATH" not in final_env assert "_PEX_SHOULD_EXIT_BOOTSTRAP_REEXEC" not in final_env - expected_exec_chain = [ - PythonInterpreter.from_binary(i).binary for i in [sys.executable] + (exec_chain or []) - ] - assert expected_exec_chain == final_env["_PEX_EXEC_CHAIN"].split(os.pathsep) + expected_exec_interpreters = [initial_interpreter] + if exec_chain: + expected_exec_interpreters.extend(PythonInterpreter.from_binary(b) for b in exec_chain) + final_interpreter = expected_exec_interpreters[-1] + if final_interpreter.is_venv: + # If the last interpreter in the chain is in a virtual environment, it should be fully + # resolved and re-exec'd against in order to escape the virtual environment since we're + # not setting PEX_INHERIT_PATH in these tests. + resolved = final_interpreter.resolve_base_interpreter() + if exec_chain: + # There is already an expected reason to re-exec; so no extra exec step is needed. + expected_exec_interpreters[-1] = resolved + else: + # The expected exec chain is just the initial_interpreter, but it turned out to be a + # venv which forces a re-exec. + expected_exec_interpreters.append(resolved) + expected_exec_chain = [i.binary for i in expected_exec_interpreters] + actual_exec_chain = final_env["_PEX_EXEC_CHAIN"].split(os.pathsep) + assert expected_exec_chain == actual_exec_chain def test_pex_no_reexec_no_constraints(): @@ -1940,17 +1954,15 @@ def test_pex_reexec_no_constraints_pythonpath_present(): def test_pex_no_reexec_constraints_match_current(): # type: () -> None - current_version = ".".join(str(component) for component in sys.version_info[0:3]) - _assert_exec_chain(interpreter_constraints=["=={}".format(current_version)]) + _assert_exec_chain(interpreter_constraints=[PythonInterpreter.get().identity.requirement]) def test_pex_reexec_constraints_match_current_pythonpath_present(): # type: () -> None - current_version = ".".join(str(component) for component in sys.version_info[0:3]) _assert_exec_chain( exec_chain=[sys.executable], pythonpath=["."], - interpreter_constraints=["=={}".format(current_version)], + interpreter_constraints=[PythonInterpreter.get().identity.requirement], ) @@ -2546,3 +2558,76 @@ def line( ReqInfo.create(line=line("translate>=3.2.1", 6), project_name="translate"), ReqInfo.create(line=line("protobuf>=3.11.3", 7), project_name="protobuf"), ] == list(reqs) + + +@pytest.mark.parametrize( + "py_version", + [ + pytest.param(PY27, id="virtualenv-16.7.10"), + pytest.param(PY36, id="pyvenv"), + ], +) +def test_issues_1031(py_version): + # type: (str) -> None + system_site_packages_venv, _ = ensure_python_venv( + py_version, latest_pip=False, system_site_packages=True + ) + standard_venv, _ = ensure_python_venv(py_version, latest_pip=False, system_site_packages=False) + + print_sys_path_code = "import os, sys; print('\\n'.join(map(os.path.realpath, sys.path)))" + + def get_sys_path(python): + # type: (str) -> MutableSet[str] + _, stdout, _ = PythonInterpreter.from_binary(python).execute( + args=["-c", print_sys_path_code] + ) + return OrderedSet(stdout.strip().splitlines()) + + system_site_packages_venv_sys_path = get_sys_path(system_site_packages_venv) + standard_venv_sys_path = get_sys_path(standard_venv) + + def venv_dir(python): + # type: (str) -> str + bin_dir = os.path.dirname(python) + venv_dir = os.path.dirname(bin_dir) + return os.path.realpath(venv_dir) + + system_site_packages = { + p + for p in (system_site_packages_venv_sys_path - standard_venv_sys_path) + if not p.startswith((venv_dir(system_site_packages_venv), venv_dir(standard_venv))) + } + assert len(system_site_packages) == 1, ( + "system_site_packages_venv_sys_path:\n" + "\t{}\n" + "standard_venv_sys_path:\n" + "\t{}\n" + "difference:\n" + "\t{}".format( + "\n\t".join(system_site_packages_venv_sys_path), + "\n\t".join(standard_venv_sys_path), + "\n\t".join(system_site_packages), + ) + ) + system_site_packages_path = system_site_packages.pop() + + def get_system_site_packages_pex_sys_path(**env): + # type: (**Any) -> MutableSet[str] + output, returncode = run_simple_pex_test( + body=print_sys_path_code, + interpreter=PythonInterpreter.from_binary(system_site_packages_venv), + env=make_env(**env), + ) + assert returncode == 0 + return OrderedSet(output.decode("utf-8").strip().splitlines()) + + assert system_site_packages_path not in get_system_site_packages_pex_sys_path() + assert system_site_packages_path not in get_system_site_packages_pex_sys_path( + PEX_INHERIT_PATH="false" + ) + assert system_site_packages_path in get_system_site_packages_pex_sys_path( + PEX_INHERIT_PATH="prefer" + ) + assert system_site_packages_path in get_system_site_packages_pex_sys_path( + PEX_INHERIT_PATH="fallback" + )