Skip to content

Commit

Permalink
Fix vendored attrs sys.path leak. (#2328)
Browse files Browse the repository at this point in the history
Whenever a given Python interpreter on a machine was first identified by
Pex during the PEX boot process, Pex's own vendored attrs would be
leaked onto the hermetic `sys.path` of that interpreter forevermore.
This would lead to Pex's vendored attrs not being scrubbed from the
`sys.path` during PEX boot handoff to user code.
  • Loading branch information
jsirois committed Jan 16, 2024
1 parent 23d4810 commit 4fb9444
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 8 deletions.
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Release Notes

## 2.1.159

This release brings a fix for leaks of Pex's vendored `attrs` onto the
`sys.path` of PEXes during boot in common usage scenarios.

* Fix vendored attrs `sys.path` leak. (#2328)

## 2.1.158

This release adds support for tab completion to all PEX repls running
Expand Down
51 changes: 44 additions & 7 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from pex import third_party
from pex.common import is_exe, safe_mkdtemp, safe_rmtree
from pex.compatibility import commonpath
from pex.executor import Executor
from pex.jobs import Job, Retain, SpawnedJob, execute_parallel
from pex.orderedset import OrderedSet
Expand Down Expand Up @@ -183,10 +184,23 @@ def get(cls, binary=None):
# vendored attrs distribution so that its `cache_hash=True` feature can work (see the
# bottom of pex/third_party/__init__.py where the vendor importer is installed). We ignore
# such adjoined `sys.path` entries to discover the true base interpreter `sys.path`.
pythonpath = frozenset(
os.environ.get("PYTHONPATH", "").split(os.pathsep) + list(third_party.exposed())
pythonpath = os.environ.get("PYTHONPATH")
internal_entries = frozenset(
(pythonpath.split(os.pathsep) if pythonpath else []) + list(third_party.exposed())
)
sys_path = OrderedSet(item for item in sys.path if item and item not in pythonpath)

def is_internal_entry(entry):
    # type: (str) -> bool
    """Return True if `entry` is an internal `sys.path` entry that should be ignored.

    An entry is internal when it is exactly one of `internal_entries` or when it is an
    absolute path located underneath one of them.
    """
    if entry in internal_entries:
        return True
    if not os.path.isabs(entry):
        return False
    for internal_entry in internal_entries:
        # PYTHONPATH entries are taken verbatim from the environment, so they can be empty
        # (e.g.: a trailing separator) or relative. Those can only match by the exact
        # comparison above; feeding them to commonpath alongside an absolute entry raises
        # ValueError under `os.path.commonpath` semantics.
        if not internal_entry or not os.path.isabs(internal_entry):
            continue
        try:
            if internal_entry == commonpath((internal_entry, entry)):
                return True
        except ValueError:
            # No common path can be computed (e.g.: paths on different Windows drives), so
            # the entry is not underneath this internal entry.
            continue
    return False

sys_path = OrderedSet(entry for entry in sys.path if entry and not is_internal_entry(entry))

site_packages = OrderedSet(
path
Expand Down Expand Up @@ -227,12 +241,34 @@ def get(cls, binary=None):
configured_macosx_deployment_target=configured_macosx_deployment_target,
)

# Increment this integer version number when changing the encode / decode format or content.
_FORMAT_VERSION = 1

@classmethod
def decode(cls, encoded):
TRACER.log("creating PythonIdentity from encoded: %s" % encoded, V=9)
# type: (Text) -> PythonIdentity
TRACER.log("creating PythonIdentity from encoded: {encoded}".format(encoded=encoded), V=9)
values = json.loads(encoded)
if len(values) != 16:
raise cls.InvalidError("Invalid interpreter identity: %s" % encoded)
if len(values) != 17:
raise cls.InvalidError(
"Invalid interpreter identity: {encoded}".format(encoded=encoded)
)
try:
format_version = int(values.pop("__format_version__", "0"))
except ValueError as e:
raise cls.InvalidError(
"The PythonIdentity __format_version__ is invalid: {err}".format(err=e)
)
else:
if format_version < cls._FORMAT_VERSION:
raise cls.InvalidError(
"The PythonIdentity __format_version__ was {format_version}, but the current "
"version is {current_version}. Upgrading existing encoding: {encoded}".format(
format_version=format_version,
current_version=cls._FORMAT_VERSION,
encoded=encoded,
)
)

version = tuple(values.pop("version"))
pypy_version = tuple(values.pop("pypy_version") or ()) or None
Expand All @@ -252,7 +288,7 @@ def iter_tags():
env_markers = MarkerEnvironment(**values.pop("env_markers"))
return cls(
version=cast("Tuple[int, int, int]", version),
pypy_version=pypy_version,
pypy_version=cast("Optional[Tuple[int, int, int]]", pypy_version),
supported_tags=iter_tags(),
configured_macosx_deployment_target=configured_macosx_deployment_target,
env_markers=env_markers,
Expand Down Expand Up @@ -309,6 +345,7 @@ def __init__(

def encode(self):
values = dict(
__format_version__=self._FORMAT_VERSION,
binary=self._binary,
prefix=self._prefix,
base_prefix=self._base_prefix,
Expand Down
2 changes: 1 addition & 1 deletion pex/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

__version__ = "2.1.158"
__version__ = "2.1.159"
54 changes: 54 additions & 0 deletions tests/integration/test_interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import os.path
import subprocess
import sys

from pex.compatibility import commonpath
from pex.interpreter import PythonInterpreter
from pex.typing import TYPE_CHECKING
from pex.variables import ENV
from testing import PY310, ensure_python_interpreter, run_pex_command

if TYPE_CHECKING:
from typing import Any


def test_boot_identification_leak(tmpdir):
    # type: (Any) -> None
    """Check that cached interpreter info never records `sys.path` entries under the PEX_ROOT.

    An empty PEX is built and then booted with two different interpreters; after each step
    the interpreter identified via the same PEX_ROOT must have a `sys.path` free of entries
    located inside that PEX_ROOT.
    """

    workdir = str(tmpdir)
    pex_root = os.path.join(workdir, "pex_root")

    def assert_no_isolated_leak(python):
        # type: (str) -> None
        # Clear the in-memory cache so the interpreter info is re-read for this PEX_ROOT.
        with ENV.patch(PEX_ROOT=pex_root), PythonInterpreter._cleared_memory_cache():
            interpreter = PythonInterpreter.from_binary(python)
            leaked_entries = [
                entry
                for entry in interpreter.sys_path
                if pex_root == commonpath((pex_root, entry))
            ]
            assert not leaked_entries, (
                "The cached interpreter info for {python} contains leaked entries:\n"
                "{entries}".format(python=python, entries="\n".join(interpreter.sys_path))
            )

    # Step 1: building the PEX with the current interpreter must not leak.
    empty_pex = os.path.join(workdir, "empty.pex")
    build_args = ["--pex-root", pex_root, "--runtime-pex-root", pex_root, "-o", empty_pex]
    run_pex_command(args=build_args, python=sys.executable).assert_success()
    assert_no_isolated_leak(sys.executable)

    # Step 2: booting the PEX with the current interpreter must not leak.
    subprocess.check_call(args=[sys.executable, empty_pex, "-c", ""])
    assert_no_isolated_leak(sys.executable)

    # Step 3: booting the PEX with a different interpreter must not leak either.
    other_python = ensure_python_interpreter(PY310)
    subprocess.check_call(args=[other_python, empty_pex, "-c", ""])
    # N.B.: Prior to the fix, this final check failed with a vendored attrs leak:
    # E   AssertionError: The cached interpreter info for /home/jsirois/.pex_dev/pyenv/versions/3.10.7/bin/python3.10 contains leaked entries:
    # E     /tmp/pytest-of-jsirois/pytest-10/test_boot_identification_leak0/pex_root/isolated/975c556eea71292a09d930db2ca41875066d8be6/pex/vendor/_vendored/attrs
    # E     /home/jsirois/.pex_dev/pyenv/versions/3.10.7/lib/python310.zip
    # E     /home/jsirois/.pex_dev/pyenv/versions/3.10.7/lib/python3.10
    # E     /home/jsirois/.pex_dev/pyenv/versions/3.10.7/lib/python3.10/lib-dynload
    # E     /home/jsirois/.pex_dev/pyenv/versions/3.10.7/lib/python3.10/site-packages
    assert_no_isolated_leak(other_python)

0 comments on commit 4fb9444

Please sign in to comment.