Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use append_only_caches in Pex processes. #11760

Merged
merged 1 commit into from
Mar 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/python/pants/backend/codegen/protobuf/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
VenvPex,
VenvPexRequest,
)
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFilesRequest
from pants.core.util_rules.stripped_source_files import StrippedSourceFiles
Expand Down Expand Up @@ -60,6 +61,7 @@ async def generate_python_from_protobuf(
grpc_python_plugin: GrpcPythonPlugin,
python_protobuf_subsystem: PythonProtobufSubsystem,
python_protobuf_mypy_plugin: PythonProtobufMypyPlugin,
pex_environment: PexEnvironment,
) -> GeneratedSources:
download_protoc_request = Get(
DownloadedExternalTool, ExternalToolRequest, protoc.get_request(Platform.current)
Expand Down Expand Up @@ -193,6 +195,7 @@ async def generate_python_from_protobuf(
description=f"Generating Python sources from {request.protocol_target.address}.",
level=LogLevel.DEBUG,
output_directories=(output_dir,),
append_only_caches=pex_environment.append_only_caches(),
),
)

Expand Down
37 changes: 32 additions & 5 deletions src/python/pants/backend/python/util_rules/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import itertools
import json
import logging
import os
import re
import shlex
from collections import defaultdict
from dataclasses import dataclass
Expand Down Expand Up @@ -842,12 +844,33 @@ async def create_venv_pex(
seeded_venv_request = dataclasses.replace(
pex_request, additional_args=pex_request.additional_args + ("--venv", "--seed")
)
result = await Get(BuildPexResult, PexRequest, seeded_venv_request)
venv_pex_result = await Get(BuildPexResult, PexRequest, seeded_venv_request)
# Pex --seed mode outputs the path of the PEX executable. In the --venv case this is the `pex`
# script in the venv root directory.
venv_dir = PurePath(result.result.stdout.decode().strip()).parent
abs_venv_dir = PurePath(venv_pex_result.result.stdout.decode().strip()).parent

# TODO(John Sirois): Instead of performing this calculation, which uses knowledge of the
# PEX_ROOT layout (It assumes a `venvs` dir), rely on verbose seeding or a pex tool to get
# the relevant PEX_ROOT / venv pex directory combination in a single Process execution so
# we can relpath them.

pex_root = pex_environment.pex_root
if pex_root.is_absolute():
if not os.path.commonprefix([pex_root, abs_venv_dir]) == str(pex_root):
raise AssertionError(
f"The PEX_ROOT is absolute {pex_root} but does not match the venv pex "
f"{abs_venv_dir}."
)
venv_dir = abs_venv_dir.relative_to(pex_root)
else:
match = re.search(r"/(?P<venv_dir>venvs/.*)$", str(abs_venv_dir))
if match is None:
raise AssertionError(
f"Failed to find PEX_ROOT {pex_root} in venv_pex path {abs_venv_dir}."
)
venv_dir = pex_root / match["venv_dir"]

venv_script_writer = VenvScriptWriter(pex=result.create_pex(), venv_dir=venv_dir)
venv_script_writer = VenvScriptWriter(pex=venv_pex_result.create_pex(), venv_dir=venv_dir)
pex = venv_script_writer.exe(bash, pex_environment)
python = venv_script_writer.python(bash, pex_environment)
scripts = {
Expand All @@ -868,7 +891,7 @@ async def create_venv_pex(

return VenvPex(
digest=input_digest,
pex_filename=result.pex_filename,
pex_filename=venv_pex_result.pex_filename,
pex=pex.script,
python=python.script,
bin=FrozenDict((bin_name, venv_script.script) for bin_name, venv_script in scripts.items()),
Expand Down Expand Up @@ -982,6 +1005,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=pex_environment.append_only_caches(),
timeout_seconds=request.timeout_seconds,
execution_slot_variable=request.execution_slot_variable,
cache_scope=request.cache_scope,
Expand Down Expand Up @@ -1032,7 +1056,9 @@ def __init__(


@rule
async def setup_venv_pex_process(request: VenvPexProcess) -> Process:
async def setup_venv_pex_process(
request: VenvPexProcess, pex_environment: PexEnvironment
) -> Process:
venv_pex = request.venv_pex
argv = (venv_pex.pex.argv0, *request.argv)
input_digest = (
Expand All @@ -1048,6 +1074,7 @@ async def setup_venv_pex_process(request: VenvPexProcess) -> Process:
env=request.extra_env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=pex_environment.append_only_caches(),
timeout_seconds=request.timeout_seconds,
execution_slot_variable=request.execution_slot_variable,
cache_scope=request.cache_scope,
Expand Down
5 changes: 1 addition & 4 deletions src/python/pants/backend/python/util_rules/pex_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,11 @@ async def setup_pex_cli_process(
digests_to_merge.append(request.additional_input_digest)
input_digest = await Get(Digest, MergeDigests(digests_to_merge))

pex_root_path = ".cache/pex_root"
argv = [
pex_binary.exe,
*cert_args,
"--python-path",
create_path_env_var(pex_env.interpreter_search_paths),
"--pex-root",
pex_root_path,
# Ensure Pex and its subprocesses create temporary files in the the process execution
# sandbox. It may make sense to do this generally for Processes, but in the short term we
# have known use cases where /tmp is too small to hold large wheel downloads Pex is asked to
Expand Down Expand Up @@ -182,7 +179,7 @@ async def setup_pex_cli_process(
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches={"pex_root": pex_root_path},
append_only_caches=pex_env.append_only_caches(),
level=request.level,
cache_scope=request.cache_scope,
)
Expand Down
15 changes: 9 additions & 6 deletions src/python/pants/backend/python/util_rules/pex_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import os
from dataclasses import dataclass
from pathlib import Path
from pathlib import PurePath
from textwrap import dedent
from typing import Mapping, Optional, Tuple, cast

Expand Down Expand Up @@ -106,7 +106,6 @@ class PexEnvironment(EngineAwareReturnType):
path: Tuple[str, ...]
interpreter_search_paths: Tuple[str, ...]
subprocess_environment_dict: FrozenDict[str, str]
named_caches_dir: str
bootstrap_python: Optional[PythonExecutable] = None

def create_argv(
Expand All @@ -129,7 +128,7 @@ def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]:
PATH=create_path_env_var(self.path),
PEX_INHERIT_PATH="false",
PEX_IGNORE_RCFILES="true",
PEX_ROOT=os.path.join(self.named_caches_dir, "pex_root"),
PEX_ROOT=str(self.pex_root),
**self.subprocess_environment_dict,
)
# NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been
Expand All @@ -139,6 +138,13 @@ def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]:
d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths)
return d

@property
def pex_root(self) -> PurePath:
return PurePath(".cache/pex_root")

def append_only_caches(self) -> Mapping[str, str]:
return {"pex_root": str(self.pex_root)}

def level(self) -> LogLevel:
return LogLevel.DEBUG if self.bootstrap_python else LogLevel.WARN

Expand Down Expand Up @@ -227,9 +233,6 @@ def first_python_binary() -> Optional[PythonExecutable]:
python_setup.interpreter_search_paths(pex_relevant_environment)
),
subprocess_environment_dict=subprocess_env_vars.vars,
# TODO: This path normalization is duplicated with `engine_initializer.py`. How can we do
# the normalization only once, via the options system?
named_caches_dir=Path(global_options.options.named_caches_dir).resolve().as_posix(),
bootstrap_python=first_python_binary(),
)

Expand Down