Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up run to no longer rebuild a Pex on source file changes #10410

Merged
merged 2 commits into from Jul 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/python/pants/backend/python/register.py
Expand Up @@ -13,15 +13,16 @@
from pants.backend.python.python_requirements import PythonRequirements
from pants.backend.python.rules import (
coverage,
create_python_binary,
download_pex_bin,
inject_ancestor_files,
inject_init,
pex,
pex_from_targets,
pytest_runner,
python_create_binary,
python_sources,
repl,
run_python_binary,
run_setup_py,
)
from pants.backend.python.subsystems import python_native_code, subprocess_environment
Expand Down Expand Up @@ -62,9 +63,10 @@ def rules():
*pex.rules(),
*pex_from_targets.rules(),
*pytest_runner.rules(),
*python_create_binary.rules(),
*create_python_binary.rules(),
*python_native_code.rules(),
*repl.rules(),
*run_python_binary.rules(),
*run_setup_py.rules(),
*subprocess_environment.rules(),
)
Expand Down
72 changes: 72 additions & 0 deletions src/python/pants/backend/python/rules/run_python_binary.py
@@ -0,0 +1,72 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pants.backend.python.rules.create_python_binary import PythonBinaryFieldSet
from pants.backend.python.rules.pex import Pex, PexPlatforms
from pants.backend.python.rules.pex_from_targets import PexFromTargetsRequest
from pants.backend.python.rules.python_sources import (
UnstrippedPythonSources,
UnstrippedPythonSourcesRequest,
)
from pants.backend.python.target_types import PythonBinaryDefaults, PythonBinarySources
from pants.core.goals.binary import BinaryFieldSet
from pants.core.goals.run import RunRequest
from pants.core.util_rules.determine_source_files import AllSourceFilesRequest, SourceFiles
from pants.engine.addresses import Addresses
from pants.engine.fs import Digest, MergeDigests
from pants.engine.rules import SubsystemRule, rule
from pants.engine.selectors import Get, MultiGet
from pants.engine.target import InvalidFieldException, TransitiveTargets
from pants.engine.unions import UnionRule


@rule
async def run_python_binary(
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: this doesn't actually run the binary: just produces it.

field_set: PythonBinaryFieldSet, python_binary_defaults: PythonBinaryDefaults
) -> RunRequest:
Comment on lines +24 to +26
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels like rather than being binary (or repl) specific, this is connected to TwoStepPex... where rather than creating two pexes (one for requirements, the other for sources, iirc) we instead could use a requirements pex, and a Digest of loose sources.

Repl already uses TwoStepPex, so perhaps starting there would make sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. We use TwoStepPex for binary and awslambda, actually. TwoStepPex results in one single Pex file. I think it's important that it continues to behave the same way.

I plan to change repl tomorrow to behave the same way as run. If it helps, I can bundle it into this PR. Only wanted to get this one up as a checkpoint and to make it in time for the release.

Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. We use TwoStepPex for binary and awslambda, actually. TwoStepPex results in one single Pex file. I think it's important that it continues to behave the same way.

Got it: my mistake.

I plan to change repl tomorrow to behave the same way as run. If it helps, I can bundle it into this PR. Only wanted to get this one up as a checkpoint and to make it in time for the release.

No need to bundle. Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No mistake - it was a good question! I was wondering around the same time you wrote the original question "Hm, can we delete TwoStepPex now?"

entry_point = field_set.entry_point.value
if entry_point is None:
binary_sources = await Get(
SourceFiles, AllSourceFilesRequest([field_set.sources], strip_source_roots=True)
)
entry_point = PythonBinarySources.translate_source_file_to_entry_point(binary_sources.files)
if entry_point is None:
raise InvalidFieldException(
"You must either specify `sources` or `entry_point` for the `python_binary` target "
f"{repr(field_set.address)} in order to run it, but both fields were undefined."
)

transitive_targets = await Get(TransitiveTargets, Addresses([field_set.address]))

output_filename = f"{field_set.address.target_name}.pex"
pex_request = Get(
Pex,
PexFromTargetsRequest(
addresses=Addresses([field_set.address]),
platforms=PexPlatforms.create_from_platforms_field(field_set.platforms),
output_filename=output_filename,
additional_args=field_set.generate_additional_args(python_binary_defaults),
include_source_files=False,
),
)
source_files_request = Get(
UnstrippedPythonSources,
UnstrippedPythonSourcesRequest(transitive_targets.closure, include_files=True),
)
pex, source_files = await MultiGet(pex_request, source_files_request)

merged_digest = await Get(Digest, MergeDigests([pex.digest, source_files.snapshot.digest]))
return RunRequest(
digest=merged_digest,
binary_name=pex.output_filename,
extra_args=("-m", entry_point),
env={"PEX_EXTRA_SYS_PATH": ":".join(source_files.source_roots)},
)


def rules():
return [
run_python_binary,
UnionRule(BinaryFieldSet, PythonBinaryFieldSet),
SubsystemRule(PythonBinaryDefaults),
]
@@ -0,0 +1,68 @@
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pathlib import Path
from textwrap import dedent

from pants.base.build_environment import get_buildroot
from pants.testutil.pants_run_integration_test import PantsRunIntegrationTest
from pants.util.contextutil import temporary_dir


class RunPythonBinaryIntegrationTest(PantsRunIntegrationTest):
def test_sample_script(self) -> None:
"""Test that we properly run a `python_binary` target.

This checks a few things:
- We can handle source roots.
- We properly load third party requirements.
- We propagate the error code.
"""
with temporary_dir(root_dir=get_buildroot()) as tmpdir:
tmpdir_relative = Path(tmpdir).relative_to(get_buildroot())

src_root1 = Path(tmpdir, "src_root1/project")
src_root1.mkdir(parents=True)
(src_root1 / "app.py").write_text(
dedent(
"""\
import sys
from utils.strutil import upper_case


if __name__ == "__main__":
print(upper_case("Hello world."))
print("Hola, mundo.", file=sys.stderr)
sys.exit(23)
"""
)
)
(src_root1 / "BUILD").write_text("python_binary(sources=['app.py'])")

src_root2 = Path(tmpdir, "src_root2/utils")
src_root2.mkdir(parents=True)
(src_root2 / "strutil.py").write_text(
dedent(
"""\
def upper_case(s):
return s.upper()
"""
)
)
(src_root2 / "BUILD").write_text("python_library()")
result = self.run_pants(
[
"--dependency-inference",
(
f"--source-root-patterns=['/{tmpdir_relative}/src_root1', "
f"'/{tmpdir_relative}/src_root2']"
),
"--pants-ignore=__pycache__",
"run",
f"{tmpdir_relative}/src_root1/project/app.py",
]
)

assert result.returncode == 23
assert result.stdout_data == "HELLO WORLD.\n"
assert "Hola, mundo.\n" in result.stderr_data
42 changes: 36 additions & 6 deletions src/python/pants/core/goals/run.py
@@ -1,12 +1,14 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from dataclasses import dataclass
from pathlib import PurePath
from typing import Iterable, Mapping, Optional, Tuple

from pants.base.build_root import BuildRoot
from pants.core.goals.binary import BinaryFieldSet, CreatedBinary
from pants.core.goals.binary import BinaryFieldSet
from pants.engine.console import Console
from pants.engine.fs import DirectoryToMaterialize, Workspace
from pants.engine.fs import Digest, DirectoryToMaterialize, Workspace
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.interactive_process import InteractiveProcess, InteractiveRunner
from pants.engine.rules import goal_rule
Expand All @@ -15,6 +17,30 @@
from pants.option.custom_types import shell_str
from pants.option.global_options import GlobalOptions
from pants.util.contextutil import temporary_dir
from pants.util.frozendict import FrozenDict
from pants.util.meta import frozen_after_init


@frozen_after_init
@dataclass(unsafe_hash=True)
class RunRequest:
digest: Digest
binary_name: str
extra_args: Tuple[str, ...]
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: "extra" to me tends to imply that things are added afterward. Maybe prefix_args?

env: FrozenDict[str, str]

def __init__(
self,
*,
digest: Digest,
binary_name: str,
extra_args: Optional[Iterable[str]] = None,
env: Optional[Mapping[str, str]] = None,
) -> None:
self.digest = digest
self.binary_name = binary_name
self.extra_args = tuple(extra_args or ())
self.env = FrozenDict(env or {})


class RunOptions(GoalSubsystem):
Expand Down Expand Up @@ -66,17 +92,21 @@ async def run(
),
)
field_set = targets_to_valid_field_sets.field_sets[0]
binary = await Get(CreatedBinary, BinaryFieldSet, field_set)
request = await Get(RunRequest, BinaryFieldSet, field_set)

workdir = global_options.options.pants_workdir
with temporary_dir(root_dir=workdir, cleanup=True) as tmpdir:
path_relative_to_build_root = PurePath(tmpdir).relative_to(build_root.path).as_posix()
workspace.materialize_directory(
DirectoryToMaterialize(binary.digest, path_prefix=path_relative_to_build_root)
DirectoryToMaterialize(request.digest, path_prefix=path_relative_to_build_root)
)

full_path = PurePath(tmpdir, binary.binary_name).as_posix()
process = InteractiveProcess(argv=(full_path, *options.values.args), run_in_workspace=True)
full_path = PurePath(tmpdir, request.binary_name).as_posix()
process = InteractiveProcess(
argv=(full_path, *request.extra_args, *options.values.args),
env=request.env,
run_in_workspace=True,
)
try:
result = interactive_runner.run_process(process)
exit_code = result.exit_code
Expand Down
14 changes: 7 additions & 7 deletions src/python/pants/core/goals/run_test.py
Expand Up @@ -5,8 +5,8 @@

from pants.base.build_root import BuildRoot
from pants.base.specs import SingleAddress
from pants.core.goals.binary import BinaryFieldSet, CreatedBinary
from pants.core.goals.run import Run, RunOptions, run
from pants.core.goals.binary import BinaryFieldSet
from pants.core.goals.run import Run, RunOptions, RunRequest, run
from pants.engine.addresses import Address
from pants.engine.fs import CreateDigest, Digest, FileContent, Workspace
from pants.engine.interactive_process import InteractiveProcess, InteractiveRunner
Expand All @@ -28,14 +28,14 @@


class RunTest(TestBase):
def create_mock_binary(self, program_text: bytes) -> CreatedBinary:
def create_mock_run_request(self, program_text: bytes) -> RunRequest:
digest = self.request_single_product(
Digest,
CreateDigest(
[FileContent(path="program.py", content=program_text, is_executable=True)]
),
)
return CreatedBinary(binary_name="program.py", digest=digest)
return RunRequest(digest=digest, binary_name="program.py")

def single_target_run(
self, *, console: MockConsole, program_text: bytes, address_spec: str,
Expand Down Expand Up @@ -74,9 +74,9 @@ class TestBinaryTarget(Target):
mock=lambda _: TargetsToValidFieldSets({target_with_origin: [field_set]}),
),
MockGet(
product_type=CreatedBinary,
product_type=RunRequest,
subject_type=TestBinaryFieldSet,
mock=lambda _: self.create_mock_binary(program_text),
mock=lambda _: self.create_mock_run_request(program_text),
),
],
)
Expand All @@ -92,7 +92,7 @@ def test_normal_run(self) -> None:

def test_materialize_input_files(self) -> None:
program_text = b'#!/usr/bin/python\nprint("hello")'
binary = self.create_mock_binary(program_text)
binary = self.create_mock_run_request(program_text)
interactive_runner = InteractiveRunner(self.scheduler)
process = InteractiveProcess(
argv=("./program.py",), run_in_workspace=False, input_digest=binary.digest,
Expand Down