Skip to content

Commit

Permalink
Partition fmt by default.
Browse files Browse the repository at this point in the history
[ci skip-build-wheels]
[ci skip-rust]
  • Loading branch information
stuhood committed Jan 18, 2022
1 parent f737f93 commit 8b219af
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
35 changes: 34 additions & 1 deletion src/python/pants/core/goals/fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule, rule
from pants.engine.target import SourcesField, Targets
from pants.engine.unions import UnionMembership, union
from pants.util.collections import partition_sequentially
from pants.util.logging import LogLevel
from pants.util.strutil import strip_v2_chroot_path

Expand Down Expand Up @@ -135,6 +136,11 @@ def register_options(cls, register) -> None:
advanced=True,
type=bool,
default=False,
removal_version="2.11.0.dev0",
removal_hint=(
"Formatters are now broken into multiple batches by default using the "
"`--batch-size` argument."
),
help=(
"Rather than formatting all files in a single batch, format each file as a "
"separate process.\n\nWhy do this? You'll get many more cache hits. Why not do "
Expand All @@ -145,11 +151,35 @@ def register_options(cls, register) -> None:
"faster than `--no-per-file-caching` for your use case."
),
)
register(
"--batch-size",
advanced=True,
type=int,
default=128,
help=(
"The target minimum number of files that will be included in each formatter batch.\n"
"\n"
"Formatter processes are batched for a few reasons:\n"
"\n"
"1. to avoid OS argument length limits (in processes which don't support argument "
"files)\n"
"2. to support more stable cache keys than would be possible if all files were "
"operated on in a single batch.\n"
"3. to allow for parallelism in formatter processes which don't have internal "
"parallelism, or -- if they do support internal parallelism -- to improve scheduling "
"behavior when multiple processes are competing for cores and so internal "
"parallelism cannot be used perfectly.\n"
),
)

@property
def per_file_caching(self) -> bool:
return cast(bool, self.options.per_file_caching)

@property
def batch_size(self) -> int:
return cast(int, self.options.batch_size)


class Fmt(Goal):
subsystem_cls = FmtSubsystem
Expand Down Expand Up @@ -187,9 +217,12 @@ async def fmt(
per_language_results = await MultiGet(
Get(
_LanguageFmtResults,
_LanguageFmtRequest(fmt_requests, Targets(targets)),
_LanguageFmtRequest(fmt_requests, Targets(target_batch)),
)
for fmt_requests, targets in targets_by_fmt_request_order.items()
for target_batch in partition_sequentially(
targets, key=lambda t: t.address.spec, size_min=fmt_subsystem.batch_size
)
)

individual_results = list(
Expand Down
1 change: 0 additions & 1 deletion src/python/pants/core/goals/fmt_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ def run_fmt(rule_runner: RuleRunner, *, target_specs: List[str], per_file_cachin
return result.stderr


@logging
@pytest.mark.parametrize("per_file_caching", [True, False])
def test_summary(per_file_caching: bool) -> None:
"""Tests that the final summary is correct.
Expand Down

0 comments on commit 8b219af

Please sign in to comment.