Partition lint by default.
[ci skip-rust]
[ci skip-build-wheels]
stuhood committed Jan 18, 2022
1 parent 8b219af commit e28c35b
Showing 3 changed files with 129 additions and 65 deletions.
src/python/pants/core/goals/fmt_test.py (2 changes: 1 addition & 1 deletion)
@@ -17,7 +17,7 @@
 from pants.engine.rules import Get, collect_rules, rule
 from pants.engine.target import FieldSet, MultipleSourcesField, Target
 from pants.engine.unions import UnionRule
-from pants.testutil.rule_runner import RuleRunner, logging
+from pants.testutil.rule_runner import RuleRunner
 from pants.util.logging import LogLevel
 
 FORTRAN_FILE = FileContent("formatted.f98", b"READ INPUT TAPE 5\n")
src/python/pants/core/goals/lint.py (90 changes: 66 additions & 24 deletions)
@@ -16,8 +16,9 @@
 from pants.engine.goal import Goal, GoalSubsystem
 from pants.engine.process import FallibleProcessResult
 from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule
-from pants.engine.target import Targets
+from pants.engine.target import FieldSet, Targets
 from pants.engine.unions import UnionMembership, union
+from pants.util.collections import partition_sequentially
 from pants.util.logging import LogLevel
 from pants.util.memo import memoized_property
 from pants.util.meta import frozen_after_init
@@ -153,6 +154,11 @@ def register_options(cls, register) -> None:
             advanced=True,
             type=bool,
             default=False,
+            removal_version="2.11.0.dev0",
+            removal_hint=(
+                "Linters are now broken into multiple batches by default using the "
+                "`--batch-size` argument."
+            ),
             help=(
                 "Rather than linting all files in a single batch, lint each file as a "
                 "separate process.\n\nWhy do this? You'll get many more cache hits. Why not do "
@@ -163,11 +169,35 @@ def register_options(cls, register) -> None:
                 "faster than `--no-per-file-caching` for your use case."
             ),
         )
+        register(
+            "--batch-size",
+            advanced=True,
+            type=int,
+            default=128,
+            help=(
+                "The target minimum number of files that will be included in each linter batch.\n"
+                "\n"
+                "Linter processes are batched for a few reasons:\n"
+                "\n"
+                "1. to avoid OS argument length limits (in processes which don't support argument "
+                "files)\n"
+                "2. to support more stable cache keys than would be possible if all files were "
+                "operated on in a single batch.\n"
+                "3. to allow for parallelism in linter processes which don't have internal "
+                "parallelism, or -- if they do support internal parallelism -- to improve scheduling "
+                "behavior when multiple processes are competing for cores and so internal "
+                "parallelism cannot be used perfectly.\n"
+            ),
+        )
 
     @property
     def per_file_caching(self) -> bool:
         return cast(bool, self.options.per_file_caching)
 
+    @property
+    def batch_size(self) -> int:
+        return cast(int, self.options.batch_size)
+
 
 class Lint(Goal):
     subsystem_cls = LintSubsystem
@@ -182,7 +212,7 @@ async def lint(
     union_membership: UnionMembership,
     dist_dir: DistDir,
 ) -> Lint:
-    request_types = cast("Iterable[type[StyleRequest]]", union_membership[LintRequest])
+    request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest])
     requests = tuple(
         request_type(
             request_type.field_set_type.create(target)
@@ -193,36 +223,48 @@
     )
 
     if lint_subsystem.per_file_caching:
-        all_per_file_results = await MultiGet(
+        all_batch_results = await MultiGet(
             Get(LintResults, LintRequest, request.__class__([field_set]))
             for request in requests
-            for field_set in request.field_sets
             if request.field_sets
+            for field_set in request.field_sets
         )
+    else:
 
-        def key_fn(results: LintResults):
-            return results.linter_name
-
-        # NB: We must pre-sort the data for itertools.groupby() to work properly.
-        sorted_all_per_files_results = sorted(all_per_file_results, key=key_fn)
-        # We consolidate all results for each linter into a single `LintResults`.
-        all_results = tuple(
-            LintResults(
-                itertools.chain.from_iterable(
-                    per_file_results.results for per_file_results in all_linter_results
-                ),
-                linter_name=linter_name,
-            )
-            for linter_name, all_linter_results in itertools.groupby(
-                sorted_all_per_files_results, key=key_fn
+        def address_str(fs: FieldSet) -> str:
+            return fs.address.spec
+
+        all_batch_results = await MultiGet(
+            Get(LintResults, LintRequest, request.__class__(field_sets))
+            for request in requests
+            if request.field_sets
+            for field_sets in partition_sequentially(
+                request.field_sets, key=address_str, size_min=lint_subsystem.batch_size
             )
         )
-    else:
-        all_results = await MultiGet(
-            Get(LintResults, LintRequest, request) for request in requests if request.field_sets
-        )
 
-    all_results = tuple(sorted(all_results, key=lambda results: results.linter_name))
+    def key_fn(results: LintResults):
+        return results.linter_name
+
+    # NB: We must pre-sort the data for itertools.groupby() to work properly.
+    sorted_all_batch_results = sorted(all_batch_results, key=key_fn)
+    # We consolidate all results for each linter into a single `LintResults`.
+    all_results = tuple(
+        sorted(
+            (
+                LintResults(
+                    itertools.chain.from_iterable(
+                        per_file_results.results for per_file_results in all_linter_results
+                    ),
+                    linter_name=linter_name,
+                )
+                for linter_name, all_linter_results in itertools.groupby(
+                    sorted_all_batch_results, key=key_fn
+                )
+            ),
+            key=lambda results: results.linter_name,
+        )
+    )
 
     def get_tool_name(res: LintResults) -> str:
         return res.linter_name
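The new default path above leans on `partition_sequentially` from `pants.util.collections`, whose implementation is not part of this diff. From the call site, it is expected to order each request's field sets by address and split them into consecutive batches of roughly `--batch-size` entries, keeping batch membership (and therefore process cache keys) reasonably stable. The snippet below is only a minimal sketch of that idea under those assumptions -- `partition_by_size` and the sample addresses are illustrative, not Pants APIs -- and it does not attempt the stable-boundary behavior of the real helper.

from __future__ import annotations

from typing import Callable, Iterable, Iterator


def partition_by_size(
    items: Iterable[str], *, key: Callable[[str], str], size_min: int
) -> Iterator[list[str]]:
    # Order items by `key`, then emit consecutive batches of at least `size_min` items.
    batch: list[str] = []
    for item in sorted(items, key=key):
        batch.append(item)
        if len(batch) >= size_min:
            yield batch
            batch = []
    if batch:
        yield batch


# 300 hypothetical addresses land in three batches with the default batch size of 128.
addresses = [f"src/python/example:tgt_{i:03d}" for i in range(300)]
batches = list(partition_by_size(addresses, key=lambda a: a, size_min=128))
assert [len(b) for b in batches] == [128, 128, 44]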
src/python/pants/core/goals/lint_test.py (102 changes: 62 additions & 40 deletions)
@@ -112,7 +112,8 @@ def run_lint_rule(
     *,
     lint_request_types: List[Type[LintRequest]],
     targets: List[Target],
-    per_file_caching: bool,
+    per_file_caching: bool = False,
+    batch_size: int = 128,
 ) -> Tuple[int, str]:
     with mock_console(rule_runner.options_bootstrapper) as (console, stdio_reader):
         union_membership = UnionMembership({LintRequest: lint_request_types})
@@ -123,7 +124,9 @@
                 Workspace(rule_runner.scheduler, _enforce_effects=False),
                 Targets(targets),
                 create_goal_subsystem(
-                    LintSubsystem, per_file_caching=per_file_caching, per_target_caching=False
+                    LintSubsystem,
+                    per_file_caching=per_file_caching,
+                    batch_size=128,
                 ),
                 union_membership,
                 DistDir(relpath=Path("dist")),
@@ -141,22 +144,20 @@
         return result.exit_code, stdio_reader.get_stderr()
 
 
-def test_invalid_target_noops(rule_runner: RuleRunner) -> None:
-    def assert_noops(per_file_caching: bool) -> None:
-        exit_code, stderr = run_lint_rule(
-            rule_runner,
-            lint_request_types=[InvalidRequest],
-            targets=[make_target()],
-            per_file_caching=per_file_caching,
-        )
-        assert exit_code == 0
-        assert stderr == ""
-
-    assert_noops(per_file_caching=False)
-    assert_noops(per_file_caching=True)
+@pytest.mark.parametrize("per_file_caching", [True, False])
+def test_invalid_target_noops(rule_runner: RuleRunner, per_file_caching: bool) -> None:
+    exit_code, stderr = run_lint_rule(
+        rule_runner,
+        lint_request_types=[InvalidRequest],
+        targets=[make_target()],
+        per_file_caching=per_file_caching,
+    )
+    assert exit_code == 0
+    assert stderr == ""
 
 
-def test_summary(rule_runner: RuleRunner) -> None:
+@pytest.mark.parametrize("per_file_caching", [True, False])
+def test_summary(rule_runner: RuleRunner, per_file_caching: bool) -> None:
     """Test that we render the summary correctly.
 
     This tests that we:
@@ -166,31 +167,52 @@ def test_summary(rule_runner: RuleRunner) -> None:
     good_address = Address("", target_name="good")
     bad_address = Address("", target_name="bad")
 
-    def assert_expected(*, per_file_caching: bool) -> None:
-        exit_code, stderr = run_lint_rule(
-            rule_runner,
-            lint_request_types=[
-                ConditionallySucceedsRequest,
-                FailingRequest,
-                SkippedRequest,
-                SuccessfulRequest,
-            ],
-            targets=[make_target(good_address), make_target(bad_address)],
-            per_file_caching=per_file_caching,
-        )
-        assert exit_code == FailingRequest.exit_code([bad_address])
-        assert stderr == dedent(
-            """\
-            𐄂 ConditionallySucceedsLinter failed.
-            𐄂 FailingLinter failed.
-            - SkippedLinter skipped.
-            ✓ SuccessfulLinter succeeded.
-            """
-        )
-
-    assert_expected(per_file_caching=False)
-    assert_expected(per_file_caching=True)
+    exit_code, stderr = run_lint_rule(
+        rule_runner,
+        lint_request_types=[
+            ConditionallySucceedsRequest,
+            FailingRequest,
+            SkippedRequest,
+            SuccessfulRequest,
+        ],
+        targets=[make_target(good_address), make_target(bad_address)],
+        per_file_caching=per_file_caching,
+    )
+    assert exit_code == FailingRequest.exit_code([bad_address])
+    assert stderr == dedent(
+        """\
+        𐄂 ConditionallySucceedsLinter failed.
+        𐄂 FailingLinter failed.
+        - SkippedLinter skipped.
+        ✓ SuccessfulLinter succeeded.
+        """
+    )
+
+
+@pytest.mark.parametrize("batch_size", [1, 32, 128, 1024])
+def test_batched(rule_runner: RuleRunner, batch_size: int) -> None:
+    exit_code, stderr = run_lint_rule(
+        rule_runner,
+        lint_request_types=[
+            ConditionallySucceedsRequest,
+            FailingRequest,
+            SkippedRequest,
+            SuccessfulRequest,
+        ],
+        targets=[make_target(Address("", target_name=f"good{i}")) for i in range(0, 512)],
+        batch_size=batch_size,
+    )
+    assert exit_code == FailingRequest.exit_code([])
+    assert stderr == dedent(
+        """\
+        ✓ ConditionallySucceedsLinter succeeded.
+        𐄂 FailingLinter failed.
+        - SkippedLinter skipped.
+        ✓ SuccessfulLinter succeeded.
+        """
+    )
 
 
 def test_streaming_output_skip() -> None:
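The single-summary-line-per-linter expectation in `test_batched` above, even with 512 targets spread across many batches, depends on the consolidation step in `lint.py`: the per-batch `LintResults` are pre-sorted by linter name and merged with `itertools.groupby`. A small self-contained sketch of that merge pattern, using plain tuples and made-up linter names in place of the real `LintResult`/`LintResults` types:

from __future__ import annotations

import itertools

# Stand-ins for per-batch LintResults: (linter_name, [exit codes from that batch]).
batch_results = [
    ("flake8", [0]),
    ("black", [0]),
    ("flake8", [1]),
    ("black", [0]),
]


def key_fn(results: tuple[str, list[int]]) -> str:
    return results[0]


# groupby() only merges *adjacent* items, hence the pre-sort by linter name
# (the same reason as the "NB: We must pre-sort" comment in the goal rule).
merged = [
    (linter_name, list(itertools.chain.from_iterable(r[1] for r in group)))
    for linter_name, group in itertools.groupby(sorted(batch_results, key=key_fn), key=key_fn)
]
assert merged == [("black", [0, 0]), ("flake8", [0, 1])]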
