-
-
Notifications
You must be signed in to change notification settings - Fork 608
/
lint.py
300 lines (253 loc) · 10.1 KB
/
lint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations
import itertools
import logging
from dataclasses import dataclass
from typing import Any, Iterable, cast
from pants.core.goals.style_request import StyleRequest, write_reports
from pants.core.util_rules.distdir import DistDir
from pants.engine.console import Console
from pants.engine.engine_aware import EngineAwareReturnType
from pants.engine.fs import EMPTY_DIGEST, Digest, Workspace
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.process import FallibleProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule
from pants.engine.target import FieldSet, Targets
from pants.engine.unions import UnionMembership, union
from pants.util.collections import partition_sequentially
from pants.util.logging import LogLevel
from pants.util.memo import memoized_property
from pants.util.meta import frozen_after_init
from pants.util.strutil import strip_v2_chroot_path
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LintResult(EngineAwareReturnType):
    """The outcome of one linter process run (or one partition of a linter's input)."""

    exit_code: int
    stdout: str
    stderr: str
    # Human-readable description of the input partition (e.g. an interpreter-
    # compatibility group), shown when a linter returns multiple results.
    partition_description: str | None = None
    # Optional report file(s) produced by the linter, to be written under dist/.
    report: Digest = EMPTY_DIGEST

    @classmethod
    def from_fallible_process_result(
        cls,
        process_result: FallibleProcessResult,
        *,
        partition_description: str | None = None,
        strip_chroot_path: bool = False,
        report: Digest = EMPTY_DIGEST,
    ) -> LintResult:
        """Build a `LintResult` from a finished (possibly failed) process.

        When `strip_chroot_path` is set, sandbox chroot paths are scrubbed from
        the output so messages refer to the user's real files.
        """

        def decode(raw: bytes) -> str:
            if strip_chroot_path:
                return strip_v2_chroot_path(raw)
            return raw.decode()

        return cls(
            exit_code=process_result.exit_code,
            stdout=decode(process_result.stdout),
            stderr=decode(process_result.stderr),
            partition_description=partition_description,
            report=report,
        )

    def metadata(self) -> dict[str, Any]:
        """Extra structured data the engine attaches to logs/UI for this result."""
        return {"partition": self.partition_description}
@frozen_after_init
@dataclass(unsafe_hash=True)
class LintResults(EngineAwareReturnType):
    """Zero or more LintResult objects for a single linter.

    Typically, linters will return one result. If they no-oped, they will return zero results.
    However, some linters may need to partition their input and thus may need to return multiple
    results. For example, many Python linters will need to group by interpreter compatibility.
    """

    results: tuple[LintResult, ...]
    linter_name: str

    def __init__(self, results: Iterable[LintResult], *, linter_name: str) -> None:
        self.results = tuple(results)
        self.linter_name = linter_name

    @property
    def skipped(self) -> bool:
        """Whether the linter no-oped (produced no results at all)."""
        # Idiom fix: was `bool(self.results) is False` (PEP 8: never compare to
        # True/False with `is`); `not` expresses the same truthiness test.
        return not self.results

    @memoized_property
    def exit_code(self) -> int:
        # The first non-zero exit code across all partitions wins; 0 if all succeeded.
        return next((result.exit_code for result in self.results if result.exit_code != 0), 0)

    def level(self) -> LogLevel | None:
        # Skipped linters are only interesting at debug level; failures are errors.
        if self.skipped:
            return LogLevel.DEBUG
        return LogLevel.ERROR if self.exit_code != 0 else LogLevel.INFO

    def message(self) -> str | None:
        """Render a human-readable summary of every partition's output."""
        if self.skipped:
            return f"{self.linter_name} skipped."
        message = self.linter_name
        message += (
            " succeeded." if self.exit_code == 0 else f" failed (exit code {self.exit_code})."
        )

        def msg_for_result(result: LintResult) -> str:
            msg = ""
            if result.stdout:
                msg += f"\n{result.stdout}"
            if result.stderr:
                msg += f"\n{result.stderr}"
            if msg:
                msg = f"{msg.rstrip()}\n\n"
            return msg

        if len(self.results) == 1:
            results_msg = msg_for_result(self.results[0])
        else:
            # Multiple partitions: label each one so output can be traced back
            # to the inputs that produced it.
            results_msg = "\n"
            for i, result in enumerate(self.results):
                msg = f"Partition #{i + 1}"
                msg += (
                    f" - {result.partition_description}:" if result.partition_description else ":"
                )
                msg += msg_for_result(result) or "\n\n"
                results_msg += msg
        message += results_msg
        return message

    def cacheable(self) -> bool:
        """Is marked uncacheable to ensure that it always renders."""
        return False
@union
class LintRequest(StyleRequest):
    """A union for StyleRequests that should be lintable.

    Subclass and install a member of this type to provide a linter.
    Each registered subclass is discovered via `union_membership[LintRequest]`
    in the `lint` goal rule and run against the applicable targets.
    """
# Directory (relative to a linter's sandbox) where report files are collected.
# If a user wants linter reports to show up in dist/ they must ensure that the reports
# are written under this directory. E.g.,
# ./pants --flake8-args="--output-file=reports/report.txt" lint <target>
REPORT_DIR = "reports"
# Options for the `lint` goal.
class LintSubsystem(GoalSubsystem):
    name = "lint"
    help = "Run all linters and/or formatters in check mode."
    # The goal is only activated when at least one LintRequest union member is registered.
    required_union_implementations = (LintRequest,)

    @classmethod
    def register_options(cls, register) -> None:
        super().register_options(register)
        # Deprecated in favor of `--batch-size` (see removal_version/removal_hint below).
        register(
            "--per-file-caching",
            advanced=True,
            type=bool,
            default=False,
            removal_version="2.11.0.dev0",
            removal_hint=(
                "Linters are now broken into multiple batches by default using the "
                "`--batch-size` argument."
            ),
            help=(
                "Rather than linting all files in a single batch, lint each file as a "
                "separate process.\n\nWhy do this? You'll get many more cache hits. Why not do "
                "this? Linters both have substantial startup overhead and are cheap to add one "
                "additional file to the run. On a cold cache, it is much faster to use "
                "`--no-per-file-caching`.\n\nWe only recommend using `--per-file-caching` if you "
                "are using a remote cache or if you have benchmarked that this option will be "
                "faster than `--no-per-file-caching` for your use case."
            ),
        )
        # A *minimum* batch size, not an exact one: partition_sequentially() may
        # produce slightly larger batches to keep partition boundaries stable.
        register(
            "--batch-size",
            advanced=True,
            type=int,
            default=128,
            help=(
                "The target minimum number of files that will be included in each linter batch.\n"
                "\n"
                "Linter processes are batched for a few reasons:\n"
                "\n"
                "1. to avoid OS argument length limits (in processes which don't support argument "
                "files)\n"
                "2. to support more stable cache keys than would be possible if all files were "
                "operated on in a single batch.\n"
                "3. to allow for parallelism in linter processes which don't have internal "
                "parallelism, or -- if they do support internal parallelism -- to improve scheduling "
                "behavior when multiple processes are competing for cores and so internal "
                "parallelism cannot be used perfectly.\n"
            ),
        )

    # Typed accessors over the raw options values.
    @property
    def per_file_caching(self) -> bool:
        return cast(bool, self.options.per_file_caching)

    @property
    def batch_size(self) -> int:
        return cast(int, self.options.batch_size)
# The `lint` goal itself; its exit code is the engine's exit code for the run.
class Lint(Goal):
    subsystem_cls = LintSubsystem
@goal_rule
async def lint(
    console: Console,
    workspace: Workspace,
    targets: Targets,
    lint_subsystem: LintSubsystem,
    union_membership: UnionMembership,
    dist_dir: DistDir,
) -> Lint:
    """Run every registered linter over the applicable targets and summarize results.

    For each `LintRequest` union member, builds a request from the targets whose
    field sets apply, runs the linters (per-file or in batches), merges results
    per linter, writes any report files under dist/, and prints a one-line
    status per linter. The goal's exit code is non-zero if any linter failed.
    """
    request_types = cast("Iterable[type[LintRequest]]", union_membership[LintRequest])
    # One request per linter, containing only the field sets applicable to it.
    requests = tuple(
        request_type(
            request_type.field_set_type.create(target)
            for target in targets
            if request_type.field_set_type.is_applicable(target)
        )
        for request_type in request_types
    )
    if lint_subsystem.per_file_caching:
        # Deprecated mode: one process per field set, maximizing cache hits at
        # the cost of per-process startup overhead.
        all_batch_results = await MultiGet(
            Get(LintResults, LintRequest, request.__class__([field_set]))
            for request in requests
            if request.field_sets
            for field_set in request.field_sets
        )
    else:
        # Partition by address so batch membership is stable across runs,
        # keeping process cache keys stable too.
        def address_str(fs: FieldSet) -> str:
            return fs.address.spec

        all_batch_results = await MultiGet(
            Get(LintResults, LintRequest, request.__class__(field_sets))
            for request in requests
            if request.field_sets
            for field_sets in partition_sequentially(
                request.field_sets, key=address_str, size_min=lint_subsystem.batch_size
            )
        )

    def key_fn(results: LintResults):
        return results.linter_name

    # NB: We must pre-sort the data for itertools.groupby() to work properly.
    sorted_all_batch_results = sorted(all_batch_results, key=key_fn)
    # We consolidate all results for each linter into a single `LintResults`.
    all_results = tuple(
        sorted(
            (
                LintResults(
                    itertools.chain.from_iterable(
                        per_file_results.results for per_file_results in all_linter_results
                    ),
                    linter_name=linter_name,
                )
                for linter_name, all_linter_results in itertools.groupby(
                    sorted_all_batch_results, key=key_fn
                )
            ),
            key=lambda results: results.linter_name,
        )
    )

    def get_tool_name(res: LintResults) -> str:
        return res.linter_name

    # Materialize any report digests under dist/ (one subdir per linter).
    write_reports(
        all_results,
        workspace,
        dist_dir,
        goal_name=LintSubsystem.name,
        get_tool_name=get_tool_name,
    )

    exit_code = 0
    if all_results:
        # Blank line to separate the summary from the linters' own output.
        console.print_stderr("")
    for results in all_results:
        if results.skipped:
            sigil = console.sigil_skipped()
            status = "skipped"
        elif results.exit_code == 0:
            sigil = console.sigil_succeeded()
            status = "succeeded"
        else:
            sigil = console.sigil_failed()
            status = "failed"
            # NOTE: if multiple linters fail, the goal exits with the code of
            # whichever failing linter sorts last by name.
            exit_code = results.exit_code
        console.print_stderr(f"{sigil} {results.linter_name} {status}.")
    return Lint(exit_code)
def rules():
    """Entry point the Pants engine uses to collect this module's rules."""
    return collect_rules()