From ec61fa721257b9b352c2d4a8df4f4a7bd31554ed Mon Sep 17 00:00:00 2001
From: Stu Hood
Date: Tue, 12 Jul 2022 09:19:22 -0700
Subject: [PATCH] Improve memoization of interpreter constraints, Python
 parsing, and request classes (#16141)

Profiling showed that:

1. `Requirement.parse` and `InterpreterConstraints.merge_*` took up a
   significant fraction of time, even for single requirements (~2%).
2. `AllAssetTargetsRequest` did not have stable equality because it was not
   marked as a `@dataclass`, so `find_all_assets` was never memoized (~11%).
3. Repeatedly loading and creating a `Digest` for the `dependency_parser.py`
   script took a noticeable amount of time (~6%).

Fixing these yields a ~19% performance improvement for `./pants --no-pantsd
--changed-diffspec=fcaac98402..2a300002f0 --changed-dependees=transitive list`,
which brings `2.12.x`/`2.13.x` roughly back in line with `2.11.x`.
---
 .../parse_python_dependencies.py                  | 22 +++++--
 src/python/pants/backend/python/goals/lockfile.py |  1 +
 .../util_rules/interpreter_constraints.py         | 58 ++++++++++---------
 src/python/pants/core/target_types.py             |  1 +
 .../pants/core/util_rules/system_binaries.py      |  9 +++
 src/python/pants/goal/anonymous_telemetry.py      |  2 +
 src/python/pants/goal/stats_aggregator.py         |  2 +
 7 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py b/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py
index 93416b06bea..183ded7992f 100644
--- a/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py
+++ b/src/python/pants/backend/python/dependency_inference/parse_python_dependencies.py
@@ -54,15 +54,27 @@ class ParsePythonDependenciesRequest:
     assets_min_slashes: int
 
 
+@dataclass(frozen=True)
+class ParserScript:
+    digest: Digest
+
+
+@rule
+async def parser_script() -> ParserScript:
+    script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
+    assert script is not None
+    return ParserScript(
+        await Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)]))
+    )
+
+
 @rule
 async def parse_python_dependencies(
     request: ParsePythonDependenciesRequest,
+    parser_script: ParserScript,
 ) -> ParsedPythonDependencies:
-    script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
-    assert script is not None
-    python_interpreter, script_digest, stripped_sources = await MultiGet(
+    python_interpreter, stripped_sources = await MultiGet(
         Get(PythonExecutable, InterpreterConstraints, request.interpreter_constraints),
-        Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)])),
         Get(StrippedSourceFiles, SourceFilesRequest([request.source])),
     )
 
@@ -71,7 +83,7 @@ async def parse_python_dependencies(
     file = stripped_sources.snapshot.files[0]
 
     input_digest = await Get(
-        Digest, MergeDigests([script_digest, stripped_sources.snapshot.digest])
+        Digest, MergeDigests([parser_script.digest, stripped_sources.snapshot.digest])
     )
     process_result = await Get(
         ProcessResult,
diff --git a/src/python/pants/backend/python/goals/lockfile.py b/src/python/pants/backend/python/goals/lockfile.py
index 12775e62967..dfc7e79bb79 100644
--- a/src/python/pants/backend/python/goals/lockfile.py
+++ b/src/python/pants/backend/python/goals/lockfile.py
@@ -109,6 +109,7 @@ class MaybeWarnPythonRepos:
     pass
 
 
+@dataclass(frozen=True)
 class MaybeWarnPythonReposRequest:
     pass
 
diff --git a/src/python/pants/backend/python/util_rules/interpreter_constraints.py b/src/python/pants/backend/python/util_rules/interpreter_constraints.py
index 914fe4207da..3ff587859a7 100644
--- a/src/python/pants/backend/python/util_rules/interpreter_constraints.py
+++ b/src/python/pants/backend/python/util_rules/interpreter_constraints.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
-import functools
 import itertools
 from collections import defaultdict
 from typing import Iterable, Iterator, Sequence, Tuple, TypeVar
 
@@ -57,6 +56,20 @@ def interpreter_constraints_contains(
     return InterpreterConstraints(a).contains(InterpreterConstraints(b), interpreter_universe)
 
 
+@memoized
+def parse_constraint(constraint: str) -> Requirement:
+    """Parse an interpreter constraint, e.g., CPython>=2.7,<3.
+
+    We allow shorthand such as `>=3.7`, which gets expanded to `CPython>=3.7`. See Pex's
+    interpreter.py's `parse_requirement()`.
+    """
+    try:
+        parsed_requirement = Requirement.parse(constraint)
+    except ValueError:
+        parsed_requirement = Requirement.parse(f"CPython{constraint}")
+    return parsed_requirement
+
+
 # Normally we would subclass `DeduplicatedCollection`, but we want a custom constructor.
 class InterpreterConstraints(FrozenOrderedSet[Requirement], EngineAwareParameter):
     @classmethod
@@ -70,7 +83,7 @@ def __init__(self, constraints: Iterable[str | Requirement] = ()) -> None:
         # We need to sort the component constraints for each requirement _before_ sorting the entire list
         # for the ordering to be correct.
         parsed_constraints = (
-            i if isinstance(i, Requirement) else self.parse_constraint(i) for i in constraints
+            i if isinstance(i, Requirement) else parse_constraint(i) for i in constraints
         )
         super().__init__(sorted(parsed_constraints, key=lambda c: str(c)))
 
@@ -80,19 +93,6 @@ def __str__(self) -> str:
     def debug_hint(self) -> str:
         return str(self)
 
-    @staticmethod
-    def parse_constraint(constraint: str) -> Requirement:
-        """Parse an interpreter constraint, e.g., CPython>=2.7,<3.
-
-        We allow shorthand such as `>=3.7`, which gets expanded to `CPython>=3.7`. See Pex's
-        interpreter.py's `parse_requirement()`.
-        """
-        try:
-            parsed_requirement = Requirement.parse(constraint)
-        except ValueError:
-            parsed_requirement = Requirement.parse(f"CPython{constraint}")
-        return parsed_requirement
-
     @classmethod
     def merge(cls, ics: Iterable[InterpreterConstraints]) -> InterpreterConstraints:
         return InterpreterConstraints(
@@ -100,7 +100,9 @@ def merge(cls, ics: Iterable[InterpreterConstraints]) -> InterpreterConstraints:
         )
 
     @classmethod
-    def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list[Requirement]:
+    def merge_constraint_sets(
+        cls, constraint_sets: Iterable[Iterable[str]]
+    ) -> frozenset[Requirement]:
         """Given a collection of constraints sets, merge by ORing within each individual
         constraint set and ANDing across each distinct constraint set.
 
@@ -109,7 +111,7 @@ def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list
         """
         # A sentinel to indicate a requirement that is impossible to satisfy (i.e., one that
         # requires two different interpreter types).
-        impossible = Requirement.parse("IMPOSSIBLE")
+        impossible = parse_constraint("IMPOSSIBLE")
 
         # Each element (a Set[ParsedConstraint]) will get ANDed. We use sets to deduplicate
         # identical top-level parsed constraint sets.
@@ -119,16 +121,19 @@ def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list
         # the others, without having to deal with the vacuous case below.
         constraint_sets = [cs for cs in constraint_sets if cs]
         if not constraint_sets:
-            return []
+            return frozenset()
 
         parsed_constraint_sets: set[frozenset[Requirement]] = set()
         for constraint_set in constraint_sets:
             # Each element (a ParsedConstraint) will get ORed.
             parsed_constraint_set = frozenset(
-                cls.parse_constraint(constraint) for constraint in constraint_set
+                parse_constraint(constraint) for constraint in constraint_set
             )
             parsed_constraint_sets.add(parsed_constraint_set)
 
+        if len(parsed_constraint_sets) == 1:
+            return next(iter(parsed_constraint_sets))
+
         def and_constraints(parsed_constraints: Sequence[Requirement]) -> Requirement:
             merged_specs: set[tuple[str, str]] = set()
             expected_interpreter = parsed_constraints[0].project_name
@@ -138,7 +143,7 @@ def and_constraints(parsed_constraints: Sequence[Requirement]) -> Requirement:
                 merged_specs.update(parsed_constraint.specs)
 
             formatted_specs = ",".join(f"{op}{version}" for op, version in merged_specs)
-            return Requirement.parse(f"{expected_interpreter}{formatted_specs}")
+            return parse_constraint(f"{expected_interpreter}{formatted_specs}")
 
         def cmp_constraints(req1: Requirement, req2: Requirement) -> int:
             if req1.project_name != req2.project_name:
@@ -147,14 +152,11 @@ def cmp_constraints(req1: Requirement, req2: Requirement) -> int:
                 return 0
             return -1 if req1.specs < req2.specs else 1
 
-        ored_constraints = sorted(
-            {
-                and_constraints(constraints_product)
-                for constraints_product in itertools.product(*parsed_constraint_sets)
-            },
-            key=functools.cmp_to_key(cmp_constraints),
+        ored_constraints = (
+            and_constraints(constraints_product)
+            for constraints_product in itertools.product(*parsed_constraint_sets)
         )
-        ret = [cs for cs in ored_constraints if cs != impossible]
+        ret = frozenset(cs for cs in ored_constraints if cs != impossible)
         if not ret:
             # There are no possible combinations.
             attempted_str = " AND ".join(f"({' OR '.join(cs)})" for cs in constraint_sets)
@@ -273,7 +275,7 @@ def snap_to_minimum(self, interpreter_universe: Iterable[str]) -> InterpreterCon
                 )
                 req_strs.extend(f"!={major}.{minor}.{p}" for p in invalid_patches)
                 req_str = ",".join(req_strs)
-                snapped = Requirement.parse(req_str)
+                snapped = parse_constraint(req_str)
                 return InterpreterConstraints([snapped])
         return None
 
diff --git a/src/python/pants/core/target_types.py b/src/python/pants/core/target_types.py
index 37e593e7015..7faf803f697 100644
--- a/src/python/pants/core/target_types.py
+++ b/src/python/pants/core/target_types.py
@@ -515,6 +515,7 @@ class GenericTarget(Target):
 # -----------------------------------------------------------------------------------------------
 
 
+@dataclass(frozen=True)
 class AllAssetTargetsRequest:
     pass
 
diff --git a/src/python/pants/core/util_rules/system_binaries.py b/src/python/pants/core/util_rules/system_binaries.py
index c839010c63c..d1d52257632 100644
--- a/src/python/pants/core/util_rules/system_binaries.py
+++ b/src/python/pants/core/util_rules/system_binaries.py
@@ -730,38 +730,47 @@ async def find_git() -> GitBinary:
 # -------------------------------------------------------------------------------------------
 
 
+@dataclass(frozen=True)
 class ZipBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class UnzipBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class GunzipBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class TarBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class MkdirBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class ChmodBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class DiffBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class ReadlinkBinaryRequest:
     pass
 
 
+@dataclass(frozen=True)
 class GitBinaryRequest:
     pass
 
diff --git a/src/python/pants/goal/anonymous_telemetry.py b/src/python/pants/goal/anonymous_telemetry.py
index 0988a2857c3..2be1df2e54e 100644
--- a/src/python/pants/goal/anonymous_telemetry.py
+++ b/src/python/pants/goal/anonymous_telemetry.py
@@ -7,6 +7,7 @@
 import logging
 import re
 import uuid
+from dataclasses import dataclass
 from typing import cast
 
 from humbug.consent import HumbugConsent
@@ -151,6 +152,7 @@ def __call__(
         reporter.publish(report)
 
 
+@dataclass(frozen=True)
class AnonymousTelemetryCallbackFactoryRequest:
     """A unique request type that is installed to trigger construction of the WorkunitsCallback."""
 
diff --git a/src/python/pants/goal/stats_aggregator.py b/src/python/pants/goal/stats_aggregator.py
index 8a25d47a379..e05d5b4f0ed 100644
--- a/src/python/pants/goal/stats_aggregator.py
+++ b/src/python/pants/goal/stats_aggregator.py
@@ -6,6 +6,7 @@
 import base64
 import logging
 from collections import Counter
+from dataclasses import dataclass
 
 from pants.engine.internals.scheduler import Workunit
 from pants.engine.rules import collect_rules, rule
@@ -149,6 +150,7 @@ def __call__(
         )
 
 
+@dataclass(frozen=True)
 class StatsAggregatorCallbackFactoryRequest:
     """A unique request type that is installed to trigger construction of the WorkunitsCallback."""
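
A note on item 2 above: a request class with no fields still inherits identity-based `__eq__`/`__hash__` from `object`, so every instance is unequal to every other instance and a cache keyed on the request can never hit. `@dataclass(frozen=True)` generates value-based equality and hashing instead. A minimal sketch outside the Pants engine, with `functools.lru_cache` standing in for the engine's rule memoization (the stand-in and the simplified signature are assumptions, not the engine's API):

```python
from dataclasses import dataclass
from functools import lru_cache


class PlainRequest:
    """Inherits identity-based __eq__/__hash__ from object."""


@dataclass(frozen=True)
class FrozenRequest:
    """Generated value-based __eq__/__hash__: all instances compare equal."""


@lru_cache(maxsize=None)  # stand-in for the engine's memoization of rule results
def find_all_assets(request: object) -> str:
    print(f"computing for {type(request).__name__}")
    return "assets"


find_all_assets(PlainRequest())
find_all_assets(PlainRequest())   # cache miss: distinct identities never compare equal
find_all_assets(FrozenRequest())
find_all_assets(FrozenRequest())  # cache hit: no recomputation
```

Running the sketch prints "computing" twice for `PlainRequest` but only once for `FrozenRequest`.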
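
Item 1 is addressed by hoisting `parse_constraint` out of the class and marking it `@memoized`, so each distinct constraint string is parsed once per process. A rough equivalent, with `functools.lru_cache` standing in for `pants.util.memo.memoized` and assuming `Requirement` is setuptools' `pkg_resources.Requirement` (the patch itself does not show the import):

```python
from functools import lru_cache

from pkg_resources import Requirement


@lru_cache(maxsize=None)  # stand-in for pants.util.memo.memoized
def parse_constraint(constraint: str) -> Requirement:
    # Shorthand like ">=3.7" has no project name and fails to parse,
    # so retry with the implied CPython interpreter prepended.
    try:
        return Requirement.parse(constraint)
    except ValueError:
        return Requirement.parse(f"CPython{constraint}")


# Parsed once; the second call is a dictionary hit returning the same object.
assert parse_constraint("CPython>=3.7") is parse_constraint("CPython>=3.7")
```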
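
For the `merge_constraint_sets` changes, the underlying semantics (unchanged by this patch) are "OR within a constraint set, AND across sets": `itertools.product` enumerates one pick per set, each pick's specifiers are unioned into a single requirement, and picks that mix interpreter types reduce to the `IMPOSSIBLE` sentinel and are dropped. A condensed illustration of those semantics, not the Pants code itself, again assuming `pkg_resources.Requirement`:

```python
import itertools

from pkg_resources import Requirement

# OR within each set, AND across sets: product picks one constraint per set.
constraint_sets = [["CPython<3.10", "PyPy"], ["CPython>=3.8"]]
parsed = [[Requirement.parse(c) for c in cs] for cs in constraint_sets]
for pick in itertools.product(*parsed):
    interpreters = {req.project_name for req in pick}
    if len(interpreters) > 1:
        print(f"{[str(r) for r in pick]} -> impossible (mixed interpreter types)")
        continue
    specs = ",".join(f"{op}{version}" for req in pick for op, version in req.specs)
    print(Requirement.parse(f"{interpreters.pop()}{specs}"))
# CPython<3.10,>=3.8
# ['PyPy', 'CPython>=3.8'] -> impossible (mixed interpreter types)
```

Returning a `frozenset` keeps the merged result hashable and order-independent, which is what lets downstream callers memoize on it, and the new single-set early return skips the product entirely in the common case.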