Improve memoization of interpreter constraints, Python parsing, and request classes (#16141)

Profiling showed that:
1. `Requirement.parse` and `InterpreterConstraint.merge_*` were taking up a significant fraction of time, even with singular requirements (~2%); a memoization sketch follows this list.
2. `AllAssetTargetsRequest` did not have stable equality, because it was not marked `@dataclass(frozen=True)`, so `find_all_assets` was not being memoized (~11%); the sketch after the summary below illustrates the equality issue.
3. Repeatedly loading and creating a `Digest` for the `dependency_parser.py` script took a noticeable amount of time (~6%).
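
As a sketch of the first fix, using the stdlib `functools.lru_cache` as a stand-in for Pants' own `@memoized` decorator (which is what the diff below actually uses): caching the parse turns repeated parses of identical constraint strings into dictionary lookups. This is illustrative, not the committed code.

import functools

from pkg_resources import Requirement

@functools.lru_cache(maxsize=None)
def parse_constraint(constraint: str) -> Requirement:
    # Shorthand such as ">=3.7" expands to "CPython>=3.7", mirroring the hunk below.
    try:
        return Requirement.parse(constraint)
    except ValueError:
        return Requirement.parse(f"CPython{constraint}")

# Repeated parses of the same string now return the cached object:
assert parse_constraint("CPython>=3.7,<4") is parse_constraint("CPython>=3.7,<4")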

Fixing these represents a ~19% performance improvement for `./pants --no-pantsd --changed-diffspec=fcaac98402..2a30000 --changed-dependees=transitive list`, which brings `2.12.x`/`2.13.x` roughly back in line with `2.11.x`.
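
The `@dataclass(frozen=True)` annotations added throughout follow from item 2. A minimal sketch (class names hypothetical) of why the plain request classes defeated memoization: a plain class inherits identity-based `__eq__` and `__hash__` from `object`, so two otherwise-identical requests never hit the same cache entry, while a frozen dataclass gets value equality and a stable hash.

from dataclasses import dataclass


class PlainRequest:
    # Identity equality: every instance is a distinct cache key.
    pass


@dataclass(frozen=True)
class FrozenRequest:
    # Value equality and a stable hash: identical requests share a cache entry.
    pass


assert PlainRequest() != PlainRequest()
assert FrozenRequest() == FrozenRequest()
assert hash(FrozenRequest()) == hash(FrozenRequest())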
stuhood committed Jul 12, 2022
1 parent 9608e03 commit ec61fa7
Showing 7 changed files with 62 additions and 33 deletions.
@@ -54,15 +54,27 @@ class ParsePythonDependenciesRequest:
     assets_min_slashes: int


+@dataclass(frozen=True)
+class ParserScript:
+    digest: Digest
+
+
+@rule
+async def parser_script() -> ParserScript:
+    script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
+    assert script is not None
+    return ParserScript(
+        await Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)]))
+    )
+
+
 @rule
 async def parse_python_dependencies(
     request: ParsePythonDependenciesRequest,
+    parser_script: ParserScript,
 ) -> ParsedPythonDependencies:
-    script = pkgutil.get_data(__name__, "scripts/dependency_parser.py")
-    assert script is not None
-    python_interpreter, script_digest, stripped_sources = await MultiGet(
+    python_interpreter, stripped_sources = await MultiGet(
         Get(PythonExecutable, InterpreterConstraints, request.interpreter_constraints),
-        Get(Digest, CreateDigest([FileContent("__parse_python_dependencies.py", script)])),
         Get(StrippedSourceFiles, SourceFilesRequest([request.source])),
     )

@@ -71,7 +83,7 @@ async def parse_python_dependencies(
     file = stripped_sources.snapshot.files[0]

     input_digest = await Get(
-        Digest, MergeDigests([script_digest, stripped_sources.snapshot.digest])
+        Digest, MergeDigests([parser_script.digest, stripped_sources.snapshot.digest])
     )
     process_result = await Get(
         ProcessResult,
1 change: 1 addition & 0 deletions src/python/pants/backend/python/goals/lockfile.py
@@ -109,6 +109,7 @@ class MaybeWarnPythonRepos:
     pass


+@dataclass(frozen=True)
 class MaybeWarnPythonReposRequest:
     pass

@@ -3,7 +3,6 @@

 from __future__ import annotations

-import functools
 import itertools
 from collections import defaultdict
 from typing import Iterable, Iterator, Sequence, Tuple, TypeVar
@@ -57,6 +56,20 @@ def interpreter_constraints_contains(
     return InterpreterConstraints(a).contains(InterpreterConstraints(b), interpreter_universe)


+@memoized
+def parse_constraint(constraint: str) -> Requirement:
+    """Parse an interpreter constraint, e.g., CPython>=2.7,<3.
+
+    We allow shorthand such as `>=3.7`, which gets expanded to `CPython>=3.7`. See Pex's
+    interpreter.py's `parse_requirement()`.
+    """
+    try:
+        parsed_requirement = Requirement.parse(constraint)
+    except ValueError:
+        parsed_requirement = Requirement.parse(f"CPython{constraint}")
+    return parsed_requirement
+
+
 # Normally we would subclass `DeduplicatedCollection`, but we want a custom constructor.
 class InterpreterConstraints(FrozenOrderedSet[Requirement], EngineAwareParameter):
     @classmethod
@@ -70,7 +83,7 @@ def __init__(self, constraints: Iterable[str | Requirement] = ()) -> None:
         # We need to sort the component constraints for each requirement _before_ sorting the entire list
         # for the ordering to be correct.
         parsed_constraints = (
-            i if isinstance(i, Requirement) else self.parse_constraint(i) for i in constraints
+            i if isinstance(i, Requirement) else parse_constraint(i) for i in constraints
         )
         super().__init__(sorted(parsed_constraints, key=lambda c: str(c)))

@@ -80,27 +93,16 @@ def __str__(self) -> str:
     def debug_hint(self) -> str:
         return str(self)

-    @staticmethod
-    def parse_constraint(constraint: str) -> Requirement:
-        """Parse an interpreter constraint, e.g., CPython>=2.7,<3.
-
-        We allow shorthand such as `>=3.7`, which gets expanded to `CPython>=3.7`. See Pex's
-        interpreter.py's `parse_requirement()`.
-        """
-        try:
-            parsed_requirement = Requirement.parse(constraint)
-        except ValueError:
-            parsed_requirement = Requirement.parse(f"CPython{constraint}")
-        return parsed_requirement
-
     @classmethod
     def merge(cls, ics: Iterable[InterpreterConstraints]) -> InterpreterConstraints:
         return InterpreterConstraints(
             cls.merge_constraint_sets(tuple(str(requirement) for requirement in ic) for ic in ics)
         )

     @classmethod
-    def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list[Requirement]:
+    def merge_constraint_sets(
+        cls, constraint_sets: Iterable[Iterable[str]]
+    ) -> frozenset[Requirement]:
         """Given a collection of constraints sets, merge by ORing within each individual constraint
         set and ANDing across each distinct constraint set.

@@ -109,7 +111,7 @@ def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list[Requirement]:
         """
         # A sentinel to indicate a requirement that is impossible to satisfy (i.e., one that
         # requires two different interpreter types).
-        impossible = Requirement.parse("IMPOSSIBLE")
+        impossible = parse_constraint("IMPOSSIBLE")

         # Each element (a Set[ParsedConstraint]) will get ANDed. We use sets to deduplicate
         # identical top-level parsed constraint sets.
@@ -119,16 +121,19 @@ def merge_constraint_sets(cls, constraint_sets: Iterable[Iterable[str]]) -> list[Requirement]:
         # the others, without having to deal with the vacuous case below.
         constraint_sets = [cs for cs in constraint_sets if cs]
         if not constraint_sets:
-            return []
+            return frozenset()

         parsed_constraint_sets: set[frozenset[Requirement]] = set()
         for constraint_set in constraint_sets:
             # Each element (a ParsedConstraint) will get ORed.
             parsed_constraint_set = frozenset(
-                cls.parse_constraint(constraint) for constraint in constraint_set
+                parse_constraint(constraint) for constraint in constraint_set
             )
             parsed_constraint_sets.add(parsed_constraint_set)

+        if len(parsed_constraint_sets) == 1:
+            return next(iter(parsed_constraint_sets))
+
         def and_constraints(parsed_constraints: Sequence[Requirement]) -> Requirement:
             merged_specs: set[tuple[str, str]] = set()
             expected_interpreter = parsed_constraints[0].project_name
@@ -138,7 +143,7 @@ def and_constraints(parsed_constraints: Sequence[Requirement]) -> Requirement:
                 merged_specs.update(parsed_constraint.specs)

             formatted_specs = ",".join(f"{op}{version}" for op, version in merged_specs)
-            return Requirement.parse(f"{expected_interpreter}{formatted_specs}")
+            return parse_constraint(f"{expected_interpreter}{formatted_specs}")

         def cmp_constraints(req1: Requirement, req2: Requirement) -> int:
             if req1.project_name != req2.project_name:
@@ -147,14 +152,11 @@ def cmp_constraints(req1: Requirement, req2: Requirement) -> int:
                 return 0
             return -1 if req1.specs < req2.specs else 1

-        ored_constraints = sorted(
-            {
-                and_constraints(constraints_product)
-                for constraints_product in itertools.product(*parsed_constraint_sets)
-            },
-            key=functools.cmp_to_key(cmp_constraints),
-        )
-        ret = [cs for cs in ored_constraints if cs != impossible]
+        ored_constraints = (
+            and_constraints(constraints_product)
+            for constraints_product in itertools.product(*parsed_constraint_sets)
+        )
+        ret = frozenset(cs for cs in ored_constraints if cs != impossible)
         if not ret:
             # There are no possible combinations.
             attempted_str = " AND ".join(f"({' OR '.join(cs)})" for cs in constraint_sets)
@@ -273,7 +275,7 @@ def snap_to_minimum(self, interpreter_universe: Iterable[str]) -> InterpreterCon
             )
             req_strs.extend(f"!={major}.{minor}.{p}" for p in invalid_patches)
             req_str = ",".join(req_strs)
-            snapped = Requirement.parse(req_str)
+            snapped = parse_constraint(req_str)
             return InterpreterConstraints([snapped])
         return None

1 change: 1 addition & 0 deletions src/python/pants/core/target_types.py
@@ -515,6 +515,7 @@ class GenericTarget(Target):
 # -----------------------------------------------------------------------------------------------


+@dataclass(frozen=True)
 class AllAssetTargetsRequest:
     pass

9 changes: 9 additions & 0 deletions src/python/pants/core/util_rules/system_binaries.py
@@ -730,38 +730,47 @@ async def find_git() -> GitBinary:
 # -------------------------------------------------------------------------------------------


+@dataclass(frozen=True)
 class ZipBinaryRequest:
     pass


+@dataclass(frozen=True)
 class UnzipBinaryRequest:
     pass


+@dataclass(frozen=True)
 class GunzipBinaryRequest:
     pass


+@dataclass(frozen=True)
 class TarBinaryRequest:
     pass


+@dataclass(frozen=True)
 class MkdirBinaryRequest:
     pass


+@dataclass(frozen=True)
 class ChmodBinaryRequest:
     pass


+@dataclass(frozen=True)
 class DiffBinaryRequest:
     pass


+@dataclass(frozen=True)
 class ReadlinkBinaryRequest:
     pass


+@dataclass(frozen=True)
 class GitBinaryRequest:
     pass

2 changes: 2 additions & 0 deletions src/python/pants/goal/anonymous_telemetry.py
@@ -7,6 +7,7 @@
 import logging
 import re
 import uuid
+from dataclasses import dataclass
 from typing import cast

 from humbug.consent import HumbugConsent
@@ -151,6 +152,7 @@ def __call__(
         reporter.publish(report)


+@dataclass(frozen=True)
 class AnonymousTelemetryCallbackFactoryRequest:
     """A unique request type that is installed to trigger construction of the WorkunitsCallback."""

2 changes: 2 additions & 0 deletions src/python/pants/goal/stats_aggregator.py
@@ -6,6 +6,7 @@
 import base64
 import logging
 from collections import Counter
+from dataclasses import dataclass

 from pants.engine.internals.scheduler import Workunit
 from pants.engine.rules import collect_rules, rule
@@ -149,6 +150,7 @@ def __call__(
         )


+@dataclass(frozen=True)
 class StatsAggregatorCallbackFactoryRequest:
     """A unique request type that is installed to trigger construction of the WorkunitsCallback."""

