Skip to content

Commit

Permalink
Add support for rendering a summary of objects held by the graph (pan…
Browse files Browse the repository at this point in the history
…tsbuild#14638)

As described in pantsbuild#12662, some use cases have surprising memory usage. To enable tracking those cases down, this change adds a `--memory-summary` option which summarizes the deep sizes of live objects in the `Graph`.

On the Python side, the deep size is calculated using a very basic deduping walk of `gc.get_referents` (after having investigated [pympler](https://pypi.org/project/Pympler/), [guppy3](https://pypi.org/project/guppy3/), and [objsize](https://pypi.org/project/objsize/)). On the Rust side, the `deepsize` crate is used, with sizes derived for all types reachable from `NodeKey` and `NodeOutput`.

Example output:
```
Memory summary:
64		1		pants.backend.docker.subsystems.dockerfile_parser.DockerfileParser
64		1		pants.backend.docker.subsystems.dockerfile_parser.ParserSetup
64		1		pants.backend.java.dependency_inference.java_parser_launcher.JavaParserCompiledClassfiles
64		1		pants.backend.java.dependency_inference.symbol_mapper.FirstPartyJavaTargetsMappingRequest
<snip>
1588620		957		(native) pants.engine.internals.graph.hydrate_sources
2317920		2195		(native) pants.backend.python.dependency_inference.module_mapper.map_module_to_address
2774760		2434		(native) pants.engine.internals.graph.determine_explicitly_provided_dependencies
4446900		1458		(native) pants.engine.internals.graph.resolve_dependencies
```

[ci skip-build-wheels]
  • Loading branch information
stuhood committed Feb 27, 2022
1 parent 01ed811 commit e50818d
Show file tree
Hide file tree
Showing 29 changed files with 244 additions and 92 deletions.
2 changes: 2 additions & 0 deletions pants.ci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ remote_cache_write = true
# We want to continue to get logs when remote caching errors.
remote_cache_warnings = "backoff"

memory_summary = true

[stats]
log = true

Expand Down
29 changes: 29 additions & 0 deletions src/python/pants/bin/local_pants_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import sys
from collections import Counter
from dataclasses import dataclass

from pants.base.build_environment import get_buildroot
Expand Down Expand Up @@ -36,6 +37,7 @@
)
from pants.option.options import Options
from pants.option.options_bootstrapper import OptionsBootstrapper
from pants.util.collections import deep_getsizeof
from pants.util.contextutil import maybe_profiled

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -204,6 +206,32 @@ def _perform_run_body(self, goals: tuple[str, ...], poll: bool) -> ExitCode:
def _finish_run(self, code: ExitCode) -> None:
"""Cleans up the run tracker."""

def _maybe_report_memory_summary(self) -> None:
    """Print a per-type memory summary to stderr when `memory_summary` is enabled.

    Does nothing unless the global `memory_summary` option is truthy. Otherwise, every live
    item reported by the scheduler session is grouped by its Python type, accumulating an
    instance count and a deduplicated deep size per type. The native entries returned
    alongside the items are appended with a `(native)` prefix, and all rows are printed in
    ascending order of (size, count, name).
    """
    if not self.options.for_global_scope().memory_summary:
        return

    # Shared across all `deep_getsizeof` calls so that an object reachable from several
    # items is only charged to the first item that reaches it.
    seen_ids: set[int] = set()
    instances_per_type: Counter[type] = Counter()
    bytes_per_type: Counter[type] = Counter()

    live_objects, native_sizes = self.graph_session.scheduler_session.live_items()
    for obj in live_objects:
        obj_type = type(obj)
        instances_per_type[obj_type] += 1
        bytes_per_type[obj_type] += deep_getsizeof(obj, seen_ids)

    rows = []
    for obj_type, total in bytes_per_type.items():
        label = f"{obj_type.__module__}.{obj_type.__qualname__}"
        rows.append((total, instances_per_type[obj_type], label))
    # Native entries arrive as name -> (count, size); reorder to match the Python rows.
    for native_name, (native_count, native_total) in native_sizes.items():
        rows.append((native_total, native_count, f"(native) {native_name}"))
    rows.sort()

    print("Memory summary:", file=sys.stderr)
    for total, instances, label in rows:
        print(f"{total}\t\t{instances}\t\t{label}", file=sys.stderr)

def _get_workunits_callbacks(self) -> tuple[WorkunitsCallback, ...]:
# Load WorkunitsCallbacks by requesting WorkunitsCallbackFactories, and then constructing
# a per-run instance of each WorkunitsCallback.
Expand Down Expand Up @@ -266,6 +294,7 @@ def run(self, start_time: float) -> ExitCode:
try:
engine_result = self._run_inner()
finally:
self._maybe_report_memory_summary()
metrics = self.graph_session.scheduler_session.metrics()
self.run_tracker.set_pantsd_scheduler_metrics(metrics)
self.run_tracker.end_run(engine_result)
Expand Down
3 changes: 3 additions & 0 deletions src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ def scheduler_execute(
scheduler: PyScheduler, session: PySession, execution_request: PyExecutionRequest
) -> list: ...
# Metrics for the given session, as a mapping of metric name to metric value.
def scheduler_metrics(scheduler: PyScheduler, session: PySession) -> dict[str, int]: ...
# Returns a pair: a list of objects, plus a mapping from a name to two integers.
# The caller in local_pants_runner.py unpacks each pair as (count, size) and prints
# those entries with a "(native)" prefix.
def scheduler_live_items(
scheduler: PyScheduler, session: PySession
) -> tuple[list[Any], dict[str, tuple[int, int]]]: ...
def scheduler_shutdown(scheduler: PyScheduler, timeout_secs: int) -> None: ...
def session_new_run_id(session: PySession) -> None: ...
def session_poll_workunits(
Expand Down
4 changes: 4 additions & 0 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ def metrics(self) -> dict[str, int]:
"""Returns metrics for this SchedulerSession as a dict of metric name to metric value."""
return native_engine.scheduler_metrics(self.py_scheduler, self.py_session)

def live_items(self) -> tuple[list[Any], dict[str, tuple[int, int]]]:
    """Return the live items held by the Scheduler, plus native size data.

    The first element is the list of Python objects. The second maps a name to a pair of
    integers, which the memory summary in `local_pants_runner.py` unpacks as (count, size).
    """
    live = native_engine.scheduler_live_items(self.py_scheduler, self.py_session)
    return live

def _maybe_visualize(self) -> None:
if self._scheduler.visualize_to_dir is not None:
# TODO: This increment-and-get is racy.
Expand Down
7 changes: 7 additions & 0 deletions src/python/pants/option/global_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,13 @@ def register_options(cls, register):
),
).advanced()

# Opt-in diagnostic flag (off by default); read by the runner at the end of a run to decide
# whether to print the memory summary.
memory_summary = BoolOption(
    "--memory-summary",
    default=False,
    help="Report a summary of memory usage at the end of each run.",
    advanced=True,
)

@classmethod
def validate_instance(cls, opts):
"""Validates an instance of global options for cases that are not prohibited via
Expand Down
18 changes: 18 additions & 0 deletions src/python/pants/util/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

import collections
import collections.abc
import gc
import math
from sys import getsizeof
from typing import Any, Callable, Iterable, Iterator, MutableMapping, TypeVar

from pants.engine.internals import native_engine
Expand All @@ -21,6 +23,22 @@ def recursively_update(d: MutableMapping, d2: MutableMapping) -> None:
d[k] = v


def deep_getsizeof(o: Any, ids: set[int]) -> int:
    """Find the memory footprint of the given object, including everything it refers to.

    The object graph is walked via `gc.get_referents`, summing the shallow `sys.getsizeof`
    of every object that has not been seen before.

    To avoid double-counting, `ids` should be a set of object ids which have been visited by
    previous calls to this method. It is updated in place with the id of every object counted
    by this call, so an object shared between several roots is charged only to the first root
    that reaches it.

    :param o: The root object to measure.
    :param ids: The ids of already-counted objects; mutated by this call.
    :returns: The total size in bytes of the objects newly visited by this call, or 0 if `o`
        itself was already in `ids`.
    """
    total = 0
    # An explicit stack (rather than recursion) keeps deeply nested object graphs from
    # exhausting the interpreter's recursion limit.
    pending = [o]
    while pending:
        current = pending.pop()
        current_id = id(current)
        if current_id in ids:
            continue
        ids.add(current_id)
        total += getsizeof(current)
        # The object must be passed explicitly: `gc.get_referents()` with no arguments
        # returns an empty list, which would reduce this to a shallow size.
        pending.extend(gc.get_referents(current))
    return total


_T = TypeVar("_T")


Expand Down
44 changes: 31 additions & 13 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion src/rust/engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ bytes = "1.0"
cache = { path = "cache" }
concrete_time = { path = "concrete_time" }
crossbeam-channel = "0.5"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34", features=["smallvec"] }
derivative = "2.2"
async-oncecell = "0.2"
either = "1.6"
Expand All @@ -139,7 +141,7 @@ rand = "0.8"
regex = "1"
reqwest = { version = "0.11", default_features = false, features = ["stream", "rustls-tls"] }
rule_graph = { path = "rule_graph" }
smallvec = "0.6"
smallvec = "1"
stdio = { path = "stdio" }
store = { path = "fs/store" }
serde_json = "1.0"
Expand Down
6 changes: 4 additions & 2 deletions src/rust/engine/concrete_time/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ name = "concrete_time"
publish = false

[dependencies]
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
log = "0.4"
prost = "0.9"
prost-types = "0.9"
serde_derive = "1.0.98"
serde = "1.0.98"
log = "0.4"
serde_derive = "1.0.98"
3 changes: 2 additions & 1 deletion src/rust/engine/concrete_time/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
// Arc<Mutex> can be more clear than needing to grok Orderings:
#![allow(clippy::mutex_atomic)]

use deepsize::DeepSizeOf;
use serde_derive::Serialize;

/// A concrete data representation of a duration.
Expand All @@ -38,7 +39,7 @@ use serde_derive::Serialize;
///
/// It can be used to represent a timestamp (as a duration since the unix epoch) or simply a
/// duration between two arbitrary timestamps.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[derive(Debug, DeepSizeOf, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
pub struct Duration {
/// How many seconds did this `Duration` last?
pub secs: u64,
Expand Down
2 changes: 2 additions & 0 deletions src/rust/engine/fs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ publish = false
# Pin async-trait due to https://github.com/dtolnay/async-trait/issues/144.
async-trait = "=0.1.42"
bytes = "1.0"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
dirs-next = "2"
futures = "0.3"
glob = "0.3.0"
Expand Down
Loading

0 comments on commit e50818d

Please sign in to comment.