Skip to content

Commit

Permalink
Reduce memory usage by interning Tasks and RuleGraph entries. (cherry…
Browse files Browse the repository at this point in the history
…pick of #14683) (#14689)

The `--stats-memory-summary` added in #14638/#14652 was [reporting surprisingly large sizes](#12662 (comment))  for native `NodeKey` structs -- even when excluding the actual Python values that they held. Investigation showed that both the `Task` and `Entry` structs were contributing significantly to the size of the `Task` struct.

The [`internment` crate](https://crates.io/crates/internment) used here (and in #14654) is an alternative to giving these values integer IDs. They become pointers to a unique, shared (technically: leaked) copy of the value. They are consequently 1) much smaller, 2) much faster to compare.

The `top`-reported memory usage of `./pants dependencies --transitive ::`:
* `313M` before (summary [before.txt](https://github.com/pantsbuild/pants/files/8175461/before.txt))
* `220M` after (summary [after.txt](https://github.com/pantsbuild/pants/files/8175462/after.txt))

[ci skip-build-wheels]
  • Loading branch information
stuhood committed Mar 3, 2022
1 parent 18ef75f commit e287aac
Show file tree
Hide file tree
Showing 15 changed files with 129 additions and 92 deletions.
39 changes: 36 additions & 3 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions src/rust/engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ bytes = "1.0"
cache = { path = "cache" }
concrete_time = { path = "concrete_time" }
crossbeam-channel = "0.5"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34", features=["smallvec"] }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1", features=["internment", "smallvec"] }
derivative = "2.2"
async-oncecell = "0.2"
either = "1.6"
Expand All @@ -126,6 +126,7 @@ graph = { path = "graph" }
grpc_util = { path = "grpc_util" }
hashing = { path = "hashing" }
indexmap = "1.8"
internment = "0.6"
itertools = "0.10"
lazy_static = "1"
libc = "0.2.112"
Expand Down
4 changes: 2 additions & 2 deletions src/rust/engine/concrete_time/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ name = "concrete_time"
publish = false

[dependencies]
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1" }
log = "0.4"
prost = "0.9"
prost-types = "0.9"
Expand Down
4 changes: 2 additions & 2 deletions src/rust/engine/fs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ publish = false
# Pin async-trait due to https://github.com/dtolnay/async-trait/issues/144.
async-trait = "=0.1.42"
bytes = "1.0"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1" }
dirs-next = "2"
futures = "0.3"
glob = "0.3.0"
Expand Down
4 changes: 2 additions & 2 deletions src/rust/engine/hashing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ publish = false

[dependencies]
byteorder = "1.3"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1" }
digest = "0.9"
generic-array = "0.14"
hex = "0.4.3"
Expand Down
4 changes: 2 additions & 2 deletions src/rust/engine/process_execution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ protos = { path = "../protos" }
bytes = "1.0"
cache = { path = "../cache" }
derivative = "2.2"
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34", features=["log"] }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1", features=["log"] }
grpc_util = { path = "../grpc_util" }
fs = { path = "../fs" }
futures = "0.3"
Expand Down
5 changes: 3 additions & 2 deletions src/rust/engine/rule_graph/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ authors = [ "Pants Build <pantsbuild@gmail.com>" ]
publish = false

[dependencies]
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/{30,31,32}.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "5c8bee5443fcafe4aaa9274490d354412d0955c1" }
indexmap = "1.8"
internment = "0.6"
log = "0.4"
petgraph = "0.5"

Expand Down
16 changes: 11 additions & 5 deletions src/rust/engine/rule_graph/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::{params_str, Entry, EntryWithDeps, InnerEntry, RootEntry, RuleEdges,
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};

use indexmap::IndexSet;
use internment::Intern;
use petgraph::graph::{DiGraph, EdgeReference, NodeIndex};
use petgraph::visit::{DfsPostOrder, EdgeRef, IntoNodeReferences, NodeRef, VisitMap, Visitable};
use petgraph::Direction;
Expand Down Expand Up @@ -1156,11 +1157,11 @@ impl<R: Rule> Builder<R> {
let entry_for = |node_id| -> Entry<R> {
let (node, in_set): &(Node<R>, ParamTypes<_>) = &graph[node_id];
match node {
Node::Rule(rule) => Entry::WithDeps(EntryWithDeps::Inner(InnerEntry {
Node::Rule(rule) => Entry::WithDeps(Intern::new(EntryWithDeps::Inner(InnerEntry {
params: in_set.clone(),
rule: rule.clone(),
})),
Node::Query(q) => Entry::WithDeps(EntryWithDeps::Root(RootEntry(q.clone()))),
}))),
Node::Query(q) => Entry::WithDeps(Intern::new(EntryWithDeps::Root(RootEntry(q.clone())))),
Node::Param(p) => Entry::Param(*p),
}
};
Expand All @@ -1182,8 +1183,13 @@ impl<R: Rule> Builder<R> {
// there was one dependency per DependencyKey.
let dependencies = graph
.edges_directed(node_id, Direction::Outgoing)
.map(|edge_ref| (*edge_ref.weight(), vec![entry_for(edge_ref.target())]))
.collect::<HashMap<_, _>>();
.map(|edge_ref| {
(
*edge_ref.weight(),
Intern::new(entry_for(edge_ref.target())),
)
})
.collect::<HashMap<_, Intern<Entry<R>>>>();

match entry {
Entry::WithDeps(wd) => {
Expand Down
40 changes: 18 additions & 22 deletions src/rust/engine/rule_graph/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use std::io;

use deepsize::DeepSizeOf;
use indexmap::IndexSet;
use internment::Intern;

pub use crate::builder::Builder;
pub use crate::rules::{
Expand Down Expand Up @@ -84,7 +85,7 @@ impl<R: Rule> EntryWithDeps<R> {
#[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)]
pub enum Entry<R: Rule> {
Param(R::TypeId),
WithDeps(EntryWithDeps<R>),
WithDeps(Intern<EntryWithDeps<R>>),
}

#[derive(DeepSizeOf, Eq, Hash, PartialEq, Clone, Debug)]
Expand All @@ -102,7 +103,7 @@ impl<R: Rule> InnerEntry<R> {
}
}

type RuleDependencyEdges<R> = HashMap<EntryWithDeps<R>, RuleEdges<R>>;
type RuleDependencyEdges<R> = HashMap<Intern<EntryWithDeps<R>>, RuleEdges<R>>;

#[derive(Eq, Hash, PartialEq, Clone, Debug)]
struct Diagnostic<R: Rule> {
Expand Down Expand Up @@ -279,8 +280,8 @@ impl<R: Rule> RuleGraph<R> {
if let Some(edges) = self.rule_dependency_edges.get(&entry) {
reachable.insert(entry, edges.clone());

entry_stack.extend(edges.all_dependencies().filter_map(|e| match e {
Entry::WithDeps(e) => Some(e.clone()),
entry_stack.extend(edges.all_dependencies().filter_map(|e| match e.as_ref() {
Entry::WithDeps(e) => Some(e),
_ => None,
}));
} else {
Expand Down Expand Up @@ -319,14 +320,14 @@ impl<R: Rule> RuleGraph<R> {
&self,
param_inputs: I,
product: R::TypeId,
) -> Result<(EntryWithDeps<R>, RuleEdges<R>), String> {
) -> Result<(Intern<EntryWithDeps<R>>, RuleEdges<R>), String> {
let params: ParamTypes<_> = param_inputs.into_iter().collect();

// Attempt to find an exact match.
let maybe_root = EntryWithDeps::Root(RootEntry(Query {
let maybe_root = Intern::new(EntryWithDeps::Root(RootEntry(Query {
product,
params: params.clone(),
}));
})));
if let Some(edges) = self.rule_dependency_edges.get(&maybe_root) {
return Ok((maybe_root, edges.clone()));
}
Expand All @@ -336,7 +337,7 @@ impl<R: Rule> RuleGraph<R> {
let subset_matches = self
.rule_dependency_edges
.iter()
.filter_map(|(entry, edges)| match entry {
.filter_map(|(entry, edges)| match entry.as_ref() {
EntryWithDeps::Root(ref root_entry)
if root_entry.0.product == product && root_entry.0.params.is_subset(&params) =>
{
Expand All @@ -349,14 +350,14 @@ impl<R: Rule> RuleGraph<R> {
match subset_matches.len() {
1 => {
let (root_entry, edges) = subset_matches[0];
Ok((root_entry.clone(), edges.clone()))
Ok((*root_entry, edges.clone()))
}
0 => {
// The Params were all registered as RootRules, but the combination wasn't legal.
let mut suggestions: Vec<_> = self
.rule_dependency_edges
.keys()
.filter_map(|entry| match entry {
.filter_map(|entry| match entry.as_ref() {
EntryWithDeps::Root(ref root_entry) if root_entry.0.product == product => {
Some(format!("Params({})", params_str(&root_entry.0.params)))
}
Expand Down Expand Up @@ -431,7 +432,7 @@ impl<R: Rule> RuleGraph<R> {
let mut root_rule_strs = self
.rule_dependency_edges
.iter()
.filter_map(|(k, deps)| match k {
.filter_map(|(k, deps)| match k.as_ref() {
EntryWithDeps::Root(_) => {
let root_str = k.fmt_for_graph(display_args);
let mut dep_entries = deps
Expand Down Expand Up @@ -470,7 +471,7 @@ impl<R: Rule> RuleGraph<R> {
let mut internal_rule_strs = self
.rule_dependency_edges
.iter()
.filter_map(|(k, deps)| match k {
.filter_map(|(k, deps)| match k.as_ref() {
&EntryWithDeps::Inner(_) => {
let mut dep_entries = deps
.all_dependencies()
Expand Down Expand Up @@ -508,23 +509,18 @@ impl<R: Rule> RuleGraph<R> {
///
/// Records the dependency rules for a rule.
///
/// TODO: No longer needs a Vec<Entry>.
///
#[derive(Eq, PartialEq, Clone, Debug)]
pub struct RuleEdges<R: Rule> {
dependencies: HashMap<R::DependencyKey, Vec<Entry<R>>>,
dependencies: HashMap<R::DependencyKey, Intern<Entry<R>>>,
}

impl<R: Rule> RuleEdges<R> {
pub fn entry_for(&self, dependency_key: &R::DependencyKey) -> Option<&Entry<R>> {
self
.dependencies
.get(dependency_key)
.and_then(|entries| entries.first())
pub fn entry_for(&self, dependency_key: &R::DependencyKey) -> Option<Intern<Entry<R>>> {
self.dependencies.get(dependency_key).cloned()
}

pub fn all_dependencies(&self) -> impl Iterator<Item = &Entry<R>> {
self.dependencies.values().flatten()
pub fn all_dependencies(&self) -> impl Iterator<Item = &Intern<Entry<R>>> {
self.dependencies.values()
}
}

Expand Down
6 changes: 4 additions & 2 deletions src/rust/engine/rule_graph/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use super::{params_str, Palette};
pub type ParamTypes<T> = BTreeSet<T>;

pub trait TypeId:
Clone + Copy + Debug + DeepSizeOf + Display + Hash + Eq + Ord + Sized + 'static
Clone + Copy + Debug + DeepSizeOf + Display + Hash + Eq + Ord + Sized + Send + Sync + 'static
{
///
/// Render a string for a collection of TypeIds.
Expand Down Expand Up @@ -77,7 +77,9 @@ impl DisplayForGraphArgs {
}
}

pub trait Rule: Clone + Debug + Display + Hash + Eq + Sized + DisplayForGraph + 'static {
pub trait Rule:
Clone + Debug + Display + Hash + Eq + Sized + DisplayForGraph + Send + Sync + 'static
{
type TypeId: TypeId;
type DependencyKey: DependencyKey<TypeId = Self::TypeId>;

Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/src/externs/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,7 @@ fn session_run_interactive_process(
true,
&Arc::new(std::sync::atomic::AtomicBool::new(true)),
core.intrinsics.run(
Intrinsic {
&Intrinsic {
product: core.types.interactive_process_result,
inputs: vec![core.types.interactive_process],
},
Expand Down
4 changes: 2 additions & 2 deletions src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ impl Intrinsics {

pub async fn run(
&self,
intrinsic: Intrinsic,
intrinsic: &Intrinsic,
context: Context,
args: Vec<Value>,
) -> NodeResult<Value> {
let function = self
.intrinsics
.get(&intrinsic)
.get(intrinsic)
.unwrap_or_else(|| panic!("Unrecognized intrinsic: {:?}", intrinsic));
function(context, args).await
}
Expand Down
Loading

0 comments on commit e287aac

Please sign in to comment.