Skip to content

Commit

Permalink
track ff dictionary indexing memory consumption (#2147)
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Aug 16, 2023
1 parent 52d9e6f commit 62ece86
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
6 changes: 5 additions & 1 deletion columnar/src/columnar/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,18 @@ fn mutate_or_create_column<V, TMutator>(

impl ColumnarWriter {
/// Returns the total of the `mem_usage()` values reported by the arena, by
/// each per-type field hash map, and by every entry in `dictionaries`.
pub fn mem_usage(&self) -> usize {
    let arena_mem = self.arena.mem_usage();

    // One hash map per supported column type.
    let field_hash_maps_mem = self.numerical_field_hash_map.mem_usage()
        + self.bool_field_hash_map.mem_usage()
        + self.bytes_field_hash_map.mem_usage()
        + self.str_field_hash_map.mem_usage()
        + self.ip_addr_field_hash_map.mem_usage()
        + self.datetime_field_hash_map.mem_usage();

    // Dictionary builders report their own usage; add them up.
    let dictionaries_mem: usize = self
        .dictionaries
        .iter()
        .fold(0, |total, dict| total + dict.mem_usage());

    arena_mem + field_hash_maps_mem + dictionaries_mem
}

/// Returns the list of doc ids from 0..num_docs sorted by the `sort_field`
Expand Down
7 changes: 7 additions & 0 deletions columnar/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct OrderedId(pub u32);
/// Collects byte-string terms, associating each stored term with an
/// `UnorderedId`, and keeps a running estimate of the memory this takes.
#[derive(Default)]
pub(crate) struct DictionaryBuilder {
/// Maps the owned bytes of each term to the `UnorderedId` stored for it.
dict: FnvHashMap<Vec<u8>, UnorderedId>,
/// Estimated memory footprint. The insertion path increases it by the
/// term's length plus a flat 40 for every newly inserted term; it is an
/// approximation, not a measured allocation size.
memory_consumption: usize,
}

impl DictionaryBuilder {
Expand All @@ -43,6 +44,8 @@ impl DictionaryBuilder {
}
let new_id = UnorderedId(self.dict.len() as u32);
self.dict.insert(term.to_vec(), new_id);
self.memory_consumption += term.len();
self.memory_consumption += 40; // Term Metadata + HashMap overhead
new_id
}

Expand All @@ -63,6 +66,10 @@ impl DictionaryBuilder {
sstable_builder.finish()?;
Ok(TermIdMapping { unordered_to_ord })
}

/// Returns the current value of the `memory_consumption` counter.
///
/// The counter is maintained incrementally as terms are inserted, so this
/// is an estimate rather than a measurement of actual allocations.
pub(crate) fn mem_usage(&self) -> usize {
self.memory_consumption
}
}

#[cfg(test)]
Expand Down

0 comments on commit 62ece86

Please sign in to comment.