Skip to content

Commit

Permalink
datastore: early exit missing components at table level (#1554)
Browse files: browse the repository at this point in the history
* early exit at index table level

* worst-case benchmark
  • Loading branch information
teh-cmc committed Mar 10, 2023
1 parent baad9be commit 416cb36
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
22 changes: 15 additions & 7 deletions crates/re_arrow_store/benches/data_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
use arrow2::array::{Array, UnionArray};
use criterion::{criterion_group, criterion_main, Criterion};

use re_arrow_store::{DataStore, LatestAtQuery, RangeQuery, TimeInt, TimeRange};
use re_arrow_store::{DataStore, DataStoreConfig, LatestAtQuery, RangeQuery, TimeInt, TimeRange};
use re_log_types::{
component_types::{InstanceKey, Rect2D},
datagen::{build_frame_nr, build_some_instances, build_some_rects},
Expand Down Expand Up @@ -35,15 +35,15 @@ fn insert(c: &mut Criterion) {
(NUM_RECTS * NUM_FRAMES) as _,
));
group.bench_function("insert", |b| {
b.iter(|| insert_messages(InstanceKey::name(), msgs.iter()));
b.iter(|| insert_messages(Default::default(), InstanceKey::name(), msgs.iter()));
});
}
}

fn latest_at_batch(c: &mut Criterion) {
{
let msgs = build_messages(NUM_RECTS as usize);
let store = insert_messages(InstanceKey::name(), msgs.iter());
let store = insert_messages(Default::default(), InstanceKey::name(), msgs.iter());
let mut group = c.benchmark_group("datastore/latest_at/batch/rects");
group.throughput(criterion::Throughput::Elements(NUM_RECTS as _));
group.bench_function("query", |b| {
Expand All @@ -62,9 +62,16 @@ fn latest_at_batch(c: &mut Criterion) {
}

fn latest_at_missing_components(c: &mut Criterion) {
// Simulate the worst possible case: many many buckets.
let config = DataStoreConfig {
index_bucket_size_bytes: 0,
index_bucket_nb_rows: 0,
..Default::default()
};

{
let msgs = build_messages(NUM_RECTS as usize);
let store = insert_messages(InstanceKey::name(), msgs.iter());
let store = insert_messages(config.clone(), InstanceKey::name(), msgs.iter());
let mut group = c.benchmark_group("datastore/latest_at/missing_components");
group.throughput(criterion::Throughput::Elements(NUM_RECTS as _));
group.bench_function("primary", |b| {
Expand All @@ -78,7 +85,7 @@ fn latest_at_missing_components(c: &mut Criterion) {

{
let msgs = build_messages(NUM_RECTS as usize);
let store = insert_messages(InstanceKey::name(), msgs.iter());
let store = insert_messages(config, InstanceKey::name(), msgs.iter());
let mut group = c.benchmark_group("datastore/latest_at/missing_components");
group.throughput(criterion::Throughput::Elements(NUM_RECTS as _));
group.bench_function("secondaries", |b| {
Expand All @@ -103,7 +110,7 @@ fn latest_at_missing_components(c: &mut Criterion) {
fn range_batch(c: &mut Criterion) {
{
let msgs = build_messages(NUM_RECTS as usize);
let store = insert_messages(InstanceKey::name(), msgs.iter());
let store = insert_messages(Default::default(), InstanceKey::name(), msgs.iter());
let mut group = c.benchmark_group("datastore/range/batch/rects");
group.throughput(criterion::Throughput::Elements(
(NUM_RECTS * NUM_FRAMES) as _,
Expand Down Expand Up @@ -155,10 +162,11 @@ fn build_messages(n: usize) -> Vec<MsgBundle> {
}

/// Benchmark helper: creates a `DataStore` for `cluster_key` and feeds it every
/// message bundle yielded by `msgs`, then returns the populated store.
///
/// `config` is forwarded to `DataStore::new`, which lets individual benchmarks
/// choose their own store configuration instead of always using the default.
///
/// Each `store.insert(..)` result is unwrapped, so a failed insert panics.
fn insert_messages<'a>(
config: DataStoreConfig,
cluster_key: ComponentName,
msgs: impl Iterator<Item = &'a MsgBundle>,
) -> DataStore {
// NOTE(review): this page is a rendered diff without +/- markers; the next line
// appears to be the removed (pre-change) side of the hunk, superseded by the
// `config`-taking line that follows it — confirm against the actual file.
let mut store = DataStore::new(cluster_key, Default::default());
let mut store = DataStore::new(cluster_key, config);
// Insert every bundle in iteration order; `unwrap` panics on the first failure.
msgs.for_each(|msg_bundle| store.insert(msg_bundle).unwrap());
store
}
Expand Down
5 changes: 5 additions & 0 deletions crates/re_arrow_store/src/store_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,11 @@ impl IndexTable {
) -> Option<[Option<RowIndex>; N]> {
crate::profile_function!();

// Early-exit if this entire table is unaware of this component.
if !self.all_components.contains(&primary) {
return None;
}

let timeline = self.timeline;

// The time we're looking for gives us an upper bound: all components must be indexed
Expand Down

1 comment on commit 416cb36

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust Benchmark

Benchmark suite Current: 416cb36 Previous: baad9be Ratio
datastore/insert/batch/rects/insert 563043 ns/iter (± 18984) 542755 ns/iter (± 7157) 1.04
datastore/latest_at/batch/rects/query 1833 ns/iter (± 9) 1813 ns/iter (± 29) 1.01
datastore/latest_at/missing_components/primary 284 ns/iter (± 0) 350 ns/iter (± 7) 0.81
datastore/latest_at/missing_components/secondaries 433 ns/iter (± 1) 417 ns/iter (± 7) 1.04
datastore/range/batch/rects/query 149017 ns/iter (± 421) 143993 ns/iter (± 2633) 1.03
mono_points_arrow/generate_message_bundles 46414401 ns/iter (± 1081118) 43140743 ns/iter (± 1198148) 1.08
mono_points_arrow/generate_messages 126000415 ns/iter (± 1021119) 122067453 ns/iter (± 1456953) 1.03
mono_points_arrow/encode_log_msg 152299265 ns/iter (± 686470) 149247724 ns/iter (± 1455404) 1.02
mono_points_arrow/encode_total 324682162 ns/iter (± 1646520) 317405627 ns/iter (± 2788648) 1.02
mono_points_arrow/decode_log_msg 177873830 ns/iter (± 862924) 171066566 ns/iter (± 1615566) 1.04
mono_points_arrow/decode_message_bundles 64857605 ns/iter (± 1004808) 62717435 ns/iter (± 1014836) 1.03
mono_points_arrow/decode_total 241217422 ns/iter (± 1536681) 235269855 ns/iter (± 3565675) 1.03
batch_points_arrow/generate_message_bundles 331133 ns/iter (± 1076) 320510 ns/iter (± 4271) 1.03
batch_points_arrow/generate_messages 6313 ns/iter (± 31) 5975 ns/iter (± 109) 1.06
batch_points_arrow/encode_log_msg 366188 ns/iter (± 1035) 355048 ns/iter (± 3889) 1.03
batch_points_arrow/encode_total 717465 ns/iter (± 1775) 697693 ns/iter (± 8053) 1.03
batch_points_arrow/decode_log_msg 347324 ns/iter (± 702) 343191 ns/iter (± 3228) 1.01
batch_points_arrow/decode_message_bundles 2132 ns/iter (± 26) 2025 ns/iter (± 29) 1.05
batch_points_arrow/decode_total 354628 ns/iter (± 924) 347596 ns/iter (± 3611) 1.02
arrow_mono_points/insert 6088943362 ns/iter (± 26094815) 6003074553 ns/iter (± 23326006) 1.01
arrow_mono_points/query 1742949 ns/iter (± 5452) 1679289 ns/iter (± 25419) 1.04
arrow_batch_points/insert 2626022 ns/iter (± 9153) 2614748 ns/iter (± 25060) 1.00
arrow_batch_points/query 16868 ns/iter (± 59) 16198 ns/iter (± 237) 1.04
arrow_batch_vecs/insert 43554 ns/iter (± 207) 41160 ns/iter (± 556) 1.06
arrow_batch_vecs/query 505776 ns/iter (± 1950) 485638 ns/iter (± 5398) 1.04
tuid/Tuid::random 34 ns/iter (± 0) 33 ns/iter (± 0) 1.03

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.