Skip to content

Commit

Permalink
Fix quadratic slowdown when ingesting data with uniform time (#3088)
Browse files Browse the repository at this point in the history
### What
* Closes #3086
* Closes #433

This should also speed up data insertion overall in the common
case of already-sorted data.

### Checklist
* [x] I have read and agree to the [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/3088) (if
applicable)

- [PR Build Summary](https://build.rerun.io/pr/3088)
- [Docs
preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/docs)
<!--DOCS-PREVIEW-->
- [Examples
preview](https://rerun.io/preview/e5adb1aa580de2274b4eca9f6c5de38ae503b521/examples)
<!--EXAMPLES-PREVIEW--><!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://ref.rerun.io/dev/bench/)
- [Wasm size tracking](https://ref.rerun.io/dev/sizes/)
  • Loading branch information
emilk committed Aug 23, 2023
1 parent 6db856a commit 4bc05e5
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions crates/re_arrow_store/src/store_write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,15 +380,21 @@ impl IndexedTable {
}
}

debug!(
kind = "insert",
timeline = %timeline.name(),
time = timeline.typ().format(time),
entity = %ent_path,
len_limit = config.indexed_bucket_num_rows,
len, len_overflow,
"couldn't split indexed bucket, proceeding to ignore limits"
let bucket_time_range = bucket.inner.read().time_range;

re_log::debug_once!(
"Failed to split bucket on timeline {}",
bucket.timeline.format_time_range(&bucket_time_range)
);

if bucket_time_range.min == bucket_time_range.max {
re_log::warn_once!(
"Found over {} rows with the same timepoint {:?}={} - perhaps you forgot to update or remove the timeline?",
config.indexed_bucket_num_rows,
bucket.timeline.name(),
bucket.timeline.typ().format(bucket_time_range.min)
);
}
}

trace!(
Expand Down Expand Up @@ -437,6 +443,13 @@ impl IndexedBucket {
} = &mut *inner;

// append time to primary column and update time range appropriately

if let Some(last_time) = col_time.last() {
if time.as_i64() < *last_time {
*is_sorted = false;
}
}

col_time.push(time.as_i64());
*time_range = TimeRange::new(time_range.min.min(time), time_range.max.max(time));
size_bytes_added += time.as_i64().total_size_bytes();
Expand Down Expand Up @@ -495,9 +508,6 @@ impl IndexedBucket {
}
}

// TODO(#433): re_datastore: properly handle already sorted data during insertion
*is_sorted = false;

*size_bytes += size_bytes_added;

#[cfg(debug_assertions)]
Expand Down

0 comments on commit 4bc05e5

Please sign in to comment.