Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the gap-detector #1363

Merged
merged 1 commit into from
Feb 21, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
182 changes: 118 additions & 64 deletions crates/re_viewer/src/ui/time_panel/time_axis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use re_log_types::{TimeInt, TimeRange, TimeType};

/// A piece-wise linear view of a single timeline.
///
/// It is piece-wise linear because we sometimes have huge gaps in the data,
/// and we want to present a compressed view of it.
/// It is piece-wise linear because we sometimes have big gaps in the data
/// which we collapse in order to present a compressed view of the data.
#[derive(Clone, Debug)]
pub(crate) struct TimelineAxis {
pub ranges: vec1::Vec1<TimeRange>,
Expand All @@ -16,72 +16,14 @@ pub(crate) struct TimelineAxis {
impl TimelineAxis {
pub fn new<T>(time_type: TimeType, times: &BTreeMap<TimeInt, T>) -> Self {
crate::profile_function!();

assert!(!times.is_empty());

// in seconds or nanos
let time_abs_diff = |a: TimeInt, b: TimeInt| -> u64 { a.as_i64().abs_diff(b.as_i64()) };

// First determine the threshold for when a gap should be closed.
// Sometimes, looking at data spanning milliseconds, a single second pause can be an eternity.
// When looking at data recorded over hours, a few minutes of pause may be nothing.
// So we start with a small gap and keep expanding it while it decreases the number of gaps.

// Anything at least this close are considered one thing.
// Measured in nanos or sequences.
let min_gap_size: u64 = match time_type {
TimeType::Sequence => 9,
TimeType::Time => 1_000_000_000, // nanos!
};

// Collect all gaps larger than our minimum gap size.
let mut gap_sizes = {
crate::profile_scope!("collect_gaps");
times
.keys()
.tuple_windows()
.map(|(a, b)| time_abs_diff(*a, *b))
.filter(|&gap_size| gap_size > min_gap_size)
.collect_vec()
};

gap_sizes.sort_unstable();

// We can probably improve these heuristics a bit.
// Currently the gap-detector is only based on the sizes of gaps, not the sizes of runs.
// If we have a hour-long run, it would make sense that the next gap must be quite big
// for it to be contracted, yet we don't do anything like that yet.

let mut gap_threshold = min_gap_size;

// Progressively expand the gap threshold
for gap in gap_sizes {
if gap >= gap_threshold * 2 {
break; // much bigger gap than anything before, let's stop here
} else if gap > gap_threshold {
gap_threshold *= 2;
}
let gap_threshold = gap_size_heuristic(time_type, times);
Self {
ranges: create_ranges(times, gap_threshold),
}

// ----
// We calculated the threshold for creating gaps, so let's collect all the ranges:

crate::profile_scope!("create_ranges");
let mut values_it = times.keys();
let mut ranges = vec1::vec1![TimeRange::point(*values_it.next().unwrap())];

for &new_value in values_it {
let last_max = &mut ranges.last_mut().max;
if time_abs_diff(*last_max, new_value) <= gap_threshold {
*last_max = new_value; // join previous range
} else {
ranges.push(TimeRange::point(new_value)); // new range
}
}

Self { ranges }
}

/// Total uncollapsed time.
pub fn sum_time_lengths(&self) -> TimeInt {
self.ranges.iter().map(|t| t.length()).sum()
}
Expand All @@ -101,3 +43,115 @@ impl TimelineAxis {
// self.ranges.last().max
// }
}

// in seconds or nanos
fn time_abs_diff(a: TimeInt, b: TimeInt) -> u64 {
a.as_i64().abs_diff(b.as_i64())
}

/// First determine the threshold for when a gap should be closed.
/// Sometimes, looking at data spanning milliseconds, a single second pause can be an eternity.
/// When looking at data recorded over hours, a few minutes of pause may be nothing.
/// We also don't want to produce a timeline of only gaps.
/// Finding a perfect heuristic is impossible, but we do our best!
fn gap_size_heuristic<T>(time_type: TimeType, times: &BTreeMap<TimeInt, T>) -> u64 {
crate::profile_function!();

assert!(!times.is_empty());

if times.len() <= 2 {
return u64::MAX;
}

let total_time_span = time_abs_diff(
*times.first_key_value().unwrap().0,
*times.last_key_value().unwrap().0,
);

// We start off by a minimum gap size - any gap smaller than this will never be collapsed.
// This is partially an optimization, and partially something that "feels right".
let min_gap_size: u64 = match time_type {
TimeType::Sequence => 9,
TimeType::Time => 100_000_000, // nanos!
Copy link
Member

@teh-cmc teh-cmc Feb 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
TimeType::Time => 100_000_000, // nanos!
TimeType::Time => 100_000_000, // 100ms

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I sort of prefer the reader having to do their own math rather than adding a comment that is going to be outdated the next time someone changes that line

};

// Collect all gaps larger than our minimum gap size.
let mut gap_sizes = {
crate::profile_scope!("collect_gaps");
times
.keys()
.tuple_windows()
.map(|(a, b)| time_abs_diff(*a, *b))
.filter(|&gap_size| gap_size > min_gap_size)
.collect_vec()
};
gap_sizes.sort_unstable();

// Don't collapse too many gaps, because then the timeline is all gaps!
let max_collapses: usize = ((times.len() - 1) / 3).min(20);

// Only collapse gaps that take up a significant portion of the total time,
// measured as the fraction of the total time that the gap represents.
let min_collapse_fraction: f64 = (2.0 / (times.len() - 1) as f64).max(0.35);

let mut gap_threshold = u64::MAX;
let mut uncollapsed_time = total_time_span;

// Go through the gaps, largest to smallest:
for &gap in gap_sizes.iter().rev().take(max_collapses) {
// How big is the gap relative to the total uncollapsed time?
let gap_fraction = gap as f64 / uncollapsed_time as f64;
if gap_fraction > min_collapse_fraction {
// Collapse this gap
gap_threshold = gap;
uncollapsed_time -= gap;
} else {
break; // gap is too small to collapse, and so will all following gaps be
}
}

gap_threshold
}

/// Collapse any gaps larger or equals to the given threshold.
fn create_ranges<T>(times: &BTreeMap<TimeInt, T>, gap_threshold: u64) -> vec1::Vec1<TimeRange> {
crate::profile_function!();
let mut values_it = times.keys();
let mut ranges = vec1::vec1![TimeRange::point(*values_it.next().unwrap())];

for &new_value in values_it {
let last_max = &mut ranges.last_mut().max;
if time_abs_diff(*last_max, new_value) < gap_threshold {
*last_max = new_value; // join previous range
} else {
ranges.push(TimeRange::point(new_value)); // new range
}
}

ranges
}

#[cfg(test)]
mod tests {
use super::*;
use re_arrow_store::TimeRange;

fn ranges(times: &[i64]) -> vec1::Vec1<TimeRange> {
#[allow(clippy::zero_sized_map_values)]
let times: BTreeMap<TimeInt, ()> = times
.iter()
.map(|&seq| (TimeInt::from_sequence(seq), ()))
.collect();
TimelineAxis::new(TimeType::Sequence, &times).ranges
}

#[test]
fn test_time_axis() {
assert_eq!(1, ranges(&[1]).len());
assert_eq!(1, ranges(&[1, 2, 3, 4]).len());
assert_eq!(1, ranges(&[10, 20, 30, 40]).len());
assert_eq!(1, ranges(&[1, 2, 3, 11, 12, 13]).len(), "Too small gap");
assert_eq!(2, ranges(&[10, 20, 30, 110, 120, 130]).len());
assert_eq!(1, ranges(&[10, 1000]).len(), "not enough numbers");
}
}