Skip to content

Commit

Permalink
fix bug in groupby_dynamic edges
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 26, 2022
1 parent 7eb556c commit 81af2e8
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 10 deletions.
8 changes: 8 additions & 0 deletions polars/polars-core/src/named_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ impl<'a, T: AsRef<[&'a str]>> NamedFrom<T, [&'a str]> for Series {
}
}

/// Builds a `Series` from a borrowed `Series`, giving the result a new name.
impl NamedFrom<&Series, str> for Series {
    /// Clones `s` and renames the clone to `name`; the borrowed input is not modified.
    fn new(name: &str, s: &Series) -> Self {
        let mut renamed = s.clone();
        renamed.rename(name);
        renamed
    }
}

impl<'a, T: AsRef<[&'a str]>> NamedFrom<T, [&'a str]> for Utf8Chunked {
fn new(name: &str, v: T) -> Self {
Utf8Chunked::new_from_slice(name, v.as_ref())
Expand Down
34 changes: 24 additions & 10 deletions polars/polars-time/src/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub enum TimeUnit {
/// - period
/// - offset
/// window boundaries are created. For every window boundary we search for the values
/// that fit that window by the given `ClowedWindow`. The groups are return as `GroupTuples`
/// that fit that window by the given `ClosedWindow`. The groups are returned as `GroupTuples`
/// together with the lower bound and upper bound timestamps. These timestamps indicate the start (lower)
/// and end (upper) of the window of that group.
///
Expand Down Expand Up @@ -65,34 +65,48 @@ pub fn groupby_windows(
Vec::with_capacity(window.estimate_overlapping_bounds_ms(boundary))
}
};
let mut latest_start = 0;
let mut start_offset = 0;

for bi in window.get_overlapping_bounds_iter(boundary, tu) {
let mut skip_window = false;
// find starting point of window
while latest_start < time.len() {
let t = time[latest_start];
while start_offset < time.len() {
let t = time[start_offset];
if bi.is_future(t) {
// the window is behind the time values.
skip_window = true;
break;
}
if bi.is_member(t, closed_window) {
break;
}
latest_start += 1;
start_offset += 1;
}
if skip_window {
latest_start = latest_start.saturating_sub(1);
start_offset = start_offset.saturating_sub(1);
continue;
}
if start_offset == time.len() {
start_offset = start_offset.saturating_sub(1);
}

// find members of this window
let mut i = latest_start;
if i >= time.len() {
break;
let mut i = start_offset;

// last value
if i == time.len() - 1 {
let t = time[i];
if bi.is_member(t, closed_window) {
if include_boundaries {
lower_bound.push(bi.start);
upper_bound.push(bi.stop);
}
groups.push([i as u32, 1])
}
continue;
}

let first = latest_start as u32;
let first = start_offset as u32;

while i < time.len() {
let t = time[i];
Expand Down
35 changes: 35 additions & 0 deletions polars/polars-time/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,38 @@ fn test_rolling_lookback() {
assert_eq!(groups[7], [4, 4]); // bound: 01:30 -> 03:30 time: 03:30
assert_eq!(groups[8], [5, 4]); // bound: 02:00 -> 04:00 time: 04:00
}

/// Checks that the final time value is still assigned to the windows that
/// contain it when grouping two sparse timestamps into overlapping windows.
#[test]
fn test_end_membership() {
    // Millisecond timestamp of midnight on the given calendar date.
    let midnight_ms = |year: i32, month: u32, day: u32| {
        NaiveDate::from_ymd(year, month, day)
            .and_hms(0, 0, 0)
            .timestamp_millis()
    };
    let time = [midnight_ms(2021, 2, 1), midnight_ms(2021, 5, 1)];

    let window = Window::new(
        Duration::parse("1mo"),
        Duration::parse("2mo"),
        Duration::parse("-2mo"),
    );

    // Windows and the indices of `time` expected to fall inside each one:
    //   2020-12-01 -> 2021-02-01   members: None
    //   2021-01-01 -> 2021-03-01   members: [0]
    //   2021-02-01 -> 2021-04-01   members: [0]
    //   2021-03-01 -> 2021-05-01   members: None
    //   2021-04-01 -> 2021-06-01   members: [1]
    //   2021-05-01 -> 2021-07-01   members: [1]
    let (groups, _, _) = groupby_windows(
        window,
        &time,
        false,
        ClosedWindow::Left,
        TimeUnit::Milliseconds,
    );

    // Each entry is `[first_index, length]` for one non-empty window, in window order.
    let expected: [[u32; 2]; 4] = [[0, 1], [0, 1], [1, 1], [1, 1]];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(groups[idx], *want);
    }
}

0 comments on commit 81af2e8

Please sign in to comment.