Skip to content

Commit

Permalink
fix missing latest window in dynamic-groupby (#2170)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 26, 2021
1 parent 28f43b2 commit 84ce73b
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 21 deletions.
4 changes: 3 additions & 1 deletion polars/polars-core/src/frame/groupby/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ mod test {
let g = out.column("groups").unwrap();
let g = g.utf8().unwrap();
let g = g.into_no_null_iter().collect::<Vec<_>>();
assert_eq!(g, &["a", "a", "a", "b"]);
assert_eq!(g, &["a", "a", "a", "a", "b", "b"]);

let upper = out.column("_upper_boundary").unwrap().slice(0, 3);
let start = NaiveDate::from_ymd(2021, 12, 16)
Expand Down Expand Up @@ -266,7 +266,9 @@ mod test {
(0u32, vec![0u32, 1, 2]),
(2u32, vec![2]),
(5u32, vec![5, 6]),
(6u32, vec![6]),
(3u32, vec![3, 4]),
(4u32, vec![4]),
];
assert_eq!(expected, groups);
}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-time/src/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ pub fn groupby(
) -> (GroupTuples, Vec<TimeNanoseconds>, Vec<TimeNanoseconds>) {
let start = time[0];
let boundary = if time.len() > 1 {
let stop = time[time.len() - 1];
// +1 because left or closed boundary could match the next window if it is on the boundary
let stop = time[time.len() - 1] + 1;
Bounds::new(start, stop)
} else {
let stop = start + 1;
Expand Down
9 changes: 8 additions & 1 deletion polars/polars-time/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@ fn test_groups_large_interval() {
let dur = Duration::parse("2d");
let w = Window::new(Duration::parse("2d"), dur.clone(), Duration::from_nsecs(0));
let (groups, _, _) = groupby(w, &ts, false, ClosedWindow::Both);
assert_eq!(groups.len(), 3);
assert_eq!(groups.len(), 4);
assert_eq!(groups[0], (0, vec![0]));
assert_eq!(groups[1], (1, vec![1]));
assert_eq!(groups[2], (1, vec![1, 2, 3]));
assert_eq!(groups[3], (3, vec![3]));
let (groups, _, _) = groupby(w, &ts, false, ClosedWindow::Left);
assert_eq!(groups.len(), 3);
assert_eq!(groups[2], (3, vec![3]));
let (groups, _, _) = groupby(w, &ts, false, ClosedWindow::Right);
assert_eq!(groups.len(), 2);
assert_eq!(groups[1], (2, vec![2, 3]));
}

#[test]
Expand Down
15 changes: 0 additions & 15 deletions polars/polars-time/src/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,6 @@ impl Window {
+ self.period.duration() / self.every.duration()) as usize
}

pub fn get_overlapping_bounds(&self, boundary: Bounds) -> Vec<Bounds> {
if boundary.is_empty() {
return vec![];
} else {
// estimate size
let size = self.estimate_overlapping_bounds(boundary);
let mut out_bounds = Vec::with_capacity(size);

for bi in self.get_overlapping_bounds_iter(boundary) {
out_bounds.push(bi);
}
out_bounds
}
}

pub fn get_overlapping_bounds_iter(&self, boundary: Bounds) -> BoundsIter {
BoundsIter::new(*self, boundary)
}
Expand Down
14 changes: 11 additions & 3 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,7 +2406,7 @@ def groupby_dynamic(
... [pl.col("time").count(), pl.col("time").list()]
... )
... )
shape: (3, 3)
shape: (4, 3)
┌─────────────────────┬────────────┬─────────────────────────────────────┐
│ time ┆ time_count ┆ time_agg_list │
│ --- ┆ --- ┆ --- │
Expand All @@ -2417,6 +2417,8 @@ def groupby_dynamic(
│ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
└─────────────────────┴────────────┴─────────────────────────────────────┘
When closed="both" the time values at the window boundaries belong to 2 groups.
Expand All @@ -2426,7 +2428,7 @@ def groupby_dynamic(
... [pl.col("time").count()]
... )
... )
shape: (3, 2)
shape: (4, 2)
┌─────────────────────┬────────────┐
│ time ┆ time_count │
│ --- ┆ --- │
Expand All @@ -2437,6 +2439,8 @@ def groupby_dynamic(
│ 2021-12-16 01:00:00 ┆ 3 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2021-12-16 02:00:00 ┆ 3 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2021-12-16 03:00:00 ┆ 1 │
└─────────────────────┴────────────┘
Dynamic groupbys can also be combined with grouping on normal keys
Expand Down Expand Up @@ -2481,7 +2485,7 @@ def groupby_dynamic(
... include_boundaries=True,
... ).agg([pl.col("time").count()])
... )
shape: (4, 5)
shape: (6, 5)
┌────────┬─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
│ groups ┆ _lower_boundary ┆ _upper_boundary ┆ time ┆ time_count │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
Expand All @@ -2493,7 +2497,11 @@ def groupby_dynamic(
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ a ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 04:00:00 ┆ 2021-12-16 03:00:00 ┆ 1 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
└────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
"""
Expand Down

0 comments on commit 84ce73b

Please sign in to comment.