Skip to content

Commit

Permalink
fix panic in dynamic groupby of single groups
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 21, 2021
1 parent f33dc82 commit 7e036f8
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 13 deletions.
13 changes: 1 addition & 12 deletions polars/polars-time/src/bounds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,10 @@ impl Display for Bounds {
}
}

// Get a wrapping boundary from a slice of nanoseconds.
// E.g. the first and last values are the start and stop of the boundary.
impl<S: AsRef<[i64]>> From<S> for Bounds {
    /// Builds a [`Bounds`] spanning the first and last values of the slice.
    ///
    /// The slice is assumed to be sorted in ascending order, so its first
    /// and last elements form the start/stop of the boundary.
    ///
    /// # Panics
    /// Panics with a descriptive message when the slice is empty, and
    /// propagates the ordering assertion in `Bounds::new` when the first
    /// value exceeds the last (i.e. the data is not sorted ascending).
    fn from(s: S) -> Self {
        let slice = s.as_ref();
        // Use first()/last() instead of raw indexing so an empty slice
        // fails with a clear message rather than "index out of bounds".
        let start = *slice
            .first()
            .expect("cannot create Bounds from an empty slice");
        let stop = *slice
            .last()
            .expect("cannot create Bounds from an empty slice");
        Self::new(start, stop)
    }
}

impl Bounds {
pub fn new(start: TimeNanoseconds, stop: TimeNanoseconds) -> Self {
assert!(
start < stop,
start <= stop,
"boundary start must be smaller than stop; is your time column sorted in ascending order?"
);
Bounds { start, stop }
Expand Down
10 changes: 9 additions & 1 deletion polars/polars-time/src/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ pub fn groupby(
include_boundaries: bool,
closed_window: ClosedWindow,
) -> (GroupTuples, Vec<TimeNanoseconds>, Vec<TimeNanoseconds>) {
let boundary = Bounds::from(time);
let start = time[0];
let boundary = if time.len() > 1 {
let stop = time[time.len() - 1];
Bounds::new(start, stop)
} else {
let stop = start + 1;
Bounds::new(start, stop)
};

let size = if include_boundaries {
window.estimate_overlapping_bounds(boundary)
} else {
Expand Down
25 changes: 25 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,28 @@ def test_truncate_negative_offset() -> None:
datetime(2021, 4, 1),
datetime(2021, 5, 1),
]
df = pl.DataFrame(
{
"event_date": [
datetime(2021, 4, 11),
datetime(2021, 4, 29),
datetime(2021, 5, 29),
],
"adm1_code": [1, 2, 1],
"five_type": ["a", "b", "a"],
"actor": ["a", "a", "a"],
"admin": ["a", "a", "a"],
"fatalities": [10, 20, 30],
}
)

out = df.groupby_dynamic(
time_column="event_date",
every="1mo",
by=["admin", "five_type", "actor"],
).agg([pl.col("adm1_code").unique(), (pl.col("fatalities") > 0).sum()])
assert out["event_date"].to_list() == [
datetime(2021, 4, 1),
datetime(2021, 5, 1),
datetime(2021, 4, 1),
]

0 comments on commit 7e036f8

Please sign in to comment.