Skip to content

Commit

Permalink
no-changelog: fix period in new dynamic groupby starting points (#5623)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 25, 2022
1 parent 1933e90 commit ffa219c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 40 deletions.
3 changes: 3 additions & 0 deletions polars/polars-time/src/windows/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ pub fn groupby_windows(
start_by: StartBy,
) -> (GroupsSlice, Vec<i64>, Vec<i64>) {
let start = time[0];
// the boundary we define here is not yet correct. It doesn't take 'period' into account
// and it doesn't have the proper starting point. This boundary is used as a proxy to find
// the proper 'boundary' in 'window.get_overlapping_bounds_iter'.
let boundary = if time.len() > 1 {
// +1 because left or closed boundary could match the next window if it is on the boundary
let stop = time[time.len() - 1] + 1;
Expand Down
18 changes: 15 additions & 3 deletions polars/polars-time/src/windows/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,16 @@ pub struct BoundsIter {
impl BoundsIter {
fn new(window: Window, boundary: Bounds, tu: TimeUnit, start_by: StartBy) -> Self {
let bi = match start_by {
StartBy::DataPoint => boundary,
StartBy::DataPoint => {
let mut boundary = boundary;
let offset_fn = match tu {
TimeUnit::Nanoseconds => Duration::add_ns,
TimeUnit::Microseconds => Duration::add_us,
TimeUnit::Milliseconds => Duration::add_ms,
};
boundary.stop = offset_fn(&window.period, boundary.start);
boundary
}
StartBy::WindowBound => match tu {
TimeUnit::Nanoseconds => window.get_earliest_bounds_ns(boundary.start),
TimeUnit::Microseconds => window.get_earliest_bounds_us(boundary.start),
Expand Down Expand Up @@ -200,17 +209,20 @@ impl BoundsIter {
Duration::add_ms,
),
};
// find beginning of the week.
let mut boundary = boundary;
let dt = from(boundary.start);
let tz = chrono_tz::UTC;
let dt = dt.and_local_timezone(tz).unwrap();
let dt = dt.beginning_of_week();
let dt = dt.naive_utc();
let start = to(dt);
// apply the 'offset'
let start = offset(&window.offset, start);
let delta = boundary.stop - boundary.start;
// and compute the end of the window defined by the 'period'
let stop = offset(&window.period, start);
boundary.start = start;
boundary.stop = start + delta;
boundary.stop = stop;
boundary
}
#[cfg(not(feature = "timezones"))]
Expand Down
58 changes: 21 additions & 37 deletions py-polars/tests/unit/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,12 +442,12 @@ def test_groupby_dynamic_startby_5599() -> None:
datetime(2022, 12, 16, 2, 35),
],
"_upper_boundary": [
datetime(2022, 12, 16, 3, 0, 0, 1),
datetime(2022, 12, 16, 3, 31, 0, 1),
datetime(2022, 12, 16, 4, 2, 0, 1),
datetime(2022, 12, 16, 4, 33, 0, 1),
datetime(2022, 12, 16, 5, 4, 0, 1),
datetime(2022, 12, 16, 5, 35, 0, 1),
datetime(2022, 12, 16, 0, 31),
datetime(2022, 12, 16, 1, 2),
datetime(2022, 12, 16, 1, 33),
datetime(2022, 12, 16, 2, 4),
datetime(2022, 12, 16, 2, 35),
datetime(2022, 12, 16, 3, 6),
],
"date": [
datetime(2022, 12, 16, 0, 0),
Expand All @@ -457,7 +457,7 @@ def test_groupby_dynamic_startby_5599() -> None:
datetime(2022, 12, 16, 2, 30),
datetime(2022, 12, 16, 3, 0),
],
"count": [7, 5, 4, 3, 2, 1],
"count": [2, 1, 1, 1, 1, 1],
}

# start by week
Expand All @@ -468,35 +468,19 @@ def test_groupby_dynamic_startby_5599() -> None:
pl.col("date").dt.weekday().alias("day")
)

assert (
df.groupby_dynamic(
"date",
every="1w",
include_boundaries=True,
start_by="monday",
truncate=False,
).agg([pl.count(), pl.col("day").first().alias("data_day")])
).with_column(pl.col("date").dt.weekday().alias("truncated_day")).to_dict(
False
) == {
"_lower_boundary": [
datetime(2021, 12, 27, 0, 0),
datetime(2022, 1, 3, 0, 0),
datetime(2022, 1, 10, 0, 0),
],
"_upper_boundary": [
datetime(2022, 1, 7, 0, 0, 0, 1),
datetime(2022, 1, 14, 0, 0, 0, 1),
datetime(2022, 1, 21, 0, 0, 0, 1),
],
"date": [
datetime(2022, 1, 1, 0, 0),
datetime(2022, 1, 3, 0, 0),
datetime(2022, 1, 10, 0, 0),
],
"count": [13, 19, 5],
"data_day": [6, 1, 1],
"truncated_day": [6, 1, 1],
assert df.groupby_dynamic(
"date",
every="1w",
period="3d",
include_boundaries=True,
start_by="monday",
truncate=False,
).agg([pl.count(), pl.col("day").first().alias("data_day")]).to_dict(False) == {
"_lower_boundary": [datetime(2022, 1, 3, 0, 0), datetime(2022, 1, 10, 0, 0)],
"_upper_boundary": [datetime(2022, 1, 6, 0, 0), datetime(2022, 1, 13, 0, 0)],
"date": [datetime(2022, 1, 3, 0, 0), datetime(2022, 1, 10, 0, 0)],
"count": [6, 5],
"data_day": [1, 1],
}


Expand Down Expand Up @@ -529,5 +513,5 @@ def test_groupby_dynamic_by_monday_and_offset_5444() -> None:
date(2022, 11, 1),
date(2022, 11, 8),
],
"value": [14, 10, 7, 12],
"value": [4, 10, 2, 12],
}

0 comments on commit ffa219c

Please sign in to comment.