Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement $rolling() #470

Merged
merged 14 commits into from
Nov 20, 2023
15 changes: 9 additions & 6 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,17 @@
- New methods `$write_json()` and `$write_ndjson()` for DataFrame (#502).
- Removed argument `name` in `pl$date_range()`, which was deprecated for a while
(#503).
- New private method `.pr$DataFrame$drop_all_in_place(df)` to drop `DataFrame` in-place,
to release memory without invoking gc(). However, if there are other strong references to any of
the underlying Series or arrow arrays, that memory will specifically not be released. This method
is aimed for r-polars extensions, and will be kept stable as much as possible (#504).
- New private method `.pr$DataFrame$drop_all_in_place(df)` to drop `DataFrame`
in-place, to release memory without invoking gc(). However, if there are other
strong references to any of the underlying Series or arrow arrays, that memory
will specifically not be released. This method is aimed for r-polars extensions,
and will be kept stable as much as possible (#504).
- New functions `pl$min_horizontal()`, `pl$max_horizontal()`, `pl$sum_horizontal()`,
`pl$all_horizontal()`, `pl$any_horizontal()` (#508).
- New generic functions `as_polars_df()` and `as_polars_lf()` to create polars DataFrames
and LazyFrames (#519).
- New generic functions `as_polars_df()` and `as_polars_lf()` to create polars
DataFrames and LazyFrames (#519).
- New method `$rolling()` to apply an Expr over a rolling window based on
date/datetime/numeric indices (#470).

# polars 0.10.1

Expand Down
90 changes: 90 additions & 0 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -3358,3 +3358,93 @@ Expr_peak_min = function() {
Expr_peak_max = function() {
.pr$Expr$peak_max(self)
}

#' Create rolling groups based on a time or numeric column
#'
#' @description
#' If you have a time series `<t_0, t_1, ..., t_n>`, then by default the windows
#' created will be:
#' * (t_0 - period, t_0]
#' * (t_1 - period, t_1]
#' * …
#' * (t_n - period, t_n]
#'
#' whereas if you pass a non-default offset, then the windows will be:
#' * (t_0 + offset, t_0 + offset + period]
#' * (t_1 + offset, t_1 + offset + period]
#' * …
#' * (t_n + offset, t_n + offset + period]
#'
#' @param index_column Column used to group based on the time window. Often of
#' type Date/Datetime. This column must be sorted in ascending order. If this
#' column represents an index, it has to be either Int32 or Int64. Note that
#' Int32 gets temporarily cast to Int64, so if performance matters use an Int64
#' column.
#' @param period Length of the window, must be non-negative.
#' @param offset Offset of the window. Default is `-period`.
#' @param closed Define which sides of the temporal interval are closed
#' (inclusive). This can be either `"left"`, `"right"`, `"both"` or `"none"`.
#' @param check_sorted Check whether data is actually sorted. Checking it is
#' expensive so if you are sure the data within the `index_column` is sorted, you
#' can set this to `FALSE` but note that if the data actually is unsorted, it
#' will lead to incorrect output.
#'
#' @details
#' The period and offset arguments are created either from a timedelta, or by
#' using the following string language:
#' * 1ns (1 nanosecond)
#' * 1us (1 microsecond)
#' * 1ms (1 millisecond)
#' * 1s (1 second)
#' * 1m (1 minute)
#' * 1h (1 hour)
#' * 1d (1 calendar day)
#' * 1w (1 calendar week)
#' * 1mo (1 calendar month)
#' * 1q (1 calendar quarter)
#' * 1y (1 calendar year)
#' * 1i (1 index count)
#'
#' Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
#'
#' By "calendar day", we mean the corresponding time on the next day (which may
#' not be 24 hours, due to daylight savings). Similarly for "calendar week",
#' "calendar month", "calendar quarter", and "calendar year".
#'
#' In case of a rolling operation on an integer column, the windows are defined
#' by:
#' * "1i" # length 1
#' * "10i" # length 10
#'
#' @return Expr
#'
#' @examples
#' # create a DataFrame with a Datetime column and an f64 column
#' dates = c("2020-01-01 13:45:48", "2020-01-01 16:42:13", "2020-01-01 16:45:09",
#' "2020-01-02 18:12:48", "2020-01-03 19:45:32", "2020-01-08 23:16:43")
#'
#' df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$
#' with_columns(
#' pl$col("dt")$str$strptime(pl$Datetime(tu = "us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
#' )
#'
#' df$with_columns(
#' sum_a=pl$sum("a")$rolling(index_column="dt", period="2d"),
#' min_a=pl$min("a")$rolling(index_column="dt", period="2d"),
#' max_a=pl$max("a")$rolling(index_column="dt", period="2d")
#' )
#'
#' # we can use "offset" to change the start of the window period. Here, with
#' # offset = "1d", we start the window one day after the value in "dt", and
#' # then we add a 2-day window relative to the window start.
#' df$with_columns(
#' sum_a_offset1 = pl$sum("a")$rolling(index_column = "dt", period = "2d", offset = "1d")
#' )
Expr_rolling = function(index_column, period, offset = NULL,
closed = "right", check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
}
.pr$Expr$rolling(self, index_column, period, offset, closed, check_sorted) |>
unwrap("in $rolling():")
}
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,8 @@ Expr$rolling_quantile <- function(quantile, interpolation, window_size, weights,

Expr$rolling_skew <- function(window_size_f, bias) .Call(wrap__Expr__rolling_skew, self, window_size_f, bias)

Expr$rolling <- function(index_column, period, offset, closed, check_sorted) .Call(wrap__Expr__rolling, self, index_column, period, offset, closed, check_sorted)

Expr$abs <- function() .Call(wrap__Expr__abs, self)

Expr$rank <- function(method, descending) .Call(wrap__Expr__rank, self, method, descending)
Expand Down
108 changes: 108 additions & 0 deletions man/Expr_rolling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 33 additions & 7 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
use crate::concurrent::RFnSignature;
use crate::rdatatype::literal_to_any_value;
use crate::rdatatype::new_null_behavior;
use crate::rdatatype::new_rank_method;
use crate::rdatatype::new_rolling_cov_options;
use crate::rdatatype::robj_to_timeunit;
use crate::rdatatype::{DataTypeVector, RPolarsDataType};
use crate::rdatatype::{
literal_to_any_value, new_null_behavior, new_rank_method,
new_rolling_cov_options, robj_to_timeunit, DataTypeVector, RPolarsDataType,
};
use crate::robj_to;
use crate::rpolarserr::polars_to_rpolars_err;
use crate::rpolarserr::{rerr, rpolars_to_polars_err, RResult, Rctx, WithRctx};
Expand All @@ -18,7 +16,10 @@ use crate::utils::{try_f64_into_i64, try_f64_into_u32, try_f64_into_usize};
use crate::CONFIG;
use extendr_api::{extendr, prelude::*, rprintln, Deref, DerefMut, Rinternals};
use pl::PolarsError as pl_error;
use pl::{BinaryNameSpaceImpl, DurationMethods, IntoSeries, TemporalMethods, Utf8NameSpaceImpl};
use pl::{
BinaryNameSpaceImpl, Duration, DurationMethods, IntoSeries, RollingGroupOptions,
TemporalMethods, Utf8NameSpaceImpl,
};
use polars::lazy::dsl;
use polars::prelude as pl;
use polars::prelude::SortOptions;
Expand Down Expand Up @@ -2370,6 +2371,31 @@ impl Expr {
)
.into())
}

pub fn rolling(
&self,
index_column: Robj,
period: Robj,
offset: Robj,
closed: Robj,
check_sorted: Robj,
) -> RResult<Self> {
let index_column = robj_to!(String, index_column)?.into();
let period = Duration::parse(robj_to!(str, period)?);
let offset = Duration::parse(robj_to!(str, offset)?);
let closed_window = robj_to!(ClosedWindow, closed)?;
let check_sorted = robj_to!(bool, check_sorted)?;

let options = RollingGroupOptions {
index_column,
period,
offset,
closed_window,
check_sorted,
};

Ok(self.0.clone().rolling(options).into())
}
}

// handle varition in implementation if not full_features
Expand Down
Loading
Loading