From 0c3ce3c695bf00b389fc30dc799d4fb79a98d488 Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Sat, 30 Mar 2024 12:59:26 +0900 Subject: [PATCH] refactor!: `by` -> `group_by` in `$rolling()` and `$group_by_dynamic()` (#983) Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- NEWS.md | 4 ++ R/dataframe__frame.R | 53 +++++++++++++++++---------- R/group_by_dynamic.R | 6 +-- R/group_by_rolling.R | 6 +-- R/lazyframe__lazy.R | 52 ++++++++++++++++---------- man/DataFrame_group_by_dynamic.Rd | 16 ++++++-- man/DataFrame_rolling.Rd | 61 +++++++++++++++++++++++++------ man/DynamicGroupBy_agg.Rd | 4 +- man/LazyFrame_group_by_dynamic.Rd | 11 ++++-- man/LazyFrame_rolling.Rd | 59 ++++++++++++++++++++++++------ tests/testthat/test-groupby.R | 10 ++--- tests/testthat/test-lazy.R | 14 +++---- 12 files changed, 207 insertions(+), 89 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8aa7dbd1f..0ac73262b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,10 @@ - In `$sink_*` functions, the first argument is now `path`. - In `$dt$convert_time_zone()` and `$dt$replace_time_zone()`, the `tz` argument is renamed to `time_zone` (#944). + - In `<DataFrame>$rolling()`, `<LazyFrame>$rolling()`, `<DataFrame>$group_by_dynamic()` + and `<LazyFrame>$group_by_dynamic()`, the `by` argument is renamed to `group_by` (#983). + - In `$rolling()` and `$group_by_dynamic()`, all arguments + except `index_column` must be named arguments (#983). - In `$dt$replace_time_zone()`, all arguments except `time_zone` must be named arguments (#944). - In `$bin$decode()`, the `strict` argument must be a named argument (#980). 
diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 0ccec398a..90fe97b4d 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1995,34 +1995,48 @@ DataFrame_write_ndjson = function(file) { #' @inherit LazyFrame_rolling title description params details #' @return A [RollingGroupBy][RollingGroupBy_class] object -#' +#' @seealso +#' - [`$group_by_dynamic()`][DataFrame_group_by_dynamic] +#' @inheritSection polars_duration_string Polars duration string language #' @examples -#' df = pl$DataFrame( -#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"), -#' a = c(3, 7, 5, 9, 2, 1) -#' )$with_columns( -#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted() +#' date = c( +#' "2020-01-01 13:45:48", +#' "2020-01-01 16:42:13", +#' "2020-01-01 16:45:09", +#' "2020-01-02 18:12:48", +#' "2020-01-03 19:45:32", +#' "2020-01-08 23:16:43" +#' ) +#' df = pl$DataFrame(dt = date, a = c(3, 7, 5, 9, 2, 1))$with_columns( +#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted() #' ) #' #' df$rolling(index_column = "dt", period = "2d")$agg( -#' pl$col("a"), -#' pl$sum("a")$alias("sum_a"), -#' pl$min("a")$alias("min_a"), -#' pl$max("a")$alias("max_a") +#' sum_a = pl$sum("a"), +#' min_a = pl$min("a"), +#' max_a = pl$max("a") #' ) -DataFrame_rolling = function(index_column, period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) { +DataFrame_rolling = function( + index_column, + ..., + period, + offset = NULL, + closed = "right", + group_by = NULL, + check_sorted = TRUE) { if (is.null(offset)) { - offset = paste0("-", period) + offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string } - construct_rolling_group_by(self, index_column, period, offset, closed, by, check_sorted) + construct_rolling_group_by(self, index_column, period, offset, closed, group_by, check_sorted) } #' @inherit LazyFrame_group_by_dynamic title description details params #' @return A 
[GroupBy][GroupBy_class] object -#' +#' @seealso +#' - [`$rolling()`][DataFrame_rolling] #' @examples #' df = pl$DataFrame( -#' time = pl$date_range( +#' time = pl$datetime_range( #' start = strptime("2021-12-16 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), #' end = strptime("2021-12-16 03:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), #' interval = "30m" @@ -2065,7 +2079,7 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ #' "time", #' every = "1h", #' closed = "both", -#' by = "groups", +#' group_by = "groups", #' include_boundaries = TRUE #' )$agg(pl$col("n")) #' @@ -2085,24 +2099,25 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ #' )$agg(A_agg_list = pl$col("A")) DataFrame_group_by_dynamic = function( index_column, + ..., every, period = NULL, offset = NULL, include_boundaries = FALSE, closed = "left", label = "left", - by = NULL, + group_by = NULL, start_by = "window", check_sorted = TRUE) { if (is.null(offset)) { - offset = paste0("-", every) + offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string } if (is.null(period)) { period = every } construct_group_by_dynamic( self, index_column, every, period, offset, include_boundaries, closed, label, - by, start_by, check_sorted + group_by, start_by, check_sorted ) } diff --git a/R/group_by_dynamic.R b/R/group_by_dynamic.R index bb9983a7e..f107d74c6 100644 --- a/R/group_by_dynamic.R +++ b/R/group_by_dynamic.R @@ -40,7 +40,7 @@ RPolarsDynamicGroupBy = new.env(parent = emptyenv()) #' @noRd construct_group_by_dynamic = function( df, index_column, every, period, offset, include_boundaries, closed, label, - by, start_by, check_sorted) { + group_by, start_by, check_sorted) { if (!inherits(df, "RPolarsDataFrame")) { stop("internal error: construct_group called not on DataFrame") } @@ -57,7 +57,7 @@ construct_group_by_dynamic = function( include_boundaries = include_boundaries, closed = closed, 
label = label, - by = by, + group_by = group_by, start_by = start_by, check_sorted = check_sorted ) @@ -95,7 +95,7 @@ DynamicGroupBy_agg = function(...) { include_boundaries = prv$include_boundaries, closed = prv$closed, label = prv$label, - by = prv$by, + group_by = prv$group_by, start_by = prv$start_by, check_sorted = prv$check_sorted )$ diff --git a/R/group_by_rolling.R b/R/group_by_rolling.R index 15bf88aac..636030be5 100644 --- a/R/group_by_rolling.R +++ b/R/group_by_rolling.R @@ -36,7 +36,7 @@ RPolarsRollingGroupBy = new.env(parent = emptyenv()) #' The internal RollingGroupBy constructor #' @return The input as grouped DataFrame #' @noRd -construct_rolling_group_by = function(df, index_column, period, offset, closed, by, check_sorted) { +construct_rolling_group_by = function(df, index_column, period, offset, closed, group_by, check_sorted) { if (!inherits(df, "RPolarsDataFrame")) { stop("internal error: construct_group called not on DataFrame") } @@ -50,7 +50,7 @@ construct_rolling_group_by = function(df, index_column, period, offset, closed, period = period, offset = offset, closed = closed, - by = by, + group_by = group_by, check_sorted = check_sorted ) class(out) = "RPolarsRollingGroupBy" @@ -96,7 +96,7 @@ RollingGroupBy_agg = function(...) { period = prv$period, offset = prv$offset, closed = prv$closed, - by = prv$by, + group_by = prv$group_by, check_sorted = prv$check_sorted )$ agg(unpack_list(..., .context = "in $agg():"))$ diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 30e03c876..b494c579a 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1793,34 +1793,45 @@ LazyFrame_with_context = function(other) { #' case of a rolling group by on indices, dtype needs to be either Int32 or Int64. #' Note that Int32 gets temporarily cast to Int64, so if performance matters use #' an Int64 column. -#' @param by Also group by this column/these columns. +#' @param group_by Also group by this column/these columns. 
#' +#' @inheritSection polars_duration_string Polars duration string language #' @return A [LazyGroupBy][LazyGroupBy_class] object -#' +#' @seealso +#' - [`$group_by_dynamic()`][LazyFrame_group_by_dynamic] #' @examples -#' df = pl$LazyFrame( -#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"), -#' a = c(3, 7, 5, 9, 2, 1) -#' )$with_columns( -#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted() +#' dates = c( +#' "2020-01-01 13:45:48", +#' "2020-01-01 16:42:13", +#' "2020-01-01 16:45:09", +#' "2020-01-02 18:12:48", +#' "2020-01-03 19:45:32", +#' "2020-01-08 23:16:43" #' ) #' -#' df$collect() +#' df = pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns( +#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted() +#' ) #' #' df$rolling(index_column = "dt", period = "2d")$agg( -#' pl$col("a"), -#' pl$sum("a")$alias("sum_a"), -#' pl$min("a")$alias("min_a"), -#' pl$max("a")$alias("max_a") +#' sum_a = pl$sum("a"), +#' min_a = pl$min("a"), +#' max_a = pl$max("a") #' )$collect() LazyFrame_rolling = function( - index_column, ..., period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) { + index_column, + ..., + period, + offset = NULL, + closed = "right", + group_by = NULL, + check_sorted = TRUE) { if (is.null(offset)) { - offset = paste0("-", period) + offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string } .pr$LazyFrame$rolling( self, index_column, period, offset, closed, - wrap_elist_result(by, str_to_lit = FALSE), check_sorted + wrap_elist_result(group_by, str_to_lit = FALSE), check_sorted ) |> unwrap("in $rolling():") } @@ -1848,7 +1859,8 @@ LazyFrame_rolling = function( #' starts the window on the Monday before the first data point, etc. 
#' #' @return A [LazyGroupBy][LazyGroupBy_class] object -#' +#' @seealso +#' - [`$rolling()`][LazyFrame_rolling] #' @examples #' lf = pl$LazyFrame( #' time = pl$date_range( @@ -1895,7 +1907,7 @@ LazyFrame_rolling = function( #' "time", #' every = "1h", #' closed = "both", -#' by = "groups", +#' group_by = "groups", #' include_boundaries = TRUE #' )$agg(pl$col("n"))$collect() #' @@ -1922,18 +1934,18 @@ LazyFrame_group_by_dynamic = function( include_boundaries = FALSE, closed = "left", label = "left", - by = NULL, + group_by = NULL, start_by = "window", check_sorted = TRUE) { if (is.null(offset)) { - offset = paste0("-", every) + offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string } if (is.null(period)) { period = every } .pr$LazyFrame$group_by_dynamic( self, index_column, every, period, offset, label, include_boundaries, closed, - wrap_elist_result(by, str_to_lit = FALSE), start_by, check_sorted + wrap_elist_result(group_by, str_to_lit = FALSE), start_by, check_sorted ) |> unwrap("in $group_by_dynamic():") } diff --git a/man/DataFrame_group_by_dynamic.Rd b/man/DataFrame_group_by_dynamic.Rd index d0e9bbfdb..b8504eeed 100644 --- a/man/DataFrame_group_by_dynamic.Rd +++ b/man/DataFrame_group_by_dynamic.Rd @@ -6,13 +6,14 @@ \usage{ DataFrame_group_by_dynamic( index_column, + ..., every, period = NULL, offset = NULL, include_boundaries = FALSE, closed = "left", label = "left", - by = NULL, + group_by = NULL, start_by = "window", check_sorted = TRUE ) @@ -25,6 +26,8 @@ case of a rolling group by on indices, dtype needs to be either Int32 or Int64. Note that Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.} +\item{...}{Ignored.} + \item{every}{Interval of the window.} \item{period}{A character representing the length of the window, @@ -51,7 +54,7 @@ you don’t need the label to be at one of the boundaries, choose this option for maximum performance. 
}} -\item{by}{Also group by this column/these columns.} +\item{group_by}{Also group by this column/these columns.} \item{start_by}{The strategy to determine the start of the first window by: \itemize{ @@ -98,7 +101,7 @@ by: } \examples{ df = pl$DataFrame( - time = pl$date_range( + time = pl$datetime_range( start = strptime("2021-12-16 00:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"), end = strptime("2021-12-16 03:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"), interval = "30m" @@ -141,7 +144,7 @@ df$group_by_dynamic( "time", every = "1h", closed = "both", - by = "groups", + group_by = "groups", include_boundaries = TRUE )$agg(pl$col("n")) @@ -160,3 +163,8 @@ df$group_by_dynamic( closed = "right" )$agg(A_agg_list = pl$col("A")) } +\seealso{ +\itemize{ +\item \code{\link[=DataFrame_rolling]{$rolling()}} +} +} diff --git a/man/DataFrame_rolling.Rd b/man/DataFrame_rolling.Rd index 9b3135afa..511eabc76 100644 --- a/man/DataFrame_rolling.Rd +++ b/man/DataFrame_rolling.Rd @@ -6,10 +6,11 @@ \usage{ DataFrame_rolling( index_column, + ..., period, offset = NULL, closed = "right", - by = NULL, + group_by = NULL, check_sorted = TRUE ) } @@ -21,6 +22,8 @@ case of a rolling group by on indices, dtype needs to be either Int32 or Int64. Note that Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.} +\item{...}{Ignored.} + \item{period}{A character representing the length of the window, must be non-negative. See the \verb{Polars duration string language} section for details.} @@ -32,7 +35,7 @@ See the \verb{Polars duration string language} section for details.} \item{closed}{Define which sides of the temporal interval are closed (inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} -\item{by}{Also group by this column/these columns.} +\item{group_by}{Also group by this column/these columns.} \item{check_sorted}{Check whether data is actually sorted. 
Checking it is expensive so if you are sure the data within the \code{index_column} is sorted, you @@ -68,18 +71,54 @@ by: \item "10i" # length 10 } } +\section{Polars duration string language}{ + +Polars duration string language is a simple representation of +durations. It is used in many Polars functions that accept durations. + +It has the following format: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 calendar day) +\item 1w (1 calendar week) +\item 1mo (1 calendar month) +\item 1q (1 calendar quarter) +\item 1y (1 calendar year) +} + +Or combine them: \code{"3d12h4m25s"} # 3 days, 12 hours, 4 minutes, and 25 seconds + +By "calendar day", we mean the corresponding time on the next day +(which may not be 24 hours, due to daylight savings). +Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". +} + \examples{ -df = pl$DataFrame( - dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"), - a = c(3, 7, 5, 9, 2, 1) -)$with_columns( - pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted() +date = c( + "2020-01-01 13:45:48", + "2020-01-01 16:42:13", + "2020-01-01 16:45:09", + "2020-01-02 18:12:48", + "2020-01-03 19:45:32", + "2020-01-08 23:16:43" +) +df = pl$DataFrame(dt = date, a = c(3, 7, 5, 9, 2, 1))$with_columns( + pl$col("dt")$str$strptime(pl$Datetime())$set_sorted() ) df$rolling(index_column = "dt", period = "2d")$agg( - pl$col("a"), - pl$sum("a")$alias("sum_a"), - pl$min("a")$alias("min_a"), - pl$max("a")$alias("max_a") + sum_a = pl$sum("a"), + min_a = pl$min("a"), + max_a = pl$max("a") ) } +\seealso{ +\itemize{ +\item \code{\link[=DataFrame_group_by_dynamic]{$group_by_dynamic()}} +} +} diff --git a/man/DynamicGroupBy_agg.Rd b/man/DynamicGroupBy_agg.Rd index 4dac759ba..8a6676995 100644 --- a/man/DynamicGroupBy_agg.Rd +++ b/man/DynamicGroupBy_agg.Rd @@ -19,7 +19,7 @@ 
Aggregate a DataFrame over a time or integer window created with } \examples{ df = pl$DataFrame( - time = pl$date_range( + time = pl$datetime_range( start = strptime("2021-12-16 00:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"), end = strptime("2021-12-16 03:00:00", format = "\%Y-\%m-\%d \%H:\%M:\%S", tz = "UTC"), interval = "30m" @@ -62,7 +62,7 @@ df$group_by_dynamic( "time", every = "1h", closed = "both", - by = "groups", + group_by = "groups", include_boundaries = TRUE )$agg(pl$col("n")) diff --git a/man/LazyFrame_group_by_dynamic.Rd b/man/LazyFrame_group_by_dynamic.Rd index ae24827ae..e2300d346 100644 --- a/man/LazyFrame_group_by_dynamic.Rd +++ b/man/LazyFrame_group_by_dynamic.Rd @@ -13,7 +13,7 @@ LazyFrame_group_by_dynamic( include_boundaries = FALSE, closed = "left", label = "left", - by = NULL, + group_by = NULL, start_by = "window", check_sorted = TRUE ) @@ -54,7 +54,7 @@ you don’t need the label to be at one of the boundaries, choose this option for maximum performance. 
}} -\item{by}{Also group by this column/these columns.} +\item{group_by}{Also group by this column/these columns.} \item{start_by}{The strategy to determine the start of the first window by: \itemize{ @@ -145,7 +145,7 @@ lf$group_by_dynamic( "time", every = "1h", closed = "both", - by = "groups", + group_by = "groups", include_boundaries = TRUE )$agg(pl$col("n"))$collect() @@ -164,3 +164,8 @@ lf$group_by_dynamic( closed = "right" )$agg(A_agg_list = pl$col("A"))$collect() } +\seealso{ +\itemize{ +\item \code{\link[=LazyFrame_rolling]{$rolling()}} +} +} diff --git a/man/LazyFrame_rolling.Rd b/man/LazyFrame_rolling.Rd index 276501783..193fa30c4 100644 --- a/man/LazyFrame_rolling.Rd +++ b/man/LazyFrame_rolling.Rd @@ -10,7 +10,7 @@ LazyFrame_rolling( period, offset = NULL, closed = "right", - by = NULL, + group_by = NULL, check_sorted = TRUE ) } @@ -35,7 +35,7 @@ See the \verb{Polars duration string language} section for details.} \item{closed}{Define which sides of the temporal interval are closed (inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} -\item{by}{Also group by this column/these columns.} +\item{group_by}{Also group by this column/these columns.} \item{check_sorted}{Check whether data is actually sorted. Checking it is expensive so if you are sure the data within the \code{index_column} is sorted, you @@ -71,20 +71,55 @@ by: \item "10i" # length 10 } } +\section{Polars duration string language}{ + +Polars duration string language is a simple representation of +durations. It is used in many Polars functions that accept durations. 
+ +It has the following format: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 calendar day) +\item 1w (1 calendar week) +\item 1mo (1 calendar month) +\item 1q (1 calendar quarter) +\item 1y (1 calendar year) +} + +Or combine them: \code{"3d12h4m25s"} # 3 days, 12 hours, 4 minutes, and 25 seconds + +By "calendar day", we mean the corresponding time on the next day +(which may not be 24 hours, due to daylight savings). +Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". +} + \examples{ -df = pl$LazyFrame( - dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"), - a = c(3, 7, 5, 9, 2, 1) -)$with_columns( - pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted() +dates = c( + "2020-01-01 13:45:48", + "2020-01-01 16:42:13", + "2020-01-01 16:45:09", + "2020-01-02 18:12:48", + "2020-01-03 19:45:32", + "2020-01-08 23:16:43" ) -df$collect() +df = pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns( + pl$col("dt")$str$strptime(pl$Datetime())$set_sorted() +) df$rolling(index_column = "dt", period = "2d")$agg( - pl$col("a"), - pl$sum("a")$alias("sum_a"), - pl$min("a")$alias("min_a"), - pl$max("a")$alias("max_a") + sum_a = pl$sum("a"), + min_a = pl$min("a"), + max_a = pl$max("a") )$collect() } +\seealso{ +\itemize{ +\item \code{\link[=LazyFrame_group_by_dynamic]{$group_by_dynamic()}} +} +} diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index 283c977ff..6a009eadc 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -375,7 +375,7 @@ test_that("group_by_dynamic for LazyFrame: argument 'by' works", { pl$col("dt")$str$strptime(pl$Datetime("ms"), format = NULL)$set_sorted() ) - actual = df$group_by_dynamic(index_column = "dt", every = "2h", by = pl$col("grp"))$agg( + actual = df$group_by_dynamic(index_column = "dt", 
every = "2h", group_by = pl$col("grp"))$agg( pl$col("n")$mean() )$collect()$to_data_frame() @@ -386,10 +386,10 @@ test_that("group_by_dynamic for LazyFrame: argument 'by' works", { # string is parsed as column name in "by" expect_equal( - df$group_by_dynamic(index_column = "dt", every = "2h", by = pl$col("grp"))$agg( + df$group_by_dynamic(index_column = "dt", every = "2h", group_by = pl$col("grp"))$agg( pl$col("n")$mean() )$collect()$to_data_frame(), - df$group_by_dynamic(index_column = "dt", every = "2h", by = "grp")$agg( + df$group_by_dynamic(index_column = "dt", every = "2h", group_by = "grp")$agg( pl$col("n")$mean() )$collect()$to_data_frame() ) @@ -402,13 +402,13 @@ test_that("group_by_dynamic for LazyFrame: argument 'check_sorted' works", { a = c(3, 7, 5, 9, 2, 1) ) expect_error( - df$group_by_dynamic(index_column = "index", every = "2i", by = "grp")$agg( + df$group_by_dynamic(index_column = "index", every = "2i", group_by = "grp")$agg( pl$sum("a")$alias("sum_a") )$collect(), "not sorted" ) expect_no_error( - df$group_by_dynamic(index_column = "index", every = "2i", by = "grp", check_sorted = FALSE)$agg( + df$group_by_dynamic(index_column = "index", every = "2i", group_by = "grp", check_sorted = FALSE)$agg( pl$sum("a")$alias("sum_a") )$collect() ) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 219e54031..754a831ce 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -935,13 +935,13 @@ test_that("rolling for LazyFrame: error if not explicitly sorted", { ) }) -test_that("rolling for LazyFrame: argument 'by' works", { +test_that("rolling for LazyFrame: argument 'group_by' works", { df = pl$LazyFrame( index = c(1L, 2L, 3L, 4L, 8L, 9L), grp = c("a", "a", rep("b", 4)), a = c(3, 7, 5, 9, 2, 1) ) - actual = df$rolling(index_column = "index", period = "2i", by = pl$col("grp"))$agg( + actual = df$rolling(index_column = "index", period = "2i", group_by = pl$col("grp"))$agg( pl$sum("a")$alias("sum_a"), 
pl$min("a")$alias("min_a"), pl$max("a")$alias("max_a") @@ -956,12 +956,12 @@ test_that("rolling for LazyFrame: argument 'by' works", { ) ) - # string is parsed as column name in "by" + # string is parsed as column name in "group_by" expect_equal( - df$rolling(index_column = "index", period = "2i", by = "grp")$agg( + df$rolling(index_column = "index", period = "2i", group_by = "grp")$agg( pl$sum("a")$alias("sum_a") )$collect()$to_data_frame(), - df$rolling(index_column = "index", period = "2i", by = pl$col("grp"))$agg( + df$rolling(index_column = "index", period = "2i", group_by = pl$col("grp"))$agg( pl$sum("a")$alias("sum_a") )$collect()$to_data_frame() ) @@ -974,13 +974,13 @@ test_that("rolling for LazyFrame: argument 'check_sorted' works", { a = c(3, 7, 5, 9, 2, 1) ) expect_error( - df$rolling(index_column = "index", period = "2i", by = "grp")$agg( + df$rolling(index_column = "index", period = "2i", group_by = "grp")$agg( pl$sum("a")$alias("sum_a") )$collect(), "not sorted" ) expect_no_error( - df$rolling(index_column = "index", period = "2i", by = "grp", check_sorted = FALSE)$agg( + df$rolling(index_column = "index", period = "2i", group_by = "grp", check_sorted = FALSE)$agg( pl$sum("a")$alias("sum_a") )$collect() )