Skip to content

Commit

Permalink
refactor!: by -> group_by in $rolling() and `$group_by_dynamic(…
Browse files Browse the repository at this point in the history
…)` (#983)

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
  • Loading branch information
eitsupi and etiennebacher committed Mar 30, 2024
1 parent 5812f3e commit 0c3ce3c
Show file tree
Hide file tree
Showing 12 changed files with 207 additions and 89 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
- In `<LazyFrame>$sink_*` functions, the first argument is now `path`.
- In `$dt$convert_time_zone()` and `$dt$replace_time_zone()`, the `tz` argument
is renamed to `time_zone` (#944).
- In `<DataFrame>$rolling()`, `<LazyFrame>$rolling()`, `<DataFrame>$group_by_dynamic()`
and `<LazyFrame>$group_by_dynamic()`, the `by` argument is renamed to `group_by` (#983).
- In `<DataFrame>$rolling()` and `<DataFrame>$group_by_dynamic()`, all arguments
except `index_column` must be named arguments (#983).
- In `$dt$replace_time_zone()`, all arguments except `time_zone` must be named
arguments (#944).
- In `$bin$decode()`, the `strict` argument must be a named argument (#980).
Expand Down
53 changes: 34 additions & 19 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1995,34 +1995,48 @@ DataFrame_write_ndjson = function(file) {

#' @inherit LazyFrame_rolling title description params details
#' @return A [RollingGroupBy][RollingGroupBy_class] object
#'
#' @seealso
#' - [`<DataFrame>$group_by_dynamic()`][DataFrame_group_by_dynamic]
#' @inheritSection polars_duration_string Polars duration string language
#' @examples
#' df = pl$DataFrame(
#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#' a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' date = c(
#' "2020-01-01 13:45:48",
#' "2020-01-01 16:42:13",
#' "2020-01-01 16:45:09",
#' "2020-01-02 18:12:48",
#' "2020-01-03 19:45:32",
#' "2020-01-08 23:16:43"
#' )
#' df = pl$DataFrame(dt = date, a = c(3, 7, 5, 9, 2, 1))$with_columns(
#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$agg(
#' pl$col("a"),
#' pl$sum("a")$alias("sum_a"),
#' pl$min("a")$alias("min_a"),
#' pl$max("a")$alias("max_a")
#' sum_a = pl$sum("a"),
#' min_a = pl$min("a"),
#' max_a = pl$max("a")
#' )
DataFrame_rolling = function(index_column, period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
DataFrame_rolling = function(
index_column,
...,
period,
offset = NULL,
closed = "right",
group_by = NULL,
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
construct_rolling_group_by(self, index_column, period, offset, closed, by, check_sorted)
construct_rolling_group_by(self, index_column, period, offset, closed, group_by, check_sorted)
}

#' @inherit LazyFrame_group_by_dynamic title description details params
#' @return A [GroupBy][GroupBy_class] object
#'
#' @seealso
#' - [`<DataFrame>$rolling()`][DataFrame_rolling]
#' @examples
#' df = pl$DataFrame(
#' time = pl$date_range(
#' time = pl$datetime_range(
#' start = strptime("2021-12-16 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' end = strptime("2021-12-16 03:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' interval = "30m"
Expand Down Expand Up @@ -2065,7 +2079,7 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ
#' "time",
#' every = "1h",
#' closed = "both",
#' by = "groups",
#' group_by = "groups",
#' include_boundaries = TRUE
#' )$agg(pl$col("n"))
#'
Expand All @@ -2085,24 +2099,25 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ
#' )$agg(A_agg_list = pl$col("A"))
DataFrame_group_by_dynamic = function(
index_column,
...,
every,
period = NULL,
offset = NULL,
include_boundaries = FALSE,
closed = "left",
label = "left",
by = NULL,
group_by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every)
offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string
}
if (is.null(period)) {
period = every
}
construct_group_by_dynamic(
self, index_column, every, period, offset, include_boundaries, closed, label,
by, start_by, check_sorted
group_by, start_by, check_sorted
)
}

Expand Down
6 changes: 3 additions & 3 deletions R/group_by_dynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ RPolarsDynamicGroupBy = new.env(parent = emptyenv())
#' @noRd
construct_group_by_dynamic = function(
df, index_column, every, period, offset, include_boundaries, closed, label,
by, start_by, check_sorted) {
group_by, start_by, check_sorted) {
if (!inherits(df, "RPolarsDataFrame")) {
stop("internal error: construct_group called not on DataFrame")
}
Expand All @@ -57,7 +57,7 @@ construct_group_by_dynamic = function(
include_boundaries = include_boundaries,
closed = closed,
label = label,
by = by,
group_by = group_by,
start_by = start_by,
check_sorted = check_sorted
)
Expand Down Expand Up @@ -95,7 +95,7 @@ DynamicGroupBy_agg = function(...) {
include_boundaries = prv$include_boundaries,
closed = prv$closed,
label = prv$label,
by = prv$by,
group_by = prv$group_by,
start_by = prv$start_by,
check_sorted = prv$check_sorted
)$
Expand Down
6 changes: 3 additions & 3 deletions R/group_by_rolling.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ RPolarsRollingGroupBy = new.env(parent = emptyenv())
#' The internal RollingGroupBy constructor
#' @return The input as grouped DataFrame
#' @noRd
construct_rolling_group_by = function(df, index_column, period, offset, closed, by, check_sorted) {
construct_rolling_group_by = function(df, index_column, period, offset, closed, group_by, check_sorted) {
if (!inherits(df, "RPolarsDataFrame")) {
stop("internal error: construct_group called not on DataFrame")
}
Expand All @@ -50,7 +50,7 @@ construct_rolling_group_by = function(df, index_column, period, offset, closed,
period = period,
offset = offset,
closed = closed,
by = by,
group_by = group_by,
check_sorted = check_sorted
)
class(out) = "RPolarsRollingGroupBy"
Expand Down Expand Up @@ -96,7 +96,7 @@ RollingGroupBy_agg = function(...) {
period = prv$period,
offset = prv$offset,
closed = prv$closed,
by = prv$by,
group_by = prv$group_by,
check_sorted = prv$check_sorted
)$
agg(unpack_list(..., .context = "in $agg():"))$
Expand Down
52 changes: 32 additions & 20 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1793,34 +1793,45 @@ LazyFrame_with_context = function(other) {
#' case of a rolling group by on indices, dtype needs to be either Int32 or Int64.
#' Note that Int32 gets temporarily cast to Int64, so if performance matters use
#' an Int64 column.
#' @param by Also group by this column/these columns.
#' @param group_by Also group by this column/these columns.
#'
#' @inheritSection polars_duration_string Polars duration string language
#' @return A [LazyGroupBy][LazyGroupBy_class] object
#'
#' @seealso
#' - [`<LazyFrame>$group_by_dynamic()`][LazyFrame_group_by_dynamic]
#' @examples
#' df = pl$LazyFrame(
#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#' a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' dates = c(
#' "2020-01-01 13:45:48",
#' "2020-01-01 16:42:13",
#' "2020-01-01 16:45:09",
#' "2020-01-02 18:12:48",
#' "2020-01-03 19:45:32",
#' "2020-01-08 23:16:43"
#' )
#'
#' df$collect()
#' df = pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns(
#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$agg(
#' pl$col("a"),
#' pl$sum("a")$alias("sum_a"),
#' pl$min("a")$alias("min_a"),
#' pl$max("a")$alias("max_a")
#' sum_a = pl$sum("a"),
#' min_a = pl$min("a"),
#' max_a = pl$max("a")
#' )$collect()
LazyFrame_rolling = function(
index_column, ..., period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
index_column,
...,
period,
offset = NULL,
closed = "right",
group_by = NULL,
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
.pr$LazyFrame$rolling(
self, index_column, period, offset, closed,
wrap_elist_result(by, str_to_lit = FALSE), check_sorted
wrap_elist_result(group_by, str_to_lit = FALSE), check_sorted
) |>
unwrap("in $rolling():")
}
Expand Down Expand Up @@ -1848,7 +1859,8 @@ LazyFrame_rolling = function(
#' starts the window on the Monday before the first data point, etc.
#'
#' @return A [LazyGroupBy][LazyGroupBy_class] object
#'
#' @seealso
#' - [`<LazyFrame>$rolling()`][LazyFrame_rolling]
#' @examples
#' lf = pl$LazyFrame(
#' time = pl$date_range(
Expand Down Expand Up @@ -1895,7 +1907,7 @@ LazyFrame_rolling = function(
#' "time",
#' every = "1h",
#' closed = "both",
#' by = "groups",
#' group_by = "groups",
#' include_boundaries = TRUE
#' )$agg(pl$col("n"))$collect()
#'
Expand All @@ -1922,18 +1934,18 @@ LazyFrame_group_by_dynamic = function(
include_boundaries = FALSE,
closed = "left",
label = "left",
by = NULL,
group_by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every)
offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string
}
if (is.null(period)) {
period = every
}
.pr$LazyFrame$group_by_dynamic(
self, index_column, every, period, offset, label, include_boundaries, closed,
wrap_elist_result(by, str_to_lit = FALSE), start_by, check_sorted
wrap_elist_result(group_by, str_to_lit = FALSE), start_by, check_sorted
) |>
unwrap("in $group_by_dynamic():")
}
Expand Down
16 changes: 12 additions & 4 deletions man/DataFrame_group_by_dynamic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0c3ce3c

Please sign in to comment.