Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor!: by -> group_by in $rolling() and $group_by_dynamic() #983

Merged
merged 3 commits into from
Mar 30, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
- In `<LazyFrame>$sink_*` functions, the first argument is now `path`.
- In `$dt$convert_time_zone()` and `$dt$replace_time_zone()`, the `tz` argument
is renamed to `time_zone` (#944).
- In `<DataFrame>$rolling()`, `<LazyFrame>$rolling()`, `<DataFrame>$group_by_dynamic()`
and `<LazyFrame>$group_by_dynamic()`, the `by` argument is renamed to `group_by` (#983).
- In `<DataFrame>$rolling()` and `<DataFrame>$group_by_dynamic()`, all arguments
except `index_column` must be named arguments (#983).
- In `$dt$replace_time_zone()`, all arguments except `time_zone` must be named
arguments (#944).
- In `$bin$decode()`, the `strict` argument must be a named argument (#980).
Expand Down
53 changes: 34 additions & 19 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1995,34 +1995,48 @@ DataFrame_write_ndjson = function(file) {

#' @inherit LazyFrame_rolling title description params details
#' @return A [RollingGroupBy][RollingGroupBy_class] object
#'
#' @seealso
#' - [`<DataFrame>$group_by_dynamic()`][DataFrame_group_by_dynamic]
#' @inheritSection polars_duration_string Polars duration string language
#' @examples
#' df = pl$DataFrame(
#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#' a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' date = c(
#' "2020-01-01 13:45:48",
#' "2020-01-01 16:42:13",
#' "2020-01-01 16:45:09",
#' "2020-01-02 18:12:48",
#' "2020-01-03 19:45:32",
#' "2020-01-08 23:16:43"
#' )
#' df = pl$DataFrame(dt = date, a = c(3, 7, 5, 9, 2, 1))$with_columns(
#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$agg(
#' pl$col("a"),
#' pl$sum("a")$alias("sum_a"),
#' pl$min("a")$alias("min_a"),
#' pl$max("a")$alias("max_a")
#' sum_a = pl$sum("a"),
#' min_a = pl$min("a"),
#' max_a = pl$max("a")
#' )
DataFrame_rolling = function(index_column, period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
DataFrame_rolling = function(
index_column,
...,
period,
offset = NULL,
closed = "right",
group_by = NULL,
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
construct_rolling_group_by(self, index_column, period, offset, closed, by, check_sorted)
construct_rolling_group_by(self, index_column, period, offset, closed, group_by, check_sorted)
}

#' @inherit LazyFrame_group_by_dynamic title description details params
#' @return A [GroupBy][GroupBy_class] object
#'
#' @seealso
#' - [`<DataFrame>$rolling()`][DataFrame_rolling]
#' @examples
#' df = pl$DataFrame(
#' time = pl$date_range(
#' time = pl$datetime_range(
#' start = strptime("2021-12-16 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' end = strptime("2021-12-16 03:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
#' interval = "30m"
Expand Down Expand Up @@ -2065,7 +2079,7 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ
#' "time",
#' every = "1h",
#' closed = "both",
#' by = "groups",
#' group_by = "groups",
#' include_boundaries = TRUE
#' )$agg(pl$col("n"))
#'
Expand All @@ -2085,24 +2099,25 @@ DataFrame_rolling = function(index_column, period, offset = NULL, closed = "righ
#' )$agg(A_agg_list = pl$col("A"))
DataFrame_group_by_dynamic = function(
index_column,
...,
every,
period = NULL,
offset = NULL,
include_boundaries = FALSE,
closed = "left",
label = "left",
by = NULL,
group_by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every)
offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string
}
if (is.null(period)) {
period = every
}
construct_group_by_dynamic(
self, index_column, every, period, offset, include_boundaries, closed, label,
by, start_by, check_sorted
group_by, start_by, check_sorted
)
}

Expand Down
2 changes: 1 addition & 1 deletion R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -3345,7 +3345,7 @@ Expr_rolling = function(
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
.pr$Expr$rolling(self, index_column, period, offset, closed, check_sorted) |>
unwrap("in $rolling():")
unwrap("in <Expr>$rolling():")
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
}

#' Replace values by different values
Expand Down
6 changes: 3 additions & 3 deletions R/group_by_dynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ RPolarsDynamicGroupBy = new.env(parent = emptyenv())
#' @noRd
construct_group_by_dynamic = function(
df, index_column, every, period, offset, include_boundaries, closed, label,
by, start_by, check_sorted) {
group_by, start_by, check_sorted) {
if (!inherits(df, "RPolarsDataFrame")) {
stop("internal error: construct_group called not on DataFrame")
}
Expand All @@ -57,7 +57,7 @@ construct_group_by_dynamic = function(
include_boundaries = include_boundaries,
closed = closed,
label = label,
by = by,
group_by = group_by,
start_by = start_by,
check_sorted = check_sorted
)
Expand Down Expand Up @@ -95,7 +95,7 @@ DynamicGroupBy_agg = function(...) {
include_boundaries = prv$include_boundaries,
closed = prv$closed,
label = prv$label,
by = prv$by,
group_by = prv$group_by,
start_by = prv$start_by,
check_sorted = prv$check_sorted
)$
Expand Down
6 changes: 3 additions & 3 deletions R/group_by_rolling.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ RPolarsRollingGroupBy = new.env(parent = emptyenv())
#' The internal RollingGroupBy constructor
#' @return The input as grouped DataFrame
#' @noRd
construct_rolling_group_by = function(df, index_column, period, offset, closed, by, check_sorted) {
construct_rolling_group_by = function(df, index_column, period, offset, closed, group_by, check_sorted) {
if (!inherits(df, "RPolarsDataFrame")) {
stop("internal error: construct_group called not on DataFrame")
}
Expand All @@ -50,7 +50,7 @@ construct_rolling_group_by = function(df, index_column, period, offset, closed,
period = period,
offset = offset,
closed = closed,
by = by,
group_by = group_by,
check_sorted = check_sorted
)
class(out) = "RPolarsRollingGroupBy"
Expand Down Expand Up @@ -96,7 +96,7 @@ RollingGroupBy_agg = function(...) {
period = prv$period,
offset = prv$offset,
closed = prv$closed,
by = prv$by,
group_by = prv$group_by,
check_sorted = prv$check_sorted
)$
agg(unpack_list(..., .context = "in $agg():"))$
Expand Down
54 changes: 33 additions & 21 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -1793,36 +1793,47 @@ LazyFrame_with_context = function(other) {
#' case of a rolling group by on indices, dtype needs to be either Int32 or Int64.
#' Note that Int32 gets temporarily cast to Int64, so if performance matters use
#' an Int64 column.
#' @param by Also group by this column/these columns.
#' @param group_by Also group by this column/these columns.
#'
#' @inheritSection polars_duration_string Polars duration string language
#' @return A [LazyGroupBy][LazyGroupBy_class] object
#'
#' @seealso
#' - [`<LazyFrame>$group_by_dynamic()`][LazyFrame_group_by_dynamic]
#' @examples
#' df = pl$LazyFrame(
#' dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#' a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#' pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' dates = c(
#' "2020-01-01 13:45:48",
#' "2020-01-01 16:42:13",
#' "2020-01-01 16:45:09",
#' "2020-01-02 18:12:48",
#' "2020-01-03 19:45:32",
#' "2020-01-08 23:16:43"
#' )
#'
#' df$collect()
#' df = pl$LazyFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$with_columns(
#' pl$col("dt")$str$strptime(pl$Datetime())$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$agg(
#' pl$col("a"),
#' pl$sum("a")$alias("sum_a"),
#' pl$min("a")$alias("min_a"),
#' pl$max("a")$alias("max_a")
#' sum_a = pl$sum("a"),
#' min_a = pl$min("a"),
#' max_a = pl$max("a")
#' )$collect()
LazyFrame_rolling = function(
index_column, ..., period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
index_column,
...,
period,
offset = NULL,
closed = "right",
group_by = NULL,
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", period)
offset = paste0("-", period) # TODO: `paste0` should be executed after `period` is parsed as string
}
.pr$LazyFrame$rolling(
self, index_column, period, offset, closed,
wrap_elist_result(by, str_to_lit = FALSE), check_sorted
wrap_elist_result(group_by, str_to_lit = FALSE), check_sorted
) |>
unwrap("in $rolling():")
unwrap("in <LazyFrame>$rolling():")
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
}


Expand All @@ -1848,7 +1859,8 @@ LazyFrame_rolling = function(
#' starts the window on the Monday before the first data point, etc.
#'
#' @return A [LazyGroupBy][LazyGroupBy_class] object
#'
#' @seealso
#' - [`<LazyFrame>$rolling()`][LazyFrame_rolling]
#' @examples
#' lf = pl$LazyFrame(
#' time = pl$date_range(
Expand Down Expand Up @@ -1895,7 +1907,7 @@ LazyFrame_rolling = function(
#' "time",
#' every = "1h",
#' closed = "both",
#' by = "groups",
#' group_by = "groups",
#' include_boundaries = TRUE
#' )$agg(pl$col("n"))$collect()
#'
Expand All @@ -1922,18 +1934,18 @@ LazyFrame_group_by_dynamic = function(
include_boundaries = FALSE,
closed = "left",
label = "left",
by = NULL,
group_by = NULL,
start_by = "window",
check_sorted = TRUE) {
if (is.null(offset)) {
offset = paste0("-", every)
offset = paste0("-", every) # TODO: `paste0` should be executed after `every` is parsed as string
}
if (is.null(period)) {
period = every
}
.pr$LazyFrame$group_by_dynamic(
self, index_column, every, period, offset, label, include_boundaries, closed,
wrap_elist_result(by, str_to_lit = FALSE), start_by, check_sorted
wrap_elist_result(group_by, str_to_lit = FALSE), start_by, check_sorted
) |>
unwrap("in $group_by_dynamic():")
}
Expand Down
16 changes: 12 additions & 4 deletions man/DataFrame_group_by_dynamic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading