Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a separate class RollingGroupBy #694

Merged
merged 9 commits into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Collate:
'functions__lazy.R'
'functions__whenthen.R'
'group_by.R'
'group_by_rolling.R'
'info.R'
'ipc.R'
'is_polars.R'
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ S3method("$",RPolarsProtoExprArray)
S3method("$",RPolarsRField)
S3method("$",RPolarsRNullValues)
S3method("$",RPolarsRThreadHandle)
S3method("$",RPolarsRollingGroupBy)
S3method("$",RPolarsSQLContext)
S3method("$",RPolarsSeries)
S3method("$",RPolarsStringCacheHolder)
Expand Down Expand Up @@ -77,6 +78,7 @@ S3method("[[",RPolarsProtoExprArray)
S3method("[[",RPolarsRField)
S3method("[[",RPolarsRNullValues)
S3method("[[",RPolarsRThreadHandle)
S3method("[[",RPolarsRollingGroupBy)
S3method("[[",RPolarsSQLContext)
S3method("[[",RPolarsSeries)
S3method("[[",RPolarsStringCacheHolder)
Expand All @@ -94,6 +96,7 @@ S3method(.DollarNames,RPolarsGroupBy)
S3method(.DollarNames,RPolarsLazyFrame)
S3method(.DollarNames,RPolarsRField)
S3method(.DollarNames,RPolarsRThreadHandle)
S3method(.DollarNames,RPolarsRollingGroupBy)
S3method(.DollarNames,RPolarsSQLContext)
S3method(.DollarNames,RPolarsSeries)
S3method(.DollarNames,RPolarsThen)
Expand All @@ -115,6 +118,7 @@ S3method(as_polars_df,RPolarsDataFrame)
S3method(as_polars_df,RPolarsGroupBy)
S3method(as_polars_df,RPolarsLazyFrame)
S3method(as_polars_df,RPolarsLazyGroupBy)
S3method(as_polars_df,RPolarsRollingGroupBy)
S3method(as_polars_df,RPolarsSeries)
S3method(as_polars_df,data.frame)
S3method(as_polars_df,default)
Expand Down Expand Up @@ -165,6 +169,7 @@ S3method(print,RPolarsLazyFrame)
S3method(print,RPolarsLazyGroupBy)
S3method(print,RPolarsRField)
S3method(print,RPolarsRThreadHandle)
S3method(print,RPolarsRollingGroupBy)
S3method(print,RPolarsSQLContext)
S3method(print,RPolarsSeries)
S3method(print,RPolarsThen)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

### What's changed

- New method `$rolling()` for `DataFrame` and `LazyFrame`. When this is
  applied, it creates an object of class `RPolarsRollingGroupBy` (#682, #694).
- New method `$sink_ndjson()` for LazyFrame (#681).
- New function `pl$duration()` to create a duration by components (week, day,
hour, etc.), and use them with date(time) variables (#692).
Expand Down
5 changes: 5 additions & 0 deletions R/as_polars.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ as_polars_df.RPolarsGroupBy = function(x, ...) {
x$ungroup()
}

#' @rdname as_polars_df
#' @export
as_polars_df.RPolarsRollingGroupBy = function(x, ...) {
  # Converting a rolling group-by to a DataFrame is delegated to its own
  # `$ungroup()` method; `...` is accepted for S3 compatibility and unused.
  ungrouped = x$ungroup()
  ungrouped
}
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved

#' @rdname as_polars_df
#' @export
Expand Down
8 changes: 4 additions & 4 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1828,8 +1828,8 @@ DataFrame_write_ndjson = function(file) {
#' pl$max("a")$alias("max_a")
#' )
DataFrame_rolling = function(index_column, period, offset = NULL, closed = "right", by = NULL, check_sorted = TRUE) {
  # Without an explicit offset, default to the negated period
  # (e.g. period "2d" gives offset "-2d").
  if (is.null(offset)) {
    offset = paste0("-", period)
  }
  # Only record the rolling specification here; the returned
  # RPolarsRollingGroupBy object carries it alongside the data. The former
  # approach (building a lazy rolling query and re-classing it as
  # "RPolarsGroupBy") is dropped: its result was discarded.
  construct_rolling_group_by(self, index_column, period, offset, closed, by, check_sorted)
}
67 changes: 28 additions & 39 deletions R/group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
NULL



RPolarsGroupBy = new.env(parent = emptyenv())

#' @export
Expand All @@ -25,28 +24,30 @@ RPolarsGroupBy = new.env(parent = emptyenv())
#' @export
`[[.RPolarsGroupBy` = `$.RPolarsGroupBy`

#' Auto-complete `$`-access into a GroupBy object
#'
#' Called internally by the interactive R session to list completion
#' candidates.
#' @param x GroupBy
#' @param pattern code-stump as string to auto-complete
#' @inherit .DollarNames.RPolarsDataFrame return
#' @export
#' @noRd
.DollarNames.RPolarsGroupBy = function(x, pattern = "") {
  # Every entry of the method environment matching `pattern` is offered,
  # suffixed with "()" so the completion reads as a call.
  method_names = ls(RPolarsGroupBy, pattern = pattern)
  paste0(method_names, "()")
}


#' The internal GroupBy constructor
#'
#' @param df An `RPolarsDataFrame`; anything else is an internal error.
#' @param groupby_input Grouping specification; flattened with `unlist()`
#'   before being stored.
#' @param maintain_order Stored as-is and handed to `$group_by()` when the
#'   object is aggregated.
#' @return The input as grouped DataFrame
#' @noRd
construct_group_by = function(df, groupby_input, maintain_order) {
  if (!inherits(df, "RPolarsDataFrame")) {
    stop("internal error: construct_group called not on DataFrame")
  }
  # Make an empty object. Store everything (including data) in attributes, so
  # that we can keep the RPolarsDataFrame class on the data but still return
  # a RPolarsGroupBy object here.
  out = c(" ")
  attr(out, "private") = list(
    dat = df$clone(),
    groupby_input = unlist(groupby_input),
    maintain_order = maintain_order
  )
  class(out) = "RPolarsGroupBy"
  out
}


Expand All @@ -58,13 +59,13 @@ construct_group_by = function(df, groupby_input, maintain_order) {
#' @return self
#' @export
#'
#' @examples
#' pl$DataFrame(iris)$group_by("Species")
print.RPolarsGroupBy = function(x, ...) {
  # The data and the grouping settings live in the "private" attribute, so
  # the DataFrame printer must receive `prv$dat`, not the wrapper `x`.
  prv = attr(x, "private")
  .pr$DataFrame$print(prv$dat)
  cat("groups:", toString(prv$groupby_input))
  cat("\nmaintain order:", prv$maintain_order)
  # Return the object invisibly, as print methods conventionally do.
  invisible(x)
}

Expand All @@ -86,18 +87,13 @@ print.RPolarsGroupBy = function(x, ...) {
#' pl$col("bar")$mean()$alias("bar_tail_sum")
#' )
GroupBy_agg = function(...) {
  # `self` is supplied by the `$` dispatcher. The wrapped DataFrame and the
  # grouping settings are read from its "private" attribute; the aggregation
  # itself runs through the lazy API and is collected immediately.
  prv = attr(self, "private")
  prv$dat$lazy()$group_by(
    prv$groupby_input,
    maintain_order = prv$maintain_order
  )$
    agg(...)$
    collect(no_optimization = TRUE)
}


Expand Down Expand Up @@ -300,13 +296,6 @@ GroupBy_null_count = function() {
#'
#' gb$ungroup()
GroupBy_ungroup = function() {
  # The DataFrame is kept in the "private" attribute of the GroupBy object
  # (`self`, supplied by the `$` dispatcher), so ungrouping is just handing
  # that stored DataFrame back.
  prv = attr(self, "private")
  prv$dat
}
133 changes: 133 additions & 0 deletions R/group_by_rolling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' Operations on Polars DataFrame grouped by rolling windows
#'
#' @return not applicable
#' @name RollingGroupBy_class
NULL

# Environment used as the method table for RPolarsRollingGroupBy objects:
# `$` and `[[` below look methods up in it by name. It is created with an
# empty parent environment.
# NOTE(review): presumably populated by
# `replace_private_with_pub_methods(RPolarsRollingGroupBy, "^RollingGroupBy_")`
# in zzz.R; confirm.
RPolarsRollingGroupBy = new.env(parent = emptyenv())

#' @export
`$.RPolarsRollingGroupBy` = function(self, name) {
# Fetch the method registered under `name` in the method table.
func = RPolarsRollingGroupBy[[name]]
# Rebind the method's enclosing environment to this call's frame so that its
# body can see `self` as a free variable (the RollingGroupBy_* methods do not
# take `self` as an argument).
environment(func) = environment()
func
}

# `[[` access behaves exactly like `$` access.
#' @export
`[[.RPolarsRollingGroupBy` = `$.RPolarsRollingGroupBy`

#' Auto-complete `$`-access into a RollingGroupBy object
#'
#' Lists the names in the `RPolarsRollingGroupBy` method environment that
#' match `pattern`, each suffixed with "()".
#' @param x RollingGroupBy
#' @param pattern code-stump as string to auto-complete
#' @export
#' @noRd
.DollarNames.RPolarsRollingGroupBy = function(x, pattern = "") {
  method_names = ls(RPolarsRollingGroupBy, pattern = pattern)
  paste0(method_names, "()")
}

#' The internal RollingGroupBy constructor
#'
#' Wraps a clone of `df` together with the rolling-window settings in the
#' "private" attribute of a placeholder value of class
#' `RPolarsRollingGroupBy`.
#' @return The input as grouped DataFrame
#' @noRd
construct_rolling_group_by = function(df, index_column, period, offset, closed, by, check_sorted) {
  if (!inherits(df, "RPolarsDataFrame")) {
    stop("internal error: construct_group called not on DataFrame")
  }
  # The carrier value itself is a dummy string: the data and every setting
  # travel in the attribute, which lets the stored data keep its
  # RPolarsDataFrame class while the returned object is a
  # RPolarsRollingGroupBy.
  rolling_spec = list(
    dat = df$clone(),
    index_column = index_column,
    period = period,
    offset = offset,
    closed = closed,
    by = by,
    check_sorted = check_sorted
  )
  structure(" ", private = rolling_spec, class = "RPolarsRollingGroupBy")
}

#' print RollingGroupBy
#'
#' Prints the wrapped DataFrame followed by the rolling-window settings
#' (index column, extra grouping columns, period, offset and closed side).
#'
#' @param x RollingGroupBy object, as created by `<DataFrame>$rolling()`
#' @param ... not used
#' @noRd
#' @return `x`, invisibly
#' @export
#'
#' @examples
#' df = pl$DataFrame(
#'   dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#'   a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#'   pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")
print.RPolarsRollingGroupBy = function(x, ...) {
  prv = attr(x, "private")
  .pr$DataFrame$print(prv$dat)
  # Spell out the full field name: `prv$index` only resolved to
  # `index_column` through `$` partial matching on the list.
  cat(paste("index column:", prv$index_column))
  cat(paste("\nother groups:", toString(prv$by)))
  cat(paste("\nperiod:", prv$period))
  cat(paste("\noffset:", prv$offset))
  cat(paste("\nclosed:", prv$closed))
  # Return the object invisibly, as documented and as print.RPolarsGroupBy
  # does.
  invisible(x)
}


#' Aggregate over a RollingGroupBy
#'
#' Aggregate a DataFrame over a rolling window created with `$rolling()`.
#'
#' @param ... Exprs to aggregate over. Those can also be passed wrapped in a
#' list, e.g `$agg(list(e1,e2,e3))`.
#'
#' @return An aggregated [DataFrame][DataFrame_class]
#' @examples
#' df = pl$DataFrame(
#'   dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#'   a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#'   pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$agg(
#'   pl$col("a"),
#'   pl$sum("a")$alias("sum_a"),
#'   pl$min("a")$alias("min_a"),
#'   pl$max("a")$alias("max_a")
#' )
RollingGroupBy_agg = function(...) {
  # `self` is supplied by the `$` dispatcher; the rolling settings recorded
  # by `$rolling()` are replayed on the lazy version of the stored DataFrame.
  prv = attr(self, "private")
  prv$dat$
    lazy()$
    rolling(
    # Full field name on purpose: `prv$index` only worked through `$`
    # partial matching.
    index_column = prv$index_column,
    period = prv$period,
    offset = prv$offset,
    closed = prv$closed,
    by = prv$by,
    check_sorted = prv$check_sorted
  )$
    agg(unpack_list(..., .context = "in $agg():"))$
    collect(no_optimization = TRUE)
}

#' Ungroup a RollingGroupBy object
#'
#' Undo `$rolling()`: `<DataFrame>$rolling(...)$ungroup()` gives back the
#' `DataFrame` stored in the RollingGroupBy object.
#'
#' @return [DataFrame][DataFrame_class]
#' @examples
#' df = pl$DataFrame(
#'   dt = c("2020-01-01", "2020-01-01", "2020-01-01", "2020-01-02", "2020-01-03", "2020-01-08"),
#'   a = c(3, 7, 5, 9, 2, 1)
#' )$with_columns(
#'   pl$col("dt")$str$strptime(pl$Date, format = NULL)$set_sorted()
#' )
#'
#' df$rolling(index_column = "dt", period = "2d")$ungroup()
RollingGroupBy_ungroup = function() {
  # `self` is supplied by the `$` dispatcher; the data sits in the `dat`
  # field of its "private" attribute.
  attr(self, "private")$dat
}
3 changes: 3 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ replace_private_with_pub_methods(RPolarsLazyFrame, "^LazyFrame_")
# LazyGroupBy
replace_private_with_pub_methods(RPolarsLazyGroupBy, "^LazyGroupBy_")

# RollingGroupBy
replace_private_with_pub_methods(RPolarsRollingGroupBy, "^RollingGroupBy_")

# Expr
replace_private_with_pub_methods(RPolarsExpr, "^Expr_")

Expand Down
2 changes: 1 addition & 1 deletion altdoc/altdoc_preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ out = list()
# order determines order in sidebar
# Each class is listed exactly once; "RollingGroupBy" sits right after
# "LazyGroupBy".
classes = c(
  "pl", "Series", "DataFrame", "LazyFrame", "GroupBy",
  "LazyGroupBy", "RollingGroupBy", "ExprList", "ExprBin", "ExprCat", "ExprDT",
  "ExprMeta", "ExprName", "ExprStr", "ExprStruct",
  "Expr", "IO", "RField", "RThreadHandle", "SQLContext", "S3"
)
Expand Down
33 changes: 33 additions & 0 deletions man/RollingGroupBy_agg.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions man/RollingGroupBy_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading