Rename $groupby() to $group_by() (#427)

Co-authored-by: eitsupi <50911393+eitsupi@users.noreply.github.com>
pola-rs · Oct 16, 2023 · ca0b51b · ca0b51b
1 parent 6132f51
commit ca0b51b
Show file tree

Hide file tree

Showing 59 changed files with 165 additions and 164 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -81,10 +81,10 @@ Collate:
     'functions__eager.R'
     'functions__lazy.R'
     'functions__whenthen.R'
-    'groupby.R'
+    'group_by.R'
     'info.R'
     'ipc.R'
-    'lazyframe__groupby.R'
+    'lazyframe__group_by.R'
     'lazyframe__lazy.R'
     'namespace.R'
     'options.R'

diff --git a/NEWS.md b/NEWS.md
@@ -26,6 +26,7 @@
   `$with_columns()` instead (#402).
 - Subnamespace `$arr` has been removed (it was deprecated since 0.8.1). Use `$list`
   instead (#402).
+- `$groupby()` is renamed to `$group_by()` (#427).
 
 ## What's changed
 

diff --git a/R/after-wrappers.R b/R/after-wrappers.R
@@ -295,5 +295,5 @@ pl_pub_class_env = as.environment(mget(pl_class_names, envir = pl_pub_env))
 #'
 #' # The single exception to the rule is class "GroupBy", where objects also have
 #' # two private attributes "groupby_input" and "maintain_order".
-#' str(pl$DataFrame(iris)$groupby("Species"))
+#' str(pl$DataFrame(iris)$group_by("Species"))
 NULL
diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
@@ -836,24 +836,24 @@ DataFrame_filter = function(bool_expr) {
 }
 
 #' Group a DataFrame
-#' @inherit LazyFrame_groupby description params
+#' @inherit LazyFrame_group_by description params
 #' @keywords DataFrame
 #' @return GroupBy (a DataFrame with special groupby methods like `$agg()`)
 #' @examples
 #' gb = pl$DataFrame(
 #'   foo = c("one", "two", "two", "one", "two"),
 #'   bar = c(5, 3, 2, 4, 1)
-#' )$groupby("foo", maintain_order = TRUE)
+#' )$group_by("foo", maintain_order = TRUE)
 #'
 #' gb
 #'
 #' gb$agg(
 #'   pl$col("bar")$sum()$suffix("_sum"),
 #'   pl$col("bar")$mean()$alias("bar_tail_sum")
 #' )
-DataFrame_groupby = function(..., maintain_order = pl$options$maintain_order) {
+DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
   # clone the DataFrame, bundle args as attributes. Non fallible.
-  construct_groupby(self, groupby_input = unpack_list(...), maintain_order = maintain_order)
+  construct_group_by(self, groupby_input = unpack_list(...), maintain_order = maintain_order)
 }
 
 

diff --git a/R/expr__expr.R b/R/expr__expr.R
@@ -405,7 +405,7 @@ Expr_gt_eq = function(other) {
 #'   group = c("one", "one", "one", "two", "two", "two"),
 #'   value = c(94, 95, 96, 97, 97, 99)
 #' ))
-#' df$groupby("group", maintain_order = TRUE)$agg(pl$col("value")$agg_groups())
+#' df$group_by("group", maintain_order = TRUE)$agg(pl$col("value")$agg_groups())
 Expr_agg_groups = "use_extendr_wrapper"
 
 
@@ -794,7 +794,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
 #' e_all = pl$all() # perform groupby agg on all columns otherwise e.g. pl$col("Sepal.Length")
 #' e_sum = e_all$apply(\(s)  sum(s$to_r()))$suffix("_sum")
 #' e_head = e_all$apply(\(s) head(s$to_r(), 2))$suffix("_head")
-#' pl$DataFrame(iris)$groupby("Species")$agg(e_sum, e_head)
+#' pl$DataFrame(iris)$group_by("Species")$agg(e_sum, e_head)
 #'
 #'
 #' # apply over single values (should be avoided as it takes ~2.5us overhead + R function exec time
@@ -847,7 +847,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
 #' # R parallel process example; use Sys.sleep() to imitate some CPU-expensive computation.
 #'
 #' # use apply over each Species-group in each column equal to 12 sequential runs ~1.2 sec.
-#' pl$LazyFrame(iris)$groupby("Species")$agg(
+#' pl$LazyFrame(iris)$group_by("Species")$agg(
 #'   pl$all()$apply(\(s) {
 #'     Sys.sleep(.1)
 #'     s$sum()
@@ -858,7 +858,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
 #' pl$set_options(rpool_cap = 0) # drop any previous processes, just to show start-up overhead here
 #' pl$set_options(rpool_cap = 4) # set back to 4, the default
 #' pl$options$rpool_cap
-#' pl$LazyFrame(iris)$groupby("Species")$agg(
+#' pl$LazyFrame(iris)$group_by("Species")$agg(
 #'   pl$all()$apply(\(s) {
 #'     Sys.sleep(.1)
 #'     s$sum()
@@ -867,7 +867,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
 #'
 #' # map in parallel 2: Reuse R processes in "polars global_rpool".
 #' pl$options$rpool_cap
-#' pl$LazyFrame(iris)$groupby("Species")$agg(
+#' pl$LazyFrame(iris)$group_by("Species")$agg(
 #'   pl$all()$apply(\(s) {
 #'     Sys.sleep(.1)
 #'     s$sum()
@@ -2368,7 +2368,7 @@ Expr_quantile = function(quantile, interpolation = "nearest") {
 #'   b = c(1, 2, 3)
 #' ))
 #'
-#' df$groupby("group_col")$agg(
+#' df$group_by("group_col")$agg(
 #'   pl$col("b")$filter(pl$col("b") < 2)$sum()$alias("lt"),
 #'   pl$col("b")$filter(pl$col("b") >= 2)$sum()$alias("gte")
 #' )
@@ -2406,7 +2406,7 @@ Expr_where = Expr_filter
 #' @examples
 #' pl$DataFrame(list(a = letters))$select(pl$col("a")$explode()$take(0:5))
 #'
-#' listed_group_df = pl$DataFrame(iris[c(1:3, 51:53), ])$groupby("Species")$agg(pl$all())
+#' listed_group_df = pl$DataFrame(iris[c(1:3, 51:53), ])$group_by("Species")$agg(pl$all())
 #' print(listed_group_df)
 #' vectors_df = listed_group_df$select(
 #'   pl$col(c("Sepal.Width", "Sepal.Length"))$explode()
@@ -4261,7 +4261,7 @@ Expr_shrink_dtype = "use_extendr_wrapper"
 #' df_with_list = pl$DataFrame(
 #'   group = c(1, 1, 2, 2, 3),
 #'   value = c(1:5)
-#' )$groupby(
+#' )$group_by(
 #'   "group",
 #'   maintain_order = TRUE
 #' )$agg(

diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
@@ -1031,7 +1031,7 @@ LazyFrame$drop_nulls <- function(subset) .Call(wrap__LazyFrame__drop_nulls, self
 
 LazyFrame$unique <- function(subset, keep, maintain_order) .Call(wrap__LazyFrame__unique, self, subset, keep, maintain_order)
 
-LazyFrame$groupby <- function(exprs, maintain_order) .Call(wrap__LazyFrame__groupby, self, exprs, maintain_order)
+LazyFrame$group_by <- function(exprs, maintain_order) .Call(wrap__LazyFrame__group_by, self, exprs, maintain_order)
 
 LazyFrame$with_row_count <- function(name, offset) .Call(wrap__LazyFrame__with_row_count, self, name, offset)
 

diff --git a/R/functions__lazy.R b/R/functions__lazy.R
@@ -112,7 +112,7 @@ pl$element = function() pl$col("")
 #' df$select(pl$count())
 #'
 #'
-#' df$groupby("c", maintain_order = TRUE)$agg(pl$count())
+#' df$group_by("c", maintain_order = TRUE)$agg(pl$count())
 pl$count = function(column = NULL) { # -> Expr | int:
   if (is.null(column)) {
     return(.pr$Expr$new_count())

diff --git a/R/groupby.R → R/group_by.R b/R/groupby.R → R/group_by.R
@@ -42,7 +42,7 @@ GroupBy = new.env(parent = emptyenv())
 #' @keywords internal
 #' @return The input as grouped DataFrame
 #' @noRd
-construct_groupby = function(df, groupby_input, maintain_order) {
+construct_group_by = function(df, groupby_input, maintain_order) {
   if (!inherits(df, "DataFrame")) stopf("internal error: construct_group called not on DataFrame")
   df = df$clone()
   attr(df, "private") = list(groupby_input = groupby_input, maintain_order = maintain_order)
@@ -59,7 +59,7 @@ construct_groupby = function(df, groupby_input, maintain_order) {
 #' @return self
 #' @export
 #'
-#' @examples pl$DataFrame(iris)$groupby("Species")
+#' @examples pl$DataFrame(iris)$group_by("Species")
 print.GroupBy = function(x, ...) {
   .pr$DataFrame$print(x)
   cat("groups: ")
@@ -81,7 +81,7 @@ print.GroupBy = function(x, ...) {
 #'   foo = c("one", "two", "two", "one", "two"),
 #'   bar = c(5, 3, 2, 4, 1)
 #' )$
-#'   groupby("foo")$
+#'   group_by("foo")$
 #'   agg(
 #'   pl$col("bar")$sum()$suffix("_sum"),
 #'   pl$col("bar")$mean()$alias("bar_tail_sum")
@@ -108,7 +108,7 @@ GroupBy_agg = function(...) {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$first()
+#' df$group_by("d", maintain_order = TRUE)$first()
 GroupBy_first = function() {
   self$agg(pl$all()$first())
 }
@@ -124,7 +124,7 @@ GroupBy_first = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$last()
+#' df$group_by("d", maintain_order = TRUE)$last()
 GroupBy_last = function() {
   self$agg(pl$all()$last())
 }
@@ -140,7 +140,7 @@ GroupBy_last = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$max()
+#' df$group_by("d", maintain_order = TRUE)$max()
 GroupBy_max = function() {
   self$agg(pl$all()$max())
 }
@@ -156,7 +156,7 @@ GroupBy_max = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$mean()
+#' df$group_by("d", maintain_order = TRUE)$mean()
 GroupBy_mean = function() {
   self$agg(pl$all()$mean())
 }
@@ -172,7 +172,7 @@ GroupBy_mean = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$median()
+#' df$group_by("d", maintain_order = TRUE)$median()
 GroupBy_median = function() {
   self$agg(pl$all()$median())
 }
@@ -188,7 +188,7 @@ GroupBy_median = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$min()
+#' df$group_by("d", maintain_order = TRUE)$min()
 GroupBy_min = function() {
   self$agg(pl$all()$min())
 }
@@ -204,7 +204,7 @@ GroupBy_min = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$sum()
+#' df$group_by("d", maintain_order = TRUE)$sum()
 GroupBy_sum = function() {
   self$agg(pl$all()$sum())
 }
@@ -220,7 +220,7 @@ GroupBy_sum = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$var()
+#' df$group_by("d", maintain_order = TRUE)$var()
 GroupBy_var = function() {
   self$agg(pl$all()$var())
 }
@@ -236,7 +236,7 @@ GroupBy_var = function() {
 #'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
 #'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
 #' )
-#' df$groupby("d", maintain_order = TRUE)$std()
+#' df$group_by("d", maintain_order = TRUE)$std()
 GroupBy_std = function() {
   self$agg(pl$all()$std())
 }
@@ -257,7 +257,7 @@ GroupBy_quantile = function(quantile, interpolation = "nearest") {
 #' @keywords GroupBy
 #' @param periods integer Number of periods to shift (may be negative).
 #' @return GroupBy
-#' @examples pl$DataFrame(mtcars)$groupby("cyl")$shift(2)
+#' @examples pl$DataFrame(mtcars)$group_by("cyl")$shift(2)
 GroupBy_shift = function(periods = 1) {
   self$agg(pl$all()$shift(periods))
 }
@@ -268,7 +268,7 @@ GroupBy_shift = function(periods = 1) {
 #' @param fill_value fill None values with the result of this expression.
 #' @param periods integer Number of periods to shift (may be negative).
 #' @return GroupBy
-#' @examples pl$DataFrame(mtcars)$groupby("cyl")$shift_and_fill(99, 1)
+#' @examples pl$DataFrame(mtcars)$group_by("cyl")$shift_and_fill(99, 1)
 GroupBy_shift_and_fill = function(fill_value, periods = 1) {
   self$agg(pl$all()$shift_and_fill(periods, fill_value))
 }
@@ -280,7 +280,7 @@ GroupBy_shift_and_fill = function(fill_value, periods = 1) {
 #' @examples
 #' x = mtcars
 #' x[1:10, 3:5] = NA
-#' pl$DataFrame(x)$groupby("cyl")$null_count()
+#' pl$DataFrame(x)$group_by("cyl")$null_count()
 GroupBy_null_count = function() {
   self$agg(pl$all()$null_count())
 }

diff --git a/R/lazyframe__groupby.R → R/lazyframe__group_by.R b/R/lazyframe__groupby.R → R/lazyframe__group_by.R
@@ -16,7 +16,7 @@ print.LazyGroupBy = function(x, ...) {
 
 #' @title LazyGroupBy_agg
 #' @description
-#' aggregate a polar_lazy_groupby
+#' aggregate a polar_lazy_group_by
 #' @param ... exprs to aggregate over.
 #' ... args can also be passed wrapped in a list `$agg(list(e1,e2,e3))`
 #' @return A new `LazyFrame` object.
@@ -26,7 +26,7 @@ print.LazyGroupBy = function(x, ...) {
 #'   bar = c(5, 3, 2, 4, 1)
 #' )$
 #'   lazy()$
-#'   groupby("foo")
+#'   group_by("foo")
 #'
 #' #
 #' print(lgb)

diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
@@ -859,15 +859,15 @@ LazyFrame_unique = function(subset = NULL, keep = "first", maintain_order = FALS
 #'   foo = c("one", "two", "two", "one", "two"),
 #'   bar = c(5, 3, 2, 4, 1)
 #' )$
-#'   groupby("foo")$
+#'   group_by("foo")$
 #'   agg(
 #'   pl$col("bar")$sum()$suffix("_sum"),
 #'   pl$col("bar")$mean()$alias("bar_tail_sum")
 #' )$
 #'   collect()
-LazyFrame_groupby = function(..., maintain_order = pl$options$maintain_order) {
-  .pr$LazyFrame$groupby(self, unpack_list(...), maintain_order) |>
-    unwrap("in $groupby():")
+LazyFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
+  .pr$LazyFrame$group_by(self, unpack_list(...), maintain_order) |>
+    unwrap("in $group_by():")
 }
 
 #' Join LazyFrames
@@ -1294,7 +1294,7 @@ LazyFrame_fetch = function(
 #' # -1-  map each Species-group with native polars, takes ~120us only
 #' pl$LazyFrame(iris)$
 #'   sort("Sepal.Length")$
-#'   groupby("Species", maintain_order = TRUE)$
+#'   group_by("Species", maintain_order = TRUE)$
 #'   agg(pl$col(pl$Float64)$first() + 5)$
 #'   profile()
 #'
@@ -1308,7 +1308,7 @@ LazyFrame_fetch = function(
 #'
 #' pl$LazyFrame(iris)$
 #'   sort("Sepal.Length")$
-#'   groupby("Species", maintain_order = TRUE)$
+#'   group_by("Species", maintain_order = TRUE)$
 #'   agg(pl$col(pl$Float64)$apply(r_func))$
 #'   profile()
 LazyFrame_profile = function() {

diff --git a/R/options.R b/R/options.R
@@ -38,7 +38,7 @@ polars_optreq$rpool_cap = list() # rust-side options already check args
 #' general pro "immutable objects". Immutability is also classic in R. To mimic
 #' the Python-polars API, set this to `FALSE`.
 #' @param maintain_order Default for all `maintain_order` options (present in
-#' `$groupby()` or `$unique()` for example).
+#' `$group_by()` or `$unique()` for example).
 #' @param do_not_repeat_call Do not print the call causing the error in error
 #' messages. The default (`FALSE`) is to show them.
 #' @param debug_polars Print additional information to debug Polars.

diff --git a/README.Rmd b/README.Rmd
@@ -193,11 +193,11 @@ dat[1:4, c("mpg", "qsec", "hp")]
 
 However, the true power of Polars is unlocked by using *methods*, which are
 encapsulated in the `DataFrame` object itself. For example, we can chain the
-`$groupby()` and the `$mean()` methods to compute group-wise means for each
+`$group_by()` and the `$mean()` methods to compute group-wise means for each
 column of the dataset:
 
 ```{r}
-dat$groupby("cyl", maintain_order = TRUE)$mean()
+dat$group_by("cyl", maintain_order = TRUE)$mean()
 ```
 
 Note that we use `maintain_order = TRUE` so that `polars` always keeps the groups

diff --git a/README.md b/README.md
@@ -206,11 +206,11 @@ dat[1:4, c("mpg", "qsec", "hp")]
 
 However, the true power of Polars is unlocked by using *methods*, which
 are encapsulated in the `DataFrame` object itself. For example, we can
-chain the `$groupby()` and the `$mean()` methods to compute group-wise
+chain the `$group_by()` and the `$mean()` methods to compute group-wise
 means for each column of the dataset:
 
 ``` r
-dat$groupby("cyl", maintain_order = TRUE)$mean()
+dat$group_by("cyl", maintain_order = TRUE)$mean()
 #> shape: (3, 11)
 #> ┌─────┬───────────┬────────────┬────────────┬───┬──────────┬──────────┬──────────┬──────────┐
 #> │ cyl ┆ mpg       ┆ disp       ┆ hp         ┆ … ┆ vs       ┆ am       ┆ gear     ┆ carb     │

diff --git a/docs/docs/reference_home.Rmd b/docs/docs/reference_home.Rmd
@@ -76,7 +76,7 @@ they are returned in a list, and only the new columns or the grouping columns
 are returned.
 
 ```{r}
-test$groupby(pl$col("cyl"))$agg(
+test$group_by(pl$col("cyl"))$agg(
   pl$col("mpg"), # varying number of values
   pl$col("mpg")$slice(0, 2)$suffix("_sliced"), # two values
   # aggregated to one value and implicitly unpacks list