pola-rs · sorhawell · Apr 14, 2023 · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,6 +25,7 @@ Suggests:
     patrick,
     bit64,
     knitr,
+    tibble,
     rmarkdown
 Config/testthat/edition: 3
 Collate: 

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
@@ -864,3 +864,133 @@ DataFrame_unnest = function(names = NULL) {
 
 
 
+
+#' @title First
+#' @description Get the first row of the DataFrame. Evaluated as `self$lazy()$first()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` containing the first row.
+#' @examples pl$DataFrame(mtcars)$first()
+DataFrame_first = function() {
+  self$lazy()$first()$collect()
+}
+
+#' @title Last
+#' @description Get the last row of the DataFrame. Evaluated as `self$lazy()$last()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` containing the last row.
+#' @examples pl$DataFrame(mtcars)$last()
+DataFrame_last = function() {
+  self$lazy()$last()$collect()
+}
+
+#' @title Max
+#' @description Aggregate the columns in the DataFrame to their maximum value. Evaluated as `self$lazy()$max()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` holding the maximum value of each column.
+#' @examples pl$DataFrame(mtcars)$max()
+DataFrame_max = function() {
+  self$lazy()$max()$collect()
+}
+
+#' @title Mean
+#' @description Aggregate the columns in the DataFrame to their mean value. Evaluated as `self$lazy()$mean()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` holding the mean value of each column.
+#' @examples pl$DataFrame(mtcars)$mean()
+DataFrame_mean = function() {
+  self$lazy()$mean()$collect()
+}
+
+#' @title Median
+#' @description Aggregate the columns in the DataFrame to their median value. Evaluated as `self$lazy()$median()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` holding the median value of each column.
+#' @examples pl$DataFrame(mtcars)$median()
+DataFrame_median = function() {
+  self$lazy()$median()$collect()
+}
+
+#' @title Min
+#' @description Aggregate the columns in the DataFrame to their minimum value. Evaluated as `self$lazy()$min()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` holding the minimum value of each column.
+#' @examples pl$DataFrame(mtcars)$min()
+DataFrame_min = function() {
+  self$lazy()$min()$collect()
+}
+
+#' @title Sum
+#' @description Aggregate the columns of this DataFrame to their sum values. Evaluated as `self$lazy()$sum()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` holding the sum of each column.
+#' @examples pl$DataFrame(mtcars)$sum()
+DataFrame_sum = function() {
+  self$lazy()$sum()$collect()
+}
+
+#' @title Var
+#' @description Aggregate the columns of this DataFrame to their variance values. Evaluated as `self$lazy()$var(ddof)$collect()`.
+#' @keywords DataFrame
+#' @param ddof Integer. Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. Defaults to 1 and is forwarded unchanged to the lazy `$var()` call.
+#' @return A new `DataFrame` holding the variance of each column.
+#' @examples pl$DataFrame(mtcars)$var()
+DataFrame_var = function(ddof = 1) {
+  self$lazy()$var(ddof)$collect()
+}
+
+#' @title Std
+#' @description Aggregate the columns of this DataFrame to their standard deviation values. Evaluated as `self$lazy()$std(ddof)$collect()`.
+#' @keywords DataFrame
+#' @param ddof Integer. Delta Degrees of Freedom: the divisor used in the calculation is N - ddof, where N represents the number of elements. Defaults to 1 and is forwarded unchanged to the lazy `$std()` call.
+#' @return A new `DataFrame` holding the standard deviation of each column.
+#' @examples pl$DataFrame(mtcars)$std()
+DataFrame_std = function(ddof = 1) {
+  self$lazy()$std(ddof)$collect()
+}
+
+#' @title Reverse
+#' @description Reverse the DataFrame. Evaluated as `self$lazy()$reverse()$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` (the lazy result is collected before returning).
+#' @examples pl$DataFrame(mtcars)$reverse()
+DataFrame_reverse = function() {
+  self$lazy()$reverse()$collect()
+}
+
+
+#' @title Slice
+#' @description Get a slice of this DataFrame. Evaluated as `self$lazy()$slice(offset, length)$collect()`.
+#' @keywords DataFrame
+#' @return A new `DataFrame` containing the selected rows (the lazy result is collected before returning).
+#' @param offset Integer. Start of the slice; 0-based, so `offset = 2` skips the first two rows.
+#' @param length Integer or `NULL` (the default). Number of rows to take; with `NULL` all rows from `offset` onwards are kept.
+#' @examples
+#' pl$DataFrame(mtcars)$slice(2, 4)
+#' mtcars[3:6, ] # the same four rows in base R
+DataFrame_slice = function(offset, length = NULL) {
+  self$lazy()$slice(offset, length)$collect()
+}
+
+
+#' @title Null count
+#' @description Create a new DataFrame that shows the null counts per column.
+#' @keywords DataFrame
+#' @return A `DataFrame` with the null count of each column.
+#' @docType NULL
+#' @format function
+#' @examples
+#' x = mtcars
+#' x[1, 2:3] = NA
+#' pl$DataFrame(x)$null_count()
+DataFrame_null_count = "use_extendr_wrapper" # NOTE(review): placeholder string, presumably swapped for the generated wrapper - confirm
+
+
+#' @title Estimated size
+#' @description Return an estimation of the total (heap) allocated size of the DataFrame.
+#' @keywords DataFrame
+#' @return The estimated size in bytes.
+#' @docType NULL
+#' @format function
+#' @examples
+#' pl$DataFrame(mtcars)$estimated_size()
+DataFrame_estimated_size = "use_extendr_wrapper" # NOTE(review): placeholder string, presumably swapped for the generated wrapper - confirm
diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
@@ -1,4 +1,7 @@
 # Generated by extendr: Do not edit by hand
+
+# nolint start
+
 #
 # This file was created with the following call:
 #   .Call("wrap__make_rpolars_wrappers", use_symbols = TRUE, package_name = "rpolars")
@@ -102,6 +105,10 @@ DataFrame$export_stream <- function(stream_ptr) invisible(.Call(wrap__DataFrame_
 
 DataFrame$from_arrow_record_batches <- function(rbr) .Call(wrap__DataFrame__from_arrow_record_batches, rbr)
 
+DataFrame$estimated_size <- function() .Call(wrap__DataFrame__estimated_size, self)
+
+DataFrame$null_count <- function() .Call(wrap__DataFrame__null_count, self)
+
 #' @export
 `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func }
 
@@ -808,6 +815,28 @@ LazyFrame$collect_background <- function() .Call(wrap__LazyFrame__collect_backgr
 
 LazyFrame$collect <- function() .Call(wrap__LazyFrame__collect, self)
 
+LazyFrame$first <- function() .Call(wrap__LazyFrame__first, self)
+
+LazyFrame$last <- function() .Call(wrap__LazyFrame__last, self)
+
+LazyFrame$max <- function() .Call(wrap__LazyFrame__max, self)
+
+LazyFrame$min <- function() .Call(wrap__LazyFrame__min, self)
+
+LazyFrame$mean <- function() .Call(wrap__LazyFrame__mean, self)
+
+LazyFrame$median <- function() .Call(wrap__LazyFrame__median, self)
+
+LazyFrame$sum <- function() .Call(wrap__LazyFrame__sum, self)
+
+LazyFrame$var <- function(ddof) .Call(wrap__LazyFrame__var, self, ddof)
+
+LazyFrame$std <- function(ddof) .Call(wrap__LazyFrame__std, self, ddof)
+
+LazyFrame$reverse <- function() .Call(wrap__LazyFrame__reverse, self)
+
+LazyFrame$slice <- function(offset, length) .Call(wrap__LazyFrame__slice, self, offset, length)
+
 LazyFrame$select <- function(exprs) .Call(wrap__LazyFrame__select, self, exprs)
 
 LazyFrame$limit <- function(n) .Call(wrap__LazyFrame__limit, self, n)
@@ -952,3 +981,5 @@ PolarsBackgroundHandle$is_exhausted <- function() .Call(wrap__PolarsBackgroundHa
 #' @export
 `[[.PolarsBackgroundHandle` <- `$.PolarsBackgroundHandle`
 
+
+# nolint end
diff --git a/R/groupby.R b/R/groupby.R
@@ -75,6 +75,162 @@ GroupBy_agg = function(...) {
 }
 
 
+#' @title GroupBy First
+#' @description Reduce the groups to the first value. Shorthand for `$agg(pl$all()$first())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$first()
+GroupBy_first = function() {
+  self$agg(pl$all()$first())
+}
+
+#' @title GroupBy Last
+#' @description Reduce the groups to the last value. Shorthand for `$agg(pl$all()$last())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$last()
+GroupBy_last = function() {
+  self$agg(pl$all()$last())
+}
+
+#' @title GroupBy Max
+#' @description Reduce the groups to the maximum value. Shorthand for `$agg(pl$all()$max())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$max()
+GroupBy_max = function() {
+  self$agg(pl$all()$max())
+}
+
+#' @title GroupBy Mean
+#' @description Reduce the groups to the mean value. Shorthand for `$agg(pl$all()$mean())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$mean()
+GroupBy_mean = function() {
+  self$agg(pl$all()$mean())
+}
+
+#' @title GroupBy Median
+#' @description Reduce the groups to the median value. Shorthand for `$agg(pl$all()$median())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$median()
+GroupBy_median = function() {
+  self$agg(pl$all()$median())
+}
+
+#' @title GroupBy Min
+#' @description Reduce the groups to the minimum value. Shorthand for `$agg(pl$all()$min())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$min()
+GroupBy_min = function() {
+  self$agg(pl$all()$min())
+}
+
+#' @title GroupBy Sum
+#' @description Reduce the groups to the sum value. Shorthand for `$agg(pl$all()$sum())`.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$sum()
+GroupBy_sum = function() {
+  self$agg(pl$all()$sum())
+}
+
+#' @title GroupBy Var
+#' @description Reduce the groups to the variance value. Shorthand for `$agg(pl$all()$var())`; takes no parameters, so `$var()` runs with its default arguments.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$var()
+GroupBy_var = function() {
+  self$agg(pl$all()$var())
+}
+
+#' @title GroupBy Std
+#' @description Reduce the groups to the standard deviation value. Shorthand for `$agg(pl$all()$std())`; takes no parameters, so `$std()` runs with its default arguments.
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @keywords GroupBy
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 2, 2, 3, 4, 5),
+#'   b = c(0.5, 0.5, 4, 10, 13, 14),
+#'   c = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE),
+#'   d = c("Apple", "Orange", "Apple", "Apple", "Banana", "Banana")
+#' )
+#' df$groupby("d", maintain_order = TRUE)$std()
+GroupBy_std = function() {
+  self$agg(pl$all()$std())
+}
+
+#' @title GroupBy null count
+#' @description Create a new DataFrame that shows the null counts per column for each group. Shorthand for `$agg(pl$all()$null_count())`.
+#' @keywords GroupBy
+#' @return Aggregated `DataFrame`, as returned by `$agg()`.
+#' @examples
+#' x = mtcars
+#' x[1:10, 3:5] = NA
+#' pl$DataFrame(x)$groupby("cyl")$null_count()
+GroupBy_null_count = function() {
+  self$agg(pl$all()$null_count())
+}
+
 #' convert to data.frame
 #'
 #' @param ... any opt param passed to R as.data.frame