diff --git a/R/Field.R b/R/Field.R index f9412a341..099e445d9 100644 --- a/R/Field.R +++ b/R/Field.R @@ -40,7 +40,7 @@ print.RField = function(x, ...) { #' #' Called by the interactive R session internally #' -#' @param x Name of a `"RField"` object +#' @param x Name of a `RField` object #' @param pattern String used to auto-complete #' #' @export diff --git a/R/expr__expr.R b/R/expr__expr.R index 4ce493ea7..f4c592a39 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1,10 +1,8 @@ -#' @title Polars Expressions +#' Polars Expressions #' -#' @name Expr_class -#' @return not applicable -#' @description Expressions are all the functions and methods that are applicable -#' to a Polars DataFrame. They can be split into the following categories (following -#' the [Py-Polars classification](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/)): +#' Expressions are all the functions and methods that are applicable to a Polars +#' DataFrame or LazyFrame. They can be split into the following categories +#' (following the [Py-Polars classification](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/)): #' * Aggregate #' * Binary #' * Categorical @@ -17,60 +15,57 @@ #' * Struct #' * Temporal #' +#' @name Expr_class +#' @rdname Expr_class +#' +#' @return not applicable NULL -#' Print expr +#' S3 method to print an Expr #' #' @param x Expr -#' @param ... not used -#' @keywords Expr +#' @param ... Not used. #' -#' @return self +#' @return The Expr, invisibly. It is printed to the console as a side effect. #' @export -#' @keywords internal -#' @noRd +#' @rdname Expr_print #' #' @examples -#' pl$col("some_column")$sum()$over("some_other_column") +#' print(pl$col("some_column")$sum()) print.Expr = function(x, ...) 
{ cat("polars Expr: ") x$print() invisible(x) } -#' internal method print Expr -#' @name Expr_print -#' @keywords Expr -#' @examples -#' pl$col("some_column")$sum()$over("some_other_column")$print() -#' @return invisible self -#' @examples pl$DataFrame(iris) +#' @rdname Expr_print Expr_print = function() { .pr$Expr$print(self) invisible(self) } -#' @title auto complete $-access into a polars object -#' @description called by the interactive R session internally -#' @param x Expr -#' @param pattern code-stump as string to auto-complete +#' Auto complete $-access into a polars object +#' +#' Called by the interactive R session internally +#' +#' @param x Name of an `Expr` object +#' @param pattern String used to auto-complete #' @inherit .DollarNames.DataFrame return #' @export -#' @noRd #' @keywords internal .DollarNames.Expr = function(x, pattern = "") { paste0(ls(Expr, pattern = pattern), "()") } -#' @title as.list Expr -#' @description wraps an Expr in a list +#' S3 method to convert an Expr to a list +#' #' @param x Expr -#' @param ... not used +#' @param ... Not used. +#' #' @return One Expr wrapped in a list -#' @noRd #' @export -#' @keywords Expr +#' @keywords internal as.list.Expr = function(x, ...) { list(x) } @@ -156,13 +151,14 @@ wrap_elist_result = function(elist, str_to_lit = TRUE) { } -#' Add -#' @description Addition -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Add two expressions +#' +#' The RHS can either be an Expr or an object that can be converted to a literal +#' (e.g. an integer). 
+#' +#' @param other Literal or object that can be converted to a literal +#' @return Expr #' @examples -#' # three syntaxes same result #' pl$lit(5) + 10 #' pl$lit(5) + pl$lit(10) #' pl$lit(5)$add(pl$lit(10)) @@ -170,10 +166,11 @@ wrap_elist_result = function(elist, str_to_lit = TRUE) { Expr_add = function(other) { .pr$Expr$add(self, other) |> unwrap("in $add()") } + #' @export #' @rdname Expr_add -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @param e1 Expr only +#' @param e2 Expr or anything that can be converted to a literal "+.Expr" = function(e1, e2) { if (missing(e2)) { return(e1) @@ -181,13 +178,11 @@ Expr_add = function(other) { result(wrap_e(e1)$add(e2)) |> unwrap("using the '+'-operator") } -#' Div -#' @description Divide -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Divide two expressions +#' +#' @inherit Expr_add description params return +#' #' @examples -#' # three syntaxes same result #' pl$lit(5) / 10 #' pl$lit(5) / pl$lit(10) #' pl$lit(5)$div(pl$lit(10)) @@ -196,17 +191,14 @@ Expr_div = function(other) { } #' @export #' @rdname Expr_div -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add "/.Expr" = function(e1, e2) result(wrap_e(e1)$div(e2)) |> unwrap("using the '/'-operator") -#' Sub -#' @description Substract -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Subtract two expressions +#' +#' @inherit Expr_add description params return +#' #' @examples -#' # three syntaxes same result #' pl$lit(5) - 10 #' pl$lit(5) - pl$lit(10) #' pl$lit(5)$sub(pl$lit(10)) @@ -216,21 +208,18 @@ Expr_sub = function(other) { } #' @export #' @rdname Expr_sub -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add "-.Expr" = function(e1, e2) { 
result( if (missing(e2)) wrap_e(0L)$sub(e1) else wrap_e(e1)$sub(e2) ) |> unwrap("using the '-'-operator") } -#' Mul * -#' @description Multiplication -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Multiply two expressions +#' +#' @inherit Expr_add description params return +#' #' @examples -#' # three syntaxes same result #' pl$lit(5) * 10 #' pl$lit(5) * pl$lit(10) #' pl$lit(5)$mul(pl$lit(10)) @@ -240,17 +229,13 @@ Expr_mul = Expr_mul = function(other) { #' @export #' @rdname Expr_mul -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add "*.Expr" = function(e1, e2) result(wrap_e(e1)$mul(e2)) |> unwrap("using the '*'-operator") -#' Not ! -#' @description not method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs -#' @usage Expr_not_(other) +#' Negate a boolean expression +#' +#' @inherit Expr_add description return #' @docType NULL #' @format NULL #' @examples @@ -268,13 +253,11 @@ Expr_is_not = function() { .pr$Expr$not_(self) } -#' Less Than < -#' @description lt method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check strictly lower inequality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(5) < 10 #' pl$lit(5) < pl$lit(10) #' pl$lit(5)$lt(pl$lit(10)) @@ -282,20 +265,15 @@ Expr_lt = function(other) { .pr$Expr$lt(self, other) |> unwrap("in $lt()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add #' @rdname Expr_lt "<.Expr" = function(e1, e2) result(wrap_e(e1)$lt(e2)) |> unwrap("using the '<'-operator") -#' GreaterThan < 
-#' @description gt method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check strictly greater inequality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(2) > 1 #' pl$lit(2) > pl$lit(1) #' pl$lit(2)$gt(pl$lit(1)) @@ -303,20 +281,15 @@ Expr_gt = function(other) { .pr$Expr$gt(self, other) |> unwrap("in $gt()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add #' @rdname Expr_gt ">.Expr" = function(e1, e2) result(wrap_e(e1)$gt(e2)) |> unwrap("using the '>'-operator") -#' Equal == -#' @description eq method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check equality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(2) == 2 #' pl$lit(2) == pl$lit(2) #' pl$lit(2)$eq(pl$lit(2)) @@ -324,21 +297,16 @@ Expr_eq = function(other) { .pr$Expr$eq(self, other) |> unwrap("in $eq()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add #' @rdname Expr_eq "==.Expr" = function(e1, e2) result(wrap_e(e1)$eq(e2)) |> unwrap("using the '=='-operator") -#' Not Equal != -#' @description neq method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check inequality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(1) != 2 #' pl$lit(1) != pl$lit(2) #' pl$lit(1)$neq(pl$lit(2)) @@ -346,20 +314,15 @@ 
Expr_neq = function(other) { .pr$Expr$neq(self, other) |> unwrap("in $neq()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add #' @rdname Expr_neq "!=.Expr" = function(e1, e2) result(wrap_e(e1)$neq(e2)) |> unwrap("using the '!='-operator") -#' Less Than Or Equal <= -#' @description lt_eq method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check lower or equal inequality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(2) <= 2 #' pl$lit(2) <= pl$lit(2) #' pl$lit(2)$lt_eq(pl$lit(2)) @@ -367,21 +330,16 @@ Expr_lt_eq = function(other) { .pr$Expr$lt_eq(self, other) |> unwrap("in $lt_eq()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression +#' @inheritParams Expr_add #' @rdname Expr_lt_eq "<=.Expr" = function(e1, e2) result(wrap_e(e1)$lt_eq(e2)) |> unwrap("using the '<='-operator") -#' Greater Than Or Equal <= -#' @description gt_eq method and operator -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Exprs +#' Check greater or equal inequality +#' +#' @inherit Expr_add description params return +#' #' @examples -#' #' #three syntaxes same result #' pl$lit(2) >= 2 #' pl$lit(2) >= pl$lit(2) #' pl$lit(2)$gt_eq(pl$lit(2)) @@ -389,21 +347,17 @@ Expr_gt_eq = function(other) { .pr$Expr$gt_eq(self, other) |> unwrap("in $gt_eq()") } #' @export -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @param e1 lhs Expr -#' @param e2 rhs Expr or anything which can become a literal Expression 
+#' @inheritParams Expr_add #' @rdname Expr_gt_eq ">=.Expr" = function(e1, e2) result(wrap_e(e1)$gt_eq(e2)) |> unwrap("using the '>='-operator") -#' aggregate groups -#' @keywords Expr -#' @description -#' Get the group indexes of the group by operation. -#' Should be used in aggregation context only. -#' @return Exprs +#' Aggregate groups +#' +#' Get the group indexes of the group by operation. Should be used in aggregation +#' context only. +#' @return Expr #' @docType NULL #' @format NULL #' @examples @@ -416,10 +370,10 @@ Expr_agg_groups = "use_extendr_wrapper" #' Rename Expr output -#' @keywords Expr -#' @description +#' #' Rename the output of an expression. -#' @param name string new name of output +#' +#' @param name New name of output #' @return Expr #' @docType NULL #' @format NULL @@ -427,25 +381,23 @@ Expr_agg_groups = "use_extendr_wrapper" #' @examples pl$col("bob")$alias("alice") Expr_alias = "use_extendr_wrapper" -#' All, is true -#' @keywords Expr -#' @description -#' Check if all boolean values in a Boolean column are `TRUE`. -#' This method is an expression - not to be confused with -#' `pl$all` which is a function to select all columns. -#' @aliases Expr_all +#' Apply logical AND on a column +#' +#' Check if all boolean values in a Boolean column are `TRUE`. This method is an +#' expression - not to be confused with `pl$all()` which is a function to select +#' all columns. #' @param drop_nulls Boolean. Default TRUE, as name says. 
#' @return Boolean literal #' @docType NULL #' @format NULL -#' @details last `all()` in example is this Expr method, the first `pl$all()` refers -#' to "all-columns" and is an expression constructor #' @examples #' pl$DataFrame( #' all = c(TRUE, TRUE), #' any = c(TRUE, FALSE), #' none = c(FALSE, FALSE) #' )$select( +#' # the first $all() selects all columns, the second one applies the AND +#' # logical on the values #' pl$all()$all() #' ) Expr_all = function(drop_nulls = TRUE) { @@ -453,9 +405,8 @@ Expr_all = function(drop_nulls = TRUE) { unwrap("in $all()") } -#' Any (is true) -#' @keywords Expr -#' @description +#' Apply logical OR on a column +#' #' Check if any boolean value in a Boolean column is `TRUE`. #' @param drop_nulls Boolean. Default TRUE, as name says. #' @return Boolean literal @@ -474,14 +425,11 @@ Expr_any = function(drop_nulls = TRUE) { unwrap("in $all()") } - - -#' Count values (len is a alias) -#' @keywords Expr -#' @name Expr_count -#' @description -#' Count the number of values in this expression. -#' Similar to R length() +#' Count elements +#' +#' Count the number of elements in this expression. Note that `NULL` values are +#' also counted. `$len()` is an alias. +#' @rdname Expr_count #' @return Expr #' @docType NULL #' @format NULL @@ -495,51 +443,28 @@ Expr_any = function(drop_nulls = TRUE) { #' ) Expr_count = "use_extendr_wrapper" -#' Count values (len is a alias) -#' @keywords Expr #' @rdname Expr_count -#' @return Expr -#' @docType NULL -#' @format NULL -#' @examples -#' pl$DataFrame( -#' all = c(TRUE, TRUE), -#' any = c(TRUE, FALSE), -#' none = c(FALSE, FALSE) -#' )$select( -#' pl$all()$len(), -#' pl$col("all")$first()$len()$alias("all_first") -#' ) Expr_len = "use_extendr_wrapper" - - -#' Drop null(s) -#' @keywords Expr -#' @description -#' Drop null values. 
-#' Similar to R syntax `x[!(is.na(x) & !is.nan(x))]` +#' Drop missing values +#' +#' @seealso +#' `drop_nans()` #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples #' pl$DataFrame(list(x = c(1, 2, NaN, NA)))$select(pl$col("x")$drop_nulls()) Expr_drop_nulls = "use_extendr_wrapper" -#' Drop NaN(s) -#' @keywords Expr -#' @description -#' Drop floating point NaN values. -#' Similar to R syntax `x[!is.nan(x)]` -#' @details -#' -#' Note that NaN values are not null values! (null corresponds to R NA, not R NULL) -#' To drop null values, use method `drop_nulls`. +#' Drop NaN #' +#' @details +#' Note that `NaN` values are not `null` values. Null values correspond to NA +#' in R. #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +#' @seealso +#' `drop_nulls()` #' #' @return Expr #' @docType NULL @@ -548,45 +473,28 @@ Expr_drop_nulls = "use_extendr_wrapper" #' pl$DataFrame(list(x = c(1, 2, NaN, NA)))$select(pl$col("x")$drop_nans()) Expr_drop_nans = "use_extendr_wrapper" - - - - -#' is_null -#' @keywords Expr -#' @description +#' Check if elements are NULL +#' #' Returns a boolean Series indicating which values are null. -#' Similar to R syntax is.na(x) -#' null polars about the same as R NA #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples #' pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$is_null()) Expr_is_null = "use_extendr_wrapper" -#' is_not_null -#' @keywords Expr -#' @description -#' Returns a boolean Series indicating which values are not null. -#' Similar to R syntax !is.na(x) -#' null polars about the same as R NA +#' Check if elements are not NULL +#' +#' Returns a boolean Series indicating which values are not null. Syntactic sugar +#' for `$is_null()$not_()`. 
#' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples #' pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$is_not_null()) Expr_is_not_null = "use_extendr_wrapper" - - - - # TODO move this function in to rust with input list of args # TODO deprecate context feature #' construct proto Expr array from args @@ -650,28 +558,24 @@ construct_ProtoExprArray = function(...) { } - - - ## TODO allow list to be formed from recursive R lists ## TODO Contribute polars, seems polars now prefer word f or function in map/apply/rolling/apply # over lambda. However lambda is still in examples. ## TODO Better explain aggregate list -#' Map an expression with an R function. -#' @keywords Expr +#' Map an expression with an R function #' #' @param f a function to map with #' @param output_type `NULL` or a type available in `names(pl$dtypes)`. If `NULL` -#' (default), the output datatype will match is the input datatype. This is used +#' (default), the output datatype will match the input datatype. This is used #' to inform schema of the actual return type of the R function. Setting this wrong #' could theoretically have some downstream implications to the query. -#' @param agg_list Aggregate list. Map from vector to group in groupby context. +#' @param agg_list Aggregate list. Map from vector to group in group_by context. #' @param in_background Boolean. Whether to execute the map in a background R #' process. Combined with setting e.g. `pl$set_options(rpool_cap = 4)` it can speed #' up some slow R functions as they can run in parallel R sessions. The #' communication speed between processes is quite slower than between threads. -#' This will likely only give a speed-up in a "low IO - high CPU" usecase. +#' This will likely only give a speed-up in a "low IO - high CPU" use case. 
#' If there are multiple `$map(in_background = TRUE)` calls in the query, they #' will be run in parallel. #' @@ -741,7 +645,6 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL } #' Expr_apply -#' @keywords Expr #' #' @description #' Apply a custom/user-defined function (UDF) in a GroupBy or Projection context. @@ -852,7 +755,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL #' r_vec * 2L #' }) #' -#' #' #R parallel process example, use Sys.sleep() to imitate some CPU expensive computation. +#' # R parallel process example, use Sys.sleep() to imitate some CPU expensive computation. #' #' # use apply over each Species-group in each column equal to 12 sequential runs ~1.2 sec. #' pl$LazyFrame(iris)$group_by("Species")$agg( @@ -898,13 +801,12 @@ Expr_apply = function(f, return_type = NULL, strict_return_type = TRUE, allow_fa } -#' Return an expression representing a literal value +#' Create a literal value #' -#' @param x An R Scalar, or R vector/list (via Series) +#' @param x A vector of any length #' #' @return Expr #' -#' @aliases lit #' @details #' `pl$lit(NULL)` translates into a polars `null`. 
#' @@ -928,15 +830,12 @@ Expr_apply = function(f, return_type = NULL, strict_return_type = TRUE, allow_fa #' # vectors to literal implicitly #' (pl$lit(2) + 1:4) / 4:1 Expr_lit = function(x) { - # use .call reduces eval from 22us to 15us, not a bottle-next anyways .Call(wrap__Expr__lit, x) |> unwrap("in $lit()") } -#' polars reverse -#' @keywords Expr +#' Reverse a variable #' @return Expr -#' @aliases reverse #' @name Expr_reverse #' @examples #' pl$DataFrame(list(a = 1:5))$select(pl$col("a")$reverse()) @@ -946,15 +845,12 @@ Expr_reverse = function() { -#' And -#' @name Expr_and -#' @description combine to boolean expressions with AND -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Expr +#' Apply logical AND on two expressions +#' +#' Combine two boolean expressions with AND. +#' @inherit Expr_add params return #' @docType NULL #' @format NULL -#' @usage Expr_and(other) #' @examples #' pl$lit(TRUE) & TRUE #' pl$lit(TRUE)$and(pl$lit(TRUE)) @@ -965,16 +861,13 @@ Expr_and = function(other) { "&.Expr" = function(e1, e2) result(wrap_e(e1)$and(e2)) |> unwrap("using the '&'-operator") -#' Or -#' @name Expr_or -#' @description combine to boolean expressions with OR -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Expr +#' Apply logical OR on two expressions +#' +#' Combine two boolean expressions with OR. 
+#' +#' @inherit Expr_add params return #' @docType NULL #' @format NULL -#' @param other Expr or into Expr -#' @usage Expr_or(other) #' @examples #' pl$lit(TRUE) | FALSE #' pl$lit(TRUE)$or(pl$lit(TRUE)) @@ -985,26 +878,28 @@ Expr_or = function(other) { "|.Expr" = function(e1, e2) result(wrap_e(e1)$or(e2)) |> unwrap("using the '|'-operator") -#' Xor -#' @name Expr_xor -#' @description combine to boolean expressions with XOR -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal -#' @return Expr +#' Apply logical XOR on two expressions +#' +#' Combine two boolean expressions with XOR. +#' @inherit Expr_add params return #' @docType NULL #' @format NULL -#' @usage Expr_xor(other) #' @examples #' pl$lit(TRUE)$xor(pl$lit(FALSE)) Expr_xor = function(other) { .pr$Expr$xor(self, other) |> unwrap("in $xor()") } - - -#' To physical representation -#' @description expression request underlying physical base representation -#' @keywords Expr +#' Cast an Expr to its physical representation +#' +#' The following DataTypes will be converted: +#' * Date -> Int32 +#' * Datetime -> Int64 +#' * Time -> Int64 +#' * Duration -> Int64 +#' * Categorical -> UInt32 +#' * List(inner) -> List(physical of inner) +#' Other data types will be left unchanged. #' @return Expr #' @docType NULL #' @format NULL @@ -1012,7 +907,7 @@ Expr_xor = function(other) { #' @name Expr_to_physical #' @examples #' pl$DataFrame( -#' list(vals = c("a", "x", NA, "a")) +#' list(vals = c("a", "x", NA, "a", "b")) #' )$with_columns( #' pl$col("vals")$cast(pl$Categorical), #' pl$col("vals") @@ -1023,17 +918,15 @@ Expr_xor = function(other) { Expr_to_physical = "use_extendr_wrapper" -#' Cast between DataType(s) -#' @keywords Expr +#' Cast between DataType +#' #' @param dtype DataType to cast to. -#' @param strict bool if true an error will be thrown if cast failed at resolve time. +#' @param strict If `TRUE` (default), an error will be thrown if cast failed at +#' resolve time. 
#' @return Expr -#' @aliases cast -#' @name Expr_cast -#' @aliases cast #' @examples #' df = pl$DataFrame(a = 1:3, b = c(1, 2, 3)) -#' df$print()$with_columns( +#' df$with_columns( #' pl$col("a")$cast(pl$dtypes$Float64), #' pl$col("b")$cast(pl$dtypes$Int32) #' ) @@ -1041,62 +934,43 @@ Expr_to_physical = "use_extendr_wrapper" #' # strict FALSE, inserts null for any cast failure #' pl$lit(c(100, 200, 300))$cast(pl$dtypes$UInt8, strict = FALSE)$lit_to_s() #' -#' #' # strict TRUE, raise any failure as an error when query is executed. #' tryCatch( #' { #' pl$lit("a")$cast(pl$dtypes$Float64, strict = TRUE)$lit_to_s() #' }, -#' error = as.character +#' error = function(e) e #' ) Expr_cast = function(dtype, strict = TRUE) { .pr$Expr$cast(self, dtype, strict) } - - -#' Square root -#' @description Compute the square root of the elements. -#' @keywords Expr +#' Compute the square root of the elements +#' #' @return Expr -#' @aliases sqrt -#' @name Expr_sqrt #' @examples -#' pl$DataFrame(list(a = -1:3))$select(pl$col("a")$sqrt()) +#' pl$DataFrame(a = -1:3)$with_columns(a_sqrt = pl$col("a")$sqrt()) Expr_sqrt = function() { self$pow(0.5) } - - - - -#' Compute the exponential, element-wise. -#' @keywords Expr +#' Compute the exponential of the elements #' @return Expr #' @docType NULL #' @format NULL -#' @aliases exp -#' @name Expr_exp -#' @format NULL #' @examples -#' log10123 = suppressWarnings(log(-1:3)) -#' all.equal( -#' pl$DataFrame(list(a = log10123))$select(pl$col("a")$exp())$to_data_frame()$a, -#' exp(1)^log10123 -#' ) +#' pl$DataFrame(a = -1:3)$with_columns(a_exp = pl$col("a")$exp()) Expr_exp = "use_extendr_wrapper" -#' Exclude certain columns from a wildcard/regex selection. -#' @description You may also use regexes in the exclude list. They must start with `^` and end with `$`. 
-#' @param columns given param type: -#' - string: exclude name of column or exclude regex starting with ^and ending with$ +#' Exclude certain columns from selection +#' +#' @param columns Given param type: +#' - string: single column name or regex starting with `^` and ending with `$` #' - character vector: exclude all these column names, no regex allowed #' - DataType: Exclude any of this DataType #' - List(DataType): Exclude any of these DataType(s) #' -#' @keywords Expr #' @return Expr #' @aliases exclude #' @name Expr_exclude @@ -1116,7 +990,6 @@ Expr_exp = "use_extendr_wrapper" #' df$select(pl$all()$exclude("^Sepal.*$")) #' Expr_exclude = function(columns) { - # handle lists if (is.list(columns)) { columns = pcase( all(sapply(columns, inherits, "RPolarsDataType")), unwrap(.pr$DataTypeVector$from_rlist(columns)), @@ -1125,7 +998,6 @@ Expr_exclude = function(columns) { ) } - # dispatch exclude call on types pcase( is.character(columns), .pr$Expr$exclude(self, columns), inherits(columns, "DataTypeVector"), .pr$Expr$exclude_dtype(self, columns), @@ -1134,28 +1006,21 @@ Expr_exclude = function(columns) { ) } -#' Are elements finite -#' @description Returns a boolean output indicating which values are finite. +#' Check if elements are finite #' -#' @keywords Expr +#' Returns a boolean Series indicating which values are finite. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases is_finite -#' @name Expr_is_finite -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_finite()) +#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ +#' with_columns(finite = pl$col("alice")$is_finite()) Expr_is_finite = "use_extendr_wrapper" -#' Are elements infinite -#' @description Returns a boolean output indicating which values are infinite. 
-#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @keywords Expr +#' Check if elements are infinite +#' +#' Returns a boolean Series indicating which values are infinite. #' @return Expr #' @docType NULL #' @format NULL @@ -1163,19 +1028,14 @@ Expr_is_finite = "use_extendr_wrapper" #' @name Expr_is_infinite #' @format NULL #' @examples -#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_infinite()) +#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ +#' with_columns(infinite = pl$col("alice")$is_infinite()) Expr_is_infinite = "use_extendr_wrapper" - - - -#' Are elements NaN's -#' @description Returns a boolean Series indicating which values are NaN. -#' @details Floating point NaN's are a different flag from Null(polars) which is the same as -#' NA_real_(R). -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @keywords Expr +#' Check if elements are NaN +#' +#' Returns a boolean Series indicating which values are NaN. #' @return Expr #' @docType NULL #' @format NULL @@ -1184,36 +1044,37 @@ Expr_is_infinite = "use_extendr_wrapper" #' #' @format NULL #' @examples -#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_nan()) +#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ +#' with_columns(nan = pl$col("alice")$is_nan()) Expr_is_nan = "use_extendr_wrapper" -#' Are elements not NaN's -#' @description Returns a boolean Series indicating which values are not NaN. -#' @details Floating point NaN's are a different flag from Null(polars) which is the same as -#' NA_real_(R). -#' @keywords Expr +#' Check if elements are not NaN +#' +#' Returns a boolean Series indicating which values are not NaN. Syntactic sugar +#' for `$is_nan()$not_()`. 
#' @return Expr #' @docType NULL #' @format NULL #' @aliases is_not_nan -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @name Expr_is_not_nan #' @format NULL #' @examples -#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_not_nan()) +#' pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ +#' with_columns(not_nan = pl$col("alice")$is_not_nan()) Expr_is_not_nan = "use_extendr_wrapper" - - -#' Get a slice of this expression. +#' Get a slice of an Expr #' -#' @param offset numeric or expression, zero-indexed where to start slice -#' negative value indicate starting (one-indexed) from back -#' @param length how many elements should slice contain, default NULL is max length +#' Performing a slice of length 1 on a subset of columns will recycle this value +#' in those columns but will not change the number of rows in the data. See +#' examples. +#' +#' @param offset Numeric or expression, zero-indexed. Indicates where to start +#' the slice. A negative value is one-indexed and starts from the end. +#' @param length Maximum number of elements contained in the slice. Default is +#' full data. #' -#' @keywords Expr #' @return Expr #' @aliases slice #' @name Expr_slice @@ -1233,22 +1094,23 @@ Expr_is_not_nan = "use_extendr_wrapper" #' pl$DataFrame(list(a = 0:100))$select( #' pl$all()$slice(80) #' ) +#' +#' # recycling +#' pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) Expr_slice = function(offset, length = NULL) { .pr$Expr$slice(self, wrap_e(offset), wrap_e(length)) } #' Append expressions -#' @description This is done by adding the chunks of `other` to this `output`. #' -#' @param other Expr, into Expr -#' @param upcast bool upcast to, if any supertype of two non equal datatypes. +#' This is done by adding the chunks of `other` to this `output`. +#' +#' @param other Expr or something coercible to an Expr. 
+#' @param upcast Cast both Expr to a common supertype if they have one. #' -#' @keywords Expr #' @return Expr -#' @aliases Expr_append #' @name Expr_append -#' @format NULL #' @examples #' # append bottom to to row #' df = pl$DataFrame(list(a = 1:3, b = c(NA_real_, 4, 5))) @@ -1264,16 +1126,14 @@ Expr_append = function(other, upcast = TRUE) { #' Rechunk memory layout -#' @description Create a single chunk of memory for this Series. -#' @keywords Expr +#' +#' Create a single chunk of memory for this Series. +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases rechunk -#' @name Expr_rechunk -#' @format NULL #' @details -#' See rechunk() explained here \code{\link[polars]{docs_translations}} +#' See rechunk() explained here \code{\link[polars]{docs_translations}}. #' @examples #' # get chunked lengths with/without rechunk #' series_list = pl$DataFrame(list(a = 1:3, b = 4:6))$select( @@ -1284,18 +1144,17 @@ Expr_append = function(other, upcast = TRUE) { Expr_rechunk = "use_extendr_wrapper" #' Cumulative sum -#' @description Get an array with the cumulative sum computed at every element. -#' @keywords Expr -#' @param reverse bool, default FALSE, if true roll over vector from back to forth +#' +#' Get an array with the cumulative sum computed at every element. +#' +#' @param reverse If `TRUE`, start with the total sum of elements and subtract +#' each row one by one. #' @return Expr -#' @aliases Expr_cumsum -#' @name Expr_cum_sum #' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. -#' @format NULL +#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +#' prevent overflow issues. 
#' @examples -#' pl$DataFrame(list(a = 1:4))$select( +#' pl$DataFrame(a = 1:4)$with_columns( #' pl$col("a")$cum_sum()$alias("cum_sum"), #' pl$col("a")$cum_sum(reverse = TRUE)$alias("cum_sum_reversed") #' ) @@ -1306,19 +1165,14 @@ Expr_cum_sum = function(reverse = FALSE) { #' Cumulative product -#' @description Get an array with the cumulative product computed at every element. -#' @keywords Expr -#' @param reverse bool, default FALSE, if true roll over vector from back to forth -#' @return Expr -#' @aliases cum_prod -#' @name Expr_cum_prod -#' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. #' -#' @format NULL +#' Get an array with the cumulative product computed at every element. +#' +#' @param reverse If `TRUE`, start with the total product of elements and divide +#' each row one by one. +#' @inherit Expr_cum_sum return details #' @examples -#' pl$DataFrame(list(a = 1:4))$select( +#' pl$DataFrame(a = 1:4)$with_columns( #' pl$col("a")$cum_prod()$alias("cum_prod"), #' pl$col("a")$cum_prod(reverse = TRUE)$alias("cum_prod_reversed") #' ) @@ -1328,20 +1182,13 @@ Expr_cum_prod = function(reverse = FALSE) { } #' Cumulative minimum -#' @description Get an array with the cumulative min computed at every element. -#' @keywords Expr -#' @param reverse bool, default FALSE, if true roll over vector from back to forth -#' @return Expr -#' @aliases cum_min -#' @name Expr_cum_min -#' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @format NULL +#' Get an array with the cumulative min computed at every element. +#' +#' @param reverse If `TRUE`, start from the last value. 
+#' @inherit Expr_cum_sum return details #' @examples -#' pl$DataFrame(list(a = 1:4))$select( +#' pl$DataFrame(a = c(1:4, 2L))$with_columns( #' pl$col("a")$cum_min()$alias("cum_min"), #' pl$col("a")$cum_min(reverse = TRUE)$alias("cum_min_reversed") #' ) @@ -1351,20 +1198,13 @@ Expr_cum_min = function(reverse = FALSE) { } #' Cumulative maximum -#' @description Get an array with the cumulative max computed at every element. -#' @keywords Expr -#' @param reverse bool, default FALSE, if true roll over vector from back to forth -#' @return Expr -#' @aliases cummin -#' @name Expr_cum_max -#' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @format NULL +#' Get an array with the cumulative max computed at every element. +#' +#' @param reverse If `TRUE`, start from the last value. +#' @inherit Expr_cum_sum return details #' @examples -#' pl$DataFrame(list(a = 1:4))$select( +#' pl$DataFrame(a = c(1:4, 2L))$with_columns( #' pl$col("a")$cum_max()$alias("cummux"), #' pl$col("a")$cum_max(reverse = TRUE)$alias("cum_max_reversed") #' ) @@ -1374,22 +1214,19 @@ Expr_cum_max = function(reverse = FALSE) { } #' Cumulative count -#' @description Get an array with the cumulative count computed at every element. -#' Counting from 0 to len -#' @keywords Expr -#' @param reverse bool, default FALSE, if true roll over vector from back to forth +#' +#' Get an array with the cumulative count (zero-indexed) computed at every element. +#' +#' @param reverse If `TRUE`, reverse the count. #' @return Expr -#' @aliases cum_count -#' @name Expr_cum_count #' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. +#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +#' prevent overflow issues. #' -#' cum_count does not seem to count within lists. 
+#' `$cum_count()` does not seem to count within lists. #' -#' @format NULL #' @examples -#' pl$DataFrame(list(a = 1:4))$select( +#' pl$DataFrame(a = 1:4)$with_columns( #' pl$col("a")$cum_count()$alias("cum_count"), #' pl$col("a")$cum_count(reverse = TRUE)$alias("cum_count_reversed") #' ) @@ -1400,54 +1237,37 @@ Expr_cum_count = function(reverse = FALSE) { #' Floor -#' @description Rounds down to the nearest integer value. -#' Only works on floating point Series. -#' @keywords Expr +#' +#' Rounds down to the nearest integer value. Only works on floating point Series. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases Expr_floor -#' @name Expr_floor -#' @format NULL #' @examples -#' pl$DataFrame(list( -#' a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -#' ))$select( -#' pl$col("a")$floor() +#' pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( +#' floor = pl$col("a")$floor() #' ) Expr_floor = "use_extendr_wrapper" #' Ceiling -#' @description Rounds up to the nearest integer value. -#' Only works on floating point Series. -#' @keywords Expr +#' +#' Rounds up to the nearest integer value. Only works on floating point Series. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases Expr_ceil -#' @name Expr_ceil -#' @format NULL #' @examples -#' pl$DataFrame(list( -#' a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -#' ))$select( -#' pl$col("a")$ceil() +#' pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( +#' ceiling = pl$col("a")$ceil() #' ) Expr_ceil = "use_extendr_wrapper" -#' round -#' @description Round underlying floating point data by `decimals` digits. -#' @keywords Expr -#' @param decimals integer Number of decimals to round by. +#' Round +#' +#' Round underlying floating point data by `decimals` digits. +#' @param decimals Number of decimals to round by. 
#' @return Expr -#' @aliases round -#' @name Expr_round -#' @format NULL #' @examples -#' pl$DataFrame(list( -#' a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -#' ))$select( -#' pl$col("a")$round(0) +#' pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( +#' round = pl$col("a")$round(1) #' ) Expr_round = function(decimals) { unwrap(.pr$Expr$round(self, decimals)) @@ -1455,18 +1275,16 @@ Expr_round = function(decimals) { # TODO contribute polars, dot product unwraps if datatypes, pass Result instead + #' Dot product -#' @description Compute the dot/inner product between two Expressions. -#' @keywords Expr -#' @param other Expr to compute dot product with. -#' @return Expr -#' @aliases dot -#' @name Expr_dot -#' @format NULL +#' +#' Compute the dot/inner product between two Expressions. +#' +#' @inherit Expr_add params return #' @examples #' pl$DataFrame( -#' a = 1:4, b = c(1, 2, 3, 4), c = "bob" -#' )$select( +#' a = 1:4, b = c(1, 2, 3, 4) +#' )$with_columns( #' pl$col("a")$dot(pl$col("b"))$alias("a dot b"), #' pl$col("a")$dot(pl$col("a"))$alias("a dot a") #' ) @@ -1476,60 +1294,48 @@ Expr_dot = function(other) { #' Mode -#' @description Compute the most occurring value(s). Can return multiple Values. -#' @keywords Expr +#' +#' Compute the most occurring value(s). Can return multiple values if there are +#' ties. +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases mode -#' @name Expr_mode -#' @format NULL #' @examples -#' df = pl$DataFrame(list(a = 1:6, b = c(1L, 1L, 3L, 3L, 5L, 6L), c = c(1L, 1L, 2L, 2L, 3L, 3L))) +#' df = pl$DataFrame(a = 1:6, b = c(1L, 1L, 3L, 3L, 5L, 6L), c = c(1L, 1L, 2L, 2L, 3L, 3L)) #' df$select(pl$col("a")$mode()) #' df$select(pl$col("b")$mode()) #' df$select(pl$col("c")$mode()) Expr_mode = "use_extendr_wrapper" -#' Expr_sort -#' @description Sort this column. In projection/ selection context the whole column is sorted. -#' If used in a groupby context, the groups are sorted. 
-#' @keywords Expr +#' Sort an Expr +#' +#' Sort this column. If used in a groupby context, the groups are sorted. +#' #' @param descending Sort in descending order. When sorting by multiple columns, -#' can be specified per column by passing a sequence of booleans. -#' @param nulls_last bool, default FALSE, place Nulls last +#' can be specified per column by passing a vector of booleans. +#' @param nulls_last If `TRUE`, place null values last. #' @return Expr -#' @aliases sort -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @name Expr_sort -#' @format NULL #' @examples -#' pl$DataFrame(list( -#' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$sort()) -Expr_sort = function(descending = FALSE, nulls_last = FALSE) { # param reverse named descending on rust side +#' pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$ +#' with_columns(sorted = pl$col("a")$sort()) +Expr_sort = function(descending = FALSE, nulls_last = FALSE) { .pr$Expr$sort(self, descending, nulls_last) } # TODO contribute polars, add arguments for Null/NaN/inf last/first, top_k unwraps k> len column + #' Top k values -#' @description Return the `k` largest elements. -#' @details This has time complexity: \eqn{ O(n + k \\log{}n - \frac{k}{2}) } #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @keywords Expr -#' @param k numeric k top values to get +#' Return the `k` largest elements.
This has time complexity: \eqn{ O(n + k +#' \\log{}n - \frac{k}{2}) } +#' +#' @param k Number of top values to get #' @return Expr -#' @aliases top_k -#' @name Expr_top_k -#' @format NULL #' @examples -#' pl$DataFrame(list( -#' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$top_k(5)) +#' pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$select(pl$col("a")$top_k(5)) Expr_top_k = function(k) { if (!is.numeric(k) || k < 0) stop("k must be numeric and positive, prefereably integerish") .pr$Expr$top_k(self, k) |> @@ -1537,175 +1343,118 @@ Expr_top_k = function(k) { } # TODO contribute polars, add arguments for Null/NaN/inf last/first, bottom_k unwraps k> len column + #' Bottom k values -#' @description Return the `k` smallest elements. -#' @details This has time complexity: \eqn{ O(n + k \\log{}n - \frac{k}{2}) } #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @keywords Expr -#' @param k numeric k bottom values to get -#' @return Expr -#' @aliases bottom_k -#' @name Expr_bottom_k -#' @format NULL +#' Return the `k` smallest elements. This has time complexity: \eqn{ O(n + k +#' \\log{}n - \frac{k}{2}) } +#' +#' @inherit Expr_top_k params return #' @examples -#' pl$DataFrame(list( -#' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$bottom_k(5)) +#' pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$select(pl$col("a")$bottom_k(5)) Expr_bottom_k = function(k) { if (!is.numeric(k) || k < 0) stop("k must be numeric and positive, prefereably integerish") .pr$Expr$bottom_k(self, k) |> unwrap("in $bottom_k():") } - #' Index of a sort -#' @description Get the index values that would sort this column. -#' If 'reverse=True` the smallest elements will be given. -#' @keywords Expr -#' @param descending Sort in descending order. When sorting by multiple columns, -#' can be specified per column by passing a sequence of booleans. 
-#' @param nulls_last bool, default FALSE, place Nulls last -#' @return Expr -#' @aliases arg_sort -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @name Expr_arg_sort -#' @format NULL +#' +#' Get the index values that would sort this column. +#' +#' @inherit Expr_sort params return #' @examples -#' pl$DataFrame(list( +#' pl$DataFrame( #' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$arg_sort()) -Expr_arg_sort = function(descending = FALSE, nulls_last = FALSE) { # param reverse named descending on rust side +#' )$with_columns(arg_sorted = pl$col("a")$arg_sort()) +Expr_arg_sort = function(descending = FALSE, nulls_last = FALSE) { .pr$Expr$arg_sort(self, descending, nulls_last) } +#' @inherit Expr_arg_sort title params examples +#' @description argsort is an alias for arg_sort +Expr_argsort = Expr_arg_sort #' Index of min value -#' @description Get the index of the minimal value. -#' @keywords Expr +#' +#' Get the index of the minimal value. #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @name Expr_arg_min -#' @format NULL #' @examples -#' pl$DataFrame(list( +#' pl$DataFrame( #' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$arg_min()) +#' )$with_columns(arg_min = pl$col("a")$arg_min()) Expr_arg_min = "use_extendr_wrapper" -#' Index of min value -#' @description Get the index of the minimal value. -#' @keywords Expr +#' Index of max value +#' +#' Get the index of the maximal value.
#' @return Expr #' @docType NULL #' @format NULL -#' @aliases Expr_arg_max -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @name Expr_arg_max -#' @format NULL #' @examples -#' pl$DataFrame(list( +#' pl$DataFrame( #' a = c(6, 1, 0, NA, Inf, NaN) -#' ))$select(pl$col("a")$arg_max()) +#' )$with_columns(arg_max = pl$col("a")$arg_max()) Expr_arg_max = "use_extendr_wrapper" - - - # TODO contribute pypolars search_sorted behavior is under-documented, does multiple elements work? + #' Where to inject element(s) to maintain sorting #' -#' @description Find indices in self where elements should be inserted into to maintain order. -#' @keywords Expr -#' @param element a R value into literal or an expression of an element +#' Find the index in self where the element should be inserted so that it doesn't +#' break sortedness. +#' @param element Expr or scalar value. #' @return Expr -#' @aliases search_sorted -#' @name Expr_search_sorted -#' @details This function look up where to insert element if to keep self column sorted. -#' It is assumed the self column is already sorted ascending, otherwise wrongs answers. -#' This function is a bit under documented in py-polars. -#' @format NULL +#' @details +#' This function looks up where to insert element to keep self column sorted. +#' It is assumed the self column is already sorted in ascending order (otherwise +#' this leads to wrong results). #' @examples -#' pl$DataFrame(list(a = 0:100))$select(pl$col("a")$search_sorted(pl$lit(42L))) +#' df = pl$DataFrame(a = c(1, 3, 4, 4, 6)) +#' df +#' +#' # in which row should 5 be inserted in order to not break the sort? +#' # (value is 0-indexed) +#' df$select(pl$col("a")$search_sorted(5)) Expr_search_sorted = function(element) { .pr$Expr$search_sorted(self, wrap_e(element)) } - - -#' sort column by order of others -#' @description Sort this column by the ordering of another column, or multiple other columns. 
-#' @param by one expression or list expressions and/or strings(interpreted as column names) -#' @param descending Sort in descending order. When sorting by multiple columns, -#' can be specified per column by passing a sequence of booleans. -#' @return Expr -#' @keywords Expr -#' @aliases sort_by -#' @name Expr_sort_by -#' @details -#' In projection/ selection context the whole column is sorted. +#' Sort Expr by order of others +#' +#' Sort this column by the ordering of another column, or multiple other columns. #' If used in a groupby context, the groups are sorted. #' -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @format NULL +#' @param by One expression or a list of expressions and/or strings (interpreted +#' as column names). +#' @inheritParams Expr_sort +#' @return Expr #' @examples -#' df = pl$DataFrame(list( +#' df = pl$DataFrame( #' group = c("a", "a", "a", "b", "b", "b"), #' value1 = c(98, 1, 3, 2, 99, 100), #' value2 = c("d", "f", "b", "e", "c", "a") -#' )) +#' ) #' #' # by one column/expression -#' df$select( -#' pl$col("group")$sort_by("value1") +#' df$with_columns( +#' sorted = pl$col("group")$sort_by("value1") #' ) #' #' # by two columns/expressions -#' df$select( -#' pl$col("group")$sort_by(list("value2", pl$col("value1")), descending = c(TRUE, FALSE)) +#' df$with_columns( +#' sorted = pl$col("group")$sort_by( +#' list("value2", pl$col("value1")), +#' descending = c(TRUE, FALSE) +#' ) #' ) #' -#' #' # by some expression -#' df$select( -#' pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE)) -#' ) -#' -#' # quite similar usecase as R function `order()` -#' l = list( -#' ab = c(rep("a", 6), rep("b", 6)), -#' v4 = rep(1:4, 3), -#' v3 = rep(1:3, 4), -#' v2 = rep(1:2, 6), -#' v1 = 1:12 -#' ) -#' df = pl$DataFrame(l) -#' -#' -#' # examples of order versus sort_by -#' all.equal( -#' df$select( -#' pl$col("ab")$sort_by("v4")$alias("ab4"), -#' pl$col("ab")$sort_by("v3")$alias("ab3"), -#' 
pl$col("ab")$sort_by("v2")$alias("ab2"), -#' pl$col("ab")$sort_by("v1")$alias("ab1"), -#' pl$col("ab")$sort_by(list("v3", pl$col("v1")), descending = c(FALSE, TRUE))$alias("ab13FT"), -#' pl$col("ab")$sort_by(list("v3", pl$col("v1")), descending = TRUE)$alias("ab13T") -#' )$to_list(), -#' list( -#' ab4 = l$ab[order(l$v4)], -#' ab3 = l$ab[order(l$v3)], -#' ab2 = l$ab[order(l$v2)], -#' ab1 = l$ab[order(l$v1)], -#' ab13FT = l$ab[order(l$v3, rev(l$v1))], -#' ab13T = l$ab[order(l$v3, l$v1, decreasing = TRUE)] -#' ) +#' df$with_columns( +#' sorted = pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE)) #' ) Expr_sort_by = function(by, descending = FALSE) { .pr$Expr$sort_by( @@ -1715,95 +1464,73 @@ Expr_sort_by = function(by, descending = FALSE) { ) |> unwrap("in $sort_by:") } - # TODO coontribute pyPolars, if exceeding u32 return Null, if exceeding column return Error # either it should be error or Null. # pl.DataFrame({"a":[0,1,2,3,4],"b":[4,3,2,1,0]}).select(pl.col("a").take(5294967296.0)) #return Null # pl.DataFrame({"a":[0,1,2,3,4],"b":[4,3,2,1,0]}).select(pl.col("a").take(-3)) #return Null # pl.DataFrame({"a":[0,1,2,3,4],"b":[4,3,2,1,0]}).select(pl.col("a").take(7)) #return Error -#' Take values by index. -#' @param indices R scalar/vector or Series, or Expr that leads to a UInt32 dtyped Series. -#' @return Expr -#' @keywords Expr -#' @aliases take -#' @name Expr_gather -#' @details -#' similar to R indexing syntax e.g. `letters[c(1,3,5)]`, however as an expression, not as eager computation -#' exceeding + +#' Gather values by index #' -#' @format NULL +#' @param indices R scalar/vector or Series, or Expr that leads to a Series of +#' dtype UInt32. 
+#' @return Expr #' @examples -#' pl$select(pl$lit(0:10)$gather(c(1, 8, 0, 7))) +#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4))) Expr_gather = function(indices) { .pr$Expr$gather(self, pl$lit(indices)) |> unwrap("in $gather():") } - - #' Shift values -#' @param periods numeric number of periods to shift, may be negative. +#' +#' @param periods Number of periods to shift, may be negative. #' @return Expr -#' @keywords Expr -#' @aliases shift -#' @name Expr_shift -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @usage Expr_shift(periods) #' @examples -#' pl$select( -#' pl$lit(0:3)$shift(-2)$alias("shift-2"), -#' pl$lit(0:3)$shift(2)$alias("shift+2") -#' ) +#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$ +#' with_columns( +#' pl$col("a")$shift(-2)$alias("shift-2"), +#' pl$col("a")$shift(2)$alias("shift+2") +#' ) Expr_shift = function(periods = 1) { .pr$Expr$shift(self, periods) |> unwrap("in $shift():") } #' Shift and fill values -#' @description Shift the values by a given period and fill the resulting null values. #' -#' @param periods numeric number of periods to shift, may be negative. -#' @param fill_value Fill None values with the result of this expression. +#' Shift the values by a given period and fill the resulting null values. +#' +#' @inheritParams Expr_shift +#' @param fill_value Fill null values with the result of this expression. 
#' @return Expr -#' @keywords Expr -#' @aliases shift_and_fill -#' @name Expr_shift_and_fill -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$select( -#' pl$lit(0:3), -#' pl$lit(0:3)$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), -#' pl$lit(0:3)$shift_and_fill(2, fill_value = pl$lit(42) / 2)$alias("shift+2") -#' ) +#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$ +#' with_columns( +#' pl$col("a")$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), +#' pl$col("a")$shift_and_fill(2, fill_value = pl$col("a") / 2)$alias("shift+2") +#' ) Expr_shift_and_fill = function(periods, fill_value) { .pr$Expr$shift_and_fill(self, periods, pl$lit(fill_value)) |> unwrap("in $shift_and_fill():") } - -#' Fill Nulls with a value or strategy. -#' @description Shift the values by value or as strategy. +#' Fill null values with a value or strategy #' -#' @param value Expr or `Into` to fill Null values with -#' @param strategy default NULL else 'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one' -#' @param limit Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. +#' @param value Expr or something coercible to an Expr +#' @param strategy Possible choices are `NULL` (default, requires a non-null +#' `value`), `"forward"`, `"backward"`, `"min"`, `"max"`, `"mean"`, `"zero"`, +#' `"one"`. +#' @param limit Number of consecutive null values to fill when using the +#' `"forward"` or `"backward"` strategy.
#' @return Expr -#' @keywords Expr -#' @aliases fill_null -#' @name Expr_fill_null -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' #' @examples -#' pl$select( -#' pl$lit(0:3)$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), -#' pl$lit(0:3)$shift_and_fill(2, fill_value = pl$lit(42) / 2)$alias("shift+2") -#' ) +#' pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ +#' with_columns( +#' value = pl$col("a")$fill_null(999), +#' backward = pl$col("a")$fill_null(strategy = "backward"), +#' mean = pl$col("a")$fill_null(strategy = "mean") +#' ) Expr_fill_null = function(value = NULL, strategy = NULL, limit = NULL) { pcase( # the wrong stuff @@ -1823,86 +1550,59 @@ Expr_fill_null = function(value = NULL, strategy = NULL, limit = NULL) { } -#' Fill Nulls Backward -#' @description Fill missing values with the next to be seen values. +#' Fill null values backward +#' +#' Fill missing values with the next to be seen values. Syntactic sugar for +#' `$fill_null(strategy = "backward")`. #' -#' @param limit Expr or `Into` The number of consecutive null values to backward fill. +#' @inheritParams Expr_fill_null #' @return Expr -#' @keywords Expr -#' @aliases backward_fill -#' @name Expr_backward_fill -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' #' @examples -#' l = list(a = c(1L, rep(NA_integer_, 3L), 10)) -#' pl$DataFrame(l)$select( -#' pl$col("a")$backward_fill()$alias("bf_null"), -#' pl$col("a")$backward_fill(limit = 0)$alias("bf_l0"), -#' pl$col("a")$backward_fill(limit = 1)$alias("bf_l1") -#' )$to_list() +#' pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ +#' with_columns( +#' backward = pl$col("a")$backward_fill() +#' ) Expr_backward_fill = function(limit = NULL) { .pr$Expr$backward_fill(self, limit) } -#' Fill Nulls Forward -#' @description Fill missing values with last seen values. 
+#' Fill null values forward #' -#' @param limit Expr or `Into` The number of consecutive null values to forward fill. -#' @return Expr -#' @keywords Expr -#' @aliases forward_fill -#' @name Expr_forward_fill -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +#' Fill missing values with the last seen values. Syntactic sugar for +#' `$fill_null(strategy = "forward")`. #' +#' @inheritParams Expr_fill_null +#' @return Expr #' @examples -#' l = list(a = c(1L, rep(NA_integer_, 3L), 10)) -#' pl$DataFrame(l)$select( -#' pl$col("a")$forward_fill()$alias("ff_null"), -#' pl$col("a")$forward_fill(limit = 0)$alias("ff_l0"), -#' pl$col("a")$forward_fill(limit = 1)$alias("ff_l1") -#' )$to_list() +#' pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ +#' with_columns( +#' forward = pl$col("a")$forward_fill() +#' ) Expr_forward_fill = function(limit = NULL) { .pr$Expr$forward_fill(self, limit) } -#' Fill Nulls Forward -#' -#' @param expr Expr or into Expr, value to fill NaNs with -#' -#' @description Fill missing values with last seen values.
+#' Fill NaN #' +#' @param expr Expr or something coercible to an Expr #' @return Expr -#' @keywords Expr -#' @aliases fill_nan -#' @name Expr_fill_nan -#' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -#' @examples -#' l = list(a = c(1, NaN, NaN, 3)) -#' pl$DataFrame(l)$select( -#' pl$col("a")$fill_nan()$alias("fill_default"), -#' pl$col("a")$fill_nan(pl$lit(NA))$alias("fill_NA"), # same as default -#' pl$col("a")$fill_nan(2)$alias("fill_float2"), -#' pl$col("a")$fill_nan("hej")$alias("fill_str") # implicit cast to Utf8 -#' )$to_list() +#' @examples +#' pl$DataFrame(a = c(NaN, 1, NaN, 2, NA))$ +#' with_columns( +#' literal = pl$col("a")$fill_nan(999), +#' # implicit coercion to string +#' string = pl$col("a")$fill_nan("invalid") +#' ) Expr_fill_nan = function(expr = NULL) { .pr$Expr$fill_nan(self, wrap_e(expr)) } -#' Get Standard Deviation +#' Get standard deviation #' -#' @param ddof integer in range `[0;255]` degrees of freedom -#' @return Expr (f64 scalar) -#' @keywords Expr -#' @name Expr_std -#' @format NULL +#' @param ddof Degrees of freedom, must be an integer between 0 and 255 +#' @return Expr #' #' @examples #' pl$select(pl$lit(1:5)$std()) @@ -1910,13 +1610,9 @@ Expr_std = function(ddof = 1) { unwrap(.pr$Expr$std(self, ddof)) } -#' Get Variance +#' Get variance #' -#' @param ddof integer in range `[0;255]` degrees of freedom -#' @return Expr (f64 scalar) -#' @keywords Expr -#' @name Expr_var -#' @format NULL +#' @inherit Expr_std params return #' #' @examples #' pl$select(pl$lit(1:5)$var()) @@ -1924,33 +1620,24 @@ Expr_var = function(ddof = 1) { unwrap(.pr$Expr$var(self, ddof)) } - -#' max -#' @keywords Expr -#' @description -#' Get maximum value.
+#' Get maximum value #' #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$max() == 3) # is true +#' pl$DataFrame(x = c(1, NA, 3))$ +#' with_columns(max = pl$col("x")$max()) Expr_max = "use_extendr_wrapper" -#' min -#' @keywords Expr -#' @description -#' Get minimum value. +#' Get minimum value #' #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$min() == 1) # is true +#' pl$DataFrame(x = c(1, NA, 3))$ +#' with_columns(min = pl$col("x")$min()) Expr_min = "use_extendr_wrapper" @@ -1959,128 +1646,106 @@ Expr_min = "use_extendr_wrapper" # In R both NA and NaN poisons, but NA has priority which is meaningful, as NA is even less information # then NaN. -#' max -#' @keywords Expr -#' @description Get maximum value, but propagate/poison encountered `NaN` values. -#' Get maximum value. +#' Get maximum value with NaN +#' +#' Get maximum value, but returns `NaN` if there are any. #' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$DataFrame(list(x = c(1, NaN, Inf, 3)))$select(pl$col("x")$nan_max()$is_nan()) # is true +#' pl$DataFrame(x = c(1, NA, 3, NaN, Inf))$ +#' with_columns(nan_max = pl$col("x")$nan_max()) Expr_nan_max = "use_extendr_wrapper" -#' min propagate NaN +#' Get minimum value with NaN #' -#' @keywords Expr -#' @description Get minimum value, but propagate/poison encountered `NaN` values. +#' Get minimum value, but returns `NaN` if there are any. 
#' @return Expr #' @docType NULL #' @format NULL -#' @details -#' See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} #' @examples -#' pl$DataFrame(list(x = c(1, NaN, -Inf, 3)))$select(pl$col("x")$nan_min()$is_nan()) # is true +#' pl$DataFrame(x = c(1, NA, 3, NaN, Inf))$ +#' with_columns(nan_min = pl$col("x")$nan_min()) Expr_nan_min = "use_extendr_wrapper" - - -#' sum -#' @keywords Expr -#' @description #' Get sum value #' #' @details -#' The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -#' Int64 before summing to prevent overflow issues. +#' The dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +#' prevent overflow issues. #' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(x = c(1L, NA, 2L)))$select(pl$col("x")$sum()) # is i32 3 (Int32 not casted) +#' pl$DataFrame(x = c(1L, NA, 2L))$ +#' with_columns(sum = pl$col("x")$sum()) Expr_sum = "use_extendr_wrapper" - - -#' mean -#' @keywords Expr -#' @description -#' Get mean value. +#' Get mean value #' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$mean() == 2) # is true +#' pl$DataFrame(x = c(1L, NA, 2L))$ +#' with_columns(mean = pl$col("x")$mean()) Expr_mean = "use_extendr_wrapper" -#' median -#' @keywords Expr -#' @description -#' Get median value. +#' Get median value #' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(x = c(1, NA, 2)))$select(pl$col("x")$median() == 1.5) # is true +#' pl$DataFrame(x = c(1L, NA, 2L))$ +#' with_columns(median = pl$col("x")$median()) Expr_median = "use_extendr_wrapper" -## TODO contribute polars: product does not support in rust i32 - #' Product -#' @keywords Expr -#' @description Compute the product of an expression. -#' @aliases Product +#' +#' Compute the product of an expression. 
#' @return Expr #' @docType NULL #' @format NULL -#' @details does not support integer32 currently, .cast() to f64 or i64 first. #' @examples -#' pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$product() == 6) # is true +#' pl$DataFrame(x = c(2L, NA, 2L))$ +#' with_columns(product = pl$col("x")$product()) Expr_product = "use_extendr_wrapper" - #' Count number of unique values -#' @keywords Expr -#' @description -#' Count number of unique values. -#' Similar to R length(unique(x)) -#' @aliases n_unique +#' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(iris)$select(pl$col("Species")$n_unique()) +#' pl$DataFrame(iris[, 4:5])$with_columns(count = pl$col("Species")$n_unique()) Expr_n_unique = "use_extendr_wrapper" -#' Approx count unique values -#' @keywords Expr -#' @description +#' Approx count unique values +#' #' This is done using the HyperLogLog++ algorithm for cardinality estimation. -#' @aliases approx_n_unique #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(iris)$select(pl$col("Species")$approx_n_unique()) +#' pl$DataFrame(iris[, 4:5])$ +#' with_columns(count = pl$col("Species")$approx_n_unique()) Expr_approx_n_unique = "use_extendr_wrapper" -#' Count `Nulls` -#' @keywords Expr -#' @aliases null_count +#' Count missing values +#' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$select(pl$lit(c(NA, "a", NA, "b"))$null_count()) +#' pl$DataFrame(x = c(NA, "a", NA, "b"))$ +#' with_columns(n_missing = pl$col("x")$null_count()) Expr_null_count = "use_extendr_wrapper" -#' Index of First Unique Value. -#' @keywords Expr +#' Index of first unique values +#' +#' This finds the position of first occurrence of each unique value. 
#' @aliases arg_unique #' @return Expr #' @docType NULL @@ -2089,13 +1754,10 @@ Expr_null_count = "use_extendr_wrapper" #' pl$select(pl$lit(c(1:2, 1:3))$arg_unique()) Expr_arg_unique = "use_extendr_wrapper" - -#' get unique values -#' @keywords Expr -#' @description -#' Get unique values of this expression. -#' Similar to R unique() -#' @param maintain_order bool, if TRUE guaranteed same order, if FALSE maybe +#' Get unique values +#' +#' @param maintain_order If `TRUE`, the unique values are returned in order of +#' appearance. #' @return Expr #' @examples #' pl$DataFrame(iris)$select(pl$col("Species")$unique()) @@ -2108,40 +1770,30 @@ Expr_unique = function(maintain_order = FALSE) { } } -#' First -#' @keywords Expr -#' @description #' Get the first value. -#' Similar to R head(x,1) +#' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$first()) +#' pl$DataFrame(x = 3:1)$with_columns(first = pl$col("x")$first()) Expr_first = "use_extendr_wrapper" -#' Last -#' @keywords Expr -#' @description -#' Get the lastvalue. -#' Similar to R syntax tail(x,1) +#' Get the last value +#' #' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$last()) +#' pl$DataFrame(x = 3:1)$with_columns(last = pl$col("x")$last()) Expr_last = "use_extendr_wrapper" - - -#' over -#' @keywords Expr -#' @description -#' Apply window function over a subgroup. -#' This is similar to a groupby + aggregation + self join. -#' Or similar to `window functions in Postgres -#' `_. -#' @param ... of strings or columns to group by +#' Apply window function over a subgroup +#' +#' This applies an expression on groups and returns the same number of rows as +#' the input (contrarily to `$group_by()` + `$agg()`). +#' +#' @param ... Character vector indicating the columns to group by. 
#' #' @return Expr #' @examples @@ -2149,8 +1801,8 @@ Expr_last = "use_extendr_wrapper" #' val = 1:5, #' a = c("+", "+", "-", "-", "+"), #' b = c("+", "-", "+", "-", "+") -#' )$select( -#' pl$col("val")$count()$over("a", "b") +#' )$with_columns( +#' count = pl$col("val")$count()$over("a", "b") #' ) #' #' over_vars = c("a", "b") @@ -2158,117 +1810,63 @@ Expr_last = "use_extendr_wrapper" #' val = 1:5, #' a = c("+", "+", "-", "-", "+"), #' b = c("+", "-", "+", "-", "+") -#' )$select( -#' pl$col("val")$count()$over(over_vars) +#' )$with_columns( +#' count = pl$col("val")$count()$over(over_vars) #' ) Expr_over = function(...) { - # combine arguments in proto expression array pra = construct_ProtoExprArray(...) - - # pass to over .pr$Expr$over(self, pra) } - -#' Get mask of unique values +#' Check whether each value is unique #' -#' @return Expr (boolean) +#' @return Expr #' @docType NULL #' @format NULL -#' @keywords Expr -#' @name Expr_is_unique -#' @format NULL #' #' @examples -#' v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -#' all.equal( -#' pl$select( -#' pl$lit(v)$is_unique()$alias("is_unique"), -#' pl$lit(v)$is_first()$alias("is_first"), -#' pl$lit(v)$is_duplicated()$alias("is_duplicated"), -#' pl$lit(v)$is_first()$not_()$alias("R_duplicated") -#' )$to_list(), -#' list( -#' is_unique = !v %in% v[duplicated(v)], -#' is_first = !duplicated(v), -#' is_duplicated = v %in% v[duplicated(v)], -#' R_duplicated = duplicated(v) -#' ) -#' ) +#' pl$DataFrame(head(mtcars[, 1:2]))$ +#' with_columns(is_unique = pl$col("mpg")$is_unique()) Expr_is_unique = "use_extendr_wrapper" -#' Get a mask of the first unique value. 
+#' Check whether each value is the first occurrence #' -#' @return Expr (boolean) +#' @return Expr #' @docType NULL #' @format NULL -#' @keywords Expr -#' @name Expr_is_first -#' @format NULL #' #' @examples -#' v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -#' all.equal( -#' pl$select( -#' pl$lit(v)$is_unique()$alias("is_unique"), -#' pl$lit(v)$is_first()$alias("is_first"), -#' pl$lit(v)$is_duplicated()$alias("is_duplicated"), -#' pl$lit(v)$is_first()$not_()$alias("R_duplicated") -#' )$to_list(), -#' list( -#' is_unique = !v %in% v[duplicated(v)], -#' is_first = !duplicated(v), -#' is_duplicated = v %in% v[duplicated(v)], -#' R_duplicated = duplicated(v) -#' ) -#' ) +#' pl$DataFrame(head(mtcars[, 1:2]))$ +#' with_columns(is_first = pl$col("mpg")$is_first()) Expr_is_first = "use_extendr_wrapper" -#' Get mask of duplicated values. +#' Check whether each value is duplicated #' -#' @return Expr (boolean) +#' This is syntactic sugar for `$is_unique()$not_()`. +#' @return Expr #' @docType NULL #' @format NULL -#' @keywords Expr -#' @aliases is_duplicated -#' @name Expr_is_duplicated -#' @format NULL -#' @details is_duplicated is the opposite of `is_unique()` -#' Looking for R like `duplicated()`?, use `some_expr$is_first()$not_()` -#' -#' @examples -#' v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -#' all.equal( -#' pl$select( -#' pl$lit(v)$is_unique()$alias("is_unique"), -#' pl$lit(v)$is_first()$alias("is_first"), -#' pl$lit(v)$is_duplicated()$alias("is_duplicated"), -#' pl$lit(v)$is_first()$not_()$alias("R_duplicated") -#' )$to_list(), -#' list( -#' is_unique = !v %in% v[duplicated(v)], -#' is_first = !duplicated(v), -#' is_duplicated = v %in% v[duplicated(v)], -#' R_duplicated = duplicated(v) -#' ) -#' ) +#' +#' @examples +#' pl$DataFrame(head(mtcars[, 1:2]))$ +#' with_columns(is_duplicated = pl$col("mpg")$is_duplicated()) Expr_is_duplicated = "use_extendr_wrapper" # TODO contribute polars, example of where NA/Null is omitted and the smallest value + #' Get quantile value.
#' -#' @param quantile numeric/Expression 0.0 to 1.0 -#' @param interpolation string value from choices "nearest", "higher", -#' "lower", "midpoint", "linear" +#' @param quantile Either a numeric value or an Expr whose value must be +#' between 0 and 1. +#' @param interpolation One of `"nearest"`, `"higher"`, `"lower"`, +#' `"midpoint"`, or `"linear"`. +#' #' @return Expr -#' @keywords Expr -#' @aliases quantile -#' @name Expr_quantile -#' @format NULL #' -#' @details `Nulls` are ignored and `NaNs` are ranked as the largest value. +#' @details +#' Null values are ignored and `NaN`s are ranked as the largest value. #' For linear interpolation `NaN` poisons `Inf`, that poisons any other value. #' #' @examples @@ -2277,142 +1875,123 @@ Expr_quantile = function(quantile, interpolation = "nearest") { unwrap(.pr$Expr$quantile(self, wrap_e(quantile), interpolation)) } - - #' Filter a single column. -#' @description -#' Mostly useful in an aggregation context. If you want to filter on a DataFrame -#' level, use `LazyFrame.filter`. #' -#' @param predicate Expr or something `Into`. Should be a boolean expression. +#' Mostly useful in an aggregation context. If you want to filter on a +#' DataFrame level, use `DataFrame$filter()` (or `LazyFrame$filter()`). +#' +#' @param predicate An Expr or something coercible to an Expr. Must return a +#' boolean. #' @return Expr -#' @keywords Expr -#' @aliases Expr_filter -#' @format NULL #' #' @examples -#' df = pl$DataFrame(list( +#' df = pl$DataFrame( #' group_col = c("g1", "g1", "g2"), #' b = c(1, 2, 3) -#' )) +#' ) +#' df #' #' df$group_by("group_col")$agg( -#' pl$col("b")$filter(pl$col("b") < 2)$sum()$alias("lt"), -#' pl$col("b")$filter(pl$col("b") >= 2)$sum()$alias("gte") +#' lt = pl$col("b")$filter(pl$col("b") < 2), +#' gte = pl$col("b")$filter(pl$col("b") >= 2) #' ) Expr_filter = function(predicate) { .pr$Expr$filter(self, wrap_e(predicate)) } -#' Where: Filter a single column. 
-#' @rdname Expr_filter +#' @inherit Expr_filter title params return +#' #' @description -#' where() is an alias for pl$filter +#' This is an alias for `$filter()`. +#' +#' +#' @examples +#' df = pl$DataFrame( +#' group_col = c("g1", "g1", "g2"), +#' b = c(1, 2, 3) +#' ) +#' df #' -#' @aliases where +#' df$group_by("group_col")$agg( +#' lt = pl$col("b")$where(pl$col("b") < 2), +#' gte = pl$col("b")$where(pl$col("b") >= 2) +#' ) Expr_where = Expr_filter - - - - -#' Explode a list or utf8 Series. -#' @description +#' Explode a list or Utf8 Series +#' #' This means that every item is expanded to a new row. #' #' @return Expr #' @docType NULL #' @format NULL -#' @keywords Expr -#' @aliases explode -#' @format NULL #' #' @details -#' explode/flatten does not support categorical +#' Categorical values are not supported. #' #' @examples -#' pl$DataFrame(list(a = letters))$select(pl$col("a")$explode()$gather(0:5)) +#' df = pl$DataFrame(x = c("abc", "ab"), y = c(list(1:3), list(3:5))) +#' df #' -#' listed_group_df = pl$DataFrame(iris[c(1:3, 51:53), ])$group_by("Species")$agg(pl$all()) -#' print(listed_group_df) -#' vectors_df = listed_group_df$select( -#' pl$col(c("Sepal.Width", "Sepal.Length"))$explode() -#' ) -#' print(vectors_df) +#' df$select(pl$col("y")$explode()) Expr_explode = "use_extendr_wrapper" +#' @inherit Expr_explode title return +#' #' @description -#' ( flatten is an alias for explode ) -#' @keywords Expr -#' @aliases flatten -#' @docType NULL -#' @format NULL -#' @format NULL -#' @name Expr_flatten -#' @rdname Expr_explode +#' This is an alias for `$explode()`. +#' +#' @examples +#' df = pl$DataFrame(x = c("abc", "ab"), y = c(list(1:3), list(3:5))) +#' df +#' +#' df$select(pl$col("y")$flatten()) Expr_flatten = "use_extendr_wrapper" -#' Take every n'th element -#' @description -#' Take every nth value in the Series and return as a new Series. 
-#' @param n positive integerish value +#' Gather every nth element +#' +#' Gather every nth value in the Series and return as a new Series. +#' @param n Positive integer. #' #' @return Expr -#' @keywords Expr -#' @aliases take_every -#' @format NULL #' #' @examples -#' pl$DataFrame(list(a = 0:24))$select(pl$col("a")$gather_every(6)) +#' pl$DataFrame(a = 0:24)$select(pl$col("a")$gather_every(6)) Expr_gather_every = function(n) { unwrap(.pr$Expr$gather_every(self, n)) } - -#' Head -#' @keywords Expr -#' @description -#' Get the head n elements. -#' Similar to R head(x) -#' @param n numeric number of elements to select from head +#' Get the first n elements +#' +#' @param n Number of elements to take. #' @return Expr -#' @aliases head #' @examples -#' # get 3 first elements -#' pl$DataFrame(list(x = 1:11))$select(pl$col("x")$head(3)) +#' pl$DataFrame(x = 1:11)$select(pl$col("x")$head(3)) Expr_head = function(n = 10) { unwrap(.pr$Expr$head(self, n = n), "in $head():") } -#' Tail -#' @keywords Expr -#' @description -#' Get the tail n elements. -#' Similar to R tail(x) -#' @param n numeric number of elements to select from tail +#' Get the last n elements +#' +#' @inheritParams Expr_head #' @return Expr -#' @aliases tail +#' #' @examples -#' # get 3 last elements -#' pl$DataFrame(list(x = 1:11))$select(pl$col("x")$tail(3)) +#' pl$DataFrame(x = 1:11)$select(pl$col("x")$tail(3)) Expr_tail = function(n = 10) { unwrap(.pr$Expr$tail(self, n = n), "in $tail():") } -#' Limit -#' @keywords Expr +#' @inherit Expr_head title params return +#' #' @description -#' Alias for Head -#' Get the head n elements. -#' Similar to R head(x) -#' @param n numeric number of elements to select from head -#' @return Expr +#' This is an alias for `$head()`. 
+#' #' @examples -#' # get 3 first elements -#' pl$DataFrame(list(x = 1:11))$select(pl$col("x")$limit(3)) +#' pl$DataFrame(x = 1:11)$select(pl$col("x")$limit(3)) Expr_limit = function(n = 10) { if (!is.numeric(n)) stop("limit: n must be numeric") unwrap(.pr$Expr$head(self, n = n)) @@ -2421,28 +2000,17 @@ Expr_limit = function(n = 10) { #' Exponentiation -#' @description Raise expression to the power of exponent. -#' @keywords Expr -#' @param exponent exponent -#' @details The R interpreter will replace the `**` with `^`, such that `**` means `^` (except in -#' strings e.g. "**"). Read further at `?"**"`. In py-polars python `^` is the XOR operator and -#' `**` is the exponentiation operator. +#' +#' Raise expression to the power of exponent. +#' +#' @param exponent Exponent value. #' @return Expr -#' @name Expr_pow -#' @aliases pow #' @examples #' # use via `pow`-method and the `^`-operator -#' pl$DataFrame(a = -1:3)$select( -#' pl$lit(2)$pow(pl$col("a"))$alias("with $pow()"), -#' 2^pl$lit(-2:2), # brief use -#' pl$lit(2)$alias("left hand side name")^pl$lit(-3:1)$alias("right hand side name dropped") +#' pl$DataFrame(a = -1:3, b = 2:6)$with_columns( +#' x = pl$col("a")$pow(2), +#' y = pl$col("a")^3 #' ) -#' -#' # exotic case where '**' will not work, but "^" will -#' safe_chr = \(...) 
tryCatch(..., error = as.character) -#' get("^")(2, pl$lit(2)) |> safe_chr() -#' get("**")(2, pl$lit(2)) |> safe_chr() -#' get("**")(2, 2) |> safe_chr() Expr_pow = function(exponent) { .pr$Expr$pow(self, exponent) |> unwrap("in $pow()") } @@ -2450,71 +2018,72 @@ Expr_pow = function(exponent) { "^.Expr" = function(e1, e2) result(wrap_e(e1)$pow(e2)) |> unwrap("using '^'-operator") -#' is_in -#' @name Expr_is_in -#' @description combine to boolean expressions with similar to `%in%` -#' @keywords Expr Expr_operators -#' @param other literal or Robj which can become a literal +#' Check whether a value is in a vector +#' +#' Notice that to check whether a factor value is in a vector of strings, you +#' need to use the string cache, either with `pl$enable_string_cache()` or +#' with `pl$with_string_cache()`. See examples. +#' +#' @inheritParams Expr_add #' @return Expr -#' @docType NULL -#' @format NULL -#' @usage Expr_is_in(other) #' @examples +#' pl$DataFrame(a = c(1:4, NA_integer_))$with_columns( +#' in_1_3 = pl$col("a")$is_in(c(1, 3)), +#' in_NA = pl$col("a")$is_in(pl$lit(NA_real_)) +#' ) #' -#' # R Na_integer -> polars Null(Int32) is in polars Null(Int32) -#' pl$DataFrame(list(a = c(1:4, NA_integer_)))$select( -#' pl$col("a")$is_in(pl$lit(NA_real_)) -#' )$to_data_frame()[[1L]] +#' # this fails because we can't compare factors to strings +#' # pl$DataFrame(a = factor(letters[1:5]))$with_columns( +#' # in_abc = pl$col("a")$is_in(c("a", "b", "c")) +#' # ) #' +#' # need to use the string cache for this +#' pl$with_string_cache({ +#' pl$DataFrame(a = factor(letters[1:5]))$with_columns( +#' in_abc = pl$col("a")$is_in(c("a", "b", "c")) +#' ) +#' }) Expr_is_in = function(other) { - .pr$Expr$is_in(self, other) |> unwrap("in $is_in()") + .pr$Expr$is_in(self, other) |> unwrap("in $is_in():") } ## TODO contribute polars, do not panic on by pointing to non positive values -#' Repeat by -#' @keywords Expr -#' @description + +#' Repeat values +#' #' Repeat the elements in this 
Series as specified in the given expression. #' The repeated elements are expanded into a `List`. -#' @param by Expr Numeric column that determines how often the values will be repeated. -#' The column will be coerced to UInt32. Give this dtype to make the coercion a -#' no-op. +#' @param by Expr that determines how often the values will be repeated. The +#' column will be coerced to UInt32. #' @return Expr #' @examples -#' df = pl$DataFrame(list(a = c("x", "y", "z"), n = c(0:2))) -#' df$select(pl$col("a")$repeat_by("n")) +#' df = pl$DataFrame(a = c("x", "y", "z"), n = c(0:2)) +#' df$with_columns(repeated = pl$col("a")$repeat_by("n")) Expr_repeat_by = function(by) { if (is.numeric(by) && any(by < 0)) stop("In repeat_by: any value less than zero is not allowed") .pr$Expr$repeat_by(self, wrap_e(by, FALSE)) } - - -#' is in between -#' @keywords Expr -#' @description -#' Check if this expression is between start and end. -#' @param start Lower bound as primitive or datetime -#' @param end Lower bound as primitive or datetime -#' @param include_bounds bool vector or scalar: -#' FALSE: Exclude both start and end (default). -#' TRUE: Include both start and end. -#' c(FALSE, FALSE): Exclude start and exclude end. -#' c(TRUE, TRUE): Include start and include end. -#' c(FALSE, TRUE): Exclude start and include end. -#' c(TRUE, FALSE): Include start and exclude end. -#' @details alias the column to 'in_between' -#' This function is equivalent to a combination of < <= >= and the &-and operator. +#' Check whether a value is between two values +#' +#' This is syntactic sugar for `x > start & x < end` (or `x >= start & x <= +#' end`). +#' @param start Lower bound, an Expr that is either numeric or datetime. +#' @param end Upper bound, an Expr that is either numeric or datetime. +#' @param include_bounds If `FALSE` (default), exclude start and end. This can +#' also be a vector of two booleans indicating whether to include the start +#' and/or the end. 
+#' #' @return Expr #' @examples -#' df = pl$DataFrame(list(num = 1:5)) -#' df$select(pl$col("num")$is_between(2, 4)) -#' df$select(pl$col("num")$is_between(2, 4, TRUE)) -#' df$select(pl$col("num")$is_between(2, 4, c(FALSE, TRUE))) -#' # start end can be a vector/expr with same length as column -#' df$select(pl$col("num")$is_between(c(0, 2, 3, 3, 3), 6)) +#' df = pl$DataFrame(num = 1:5, y = c(0, 2, 3, 3, 3)) +#' df$with_columns( +#' bet_2_4_no_bounds = pl$col("num")$is_between(2, 4), +#' bet_2_4_with_bounds = pl$col("num")$is_between(2, 4, TRUE), +#' bet_2_4_upper_bound = pl$col("num")$is_between(2, 4, c(FALSE, TRUE)), +#' between_y_6 = pl$col("num")$is_between(pl$col("y"), 6) +#' ) Expr_is_between = function(start, end, include_bounds = FALSE) { - # check if ( !length(include_bounds) %in% 1:2 || !is.logical(include_bounds) || @@ -2529,35 +2098,25 @@ Expr_is_between = function(start, end, include_bounds = FALSE) { with_start = include_bounds[1L] with_end = if (length(include_bounds) == 1) include_bounds else include_bounds[2] - # build and return boolean expression within_start_e = if (with_start) self >= start_e else self > start_e within_end_e = if (with_end) self <= end_e else self < end_e (within_start_e & within_end_e)$alias("is_between") } - - -#' hash -#' @keywords Expr -#' @description -#' Hash the elements in the selection. -#' The hash value is of type `UInt64`. -#' @param seed Random seed parameter. Defaults to 0. -#' @param seed_1 Random seed parameter. Defaults to arg seed. -#' @param seed_2 Random seed parameter. Defaults to arg seed. -#' @param seed_3 Random seed parameter. Defaults to arg seed. -#' The column will be coerced to UInt32. Give this dtype to make the coercion a -#' no-op. +#' Hash elements #' -#' @details WARNING in this version of r-polars seed / seed_x takes no effect. -#' Possibly a bug in upstream rust-polars project. +#' The hash value is of type `UInt64`. +#' @param seed Random seed parameter. Defaults to 0.
Doesn't have any effect +#' for now. +#' @param seed_1,seed_2,seed_3 Random seed parameter. Defaults to arg seed. +#' The column will be coerced to UInt32. #' #' @return Expr #' @aliases hash #' @examples -#' df = pl$DataFrame(iris) -#' df$select(pl$all()$head(2)$hash(1234)$cast(pl$Utf8))$to_list() +#' df = pl$DataFrame(iris[1:3, c(1, 2)]) +#' df$with_columns(pl$all()$hash(1234)$name$suffix("_hash")) Expr_hash = function(seed = 0, seed_1 = NULL, seed_2 = NULL, seed_3 = NULL) { k0 = seed k1 = seed_1 %||% seed @@ -2566,19 +2125,17 @@ Expr_hash = function(seed = 0, seed_1 = NULL, seed_2 = NULL, seed_3 = NULL) { unwrap(.pr$Expr$hash(self, k0, k1, k2, k3), "in $hash()") } - -#' reinterpret bits -#' @keywords Expr -#' @description -#' Reinterpret the underlying bits as a signed/unsigned integer. -#' This operation is only allowed for 64bit integers. For lower bits integers, -#' you can safely use that cast operation. -#' @param signed bool reinterpret into Int64 else UInt64 +#' Reinterpret bits +#' +#' Reinterpret the underlying bits as a signed/unsigned integer. This +#' operation is only allowed for Int64. For lower bits integers, you can +#' safely use the cast operation. +#' @param signed If `TRUE` (default), reinterpret into Int64. Otherwise, it +#' will be reinterpreted in UInt64. #' @return Expr -#' @aliases reinterpret #' @examples -#' df = pl$DataFrame(iris) -#' df$select(pl$all()$head(2)$hash(1, 2, 3, 4)$reinterpret())$to_data_frame() +#' df = pl$DataFrame(x = 1:5, schema = list(x = pl$Int64)) +#' df$select(pl$all()$reinterpret()) Expr_reinterpret = function(signed = TRUE) { if (!is_bool(signed)) stop("in reinterpret() : arg signed must be a bool") .pr$Expr$reinterpret(self, signed) @@ -2586,24 +2143,23 @@ Expr_reinterpret = function(signed = TRUE) { #' Inspect evaluated Series -#' @keywords Expr -#' @description +#' #' Print the value that this expression evaluates to and pass on the value. 
#' The printing will happen when the expression evaluates, not when it is formed. -#' @param fmt format string, should contain one set of `{}` where object will be printed -#' This formatting mimics python "string".format() use in pypolars. The string can -#' contain any thing but should have exactly one set of curly bracket `{}`. +#' +#' @param fmt format string, should contain one set of `{}` where object will be +#' printed. This formatting mimics python "string".format() use in py-polars. #' @return Expr -#' @aliases inspect #' @examples #' pl$select(pl$lit(1:5)$inspect( -#' "before dropping half the column it was:{}and not it is dropped" +#' "Here's what the Series looked like before keeping the first two values: {}" #' )$head(2)) Expr_inspect = function(fmt = "{}") { # check fmt and create something to print before and after printing Series. if (!is_string(fmt)) stop("Inspect: arg fmt is not a string (length=1)") strs = strsplit(fmt, split = "\\{\\}")[[1L]] if (identical(strs, "")) strs <- c("", "") + if (length(strs) == 1 && grepl("\\{\\}$", fmt)) strs <- c(strs, "") if (length(strs) != 2L || length(gregexpr("\\{\\}", fmt)[[1L]]) != 1L) { result(stop(paste0( "Inspect: failed to parse arg fmt [", fmt, "] ", @@ -2625,25 +2181,28 @@ Expr_inspect = function(fmt = "{}") { unwrap("in $inspect()") } - - -#' Interpolate `Nulls` -#' @keywords Expr -#' @param method string 'linear' or 'nearest', default "linear" -#' @description -#' Fill nulls with linear interpolation over missing values. -#' Can also be used to regrid data to a new grid - see examples below. +#' Interpolate null values +#' +#' Fill nulls with linear interpolation using non-missing values. Can also be +#' used to regrid data to a new grid - see examples below. +#' +#' @param method String, either `"linear"` (default) or `"nearest"`. 
#' @return Expr -#' @aliases interpolate #' @examples -#' pl$select(pl$lit(c(1, NA, 4, NA, 100, NaN, 150))$interpolate()) +#' pl$DataFrame(x = c(1, NA, 4, NA, 100, NaN, 150))$ +#' with_columns( +#' interp_lin = pl$col("x")$interpolate(), +#' interp_near = pl$col("x")$interpolate("nearest") +#' ) #' #' # x, y interpolation over a grid -#' df_original_grid = pl$DataFrame(list( +#' df_original_grid = pl$DataFrame( #' grid_points = c(1, 3, 10), #' values = c(2.0, 6.0, 20.0) -#' )) -#' df_new_grid = pl$DataFrame(list(grid_points = (1:10) * 1.0)) +#' ) +#' df_original_grid +#' df_new_grid = pl$DataFrame(grid_points = (1:10) * 1.0) +#' df_new_grid #' #' # Interpolate from this to the new grid #' df_new_grid$join( @@ -2674,13 +2233,11 @@ prepare_rolling_window_args = function( ## and check if it wont mess up optimzation (maybe it is tested for). -#' Rolling Min -#' @keywords Expr -#' @description -#' Apply a rolling min (moving min) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. +#' Rolling minimum +#' +#' Compute the rolling (= moving) min over the values in this array. A window of +#' length `window_size` will traverse the array. The values that fill this window +#' will (optionally) be multiplied with the weights given by the `weights` vector. #' #' @param window_size #' The length of the window. Can be a fixed integer size, or a dynamic temporal @@ -2698,33 +2255,25 @@ prepare_rolling_window_args = function( #' - 1i (1 index count) #' If the dynamic string language is used, the `by` and `closed` arguments must #' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window.
-#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. -#' +#' @param weights An optional slice with the same length as the window that will +#' be multiplied elementwise with the values in the window. +#' @param min_periods The number of values in the window that should be non-null +#' before computing a result. If `NULL`, it will be set equal to window size. +#' @param center Set the labels at the center of the window +#' @param by If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you +#' must set the column that will be used to determine the windows. This column +#' must be of DataType Date or DateTime. +#' @param closed String, one of `"left"`, `"right"`, `"both"`, `"none"`. Defines +#' whether the temporal window interval is closed or not. #' #' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: #' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size +#' window, consider using `$rolling()`: this method can cache the window size #' computation.
#' @return Expr -#' @aliases Expr_rolling_min #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_min(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_min = pl$col("a")$rolling_min(window_size = 2)) Expr_rolling_min = function( window_size, weights = NULL, @@ -2740,57 +2289,16 @@ Expr_rolling_min = function( unwrap("in $rolling_min():") } -#' Rolling max -#' @keywords Expr -#' @description -#' Apply a rolling max (moving max) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. -#' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. 
+#' Rolling maximum #' +#' Compute the rolling (= moving) max over the values in this array. A window of +#' length `window_size` will traverse the array. The values that fill this window +#' will (optionally) be multiplied with the weights given by the `weights` vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. -#' @return Expr -#' @aliases Expr_rolling_max +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_max(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_max = pl$col("a")$rolling_max(window_size = 2)) Expr_rolling_max = function( window_size, weights = NULL, @@ -2807,55 +2315,15 @@ Expr_rolling_max = function( } #' Rolling mean -#' @keywords Expr -#' @description -#' Apply a rolling mean (moving mean) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. #' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set.
-#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. +#' Compute the rolling (= moving) mean over the values in this array. A window of +#' length `window_size` will traverse the array. The values that fill this window +#' will (optionally) be multiplied with the weights given by the `weights` vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. -#' @return Expr -#' @aliases Expr_rolling_mean +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_mean(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_mean = pl$col("a")$rolling_mean(window_size = 2)) Expr_rolling_mean = function( window_size, weights = NULL, @@ -2871,58 +2339,16 @@ Expr_rolling_mean = function( unwrap("in $rolling_mean():") } - - #' Rolling sum -#' @keywords Expr -#' @description -#' Apply a rolling sum (moving sum) over the values in this array. -#' A window of length `window_size` will traverse the array.
The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. #' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. +#' Compute the rolling (= moving) sum over the values in this array. A window of +#' length `window_size` will traverse the array. The values that fill this window +#' will (optionally) be multiplied with the weights given by the `weights` vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation.
-#' @return Expr -#' @aliases Expr_rolling_sum +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_sum(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_sum = pl$col("a")$rolling_sum(window_size = 2)) Expr_rolling_sum = function( window_size, weights = NULL, @@ -2939,57 +2365,17 @@ Expr_rolling_sum = function( } -#' Rolling std -#' @keywords Expr -#' @description -#' Apply a rolling std (moving std) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. -#' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. 
-#' Define whether the temporal window interval is closed or not. +#' Rolling standard deviation #' +#' Compute the rolling (= moving) standard deviation over the values in this +#' array. A window of length `window_size` will traverse the array. The values +#' that fill this window will (optionally) be multiplied with the weights given +#' by the `weight` vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. -#' @return Expr -#' @aliases Expr_rolling_std +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_std(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_std = pl$col("a")$rolling_std(window_size = 2)) Expr_rolling_std = function( window_size, weights = NULL, @@ -3005,57 +2391,17 @@ Expr_rolling_std = function( unwrap("in $rolling_std(): ") } -#' Rolling var -#' @keywords Expr -#' @description -#' Apply a rolling var (moving var) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. -#' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. 
-#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. +#' Rolling variance #' +#' Compute the rolling (= moving) variance over the values in this array. A +#' window of length `window_size` will traverse the array. The values that fill +#' this window will (optionally) be multiplied with the weights given by the +#' `weight` vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. -#' @return Expr -#' @aliases Expr_rolling_var +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_var(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_var = pl$col("a")$rolling_var(window_size = 2)) Expr_rolling_var = function( window_size, weights = NULL, @@ -3072,56 +2418,16 @@ Expr_rolling_var = function( } #' Rolling median -#' @keywords Expr -#' @description -#' Apply a rolling median (moving median) over the values in this array. -#' A window of length `window_size` will traverse the array. 
The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. -#' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. -#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. #' +#' Compute the rolling (= moving) median over the values in this array. A window +#' of length `window_size` will traverse the array. The values that fill this +#' window will (optionally) be multiplied with the weights given by the `weight` +#' vector. #' -#' @details -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. 
-#' @return Expr -#' @aliases Expr_rolling_median +#' @inherit Expr_rolling_min params details return #' @examples -#' pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_median(window_size = 2)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_median = pl$col("a")$rolling_median(window_size = 2)) Expr_rolling_median = function( window_size, weights = NULL, @@ -3138,64 +2444,21 @@ Expr_rolling_median = function( ## TODO contribute polars arg center only allows center + right alignment, also implement left + #' Rolling quantile -#' @keywords Expr -#' @description -#' Apply a rolling quantile (moving quantile) over the values in this array. -#' A window of length `window_size` will traverse the array. The values that fill -#' this window will (optionally) be multiplied with the weights given by the -#' `weight` vector. The resulting values will be aggregated to their sum. -#' -#' @param quantile Quantile between 0.0 and 1.0. -#' @param interpolation choice c('nearest', 'higher', 'lower', 'midpoint', 'linear') -#' -#' @param window_size -#' The length of the window. Can be a fixed integer size, or a dynamic temporal -#' size indicated by the following string language: -#' - 1ns (1 nanosecond) -#' - 1us (1 microsecond) -#' - 1ms (1 millisecond) -#' - 1s (1 second) -#' - 1m (1 minute) -#' - 1h (1 hour) -#' - 1d (1 day) -#' - 1w (1 week) -#' - 1mo (1 calendar month) -#' - 1y (1 calendar year) -#' - 1i (1 index count) -#' If the dynamic string language is used, the `by` and `closed` arguments must -#' also be set. -#' @param weights -#' An optional slice with the same length as the window that will be multiplied -#' elementwise with the values in the window. -#' @param min_periods -#' The number of values in the window that should be non-null before computing -#' a result. If None, it will be set equal to window size. 
-#' @param center -#' Set the labels at the center of the window -#' @param by -#' If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must -#' set the column that will be used to determine the windows. This column must -#' be of DataType: Date or DateTime. -#' @param closed string option `c("left", "right", "both", "none")`. -#' Define whether the temporal window interval is closed or not. -#' -#' -#' @details #' +#' Compute the rolling (= moving) quantile over the values in this array. A +#' window of length `window_size` will traverse the array. The values that fill +#' this window will (optionally) be multiplied with the weights given by the +#' `weight` vector. #' -#' This functionality is experimental and may change without it being considered a -#' breaking change. -#' Notes: -#' If you want to compute multiple aggregation statistics over the same dynamic -#' window, consider using `groupby_rolling` this method can cache the window size -#' computation. -#' @return Expr -#' @aliases Expr_rolling_quantile +#' @inherit Expr_rolling_min params details return +#' @param quantile Quantile between 0 and 1. +#' @param interpolation String, one of `"nearest"`, `"higher"`, `"lower"`, +#' `"midpoint"`, `"linear"`. #' @examples -#' pl$DataFrame(list(a = 1:6))$select( -#' pl$col("a")$rolling_quantile(window_size = 2, quantile = .5) -#' ) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_quant = pl$col("a")$rolling_quantile(0.3, window_size = 2)) Expr_rolling_quantile = function( quantile, interpolation = "nearest", @@ -3216,163 +2479,106 @@ Expr_rolling_quantile = function( #' Rolling skew #' -#' @param window_size integerish, Size of the rolling window -#' @param bias bool default = TRUE, If False, then the calculations are corrected for statistical bias. +#' Compute the rolling (= moving) skewness over the values in this array. A +#' window of length `window_size` will traverse the array. 
#' -#' @keywords Expr -#' @description -#' Compute a rolling skew. -#' @return Expr -#' @aliases rolling_skew +#' @inherit Expr_rolling_min params return +#' @param bias If `FALSE`, the calculations are corrected for statistical bias. + #' @details -#' Extra comments copied from rust-polars_0.25.1 -#' Compute the sample skewness of a data set. -#' #' For normally distributed data, the skewness should be about zero. For #' uni-modal continuous distributions, a skewness value greater than zero means -#' that there is more weight in the right tail of the distribution. The -#' function `skewtest` can be used to determine if the skewness value -#' is close enough to zero, statistically speaking. -#' see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024 +#' that there is more weight in the right tail of the distribution. #' #' @examples -#' pl$DataFrame(list(a = iris$Sepal.Length))$select(pl$col("a")$rolling_skew(window_size = 4)$head(10)) +#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ +#' with_columns(roll_skew = pl$col("a")$rolling_skew(window_size = 2)) Expr_rolling_skew = function(window_size, bias = TRUE) { unwrap(.pr$Expr$rolling_skew(self, window_size, bias)) } - -#' Abs -#' @description Compute absolute values -#' @keywords Expr -#' @return Exprs abs +#' Compute the absolute values +#' +#' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(list(a = -1:1))$select(pl$col("a"), pl$col("a")$abs()$alias("abs")) +#' pl$DataFrame(a = -1:1)$ +#' with_columns(abs = pl$col("a")$abs()) Expr_abs = "use_extendr_wrapper" - -#' Arg Sort -#' @description argsort is a alias for arg_sort -#' @rdname Expr_arg_sort -#' @aliases argsort -#' @keywords Expr -Expr_argsort = Expr_arg_sort - - - -#' Rank -#' @description Assign ranks to data, dealing with ties appropriately. 
-#' @param method string option 'average', 'min', 'max', 'dense', 'ordinal', 'random' -#' -#' #' The method used to assign ranks to tied elements. -#' The following methods are available (default is 'average'): -#' - 'average' : The average of the ranks that would have been assigned to -#' all the tied values is assigned to each value. -#' - 'min' : The minimum of the ranks that would have been assigned to all -#' the tied values is assigned to each value. (This is also referred to -#' as "competition" ranking.) -#' - 'max' : The maximum of the ranks that would have been assigned to all -#' the tied values is assigned to each value. -#' - 'dense' : Like 'min', but the rank of the next highest element is -#' assigned the rank immediately after those assigned to the tied -#' elements. -#' - 'ordinal' : All values are given a distinct rank, corresponding to -#' the order that the values occur in the Series. -#' - 'random' : Like 'ordinal', but the rank for ties is not dependent -#' on the order that the values occur in the Series. -#' +#' Rank elements +#' +#' Assign ranks to data, dealing with ties appropriately. +#' +#' @param method String, one of `"average"` (default), `"min"`, `"max"`, +#' `"dense"`, `"ordinal"`, `"random"`. The method used to assign ranks to tied +#' elements: +#' - `"average"`: The average of the ranks that would have been assigned to +#' all the tied values is assigned to each value. +#' - `"min"`: The minimum of the ranks that would have been assigned to all +#' the tied values is assigned to each value. (This is also referred to +#' as "competition" ranking.) +#' - `"max"` : The maximum of the ranks that would have been assigned to all +#' the tied values is assigned to each value. +#' - `"dense"`: Like 'min', but the rank of the next highest element is assigned +#' the rank immediately after those assigned to the tied elements. 
+#' - `"ordinal"` : All values are given a distinct rank, corresponding to the +#' order that the values occur in the Series. +#' - `"random"` : Like 'ordinal', but the rank for ties is not dependent on the +#' order that the values occur in the Series. #' @param descending Rank in descending order. #' @return Expr -#' @aliases rank -#' @keywords Expr #' @examples #' # The 'average' method: -#' df = pl$DataFrame(list(a = c(3, 6, 1, 1, 6))) -#' df$select(pl$col("a")$rank()) +#' pl$DataFrame(a = c(3, 6, 1, 1, 6))$ +#' with_columns(rank = pl$col("a")$rank()) #' #' # The 'ordinal' method: -#' df = pl$DataFrame(list(a = c(3, 6, 1, 1, 6))) -#' df$select(pl$col("a")$rank("ordinal")) +#' pl$DataFrame(a = c(3, 6, 1, 1, 6))$ +#' with_columns(rank = pl$col("a")$rank("ordinal")) Expr_rank = function(method = "average", descending = FALSE) { unwrap(.pr$Expr$rank(self, method, descending)) } - - -#' Diff -#' @description Calculate the n-th discrete difference. -#' @param n Integerish Number of slots to shift. -#' @param null_behavior option default 'ignore', else 'drop' -#' @return Expr -#' @aliases diff -#' @keywords Expr +#' Difference +#' +#' Calculate the n-th discrete difference. +#' +#' @param n Number of slots to shift. +#' @param null_behavior String, either `"ignore"` (default), else `"drop"`. +#' @return Expr #' @examples -#' pl$DataFrame(list(a = c(20L, 10L, 30L, 40L)))$select( -#' pl$col("a")$diff()$alias("diff_default"), -#' pl$col("a")$diff(2, "ignore")$alias("diff_2_ignore") +#' pl$DataFrame(a = c(20L, 10L, 30L, 40L))$with_columns( +#' diff_default = pl$col("a")$diff(), +#' diff_2_ignore = pl$col("a")$diff(2, "ignore") #' ) Expr_diff = function(n = 1, null_behavior = "ignore") { unwrap(.pr$Expr$diff(self, n, null_behavior)) } - - - -#' Pct change -#' @description -#' Computes percentage change between values. -#' Percentage change (as fraction) between current element and most-recent -#' non-null element at least ``n`` period(s) before the current element. 
+#' Percentage change +#' +#' Computes percentage change (as fraction) between current element and most- +#' recent non-null element at least `n` period(s) before the current element. #' Computes the change from the previous row by default. -#' @param n periods to shift for forming percent change. -#' @return Expr -#' @aliases pct_change -#' @keywords Expr +#' +#' @param n Periods to shift for computing percent change. +#' @return Expr #' @examples -#' df = pl$DataFrame(list(a = c(10L, 11L, 12L, NA_integer_, 12L))) -#' df$with_columns(pl$col("a")$pct_change()$alias("pct_change")) +#' pl$DataFrame(a = c(10L, 11L, 12L, NA_integer_, 12L))$ +#' with_columns(pct_change = pl$col("a")$pct_change()) Expr_pct_change = function(n = 1) { unwrap(.pr$Expr$pct_change(self, n)) } - - #' Skewness -#' @description -#' Compute the sample skewness of a data set. -#' @param bias If False, then the calculations are corrected for statistical bias. -#' @return Expr -#' @aliases skew -#' @keywords Expr -#' @details -#' For normally distributed data, the skewness should be about zero. For -#' unimodal continuous distributions, a skewness value greater than zero means -#' that there is more weight in the right tail of the distribution. The -#' function `skewtest` can be used to determine if the skewness value -#' is close enough to zero, statistically speaking. #' -#' See scipy.stats for more information. -#' -#' Notes -#' ----- -#' The sample skewness is computed as the Fisher-Pearson coefficient -#' of skewness, i.e. -#' -#' \eqn{ g_1=\frac{m_3}{m_2^{3/2}}} -#' -#' where -#' -#' \eqn{ m_i=\frac{1}{N}\sum_{n=1}^N(x[n]-\bar{x})^i} -#' -#' is the biased sample :math:`i\texttt{th}` central moment, and \eqn{\bar{x}} is -#' the sample mean. If ``bias`` is False, the calculations are -#' corrected for bias and the value computed is the adjusted -#' Fisher-Pearson standardized moment coefficient, i.e. 
-#' -#' \eqn{ G_1 = \frac{k_3}{k_2^{3/2}} = \frac{\sqrt{N(N-1)}}{N-2}\frac{m_3}{m_2^{3/2}}} -#' @references https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html?highlight=skew#scipy.stats.skew +#' Compute the sample skewness of a data set. +#' @param bias If `FALSE`, then the calculations are corrected for statistical +#' bias. +#' @return Expr +#' @inherit Expr_rolling_skew details #' @examples #' df = pl$DataFrame(list(a = c(1:3, 2:1))) #' df$select(pl$col("a")$skew()) @@ -3380,309 +2586,228 @@ Expr_skew = function(bias = TRUE) { .pr$Expr$skew(self, bias) } - #' Kurtosis -#' @description +#' #' Compute the kurtosis (Fisher or Pearson) of a dataset. #' -#' @param fisher bool se details -#' @param bias bool, If FALSE, then the calculations are corrected for statistical bias. +#' @param fisher If `TRUE` (default), Fisher’s definition is used (normal, +#' centered at 0). Otherwise, Pearson’s definition is used (normal, centered at +#' 3). +#' @inheritParams Expr_rolling_skew #' -#' @return Expr -#' @aliases kurtosis -#' @keywords Expr +#' @return Expr #' @details -#' Kurtosis is the fourth central moment divided by the square of the -#' variance. If Fisher's definition is used, then 3.0 is subtracted from -#' the result to give 0.0 for a normal distribution. -#' If bias is False then the kurtosis is calculated using k statistics to -#' eliminate bias coming from biased moment estimators -#' See scipy.stats for more information -#' -#' #' See scipy.stats for more information. +#' Kurtosis is the fourth central moment divided by the square of the variance. +#' If Fisher's definition is used, then 3 is subtracted from the result to +#' give 0 for a normal distribution. #' -#' @references https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html?highlight=kurtosis +#' If bias is `FALSE`, then the kurtosis is calculated using `k` statistics to +#' eliminate bias coming from biased moment estimators. 
#' #' @examples -#' df = pl$DataFrame(list(a = c(1:3, 2:1))) -#' df$select(pl$col("a")$kurtosis()) +#' pl$DataFrame(a = c(1:3, 2:1))$ +#' with_columns(kurt = pl$col("a")$kurtosis()) Expr_kurtosis = function(fisher = TRUE, bias = TRUE) { .pr$Expr$kurtosis(self, fisher, bias) } - - -#' Clip -#' @description -#' Clip (limit) the values in an array to a `min` and `max` boundary. -#' @param min Minimum Value, ints and floats or any literal expression of ints and floats -#' @param max Maximum Value, ints and floats or any literal expression of ints and floats -#' @return Expr -#' @aliases clip -#' @keywords Expr -#' @details -#' Only works for numerical types. -#' If you want to clip other dtypes, consider writing a "when, then, otherwise" -#' expression. See :func:`when` for more information. +#' Clip elements +#' +#' Clip (limit) the values in an array to a `min` and `max` boundary. This only +#' works for numerical types. +#' @param min Minimum value, Expr returning a numeric. +#' @param max Maximum value, Expr returning a numeric. +#' @return Expr #' #' @examples -#' df = pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L)) -#' df$with_columns(pl$col("foo")$clip(1L, 10L)$alias("foo_clipped")) +#' pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ +#' with_columns(clipped = pl$col("foo")$clip(1, 10)) Expr_clip = function(min, max) { unwrap(.pr$Expr$clip(self, wrap_e(min), wrap_e(max))) } -#' Clip min -#' @rdname Expr_clip -#' @aliases clip_min -#' @keywords Expr +#' Clip elements below minimum value +#' +#' Replace all values below a minimum value by this minimum value. 
+#' @inheritParams Expr_clip +#' #' @examples -#' df$with_columns(pl$col("foo")$clip_min(1L)$alias("foo_clipped")) +#' pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ +#' with_columns(clipped = pl$col("foo")$clip_min(1)) Expr_clip_min = function(min) { unwrap(.pr$Expr$clip_min(self, wrap_e(min))) } -#' Clip max -#' @rdname Expr_clip -#' @aliases clip_max -#' @keywords Expr +#' Clip elements above maximum value +#' +#' Replace all values above a maximum value by this maximum value. +#' @inheritParams Expr_clip +#' #' @examples -#' df$with_columns(pl$col("foo")$clip_max(10L)$alias("foo_clipped")) +#' pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ +#' with_columns(clipped = pl$col("foo")$clip_max(10)) Expr_clip_max = function(max) { unwrap(.pr$Expr$clip_max(self, wrap_e(max))) } - -#' Upper bound -#' @name Expr_upper_lower_bound -#' @description -#' Calculate the upper/lower bound. -#' Returns a unit Series with the highest value possible for the dtype of this -#' expression. -#' @details -#' Notice lower bound i32 exported to R is NA_integer_ for now -#' @return Expr +#' Find the upper bound of a DataType +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases upper_bound -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(i32 = 1L, f64 = 1)$select(pl$all()$upper_bound()) +#' pl$DataFrame(x = c(1, 2, 3), y = -2:0, +#' schema = list(x = pl$Float64, y = pl$Int32))$ +#' select(pl$all()$upper_bound()) Expr_upper_bound = "use_extendr_wrapper" - -#' Lower bound -#' @rdname Expr_upper_lower_bound -#' @aliases lower_bound -#' @format NULL -#' @keywords Expr +#' Find the lower bound of a DataType +#' +#' @return Expr #' @docType NULL #' @format NULL #' @examples -#' pl$DataFrame(i32 = 1L, f64 = 1)$select(pl$all()$lower_bound()) +#' pl$DataFrame(x = 1:3, y = 1:3, +#' schema = list(x = pl$UInt32, y = pl$Int32))$ +#' select(pl$all()$lower_bound()) Expr_lower_bound = "use_extendr_wrapper" - - -#' Sign -#' @description -#' Compute the element-wise 
indication of the sign. -#' @return Expr +#' Get the sign of elements +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases sign -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(.9, -0, 0, 4, NA_real_))$select(pl$col("a")$sign()) +#' pl$DataFrame(a = c(.9, -3, -0, 0, 4, NA_real_))$ +#' with_columns(sign = pl$col("a")$sign()) Expr_sign = "use_extendr_wrapper" - -#' Sin -#' @description -#' Compute the element-wise value for the sine. -#' @details Evaluated Series has dtype Float64 +#' Compute sine +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases sin -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$sin()) +#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ +#' with_columns(sine = pl$col("a")$sin()) Expr_sin = "use_extendr_wrapper" - -#' Cos -#' @description -#' Compute the element-wise value for the cosine. -#' @details Evaluated Series has dtype Float64 +#' Compute cosine +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases cos -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$cos()) +#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ +#' with_columns(cosine = pl$col("a")$cos()) Expr_cos = "use_extendr_wrapper" - -#' Tan -#' @description -#' Compute the element-wise value for the tangent. -#' @details Evaluated Series has dtype Float64 +#' Compute tangent +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases Tan -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$tan()) +#' pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ +#' with_columns(tangent = pl$col("a")$tan()) Expr_tan = "use_extendr_wrapper" -#' Arcsin -#' @description -#' Compute the element-wise value for the inverse sine. 
-#' @details Evaluated Series has dtype Float64 +#' Compute inverse sine +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases arcsin -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, sin(0.5), 0, 1, NA_real_))$select(pl$col("a")$arcsin()) +#' pl$DataFrame(a = c(-1, sin(0.5), 0, 1, NA_real_))$ +#' with_columns(arcsin = pl$col("a")$arcsin()) Expr_arcsin = "use_extendr_wrapper" -#' Arccos -#' @description -#' Compute the element-wise value for the inverse cosine. -#' @details Evaluated Series has dtype Float64 +#' Compute inverse cosine +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases arccos -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, cos(0.5), 0, 1, NA_real_))$select(pl$col("a")$arccos()) +#' pl$DataFrame(a = c(-1, cos(0.5), 0, 1, NA_real_))$ +#' with_columns(arccos = pl$col("a")$arccos()) Expr_arccos = "use_extendr_wrapper" - -#' Arctan -#' @description -#' Compute the element-wise value for the inverse tangent. -#' @details Evaluated Series has dtype Float64 +#' Compute inverse tangent +#' #' @return Expr #' @docType NULL #' @format NULL -#' @aliases arctan -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, tan(0.5), 0, 1, NA_real_))$select(pl$col("a")$arctan()) +#' pl$DataFrame(a = c(-1, tan(0.5), 0, 1, NA_real_))$ +#' with_columns(arctan = pl$col("a")$arctan()) Expr_arctan = "use_extendr_wrapper" - - -#' Sinh -#' @description -#' Compute the element-wise value for the hyperbolic sine. 
-#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute hyperbolic sine +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases sinh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, asinh(0.5), 0, 1, NA_real_))$select(pl$col("a")$sinh()) +#' pl$DataFrame(a = c(-1, asinh(0.5), 0, 1, NA_real_))$ +#' with_columns(sinh = pl$col("a")$sinh()) Expr_sinh = "use_extendr_wrapper" -#' Cosh -#' @description -#' Compute the element-wise value for the hyperbolic cosine. -#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute hyperbolic cosine +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases cosh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, acosh(1.5), 0, 1, NA_real_))$select(pl$col("a")$cosh()) +#' pl$DataFrame(a = c(-1, acosh(1.5), 0, 1, NA_real_))$ +#' with_columns(cosh = pl$col("a")$cosh()) Expr_cosh = "use_extendr_wrapper" -#' Tanh -#' @description -#' Compute the element-wise value for the hyperbolic tangent. -#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute hyperbolic tangent +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases tanh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, atanh(0.5), 0, 1, NA_real_))$select(pl$col("a")$tanh()) +#' pl$DataFrame(a = c(-1, atanh(0.5), 0, 1, NA_real_))$ +#' with_columns(tanh = pl$col("a")$tanh()) Expr_tanh = "use_extendr_wrapper" -#' Arcsinh -#' @description -#' Compute the element-wise value for the inverse hyperbolic sine.
-#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute inverse hyperbolic sine +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases arcsinh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, sinh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arcsinh()) +#' pl$DataFrame(a = c(-1, sinh(0.5), 0, 1, NA_real_))$ +#' with_columns(arcsinh = pl$col("a")$arcsinh()) Expr_arcsinh = "use_extendr_wrapper" -#' Arccosh -#' @description -#' Compute the element-wise value for the inverse hyperbolic cosine. -#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute inverse hyperbolic cosine +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases arccosh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, cosh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arccosh()) +#' pl$DataFrame(a = c(-1, cosh(0.5), 0, 1, NA_real_))$ +#' with_columns(arccosh = pl$col("a")$arccosh()) Expr_arccosh = "use_extendr_wrapper" -#' Arctanh -#' @description -#' Compute the element-wise value for the inverse hyperbolic tangent. -#' @details Evaluated Series has dtype Float64 -#' @return Expr +#' Compute inverse hyperbolic tangent +#' +#' @return Expr #' @docType NULL #' @format NULL -#' @aliases arctanh -#' @format NULL -#' @keywords Expr #' @examples -#' pl$DataFrame(a = c(-1, tanh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arctanh()) +#' pl$DataFrame(a = c(-1, tanh(0.5), 0, 1, NA_real_))$ +#' with_columns(arctanh = pl$col("a")$arctanh()) Expr_arctanh = "use_extendr_wrapper" - #' Reshape -#' @description -#' Reshape this Expr to a flat Series or a Series of Lists. -#' @param dims -#' numeric vec of the dimension sizes. If a -1 is used in any of the dimensions, that -#' dimension is inferred. -#' @return Expr -#' @aliases reshape -#' @format NULL -#' @keywords Expr +#' +#' Reshape an Expr to a flat Series or a Series of Lists. +#' @param dims Numeric vec of the dimension sizes. 
If a -1 is used in any of the +#' dimensions, that dimension is inferred. +#' @return Expr #' @examples #' pl$select(pl$lit(1:12)$reshape(c(3, 4))) #' pl$select(pl$lit(1:12)$reshape(c(3, -1))) @@ -3692,44 +2817,30 @@ Expr_reshape = function(dims) { unwrap(.pr$Expr$reshape(self, as.numeric(dims))) } - -#' Shuffle -#' @description -#' Shuffle the contents of this expr. -#' @param seed numeric value of 0 to 2^52 -#' Seed for the random number generator. If set to Null (default), a random -#' seed value integerish value between 0 and 10000 is picked -#' @return Expr -#' @aliases shuffle -#' @format NULL -#' @keywords Expr +#' Shuffle values +#' +#' @param seed Seed for the random number generator (numeric, from 0 to 2^52). +#' If `NULL` (default), a random seed value between 0 and 10000 is picked. +#' @return Expr #' @examples -#' pl$DataFrame(a = 1:3)$select(pl$col("a")$shuffle(seed = 1)) +#' pl$DataFrame(a = 1:4)$with_columns(shuff = pl$col("a")$shuffle(seed = 1)) Expr_shuffle = function(seed = NULL) { .pr$Expr$shuffle(self, seed) |> unwrap("in $shuffle()") } - -#' Sample -#' @description -#' #' Sample from this expression. -#' @param frac -#' Fraction of items to return. Cannot be used with `n`. -#' @param with_replacement -#' Allow values to be sampled more than once. -#' @param shuffle -#' Shuffle the order of sampled data points. (implicitly TRUE if, with_replacement = TRUE) -#' @param seed -#' Seed for the random number generator. If set to None (default), a random -#' seed is used. -#' @param n -#' Number of items to return. Cannot be used with `frac`. -#' @return Expr -#' @aliases sample -#' @format NULL -#' @keywords Expr +#' Take a sample +#' +#' @param frac Fraction of items to return (can be higher than 1). Cannot be +#' used with `n`. +#' @param with_replacement If `TRUE` (default), allow values to be sampled more +#' than once. +#' @param shuffle Shuffle the order of sampled data points (implicitly `TRUE` if +#' `with_replacement = TRUE`).
+#' @inheritParams Expr_shuffle +#' @param n Number of items to return. Cannot be used with `frac`. +#' @return Expr #' @examples -#' df = pl$DataFrame(a = 1:3) +#' df = pl$DataFrame(a = 1:4) #' df$select(pl$col("a")$sample(frac = 1, with_replacement = TRUE, seed = 1L)) #' df$select(pl$col("a")$sample(frac = 2, with_replacement = TRUE, seed = 1L)) #' df$select(pl$col("a")$sample(n = 2, with_replacement = FALSE, seed = 1L)) @@ -3748,16 +2859,14 @@ Expr_sample = function( unwrap("in $sample()") } - - -#' prepare alpha -#' @description internal function for emw_x expressions +#' Internal function for ewm_x expressions #' @param com numeric or NULL #' @param span numeric or NULL #' @param half_life numeric or NULL #' @param alpha numeric or NULL #' @keywords internal #' @return numeric +#' @noRd prepare_alpha = function( com = NULL, span = NULL, @@ -3801,62 +2910,44 @@ prepare_alpha = function( stop("Internal: it seems a input scenario was not handled properly") } - - - -#' Exponentially-weighted moving average/std/var. -#' @name Expr_ewm_mean_std_var -#' @param com -#' Specify decay in terms of center of mass, \eqn{\gamma}, with +#' Exponentially-weighted moving average +#' +#' @param com Specify decay in terms of center of mass, \eqn{\gamma}, with #' \eqn{ #' \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 #' } -#' @param span -#' Specify decay in terms of span, \eqn{\theta}, with +#' @param span Specify decay in terms of span, \eqn{\theta}, with #' \eqn{\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 } -#' @param half_life -#' Specify decay in terms of half-life, :math:`\lambda`, with +#' @param half_life Specify decay in terms of half-life, \eqn{\lambda}, with #' \eqn{ \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} } #' \eqn{ \forall \; \lambda > 0} -#' @param alpha -#' Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}.
-#' @param adjust -#' Divide by decaying adjustment factor in beginning periods to account for -#' imbalance in relative weightings -#' - When ``adjust=TRUE`` the EW function is calculated -#' using weights \eqn{w_i = (1 - \alpha)^i } -#' - When ``adjust=FALSE`` the EW function is calculated -#' recursively by -#' \eqn{ -#' y_0 = x_0 \\ -#' y_t = (1 - \alpha)y_{t - 1} + \alpha x_t -#' } -#' @param min_periods -#' Minimum number of observations in window required to have a value -#' (otherwise result is null). -#' -#' @param ignore_nulls ignore_nulls -#' Ignore missing values when calculating weights. -#' - When ``ignore_nulls=FALSE`` (default), weights are based on absolute -#' positions. -#' For example, the weights of :math:`x_0` and :math:`x_2` used in -#' calculating the final weighted average of -#' `[` \eqn{x_0}, None, \eqn{x_2}\\`]` are -#' \eqn{1-\alpha)^2} and \eqn{1} if ``adjust=TRUE``, and -#' \eqn{(1-\alpha)^2} and \eqn{\alpha} if `adjust=FALSE`. -#' - When ``ignore_nulls=TRUE``, weights are based -#' on relative positions. For example, the weights of -#' \eqn{x_0} and \eqn{x_2} used in calculating the final weighted -#' average of `[` \eqn{x_0}, None, \eqn{x_2}`]` are -#' \eqn{1-\alpha} and \eqn{1} if `adjust=TRUE`, -#' and \eqn{1-\alpha} and \eqn{\alpha} if `adjust=FALSE`. -#' @return Expr -#' @aliases ewm_mean -#' @format NULL -#' @keywords Expr -#' @examples -#' pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_mean(com = 1)) -#' +#' @param alpha Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}. 
+#' @param adjust Divide by decaying adjustment factor in beginning periods to +#' account for imbalance in relative weightings: +#' - When ``adjust=TRUE`` the EW function is calculated using weights +#' \eqn{w_i = (1 - \alpha)^i } +#' - When ``adjust=FALSE`` the EW function is calculated recursively by +#' \eqn{ +#' y_0 = x_0 \\ +#' y_t = (1 - \alpha)y_{t - 1} + \alpha x_t +#' } +#' @param min_periods Minimum number of observations in window required to have +#' a value (otherwise result is null). +#' @param ignore_nulls Ignore missing values when calculating weights: +#' - When `TRUE` (default), weights are based on relative positions. For example, +#' the weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +#' weighted average of `[` \eqn{x_0}, None, \eqn{x_2}`]` are +#' \eqn{1-\alpha} and \eqn{1} if `adjust=TRUE`, and \eqn{1-\alpha} and +#' \eqn{\alpha} if `adjust=FALSE`. +#' - When `FALSE`, weights are based on absolute positions. For example, the +#' weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +#' weighted average of `[` \eqn{x_0}, None, \eqn{x_2}`]` are +#' \eqn{(1-\alpha)^2} and \eqn{1} if ``adjust=TRUE``, and \eqn{(1-\alpha)^2} +#' and \eqn{\alpha} if `adjust=FALSE`. +#' @return Expr +#' @examples +#' pl$DataFrame(a = 1:3)$ +#' with_columns(ewm_mean = pl$col("a")$ewm_mean(com = 1)) Expr_ewm_mean = function( com = NULL, span = NULL, half_life = NULL, alpha = NULL, adjust = TRUE, min_periods = 1L, ignore_nulls = TRUE) { @@ -3864,14 +2955,14 @@ Expr_ewm_mean = function( unwrap(.pr$Expr$ewm_mean(self, alpha, adjust, min_periods, ignore_nulls)) } - -#' Ewm_std -#' @rdname Expr_ewm_mean_std_var -#' @param bias When bias=FALSE`, apply a correction to make the estimate statistically unbiased. 
-#' @aliases ewm_std -#' @keywords Expr +#' Exponentially-weighted moving standard deviation +#' +#' @inheritParams Expr_ewm_mean +#' @inheritParams Expr_rolling_skew +#' @return Expr #' @examples -#' pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_std(com = 1)) +#' pl$DataFrame(a = 1:3)$ +#' with_columns(ewm_std = pl$col("a")$ewm_std(com = 1)) Expr_ewm_std = function( com = NULL, span = NULL, half_life = NULL, alpha = NULL, adjust = TRUE, bias = FALSE, min_periods = 1L, ignore_nulls = TRUE) { @@ -3879,12 +2970,14 @@ Expr_ewm_std = function( unwrap(.pr$Expr$ewm_std(self, alpha, adjust, bias, min_periods, ignore_nulls)) } -#' Ewm_var -#' @rdname Expr_ewm_mean_std_var -#' @aliases ewm_var -#' @keywords Expr +#' Exponentially-weighted moving variance +#' +#' @inheritParams Expr_ewm_mean +#' @inheritParams Expr_rolling_skew +#' @return Expr #' @examples -#' pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_std(com = 1)) +#' pl$DataFrame(a = 1:3)$ +#' with_columns(ewm_var = pl$col("a")$ewm_var(com = 1)) Expr_ewm_var = function( com = NULL, span = NULL, half_life = NULL, alpha = NULL, adjust = TRUE, bias = FALSE, min_periods = 1L, ignore_nulls = TRUE) { @@ -3892,79 +2985,46 @@ Expr_ewm_var = function( unwrap(.pr$Expr$ewm_var(self, alpha, adjust, bias, min_periods, ignore_nulls)) } - - -#' Extend_constant -#' @description +#' Extend Series with a constant +#' #' Extend the Series with given number of values. -#' @param value The value to extend the Series with. -#' This value may be None to fill with nulls. +#' @param value The value to extend the Series with. This value may be `NULL` to +#' fill with nulls. #' @param n The number of values to extend. 
-#' @return Expr -#' @aliases extend_constant -#' @format NULL -#' @keywords Expr +#' @return Expr #' @examples -#' pl$select( -#' pl$lit(c("5", "Bob_is_not_a_number")) -#' $cast(pl$dtypes$UInt64, strict = FALSE) -#' $extend_constant(10.1, 2) -#' ) -#' -#' pl$select( -#' pl$lit(c("5", "Bob_is_not_a_number")) -#' $cast(pl$dtypes$Utf8, strict = FALSE) -#' $extend_constant("chuchu", 2) -#' ) +#' pl$select(pl$lit(1:4)$extend_constant(10.1, 2)) +#' pl$select(pl$lit(1:4)$extend_constant(NULL, 2)) Expr_extend_constant = function(value, n) { unwrap(.pr$Expr$extend_constant(self, wrap_e(value), n)) } - -#' expression: repeat series -#' @description -#' This expression takes input and repeats it n times and append chunk -#' @param n Numeric the number of times to repeat, must be non-negative and finite -#' @param rechunk bool default = TRUE, if true memory layout will be rewritten +#' Repeat a Series #' -#' @return Expr -#' @aliases Expr_rep -#' @format NULL -#' @details -#' if self$len() == 1 , has a special faster implementation, Here rechunk is not -#' necessary, and takes no effect. +#' This expression takes input and repeats it n times and append chunk. +#' @param n The number of times to repeat, must be non-negative and finite. +#' @param rechunk If `TRUE` (default), memory layout will be rewritten. #' -#' if self$len() > 1 , then the expression instructs the series to append onto -#' itself n time and rewrite memory +#' @return Expr +#' @details +#' If the input has length 1, this uses a special faster implementation that +#' doesn't require rechunking (so `rechunk = TRUE` has no effect). 
#' -#' @keywords Expr #' @examples -#' -#' pl$select( -#' pl$lit("alice")$rep(n = 3) -#' ) -#' -#' pl$select( -#' pl$lit(1:3)$rep(n = 2) -#' ) -#' +#' pl$select(pl$lit("alice")$rep(n = 3)) +#' pl$select(pl$lit(1:3)$rep(n = 2)) Expr_rep = function(n, rechunk = TRUE) { unwrap(.pr$Expr$rep(self, n, rechunk)) } - -#' extend series with repeated series -#' @description -#' Extend a series with a repeated series or value. -#' @param expr Expr or into Expr -#' @param n Numeric the number of times to repeat, must be non-negative and finite -#' @param rechunk bool default = TRUE, if true memory layout will be rewritten -#' @param upcast bool default = TRUE, passed to self$append(), if TRUE non identical types -#' will be casted to common super type if any. If FALSE or no common super type -#' throw error. -#' @return Expr -#' @format NULL -#' @keywords Expr +#' Extend a Series by repeating values +#' +#' @param expr Expr or something coercible to an Expr. +#' @inheritParams Expr_rep +#' @param upcast If `TRUE` (default), non identical types will be cast to common +#' supertype if there is any. If `FALSE` or no common super type, having +#' different types will throw an error. +#' @return Expr #' @examples #' pl$select(pl$lit(c(1, 2, 3))$rep_extend(1:3, n = 5)) Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) { @@ -3973,20 +3033,17 @@ Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) { if (rechunk) new$rechunk() else new } - -#' to_r: for debuging an expression -#' @description -#' debug an expression by evaluating in empty DataFrame and return first series to R -#' @param df otherwise a DataFrame to evaluate in, default NULL is an empty DataFrame -#' @param i numeric column to extract zero index default first, expression could generate multiple -#' columns -#' @return R object -#' @format NULL -#' @keywords Expr +#' Convert an Expr to R output +#' +#' This is mostly useful to debug an expression. 
It evaluates the Expr in an +#' empty DataFrame and returns the first Series to R. +#' @param df If `NULL` (default), it evaluates the Expr in an empty DataFrame. +#' Otherwise, provide a DataFrame that the Expr should be evaluated in. +#' @param i Numeric column to extract. Default is zero (which gives the first +#' column). +#' @return R object #' @examples #' pl$lit(1:3)$to_r() -#' pl$expr_to_r(pl$lit(1:3)) -#' pl$expr_to_r(1:3) Expr_to_r = function(df = NULL, i = 0) { if (is.null(df)) { pl$select(self)$to_series(i)$to_r() @@ -3998,14 +3055,23 @@ Expr_to_r = function(df = NULL, i = 0) { } } - +#' Convert an Expr to R output +#' +#' This is mostly useful to debug an expression. It evaluates the Expr in an +#' empty DataFrame and returns the first Series to R. This is an alias for +#' `$to_r()`. +#' @param df If `NULL` (default), it evaluates the Expr in an empty DataFrame. +#' Otherwise, provide a DataFrame that the Expr should be evaluated in. +#' @param i Numeric column to extract. Default is zero (which gives the first +#' column). #' @name pl_expr_to_r -#' @rdname Expr_to_r +#' @return R object +#' @examples +#' pl$expr_to_r(pl$lit(1:3)) pl$expr_to_r = function(expr, df = NULL, i = 0) { wrap_e(expr)$to_r(df, i) } - #' Value counts #' @description #' Count all unique values and create a struct mapping value to count. @@ -4014,7 +3080,6 @@ pl$expr_to_r = function(expr, df = NULL, i = 0) { #' @param parallel Better to turn this off in the aggregation context, as it can #' lead to contention. #' @format NULL -#' @keywords Expr #' @examples #' df = pl$DataFrame(iris)$select(pl$col("Species")$value_counts()) #' df @@ -4023,110 +3088,91 @@ Expr_value_counts = function(sort = FALSE, parallel = FALSE) { .pr$Expr$value_counts(self, sort, parallel) } -#' Value counts -#' @description -#' Return a count of the unique values in the order of appearance. 
-#' This method differs from `value_counts` in that it does not return the -#' values, only the counts and might be faster +#' Count unique values +#' +#' Return a count of the unique values in the order of appearance. This method +#' differs from `$value_counts()` in that it does not return the values, only +#' the counts and it might be faster. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases unique_counts -#' @format NULL -#' @keywords Expr #' @examples #' pl$DataFrame(iris)$select(pl$col("Species")$unique_counts()) Expr_unique_counts = "use_extendr_wrapper" -#' Natural Log -#' -#' @param base numeric base value for log, default base::exp(1) +#' Compute the logarithm of elements #' -#' @description Compute the base x logarithm of the input array, element-wise. -#' @keywords Expr +#' @param base Numeric base value for logarithm, default is `exp(1)`. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases log -#' @name Expr_log #' @examples -#' pl$DataFrame(list(a = exp(1)^(-1:3)))$select(pl$col("a")$log()) +#' pl$DataFrame(a = c(1, 2, 3, exp(1)))$ +#' with_columns(log = pl$col("a")$log()) Expr_log = function(base = base::exp(1)) { .pr$Expr$log(self, base) } -#' 10-base log -#' @description Compute the base 10 logarithm of the input array, element-wise. -#' @keywords Expr +#' Compute the base-10 logarithm of elements #' @return Expr #' @docType NULL #' @format NULL -#' @aliases log10 -#' @name Expr_log10 -#' @format NULL #' @examples -#' pl$DataFrame(list(a = 10^(-1:3)))$select(pl$col("a")$log10()) +#' pl$DataFrame(a = c(1, 2, 3, exp(1)))$ +#' with_columns(log10 = pl$col("a")$log10()) Expr_log10 = "use_extendr_wrapper" - - - #' Entropy -#' @description Computes the entropy. -#' Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities. -#' Return Null if input is not values -#' @param base Given exponential base, defaults to `e` -#' @param normalize Normalize pk if it doesn't sum to 1. 
-#' @keywords Expr +#' +#' The entropy is measured with the formula `-sum(pk * log(pk))` where `pk` are +#' discrete probabilities. +#' @param base Given exponential base, defaults to `exp(1)`. +#' @param normalize Normalize `pk` if it doesn't sum to 1. #' @return Expr -#' @aliases entropy #' @examples -#' pl$select(pl$lit(c("a", "b", "b", "c", "c", "c"))$unique_counts()$entropy(base = 2)) +#' pl$DataFrame(x = c(1, 2, 3, 2))$ +#' with_columns(entropy = pl$col("x")$entropy(base = 2)) Expr_entropy = function(base = base::exp(1), normalize = TRUE) { .pr$Expr$entropy(self, base, normalize) } -#' Cumulative eval -#' @description Run an expression over a sliding window that increases `1` slot every iteration. -#' @param expr Expression to evaluate -#' @param min_periods Number of valid values there should be in the window before the expression -#' is evaluated. valid values = `length - null_count` -#' @param parallel Run in parallel. Don't do this in a groupby or another operation that -#' already has much parallelization. -#' @details +#' Cumulative evaluation of expressions #' -#' Warnings -#' -#' This functionality is experimental and may change without it being considered a -#' breaking change. +#' Run an expression over a sliding window that increases by `1` slot every +#' iteration. +#' @param expr Expression to evaluate. +#' @param min_periods Number of valid (non-null) values there should be in the +#' window before the expression is evaluated. +#' @param parallel Run in parallel. Don't do this in a groupby or another +#' operation that already has much parallelization. +#' @details #' This can be really slow as it can have `O(n^2)` complexity. Don't use this -#' for operations that visit all elements. -#' @keywords Expr +#' for operations that visit all elements. 
#' @return Expr -#' @aliases cumulative_eval #' @examples -#' pl$lit(1:5)$cumulative_eval(pl$element()$first() - pl$element()$last()**2)$to_r() +#' pl$lit(1:5)$cumulative_eval( +#' pl$element()$first() - pl$element()$last()^2 +#' )$to_r() Expr_cumulative_eval = function(expr, min_periods = 1L, parallel = FALSE) { unwrap(.pr$Expr$cumulative_eval(self, expr, min_periods, parallel)) } - - -#' Set_sorted -#' @description Flags the expression as 'sorted'. -#* Enables downstream code to user fast paths for sorted arrays. +#' Flag an Expr as "sorted" +#' +#' This enables downstream code to use fast paths for sorted arrays. WARNING: +#' this doesn't check whether the data is actually sorted; you have to ensure +#' that yourself. #' @param descending Sort the columns in descending order. -#' @keywords Expr #' @return Expr -#' @aliases set_sorted #' @examples #' # correct use flag something correctly as ascendingly sorted #' s = pl$select(pl$lit(1:4)$set_sorted()$alias("a"))$get_column("a") -#' s$flags # see flags +#' s$flags #' -#' # incorrect use, flag somthing as not sorted ascendingly +#' # incorrect use, flag something unsorted as ascendingly sorted #' s2 = pl$select(pl$lit(c(1, 3, 2, 4))$set_sorted()$alias("a"))$get_column("a") -#' s2$sort() # sorting skipped, although not actually sorted +#' s2$sort() +#' s2$flags # returns TRUE while it's not actually sorted Expr_set_sorted = function(descending = FALSE) { self$map(\(s) { .pr$Series$set_sorted_mut(s, descending) # use private to bypass mut protection @@ -4134,17 +3180,14 @@ Expr_set_sorted = function(descending = FALSE) { }) } - #' Wrap column in list -#' @description Aggregate values into a list. -#' @keywords Expr +#' +#' Aggregate values into a list. #' @return Expr #' @docType NULL #' @format NULL -#' @aliases list -#' @details use to_struct to wrap a DataFrame. Notice implode() is sometimes referred to -#' as list() . -#' @format NULL +#' @details +#' Use `$to_struct()` to wrap a DataFrame. 
#' @examples #' df = pl$DataFrame( #' a = 1:3, @@ -4153,214 +3196,129 @@ Expr_set_sorted = function(descending = FALSE) { #' df$select(pl$all()$implode()) Expr_implode = "use_extendr_wrapper" - - -#' Shrink numeric columns to the minimal required datatype. -#' @description -#' Shrink to the dtype needed to fit the extrema of this `[Series]`. -#' This can be used to reduce memory pressure. -#' @keywords Expr +#' Shrink numeric columns to the minimal required datatype +#' +#' Shrink to the dtype needed to fit the extrema of this Series. This can be +#' used to reduce memory pressure. #' @return Expr #' @docType NULL -#' @format NULL -#' @aliases shrink_dtype #' @examples -#' pl$DataFrame( -#' a = c(1L, 2L, 3L), -#' b = c(1L, 2L, bitwShiftL(2L, 29)), -#' c = c(-1L, 2L, bitwShiftL(1L, 15)), -#' d = c(-112L, 2L, 112L), -#' e = c(-112L, 2L, 129L), -#' f = c("a", "b", "c"), -#' g = c(0.1, 1.32, 0.12), -#' h = c(TRUE, NA, FALSE) -#' )$with_columns(pl$col("b")$cast(pl$Int64) * 32L)$select(pl$all()$shrink_dtype()) +#' df = pl$DataFrame( +#' a = 1:3, +#' b = c(1, 2, 3) +#' ) +#' df +#' +#' df$with_columns(pl$all()$shrink_dtype()$name$suffix("_shrunk")) Expr_shrink_dtype = "use_extendr_wrapper" - - -#' list: list related methods -#' @description -#' Create an object namespace of all list related methods. -#' See the individual method pages for full details -#' @keywords Expr +#' List related methods +#' +#' Create an object namespace of all list related methods. See the individual +#' method pages for full details. 
#' @return Expr -#' @aliases list_ns -#' @examples -#' df_with_list = pl$DataFrame( -#' group = c(1, 1, 2, 2, 3), -#' value = c(1:5) -#' )$group_by( -#' "group", -#' maintain_order = TRUE -#' )$agg( -#' pl$col("value") * 3L -#' ) -#' df_with_list$with_columns( -#' pl$col("value")$list$lengths()$alias("group_size") -#' ) +#' @noRd Expr_list = method_as_property(function() { expr_list_make_sub_ns(self) }) - -#' str: string related methods -#' @description -#' Create an object namespace of all string related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases str_ns -#' @examples -#' -#' # missing +#' String related methods #' +#' Create an object namespace of all string related methods. See the individual +#' method pages for full details. +#' @return Expr +#' @noRd Expr_str = method_as_property(function() { expr_str_make_sub_ns(self) }) -#' bin: binary related methods -#' @description -#' Create an object namespace of all binary related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases bin_ns -#' @examples -#' -#' # missing +#' Binary related methods #' +#' Create an object namespace of all binary related methods. See the individual +#' method pages for full details. +#' @return Expr +#' @noRd Expr_bin = method_as_property(function() { expr_bin_make_sub_ns(self) }) -#' dt: datetime related methods -#' @description -#' Create an object namespace of all datetime related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases dt_ns -#' @examples -#' -#' # missing +#' Datetime related methods #' +#' Create an object namespace of all datetime related methods. See the individual +#' method pages for full details. 
+#' @return Expr +#' @noRd Expr_dt = method_as_property(function() { expr_dt_make_sub_ns(self) }) -#' meta: related methods -#' @description -#' Create an object namespace of all meta related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases meta_ns -#' @examples -#' -#' # missing +#' Meta related methods #' +#' Create an object namespace of all meta related methods. See the individual +#' method pages for full details. +#' @return Expr +#' @noRd Expr_meta = method_as_property(function() { expr_meta_make_sub_ns(self) }) -#' name: related methods -#' @description -#' Create an object namespace of all name related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases name_ns -#' @examples -#' -#' # missing +#' Name related methods #' +#' Create an object namespace of all name related methods. See the individual +#' method pages for full details. +#' @return Expr +#' @noRd Expr_name = method_as_property(function() { expr_name_make_sub_ns(self) }) -#' cat: related methods -#' @description -#' Create an object namespace of all cat related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases cat_ns -#' @examples -#' -#' # missing +#' Categorical related methods #' +#' Create an object namespace of all categorical related methods. See the +#' individual method pages for full details. +#' @return Expr +#' @noRd Expr_cat = method_as_property(function() { expr_cat_make_sub_ns(self) }) -#' struct: related methods -#' @description -#' Create an object namespace of all struct related methods. -#' See the individual method pages for full details -#' @keywords Expr -#' @return Expr -#' @aliases struct_ns -#' @examples -#' -#' # missing +#' Struct related methods #' +#' Create an object namespace of all struct related methods. See the individual +#' method pages for full details. 
+#' @return Expr +#' @noRd Expr_struct = method_as_property(function() { expr_struct_make_sub_ns(self) }) -#' to_struct -#' @description pass expr to pl$struct -#' @keywords Expr +#' Convert an Expr to a Struct #' @return Expr -#' @aliases expr_to_struct -#' @keywords Expr #' @examples -#' e = pl$all()$to_struct()$alias("my_struct") -#' print(e) -#' pl$DataFrame(iris)$select(e) +#' pl$DataFrame(iris[, 3:5])$with_columns( +#' my_struct = pl$all()$to_struct() +#' ) Expr_to_struct = function() { pl$struct(self) } - -#' Literal to Series -#' @description -#' collect an expression based on literals into a Series -#' @keywords Expr +#' Convert Literal to Series +#' +#' Collect an expression based on literals into a Series. #' @return Series -#' @aliases lit_to_s -#' @examples -#' ( -#' pl$Series(list(1:1, 1:2, 1:3, 1:4)) -#' $print() -#' $to_lit() -#' $list$lengths() -#' $sum() -#' $cast(pl$dtypes$Int8) -#' $lit_to_s() -#' ) +#' @examples +#' pl$lit(1:5)$lit_to_s() Expr_lit_to_s = function() { pl$select(self)$to_series(0) } -#' Literal to DataFrame -#' @description -#' collect an expression based on literals into a DataFrame -#' @keywords Expr +#' Convert Literal to DataFrame +#' +#' Collect an expression based on literals into a DataFrame. 
#' @return Series -#' @aliases lit_to_df -#' @examples -#' ( -#' pl$Series(list(1:1, 1:2, 1:3, 1:4)) -#' $print() -#' $to_lit() -#' $list$lengths() -#' $sum() -#' $cast(pl$dtypes$Int8) -#' $lit_to_df() -#' ) +#' @examples +#' pl$lit(1:5)$lit_to_df() Expr_lit_to_df = function() { pl$select(self) } diff --git a/R/functions__lazy.R b/R/functions__lazy.R index ada8103df..5bbb86d8c 100644 --- a/R/functions__lazy.R +++ b/R/functions__lazy.R @@ -23,9 +23,6 @@ pl$all = function(name = NULL) { } - - - #' Start Expression with a column #' @name pl_col #' @description diff --git a/man/ExprList_sort.Rd b/man/ExprList_sort.Rd index 719401909..d391a5694 100644 --- a/man/ExprList_sort.Rd +++ b/man/ExprList_sort.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__list.R \name{ExprList_sort} \alias{ExprList_sort} -\title{Expr_sort} +\title{Sort an Expr} \arguments{ \item{descending}{Sort values in descending order} } @@ -10,6 +10,5 @@ Expr } \description{ -Sort this column. In projection/ selection context the whole column is sorted. -If used in a groupby context, the groups are sorted. +Sort this column. If used in a groupby context, the groups are sorted. 
} diff --git a/man/Expr_abs.Rd b/man/Expr_abs.Rd index 846d4897c..61984d303 100644 --- a/man/Expr_abs.Rd +++ b/man/Expr_abs.Rd @@ -2,17 +2,18 @@ % Please edit documentation in R/expr__expr.R \name{Expr_abs} \alias{Expr_abs} -\title{Abs} +\title{Compute the absolute values} \usage{ Expr_abs } \value{ -Exprs abs +Expr } \description{ -Compute absolute values +Compute the absolute values } \examples{ -pl$DataFrame(list(a = -1:1))$select(pl$col("a"), pl$col("a")$abs()$alias("abs")) +pl$DataFrame(a = -1:1)$ + with_columns(abs = pl$col("a")$abs()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_add.Rd b/man/Expr_add.Rd index 4e3509bec..e2b16a1ab 100644 --- a/man/Expr_add.Rd +++ b/man/Expr_add.Rd @@ -3,31 +3,29 @@ \name{Expr_add} \alias{Expr_add} \alias{+.Expr} -\title{Add} +\title{Add two expressions} \usage{ Expr_add(other) \method{+}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -Addition +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). } \examples{ -# three syntaxes same result pl$lit(5) + 10 pl$lit(5) + pl$lit(10) pl$lit(5)$add(pl$lit(10)) +pl$lit(5) # unary use resolves to same as pl$lit(5) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_agg_groups.Rd b/man/Expr_agg_groups.Rd index 6f1a22337..3d0722ee8 100644 --- a/man/Expr_agg_groups.Rd +++ b/man/Expr_agg_groups.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/expr__expr.R \name{Expr_agg_groups} \alias{Expr_agg_groups} -\title{aggregate groups} +\title{Aggregate groups} \usage{ Expr_agg_groups } \value{ -Exprs +Expr } \description{ -Get the group indexes of the group by operation. 
-Should be used in aggregation context only. +Get the group indexes of the group by operation. Should be used in aggregation +context only. } \examples{ df = pl$DataFrame(list( @@ -20,4 +20,4 @@ df = pl$DataFrame(list( )) df$group_by("group", maintain_order = TRUE)$agg(pl$col("value")$agg_groups()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_alias.Rd b/man/Expr_alias.Rd index edd8eeb42..7f7b10ab2 100644 --- a/man/Expr_alias.Rd +++ b/man/Expr_alias.Rd @@ -7,7 +7,7 @@ Expr_alias(name) } \arguments{ -\item{name}{string new name of output} +\item{name}{New name of output} } \value{ Expr @@ -18,4 +18,4 @@ Rename the output of an expression. \examples{ pl$col("bob")$alias("alice") } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_all.Rd b/man/Expr_all.Rd index b3d080057..2ba848470 100644 --- a/man/Expr_all.Rd +++ b/man/Expr_all.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_all} \alias{Expr_all} -\title{All, is true} +\title{Apply logical AND on a column} \usage{ Expr_all(drop_nulls = TRUE) } @@ -13,13 +13,9 @@ Expr_all(drop_nulls = TRUE) Boolean literal } \description{ -Check if all boolean values in a Boolean column are \code{TRUE}. -This method is an expression - not to be confused with -\code{pl$all} which is a function to select all columns. -} -\details{ -last \code{all()} in example is this Expr method, the first \code{pl$all()} refers -to "all-columns" and is an expression constructor +Check if all boolean values in a Boolean column are \code{TRUE}. This method is an +expression - not to be confused with \code{pl$all()} which is a function to select +all columns. 
} \examples{ pl$DataFrame( @@ -27,7 +23,8 @@ pl$DataFrame( any = c(TRUE, FALSE), none = c(FALSE, FALSE) )$select( + # the first $all() selects all columns, the second one applies the AND + # logical on the values pl$all()$all() ) } -\keyword{Expr} diff --git a/man/Expr_and.Rd b/man/Expr_and.Rd index 37df9ace8..6b1357fbb 100644 --- a/man/Expr_and.Rd +++ b/man/Expr_and.Rd @@ -2,22 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_and} \alias{Expr_and} -\title{And} +\title{Apply logical AND on two expressions} \usage{ Expr_and(other) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} } \value{ Expr } \description{ -combine to boolean expressions with AND +Combine two boolean expressions with AND. } \examples{ pl$lit(TRUE) & TRUE pl$lit(TRUE)$and(pl$lit(TRUE)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_any.Rd b/man/Expr_any.Rd index 8eb3a8b45..0d27f19bb 100644 --- a/man/Expr_any.Rd +++ b/man/Expr_any.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_any} \alias{Expr_any} -\title{Any (is true)} +\title{Apply logical OR on a column} \usage{ Expr_any(drop_nulls = TRUE) } @@ -24,4 +24,3 @@ pl$DataFrame( pl$all()$any() ) } -\keyword{Expr} diff --git a/man/Expr_append.Rd b/man/Expr_append.Rd index ce7543959..2a5da0d4d 100644 --- a/man/Expr_append.Rd +++ b/man/Expr_append.Rd @@ -7,9 +7,9 @@ Expr_append(other, upcast = TRUE) } \arguments{ -\item{other}{Expr, into Expr} +\item{other}{Expr or something coercible to an Expr.} -\item{upcast}{bool upcast to, if any supertype of two non equal datatypes.} +\item{upcast}{Cast both Expr to a common supertype if they have one.} } \value{ Expr @@ -27,4 +27,3 @@ pl$DataFrame(list())$select(pl$lit(42)$append(42L)) pl$DataFrame(list())$select(pl$lit(42)$append(FALSE)) pl$DataFrame(list())$select(pl$lit("Bob")$append(FALSE)) } -\keyword{Expr} diff --git a/man/Expr_apply.Rd 
b/man/Expr_apply.Rd index d7252f0d2..cb2b25a7f 100644 --- a/man/Expr_apply.Rd +++ b/man/Expr_apply.Rd @@ -125,7 +125,7 @@ system.time({ r_vec * 2L }) -#' #R parallel process example, use Sys.sleep() to imitate some CPU expensive computation. +# R parallel process example, use Sys.sleep() to imitate some CPU expensive computation. # use apply over each Species-group in each column equal to 12 sequential runs ~1.2 sec. pl$LazyFrame(iris)$group_by("Species")$agg( @@ -156,4 +156,3 @@ pl$LazyFrame(iris)$group_by("Species")$agg( )$collect() |> system.time() } -\keyword{Expr} diff --git a/man/Expr_approx_n_unique.Rd b/man/Expr_approx_n_unique.Rd index 5baf4800f..821574475 100644 --- a/man/Expr_approx_n_unique.Rd +++ b/man/Expr_approx_n_unique.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_approx_n_unique} \alias{Expr_approx_n_unique} -\alias{approx_n_unique} \title{Approx count unique values} \usage{ Expr_approx_n_unique @@ -14,6 +13,7 @@ Expr This is done using the HyperLogLog++ algorithm for cardinality estimation. } \examples{ -pl$DataFrame(iris)$select(pl$col("Species")$approx_n_unique()) +pl$DataFrame(iris[, 4:5])$ + with_columns(count = pl$col("Species")$approx_n_unique()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arccos.Rd b/man/Expr_arccos.Rd index 2f1828b7e..ea92de5be 100644 --- a/man/Expr_arccos.Rd +++ b/man/Expr_arccos.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arccos} \alias{Expr_arccos} -\alias{arccos} -\title{Arccos} +\title{Compute inverse cosine} \usage{ Expr_arccos } @@ -11,12 +10,10 @@ Expr_arccos Expr } \description{ -Compute the element-wise value for the inverse cosine. 
-} -\details{ -Evaluated Series has dtype Float64 +Compute inverse cosine } \examples{ -pl$DataFrame(a = c(-1, cos(0.5), 0, 1, NA_real_))$select(pl$col("a")$arccos()) +pl$DataFrame(a = c(-1, cos(0.5), 0, 1, NA_real_))$ + with_columns(arccos = pl$col("a")$arccos()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arccosh.Rd b/man/Expr_arccosh.Rd index 5507de86a..1f19d5dfb 100644 --- a/man/Expr_arccosh.Rd +++ b/man/Expr_arccosh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arccosh} \alias{Expr_arccosh} -\alias{arccosh} -\title{Arccosh} +\title{Compute inverse hyperbolic cosine} \usage{ Expr_arccosh } @@ -11,12 +10,10 @@ Expr_arccosh Expr } \description{ -Compute the element-wise value for the inverse hyperbolic cosine. -} -\details{ -Evaluated Series has dtype Float64 +Compute inverse hyperbolic cosine } \examples{ -pl$DataFrame(a = c(-1, cosh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arccosh()) +pl$DataFrame(a = c(-1, cosh(0.5), 0, 1, NA_real_))$ + with_columns(arccosh = pl$col("a")$arccosh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arcsin.Rd b/man/Expr_arcsin.Rd index c286364e0..4d83f198d 100644 --- a/man/Expr_arcsin.Rd +++ b/man/Expr_arcsin.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arcsin} \alias{Expr_arcsin} -\alias{arcsin} -\title{Arcsin} +\title{Compute inverse sine} \usage{ Expr_arcsin } @@ -11,12 +10,10 @@ Expr_arcsin Expr } \description{ -Compute the element-wise value for the inverse sine. 
-} -\details{ -Evaluated Series has dtype Float64 +Compute inverse sine } \examples{ -pl$DataFrame(a = c(-1, sin(0.5), 0, 1, NA_real_))$select(pl$col("a")$arcsin()) +pl$DataFrame(a = c(-1, sin(0.5), 0, 1, NA_real_))$ + with_columns(arcsin = pl$col("a")$arcsin()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arcsinh.Rd b/man/Expr_arcsinh.Rd index 3729790f4..796740534 100644 --- a/man/Expr_arcsinh.Rd +++ b/man/Expr_arcsinh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arcsinh} \alias{Expr_arcsinh} -\alias{arcsinh} -\title{Arcsinh} +\title{Compute inverse hyperbolic sine} \usage{ Expr_arcsinh } @@ -11,12 +10,10 @@ Expr_arcsinh Expr } \description{ -Compute the element-wise value for the inverse hyperbolic sine. -} -\details{ -Evaluated Series has dtype Float64 +Compute inverse hyperbolic sine } \examples{ -pl$DataFrame(a = c(-1, sinh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arcsinh()) +pl$DataFrame(a = c(-1, sinh(0.5), 0, 1, NA_real_))$ + with_columns(arcsinh = pl$col("a")$arcsinh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arctan.Rd b/man/Expr_arctan.Rd index 1847c0fe2..7b953a048 100644 --- a/man/Expr_arctan.Rd +++ b/man/Expr_arctan.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arctan} \alias{Expr_arctan} -\alias{arctan} -\title{Arctan} +\title{Compute inverse tangent} \usage{ Expr_arctan } @@ -11,12 +10,10 @@ Expr_arctan Expr } \description{ -Compute the element-wise value for the inverse tangent. 
-} -\details{ -Evaluated Series has dtype Float64 +Compute inverse tangent } \examples{ -pl$DataFrame(a = c(-1, tan(0.5), 0, 1, NA_real_))$select(pl$col("a")$arctan()) +pl$DataFrame(a = c(-1, tan(0.5), 0, 1, NA_real_))$ + with_columns(arctan = pl$col("a")$arctan()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arctanh.Rd b/man/Expr_arctanh.Rd index cf898afa1..8cfe9cd31 100644 --- a/man/Expr_arctanh.Rd +++ b/man/Expr_arctanh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arctanh} \alias{Expr_arctanh} -\alias{arctanh} -\title{Arctanh} +\title{Compute inverse hyperbolic tangent} \usage{ Expr_arctanh } @@ -11,12 +10,10 @@ Expr_arctanh Expr } \description{ -Compute the element-wise value for the inverse hyperbolic tangent. -} -\details{ -Evaluated Series has dtype Float64 +Compute inverse hyperbolic tangent } \examples{ -pl$DataFrame(a = c(-1, tanh(0.5), 0, 1, NA_real_))$select(pl$col("a")$arctanh()) +pl$DataFrame(a = c(-1, tanh(0.5), 0, 1, NA_real_))$ + with_columns(arctanh = pl$col("a")$arctanh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arg_max.Rd b/man/Expr_arg_max.Rd index c0f09645e..33158bd02 100644 --- a/man/Expr_arg_max.Rd +++ b/man/Expr_arg_max.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arg_max} \alias{Expr_arg_max} -\title{Index of min value} +\title{Index of max value} \usage{ Expr_arg_max } @@ -10,14 +10,11 @@ Expr_arg_max Expr } \description{ -Get the index of the minimal value. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Get the index of the maximal value. 
} \examples{ -pl$DataFrame(list( +pl$DataFrame( a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$arg_max()) +)$with_columns(arg_max = pl$col("a")$arg_max()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arg_min.Rd b/man/Expr_arg_min.Rd index 566b2af2d..fc0386190 100644 --- a/man/Expr_arg_min.Rd +++ b/man/Expr_arg_min.Rd @@ -12,12 +12,9 @@ Expr \description{ Get the index of the minimal value. } -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -} \examples{ -pl$DataFrame(list( +pl$DataFrame( a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$arg_min()) +)$with_columns(arg_min = pl$col("a")$arg_min()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_arg_sort.Rd b/man/Expr_arg_sort.Rd index 20d9468ff..da4a28958 100644 --- a/man/Expr_arg_sort.Rd +++ b/man/Expr_arg_sort.Rd @@ -2,36 +2,24 @@ % Please edit documentation in R/expr__expr.R \name{Expr_arg_sort} \alias{Expr_arg_sort} -\alias{arg_sort} -\alias{Expr_argsort} -\alias{argsort} \title{Index of a sort} \usage{ Expr_arg_sort(descending = FALSE, nulls_last = FALSE) - -Expr_argsort(descending = FALSE, nulls_last = FALSE) } \arguments{ \item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a sequence of booleans.} +can be specified per column by passing a vector of booleans.} -\item{nulls_last}{bool, default FALSE, place Nulls last} +\item{nulls_last}{If \code{TRUE}, place null values last.} } \value{ Expr } \description{ Get the index values that would sort this column. -If 'reverse=True` the smallest elements will be given.
- -argsort is a alias for arg_sort -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} } \examples{ -pl$DataFrame(list( +pl$DataFrame( a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$arg_sort()) +)$with_columns(arg_sorted = pl$col("a")$arg_sort()) } -\keyword{Expr} diff --git a/man/Expr_arg_unique.Rd b/man/Expr_arg_unique.Rd index 7f3e02517..33d58825d 100644 --- a/man/Expr_arg_unique.Rd +++ b/man/Expr_arg_unique.Rd @@ -3,7 +3,7 @@ \name{Expr_arg_unique} \alias{Expr_arg_unique} \alias{arg_unique} -\title{Index of First Unique Value.} +\title{Index of first unique values} \usage{ Expr_arg_unique } @@ -11,9 +11,9 @@ Expr_arg_unique Expr } \description{ -Index of First Unique Value. +This finds the position of first occurrence of each unique value. } \examples{ pl$select(pl$lit(c(1:2, 1:3))$arg_unique()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_argsort.Rd b/man/Expr_argsort.Rd new file mode 100644 index 000000000..063ab4f09 --- /dev/null +++ b/man/Expr_argsort.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_argsort} +\alias{Expr_argsort} +\title{Index of a sort} +\usage{ +Expr_argsort(descending = FALSE, nulls_last = FALSE) +} +\arguments{ +\item{descending}{Sort in descending order. 
When sorting by multiple columns, +can be specified per column by passing a vector of booleans.} + +\item{nulls_last}{If \code{TRUE}, place null values last.} +} +\description{ +argsort is an alias for arg_sort +} +\examples{ +pl$DataFrame( + a = c(6, 1, 0, NA, Inf, NaN) +)$with_columns(arg_sorted = pl$col("a")$arg_sort()) +} diff --git a/man/Expr_backward_fill.Rd b/man/Expr_backward_fill.Rd index 56aa66f7a..7c97a5f5c 100644 --- a/man/Expr_backward_fill.Rd +++ b/man/Expr_backward_fill.Rd @@ -2,29 +2,24 @@ % Please edit documentation in R/expr__expr.R \name{Expr_backward_fill} \alias{Expr_backward_fill} -\alias{backward_fill} -\title{Fill Nulls Backward} +\title{Fill null values backward} \usage{ Expr_backward_fill(limit = NULL) } \arguments{ -\item{limit}{Expr or \verb{Into} The number of consecutive null values to backward fill.} +\item{limit}{Number of consecutive null values to fill when using the +\code{"forward"} or \code{"backward"} strategy.} } \value{ Expr } \description{ -Fill missing values with the next to be seen values. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Fill missing values with the next to be seen values. Syntactic sugar for +\verb{$fill_null(strategy = "backward")}.
} \examples{ -l = list(a = c(1L, rep(NA_integer_, 3L), 10)) -pl$DataFrame(l)$select( - pl$col("a")$backward_fill()$alias("bf_null"), - pl$col("a")$backward_fill(limit = 0)$alias("bf_l0"), - pl$col("a")$backward_fill(limit = 1)$alias("bf_l1") -)$to_list() +pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ + with_columns( + backward = pl$col("a")$backward_fill() + ) } -\keyword{Expr} diff --git a/man/Expr_bin.Rd b/man/Expr_bin.Rd deleted file mode 100644 index ceca7f8cd..000000000 --- a/man/Expr_bin.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_bin} -\alias{Expr_bin} -\alias{bin_ns} -\title{bin: binary related methods} -\usage{ -Expr_bin() -} -\value{ -Expr -} -\description{ -Create an object namespace of all binary related methods. -See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_bottom_k.Rd b/man/Expr_bottom_k.Rd index 4e4eb3fe7..ff05d8a36 100644 --- a/man/Expr_bottom_k.Rd +++ b/man/Expr_bottom_k.Rd @@ -2,28 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_bottom_k} \alias{Expr_bottom_k} -\alias{bottom_k} \title{Bottom k values} \usage{ Expr_bottom_k(k) } \arguments{ -\item{k}{numeric k bottom values to get} +\item{k}{Number of bottom (smallest) values to get} } \value{ Expr } \description{ -Return the \code{k} smallest elements. -} -\details{ -This has time complexity: \eqn{ O(n + k \\log{}n - \frac{k}{2}) } - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Return the \code{k} smallest elements.
This has time complexity: \eqn{ O(n + k +\\log{}n - \frac{k}{2}) } } \examples{ -pl$DataFrame(list( - a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$bottom_k(5)) +pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$select(pl$col("a")$bottom_k(5)) } -\keyword{Expr} diff --git a/man/Expr_cast.Rd b/man/Expr_cast.Rd index 3e9c650f3..415d4a4cc 100644 --- a/man/Expr_cast.Rd +++ b/man/Expr_cast.Rd @@ -2,25 +2,25 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cast} \alias{Expr_cast} -\alias{cast} -\title{Cast between DataType(s)} +\title{Cast between DataType} \usage{ Expr_cast(dtype, strict = TRUE) } \arguments{ \item{dtype}{DataType to cast to.} -\item{strict}{bool if true an error will be thrown if cast failed at resolve time.} +\item{strict}{If \code{TRUE} (default), an error will be thrown if cast failed at +resolve time.} } \value{ Expr } \description{ -Cast between DataType(s) +Cast between DataType } \examples{ df = pl$DataFrame(a = 1:3, b = c(1, 2, 3)) -df$print()$with_columns( +df$with_columns( pl$col("a")$cast(pl$dtypes$Float64), pl$col("b")$cast(pl$dtypes$Int32) ) @@ -28,13 +28,11 @@ df$print()$with_columns( # strict FALSE, inserts null for any cast failure pl$lit(c(100, 200, 300))$cast(pl$dtypes$UInt8, strict = FALSE)$lit_to_s() - # strict TRUE, raise any failure as an error when query is executed. tryCatch( { pl$lit("a")$cast(pl$dtypes$Float64, strict = TRUE)$lit_to_s() }, - error = as.character + error = function(e) e ) } -\keyword{Expr} diff --git a/man/Expr_cat.Rd b/man/Expr_cat.Rd deleted file mode 100644 index 46504fa32..000000000 --- a/man/Expr_cat.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_cat} -\alias{Expr_cat} -\alias{cat_ns} -\title{cat: related methods} -\usage{ -Expr_cat() -} -\value{ -Expr -} -\description{ -Create an object namespace of all cat related methods. 
-See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_ceil.Rd b/man/Expr_ceil.Rd index b6fe9f9a7..78c970ca0 100644 --- a/man/Expr_ceil.Rd +++ b/man/Expr_ceil.Rd @@ -10,14 +10,11 @@ Expr_ceil Expr } \description{ -Rounds up to the nearest integer value. -Only works on floating point Series. +Rounds up to the nearest integer value. Only works on floating point Series. } \examples{ -pl$DataFrame(list( - a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -))$select( - pl$col("a")$ceil() +pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( + ceiling = pl$col("a")$ceil() ) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_class.Rd b/man/Expr_class.Rd index a4ebe935a..f95b935da 100644 --- a/man/Expr_class.Rd +++ b/man/Expr_class.Rd @@ -7,9 +7,9 @@ not applicable } \description{ -Expressions are all the functions and methods that are applicable -to a Polars DataFrame. They can be split into the following categories (following -the \href{https://pola-rs.github.io/polars/py-polars/html/reference/expressions/}{Py-Polars classification}): +Expressions are all the functions and methods that are applicable to a Polars +DataFrame or LazyFrame. 
They can be split into the following categories +(following the \href{https://pola-rs.github.io/polars/py-polars/html/reference/expressions/}{Py-Polars classification}): \itemize{ \item Aggregate \item Binary diff --git a/man/Expr_clip.Rd b/man/Expr_clip.Rd index 92698d4e4..f05ef3784 100644 --- a/man/Expr_clip.Rd +++ b/man/Expr_clip.Rd @@ -2,39 +2,23 @@ % Please edit documentation in R/expr__expr.R \name{Expr_clip} \alias{Expr_clip} -\alias{clip} -\alias{Expr_clip_min} -\alias{clip_min} -\alias{Expr_clip_max} -\alias{clip_max} -\title{Clip} +\title{Clip elements} \usage{ Expr_clip(min, max) - -Expr_clip_min(min) - -Expr_clip_max(max) } \arguments{ -\item{min}{Minimum Value, ints and floats or any literal expression of ints and floats} +\item{min}{Minimum value, Expr returning a numeric.} -\item{max}{Maximum Value, ints and floats or any literal expression of ints and floats} +\item{max}{Maximum value, Expr returning a numeric.} } \value{ Expr } \description{ -Clip (limit) the values in an array to a \code{min} and \code{max} boundary. -} -\details{ -Only works for numerical types. -If you want to clip other dtypes, consider writing a "when, then, otherwise" -expression. See :func:\code{when} for more information. +Clip (limit) the values in an array to a \code{min} and \code{max} boundary. This only +works for numerical types. 
} \examples{ -df = pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L)) -df$with_columns(pl$col("foo")$clip(1L, 10L)$alias("foo_clipped")) -df$with_columns(pl$col("foo")$clip_min(1L)$alias("foo_clipped")) -df$with_columns(pl$col("foo")$clip_max(10L)$alias("foo_clipped")) +pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ + with_columns(clipped = pl$col("foo")$clip(1, 10)) } -\keyword{Expr} diff --git a/man/Expr_clip_max.Rd b/man/Expr_clip_max.Rd new file mode 100644 index 000000000..59d207bcb --- /dev/null +++ b/man/Expr_clip_max.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_clip_max} +\alias{Expr_clip_max} +\title{Clip elements above maximum value} +\usage{ +Expr_clip_max(max) +} +\arguments{ +\item{max}{Maximum value, Expr returning a numeric.} +} +\description{ +Replace all values above a maximum value by this maximum value. +} +\examples{ +pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ + with_columns(clipped = pl$col("foo")$clip_max(10)) +} diff --git a/man/Expr_clip_min.Rd b/man/Expr_clip_min.Rd new file mode 100644 index 000000000..2832c5be4 --- /dev/null +++ b/man/Expr_clip_min.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_clip_min} +\alias{Expr_clip_min} +\title{Clip elements below minimum value} +\usage{ +Expr_clip_min(min) +} +\arguments{ +\item{min}{Minimum value, Expr returning a numeric.} +} +\description{ +Replace all values below a minimum value by this minimum value. 
+} +\examples{ +pl$DataFrame(foo = c(-50L, 5L, NA_integer_, 50L))$ + with_columns(clipped = pl$col("foo")$clip_min(1)) +} diff --git a/man/Expr_cos.Rd b/man/Expr_cos.Rd index 23eba3cbc..dfe7819f7 100644 --- a/man/Expr_cos.Rd +++ b/man/Expr_cos.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cos} \alias{Expr_cos} -\alias{cos} -\title{Cos} +\title{Compute cosine} \usage{ Expr_cos } @@ -11,12 +10,10 @@ Expr_cos Expr } \description{ -Compute the element-wise value for the cosine. -} -\details{ -Evaluated Series has dtype Float64 +Compute cosine } \examples{ -pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$cos()) +pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ + with_columns(cosine = pl$col("a")$cos()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_cosh.Rd b/man/Expr_cosh.Rd index 64b20b73d..0dd801e63 100644 --- a/man/Expr_cosh.Rd +++ b/man/Expr_cosh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cosh} \alias{Expr_cosh} -\alias{cosh} -\title{Cosh} +\title{Compute hyperbolic cosine} \usage{ Expr_cosh } @@ -11,12 +10,10 @@ Expr_cosh Expr } \description{ -Compute the element-wise value for the hyperbolic cosine. -} -\details{ -Evaluated Series has dtype Float64 +Compute hyperbolic cosine } \examples{ -pl$DataFrame(a = c(-1, acosh(1.5), 0, 1, NA_real_))$select(pl$col("a")$cosh()) +pl$DataFrame(a = c(-1, acosh(0.5), 0, 1, NA_real_))$ + with_columns(cosh = pl$col("a")$cosh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_count.Rd b/man/Expr_count.Rd index b1953161a..1ef0c381f 100644 --- a/man/Expr_count.Rd +++ b/man/Expr_count.Rd @@ -1,22 +1,24 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__expr.R +\docType{data} \name{Expr_count} \alias{Expr_count} \alias{Expr_len} -\title{Count values (len is a alias)} +\title{Count elements} +\format{ +An object of class \code{character} of length 1. 
+} \usage{ Expr_count Expr_len } \value{ -Expr - Expr } \description{ -Count the number of values in this expression. -Similar to R length() +Count the number of elements in this expression. Note that \code{NULL} values are +also counted. \verb{$len()} is an alias. } \examples{ pl$DataFrame( @@ -26,13 +28,5 @@ pl$DataFrame( )$select( pl$all()$count() ) -pl$DataFrame( - all = c(TRUE, TRUE), - any = c(TRUE, FALSE), - none = c(FALSE, FALSE) -)$select( - pl$all()$len(), - pl$col("all")$first()$len()$alias("all_first") -) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_cum_count.Rd b/man/Expr_cum_count.Rd index 0f753b36b..504e38928 100644 --- a/man/Expr_cum_count.Rd +++ b/man/Expr_cum_count.Rd @@ -2,31 +2,28 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cum_count} \alias{Expr_cum_count} -\alias{cum_count} \title{Cumulative count} \usage{ Expr_cum_count(reverse = FALSE) } \arguments{ -\item{reverse}{bool, default FALSE, if true roll over vector from back to forth} +\item{reverse}{If \code{TRUE}, reverse the count.} } \value{ Expr } \description{ -Get an array with the cumulative count computed at every element. -Counting from 0 to len +Get an array with the cumulative count (zero-indexed) computed at every element. } \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. +The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues. -cum_count does not seem to count within lists. +\verb{$cum_count()} does not seem to count within lists. 
} \examples{ -pl$DataFrame(list(a = 1:4))$select( +pl$DataFrame(a = 1:4)$with_columns( pl$col("a")$cum_count()$alias("cum_count"), pl$col("a")$cum_count(reverse = TRUE)$alias("cum_count_reversed") ) } -\keyword{Expr} diff --git a/man/Expr_cum_max.Rd b/man/Expr_cum_max.Rd index 9f3edf397..43057e510 100644 --- a/man/Expr_cum_max.Rd +++ b/man/Expr_cum_max.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cum_max} \alias{Expr_cum_max} -\alias{cummin} \title{Cumulative maximum} \usage{ Expr_cum_max(reverse = FALSE) } \arguments{ -\item{reverse}{bool, default FALSE, if true roll over vector from back to forth} +\item{reverse}{If \code{TRUE}, start from the last value.} } \value{ Expr @@ -17,15 +16,12 @@ Expr Get an array with the cumulative max computed at every element. } \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues. } \examples{ -pl$DataFrame(list(a = 1:4))$select( +pl$DataFrame(a = c(1:4, 2L))$with_columns( pl$col("a")$cum_max()$alias("cummux"), pl$col("a")$cum_max(reverse = TRUE)$alias("cum_max_reversed") ) } -\keyword{Expr} diff --git a/man/Expr_cum_min.Rd b/man/Expr_cum_min.Rd index b4a596b60..21f7573be 100644 --- a/man/Expr_cum_min.Rd +++ b/man/Expr_cum_min.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cum_min} \alias{Expr_cum_min} -\alias{cum_min} \title{Cumulative minimum} \usage{ Expr_cum_min(reverse = FALSE) } \arguments{ -\item{reverse}{bool, default FALSE, if true roll over vector from back to forth} +\item{reverse}{If \code{TRUE}, start from the last value.} } \value{ Expr @@ -17,15 +16,12 @@ Expr Get an array with the cumulative min computed at every element. 
} \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues. } \examples{ -pl$DataFrame(list(a = 1:4))$select( +pl$DataFrame(a = c(1:4, 2L))$with_columns( pl$col("a")$cum_min()$alias("cum_min"), pl$col("a")$cum_min(reverse = TRUE)$alias("cum_min_reversed") ) } -\keyword{Expr} diff --git a/man/Expr_cum_prod.Rd b/man/Expr_cum_prod.Rd index 68032c072..033bc3d81 100644 --- a/man/Expr_cum_prod.Rd +++ b/man/Expr_cum_prod.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cum_prod} \alias{Expr_cum_prod} -\alias{cum_prod} \title{Cumulative product} \usage{ Expr_cum_prod(reverse = FALSE) } \arguments{ -\item{reverse}{bool, default FALSE, if true roll over vector from back to forth} +\item{reverse}{If \code{TRUE}, start with the total product of elements and divide +each row one by one.} } \value{ Expr @@ -17,13 +17,12 @@ Expr Get an array with the cumulative product computed at every element. } \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. +The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues. 
} \examples{ -pl$DataFrame(list(a = 1:4))$select( +pl$DataFrame(a = 1:4)$with_columns( pl$col("a")$cum_prod()$alias("cum_prod"), pl$col("a")$cum_prod(reverse = TRUE)$alias("cum_prod_reversed") ) } -\keyword{Expr} diff --git a/man/Expr_cum_sum.Rd b/man/Expr_cum_sum.Rd index 749d0b634..7fe91ca74 100644 --- a/man/Expr_cum_sum.Rd +++ b/man/Expr_cum_sum.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cum_sum} \alias{Expr_cum_sum} -\alias{Expr_cumsum} \title{Cumulative sum} \usage{ Expr_cum_sum(reverse = FALSE) } \arguments{ -\item{reverse}{bool, default FALSE, if true roll over vector from back to forth} +\item{reverse}{If \code{TRUE}, start with the total sum of elements and subtract +each row one by one.} } \value{ Expr @@ -17,13 +17,12 @@ Expr Get an array with the cumulative sum computed at every element. } \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. +The Dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues. } \examples{ -pl$DataFrame(list(a = 1:4))$select( +pl$DataFrame(a = 1:4)$with_columns( pl$col("a")$cum_sum()$alias("cum_sum"), pl$col("a")$cum_sum(reverse = TRUE)$alias("cum_sum_reversed") ) } -\keyword{Expr} diff --git a/man/Expr_cumulative_eval.Rd b/man/Expr_cumulative_eval.Rd index 79aaff8bf..f5911e8cc 100644 --- a/man/Expr_cumulative_eval.Rd +++ b/man/Expr_cumulative_eval.Rd @@ -2,35 +2,32 @@ % Please edit documentation in R/expr__expr.R \name{Expr_cumulative_eval} \alias{Expr_cumulative_eval} -\alias{cumulative_eval} -\title{Cumulative eval} +\title{Cumulative evaluation of expressions} \usage{ Expr_cumulative_eval(expr, min_periods = 1L, parallel = FALSE) } \arguments{ -\item{expr}{Expression to evaluate} +\item{expr}{Expression to evaluate.} -\item{min_periods}{Number of valid values there should be in the window before the expression -is evaluated.
valid values = \code{length - null_count}} +\item{min_periods}{Number of valid (non-null) values there should be in the +window before the expression is evaluated.} -\item{parallel}{Run in parallel. Don't do this in a groupby or another operation that -already has much parallelization.} +\item{parallel}{Run in parallel. Don't do this in a groupby or another +operation that already has much parallelization.} } \value{ Expr } \description{ -Run an expression over a sliding window that increases \code{1} slot every iteration. +Run an expression over a sliding window that increases by \code{1} slot every +iteration. } \details{ -Warnings - -This functionality is experimental and may change without it being considered a -breaking change. This can be really slow as it can have \code{O(n^2)} complexity. Don't use this for operations that visit all elements. } \examples{ -pl$lit(1:5)$cumulative_eval(pl$element()$first() - pl$element()$last()**2)$to_r() +pl$lit(1:5)$cumulative_eval( + pl$element()$first() - pl$element()$last()^2 +)$to_r() } -\keyword{Expr} diff --git a/man/Expr_diff.Rd b/man/Expr_diff.Rd index 7dd0516f5..ea60b3a22 100644 --- a/man/Expr_diff.Rd +++ b/man/Expr_diff.Rd @@ -2,15 +2,14 @@ % Please edit documentation in R/expr__expr.R \name{Expr_diff} \alias{Expr_diff} -\alias{diff} -\title{Diff} +\title{Difference} \usage{ Expr_diff(n = 1, null_behavior = "ignore") } \arguments{ -\item{n}{Integerish Number of slots to shift.} +\item{n}{Number of slots to shift.} -\item{null_behavior}{option default 'ignore', else 'drop'} +\item{null_behavior}{String, either \code{"ignore"} (default), else \code{"drop"}.} } \value{ Expr @@ -19,9 +18,8 @@ Expr Calculate the n-th discrete difference. 
} \examples{ -pl$DataFrame(list(a = c(20L, 10L, 30L, 40L)))$select( - pl$col("a")$diff()$alias("diff_default"), - pl$col("a")$diff(2, "ignore")$alias("diff_2_ignore") +pl$DataFrame(a = c(20L, 10L, 30L, 40L))$with_columns( + diff_default = pl$col("a")$diff(), + diff_2_ignore = pl$col("a")$diff(2, "ignore") ) } -\keyword{Expr} diff --git a/man/Expr_div.Rd b/man/Expr_div.Rd index cfe8fcf67..8e4cdb50e 100644 --- a/man/Expr_div.Rd +++ b/man/Expr_div.Rd @@ -3,30 +3,28 @@ \name{Expr_div} \alias{Expr_div} \alias{/.Expr} -\title{Div} +\title{Divide two expressions} \usage{ Expr_div(other) \method{/}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -Divide +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). } \examples{ -# three syntaxes same result pl$lit(5) / 10 pl$lit(5) / pl$lit(10) pl$lit(5)$div(pl$lit(10)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_dot.Rd b/man/Expr_dot.Rd index 6830ecc1e..ddd22a3ab 100644 --- a/man/Expr_dot.Rd +++ b/man/Expr_dot.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_dot} \alias{Expr_dot} -\alias{dot} \title{Dot product} \usage{ Expr_dot(other) } \arguments{ -\item{other}{Expr to compute dot product with.} +\item{other}{Literal or object that can be converted to a literal} } \value{ Expr @@ -18,10 +17,9 @@ Compute the dot/inner product between two Expressions. 
} \examples{ pl$DataFrame( - a = 1:4, b = c(1, 2, 3, 4), c = "bob" -)$select( + a = 1:4, b = c(1, 2, 3, 4) +)$with_columns( pl$col("a")$dot(pl$col("b"))$alias("a dot b"), pl$col("a")$dot(pl$col("a"))$alias("a dot a") ) } -\keyword{Expr} diff --git a/man/Expr_drop_nans.Rd b/man/Expr_drop_nans.Rd index 54694488d..4cde1e708 100644 --- a/man/Expr_drop_nans.Rd +++ b/man/Expr_drop_nans.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_drop_nans} \alias{Expr_drop_nans} -\title{Drop NaN(s)} +\title{Drop NaN} \usage{ Expr_drop_nans } @@ -10,16 +10,16 @@ Expr_drop_nans Expr } \description{ -Drop floating point NaN values. -Similar to R syntax \code{x[!is.nan(x)]} +Drop NaN } \details{ -Note that NaN values are not null values! (null corresponds to R NA, not R NULL) -To drop null values, use method \code{drop_nulls}. - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Note that \code{NaN} values are not \code{null} values. Null values correspond to NA +in R. } \examples{ pl$DataFrame(list(x = c(1, 2, NaN, NA)))$select(pl$col("x")$drop_nans()) } -\keyword{Expr} +\seealso{ +\code{drop_nulls()} +} +\keyword{datasets} diff --git a/man/Expr_drop_nulls.Rd b/man/Expr_drop_nulls.Rd index e5eb108eb..daac9db5e 100644 --- a/man/Expr_drop_nulls.Rd +++ b/man/Expr_drop_nulls.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_drop_nulls} \alias{Expr_drop_nulls} -\title{Drop null(s)} +\title{Drop missing values} \usage{ Expr_drop_nulls } @@ -10,13 +10,12 @@ Expr_drop_nulls Expr } \description{ -Drop null values. 
-Similar to R syntax \code{x[!(is.na(x) & !is.nan(x))]} -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Drop missing values } \examples{ pl$DataFrame(list(x = c(1, 2, NaN, NA)))$select(pl$col("x")$drop_nulls()) } -\keyword{Expr} +\seealso{ +\code{drop_nans()} +} +\keyword{datasets} diff --git a/man/Expr_dt.Rd b/man/Expr_dt.Rd deleted file mode 100644 index 44da7bb9e..000000000 --- a/man/Expr_dt.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_dt} -\alias{Expr_dt} -\alias{dt_ns} -\title{dt: datetime related methods} -\usage{ -Expr_dt() -} -\value{ -Expr -} -\description{ -Create an object namespace of all datetime related methods. -See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_entropy.Rd b/man/Expr_entropy.Rd index 2725660ce..44b3b41a5 100644 --- a/man/Expr_entropy.Rd +++ b/man/Expr_entropy.Rd @@ -2,25 +2,23 @@ % Please edit documentation in R/expr__expr.R \name{Expr_entropy} \alias{Expr_entropy} -\alias{entropy} \title{Entropy} \usage{ Expr_entropy(base = base::exp(1), normalize = TRUE) } \arguments{ -\item{base}{Given exponential base, defaults to \code{e}} +\item{base}{Given exponential base, defaults to \code{exp(1)}.} -\item{normalize}{Normalize pk if it doesn't sum to 1.} +\item{normalize}{Normalize \code{pk} if it doesn't sum to 1.} } \value{ Expr } \description{ -Computes the entropy. -Uses the formula \code{-sum(pk * log(pk))} where \code{pk} are discrete probabilities. -Return Null if input is not values +The entropy is measured with the formula \code{-sum(pk * log(pk))} where \code{pk} are +discrete probabilities. 
} \examples{ -pl$select(pl$lit(c("a", "b", "b", "c", "c", "c"))$unique_counts()$entropy(base = 2)) +pl$DataFrame(x = c(1, 2, 3, 2))$ + with_columns(entropy = pl$col("x")$entropy(base = 2)) } -\keyword{Expr} diff --git a/man/Expr_eq.Rd b/man/Expr_eq.Rd index 055f4c924..0dcdc67fe 100644 --- a/man/Expr_eq.Rd +++ b/man/Expr_eq.Rd @@ -3,33 +3,28 @@ \name{Expr_eq} \alias{Expr_eq} \alias{==.Expr} -\title{Equal ==} +\title{Check equality} \usage{ Expr_eq(other) \method{==}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -eq method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). 
} \examples{ -#' #three syntaxes same result pl$lit(2) == 2 pl$lit(2) == pl$lit(2) pl$lit(2)$eq(pl$lit(2)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_ewm_mean.Rd b/man/Expr_ewm_mean.Rd new file mode 100644 index 000000000..dc91b7ffc --- /dev/null +++ b/man/Expr_ewm_mean.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_ewm_mean} +\alias{Expr_ewm_mean} +\title{Exponentially-weighted moving average} +\usage{ +Expr_ewm_mean( + com = NULL, + span = NULL, + half_life = NULL, + alpha = NULL, + adjust = TRUE, + min_periods = 1L, + ignore_nulls = TRUE +) +} +\arguments{ +\item{com}{Specify decay in terms of center of mass, \eqn{\gamma}, with +\eqn{ + \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 + }} + +\item{span}{Specify decay in terms of span, \eqn{\theta}, with +\eqn{\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 }} + +\item{half_life}{Specify decay in terms of half-life, \eqn{\lambda}, with +\eqn{ \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} } +\eqn{ \forall \; \lambda > 0}} + +\item{alpha}{Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}.} + +\item{adjust}{Divide by decaying adjustment factor in beginning periods to +account for imbalance in relative weightings: +\itemize{ +\item When \code{adjust=TRUE} the EW function is calculated using weights +\eqn{w_i = (1 - \alpha)^i } +\item When \code{adjust=FALSE} the EW function is calculated recursively by +\eqn{ + y_0 = x_0 \\ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + } +}} + +\item{min_periods}{Minimum number of observations in window required to have +a value (otherwise result is null).} + +\item{ignore_nulls}{Ignore missing values when calculating weights: +\itemize{ +\item When \code{TRUE} (default), weights are based on relative positions.
For example, +the weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{1-\alpha} and \eqn{1} if \code{adjust=TRUE}, and \eqn{1-\alpha} and +\eqn{\alpha} if \code{adjust=FALSE}. +\item When \code{FALSE}, weights are based on absolute positions. For example, the +weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{(1-\alpha)^2} and \eqn{1} if \code{adjust=TRUE}, and \eqn{(1-\alpha)^2} +and \eqn{\alpha} if \code{adjust=FALSE}. +}} +} +\value{ +Expr +} +\description{ +Exponentially-weighted moving average +} +\examples{ +pl$DataFrame(a = 1:3)$ + with_columns(ewm_mean = pl$col("a")$ewm_mean(com = 1)) +} diff --git a/man/Expr_ewm_mean_std_var.Rd b/man/Expr_ewm_mean_std_var.Rd deleted file mode 100644 index 2f857282f..000000000 --- a/man/Expr_ewm_mean_std_var.Rd +++ /dev/null @@ -1,112 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_ewm_mean_std_var} -\alias{Expr_ewm_mean_std_var} -\alias{Expr_ewm_mean} -\alias{ewm_mean} -\alias{Expr_ewm_std} -\alias{ewm_std} -\alias{Expr_ewm_var} -\alias{ewm_var} -\title{Exponentially-weighted moving average/std/var.} -\usage{ -Expr_ewm_mean( - com = NULL, - span = NULL, - half_life = NULL, - alpha = NULL, - adjust = TRUE, - min_periods = 1L, - ignore_nulls = TRUE -) - -Expr_ewm_std( - com = NULL, - span = NULL, - half_life = NULL, - alpha = NULL, - adjust = TRUE, - bias = FALSE, - min_periods = 1L, - ignore_nulls = TRUE -) - -Expr_ewm_var( - com = NULL, - span = NULL, - half_life = NULL, - alpha = NULL, - adjust = TRUE, - bias = FALSE, - min_periods = 1L, - ignore_nulls = TRUE -) -} -\arguments{ -\item{com}{Specify decay in terms of center of mass, \eqn{\gamma}, with -\eqn{ - \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 - }} - -\item{span}{Specify decay in terms of span, 
\eqn{\theta}, with -\eqn{\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 }} - -\item{half_life}{Specify decay in terms of half-life, :math:\verb{\\lambda}, with -\eqn{ \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} } -\eqn{ \forall \; \lambda > 0}} - -\item{alpha}{Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}.} - -\item{adjust}{Divide by decaying adjustment factor in beginning periods to account for -imbalance in relative weightings -\itemize{ -\item When \code{adjust=TRUE} the EW function is calculated -using weights \eqn{w_i = (1 - \alpha)^i } -\item When \code{adjust=FALSE} the EW function is calculated -recursively by -\eqn{ - y_0 = x_0 \\ - y_t = (1 - \alpha)y_{t - 1} + \alpha x_t -} -}} - -\item{min_periods}{Minimum number of observations in window required to have a value -(otherwise result is null).} - -\item{ignore_nulls}{ignore_nulls -Ignore missing values when calculating weights. -\itemize{ -\item When \code{ignore_nulls=FALSE} (default), weights are based on absolute -positions. -For example, the weights of :math:\code{x_0} and :math:\code{x_2} used in -calculating the final weighted average of -\code{[} \eqn{x_0}, None, \eqn{x_2}\\\verb{]} are -\eqn{1-\alpha)^2} and \eqn{1} if \code{adjust=TRUE}, and -\eqn{(1-\alpha)^2} and \eqn{\alpha} if \code{adjust=FALSE}. -\item When \code{ignore_nulls=TRUE}, weights are based -on relative positions. For example, the weights of -\eqn{x_0} and \eqn{x_2} used in calculating the final weighted -average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are -\eqn{1-\alpha} and \eqn{1} if \code{adjust=TRUE}, -and \eqn{1-\alpha} and \eqn{\alpha} if \code{adjust=FALSE}. -}} - -\item{bias}{When bias=FALSE`, apply a correction to make the estimate statistically unbiased.} -} -\value{ -Expr -} -\description{ -Exponentially-weighted moving average/std/var. 
- -Ewm_std - -Ewm_var -} -\examples{ -pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_mean(com = 1)) - -pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_std(com = 1)) -pl$DataFrame(a = 1:3)$select(pl$col("a")$ewm_std(com = 1)) -} -\keyword{Expr} diff --git a/man/Expr_ewm_std.Rd b/man/Expr_ewm_std.Rd new file mode 100644 index 000000000..d3c7e65e7 --- /dev/null +++ b/man/Expr_ewm_std.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_ewm_std} +\alias{Expr_ewm_std} +\title{Exponentially-weighted moving standard deviation} +\usage{ +Expr_ewm_std( + com = NULL, + span = NULL, + half_life = NULL, + alpha = NULL, + adjust = TRUE, + bias = FALSE, + min_periods = 1L, + ignore_nulls = TRUE +) +} +\arguments{ +\item{com}{Specify decay in terms of center of mass, \eqn{\gamma}, with +\eqn{ + \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 + }} + +\item{span}{Specify decay in terms of span, \eqn{\theta}, with +\eqn{\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 }} + +\item{half_life}{Specify decay in terms of half-life, \eqn{\lambda}, with +\eqn{ \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} } +\eqn{ \forall \; \lambda > 0}} + +\item{alpha}{Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}.} + +\item{adjust}{Divide by decaying adjustment factor in beginning periods to +account for imbalance in relative weightings: +\itemize{ +\item When \code{adjust=TRUE} the EW function is calculated using weights +\eqn{w_i = (1 - \alpha)^i } +\item When \code{adjust=FALSE} the EW function is calculated recursively by +\eqn{ + y_0 = x_0 \\ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + } +}} + +\item{bias}{If \code{FALSE}, the calculations are corrected for statistical bias.} + +\item{min_periods}{Minimum number of observations in window required to have +a value (otherwise result is null).} + +\item{ignore_nulls}{Ignore missing values when calculating weights: 
+\itemize{ +\item When \code{TRUE} (default), weights are based on relative positions. For example, +the weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{1-\alpha} and \eqn{1} if \code{adjust=TRUE}, and \eqn{1-\alpha} and +\eqn{\alpha} if \code{adjust=FALSE}. +\item When \code{FALSE}, weights are based on absolute positions. For example, the +weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{(1-\alpha)^2} and \eqn{1} if \code{adjust=TRUE}, and \eqn{(1-\alpha)^2} +and \eqn{\alpha} if \code{adjust=FALSE}. +}} +} +\value{ +Expr +} +\description{ +Exponentially-weighted moving standard deviation +} +\examples{ +pl$DataFrame(a = 1:3)$ + with_columns(ewm_std = pl$col("a")$ewm_std(com = 1)) +} diff --git a/man/Expr_ewm_var.Rd b/man/Expr_ewm_var.Rd new file mode 100644 index 000000000..750574d1b --- /dev/null +++ b/man/Expr_ewm_var.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_ewm_var} +\alias{Expr_ewm_var} +\title{Exponentially-weighted moving variance} +\usage{ +Expr_ewm_var( + com = NULL, + span = NULL, + half_life = NULL, + alpha = NULL, + adjust = TRUE, + bias = FALSE, + min_periods = 1L, + ignore_nulls = TRUE +) +} +\arguments{ +\item{com}{Specify decay in terms of center of mass, \eqn{\gamma}, with +\eqn{ + \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 + }} + +\item{span}{Specify decay in terms of span, \eqn{\theta}, with +\eqn{\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 }} + +\item{half_life}{Specify decay in terms of half-life, \eqn{\lambda}, with +\eqn{ \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} } +\eqn{ \forall \; \lambda > 0}} + +\item{alpha}{Specify smoothing factor alpha directly, \eqn{0 < \alpha \leq 1}.} + +\item{adjust}{Divide by 
decaying adjustment factor in beginning periods to +account for imbalance in relative weightings: +\itemize{ +\item When \code{adjust=TRUE} the EW function is calculated using weights +\eqn{w_i = (1 - \alpha)^i } +\item When \code{adjust=FALSE} the EW function is calculated recursively by +\eqn{ + y_0 = x_0 \\ + y_t = (1 - \alpha)y_{t - 1} + \alpha x_t + } +}} + +\item{bias}{If \code{FALSE}, the calculations are corrected for statistical bias.} + +\item{min_periods}{Minimum number of observations in window required to have +a value (otherwise result is null).} + +\item{ignore_nulls}{Ignore missing values when calculating weights: +\itemize{ +\item When \code{TRUE} (default), weights are based on relative positions. For example, +the weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{1-\alpha} and \eqn{1} if \code{adjust=TRUE}, and \eqn{1-\alpha} and +\eqn{\alpha} if \code{adjust=FALSE}. +\item When \code{FALSE}, weights are based on absolute positions. For example, the +weights of \eqn{x_0} and \eqn{x_2} used in calculating the final +weighted average of \code{[} \eqn{x_0}, None, \eqn{x_2}\verb{]} are +\eqn{(1-\alpha)^2} and \eqn{1} if \code{adjust=TRUE}, and \eqn{(1-\alpha)^2} +and \eqn{\alpha} if \code{adjust=FALSE}. 
+}} +} +\value{ +Expr +} +\description{ +Exponentially-weighted moving variance +} +\examples{ +pl$DataFrame(a = 1:3)$ + with_columns(ewm_var = pl$col("a")$ewm_var(com = 1)) +} diff --git a/man/Expr_exclude.Rd b/man/Expr_exclude.Rd index 79929ba6c..dbae0b4d5 100644 --- a/man/Expr_exclude.Rd +++ b/man/Expr_exclude.Rd @@ -3,14 +3,14 @@ \name{Expr_exclude} \alias{Expr_exclude} \alias{exclude} -\title{Exclude certain columns from a wildcard/regex selection.} +\title{Exclude certain columns from selection} \usage{ Expr_exclude(columns) } \arguments{ -\item{columns}{given param type: +\item{columns}{Given param type: \itemize{ -\item string: exclude name of column or exclude regex starting with ^and ending with$ +\item string: single column name or regex starting with \code{^} and ending with \code{$} \item character vector: exclude all these column names, no regex allowed \item DataType: Exclude any of this DataType \item List(DataType): Exclude any of these DataType(s) @@ -20,7 +20,7 @@ Expr_exclude(columns) Expr } \description{ -You may also use regexes in the exclude list. They must start with \code{^} and end with \code{$}. +Exclude certain columns from selection } \examples{ @@ -38,4 +38,3 @@ df$select(pl$all()$exclude(list(pl$Categorical, pl$Float64))) df$select(pl$all()$exclude("^Sepal.*$")) } -\keyword{Expr} diff --git a/man/Expr_exp.Rd b/man/Expr_exp.Rd index 870867e9c..9373ff459 100644 --- a/man/Expr_exp.Rd +++ b/man/Expr_exp.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_exp} \alias{Expr_exp} -\alias{exp} -\title{Compute the exponential, element-wise.} +\title{Compute the exponential of the elements} \usage{ Expr_exp } @@ -11,13 +10,9 @@ Expr_exp Expr } \description{ -Compute the exponential, element-wise. 
+Compute the exponential of the elements } \examples{ -log10123 = suppressWarnings(log(-1:3)) -all.equal( - pl$DataFrame(list(a = log10123))$select(pl$col("a")$exp())$to_data_frame()$a, - exp(1)^log10123 -) +pl$DataFrame(a = -1:3)$with_columns(a_exp = pl$col("a")$exp()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_explode.Rd b/man/Expr_explode.Rd index 3a92ff9c4..374e784a7 100644 --- a/man/Expr_explode.Rd +++ b/man/Expr_explode.Rd @@ -2,34 +2,23 @@ % Please edit documentation in R/expr__expr.R \name{Expr_explode} \alias{Expr_explode} -\alias{explode} -\alias{Expr_flatten} -\alias{flatten} -\title{Explode a list or utf8 Series.} +\title{Explode a list or Utf8 Series} \usage{ Expr_explode - -Expr_flatten } \value{ Expr } \description{ This means that every item is expanded to a new row. - -( flatten is an alias for explode ) } \details{ -explode/flatten does not support categorical +Categorical values are not supported. } \examples{ -pl$DataFrame(list(a = letters))$select(pl$col("a")$explode()$gather(0:5)) +df = pl$DataFrame(x = c("abc", "ab"), y = c(list(1:3), list(3:5))) +df -listed_group_df = pl$DataFrame(iris[c(1:3, 51:53), ])$group_by("Species")$agg(pl$all()) -print(listed_group_df) -vectors_df = listed_group_df$select( - pl$col(c("Sepal.Width", "Sepal.Length"))$explode() -) -print(vectors_df) +df$select(pl$col("y")$explode()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_extend_constant.Rd b/man/Expr_extend_constant.Rd index a7743876e..def711c50 100644 --- a/man/Expr_extend_constant.Rd +++ b/man/Expr_extend_constant.Rd @@ -2,14 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_extend_constant} \alias{Expr_extend_constant} -\alias{extend_constant} -\title{Extend_constant} +\title{Extend Series with a constant} \usage{ Expr_extend_constant(value, n) } \arguments{ -\item{value}{The value to extend the Series with. -This value may be None to fill with nulls.} +\item{value}{The value to extend the Series with. 
This value may be \code{NULL} to +fill with nulls.} \item{n}{The number of values to extend.} } @@ -20,16 +19,6 @@ Expr Extend the Series with given number of values. } \examples{ -pl$select( - pl$lit(c("5", "Bob_is_not_a_number")) - $cast(pl$dtypes$UInt64, strict = FALSE) - $extend_constant(10.1, 2) -) - -pl$select( - pl$lit(c("5", "Bob_is_not_a_number")) - $cast(pl$dtypes$Utf8, strict = FALSE) - $extend_constant("chuchu", 2) -) +pl$select(pl$lit(1:4)$extend_constant(10.1, 2)) +pl$select(pl$lit(1:4)$extend_constant(NULL, 2)) } -\keyword{Expr} diff --git a/man/Expr_fill_nan.Rd b/man/Expr_fill_nan.Rd index 3c5d672c3..d04361a79 100644 --- a/man/Expr_fill_nan.Rd +++ b/man/Expr_fill_nan.Rd @@ -2,30 +2,24 @@ % Please edit documentation in R/expr__expr.R \name{Expr_fill_nan} \alias{Expr_fill_nan} -\alias{fill_nan} -\title{Fill Nulls Forward} +\title{Fill NaN} \usage{ Expr_fill_nan(expr = NULL) } \arguments{ -\item{expr}{Expr or into Expr, value to fill NaNs with} +\item{expr}{Expr or something coercible in an Expr} } \value{ Expr } \description{ -Fill missing values with last seen values. 
-} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Fill NaN } \examples{ -l = list(a = c(1, NaN, NaN, 3)) -pl$DataFrame(l)$select( - pl$col("a")$fill_nan()$alias("fill_default"), - pl$col("a")$fill_nan(pl$lit(NA))$alias("fill_NA"), # same as default - pl$col("a")$fill_nan(2)$alias("fill_float2"), - pl$col("a")$fill_nan("hej")$alias("fill_str") # implicit cast to Utf8 -)$to_list() +pl$DataFrame(a = c(NaN, 1, NaN, 2, NA))$ + with_columns( + literal = pl$col("a")$fill_nan(999), + # implicit coercion to string + string = pl$col("a")$fill_nan("invalid") + ) } -\keyword{Expr} diff --git a/man/Expr_fill_null.Rd b/man/Expr_fill_null.Rd index eb6999a10..be08ca779 100644 --- a/man/Expr_fill_null.Rd +++ b/man/Expr_fill_null.Rd @@ -2,31 +2,31 @@ % Please edit documentation in R/expr__expr.R \name{Expr_fill_null} \alias{Expr_fill_null} -\alias{fill_null} -\title{Fill Nulls with a value or strategy.} +\title{Fill null values with a value or strategy} \usage{ Expr_fill_null(value = NULL, strategy = NULL, limit = NULL) } \arguments{ -\item{value}{Expr or \verb{Into} to fill Null values with} +\item{value}{Expr or something coercible in an Expr} -\item{strategy}{default NULL else 'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'} +\item{strategy}{Possible choice are \code{NULL} (default, requires a non-null +\code{value}), \code{"forward"}, \code{"backward"}, \code{"min"}, \code{"max"}, \code{"mean"}, \code{"zero"}, +\code{"one"}.} -\item{limit}{Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.} +\item{limit}{Number of consecutive null values to fill when using the +\code{"forward"} or \code{"backward"} strategy.} } \value{ Expr } \description{ -Shift the values by value or as strategy. 
-} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Fill null values with a value or strategy } \examples{ -pl$select( - pl$lit(0:3)$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), - pl$lit(0:3)$shift_and_fill(2, fill_value = pl$lit(42) / 2)$alias("shift+2") -) +pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ + with_columns( + value = pl$col("a")$fill_null(999), + backward = pl$col("a")$fill_null(strategy = "backward"), + mean = pl$col("a")$fill_null(strategy = "mean") + ) } -\keyword{Expr} diff --git a/man/Expr_filter.Rd b/man/Expr_filter.Rd index 86be5a931..85716ab94 100644 --- a/man/Expr_filter.Rd +++ b/man/Expr_filter.Rd @@ -2,35 +2,30 @@ % Please edit documentation in R/expr__expr.R \name{Expr_filter} \alias{Expr_filter} -\alias{Expr_where} -\alias{where} \title{Filter a single column.} \usage{ Expr_filter(predicate) - -Expr_where(predicate) } \arguments{ -\item{predicate}{Expr or something \verb{Into}. Should be a boolean expression.} +\item{predicate}{An Expr or something coercible to an Expr. Must return a +boolean.} } \value{ Expr } \description{ -Mostly useful in an aggregation context. If you want to filter on a DataFrame -level, use \code{LazyFrame.filter}. - -where() is an alias for pl$filter +Mostly useful in an aggregation context. If you want to filter on a +DataFrame level, use \code{DataFrame$filter()} (or \code{LazyFrame$filter()}). 
} \examples{ -df = pl$DataFrame(list( +df = pl$DataFrame( group_col = c("g1", "g1", "g2"), b = c(1, 2, 3) -)) +) +df df$group_by("group_col")$agg( - pl$col("b")$filter(pl$col("b") < 2)$sum()$alias("lt"), - pl$col("b")$filter(pl$col("b") >= 2)$sum()$alias("gte") + lt = pl$col("b")$filter(pl$col("b") < 2), + gte = pl$col("b")$filter(pl$col("b") >= 2) ) } -\keyword{Expr} diff --git a/man/Expr_first.Rd b/man/Expr_first.Rd index 964e1240d..ae1cd582c 100644 --- a/man/Expr_first.Rd +++ b/man/Expr_first.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_first} \alias{Expr_first} -\title{First} +\title{Get the first value.} \usage{ Expr_first } @@ -11,9 +11,8 @@ Expr } \description{ Get the first value. -Similar to R head(x,1) } \examples{ -pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$first()) +pl$DataFrame(x = 3:1)$with_columns(first = pl$col("x")$first()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_flatten.Rd b/man/Expr_flatten.Rd new file mode 100644 index 000000000..736be03d5 --- /dev/null +++ b/man/Expr_flatten.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\docType{data} +\name{Expr_flatten} +\alias{Expr_flatten} +\title{Explode a list or Utf8 Series} +\format{ +An object of class \code{character} of length 1. +} +\usage{ +Expr_flatten +} +\value{ +Expr +} +\description{ +This is an alias for \verb{$explode()}. +} +\examples{ +df = pl$DataFrame(x = c("abc", "ab"), y = c(list(1:3), list(3:5))) +df + +df$select(pl$col("y")$flatten()) +} +\keyword{datasets} diff --git a/man/Expr_floor.Rd b/man/Expr_floor.Rd index 8208d8981..0eae6a55a 100644 --- a/man/Expr_floor.Rd +++ b/man/Expr_floor.Rd @@ -10,14 +10,11 @@ Expr_floor Expr } \description{ -Rounds down to the nearest integer value. -Only works on floating point Series. +Rounds down to the nearest integer value. Only works on floating point Series. 
} \examples{ -pl$DataFrame(list( - a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -))$select( - pl$col("a")$floor() +pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( + floor = pl$col("a")$floor() ) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_forward_fill.Rd b/man/Expr_forward_fill.Rd index 701ab5df9..ee2e65bb7 100644 --- a/man/Expr_forward_fill.Rd +++ b/man/Expr_forward_fill.Rd @@ -2,29 +2,24 @@ % Please edit documentation in R/expr__expr.R \name{Expr_forward_fill} \alias{Expr_forward_fill} -\alias{forward_fill} -\title{Fill Nulls Forward} +\title{Fill null values forward} \usage{ Expr_forward_fill(limit = NULL) } \arguments{ -\item{limit}{Expr or \verb{Into} The number of consecutive null values to forward fill.} +\item{limit}{Number of consecutive null values to fill when using the +\code{"forward"} or \code{"backward"} strategy.} } \value{ Expr } \description{ -Fill missing values with last seen values. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Fill missing values with the last seen values. Syntactic sugar for +\verb{$fill_null(strategy = "forward")}. 
} \examples{ -l = list(a = c(1L, rep(NA_integer_, 3L), 10)) -pl$DataFrame(l)$select( - pl$col("a")$forward_fill()$alias("ff_null"), - pl$col("a")$forward_fill(limit = 0)$alias("ff_l0"), - pl$col("a")$forward_fill(limit = 1)$alias("ff_l1") -)$to_list() +pl$DataFrame(a = c(NA, 1, NA, 2, NA))$ + with_columns( + forward = pl$col("a")$forward_fill() + ) } -\keyword{Expr} diff --git a/man/Expr_gather.Rd b/man/Expr_gather.Rd index cd389e06f..c307d6c52 100644 --- a/man/Expr_gather.Rd +++ b/man/Expr_gather.Rd @@ -2,25 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_gather} \alias{Expr_gather} -\alias{take} -\title{Take values by index.} +\title{Gather values by index} \usage{ Expr_gather(indices) } \arguments{ -\item{indices}{R scalar/vector or Series, or Expr that leads to a UInt32 dtyped Series.} +\item{indices}{R scalar/vector or Series, or Expr that leads to a Series of +dtype UInt32.} } \value{ Expr } \description{ -Take values by index. -} -\details{ -similar to R indexing syntax e.g. \code{letters[c(1,3,5)]}, however as an expression, not as eager computation -exceeding +Gather values by index } \examples{ -pl$select(pl$lit(0:10)$gather(c(1, 8, 0, 7))) +pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4))) } -\keyword{Expr} diff --git a/man/Expr_gather_every.Rd b/man/Expr_gather_every.Rd index 5c75eabb6..796eeb88f 100644 --- a/man/Expr_gather_every.Rd +++ b/man/Expr_gather_every.Rd @@ -2,21 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_gather_every} \alias{Expr_gather_every} -\alias{take_every} -\title{Take every n'th element} +\title{Gather every nth element} \usage{ Expr_gather_every(n) } \arguments{ -\item{n}{positive integerish value} +\item{n}{Positive integer.} } \value{ Expr } \description{ -Take every nth value in the Series and return as a new Series. +Gather every nth value in the Series and return as a new Series. 
} \examples{ -pl$DataFrame(list(a = 0:24))$select(pl$col("a")$gather_every(6)) +pl$DataFrame(a = 0:24)$select(pl$col("a")$gather_every(6)) } -\keyword{Expr} diff --git a/man/Expr_gt.Rd b/man/Expr_gt.Rd index dd969836a..bc6381cd4 100644 --- a/man/Expr_gt.Rd +++ b/man/Expr_gt.Rd @@ -3,33 +3,28 @@ \name{Expr_gt} \alias{Expr_gt} \alias{>.Expr} -\title{GreaterThan <} +\title{Check strictly greater inequality} \usage{ Expr_gt(other) \method{>}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -gt method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). 
} \examples{ -#' #three syntaxes same result pl$lit(2) > 1 pl$lit(2) > pl$lit(1) pl$lit(2)$gt(pl$lit(1)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_gt_eq.Rd b/man/Expr_gt_eq.Rd index 441549896..8136c7999 100644 --- a/man/Expr_gt_eq.Rd +++ b/man/Expr_gt_eq.Rd @@ -3,33 +3,28 @@ \name{Expr_gt_eq} \alias{Expr_gt_eq} \alias{>=.Expr} -\title{Greater Than Or Equal <=} +\title{Check greater or equal inequality} \usage{ Expr_gt_eq(other) \method{>=}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -gt_eq method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). } \examples{ -#' #three syntaxes same result pl$lit(2) >= 2 pl$lit(2) >= pl$lit(2) pl$lit(2)$gt_eq(pl$lit(2)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_hash.Rd b/man/Expr_hash.Rd index 61f51106e..6b017a4ec 100644 --- a/man/Expr_hash.Rd +++ b/man/Expr_hash.Rd @@ -3,34 +3,24 @@ \name{Expr_hash} \alias{Expr_hash} \alias{hash} -\title{hash} +\title{Hash elements} \usage{ Expr_hash(seed = 0, seed_1 = NULL, seed_2 = NULL, seed_3 = NULL) } \arguments{ -\item{seed}{Random seed parameter. Defaults to 0.} +\item{seed}{Random seed parameter. Defaults to 0. Doesn't have any effect +for now.} -\item{seed_1}{Random seed parameter. Defaults to arg seed.} - -\item{seed_2}{Random seed parameter. Defaults to arg seed.} - -\item{seed_3}{Random seed parameter. Defaults to arg seed. -The column will be coerced to UInt32. Give this dtype to make the coercion a -no-op.} +\item{seed_1, seed_2, seed_3}{Random seed parameter. 
Defaults to arg seed. +The column will be coerced to UInt32.} } \value{ Expr } \description{ -Hash the elements in the selection. The hash value is of type \code{UInt64}. } -\details{ -WARNING in this version of r-polars seed / seed_x takes no effect. -Possibly a bug in upstream rust-polars project. -} \examples{ -df = pl$DataFrame(iris) -df$select(pl$all()$head(2)$hash(1234)$cast(pl$Utf8))$to_list() +df = pl$DataFrame(iris[1:3, c(1, 2)]) +df$with_columns(pl$all()$hash(1234)$name$suffix("_hash")) } -\keyword{Expr} diff --git a/man/Expr_head.Rd b/man/Expr_head.Rd index b86ee9bba..47b8a5be3 100644 --- a/man/Expr_head.Rd +++ b/man/Expr_head.Rd @@ -2,23 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_head} \alias{Expr_head} -\alias{head} -\title{Head} +\title{Get the first n elements} \usage{ Expr_head(n = 10) } \arguments{ -\item{n}{numeric number of elements to select from head} +\item{n}{Number of elements to take.} } \value{ Expr } \description{ -Get the head n elements. -Similar to R head(x) +Get the first n elements } \examples{ -# get 3 first elements -pl$DataFrame(list(x = 1:11))$select(pl$col("x")$head(3)) +pl$DataFrame(x = 1:11)$select(pl$col("x")$head(3)) } -\keyword{Expr} diff --git a/man/Expr_implode.Rd b/man/Expr_implode.Rd index 7b6323778..614991257 100644 --- a/man/Expr_implode.Rd +++ b/man/Expr_implode.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_implode} \alias{Expr_implode} -\alias{list} \title{Wrap column in list} \usage{ Expr_implode @@ -14,8 +13,7 @@ Expr Aggregate values into a list. } \details{ -use to_struct to wrap a DataFrame. Notice implode() is sometimes referred to -as list() . +Use \verb{$to_struct()} to wrap a DataFrame. 
} \examples{ df = pl$DataFrame( @@ -24,4 +22,4 @@ df = pl$DataFrame( ) df$select(pl$all()$implode()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_inspect.Rd b/man/Expr_inspect.Rd index f5c7807ed..9e08eb3b3 100644 --- a/man/Expr_inspect.Rd +++ b/man/Expr_inspect.Rd @@ -2,15 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_inspect} \alias{Expr_inspect} -\alias{inspect} \title{Inspect evaluated Series} \usage{ Expr_inspect(fmt = "{}") } \arguments{ -\item{fmt}{format string, should contain one set of \code{{}} where object will be printed -This formatting mimics python "string".format() use in pypolars. The string can -contain any thing but should have exactly one set of curly bracket \code{{}}.} +\item{fmt}{format string, should contain one set of \code{{}} where object will be +printed. This formatting mimics python "string".format() use in py-polars.} } \value{ Expr @@ -21,7 +19,6 @@ The printing will happen when the expression evaluates, not when it is formed. } \examples{ pl$select(pl$lit(1:5)$inspect( - "before dropping half the column it was:{}and not it is dropped" + "Here's what the Series looked like before keeping the first two values: {}" )$head(2)) } -\keyword{Expr} diff --git a/man/Expr_interpolate.Rd b/man/Expr_interpolate.Rd index 9033443ee..0e56d5d22 100644 --- a/man/Expr_interpolate.Rd +++ b/man/Expr_interpolate.Rd @@ -2,30 +2,35 @@ % Please edit documentation in R/expr__expr.R \name{Expr_interpolate} \alias{Expr_interpolate} -\alias{interpolate} -\title{Interpolate \code{Nulls}} +\title{Interpolate null values} \usage{ Expr_interpolate(method = "linear") } \arguments{ -\item{method}{string 'linear' or 'nearest', default "linear"} +\item{method}{String, either \code{"linear"} (default) or \code{"nearest"}.} } \value{ Expr } \description{ -Fill nulls with linear interpolation over missing values. -Can also be used to regrid data to a new grid - see examples below. 
+Fill nulls with linear interpolation using non-missing values. Can also be +used to regrid data to a new grid - see examples below. } \examples{ -pl$select(pl$lit(c(1, NA, 4, NA, 100, NaN, 150))$interpolate()) +pl$DataFrame(x = c(1, NA, 4, NA, 100, NaN, 150))$ + with_columns( + interp_lin = pl$col("x")$interpolate(), + interp_near = pl$col("x")$interpolate("nearest") + ) # x, y interpolation over a grid -df_original_grid = pl$DataFrame(list( +df_original_grid = pl$DataFrame( grid_points = c(1, 3, 10), values = c(2.0, 6.0, 20.0) -)) -df_new_grid = pl$DataFrame(list(grid_points = (1:10) * 1.0)) +) +df_original_grid +df_new_grid = pl$DataFrame(grid_points = (1:10) * 1.0) +df_new_grid # Interpolate from this to the new grid df_new_grid$join( @@ -33,4 +38,3 @@ df_new_grid$join( on = "grid_points", how = "left" )$with_columns(pl$col("values")$interpolate()) } -\keyword{Expr} diff --git a/man/Expr_is_between.Rd b/man/Expr_is_between.Rd index 1e335cd88..b4be3004c 100644 --- a/man/Expr_is_between.Rd +++ b/man/Expr_is_between.Rd @@ -2,39 +2,31 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_between} \alias{Expr_is_between} -\title{is in between} +\title{Check whether a value is between two values} \usage{ Expr_is_between(start, end, include_bounds = FALSE) } \arguments{ -\item{start}{Lower bound as primitive or datetime} +\item{start}{Lower bound, an Expr that is either numeric or datetime.} -\item{end}{Lower bound as primitive or datetime} +\item{end}{Upper bound, an Expr that is either numeric or datetime.} -\item{include_bounds}{bool vector or scalar: -FALSE: Exclude both start and end (default). -TRUE: Include both start and end. -c(FALSE, FALSE): Exclude start and exclude end. -c(TRUE, TRUE): Include start and include end. -c(FALSE, TRUE): Exclude start and include end. -c(TRUE, FALSE): Include start and exclude end.} +\item{include_bounds}{If \code{FALSE} (default), exclude start and end. 
This can +also be a vector of two booleans indicating whether to include the start +and/or the end.} } \value{ Expr } \description{ -Check if this expression is between start and end. -} -\details{ -alias the column to 'in_between' -This function is equivalent to a combination of < <= >= and the &-and operator. +This is syntactic sugar for \code{x > start & x < end} (or \code{x >= start & x <= end}). } \examples{ -df = pl$DataFrame(list(num = 1:5)) -df$select(pl$col("num")$is_between(2, 4)) -df$select(pl$col("num")$is_between(2, 4, TRUE)) -df$select(pl$col("num")$is_between(2, 4, c(FALSE, TRUE))) -# start end can be a vector/expr with same length as column -df$select(pl$col("num")$is_between(c(0, 2, 3, 3, 3), 6)) +df = pl$DataFrame(num = 1:5, y = c(0, 2, 3, 3, 3)) +df$with_columns( + bet_2_4_no_bounds = pl$col("num")$is_between(2, 4), + bet_2_4_with_bounds = pl$col("num")$is_between(2, 4, TRUE), + bet_2_4_upper_bound = pl$col("num")$is_between(2, 4, c(FALSE, TRUE)), + between_y_4 = pl$col("num")$is_between(pl$col("y"), 6) +) } -\keyword{Expr} diff --git a/man/Expr_is_duplicated.Rd b/man/Expr_is_duplicated.Rd index 65831922c..fa36fd67d 100644 --- a/man/Expr_is_duplicated.Rd +++ b/man/Expr_is_duplicated.Rd @@ -2,36 +2,18 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_duplicated} \alias{Expr_is_duplicated} -\alias{is_duplicated} -\title{Get mask of duplicated values.} +\title{Check whether each value is duplicated} \usage{ Expr_is_duplicated } \value{ -Expr (boolean) +Expr } \description{ -Get mask of duplicated values. -} -\details{ -is_duplicated is the opposite of \code{is_unique()} -Looking for R like \code{duplicated()}?, use \code{some_expr$is_first()$not_()} +This is syntactic sugar for \verb{$is_unique()$not_()}. 
} \examples{ -v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -all.equal( - pl$select( - pl$lit(v)$is_unique()$alias("is_unique"), - pl$lit(v)$is_first()$alias("is_first"), - pl$lit(v)$is_duplicated()$alias("is_duplicated"), - pl$lit(v)$is_first()$not_()$alias("R_duplicated") - )$to_list(), - list( - is_unique = !v \%in\% v[duplicated(v)], - is_first = !duplicated(v), - is_duplicated = v \%in\% v[duplicated(v)], - R_duplicated = duplicated(v) - ) -) +pl$DataFrame(head(mtcars[, 1:2]))$ + with_columns(is_duplicated = pl$col("mpg")$is_duplicated()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_finite.Rd b/man/Expr_is_finite.Rd index 6d1f66b06..c06e54d13 100644 --- a/man/Expr_is_finite.Rd +++ b/man/Expr_is_finite.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_finite} \alias{Expr_is_finite} -\alias{is_finite} -\title{Are elements finite} +\title{Check if elements are finite} \usage{ Expr_is_finite } @@ -11,12 +10,10 @@ Expr_is_finite Expr } \description{ -Returns a boolean output indicating which values are finite. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Returns a boolean Series indicating which values are finite. } \examples{ -pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_finite()) +pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ + with_columns(finite = pl$col("alice")$is_finite()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_first.Rd b/man/Expr_is_first.Rd index 782110edb..1f5bad84a 100644 --- a/man/Expr_is_first.Rd +++ b/man/Expr_is_first.Rd @@ -2,31 +2,18 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_first} \alias{Expr_is_first} -\title{Get a mask of the first unique value.} +\title{Check whether each value is the first occurrence} \usage{ Expr_is_first } \value{ -Expr (boolean) +Expr } \description{ -Get a mask of the first unique value. 
+Check whether each value is the first occurrence } \examples{ -v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -all.equal( - pl$select( - pl$lit(v)$is_unique()$alias("is_unique"), - pl$lit(v)$is_first()$alias("is_first"), - pl$lit(v)$is_duplicated()$alias("is_duplicated"), - pl$lit(v)$is_first()$not_()$alias("R_duplicated") - )$to_list(), - list( - is_unique = !v \%in\% v[duplicated(v)], - is_first = !duplicated(v), - is_duplicated = v \%in\% v[duplicated(v)], - R_duplicated = duplicated(v) - ) -) +pl$DataFrame(head(mtcars[, 1:2]))$ + with_columns(is_first = pl$col("mpg")$is_first()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_in.Rd b/man/Expr_is_in.Rd index ab2f084f8..f6d2d69be 100644 --- a/man/Expr_is_in.Rd +++ b/man/Expr_is_in.Rd @@ -2,26 +2,36 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_in} \alias{Expr_is_in} -\title{is_in} +\title{Check whether a value is in a vector} \usage{ Expr_is_in(other) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} } \value{ Expr } \description{ -combine to boolean expressions with similar to \code{\%in\%} +Notice that to check whether a factor value is in a vector of strings, you +need to use the string cache, either with \code{pl$enable_string_cache()} or +with \code{pl$with_string_cache()}. See examples.
} \examples{ +pl$DataFrame(a = c(1:4, NA_integer_))$with_columns( + in_1_3 = pl$col("a")$is_in(c(1, 3)), + in_NA = pl$col("a")$is_in(pl$lit(NA_real_)) +) -# R Na_integer -> polars Null(Int32) is in polars Null(Int32) -pl$DataFrame(list(a = c(1:4, NA_integer_)))$select( - pl$col("a")$is_in(pl$lit(NA_real_)) -)$to_data_frame()[[1L]] +# this fails because we can't compare factors to strings +# pl$DataFrame(a = factor(letters[1:5]))$with_columns( +# in_abc = pl$col("a")$is_in(c("a", "b", "c")) +# ) +# need to use the string cache for this +pl$with_string_cache({ + pl$DataFrame(a = factor(letters[1:5]))$with_columns( + in_abc = pl$col("a")$is_in(c("a", "b", "c")) + ) +}) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_is_infinite.Rd b/man/Expr_is_infinite.Rd index 680a4dbf0..927f1d1dd 100644 --- a/man/Expr_is_infinite.Rd +++ b/man/Expr_is_infinite.Rd @@ -3,7 +3,7 @@ \name{Expr_is_infinite} \alias{Expr_is_infinite} \alias{is_infinite} -\title{Are elements infinite} +\title{Check if elements are infinite} \usage{ Expr_is_infinite } @@ -11,12 +11,10 @@ Expr_is_infinite Expr } \description{ -Returns a boolean output indicating which values are infinite. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Returns a boolean Series indicating which values are infinite. } \examples{ -pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_infinite()) +pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ + with_columns(infinite = pl$col("alice")$is_infinite()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_nan.Rd b/man/Expr_is_nan.Rd index 1b5b21d4c..c21a28b30 100644 --- a/man/Expr_is_nan.Rd +++ b/man/Expr_is_nan.Rd @@ -3,7 +3,7 @@ \name{Expr_is_nan} \alias{Expr_is_nan} \alias{is_nan} -\title{Are elements NaN's} +\title{Check if elements are NaN} \usage{ Expr_is_nan } @@ -13,12 +13,8 @@ Expr \description{ Returns a boolean Series indicating which values are NaN. 
} -\details{ -Floating point NaN's are a different flag from Null(polars) which is the same as -NA_real_(R). -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -} \examples{ -pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_nan()) +pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ + with_columns(nan = pl$col("alice")$is_nan()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_not_nan.Rd b/man/Expr_is_not_nan.Rd index 419f878f7..aec862cf2 100644 --- a/man/Expr_is_not_nan.Rd +++ b/man/Expr_is_not_nan.Rd @@ -3,7 +3,7 @@ \name{Expr_is_not_nan} \alias{Expr_is_not_nan} \alias{is_not_nan} -\title{Are elements not NaN's} +\title{Check if elements are not NaN} \usage{ Expr_is_not_nan } @@ -11,15 +11,11 @@ Expr_is_not_nan Expr } \description{ -Returns a boolean Series indicating which values are not NaN. -} -\details{ -Floating point NaN's are a different flag from Null(polars) which is the same as -NA_real_(R). - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Returns a boolean Series indicating which values are not NaN. Syntactic sugar +for \verb{$is_nan()$not_()}. } \examples{ -pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$select(pl$col("alice")$is_not_nan()) +pl$DataFrame(list(alice = c(0, NaN, NA, Inf, -Inf)))$ + with_columns(not_nan = pl$col("alice")$is_not_nan()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_not_null.Rd b/man/Expr_is_not_null.Rd index 788d24f37..ddca827f4 100644 --- a/man/Expr_is_not_null.Rd +++ b/man/Expr_is_not_null.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_not_null} \alias{Expr_is_not_null} -\title{is_not_null} +\title{Check if elements are not NULL} \usage{ Expr_is_not_null } @@ -10,14 +10,10 @@ Expr_is_not_null Expr } \description{ -Returns a boolean Series indicating which values are not null. 
-Similar to R syntax !is.na(x) -null polars about the same as R NA -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Returns a boolean Series indicating which values are not null. Syntactic sugar +for \verb{$is_null()$not_()}. } \examples{ pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$is_not_null()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_null.Rd b/man/Expr_is_null.Rd index ced3c9b76..81f9e2636 100644 --- a/man/Expr_is_null.Rd +++ b/man/Expr_is_null.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_null} \alias{Expr_is_null} -\title{is_null} +\title{Check if elements are NULL} \usage{ Expr_is_null } @@ -11,13 +11,8 @@ Expr } \description{ Returns a boolean Series indicating which values are null. -Similar to R syntax is.na(x) -null polars about the same as R NA -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} } \examples{ pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$is_null()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_is_unique.Rd b/man/Expr_is_unique.Rd index c28fb715a..ce7166598 100644 --- a/man/Expr_is_unique.Rd +++ b/man/Expr_is_unique.Rd @@ -2,31 +2,18 @@ % Please edit documentation in R/expr__expr.R \name{Expr_is_unique} \alias{Expr_is_unique} -\title{Get mask of unique values} +\title{Check whether each value is unique} \usage{ Expr_is_unique } \value{ -Expr (boolean) +Expr } \description{ -Get mask of unique values +Check whether each value is unique } \examples{ -v = c(1, 1, 2, 2, 3, NA, NaN, Inf) -all.equal( - pl$select( - pl$lit(v)$is_unique()$alias("is_unique"), - pl$lit(v)$is_first()$alias("is_first"), - pl$lit(v)$is_duplicated()$alias("is_duplicated"), - pl$lit(v)$is_first()$not_()$alias("R_duplicated") - )$to_list(), - list( - is_unique = !v \%in\% v[duplicated(v)], - is_first = !duplicated(v), - is_duplicated = v \%in\% v[duplicated(v)], - R_duplicated = 
duplicated(v) - ) -) +pl$DataFrame(head(mtcars[, 1:2]))$ + with_columns(is_unique = pl$col("mpg")$is_unique()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_kurtosis.Rd b/man/Expr_kurtosis.Rd index cbe5e20d8..c5ca44524 100644 --- a/man/Expr_kurtosis.Rd +++ b/man/Expr_kurtosis.Rd @@ -2,15 +2,16 @@ % Please edit documentation in R/expr__expr.R \name{Expr_kurtosis} \alias{Expr_kurtosis} -\alias{kurtosis} \title{Kurtosis} \usage{ Expr_kurtosis(fisher = TRUE, bias = TRUE) } \arguments{ -\item{fisher}{bool se details} +\item{fisher}{If \code{TRUE} (default), Fisher’s definition is used (normal, +centered at 0). Otherwise, Pearson’s definition is used (normal, centered at +3).} -\item{bias}{bool, If FALSE, then the calculations are corrected for statistical bias.} +\item{bias}{If \code{FALSE}, the calculations are corrected for statistical bias.} } \value{ Expr @@ -19,20 +20,14 @@ Expr Compute the kurtosis (Fisher or Pearson) of a dataset. } \details{ -Kurtosis is the fourth central moment divided by the square of the -variance. If Fisher's definition is used, then 3.0 is subtracted from -the result to give 0.0 for a normal distribution. -If bias is False then the kurtosis is calculated using k statistics to -eliminate bias coming from biased moment estimators -See scipy.stats for more information +Kurtosis is the fourth central moment divided by the square of the variance. +If Fisher's definition is used, then 3 is subtracted from the result to +give 0 for a normal distribution. -#' See scipy.stats for more information. +If bias is \code{FALSE}, then the kurtosis is calculated using \code{k} statistics to +eliminate bias coming from biased moment estimators. 
} \examples{ -df = pl$DataFrame(list(a = c(1:3, 2:1))) -df$select(pl$col("a")$kurtosis()) +pl$DataFrame(a = c(1:3, 2:1))$ + with_columns(kurt = pl$col("a")$kurtosis()) } -\references{ -https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html?highlight=kurtosis -} -\keyword{Expr} diff --git a/man/Expr_last.Rd b/man/Expr_last.Rd index eee43e046..f0d56dd98 100644 --- a/man/Expr_last.Rd +++ b/man/Expr_last.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_last} \alias{Expr_last} -\title{Last} +\title{Get the last value} \usage{ Expr_last } @@ -10,10 +10,9 @@ Expr_last Expr } \description{ -Get the lastvalue. -Similar to R syntax tail(x,1) +Get the last value } \examples{ -pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$last()) +pl$DataFrame(x = 3:1)$with_columns(last = pl$col("x")$last()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_limit.Rd b/man/Expr_limit.Rd index b58bc7239..c3b348496 100644 --- a/man/Expr_limit.Rd +++ b/man/Expr_limit.Rd @@ -2,23 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_limit} \alias{Expr_limit} -\title{Limit} +\title{Get the first n elements} \usage{ Expr_limit(n = 10) } \arguments{ -\item{n}{numeric number of elements to select from head} +\item{n}{Number of elements to take.} } \value{ Expr } \description{ -Alias for Head -Get the head n elements. -Similar to R head(x) +This is an alias for \verb{$head()}. 
} \examples{ -# get 3 first elements -pl$DataFrame(list(x = 1:11))$select(pl$col("x")$limit(3)) +pl$DataFrame(x = 1:11)$select(pl$col("x")$limit(3)) } -\keyword{Expr} diff --git a/man/Expr_list.Rd b/man/Expr_list.Rd deleted file mode 100644 index 5ce5d48a4..000000000 --- a/man/Expr_list.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_list} -\alias{Expr_list} -\alias{list_ns} -\title{list: list related methods} -\usage{ -Expr_list() -} -\value{ -Expr -} -\description{ -Create an object namespace of all list related methods. -See the individual method pages for full details -} -\examples{ -df_with_list = pl$DataFrame( - group = c(1, 1, 2, 2, 3), - value = c(1:5) -)$group_by( - "group", - maintain_order = TRUE -)$agg( - pl$col("value") * 3L -) -df_with_list$with_columns( - pl$col("value")$list$lengths()$alias("group_size") -) -} -\keyword{Expr} diff --git a/man/Expr_lit.Rd b/man/Expr_lit.Rd index 6aed52851..b2aac80ba 100644 --- a/man/Expr_lit.Rd +++ b/man/Expr_lit.Rd @@ -2,19 +2,18 @@ % Please edit documentation in R/expr__expr.R \name{Expr_lit} \alias{Expr_lit} -\alias{lit} -\title{Return an expression representing a literal value} +\title{Create a literal value} \usage{ Expr_lit(x) } \arguments{ -\item{x}{An R Scalar, or R vector/list (via Series)} +\item{x}{A vector of any length} } \value{ Expr } \description{ -Return an expression representing a literal value +Create a literal value } \details{ \code{pl$lit(NULL)} translates into a polars \code{null}. 
diff --git a/man/Expr_lit_to_df.Rd b/man/Expr_lit_to_df.Rd index 65e8e8d14..b4ed34744 100644 --- a/man/Expr_lit_to_df.Rd +++ b/man/Expr_lit_to_df.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_lit_to_df} \alias{Expr_lit_to_df} -\alias{lit_to_df} -\title{Literal to DataFrame} +\title{Convert Literal to DataFrame} \usage{ Expr_lit_to_df() } @@ -11,17 +10,8 @@ Expr_lit_to_df() Series } \description{ -collect an expression based on literals into a DataFrame +Collect an expression based on literals into a DataFrame. } \examples{ -( - pl$Series(list(1:1, 1:2, 1:3, 1:4)) - $print() - $to_lit() - $list$lengths() - $sum() - $cast(pl$dtypes$Int8) - $lit_to_df() -) +pl$lit(1:5)$lit_to_df() } -\keyword{Expr} diff --git a/man/Expr_lit_to_s.Rd b/man/Expr_lit_to_s.Rd index fab228980..9b0760b56 100644 --- a/man/Expr_lit_to_s.Rd +++ b/man/Expr_lit_to_s.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_lit_to_s} \alias{Expr_lit_to_s} -\alias{lit_to_s} -\title{Literal to Series} +\title{Convert Literal to Series} \usage{ Expr_lit_to_s() } @@ -11,17 +10,8 @@ Expr_lit_to_s() Series } \description{ -collect an expression based on literals into a Series +Collect an expression based on literals into a Series. 
} \examples{ -( - pl$Series(list(1:1, 1:2, 1:3, 1:4)) - $print() - $to_lit() - $list$lengths() - $sum() - $cast(pl$dtypes$Int8) - $lit_to_s() -) +pl$lit(1:5)$lit_to_s() } -\keyword{Expr} diff --git a/man/Expr_log.Rd b/man/Expr_log.Rd index e334b3797..bc8280d68 100644 --- a/man/Expr_log.Rd +++ b/man/Expr_log.Rd @@ -2,21 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_log} \alias{Expr_log} -\alias{log} -\title{Natural Log} +\title{Compute the logarithm of elements} \usage{ Expr_log(base = base::exp(1)) } \arguments{ -\item{base}{numeric base value for log, default base::exp(1)} +\item{base}{Numeric base value for logarithm, default is \code{exp(1)}.} } \value{ Expr } \description{ -Compute the base x logarithm of the input array, element-wise. +Compute the logarithm of elements } \examples{ -pl$DataFrame(list(a = exp(1)^(-1:3)))$select(pl$col("a")$log()) +pl$DataFrame(a = c(1, 2, 3, exp(1)))$ + with_columns(log = pl$col("a")$log()) } -\keyword{Expr} diff --git a/man/Expr_log10.Rd b/man/Expr_log10.Rd index 846fade5a..b1d52aac7 100644 --- a/man/Expr_log10.Rd +++ b/man/Expr_log10.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_log10} \alias{Expr_log10} -\alias{log10} -\title{10-base log} +\title{Compute the base-10 logarithm of elements} \usage{ Expr_log10 } @@ -11,9 +10,10 @@ Expr_log10 Expr } \description{ -Compute the base 10 logarithm of the input array, element-wise. 
+Compute the base-10 logarithm of elements } \examples{ -pl$DataFrame(list(a = 10^(-1:3)))$select(pl$col("a")$log10()) +pl$DataFrame(a = c(1, 2, 3, exp(1)))$ + with_columns(log10 = pl$col("a")$log10()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_lower_bound.Rd b/man/Expr_lower_bound.Rd new file mode 100644 index 000000000..7f20b74e8 --- /dev/null +++ b/man/Expr_lower_bound.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_lower_bound} +\alias{Expr_lower_bound} +\title{Find the lower bound of a DataType} +\usage{ +Expr_lower_bound +} +\value{ +Expr +} +\description{ +Find the lower bound of a DataType +} +\examples{ +pl$DataFrame(x = 1:3, y = 1:3, + schema = list(x = pl$UInt32, y = pl$Int32))$ + select(pl$all()$lower_bound()) +} +\keyword{datasets} diff --git a/man/Expr_lt.Rd b/man/Expr_lt.Rd index 99fa11602..b69a00c81 100644 --- a/man/Expr_lt.Rd +++ b/man/Expr_lt.Rd @@ -3,33 +3,28 @@ \name{Expr_lt} \alias{Expr_lt} \alias{<.Expr} -\title{Less Than <} +\title{Check strictly lower inequality} \usage{ Expr_lt(other) \method{<}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -lt method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). 
} \examples{ -#' #three syntaxes same result pl$lit(5) < 10 pl$lit(5) < pl$lit(10) pl$lit(5)$lt(pl$lit(10)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_lt_eq.Rd b/man/Expr_lt_eq.Rd index b01d74d97..d89e55147 100644 --- a/man/Expr_lt_eq.Rd +++ b/man/Expr_lt_eq.Rd @@ -3,33 +3,28 @@ \name{Expr_lt_eq} \alias{Expr_lt_eq} \alias{<=.Expr} -\title{Less Than Or Equal <=} +\title{Check lower or equal inequality} \usage{ Expr_lt_eq(other) \method{<=}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -lt_eq method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). } \examples{ -#' #three syntaxes same result pl$lit(2) <= 2 pl$lit(2) <= pl$lit(2) pl$lit(2)$lt_eq(pl$lit(2)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_map.Rd b/man/Expr_map.Rd index 348d3ab21..81b7bf768 100644 --- a/man/Expr_map.Rd +++ b/man/Expr_map.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_map} \alias{Expr_map} -\title{Map an expression with an R function.} +\title{Map an expression with an R function} \usage{ Expr_map(f, output_type = NULL, agg_list = FALSE, in_background = FALSE) } @@ -10,17 +10,17 @@ Expr_map(f, output_type = NULL, agg_list = FALSE, in_background = FALSE) \item{f}{a function to map with} \item{output_type}{\code{NULL} or a type available in \code{names(pl$dtypes)}. If \code{NULL} -(default), the output datatype will match is the input datatype. This is used +(default), the output datatype will match the input datatype. 
This is used to inform schema of the actual return type of the R function. Setting this wrong could theoretically have some downstream implications to the query.} -\item{agg_list}{Aggregate list. Map from vector to group in groupby context.} +\item{agg_list}{Aggregate list. Map from vector to group in group_by context.} \item{in_background}{Boolean. Whether to execute the map in a background R process. Combined with setting e.g. \code{pl$set_options(rpool_cap = 4)} it can speed up some slow R functions as they can run in parallel R sessions. The communication speed between processes is quite slower than between threads. -This will likely only give a speed-up in a "low IO - high CPU" usecase. +This will likely only give a speed-up in a "low IO - high CPU" use case. If there are multiple \verb{$map(in_background = TRUE)} calls in the query, they will be run in parallel.} } @@ -28,7 +28,7 @@ will be run in parallel.} Expr } \description{ -Map an expression with an R function. +Map an expression with an R function } \details{ It is sometimes necessary to apply a specific R function on one or several @@ -85,4 +85,3 @@ pl$LazyFrame(a = 1, b = 2, c = 3, d = 4)$select( }, in_background = TRUE) )$collect() |> system.time() } -\keyword{Expr} diff --git a/man/Expr_max.Rd b/man/Expr_max.Rd index 2753262f5..6dd1f23bf 100644 --- a/man/Expr_max.Rd +++ b/man/Expr_max.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_max} \alias{Expr_max} -\title{max} +\title{Get maximum value} \usage{ Expr_max } @@ -10,12 +10,10 @@ Expr_max Expr } \description{ -Get maximum value. 
-} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Get maximum value } \examples{ -pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$max() == 3) # is true +pl$DataFrame(x = c(1, NA, 3))$ + with_columns(max = pl$col("x")$max()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_mean.Rd b/man/Expr_mean.Rd index 926fd6f1a..764aa31e1 100644 --- a/man/Expr_mean.Rd +++ b/man/Expr_mean.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_mean} \alias{Expr_mean} -\title{mean} +\title{Get mean value} \usage{ Expr_mean } @@ -10,9 +10,10 @@ Expr_mean Expr } \description{ -Get mean value. +Get mean value } \examples{ -pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$mean() == 2) # is true +pl$DataFrame(x = c(1L, NA, 2L))$ + with_columns(mean = pl$col("x")$mean()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_median.Rd b/man/Expr_median.Rd index 79027c2f6..0582ac717 100644 --- a/man/Expr_median.Rd +++ b/man/Expr_median.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_median} \alias{Expr_median} -\title{median} +\title{Get median value} \usage{ Expr_median } @@ -10,9 +10,10 @@ Expr_median Expr } \description{ -Get median value. +Get median value } \examples{ -pl$DataFrame(list(x = c(1, NA, 2)))$select(pl$col("x")$median() == 1.5) # is true +pl$DataFrame(x = c(1L, NA, 2L))$ + with_columns(median = pl$col("x")$median()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_meta.Rd b/man/Expr_meta.Rd deleted file mode 100644 index ddadaadba..000000000 --- a/man/Expr_meta.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_meta} -\alias{Expr_meta} -\alias{meta_ns} -\title{meta: related methods} -\usage{ -Expr_meta() -} -\value{ -Expr -} -\description{ -Create an object namespace of all meta related methods. 
-See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_min.Rd b/man/Expr_min.Rd index b9147645b..fa741cd71 100644 --- a/man/Expr_min.Rd +++ b/man/Expr_min.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_min} \alias{Expr_min} -\title{min} +\title{Get minimum value} \usage{ Expr_min } @@ -10,12 +10,10 @@ Expr_min Expr } \description{ -Get minimum value. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Get minimum value } \examples{ -pl$DataFrame(list(x = c(1, NA, 3)))$select(pl$col("x")$min() == 1) # is true +pl$DataFrame(x = c(1, NA, 3))$ + with_columns(min = pl$col("x")$min()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_mode.Rd b/man/Expr_mode.Rd index eb376bd4a..756b2b574 100644 --- a/man/Expr_mode.Rd +++ b/man/Expr_mode.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_mode} \alias{Expr_mode} -\alias{mode} \title{Mode} \usage{ Expr_mode @@ -11,12 +10,13 @@ Expr_mode Expr } \description{ -Compute the most occurring value(s). Can return multiple Values. +Compute the most occurring value(s). Can return multiple values if there are +ties. 
} \examples{ -df = pl$DataFrame(list(a = 1:6, b = c(1L, 1L, 3L, 3L, 5L, 6L), c = c(1L, 1L, 2L, 2L, 3L, 3L))) +df = pl$DataFrame(a = 1:6, b = c(1L, 1L, 3L, 3L, 5L, 6L), c = c(1L, 1L, 2L, 2L, 3L, 3L)) df$select(pl$col("a")$mode()) df$select(pl$col("b")$mode()) df$select(pl$col("c")$mode()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_mul.Rd b/man/Expr_mul.Rd index 43828a4c4..9d50fd35d 100644 --- a/man/Expr_mul.Rd +++ b/man/Expr_mul.Rd @@ -3,30 +3,28 @@ \name{Expr_mul} \alias{Expr_mul} \alias{*.Expr} -\title{Mul *} +\title{Multiply two expressions} \usage{ Expr_mul(other) \method{*}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -Multiplication +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). } \examples{ -# three syntaxes same result pl$lit(5) * 10 pl$lit(5) * pl$lit(10) pl$lit(5)$mul(pl$lit(10)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_n_unique.Rd b/man/Expr_n_unique.Rd index ed6d72a84..309bdda02 100644 --- a/man/Expr_n_unique.Rd +++ b/man/Expr_n_unique.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_n_unique} \alias{Expr_n_unique} -\alias{n_unique} \title{Count number of unique values} \usage{ Expr_n_unique @@ -11,10 +10,9 @@ Expr_n_unique Expr } \description{ -Count number of unique values. 
-Similar to R length(unique(x)) +Count number of unique values } \examples{ -pl$DataFrame(iris)$select(pl$col("Species")$n_unique()) +pl$DataFrame(iris[, 4:5])$with_columns(count = pl$col("Species")$n_unique()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_name.Rd b/man/Expr_name.Rd deleted file mode 100644 index 85ba164ce..000000000 --- a/man/Expr_name.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_name} -\alias{Expr_name} -\alias{name_ns} -\title{name: related methods} -\usage{ -Expr_name() -} -\value{ -Expr -} -\description{ -Create an object namespace of all name related methods. -See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_nan_max.Rd b/man/Expr_nan_max.Rd index 591d5840d..3b599d5b9 100644 --- a/man/Expr_nan_max.Rd +++ b/man/Expr_nan_max.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_nan_max} \alias{Expr_nan_max} -\title{max} +\title{Get maximum value with NaN} \usage{ Expr_nan_max } @@ -10,13 +10,10 @@ Expr_nan_max Expr } \description{ -Get maximum value, but propagate/poison encountered \code{NaN} values. -Get maximum value. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Get maximum value, but returns \code{NaN} if there are any. 
} \examples{ -pl$DataFrame(list(x = c(1, NaN, Inf, 3)))$select(pl$col("x")$nan_max()$is_nan()) # is true +pl$DataFrame(x = c(1, NA, 3, NaN, Inf))$ + with_columns(nan_max = pl$col("x")$nan_max()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_nan_min.Rd b/man/Expr_nan_min.Rd index bc33b625a..ae823d316 100644 --- a/man/Expr_nan_min.Rd +++ b/man/Expr_nan_min.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_nan_min} \alias{Expr_nan_min} -\title{min propagate NaN} +\title{Get minimum value with NaN} \usage{ Expr_nan_min } @@ -10,12 +10,10 @@ Expr_nan_min Expr } \description{ -Get minimum value, but propagate/poison encountered \code{NaN} values. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Get minimum value, but returns \code{NaN} if there are any. } \examples{ -pl$DataFrame(list(x = c(1, NaN, -Inf, 3)))$select(pl$col("x")$nan_min()$is_nan()) # is true +pl$DataFrame(x = c(1, NA, 3, NaN, Inf))$ + with_columns(nan_min = pl$col("x")$nan_min()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_neq.Rd b/man/Expr_neq.Rd index a5f69b515..77fb0650b 100644 --- a/man/Expr_neq.Rd +++ b/man/Expr_neq.Rd @@ -3,33 +3,28 @@ \name{Expr_neq} \alias{Expr_neq} \alias{!=.Expr} -\title{Not Equal !=} +\title{Check inequality} \usage{ Expr_neq(other) \method{!=}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -neq method and operator -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +The RHS can either be an Expr or an object that can be converted to a literal +(e.g an integer). 
} \examples{ -#' #three syntaxes same result pl$lit(1) != 2 pl$lit(1) != pl$lit(2) pl$lit(1)$neq(pl$lit(2)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_not_.Rd b/man/Expr_not_.Rd index 45795d536..60626dbf7 100644 --- a/man/Expr_not_.Rd +++ b/man/Expr_not_.Rd @@ -3,27 +3,25 @@ \name{Expr_not_} \alias{Expr_not_} \alias{!.Expr} -\title{Not !} +\title{Negate a boolean expression} \usage{ -Expr_not_(other) +Expr_not_ \method{!}{Expr}(x) } \arguments{ \item{x}{Expr} - -\item{other}{literal or Robj which can become a literal} } \value{ -Exprs +Expr } \description{ -not method and operator +Negate a boolean expression. This is equivalent to applying the \code{!} +operator to the expression. } \examples{ # two syntaxes same result pl$lit(TRUE)$not_() !pl$lit(TRUE) } -\keyword{Expr} -\keyword{Expr_operators} +\keyword{datasets} diff --git a/man/Expr_null_count.Rd b/man/Expr_null_count.Rd index 626c44956..72c88dda5 100644 --- a/man/Expr_null_count.Rd +++ b/man/Expr_null_count.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_null_count} \alias{Expr_null_count} -\alias{null_count} -\title{Count \code{Nulls}} +\title{Count missing values} \usage{ Expr_null_count } @@ -11,9 +10,10 @@ Expr_null_count Expr } \description{ -Count \code{Nulls} +Count missing values } \examples{ -pl$select(pl$lit(c(NA, "a", NA, "b"))$null_count()) +pl$DataFrame(x = c(NA, "a", NA, "b"))$ + with_columns(n_missing = pl$col("x")$null_count()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_or.Rd b/man/Expr_or.Rd index c3eac75cc..9ab2ee3f9 100644 --- a/man/Expr_or.Rd +++ b/man/Expr_or.Rd @@ -2,22 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_or} \alias{Expr_or} -\title{Or} +\title{Apply logical OR on two expressions} \usage{ Expr_or(other) } \arguments{ -\item{other}{Expr or into Expr} +\item{other}{Literal or object that can be converted to a literal} } \value{ Expr } \description{ -combine to boolean expressions with OR +Combine
two boolean expressions with OR. } \examples{ pl$lit(TRUE) | FALSE pl$lit(TRUE)$or(pl$lit(TRUE)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_over.Rd b/man/Expr_over.Rd index 35caeb8c9..49a590e68 100644 --- a/man/Expr_over.Rd +++ b/man/Expr_over.Rd @@ -2,28 +2,27 @@ % Please edit documentation in R/expr__expr.R \name{Expr_over} \alias{Expr_over} -\title{over} +\title{Apply window function over a subgroup} \usage{ Expr_over(...) } \arguments{ -\item{...}{of strings or columns to group by} +\item{...}{Character vector indicating the columns to group by.} } \value{ Expr } \description{ -Apply window function over a subgroup. -This is similar to a groupby + aggregation + self join. -Or similar to \verb{window functions in Postgres }_. +This applies an expression on groups and returns the same number of rows as +the input (contrary to \verb{$group_by()} + \verb{$agg()}). } \examples{ pl$DataFrame( val = 1:5, a = c("+", "+", "-", "-", "+"), b = c("+", "-", "+", "-", "+") -)$select( - pl$col("val")$count()$over("a", "b") +)$with_columns( + count = pl$col("val")$count()$over("a", "b") ) over_vars = c("a", "b") @@ -31,8 +30,7 @@ pl$DataFrame( val = 1:5, a = c("+", "+", "-", "-", "+"), b = c("+", "-", "+", "-", "+") -)$select( - pl$col("val")$count()$over(over_vars) +)$with_columns( + count = pl$col("val")$count()$over(over_vars) ) } -\keyword{Expr} diff --git a/man/Expr_pct_change.Rd b/man/Expr_pct_change.Rd index b8c04c91f..9f537ec2f 100644 --- a/man/Expr_pct_change.Rd +++ b/man/Expr_pct_change.Rd @@ -2,25 +2,22 @@ % Please edit documentation in R/expr__expr.R \name{Expr_pct_change} \alias{Expr_pct_change} -\alias{pct_change} -\title{Pct change} +\title{Percentage change} \usage{ Expr_pct_change(n = 1) } \arguments{ -\item{n}{periods to shift for forming percent change.} +\item{n}{Periods to shift for computing percent change.} } \value{ Expr } \description{ -Computes percentage change between values.
-Percentage change (as fraction) between current element and most-recent -non-null element at least \code{n} period(s) before the current element. +Computes percentage change (as fraction) between current element and +most-recent non-null element at least \code{n} period(s) before the current element. Computes the change from the previous row by default. } \examples{ -df = pl$DataFrame(list(a = c(10L, 11L, 12L, NA_integer_, 12L))) -df$with_columns(pl$col("a")$pct_change()$alias("pct_change")) +pl$DataFrame(a = c(10L, 11L, 12L, NA_integer_, 12L))$ + with_columns(pct_change = pl$col("a")$pct_change()) } -\keyword{Expr} diff --git a/man/Expr_pow.Rd b/man/Expr_pow.Rd index a62eb1a2c..c09428668 100644 --- a/man/Expr_pow.Rd +++ b/man/Expr_pow.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_pow} \alias{Expr_pow} -\alias{pow} \title{Exponentiation} \usage{ Expr_pow(exponent) } \arguments{ -\item{exponent}{exponent} +\item{exponent}{Exponent value.} } \value{ Expr @@ -16,23 +15,10 @@ Expr \description{ Raise expression to the power of exponent. } -\details{ -The R interpreter will replace the \verb{**} with \code{^}, such that \verb{**} means \code{^} (except in -strings e.g. "**"). Read further at \code{?"**"}. In py-polars python \code{^} is the XOR operator and -\verb{**} is the exponentiation operator. -} \examples{ # use via `pow`-method and the `^`-operator -pl$DataFrame(a = -1:3)$select( - pl$lit(2)$pow(pl$col("a"))$alias("with $pow()"), - 2^pl$lit(-2:2), # brief use - pl$lit(2)$alias("left hand side name")^pl$lit(-3:1)$alias("right hand side name dropped") +pl$DataFrame(a = -1:3, b = 2:6)$with_columns( + x = pl$col("a")$pow(2), + y = pl$col("a")^3 ) - -# exotic case where '**' will not work, but "^" will -safe_chr = \(...)
tryCatch(..., error = as.character) -get("^")(2, pl$lit(2)) |> safe_chr() -get("**")(2, pl$lit(2)) |> safe_chr() -get("**")(2, 2) |> safe_chr() } -\keyword{Expr} diff --git a/man/Expr_print.Rd b/man/Expr_print.Rd index 1309daefa..cc26f24f4 100644 --- a/man/Expr_print.Rd +++ b/man/Expr_print.Rd @@ -1,19 +1,25 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr__expr.R -\name{Expr_print} +\name{print.Expr} +\alias{print.Expr} \alias{Expr_print} -\title{internal method print Expr} +\title{S3 method to print an Expr} \usage{ +\method{print}{Expr}(x, ...) + Expr_print() } +\arguments{ +\item{x}{Expr} + +\item{...}{Not used.} +} \value{ -invisible self +No value returned, it prints in the console. } \description{ -internal method print Expr +S3 method to print an Expr } \examples{ -pl$col("some_column")$sum()$over("some_other_column")$print() -pl$DataFrame(iris) +print(pl$col("some_column")$sum()) } -\keyword{Expr} diff --git a/man/Expr_product.Rd b/man/Expr_product.Rd index 7265e6147..c2c65edef 100644 --- a/man/Expr_product.Rd +++ b/man/Expr_product.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_product} \alias{Expr_product} -\alias{Product} \title{Product} \usage{ Expr_product @@ -13,10 +12,8 @@ Expr \description{ Compute the product of an expression. } -\details{ -does not support integer32 currently, .cast() to f64 or i64 first. 
-} \examples{ -pl$DataFrame(list(x = c(1, 2, 3)))$select(pl$col("x")$product() == 6) # is true +pl$DataFrame(x = c(2L, NA, 2L))$ + with_columns(product = pl$col("x")$product()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_quantile.Rd b/man/Expr_quantile.Rd index f2bf02ffb..f84336c36 100644 --- a/man/Expr_quantile.Rd +++ b/man/Expr_quantile.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/expr__expr.R \name{Expr_quantile} \alias{Expr_quantile} -\alias{quantile} \title{Get quantile value.} \usage{ Expr_quantile(quantile, interpolation = "nearest") } \arguments{ -\item{quantile}{numeric/Expression 0.0 to 1.0} +\item{quantile}{Either a numeric value or an Expr whose value must be +between 0 and 1.} -\item{interpolation}{string value from choices "nearest", "higher", -"lower", "midpoint", "linear"} +\item{interpolation}{One of \code{"nearest"}, \code{"higher"}, \code{"lower"}, +\code{"midpoint"}, or \code{"linear"}.} } \value{ Expr @@ -20,10 +20,9 @@ Expr Get quantile value. } \details{ -\code{Nulls} are ignored and \code{NaNs} are ranked as the largest value. +Null values are ignored and \code{NaN}s are ranked as the largest value. For linear interpolation \code{NaN} poisons \code{Inf}, that poisons any other value. } \examples{ pl$select(pl$lit(-5:5)$quantile(.5)) } -\keyword{Expr} diff --git a/man/Expr_rank.Rd b/man/Expr_rank.Rd index 7f83ad7e1..c85ba3a8c 100644 --- a/man/Expr_rank.Rd +++ b/man/Expr_rank.Rd @@ -2,31 +2,28 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rank} \alias{Expr_rank} -\alias{rank} -\title{Rank} +\title{Rank elements} \usage{ Expr_rank(method = "average", descending = FALSE) } \arguments{ -\item{method}{string option 'average', 'min', 'max', 'dense', 'ordinal', 'random' - -#' The method used to assign ranks to tied elements. 
-The following methods are available (default is 'average'): +\item{method}{String, one of \code{"average"} (default), \code{"min"}, \code{"max"}, +\code{"dense"}, \code{"ordinal"}, \code{"random"}. The method used to assign ranks to tied +elements: \itemize{ -\item 'average' : The average of the ranks that would have been assigned to +\item \code{"average"}: The average of the ranks that would have been assigned to all the tied values is assigned to each value. -\item 'min' : The minimum of the ranks that would have been assigned to all +\item \code{"min"}: The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) -\item 'max' : The maximum of the ranks that would have been assigned to all +\item \code{"max"} : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. -\item 'dense' : Like 'min', but the rank of the next highest element is -assigned the rank immediately after those assigned to the tied -elements. -\item 'ordinal' : All values are given a distinct rank, corresponding to -the order that the values occur in the Series. -\item 'random' : Like 'ordinal', but the rank for ties is not dependent -on the order that the values occur in the Series. +\item \code{"dense"}: Like 'min', but the rank of the next highest element is assigned +the rank immediately after those assigned to the tied elements. +\item \code{"ordinal"} : All values are given a distinct rank, corresponding to the +order that the values occur in the Series. +\item \code{"random"} : Like 'ordinal', but the rank for ties is not dependent on the +order that the values occur in the Series. }} \item{descending}{Rank in descending order.} @@ -39,11 +36,10 @@ Assign ranks to data, dealing with ties appropriately. 
} \examples{ # The 'average' method: -df = pl$DataFrame(list(a = c(3, 6, 1, 1, 6))) -df$select(pl$col("a")$rank()) +pl$DataFrame(a = c(3, 6, 1, 1, 6))$ + with_columns(rank = pl$col("a")$rank()) # The 'ordinal' method: -df = pl$DataFrame(list(a = c(3, 6, 1, 1, 6))) -df$select(pl$col("a")$rank("ordinal")) +pl$DataFrame(a = c(3, 6, 1, 1, 6))$ + with_columns(rank = pl$col("a")$rank("ordinal")) } -\keyword{Expr} diff --git a/man/Expr_rechunk.Rd b/man/Expr_rechunk.Rd index a0d3570f9..420f429d9 100644 --- a/man/Expr_rechunk.Rd +++ b/man/Expr_rechunk.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rechunk} \alias{Expr_rechunk} -\alias{rechunk} \title{Rechunk memory layout} \usage{ Expr_rechunk @@ -14,7 +13,7 @@ Expr Create a single chunk of memory for this Series. } \details{ -See rechunk() explained here \code{\link[polars]{docs_translations}} +See rechunk() explained here \code{\link[polars]{docs_translations}}. } \examples{ # get chunked lengths with/without rechunk @@ -24,4 +23,4 @@ series_list = pl$DataFrame(list(a = 1:3, b = 4:6))$select( )$get_columns() lapply(series_list, \(x) x$chunk_lengths()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_reinterpret.Rd b/man/Expr_reinterpret.Rd index c0313d8b6..b1efeeb33 100644 --- a/man/Expr_reinterpret.Rd +++ b/man/Expr_reinterpret.Rd @@ -2,24 +2,23 @@ % Please edit documentation in R/expr__expr.R \name{Expr_reinterpret} \alias{Expr_reinterpret} -\alias{reinterpret} -\title{reinterpret bits} +\title{Reinterpret bits} \usage{ Expr_reinterpret(signed = TRUE) } \arguments{ -\item{signed}{bool reinterpret into Int64 else UInt64} +\item{signed}{If \code{TRUE} (default), reinterpret into Int64. Otherwise, it +will be reinterpreted in UInt64.} } \value{ Expr } \description{ -Reinterpret the underlying bits as a signed/unsigned integer. -This operation is only allowed for 64bit integers. For lower bits integers, -you can safely use that cast operation. 
+Reinterpret the underlying bits as a signed/unsigned integer. This +operation is only allowed for Int64. For integers with fewer bits, you can +safely use the cast operation. } \examples{ -df = pl$DataFrame(iris) -df$select(pl$all()$head(2)$hash(1, 2, 3, 4)$reinterpret())$to_data_frame() +df = pl$DataFrame(x = 1:5, schema = list(x = pl$Int64)) +df$select(pl$all()$reinterpret()) } -\keyword{Expr} diff --git a/man/Expr_rep.Rd b/man/Expr_rep.Rd index 7b1edeb2f..58377fe9e 100644 --- a/man/Expr_rep.Rd +++ b/man/Expr_rep.Rd @@ -2,37 +2,26 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rep} \alias{Expr_rep} -\title{expression: repeat series} +\title{Repeat a Series} \usage{ Expr_rep(n, rechunk = TRUE) } \arguments{ -\item{n}{Numeric the number of times to repeat, must be non-negative and finite} +\item{n}{The number of times to repeat, must be non-negative and finite.} -\item{rechunk}{bool default = TRUE, if true memory layout will be rewritten} +\item{rechunk}{If \code{TRUE} (default), memory layout will be rewritten.} } \value{ Expr } \description{ -This expression takes input and repeats it n times and append chunk +This expression takes input and repeats it n times and appends the chunks. } \details{ -if self$len() == 1 , has a special faster implementation, Here rechunk is not -necessary, and takes no effect. - -if self$len() > 1 , then the expression instructs the series to append onto -itself n time and rewrite memory +If the input has length 1, this uses a special faster implementation that +doesn't require rechunking (so \code{rechunk = TRUE} has no effect).
} \examples{ - -pl$select( - pl$lit("alice")$rep(n = 3) -) - -pl$select( - pl$lit(1:3)$rep(n = 2) -) - +pl$select(pl$lit("alice")$rep(n = 3)) +pl$select(pl$lit(1:3)$rep(n = 2)) } -\keyword{Expr} diff --git a/man/Expr_rep_extend.Rd b/man/Expr_rep_extend.Rd index 57ad329ef..bf41175c4 100644 --- a/man/Expr_rep_extend.Rd +++ b/man/Expr_rep_extend.Rd @@ -2,28 +2,27 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rep_extend} \alias{Expr_rep_extend} -\title{extend series with repeated series} +\title{Extend a Series by repeating values} \usage{ Expr_rep_extend(expr, n, rechunk = TRUE, upcast = TRUE) } \arguments{ -\item{expr}{Expr or into Expr} +\item{expr}{Expr or something coercible to an Expr.} -\item{n}{Numeric the number of times to repeat, must be non-negative and finite} +\item{n}{The number of times to repeat, must be non-negative and finite.} -\item{rechunk}{bool default = TRUE, if true memory layout will be rewritten} +\item{rechunk}{If \code{TRUE} (default), memory layout will be rewritten.} -\item{upcast}{bool default = TRUE, passed to self$append(), if TRUE non identical types -will be casted to common super type if any. If FALSE or no common super type -throw error.} +\item{upcast}{If \code{TRUE} (default), non identical types will be cast to common +supertype if there is any. If \code{FALSE} or no common super type, having +different types will throw an error.} } \value{ Expr } \description{ -Extend a series with a repeated series or value. 
+Extend a Series by repeating values } \examples{ pl$select(pl$lit(c(1, 2, 3))$rep_extend(1:3, n = 5)) } -\keyword{Expr} diff --git a/man/Expr_repeat_by.Rd b/man/Expr_repeat_by.Rd index bcfb71e02..ca1af261b 100644 --- a/man/Expr_repeat_by.Rd +++ b/man/Expr_repeat_by.Rd @@ -2,14 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_repeat_by} \alias{Expr_repeat_by} -\title{Repeat by} +\title{Repeat values} \usage{ Expr_repeat_by(by) } \arguments{ -\item{by}{Expr Numeric column that determines how often the values will be repeated. -The column will be coerced to UInt32. Give this dtype to make the coercion a -no-op.} +\item{by}{Expr that determines how often the values will be repeated. The +column will be coerced to UInt32.} } \value{ Expr @@ -19,7 +18,6 @@ Repeat the elements in this Series as specified in the given expression. The repeated elements are expanded into a \code{List}. } \examples{ -df = pl$DataFrame(list(a = c("x", "y", "z"), n = c(0:2))) -df$select(pl$col("a")$repeat_by("n")) +df = pl$DataFrame(a = c("x", "y", "z"), n = c(0:2)) +df$with_columns(repeated = pl$col("a")$repeat_by("n")) } -\keyword{Expr} diff --git a/man/Expr_reshape.Rd b/man/Expr_reshape.Rd index 5731ef1c0..b8738ab65 100644 --- a/man/Expr_reshape.Rd +++ b/man/Expr_reshape.Rd @@ -2,23 +2,21 @@ % Please edit documentation in R/expr__expr.R \name{Expr_reshape} \alias{Expr_reshape} -\alias{reshape} \title{Reshape} \usage{ Expr_reshape(dims) } \arguments{ -\item{dims}{numeric vec of the dimension sizes. If a -1 is used in any of the dimensions, that -dimension is inferred.} +\item{dims}{Numeric vec of the dimension sizes. If a -1 is used in any of the +dimensions, that dimension is inferred.} } \value{ Expr } \description{ -Reshape this Expr to a flat Series or a Series of Lists. +Reshape an Expr to a flat Series or a Series of Lists. 
} \examples{ pl$select(pl$lit(1:12)$reshape(c(3, 4))) pl$select(pl$lit(1:12)$reshape(c(3, -1))) } -\keyword{Expr} diff --git a/man/Expr_reverse.Rd b/man/Expr_reverse.Rd index ba3335bc2..ab44d9a45 100644 --- a/man/Expr_reverse.Rd +++ b/man/Expr_reverse.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_reverse} \alias{Expr_reverse} -\alias{reverse} -\title{polars reverse} +\title{Reverse a variable} \usage{ Expr_reverse() } @@ -11,9 +10,8 @@ Expr_reverse() Expr } \description{ -polars reverse +Reverse a variable } \examples{ pl$DataFrame(list(a = 1:5))$select(pl$col("a")$reverse()) } -\keyword{Expr} diff --git a/man/Expr_rolling_max.Rd b/man/Expr_rolling_max.Rd index c742233ee..f162da59c 100644 --- a/man/Expr_rolling_max.Rd +++ b/man/Expr_rolling_max.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rolling_max} \alias{Expr_rolling_max} -\title{Rolling max} +\title{Rolling maximum} \usage{ Expr_rolling_max( window_size, @@ -32,39 +32,35 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. 
This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling max (moving max) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) max over the values in this array. A window of +length \code{window_size} will traverse the array. The values that fill this window +will (optionally) be multiplied with the weights given by the \code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. 
} \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_max(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_max = pl$col("a")$rolling_max(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_mean.Rd b/man/Expr_rolling_mean.Rd index 402cbfaa7..c3858a7f2 100644 --- a/man/Expr_rolling_mean.Rd +++ b/man/Expr_rolling_mean.Rd @@ -32,39 +32,35 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling mean (moving mean) over the values in this array. 
-A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) mean over the values in this array. A window of +length \code{window_size} will traverse the array. The values that fill this window +will (optionally) be multiplied with the weights given by the \code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_mean(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_mean = pl$col("a")$rolling_mean(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_median.Rd b/man/Expr_rolling_median.Rd index f35112433..ed6f101d1 100644 --- a/man/Expr_rolling_median.Rd +++ b/man/Expr_rolling_median.Rd @@ -32,39 +32,36 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. 
If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling median (moving median) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) median over the values in this array. A window +of length \code{window_size} will traverse the array. The values that fill this +window will (optionally) be multiplied with the weights given by the \code{weight} +vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. 
} \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_median(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_median = pl$col("a")$rolling_median(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_min.Rd b/man/Expr_rolling_min.Rd index a3a69e099..488f216a1 100644 --- a/man/Expr_rolling_min.Rd +++ b/man/Expr_rolling_min.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rolling_min} \alias{Expr_rolling_min} -\title{Rolling Min} +\title{Rolling minimum} \usage{ Expr_rolling_min( window_size, @@ -32,39 +32,35 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. 
Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling min (moving min) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) min over the values in this array. A window of +length \code{window_size} will traverse the array. The values that fill this window +will (optionally) be multiplied with the weights given by the \code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_min(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_min = pl$col("a")$rolling_min(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_quantile.Rd b/man/Expr_rolling_quantile.Rd index 4120bca12..2228e8c17 100644 --- a/man/Expr_rolling_quantile.Rd +++ b/man/Expr_rolling_quantile.Rd @@ -16,9 +16,10 @@ Expr_rolling_quantile( ) } \arguments{ -\item{quantile}{Quantile between 0.0 and 1.0.} +\item{quantile}{Quantile between 0 and 1.} -\item{interpolation}{choice c('nearest', 'higher', 'lower', 'midpoint', 'linear')} +\item{interpolation}{String, one of \code{"nearest"}, \code{"higher"}, \code{"lower"}, +\code{"midpoint"}, \code{"linear"}.} \item{window_size}{The length of the window. 
Can be a fixed integer size, or a dynamic temporal size indicated by the following string language: @@ -38,41 +39,36 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling quantile (moving quantile) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill +Compute the rolling (= moving) quantile over the values in this array. A +window of length \code{window_size} will traverse the array. 
The values that fill this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +\code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select( - pl$col("a")$rolling_quantile(window_size = 2, quantile = .5) -) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_quant = pl$col("a")$rolling_quantile(0.3, window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_skew.Rd b/man/Expr_rolling_skew.Rd index 789759967..14cb82307 100644 --- a/man/Expr_rolling_skew.Rd +++ b/man/Expr_rolling_skew.Rd @@ -2,34 +2,44 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rolling_skew} \alias{Expr_rolling_skew} -\alias{rolling_skew} \title{Rolling skew} \usage{ Expr_rolling_skew(window_size, bias = TRUE) } \arguments{ -\item{window_size}{integerish, Size of the rolling window} +\item{window_size}{The length of the window. Can be a fixed integer size, or a dynamic temporal +size indicated by the following string language: +\itemize{ +\item 1ns (1 nanosecond) +\item 1us (1 microsecond) +\item 1ms (1 millisecond) +\item 1s (1 second) +\item 1m (1 minute) +\item 1h (1 hour) +\item 1d (1 day) +\item 1w (1 week) +\item 1mo (1 calendar month) +\item 1y (1 calendar year) +\item 1i (1 index count) +If the dynamic string language is used, the \code{by} and \code{closed} arguments must +also be set. 
+}} -\item{bias}{bool default = TRUE, If False, then the calculations are corrected for statistical bias.} +\item{bias}{If \code{FALSE}, the calculations are corrected for statistical bias.} } \value{ Expr } \description{ -Compute a rolling skew. +Compute the rolling (= moving) skewness over the values in this array. A +window of length \code{window_size} will traverse the array. } \details{ -Extra comments copied from rust-polars_0.25.1 -Compute the sample skewness of a data set. - For normally distributed data, the skewness should be about zero. For uni-modal continuous distributions, a skewness value greater than zero means -that there is more weight in the right tail of the distribution. The -function \code{skewtest} can be used to determine if the skewness value -is close enough to zero, statistically speaking. -see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024 +that there is more weight in the right tail of the distribution. } \examples{ -pl$DataFrame(list(a = iris$Sepal.Length))$select(pl$col("a")$rolling_skew(window_size = 4)$head(10)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_skew = pl$col("a")$rolling_skew(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_std.Rd b/man/Expr_rolling_std.Rd index b42617e09..3cecc8ca1 100644 --- a/man/Expr_rolling_std.Rd +++ b/man/Expr_rolling_std.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rolling_std} \alias{Expr_rolling_std} -\title{Rolling std} +\title{Rolling standard deviation} \usage{ Expr_rolling_std( window_size, @@ -32,39 +32,36 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. 
}} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling std (moving std) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) standard deviation over the values in this +array. A window of length \code{window_size} will traverse the array. The values +that fill this window will (optionally) be multiplied with the weights given +by the \code{weight} vector. 
} \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_std(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_std = pl$col("a")$rolling_std(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_sum.Rd b/man/Expr_rolling_sum.Rd index 207180243..8139eb93f 100644 --- a/man/Expr_rolling_sum.Rd +++ b/man/Expr_rolling_sum.Rd @@ -32,39 +32,35 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. }} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. 
This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling sum (moving sum) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill -this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +Compute the rolling (= moving) sum over the values in this array. A window of +length \code{window_size} will traverse the array. The values that fill this window +will (optionally) be multiplied with the weights given by the \code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. -Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_sum(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_sum = pl$col("a")$rolling_sum(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_rolling_var.Rd b/man/Expr_rolling_var.Rd index 028a6bb01..1121d0ca5 100644 --- a/man/Expr_rolling_var.Rd +++ b/man/Expr_rolling_var.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_rolling_var} \alias{Expr_rolling_var} -\title{Rolling var} +\title{Rolling variance} \usage{ Expr_rolling_var( window_size, @@ -32,39 +32,36 @@ If the dynamic string language is used, the \code{by} and \code{closed} argument also be set. 
}} -\item{weights}{An optional slice with the same length as the window that will be multiplied -elementwise with the values in the window.} +\item{weights}{An optional slice with the same length as the window that will +be multiplied elementwise with the values in the window.} -\item{min_periods}{The number of values in the window that should be non-null before computing -a result. If None, it will be set equal to window size.} +\item{min_periods}{The number of values in the window that should be non-null +before computing a result. If \code{NULL}, it will be set equal to window size.} \item{center}{Set the labels at the center of the window} -\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you must -set the column that will be used to determine the windows. This column must -be of DataType: Date or DateTime.} +\item{by}{If the \code{window_size} is temporal for instance \code{"5h"} or \code{"3s"}, you +must set the column that will be used to determine the windows. This column +must be of DataType Date or DateTime.} -\item{closed}{string option \code{c("left", "right", "both", "none")}. -Define whether the temporal window interval is closed or not.} +\item{closed}{String, one of \code{"left"}, \code{"right"}, \code{"both"}, \code{"none"}. Defines +whether the temporal window interval is closed or not.} } \value{ Expr } \description{ -Apply a rolling var (moving var) over the values in this array. -A window of length \code{window_size} will traverse the array. The values that fill +Compute the rolling (= moving) variance over the values in this array. A +window of length \code{window_size} will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the -\code{weight} vector. The resulting values will be aggregated to their sum. +\code{weight} vector. } \details{ -This functionality is experimental and may change without it being considered a -breaking change. 
-Notes: If you want to compute multiple aggregation statistics over the same dynamic -window, consider using \code{groupby_rolling} this method can cache the window size +window, consider using \verb{$rolling()} this method can cache the window size computation. } \examples{ -pl$DataFrame(list(a = 1:6))$select(pl$col("a")$rolling_var(window_size = 2)) +pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$ + with_columns(roll_var = pl$col("a")$rolling_var(window_size = 2)) } -\keyword{Expr} diff --git a/man/Expr_round.Rd b/man/Expr_round.Rd index 7409afa75..c2106dfd6 100644 --- a/man/Expr_round.Rd +++ b/man/Expr_round.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_round} \alias{Expr_round} -\alias{round} -\title{round} +\title{Round} \usage{ Expr_round(decimals) } \arguments{ -\item{decimals}{integer Number of decimals to round by.} +\item{decimals}{Number of decimals to round by.} } \value{ Expr @@ -17,10 +16,7 @@ Expr Round underlying floating point data by \code{decimals} digits. } \examples{ -pl$DataFrame(list( - a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf) -))$select( - pl$col("a")$round(0) +pl$DataFrame(a = c(0.33, 0.5, 1.02, 1.5, NaN, NA, Inf, -Inf))$with_columns( + round = pl$col("a")$round(1) ) } -\keyword{Expr} diff --git a/man/Expr_sample.Rd b/man/Expr_sample.Rd index cf5d13e7a..c88da9dcd 100644 --- a/man/Expr_sample.Rd +++ b/man/Expr_sample.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sample} \alias{Expr_sample} -\alias{sample} -\title{Sample} +\title{Take a sample} \usage{ Expr_sample( frac = NULL, @@ -14,14 +13,17 @@ Expr_sample( ) } \arguments{ -\item{frac}{Fraction of items to return. Cannot be used with \code{n}.} +\item{frac}{Fraction of items to return (can be higher than 1). 
Cannot be +used with \code{n}.} -\item{with_replacement}{Allow values to be sampled more than once.} +\item{with_replacement}{If \code{TRUE} (default), allow values to be sampled more +than once.} -\item{shuffle}{Shuffle the order of sampled data points. (implicitly TRUE if, with_replacement = TRUE)} +\item{shuffle}{Shuffle the order of sampled data points (implicitly \code{TRUE} if +\code{with_replacement = TRUE}).} -\item{seed}{Seed for the random number generator. If set to None (default), a random -seed is used.} +\item{seed}{numeric value of 0 to 2^52 Seed for the random number generator. +If \code{NULL} (default), a random seed value between 0 and 10000 is picked.} \item{n}{Number of items to return. Cannot be used with \code{frac}.} } @@ -29,12 +31,11 @@ seed is used.} Expr } \description{ -#' Sample from this expression. +Take a sample } \examples{ -df = pl$DataFrame(a = 1:3) +df = pl$DataFrame(a = 1:4) df$select(pl$col("a")$sample(frac = 1, with_replacement = TRUE, seed = 1L)) df$select(pl$col("a")$sample(frac = 2, with_replacement = TRUE, seed = 1L)) df$select(pl$col("a")$sample(n = 2, with_replacement = FALSE, seed = 1L)) } -\keyword{Expr} diff --git a/man/Expr_search_sorted.Rd b/man/Expr_search_sorted.Rd index 16ad86ac6..1d49f2d24 100644 --- a/man/Expr_search_sorted.Rd +++ b/man/Expr_search_sorted.Rd @@ -2,26 +2,30 @@ % Please edit documentation in R/expr__expr.R \name{Expr_search_sorted} \alias{Expr_search_sorted} -\alias{search_sorted} \title{Where to inject element(s) to maintain sorting} \usage{ Expr_search_sorted(element) } \arguments{ -\item{element}{a R value into literal or an expression of an element} +\item{element}{Expr or scalar value.} } \value{ Expr } \description{ -Find indices in self where elements should be inserted into to maintain order. +Find the index in self where the element should be inserted so that it doesn't +break sortedness. } \details{ -This function look up where to insert element if to keep self column sorted. 
-It is assumed the self column is already sorted ascending, otherwise wrongs answers. -This function is a bit under documented in py-polars. +This function looks up where to insert element to keep self column sorted. +It is assumed the self column is already sorted in ascending order (otherwise +this leads to wrong results). } \examples{ -pl$DataFrame(list(a = 0:100))$select(pl$col("a")$search_sorted(pl$lit(42L))) +df = pl$DataFrame(a = c(1, 3, 4, 4, 6)) +df + +# in which row should 5 be inserted in order to not break the sort? +# (value is 0-indexed) +df$select(pl$col("a")$search_sorted(5)) } -\keyword{Expr} diff --git a/man/Expr_set_sorted.Rd b/man/Expr_set_sorted.Rd index c9b7da9b4..f89d42bc3 100644 --- a/man/Expr_set_sorted.Rd +++ b/man/Expr_set_sorted.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_set_sorted} \alias{Expr_set_sorted} -\alias{set_sorted} -\title{Set_sorted} +\title{Flag an Expr as "sorted"} \usage{ Expr_set_sorted(descending = FALSE) } @@ -14,15 +13,17 @@ Expr_set_sorted(descending = FALSE) Expr } \description{ -Flags the expression as 'sorted'. +This enables downstream code to use fast paths for sorted arrays. WARNING: +this doesn't check whether the data is actually sorted; you have to ensure +that yourself.
} \examples{ # correct use flag something correctly as ascendingly sorted s = pl$select(pl$lit(1:4)$set_sorted()$alias("a"))$get_column("a") -s$flags # see flags +s$flags -# incorrect use, flag somthing as not sorted ascendingly +# incorrect use, flag something as not sorted ascendingly s2 = pl$select(pl$lit(c(1, 3, 2, 4))$set_sorted()$alias("a"))$get_column("a") -s2$sort() # sorting skipped, although not actually sorted +s2$sort() +s2$flags # returns TRUE while it's not actually sorted } -\keyword{Expr} diff --git a/man/Expr_shift.Rd b/man/Expr_shift.Rd index a46603193..c14833495 100644 --- a/man/Expr_shift.Rd +++ b/man/Expr_shift.Rd @@ -2,13 +2,12 @@ % Please edit documentation in R/expr__expr.R \name{Expr_shift} \alias{Expr_shift} -\alias{shift} \title{Shift values} \usage{ -Expr_shift(periods) +Expr_shift(periods = 1) } \arguments{ -\item{periods}{numeric number of periods to shift, may be negative.} +\item{periods}{Number of periods to shift, may be negative.} } \value{ Expr @@ -16,13 +15,10 @@ Expr \description{ Shift values } -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -} \examples{ -pl$select( - pl$lit(0:3)$shift(-2)$alias("shift-2"), - pl$lit(0:3)$shift(2)$alias("shift+2") -) +pl$DataFrame(a = c(1, 2, 4, 5, 8))$ + with_columns( + pl$col("a")$shift(-2)$alias("shift-2"), + pl$col("a")$shift(2)$alias("shift+2") + ) } -\keyword{Expr} diff --git a/man/Expr_shift_and_fill.Rd b/man/Expr_shift_and_fill.Rd index bccc55610..ffdc1289f 100644 --- a/man/Expr_shift_and_fill.Rd +++ b/man/Expr_shift_and_fill.Rd @@ -2,15 +2,14 @@ % Please edit documentation in R/expr__expr.R \name{Expr_shift_and_fill} \alias{Expr_shift_and_fill} -\alias{shift_and_fill} \title{Shift and fill values} \usage{ Expr_shift_and_fill(periods, fill_value) } \arguments{ -\item{periods}{numeric number of periods to shift, may be negative.} +\item{periods}{Number of periods to shift, may be negative.} -\item{fill_value}{Fill None values with the 
result of this expression.} +\item{fill_value}{Fill null values with the result of this expression.} } \value{ Expr @@ -18,14 +17,10 @@ Expr \description{ Shift the values by a given period and fill the resulting null values. } -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} -} \examples{ -pl$select( - pl$lit(0:3), - pl$lit(0:3)$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), - pl$lit(0:3)$shift_and_fill(2, fill_value = pl$lit(42) / 2)$alias("shift+2") -) +pl$DataFrame(a = c(1, 2, 4, 5, 8))$ + with_columns( + pl$col("a")$shift_and_fill(-2, fill_value = 42)$alias("shift-2"), + pl$col("a")$shift_and_fill(2, fill_value = pl$col("a") / 2)$alias("shift+2") + ) } -\keyword{Expr} diff --git a/man/Expr_shrink_dtype.Rd b/man/Expr_shrink_dtype.Rd index c205ea542..3771d7785 100644 --- a/man/Expr_shrink_dtype.Rd +++ b/man/Expr_shrink_dtype.Rd @@ -2,8 +2,10 @@ % Please edit documentation in R/expr__expr.R \name{Expr_shrink_dtype} \alias{Expr_shrink_dtype} -\alias{shrink_dtype} -\title{Shrink numeric columns to the minimal required datatype.} +\title{Shrink numeric columns to the minimal required datatype} +\format{ +An object of class \code{character} of length 1. +} \usage{ Expr_shrink_dtype } @@ -11,19 +13,16 @@ Expr_shrink_dtype Expr } \description{ -Shrink to the dtype needed to fit the extrema of this \verb{[Series]}. -This can be used to reduce memory pressure. +Shrink to the dtype needed to fit the extrema of this Series. This can be +used to reduce memory pressure. 
} \examples{ -pl$DataFrame( - a = c(1L, 2L, 3L), - b = c(1L, 2L, bitwShiftL(2L, 29)), - c = c(-1L, 2L, bitwShiftL(1L, 15)), - d = c(-112L, 2L, 112L), - e = c(-112L, 2L, 129L), - f = c("a", "b", "c"), - g = c(0.1, 1.32, 0.12), - h = c(TRUE, NA, FALSE) -)$with_columns(pl$col("b")$cast(pl$Int64) * 32L)$select(pl$all()$shrink_dtype()) +df = pl$DataFrame( + a = 1:3, + b = c(1, 2, 3) +) +df + +df$with_columns(pl$all()$shrink_dtype()$name$suffix("_shrunk")) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_shuffle.Rd b/man/Expr_shuffle.Rd index 0304ac0d3..c8be0e4c2 100644 --- a/man/Expr_shuffle.Rd +++ b/man/Expr_shuffle.Rd @@ -2,23 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_shuffle} \alias{Expr_shuffle} -\alias{shuffle} -\title{Shuffle} +\title{Shuffle values} \usage{ Expr_shuffle(seed = NULL) } \arguments{ -\item{seed}{numeric value of 0 to 2^52 -Seed for the random number generator. If set to Null (default), a random -seed value integerish value between 0 and 10000 is picked} +\item{seed}{numeric value of 0 to 2^52 Seed for the random number generator. +If \code{NULL} (default), a random seed value between 0 and 10000 is picked.} } \value{ Expr } \description{ -Shuffle the contents of this expr. +Shuffle values } \examples{ -pl$DataFrame(a = 1:3)$select(pl$col("a")$shuffle(seed = 1)) +pl$DataFrame(a = 1:4)$with_columns(shuff = pl$col("a")$shuffle(seed = 1)) } -\keyword{Expr} diff --git a/man/Expr_sign.Rd b/man/Expr_sign.Rd index 05534c9f4..cd4fb4305 100644 --- a/man/Expr_sign.Rd +++ b/man/Expr_sign.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sign} \alias{Expr_sign} -\alias{sign} -\title{Sign} +\title{Get the sign of elements} \usage{ Expr_sign } @@ -11,9 +10,10 @@ Expr_sign Expr } \description{ -Compute the element-wise indication of the sign. 
+Get the sign of elements } \examples{ -pl$DataFrame(a = c(.9, -0, 0, 4, NA_real_))$select(pl$col("a")$sign()) +pl$DataFrame(a = c(.9, -3, -0, 0, 4, NA_real_))$ + with_columns(sign = pl$col("a")$sign()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_sin.Rd b/man/Expr_sin.Rd index ca94fd316..1c2bb6571 100644 --- a/man/Expr_sin.Rd +++ b/man/Expr_sin.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sin} \alias{Expr_sin} -\alias{sin} -\title{Sin} +\title{Compute sine} \usage{ Expr_sin } @@ -11,12 +10,10 @@ Expr_sin Expr } \description{ -Compute the element-wise value for the sine. -} -\details{ -Evaluated Series has dtype Float64 +Compute sine } \examples{ -pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$sin()) +pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ + with_columns(sine = pl$col("a")$sin()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_sinh.Rd b/man/Expr_sinh.Rd index 4af8c06fa..f6cf3e3d9 100644 --- a/man/Expr_sinh.Rd +++ b/man/Expr_sinh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sinh} \alias{Expr_sinh} -\alias{sinh} -\title{Sinh} +\title{Compute hyperbolic sine} \usage{ Expr_sinh } @@ -11,12 +10,10 @@ Expr_sinh Expr } \description{ -Compute the element-wise value for the hyperbolic sine. 
-} -\details{ -Evaluated Series has dtype Float64 +Compute hyperbolic sine } \examples{ -pl$DataFrame(a = c(-1, asinh(0.5), 0, 1, NA_real_))$select(pl$col("a")$sinh()) +pl$DataFrame(a = c(-1, asinh(0.5), 0, 1, NA_real_))$ + with_columns(sinh = pl$col("a")$sinh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_skew.Rd b/man/Expr_skew.Rd index 56bb5bf63..732bf1bf2 100644 --- a/man/Expr_skew.Rd +++ b/man/Expr_skew.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/expr__expr.R \name{Expr_skew} \alias{Expr_skew} -\alias{skew} \title{Skewness} \usage{ Expr_skew(bias = TRUE) } \arguments{ -\item{bias}{If False, then the calculations are corrected for statistical bias.} +\item{bias}{If \code{FALSE}, then the calculations are corrected for statistical +bias.} } \value{ Expr @@ -18,36 +18,10 @@ Compute the sample skewness of a data set. } \details{ For normally distributed data, the skewness should be about zero. For -unimodal continuous distributions, a skewness value greater than zero means -that there is more weight in the right tail of the distribution. The -function \code{skewtest} can be used to determine if the skewness value -is close enough to zero, statistically speaking. - -See scipy.stats for more information. -\subsection{Notes}{ - -The sample skewness is computed as the Fisher-Pearson coefficient -of skewness, i.e. - -\eqn{ g_1=\frac{m_3}{m_2^{3/2}}} - -where - -\eqn{ m_i=\frac{1}{N}\sum_{n=1}^N(x[n]-\bar{x})^i} - -is the biased sample :math:\verb{i\\texttt\{th\}} central moment, and \eqn{\bar{x}} is -the sample mean. If \code{bias} is False, the calculations are -corrected for bias and the value computed is the adjusted -Fisher-Pearson standardized moment coefficient, i.e. - -\eqn{ G_1 = \frac{k_3}{k_2^{3/2}} = \frac{\sqrt{N(N-1)}}{N-2}\frac{m_3}{m_2^{3/2}}} -} +uni-modal continuous distributions, a skewness value greater than zero means +that there is more weight in the right tail of the distribution. 
} \examples{ df = pl$DataFrame(list(a = c(1:3, 2:1))) df$select(pl$col("a")$skew()) } -\references{ -https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html?highlight=skew#scipy.stats.skew -} -\keyword{Expr} diff --git a/man/Expr_slice.Rd b/man/Expr_slice.Rd index f02931364..96420196b 100644 --- a/man/Expr_slice.Rd +++ b/man/Expr_slice.Rd @@ -3,21 +3,24 @@ \name{Expr_slice} \alias{Expr_slice} \alias{slice} -\title{Get a slice of this expression.} +\title{Get a slice of an Expr} \usage{ Expr_slice(offset, length = NULL) } \arguments{ -\item{offset}{numeric or expression, zero-indexed where to start slice -negative value indicate starting (one-indexed) from back} +\item{offset}{Numeric or expression, zero-indexed. Indicates where to start +the slice. A negative value is one-indexed and starts from the end.} -\item{length}{how many elements should slice contain, default NULL is max length} +\item{length}{Maximum number of elements contained in the slice. Default is +full data.} } \value{ Expr } \description{ -Get a slice of this expression. +Performing a slice of length 1 on a subset of columns will recycle this value +in those columns but will not change the number of rows in the data. See +examples. } \examples{ @@ -34,5 +37,7 @@ pl$DataFrame(list(a = 0:100))$select( pl$DataFrame(list(a = 0:100))$select( pl$all()$slice(80) ) + +# recycling +pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) } -\keyword{Expr} diff --git a/man/Expr_sort.Rd b/man/Expr_sort.Rd index b35599d4a..d9db04227 100644 --- a/man/Expr_sort.Rd +++ b/man/Expr_sort.Rd @@ -2,30 +2,23 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sort} \alias{Expr_sort} -\alias{sort} -\title{Expr_sort} +\title{Sort an Expr} \usage{ Expr_sort(descending = FALSE, nulls_last = FALSE) } \arguments{ \item{descending}{Sort in descending order. 
When sorting by multiple columns, -can be specified per column by passing a sequence of booleans.} +can be specified per column by passing a vector of booleans.} -\item{nulls_last}{bool, default FALSE, place Nulls last} +\item{nulls_last}{If \code{TRUE}, place null values last.} } \value{ Expr } \description{ -Sort this column. In projection/ selection context the whole column is sorted. -If used in a groupby context, the groups are sorted. -} -\details{ -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Sort this column. If used in a groupby context, the groups are sorted. } \examples{ -pl$DataFrame(list( - a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$sort()) +pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$ + with_columns(sorted = pl$col("a")$sort()) } -\keyword{Expr} diff --git a/man/Expr_sort_by.Rd b/man/Expr_sort_by.Rd index fb366c57e..3cbf86644 100644 --- a/man/Expr_sort_by.Rd +++ b/man/Expr_sort_by.Rd @@ -2,81 +2,46 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sort_by} \alias{Expr_sort_by} -\alias{sort_by} -\title{sort column by order of others} +\title{Sort Expr by order of others} \usage{ Expr_sort_by(by, descending = FALSE) } \arguments{ -\item{by}{one expression or list expressions and/or strings(interpreted as column names)} +\item{by}{One expression or a list of expressions and/or strings (interpreted +as column names).} \item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a sequence of booleans.} +can be specified per column by passing a vector of booleans.} } \value{ Expr } \description{ Sort this column by the ordering of another column, or multiple other columns. -} -\details{ -In projection/ selection context the whole column is sorted. If used in a groupby context, the groups are sorted.
- -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} } \examples{ -df = pl$DataFrame(list( +df = pl$DataFrame( group = c("a", "a", "a", "b", "b", "b"), value1 = c(98, 1, 3, 2, 99, 100), value2 = c("d", "f", "b", "e", "c", "a") -)) +) # by one column/expression -df$select( - pl$col("group")$sort_by("value1") +df$with_columns( + sorted = pl$col("group")$sort_by("value1") ) # by two columns/expressions -df$select( - pl$col("group")$sort_by(list("value2", pl$col("value1")), descending = c(TRUE, FALSE)) +df$with_columns( + sorted = pl$col("group")$sort_by( + list("value2", pl$col("value1")), + descending = c(TRUE, FALSE) + ) ) - # by some expression -df$select( - pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE)) -) - -# quite similar usecase as R function `order()` -l = list( - ab = c(rep("a", 6), rep("b", 6)), - v4 = rep(1:4, 3), - v3 = rep(1:3, 4), - v2 = rep(1:2, 6), - v1 = 1:12 -) -df = pl$DataFrame(l) - - -# examples of order versus sort_by -all.equal( - df$select( - pl$col("ab")$sort_by("v4")$alias("ab4"), - pl$col("ab")$sort_by("v3")$alias("ab3"), - pl$col("ab")$sort_by("v2")$alias("ab2"), - pl$col("ab")$sort_by("v1")$alias("ab1"), - pl$col("ab")$sort_by(list("v3", pl$col("v1")), descending = c(FALSE, TRUE))$alias("ab13FT"), - pl$col("ab")$sort_by(list("v3", pl$col("v1")), descending = TRUE)$alias("ab13T") - )$to_list(), - list( - ab4 = l$ab[order(l$v4)], - ab3 = l$ab[order(l$v3)], - ab2 = l$ab[order(l$v2)], - ab1 = l$ab[order(l$v1)], - ab13FT = l$ab[order(l$v3, rev(l$v1))], - ab13T = l$ab[order(l$v3, l$v1, decreasing = TRUE)] - ) +df$with_columns( + sorted = pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE)) ) } -\keyword{Expr} diff --git a/man/Expr_sqrt.Rd b/man/Expr_sqrt.Rd index e8dae542c..3d1268b34 100644 --- a/man/Expr_sqrt.Rd +++ b/man/Expr_sqrt.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sqrt} \alias{Expr_sqrt} -\alias{sqrt} -\title{Square root} +\title{Compute 
the square root of the elements} \usage{ Expr_sqrt() } @@ -11,9 +10,8 @@ Expr_sqrt() Expr } \description{ -Compute the square root of the elements. +Compute the square root of the elements } \examples{ -pl$DataFrame(list(a = -1:3))$select(pl$col("a")$sqrt()) +pl$DataFrame(a = -1:3)$with_columns(a_sqrt = pl$col("a")$sqrt()) } -\keyword{Expr} diff --git a/man/Expr_std.Rd b/man/Expr_std.Rd index 2e37e31a5..eaf06633b 100644 --- a/man/Expr_std.Rd +++ b/man/Expr_std.Rd @@ -2,20 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_std} \alias{Expr_std} -\title{Get Standard Deviation} +\title{Get standard deviation} \usage{ Expr_std(ddof = 1) } \arguments{ -\item{ddof}{integer in range \verb{[0;255]} degrees of freedom} +\item{ddof}{Degrees of freedom, must be an integer between 0 and 255} } \value{ -Expr (f64 scalar) +Expr } \description{ -Get Standard Deviation +Get standard deviation } \examples{ pl$select(pl$lit(1:5)$std()) } -\keyword{Expr} diff --git a/man/Expr_str.Rd b/man/Expr_str.Rd deleted file mode 100644 index 19b176afa..000000000 --- a/man/Expr_str.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_str} -\alias{Expr_str} -\alias{str_ns} -\title{str: string related methods} -\usage{ -Expr_str() -} -\value{ -Expr -} -\description{ -Create an object namespace of all string related methods. -See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_struct.Rd b/man/Expr_struct.Rd deleted file mode 100644 index 5c779575e..000000000 --- a/man/Expr_struct.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_struct} -\alias{Expr_struct} -\alias{struct_ns} -\title{struct: related methods} -\usage{ -Expr_struct() -} -\value{ -Expr -} -\description{ -Create an object namespace of all struct related methods. 
-See the individual method pages for full details -} -\examples{ - -# missing - -} -\keyword{Expr} diff --git a/man/Expr_sub.Rd b/man/Expr_sub.Rd index 4dba5880b..e6fb1c9ae 100644 --- a/man/Expr_sub.Rd +++ b/man/Expr_sub.Rd @@ -3,31 +3,29 @@ \name{Expr_sub} \alias{Expr_sub} \alias{-.Expr} -\title{Sub} +\title{Subtract two expressions} \usage{ Expr_sub(other) \method{-}{Expr}(e1, e2) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} -\item{e1}{lhs Expr} +\item{e1}{Expr only} -\item{e2}{rhs Expr or anything which can become a literal Expression} +\item{e2}{Expr or anything that can be converted to a literal} } \value{ -Exprs +Expr } \description{ -Substract +The RHS can either be an Expr or an object that can be converted to a literal +(e.g. an integer). } \examples{ -# three syntaxes same result pl$lit(5) - 10 pl$lit(5) - pl$lit(10) pl$lit(5)$sub(pl$lit(10)) -pl$lit(5) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/Expr_sum.Rd b/man/Expr_sum.Rd index 2ea27d622..741cfa1b7 100644 --- a/man/Expr_sum.Rd +++ b/man/Expr_sum.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_sum} \alias{Expr_sum} -\title{sum} +\title{Get sum value} \usage{ Expr_sum } @@ -13,10 +13,11 @@ Expr Get sum value } \details{ -The Dtypes Int8, UInt8, Int16 and UInt16 are cast to -Int64 before summing to prevent overflow issues. +The dtypes Int8, UInt8, Int16 and UInt16 are cast to Int64 before summing to +prevent overflow issues.
} \examples{ -pl$DataFrame(list(x = c(1L, NA, 2L)))$select(pl$col("x")$sum()) # is i32 3 (Int32 not casted) +pl$DataFrame(x = c(1L, NA, 2L))$ + with_columns(sum = pl$col("x")$sum()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_tail.Rd b/man/Expr_tail.Rd index da1e8faf7..73e96c471 100644 --- a/man/Expr_tail.Rd +++ b/man/Expr_tail.Rd @@ -2,23 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_tail} \alias{Expr_tail} -\alias{tail} -\title{Tail} +\title{Get the last n elements} \usage{ Expr_tail(n = 10) } \arguments{ -\item{n}{numeric number of elements to select from tail} +\item{n}{Number of elements to take.} } \value{ Expr } \description{ -Get the tail n elements. -Similar to R tail(x) +Get the last n elements } \examples{ -# get 3 last elements -pl$DataFrame(list(x = 1:11))$select(pl$col("x")$tail(3)) +pl$DataFrame(x = 1:11)$select(pl$col("x")$tail(3)) } -\keyword{Expr} diff --git a/man/Expr_tan.Rd b/man/Expr_tan.Rd index 66ad9e74d..e623c4ce5 100644 --- a/man/Expr_tan.Rd +++ b/man/Expr_tan.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_tan} \alias{Expr_tan} -\alias{Tan} -\title{Tan} +\title{Compute tangent} \usage{ Expr_tan } @@ -11,12 +10,10 @@ Expr_tan Expr } \description{ -Compute the element-wise value for the tangent. 
-} -\details{ -Evaluated Series has dtype Float64 +Compute tangent } \examples{ -pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$select(pl$col("a")$tan()) +pl$DataFrame(a = c(0, pi / 2, pi, NA_real_))$ + with_columns(tangent = pl$col("a")$tan()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_tanh.Rd b/man/Expr_tanh.Rd index b3a476c17..5208bfadf 100644 --- a/man/Expr_tanh.Rd +++ b/man/Expr_tanh.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_tanh} \alias{Expr_tanh} -\alias{tanh} -\title{Tanh} +\title{Compute hyperbolic tangent} \usage{ Expr_tanh } @@ -11,12 +10,10 @@ Expr_tanh Expr } \description{ -Compute the element-wise value for the hyperbolic tangent. -} -\details{ -Evaluated Series has dtype Float64 +Compute hyperbolic tangent } \examples{ -pl$DataFrame(a = c(-1, atanh(0.5), 0, 1, NA_real_))$select(pl$col("a")$tanh()) +pl$DataFrame(a = c(-1, atanh(0.5), 0, 1, NA_real_))$ + with_columns(tanh = pl$col("a")$tanh()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_to_physical.Rd b/man/Expr_to_physical.Rd index 175176b5f..6afde38c1 100644 --- a/man/Expr_to_physical.Rd +++ b/man/Expr_to_physical.Rd @@ -3,7 +3,7 @@ \name{Expr_to_physical} \alias{Expr_to_physical} \alias{to_physical} -\title{To physical representation} +\title{Cast an Expr to its physical representation} \usage{ Expr_to_physical } @@ -11,11 +11,20 @@ Expr_to_physical Expr } \description{ -expression request underlying physical base representation +The following DataTypes will be converted: +\itemize{ +\item Date -> Int32 +\item Datetime -> Int64 +\item Time -> Int64 +\item Duration -> Int64 +\item Categorical -> UInt32 +\item List(inner) -> List(physical of inner) +Other data types will be left unchanged. 
+} +} \examples{ pl$DataFrame( - list(vals = c("a", "x", NA, "a")) + list(vals = c("a", "x", NA, "a", "b")) )$with_columns( pl$col("vals")$cast(pl$Categorical), pl$col("vals") @@ -24,4 +33,4 @@ pl$DataFrame( $alias("vals_physical") ) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_to_r.Rd b/man/Expr_to_r.Rd index 41a16c5e7..6369a2bab 100644 --- a/man/Expr_to_r.Rd +++ b/man/Expr_to_r.Rd @@ -2,26 +2,24 @@ % Please edit documentation in R/expr__expr.R \name{Expr_to_r} \alias{Expr_to_r} -\alias{pl_expr_to_r} -\title{to_r: for debuging an expression} +\title{Convert an Expr to R output} \usage{ Expr_to_r(df = NULL, i = 0) } \arguments{ -\item{df}{otherwise a DataFrame to evaluate in, default NULL is an empty DataFrame} +\item{df}{If \code{NULL} (default), it evaluates the Expr in an empty DataFrame. +Otherwise, provide a DataFrame that the Expr should be evaluated in.} -\item{i}{numeric column to extract zero index default first, expression could generate multiple -columns} +\item{i}{Numeric column to extract. Default is zero (which gives the first +column).} } \value{ R object } \description{ -debug an expression by evaluating in empty DataFrame and return first series to R +This is mostly useful to debug an expression. It evaluates the Expr in an +empty DataFrame and returns the first Series to R.
} \examples{ pl$lit(1:3)$to_r() -pl$expr_to_r(pl$lit(1:3)) -pl$expr_to_r(1:3) } -\keyword{Expr} diff --git a/man/Expr_to_struct.Rd b/man/Expr_to_struct.Rd index 93036cc5d..193a76f80 100644 --- a/man/Expr_to_struct.Rd +++ b/man/Expr_to_struct.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_to_struct} \alias{Expr_to_struct} -\alias{expr_to_struct} -\title{to_struct} +\title{Convert an Expr to a Struct} \usage{ Expr_to_struct() } @@ -11,11 +10,10 @@ Expr_to_struct() Expr } \description{ -pass expr to pl$struct +Convert an Expr to a Struct } \examples{ -e = pl$all()$to_struct()$alias("my_struct") -print(e) -pl$DataFrame(iris)$select(e) +pl$DataFrame(iris[, 3:5])$with_columns( + my_struct = pl$all()$to_struct() +) } -\keyword{Expr} diff --git a/man/Expr_top_k.Rd b/man/Expr_top_k.Rd index ee165f4a8..f93dd023c 100644 --- a/man/Expr_top_k.Rd +++ b/man/Expr_top_k.Rd @@ -2,28 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_top_k} \alias{Expr_top_k} -\alias{top_k} \title{Top k values} \usage{ Expr_top_k(k) } \arguments{ -\item{k}{numeric k top values to get} +\item{k}{Number of top values to get} } \value{ Expr } \description{ -Return the \code{k} largest elements. -} -\details{ -This has time complexity: \eqn{ O(n + k \\log{}n - \frac{k}{2}) } - -See Inf,NaN,NULL,Null/NA translations here \code{\link[polars]{docs_translations}} +Return the \code{k} largest elements. 
This has time complexity: \eqn{ O(n + k +\\log{}n - \frac{k}{2}) } } \examples{ -pl$DataFrame(list( - a = c(6, 1, 0, NA, Inf, NaN) -))$select(pl$col("a")$top_k(5)) +pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$select(pl$col("a")$top_k(5)) } -\keyword{Expr} diff --git a/man/Expr_unique.Rd b/man/Expr_unique.Rd index 4e88097a2..b525c0fa2 100644 --- a/man/Expr_unique.Rd +++ b/man/Expr_unique.Rd @@ -2,21 +2,20 @@ % Please edit documentation in R/expr__expr.R \name{Expr_unique} \alias{Expr_unique} -\title{get unique values} +\title{Get unique values} \usage{ Expr_unique(maintain_order = FALSE) } \arguments{ -\item{maintain_order}{bool, if TRUE guaranteed same order, if FALSE maybe} +\item{maintain_order}{If \code{TRUE}, the unique values are returned in order of +appearance.} } \value{ Expr } \description{ -Get unique values of this expression. -Similar to R unique() +Get unique values } \examples{ pl$DataFrame(iris)$select(pl$col("Species")$unique()) } -\keyword{Expr} diff --git a/man/Expr_unique_counts.Rd b/man/Expr_unique_counts.Rd index c1012f032..21efc1c3c 100644 --- a/man/Expr_unique_counts.Rd +++ b/man/Expr_unique_counts.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{Expr_unique_counts} \alias{Expr_unique_counts} -\alias{unique_counts} -\title{Value counts} +\title{Count unique values} \usage{ Expr_unique_counts } @@ -11,11 +10,11 @@ Expr_unique_counts Expr } \description{ -Return a count of the unique values in the order of appearance. -This method differs from \code{value_counts} in that it does not return the -values, only the counts and might be faster +Return a count of the unique values in the order of appearance. This method +differs from \verb{$value_counts()} in that it does not return the values, only +the counts and it might be faster. 
} \examples{ pl$DataFrame(iris)$select(pl$col("Species")$unique_counts()) } -\keyword{Expr} +\keyword{datasets} diff --git a/man/Expr_upper_bound.Rd b/man/Expr_upper_bound.Rd new file mode 100644 index 000000000..c7543c32c --- /dev/null +++ b/man/Expr_upper_bound.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_upper_bound} +\alias{Expr_upper_bound} +\title{Find the upper bound of a DataType} +\usage{ +Expr_upper_bound +} +\value{ +Expr +} +\description{ +Find the upper bound of a DataType +} +\examples{ +pl$DataFrame(x = c(1, 2, 3), y = -2:0, + schema = list(x = pl$Float64, y = pl$Int32))$ + select(pl$all()$upper_bound()) +} +\keyword{datasets} diff --git a/man/Expr_upper_lower_bound.Rd b/man/Expr_upper_lower_bound.Rd deleted file mode 100644 index 41729056a..000000000 --- a/man/Expr_upper_lower_bound.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{Expr_upper_lower_bound} -\alias{Expr_upper_lower_bound} -\alias{Expr_upper_bound} -\alias{upper_bound} -\alias{Expr_lower_bound} -\alias{lower_bound} -\title{Upper bound} -\usage{ -Expr_upper_bound - -Expr_lower_bound -} -\value{ -Expr -} -\description{ -Calculate the upper/lower bound. -Returns a unit Series with the highest value possible for the dtype of this -expression. 
-} -\details{ -Notice lower bound i32 exported to R is NA_integer_ for now -} -\examples{ -pl$DataFrame(i32 = 1L, f64 = 1)$select(pl$all()$upper_bound()) -pl$DataFrame(i32 = 1L, f64 = 1)$select(pl$all()$lower_bound()) -} -\keyword{Expr} diff --git a/man/Expr_value_counts.Rd b/man/Expr_value_counts.Rd index 314fd3065..67c4cfc5e 100644 --- a/man/Expr_value_counts.Rd +++ b/man/Expr_value_counts.Rd @@ -23,4 +23,3 @@ df = pl$DataFrame(iris)$select(pl$col("Species")$value_counts()) df df$unnest()$to_data_frame() # recommended to unnest structs before converting to R } -\keyword{Expr} diff --git a/man/Expr_var.Rd b/man/Expr_var.Rd index d591f0d7a..6b8c37733 100644 --- a/man/Expr_var.Rd +++ b/man/Expr_var.Rd @@ -2,20 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_var} \alias{Expr_var} -\title{Get Variance} +\title{Get variance} \usage{ Expr_var(ddof = 1) } \arguments{ -\item{ddof}{integer in range \verb{[0;255]} degrees of freedom} +\item{ddof}{Degrees of freedom, must be an integer between 0 and 255} } \value{ -Expr (f64 scalar) +Expr } \description{ -Get Variance +Get variance } \examples{ pl$select(pl$lit(1:5)$var()) } -\keyword{Expr} diff --git a/man/Expr_where.Rd b/man/Expr_where.Rd new file mode 100644 index 000000000..e95542457 --- /dev/null +++ b/man/Expr_where.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{Expr_where} +\alias{Expr_where} +\title{Filter a single column.} +\usage{ +Expr_where(predicate) +} +\arguments{ +\item{predicate}{An Expr or something coercible to an Expr. Must return a +boolean.} +} +\value{ +Expr +} +\description{ +This is an alias for \verb{$filter()}. 
+} +\examples{ +df = pl$DataFrame( + group_col = c("g1", "g1", "g2"), + b = c(1, 2, 3) +) +df + +df$group_by("group_col")$agg( + lt = pl$col("b")$where(pl$col("b") < 2), + gte = pl$col("b")$where(pl$col("b") >= 2) +) +} diff --git a/man/Expr_xor.Rd b/man/Expr_xor.Rd index a14eeb718..5bceb032b 100644 --- a/man/Expr_xor.Rd +++ b/man/Expr_xor.Rd @@ -2,21 +2,19 @@ % Please edit documentation in R/expr__expr.R \name{Expr_xor} \alias{Expr_xor} -\title{Xor} +\title{Apply logical XOR on two expressions} \usage{ Expr_xor(other) } \arguments{ -\item{other}{literal or Robj which can become a literal} +\item{other}{Literal or object that can be converted to a literal} } \value{ Expr } \description{ -combine to boolean expressions with XOR +Combine two boolean expressions with XOR. } \examples{ pl$lit(TRUE)$xor(pl$lit(FALSE)) } -\keyword{Expr} -\keyword{Expr_operators} diff --git a/man/as.list.Expr.Rd b/man/as.list.Expr.Rd new file mode 100644 index 000000000..d415e2818 --- /dev/null +++ b/man/as.list.Expr.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{as.list.Expr} +\alias{as.list.Expr} +\title{S3 method to convert an Expr to a list} +\usage{ +\method{as.list}{Expr}(x, ...) 
+} +\arguments{ +\item{x}{Expr} + +\item{...}{Not used.} +} +\value{ +One Expr wrapped in a list +} +\description{ +S3 method to convert an Expr to a list +} +\keyword{internal} diff --git a/man/dot-DollarNames.Expr.Rd b/man/dot-DollarNames.Expr.Rd new file mode 100644 index 000000000..504fd5ea0 --- /dev/null +++ b/man/dot-DollarNames.Expr.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{.DollarNames.Expr} +\alias{.DollarNames.Expr} +\title{Auto complete $-access into a polars object} +\usage{ +\method{.DollarNames}{Expr}(x, pattern = "") +} +\arguments{ +\item{x}{Name of an \code{Expr} object} + +\item{pattern}{String used to auto-complete} +} +\value{ +char vec + +Doesn't return a value. This is used for autocompletion in RStudio. +} +\description{ +Called by the interactive R session internally +} +\keyword{internal} diff --git a/man/dot-DollarNames.RField.Rd b/man/dot-DollarNames.RField.Rd index 1f261dd3f..a03ac2653 100644 --- a/man/dot-DollarNames.RField.Rd +++ b/man/dot-DollarNames.RField.Rd @@ -7,7 +7,7 @@ \method{.DollarNames}{RField}(x, pattern = "") } \arguments{ -\item{x}{Name of a \code{"RField"} object} +\item{x}{Name of a \code{RField} object} \item{pattern}{String used to auto-complete} } diff --git a/man/pl_expr_to_r.Rd b/man/pl_expr_to_r.Rd new file mode 100644 index 000000000..6cbf63ef2 --- /dev/null +++ b/man/pl_expr_to_r.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{pl_expr_to_r} +\alias{pl_expr_to_r} +\title{Convert an Expr to R output} +\arguments{ +\item{df}{If \code{NULL} (default), it evaluates the Expr in an empty DataFrame. +Otherwise, provide a DataFrame that the Expr should be evaluated in.} + +\item{i}{Numeric column to extract. Default is zero (which gives the first +column).} +} +\value{ +R object +} +\description{ +This is mostly useful to debug an expression. 
It evaluates the Expr in an +empty DataFrame and returns the first Series to R. This is an alias for +\verb{$to_r()}. +} +\examples{ +pl$expr_to_r(pl$lit(1:3)) +} diff --git a/man/prepare_alpha.Rd b/man/prepare_alpha.Rd deleted file mode 100644 index ff8ca76ef..000000000 --- a/man/prepare_alpha.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__expr.R -\name{prepare_alpha} -\alias{prepare_alpha} -\title{prepare alpha} -\usage{ -prepare_alpha(com = NULL, span = NULL, half_life = NULL, alpha = NULL) -} -\arguments{ -\item{com}{numeric or NULL} - -\item{span}{numeric or NULL} - -\item{half_life}{numeric or NULL} - -\item{alpha}{numeric or NULL} -} -\value{ -numeric -} -\description{ -internal function for emw_x expressions -} -\keyword{internal}