Skip to content

Commit

Permalink
Implement *_horizontal() functions (#508)
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher committed Nov 14, 2023
1 parent addae0e commit 41e82ec
Show file tree
Hide file tree
Showing 15 changed files with 476 additions and 88 deletions.
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# polars (development version)

## Breaking changes

- The rowwise computation when several columns are passed to `pl$min()`, `pl$max()`,
and `pl$sum()` is deprecated and will be removed in 0.12.0. Passing several
columns to these functions will now compute the min/max/sum in each column
separately. Use `pl$min_horizontal()` `pl$max_horizontal()`, and
`pl$sum_horizontal()` instead for rowwise computation (#508).

## What's changed

- New methods `$write_json()` and `$write_ndjson()` for DataFrame (#502).
- Removed argument `name` in `pl$date_range()`, which was deprecated for a while
(#503).
- New functions `pl$min_horizontal()`, `pl$max_horizontal()`, `pl$sum_horizontal()`,
`pl$all_horizontal()`, `pl$any_horizontal()` (#508).

# polars 0.10.1

Expand Down
10 changes: 8 additions & 2 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@
#' @useDynLib polars, .registration = TRUE
NULL

min_exprs <- function(exprs) .Call(wrap__min_exprs, exprs)
min_horizontal <- function(dotdotdot) .Call(wrap__min_horizontal, dotdotdot)

max_exprs <- function(exprs) .Call(wrap__max_exprs, exprs)
max_horizontal <- function(dotdotdot) .Call(wrap__max_horizontal, dotdotdot)

all_horizontal <- function(dotdotdot) .Call(wrap__all_horizontal, dotdotdot)

any_horizontal <- function(dotdotdot) .Call(wrap__any_horizontal, dotdotdot)

sum_horizontal <- function(dotdotdot) .Call(wrap__sum_horizontal, dotdotdot)

coalesce_exprs <- function(exprs) .Call(wrap__coalesce_exprs, exprs)

Expand Down
209 changes: 172 additions & 37 deletions R/functions__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,11 @@ pl$approx_n_unique = function(column) { #-> int or Expr
}


#' sum across expressions / literals / Series
#' @description syntactic sugar for starting a expression with sum
#' Compute sum in one or several columns
#'
#' This is syntactic sugar for `pl$col(...)$sum()`.
#' @name pl_sum
#'
#' @param ... is a:
#' If one arg:
#' - Series or Expr, same as `column$sum()`
Expand All @@ -464,6 +466,9 @@ pl$approx_n_unique = function(column) { #-> int or Expr
#' - list of strings(column names) or expressions to add up as expr1 + expr2 + expr3 + ...
#'
#' If several args, then wrapped in a list and handled as above.
#' @param verbose Show the deprecation message when several columns or Expr are
#' passed in `...`. Will be removed in 0.12.0.
#'
#' @return Expr
#' @keywords Expr_new
#' @examples
Expand All @@ -473,20 +478,12 @@ pl$approx_n_unique = function(column) { #-> int or Expr
#' # column as Expr (prefer pl$col("Petal.Width")$sum())
#' pl$DataFrame(iris)$select(pl$sum(pl$col("Petal.Width")))
#'
#' # column as numeric
#' pl$DataFrame()$select(pl$sum(1:5))
#'
#'
#' df = pl$DataFrame(a = 1:2, b = 3:4, c = 5:6)
#'
#' # column as list
#' df$with_columns(pl$sum(list("a", "c")))
#' df$with_columns(pl$sum(list("a", "c", 42L)))
#'
#' # two eqivalent lines
#' df$with_columns(pl$sum(list(pl$col("a") + pl$col("b"), "c")))
#' df$with_columns(pl$sum(list("*")))
pl$sum = function(...) {
#' # Compute sum in several columns
#' df$with_columns(pl$sum("a", "c"))
#' df$with_columns(pl$sum("*"))
pl$sum = function(..., verbose = TRUE) {
column = list2(...)
if (length(column) == 1L) column <- column[[1L]]
if (inherits(column, "Series") || inherits(column, "Expr")) {
Expand All @@ -499,39 +496,43 @@ pl$sum = function(...) {
return(pl$lit(column)$sum())
}
if (is.list(column)) {
pra = do.call(construct_ProtoExprArray, column)
return(sum_exprs(pra))
if (verbose) {
warning("This usage of `pl$sum()` used to compute the sum rowwise. This is now deprecated, use `pl$sum_horizontal()` instead. This message will be removed in 0.12.0. Set `verbose = FALSE` to remove this message.")
}
return(pl$col(column)$sum())
}
stop("pl$sum: this input is not supported")
}


#' min across expressions / literals / Series
#' @description Folds the expressions from left to right, keeping the first non-null value.
#' Find minimum value in one or several columns
#'
#' This is syntactic sugar for `pl$col(...)$min()`.
#' @name pl_min
#' @param ... is a:
#' If one arg:
#' - Series or Expr, same as `column$sum()`
#' - string, same as `pl$col(column)$sum()`
#' - numeric, same as `pl$lit(column)$sum()`
#' - list of strings(column names) or expressions to add up as expr1 + expr2 + expr3 + ...
#'
#' If several args, then wrapped in a list and handled as above.
#' @param verbose Show the deprecation message when several columns or Expr are
#' passed in `...`. Will be removed in 0.12.0.
#'
#' @return Expr
#' @keywords Expr_new
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(2:1, NA_real_, NA_real_),
#' c = c(1:3, NA_real_),
#' d = c(1:2, NA_real_, -Inf)
#' b = c(1:2, NA_real_, NA_real_),
#' c = c(1:4)
#' )
#' # use min to get first non Null value for each row, otherwise insert 99.9
#' df
#'
#' df$with_columns(
#' pl$min("a", "b", "c", 99.9)$alias("d")
#' pl$min("a", "b", "c")
#' )
#'
pl$min = function(...) {
pl$min = function(..., verbose = TRUE) {
column = list2(...)
if (length(column) == 1L) column <- column[[1L]]
if (inherits(column, "Series") || inherits(column, "Expr")) {
Expand All @@ -544,8 +545,10 @@ pl$min = function(...) {
return(pl$lit(column)$min())
}
if (is.list(column)) {
pra = do.call(construct_ProtoExprArray, column)
return(min_exprs(pra))
if (verbose) {
warning("This usage of `pl$min()` used to find the minimum value rowwise. This is now deprecated, use `pl$min_horizontal()` instead. This message will be removed in 0.12.0. Set `verbose = FALSE` to remove this message.")
}
return(pl$col(column)$min())
}
stop("pl$min: this input is not supported")
}
Expand All @@ -554,8 +557,9 @@ pl$min = function(...) {



#' max across expressions / literals / Series
#' @description Folds the expressions from left to right, keeping the first non-null value.
#' Find maximum value in one or several columns
#'
#' This is syntactic sugar for `pl$col(...)$max()`.
#' @name pl_max
#' @param ... is a:
#' If one arg:
Expand All @@ -565,20 +569,23 @@ pl$min = function(...) {
#' - list of strings(column names) or expressions to add up as expr1 + expr2 + expr3 + ...
#'
#' If several args, then wrapped in a list and handled as above.
#' @param verbose Show the deprecation message when several columns or Expr are
#' passed in `...`. Will be removed in 0.12.0.
#'
#' @return Expr
#' @keywords Expr_new
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(1:2, NA_real_, NA_real_),
#' c = c(1:3, NA_real_)
#' c = c(1:4)
#' )
#' # use coalesce to get first non Null value for each row, otherwise insert 99.9
#' df
#'
#' df$with_columns(
#' pl$coalesce("a", "b", "c", 99.9)$alias("d")
#' pl$max("a", "b", "c")
#' )
#'
pl$max = function(...) {
pl$max = function(..., verbose = TRUE) {
column = list2(...)
if (length(column) == 1L) column <- column[[1L]]
if (inherits(column, "Series") || inherits(column, "Expr")) {
Expand All @@ -591,8 +598,10 @@ pl$max = function(...) {
return(pl$lit(column)$max())
}
if (is.list(column)) {
pra = do.call(construct_ProtoExprArray, column)
return(max_exprs(pra))
if (verbose) {
warning("This usage of `pl$max()` used to find the maximum value rowwise. This is now deprecated, use `pl$max_horizontal()` instead. This message will be removed in 0.12.0. Set `verbose = FALSE` to remove this message.")
}
return(pl$col(column)$max())
}
stop("pl$max: this input is not supported")
}
Expand Down Expand Up @@ -941,3 +950,129 @@ pl$reduce = function(lambda, exprs) {
reduce(lambda, exprs) |>
unwrap("in pl$reduce():")
}

#' Get the minimum value rowwise
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals.
#' @name pl_min_horizontal
#' @return Expr
#'
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(2:1, NA_real_, NA_real_),
#' c = c(1:2, NA_real_, -Inf)
#' )
#' df$with_columns(
#' pl$min_horizontal("a", "b", "c", 99.9)$alias("min")
#' )
pl$min_horizontal <- function(...) {
min_horizontal(list2(...)) |>
unwrap("in $min_horizontal():")
}

#' Get the maximum value rowwise
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals.
#' @name pl_max_horizontal
#' @return Expr
#'
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(2:1, NA_real_, NA_real_),
#' c = c(1:2, NA_real_, Inf)
#' )
#' df$with_columns(
#' pl$max_horizontal("a", "b", "c", 99.9)$alias("max")
#' )
pl$max_horizontal <- function(...) {
max_horizontal(list2(...)) |>
unwrap("in $max_horizontal():")
}

#' Apply the AND logical rowwise
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals.
#' @name pl_all_horizontal
#' @return Expr
#'
#' @examples
#' df = pl$DataFrame(
#' a = c(TRUE, FALSE, NA, NA),
#' b = c(TRUE, FALSE, NA, NA),
#' c = c(TRUE, FALSE, NA, TRUE)
#' )
#' df
#'
#' df$with_columns(
#' pl$all_horizontal("a", "b", "c")$alias("all")
#' )
#'
#' # drop rows that have at least one missing value
#' # == keep rows that only have non-missing values
#' df$filter(
#' pl$all_horizontal(pl$all()$is_not_null())
#' )
pl$all_horizontal <- function(...) {
all_horizontal(list2(...)) |>
unwrap("in $all_horizontal():")
}

#' Apply the OR logical rowwise
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals.
#' @name pl_any_horizontal
#' @return Expr
#'
#' @examples
#' df = pl$DataFrame(
#' a = c(FALSE, FALSE, NA, NA),
#' b = c(TRUE, FALSE, NA, NA),
#' c = c(TRUE, FALSE, NA, TRUE)
#' )
#' df
#'
#' df$with_columns(
#' pl$any_horizontal("a", "b", "c")$alias("any")
#' )
#'
#' # drop rows that only have missing values == keep rows that have at least one
#' # non-missing value
#' df$filter(
#' pl$any_horizontal(pl$all()$is_not_null())
#' )
pl$any_horizontal <- function(...) {
any_horizontal(list2(...)) |>
unwrap("in $any_horizontal():")
}

#' Compute the sum rowwise
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
#' are parsed as literals.
#' @name pl_sum_horizontal
#' @return Expr
#'
#' @examples
#' df = pl$DataFrame(
#' a = NA_real_,
#' b = c(3:4, NA_real_, NA_real_),
#' c = c(1:2, NA_real_, -Inf)
#' )
#' df$with_columns(
#' pl$sum_horizontal("a", "b", "c", 2)$alias("sum")
#' )
pl$sum_horizontal <- function(...) {
sum_horizontal(list2(...)) |>
unwrap("in $sum_horizontal():")
}
34 changes: 34 additions & 0 deletions man/pl_all_horizontal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 41e82ec

Please sign in to comment.