Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
#' Operate on a selection of variables
#'
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("superseded")}
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [across()] in an existing verb. See `vignette("colwise")` for details.
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#' variables specified with the quoting function [vars()]. This
#' quoting function accepts [tidyselect::vars_select()] helpers like
#' [starts_with()]. Instead of a [vars()] selection, you can also
#' supply an [integerish][rlang::is_integerish] vector of column
#' positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#' variables for which a predicate function returns `TRUE`. Instead
#' of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs A function `fun`, a quosure style lambda `~ fun(.)` or a list of either form.
#'
#' @param .vars A list of columns generated by [vars()],
#' a character vector of column names, a numeric vector of column
#' positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#' or a logical vector. The variables for which `.predicate` is or
#' returns `TRUE` are selected. This argument is passed to
#' [rlang::as_function()] and thus supports quosure-style lambda
#' functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#' `.funs`. These are evaluated only once, with [tidy
#' dots][rlang::tidy-dots] support.
#'
#' @section Grouping variables:
#'
#' Most of these operations also apply on the grouping variables when
#' they are part of the selection. This includes:
#'
#' * [arrange_all()], [arrange_at()], and [arrange_if()]
#' * [distinct_all()], [distinct_at()], and [distinct_if()]
#' * [filter_all()], [filter_at()], and [filter_if()]
#' * [group_by_all()], [group_by_at()], and [group_by_if()]
#' * [select_all()], [select_at()], and [select_if()]
#'
#' This is not the case for summarising and mutating variants where
#' operations are *not* applied on grouping variables. The behaviour
#' depends on whether the selection is **implicit** (`all` and `if`
#' selections) or **explicit** (`at` selections). Grouping variables
#' covered by explicit selections (with [summarise_at()],
#' [mutate_at()], and [transmute_at()]) are always an error. For
#' implicit selections, the grouping variables are always ignored. In
#' this case, the level of verbosity depends on the kind of operation:
#'
#' * Summarising operations ([summarise_all()] and [summarise_if()])
#' ignore grouping variables silently because it is obvious that
#' operations are not applied on grouping variables.
#'
#' * On the other hand it isn't as obvious in the case of mutating
#' operations ([mutate_all()], [mutate_if()], [transmute_all()], and
#' [transmute_if()]). For this reason, they issue a message
#' indicating which grouping variables are ignored.
#'
#' @name scoped
NULL
#' Select variables
#'
#' @description
#' `vars()` was only needed for the scoped verbs, which have been superseded
#' by the use of [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept a
#' numeric vector of positions or a character vector of column names.
#'
#' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to include/exclude
#' in mutate/summarise. You can use same specifications as in [select()].
#' If missing, defaults to all non-grouping variables.
#' @seealso [all_vars()] and [any_vars()] for other quoting
#' functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
quos(...)
}
#' Apply predicate to all variables
#'
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("superseded")}
#'
#' `all_vars()` and `any_vars()` were only needed for the scoped verbs, which
#' have been superseded by the use of [across()] in an existing verb. See
#' `vignette("colwise")` for details.
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr <[`data-masking`][dplyr_data_masking]> An expression that
#' returns a logical vector, using `.` to refer to the "current" variable.
#' @seealso [vars()] for other quoting functions that you
#' can use with scoped verbs.
#' @export
all_vars <- function(expr) {
lifecycle::signal_superseded("1.0.0", "all_vars()", "across()")
structure(enquo(expr), class = c("all_vars", "quosure", "formula"))
}
#' @rdname all_vars
#' @export
any_vars <- function(expr) {
lifecycle::signal_superseded("1.0.0", "any_vars()")
structure(enquo(expr), class = c("any_vars", "quosure", "formula"))
}
#' @export
print.all_vars <- function(x, ...) {
cat("<predicate intersection>\n")
NextMethod()
}
#' @export
print.any_vars <- function(x, ...) {
cat("<predicate union>\n")
NextMethod()
}
# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- tbl_vars(tbl)
} else {
tibble_vars <- tbl_nongroup_vars(tbl)
}
if (is_null(vars)) {
character()
} else if (is_integerish(vars)) {
tibble_vars[vars]
} else if (is_quosures(vars) || is_character(vars)) {
out <- tidyselect::vars_select(tibble_vars, !!!vars)
if (!any(have_name(vars))) {
names(out) <- NULL
}
out
} else {
bad_args(".vars", "must be a character/numeric vector or a `vars()` object, ",
"not {friendly_type_of(vars)}."
)
}
}
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE) {
vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars)
set_names(syms(vars), names(vars))
}
# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- tbl_vars(.tbl)
} else {
tibble_vars <- tbl_nongroup_vars(.tbl)
}
if (is_logical(.p)) {
if (length(.p) != length(tibble_vars)) {
abort(c(
"`.p` is invalid.",
x = "`.p` should have the same size as the number of variables in the tibble.",
i = glue("`.p` is size {length(.p)}."),
i = glue("The tibble has {length(tibble_vars)} columns, {including} the grouping variables.", including = if (.include_group_vars) "including" else "non including")
))
}
return(syms(tibble_vars[.p]))
}
.tbl <- tbl_ptype(.tbl)
if (is_fun_list(.p) || is_list(.p)) {
if (length(.p) != 1) {
bad_args(".predicate", "must have length 1, not {length(.p)}.")
}
.p <- .p[[1]]
}
if (is_quosure(.p)) {
.p <- quo_as_function(.p)
} else {
.p <- as_function(.p, .env)
}
n <- length(tibble_vars)
selected <- new_logical(n)
for (i in seq_len(n)) {
column <- pull(.tbl, tibble_vars[[.env$i]])
cond <- eval_tidy(.p(column, ...))
if (!is.logical(cond) || length(cond) != 1) {
abort(c(
"`.p` is invalid.",
x = "`.p` should return a single logical.",
i = if(is.logical(cond)) {
glue("`.p` returns a size {length(cond)} <logical> for column `{tibble_vars[[i]]}`.")
} else {
glue("`.p` returns a <{vec_ptype_full(cond)}> for column `{tibble_vars[[i]]}`.")
}
))
}
selected[[i]] <- isTRUE(cond)
}
tibble_vars[selected]
}
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars))
}
#' Return a prototype of a tbl
#'
#' Used in `_if` functions to enable type-based selection even when the data
#' is lazily generated. Should either return the complete tibble, or if that
#' can not be computed quickly, a 0-row tibble where the columns are of
#' the correct type.
#'
#' @export
#' @keywords internal
tbl_ptype <- function(.data) {
UseMethod("tbl_ptype")
}
#' @export
tbl_ptype.default <- function(.data) {
if (inherits(.data, "tbl_lazy")) {
# TODO: remove once moved to dplyr
inform("Applying predicate on the first 100 rows")
collect(.data, n = 100)
} else {
.data
}
}
# The lambda must inherit from:
# - Execution environment (bound arguments with purrr lambda syntax)
# - Lexical environment (local variables)
# - Data mask (other columns)
#
# So we need:
# - Inheritance from closure -> lexical
# - A maskable quosure
as_inlined_function <- function(f, env, ...) {
# Process unquote operator at inlining time
f <- expr_interp(f)
# Transform to a purrr-like lambda
fn <- as_function(f, env = env)
body(fn) <- expr({
# Force all arguments
base::pairlist(...)
# Transform the lambda body into a maskable quosure inheriting
# from the execution environment
`_quo` <- rlang::quo(!!body(fn))
# Evaluate the quosure in the mask
rlang::eval_bare(`_quo`, base::parent.frame())
})
structure(fn, class = "inline_colwise_function", formula = f)
}