Permalink
Branch: master
Find file Copy path
216 lines (203 sloc) 7.72 KB
#' Operate on a selection of variables
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#' variables specified with the quoting function [vars()]. This
#' quoting function accepts [tidyselect::vars_select()] helpers like
#' [starts_with()]. Instead of a [vars()] selection, you can also
#' supply an [integerish][rlang::is_integerish] vector of column
#' positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#' variables for which a predicate function returns `TRUE`. Instead
#' of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs A function `fun`, a quosure style lambda `~ fun(.)` or a list of either form.
#'
#' @param .vars A list of columns generated by [vars()],
#' a character vector of column names, a numeric vector of column
#' positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#' or a logical vector. The variables for which `.predicate` is or
#' returns `TRUE` are selected. This argument is passed to
#' [rlang::as_function()] and thus supports quosure-style lambda
#' functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#' `.funs`. These are evaluated only once, with [tidy
#' dots][rlang::tidy-dots] support.
#'
#' @section Grouping variables:
#'
#' Most of these operations also apply on the grouping variables when
#' they are part of the selection. This includes:
#'
#' * [arrange_all()], [arrange_at()], and [arrange_if()]
#' * [distinct_all()], [distinct_at()], and [distinct_if()]
#' * [filter_all()], [filter_at()], and [filter_if()]
#' * [group_by_all()], [group_by_at()], and [group_by_if()]
#' * [select_all()], [select_at()], and [select_if()]
#'
#' This is not the case for summarising and mutating variants where
#' operations are *not* applied on grouping variables. The behaviour
#' depends on whether the selection is **implicit** (`all` and `if`
#' selections) or **explicit** (`at` selections). Grouping variables
#' covered by explicit selections (with [summarise_at()],
#' [mutate_at()], and [transmute_at()]) are always an error. For
#' implicit selections, the grouping variables are always ignored. In
#' this case, the level of verbosity depends on the kind of operation:
#'
#' * Summarising operations ([summarise_all()] and [summarise_if()])
#' ignore grouping variables silently because it is obvious that
#' operations are not applied on grouping variables.
#'
#' * On the other hand it isn't as obvious in the case of mutating
#' operations ([mutate_all()], [mutate_if()], [transmute_all()], and
#' [transmute_if()]). For this reason, they issue a message
#' indicating which grouping variables are ignored.
#'
#' @name scoped
NULL
#' Select variables
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept a
#' numeric vector of positions or a character vector of column names.
#'
#' @param ... Variables to include/exclude in mutate/summarise. You
#' can use same specifications as in [select()]. If missing,
#' defaults to all non-grouping variables.
#'
#' These arguments are automatically [quoted][rlang::quo] and later
#' [evaluated][rlang::eval_tidy] in the context of the data
#' frame. They support [unquoting][rlang::quasiquotation]. See
#' `vignette("programming")` for an introduction to these concepts.
#' @seealso [all_vars()] and [any_vars()] for other quoting
#' functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
quos(...)
}
#' Apply predicate to all variables
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr A predicate expression. This variable supports
#' [unquoting][rlang::quasiquotation] and will be evaluated in the
#' context of the data frame. It should return a logical vector.
#'
#' This argument is automatically [quoted][rlang::quo] and later
#' [evaluated][rlang::eval_tidy] in the context of the data
#' frame. It supports [unquoting][rlang::quasiquotation]. See
#' `vignette("programming")` for an introduction to these concepts.
#' @seealso [vars()] for other quoting functions that you
#' can use with scoped verbs.
#' @export
all_vars <- function(expr) {
structure(enquo(expr), class = c("all_vars", "quosure", "formula"))
}
#' @rdname all_vars
#' @export
any_vars <- function(expr) {
structure(enquo(expr), class = c("any_vars", "quosure", "formula"))
}
#' @export
print.all_vars <- function(x, ...) {
cat("<predicate intersection>\n")
NextMethod()
}
#' @export
print.any_vars <- function(x, ...) {
cat("<predicate union>\n")
NextMethod()
}
# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- sel_vars(tbl)
} else {
tibble_vars <- tbl_nongroup_vars(tbl)
}
if (is_null(vars)) {
character()
} else if (is_character(vars)) {
vars
} else if (is_integerish(vars)) {
tibble_vars[vars]
} else if (is_quosures(vars)) {
out <- tidyselect::vars_select(tibble_vars, !!!vars)
if (!any(have_name(vars))) {
names(out) <- NULL
}
out
} else {
bad_args(".vars", "must be a character/numeric vector or a `vars()` object, ",
"not {friendly_type_of(vars)}"
)
}
}
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE) {
vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars)
set_names(syms(vars), names(vars))
}
# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- tbl_vars(.tbl)
} else {
tibble_vars <- tbl_nongroup_vars(.tbl)
}
if (is_logical(.p)) {
stopifnot(length(.p) == length(tibble_vars))
return(syms(tibble_vars[.p]))
}
if (inherits(.tbl, "tbl_lazy")) {
inform("Applying predicate on the first 100 rows")
.tbl <- collect(.tbl, n = 100)
}
if (is_fun_list(.p)) {
if (length(.p) != 1) {
bad_args(".predicate", "must have length 1, not {length(.p)}")
}
.p <- .p[[1]]
}
if (is_quosure(.p)) {
.p <- quo_as_function(.p)
} else {
.p <- as_function(.p, .env)
}
n <- length(tibble_vars)
selected <- new_logical(n)
for (i in seq_len(n)) {
selected[[i]] <- .p(.tbl[[tibble_vars[[i]]]], ...)
}
tibble_vars[selected]
}
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars))
}