Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
110 lines (100 sloc) 3.23 KB
#' Create a "lazy" data.table for use with dplyr verbs
#'
#' @description
#' A lazy data.table lazy captures the intent of dplyr verbs, only actually
#' performing computation when requested (with [collect()], [pull()],
#' [as.data.frame()], [data.table::as.data.table()], or [tibble::as_tibble()]).
#' This allows dtplyr to convert dplyr verbs into as few data.table expressions
#' as possible, which leads to a high performance translation.
#'
#' See `vignette("translation")` for the details of the translation.
#'
#' @param x A data table (or something can can be coerced to a data table).
#' @param immutable If `TRUE`, `x` is treated as immutable and will never
#' be modified by any code generated by dtplyr. Alternatively, you can set
#' `immutable = FALSE` to allow dtplyr to modify the input object.
#' @param name Optionally, supply a name to be used in generated expressions.
#' For expert use only.
#' @param key_by Set keys for data frame, using [select()] semantics (e.g.
#' `key_by = c(key1, key2)`.
#'
#' This uses [data.table::setkey()] to sort the table and build an index.
#' This will considerably improve performance for subsets, summaries, and
#' joins that use the keys.
#'
#' See `vignette("datatable-keys-fast-subset")` for more details.
#' @export
#' @aliases tbl_dt grouped_dt
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' mtcars2 <- lazy_dt(mtcars)
#' mtcars2
#' mtcars2 %>% select(mpg:cyl)
#' mtcars2 %>% select(x = mpg, y = cyl)
#' mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#' mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#' mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#' mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#' mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#'
#' by_cyl <- mtcars2 %>% group_by(cyl)
#' by_cyl %>% summarise(mpg = mean(mpg))
#' by_cyl %>% mutate(mpg = mean(mpg))
#' by_cyl %>% filter(mpg < mean(mpg)) %>% summarise(hp = mean(hp))
lazy_dt <- function(x, name = NULL, immutable = TRUE, key_by = NULL) {
if (!is.data.table(x)) {
if (!immutable) {
abort("`immutable` must be `TRUE` when `x` is not already a data table.")
}
x <- as.data.table(x)
copied <- TRUE
} else {
copied <- FALSE
}
key_by <- enquo(key_by)
key_vars <- unname(tidyselect::vars_select(names(x), !!key_by))
if (length(key_vars)) {
if (immutable && !copied) {
x <- copy(x)
}
data.table::setkeyv(x, key_vars)
}
step_first(x, name = name, immutable = immutable, env = caller_env())
}
#' @export
dim.dtplyr_step_first <- function(x) {
dim(x$parent)
}
step_first <- function(parent, name = NULL, immutable = TRUE, env = caller_env()) {
stopifnot(is.data.table(parent))
if (is.null(name)) {
name <- unique_name()
}
new_step(parent,
vars = names(parent),
groups = character(),
implicit_copy = !immutable,
needs_copy = FALSE,
name = sym(name),
env = env,
class = "dtplyr_step_first"
)
}
dt_call.dtplyr_step_first <- function(x, needs_copy = FALSE) {
if (needs_copy) {
expr(copy(!!x$name))
} else {
x$name
}
}
dt_sources.dtplyr_step_first <- function(x) {
stats::setNames(list(x$parent), as.character(x$name))
}
unique_name <- local({
i <- 0
function() {
i <<- i + 1
paste0("_DT", i)
}
})
You can’t perform that action at this time.