-
Notifications
You must be signed in to change notification settings - Fork 58
/
step-first.R
124 lines (113 loc) · 3.62 KB
/
step-first.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#' Create a "lazy" data.table for use with dplyr verbs
#'
#' @description
#' A lazy data.table captures the intent of dplyr verbs, only actually
#' performing computation when requested (with [collect()], [pull()],
#' [as.data.frame()], [data.table::as.data.table()], or [tibble::as_tibble()]).
#' This allows dtplyr to convert dplyr verbs into as few data.table expressions
#' as possible, which leads to a high performance translation.
#'
#' See `vignette("translation")` for the details of the translation.
#'
#' @param x A data table (or something can can be coerced to a data table).
#' @param immutable If `TRUE`, `x` is treated as immutable and will never
#' be modified by any code generated by dtplyr. Alternatively, you can set
#' `immutable = FALSE` to allow dtplyr to modify the input object.
#' @param name Optionally, supply a name to be used in generated expressions.
#' For expert use only.
#' @param key_by Set keys for data frame, using [select()] semantics (e.g.
#' `key_by = c(key1, key2)`.
#'
#' This uses [data.table::setkey()] to sort the table and build an index.
#' This will considerably improve performance for subsets, summaries, and
#' joins that use the keys.
#'
#' See `vignette("datatable-keys-fast-subset")` for more details.
#' @export
#' @aliases tbl_dt grouped_dt
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' mtcars2 <- lazy_dt(mtcars)
#' mtcars2
#' mtcars2 %>% select(mpg:cyl)
#' mtcars2 %>% select(x = mpg, y = cyl)
#' mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#' mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#' mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#' mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#' mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#'
#' # Learn more about translation in vignette("translation")
#' by_cyl <- mtcars2 %>% group_by(cyl)
#' by_cyl %>% summarise(mpg = mean(mpg))
#' by_cyl %>% mutate(mpg = mean(mpg))
#' by_cyl %>%
#' filter(mpg < mean(mpg)) %>%
#' summarise(hp = mean(hp))
lazy_dt <- function(x, name = NULL, immutable = TRUE, key_by = NULL) {
# in case `x` has an `as.data.table()` method but not a `group_vars()` method
groups <- tryCatch(group_vars(x), error = function(e) character())
if (!is.data.table(x)) {
if (!immutable) {
abort("`immutable` must be `TRUE` when `x` is not already a data table.")
}
x <- as.data.table(x)
copied <- TRUE
} else {
copied <- FALSE
}
key_by <- enquo(key_by)
key_vars <- unname(tidyselect::vars_select(names(x), !!key_by))
if (length(key_vars)) {
if (immutable && !copied) {
x <- data.table::copy(x)
}
data.table::setkeyv(x, key_vars)
}
step_first(x, name = name, groups = groups, immutable = immutable, env = caller_env())
}
#' @export
dim.dtplyr_step_first <- function(x) {
dim(x$parent)
}
step_first <- function(parent, name = NULL, groups = character(),
immutable = TRUE, env = caller_env()) {
stopifnot(is.data.table(parent))
if (is.null(name)) {
name <- unique_name()
}
new_step(parent,
vars = names(parent),
groups = groups,
locals = list(),
implicit_copy = !immutable,
needs_copy = FALSE,
name = sym(name),
env = env,
class = "dtplyr_step_first"
)
}
#' @export
dt_call.dtplyr_step_first <- function(x, needs_copy = FALSE) {
if (needs_copy) {
expr(copy(!!x$name))
} else {
x$name
}
}
#' @export
dt_sources.dtplyr_step_first <- function(x) {
stats::setNames(list(x$parent), as.character(x$name))
}
#' @export
dt_has_computation.dtplyr_step_first <- function(x) {
FALSE
}
unique_name <- local({
i <- 0
function() {
i <<- i + 1
paste0("_DT", i)
}
})