/
extract.R
136 lines (135 loc) · 4.61 KB
/
extract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#' @rdname extract
#' @export
extract <- function(...) {
UseMethod("extract")
}
#' Extract branched values.
#'
#' \code{extract} returns a tibble of selected values
#' across the multiverse in a long format.
#'
#' @examples
#' # Define mutate branches.
#' hurricane_strength <- mutate_branch(
#' # damage vs. wind speed vs.pressure
#' NDAM,
#' HighestWindSpeed,
#' Minpressure_Updated_2014,
#' # Standardized versions
#' scale(NDAM),
#' scale(HighestWindSpeed),
#' -scale(Minpressure_Updated_2014),
#' )
#' y <- mutate_branch(
#' alldeaths, log(alldeaths + 1)
#' )
#' # Create a mverse and add the branches.
#' mv <- create_multiverse(hurricane) %>%
#' add_mutate_branch(hurricane_strength, y)
#' execute_multiverse(mv)
#' # Extract all branched columns from all universes
#' extract(mv)
#' # Specify the columns to extract from each universe using columns
#' # You can select both branched and non-branched columns
#' extract(mv, columns = c("hurricane_strength", "NDAM"))
#' # Specify the universe to extract from using universe
#' extract(mv, universe = 1)
#' # Specify the number of universes to extract from using nuni
#' # The universes are randomly selected
#' extract(mv, nuni = 3)
#' # Specify the proportion of data to extract from each universe using
#' # frow. The rows are randomly selected
#' extract(mv, frow = 0.7)
#' @param .mverse a \code{mverse} object.
#' @param columns a character vector of column names to extract.
#' @param nuni a positive integer for the number of universes to extract.
#' @param frow proportion of rows to extract from each universe.
#' @param include_branch_options when \code{TRUE} (default), include the mutate
#' statements used to specified the options for each branched columns
#' @param ... Ignored.
#' @details This method extracts data values across
#' the multiverse. You can specify a subset of data
#' to extract using \code{columns}, \code{universe},
#' \code{nuni}, and \code{frow}.
#'
#' You can specify the columns to extract from each
#' universe by passing the column names as a character
#' vector to \code{columns}. The default values is
#' \code{NULL} extracting all columns with branches.
#'
#' Use \code{universe} to specify a set of universes
#' by their integer ids. Use \code{nuni} to specify
#' the number of universes to extract data from. The
#' method then selects the subset randomly. Specifying
#' \code{universe} manually will override \code{nuni} value.
#' By default, they are both set to \code{NULL} and
#' the method returns data from all universes.
#'
#' Use \code{frow} to randomly extract a fraction of
#' data from each universe. The default value is \code{NULL}
#' and all rows are returned as they are. Note if you select
#' 1 the method will return shuffle rows in each universe
#' before returning them. If \code{frow} is greater than 1,
#' the method randomly samples rows with replacement.
#' @return a tibble containing the selected columns
#' across the multiverse.
#' @name extract
#' @export
extract.mverse <- function(.mverse, columns = NULL,
nuni = NULL, frow = NULL,
include_branch_options = TRUE, ...) {
stopifnot(inherits(.mverse, "mverse"))
mtable <- summary.mverse(.mverse)
branched_columns <- unlist(
sapply(
c(
attr(.mverse, "branches_list"),
attr(.mverse, "branches_conditioned_list")
),
function(x) if (inherits(x, "mutate_branch")) x$name
)
)
if (is.null(columns)) {
columns <- branched_columns
} else {
branched_columns <- branched_columns[branched_columns %in% columns]
}
columns <- c("universe", columns)
universe <- mtable$universe
if (!is.null(nuni)) {
if (length(nuni) > 1) {
warning("Only the first value of nuni is used.")
nuni <- nuni[1]
}
stopifnot(
"nuni must be less than or equal to the number of universes." =
length(universe) >= nuni
)
universe <- sample(universe, nuni)
}
stopifnot(length(universe) > 0)
extracted <- lapply(
universe, function(x) {
ex <- multiverse::extract_variable_from_universe(.mverse, x, .data_mverse)
if (!is.null(frow)) {
stopifnot("frow must be a positive value." = frow > 0)
ex <- dplyr::sample_frac(ex,
size = frow,
replace = (frow > 1)
)
}
ex$universe <- x
ex
}
)
extracted <- dplyr::bind_rows(extracted)
if (include_branch_options) {
extracted %>%
dplyr::left_join(mtable, by = "universe") %>%
dplyr::select(
dplyr::all_of(c(columns, paste0(branched_columns, "_branch")))
)
} else {
dplyr::select(dplyr::all_of(c(extracted, columns)))
}
}