/
randomizer.R
142 lines (131 loc) · 5.12 KB
/
randomizer.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#' Randomize cases into experimental conditions
#'
#' @details Wrapper for the complete_ra and block_ra from the randomizr package. See \url{https://radiant-rstats.github.io/docs/design/randomizer.html} for an example in Radiant
#'
#' @param dataset Dataset to sample from
#' @param vars The variables to sample
#' @param conditions Conditions to assign to
#' @param blocks A vector to use for blocking or a data.frame from which to construct a blocking vector
#' @param probs A vector of assignment probabilities for each treatment conditions. By default each condition is assigned with equal probability
#' @param label Name to use for the generated condition variable
#' @param seed Random seed to use as the starting point
#' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")
#' @param arr Expression to arrange (sort) the data on (e.g., "color, desc(price)")
#' @param rows Rows to select from the specified dataset
#' @param na.rm Remove rows with missing values (FALSE or TRUE)
#' @param envir Environment to extract data from
#'
#' @return A list of variables defined in randomizer as an object of class randomizer
#'
#' @importFrom randomizr complete_ra block_ra
#' @importFrom dplyr select_at bind_cols
#' @importFrom magrittr set_colnames
#'
#' @examples
#' randomizer(rndnames, "Names", conditions = c("test", "control")) %>% str()
#'
#' @seealso \code{\link{summary.sampling}} to summarize results
#' @export
randomizer <- function(dataset, vars,
conditions = c("A", "B"),
blocks = NULL, probs = NULL,
label = ".conditions",
seed = 1234,
data_filter = "",
arr = "",
rows = NULL,
na.rm = FALSE,
envir = parent.frame()) {
df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
if (!is.empty(blocks)) {
vars <- c(vars, blocks)
}
dataset <- get_data(dataset, vars, filt = data_filter, arr = arr, rows = rows, na.rm = na.rm, envir = envir)
## use seed if provided
seed <- gsub("[^0-9]", "", seed)
if (!is.empty(seed)) set.seed(seed)
if (is.empty(probs)) {
probs <- length(conditions) %>%
(function(x) rep(1 / x, x))
} else if (length(probs) == 1) {
probs <- rep(probs, length(conditions))
} else if (length(probs) != length(conditions)) {
probs <- NULL
}
if (length(blocks) > 0) {
blocks_vct <- do.call(paste, c(select_at(dataset, .vars = blocks), sep = "-"))
cond <- randomizr::block_ra(blocks = blocks_vct, conditions = conditions, prob_each = probs) %>%
as.data.frame() %>%
set_colnames(label)
} else {
cond <- randomizr::complete_ra(N = nrow(dataset), conditions = conditions, prob_each = probs) %>%
as.data.frame() %>%
set_colnames(label)
}
dataset <- bind_cols(cond, dataset)
# removing unneeded arguments
rm(cond, envir)
as.list(environment()) %>% add_class("randomizer")
}
#' Summary method for the randomizer function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/design/randomizer.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{randomizer}}
#' @param dec Number of decimals to show
#' @param ... further arguments passed to or from other methods
#'
#' @importFrom stats addmargins
#' @importFrom dplyr distinct
#'
#' @examples
#' randomizer(rndnames, "Names", conditions = c("test", "control")) %>% summary()
#'
#' @seealso \code{\link{randomizer}} to generate the results
#'
#' @export
summary.randomizer <- function(object, dec = 3, ...) {
if (is.empty(object$blocks)) {
cat("Random assignment (simple random)\n")
} else {
cat("Random assignment (blocking)\n")
}
cat("Data :", object$df_name, "\n")
if (!is.empty(object$data_filter)) {
cat("Filter :", gsub("\\n", "", object$data_filter), "\n")
}
if (!is.empty(object$arr)) {
cat("Arrange :", gsub("\\n", "", object$arr), "\n")
}
if (!is.empty(object$rows)) {
cat("Slice :", gsub("\\n", "", object$rows), "\n")
}
if (!is.empty(object$blocks)) {
cat("Variables :", setdiff(object$vars, object$blocks), "\n")
cat("Blocks :", object$blocks, "\n")
} else {
cat("Variables :", object$vars, "\n")
}
cat("Conditions :", object$conditions, "\n")
cat("Probabilities:", round(object$probs, dec), "\n")
if (!is.empty(object$seed)) {
cat("Random seed :", object$seed, "\n")
}
is_unique <- object$dataset[, -1, drop = FALSE] %>%
(function(x) ifelse(nrow(x) > nrow(distinct(x)), "Based on selected variables some duplicate rows exist", "Based on selected variables, no duplicate rows exist"))
cat("Duplicates :", is_unique, "\n\n")
cat("Assigment frequencies:\n")
if (is.empty(object$blocks_vct)) {
tab <- table(object$dataset[[object$label]])
} else {
tab <- table(object$blocks_vct, object$dataset[[object$label]])
}
tab %>%
addmargins() %>%
print()
cat("\nAssigment proportions:\n")
tab %>%
prop.table() %>%
round(dec) %>%
print()
}