/
debit-map.R
190 lines (162 loc) · 5.93 KB
/
debit-map.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#' Apply DEBIT to many cases
#'
#' @description Call `debit_map()` to use DEBIT on multiple combinations of
#'   mean, standard deviation, and sample size of binary distributions. Mapping
#'   function for [`debit()`].
#'
#'   For summary statistics, call [`audit()`] on the results.
#'
#' @param data Data frame.
#' @param x,sd,n Optionally, specify these arguments as column names in `data`.
#'   This allows for using columns that are not themselves named `x`, `sd`, or
#'   `n`.
#' @param rounding,threshold,symmetric Arguments passed on to [`debit()`], with
#'   the same defaults.
#' @param show_rec If set to `FALSE`, the resulting tibble only includes the
#'   columns `x`, `sd`, `n`, and `consistency`. Default is `TRUE`.
#' @param extra Not currently used.
#'
#' @importFrom rlang .data
#'
#' @include debit.R
#' @return A tibble with (at least) these columns --
#'   - `x`, `sd`, `n`: the inputs.
#'   - `consistency`: DEBIT consistency of `x`, `sd`, and `n`.
#'
#'   By default, the tibble also includes the rounding method, boundary values,
#'   and information about the boundary values being inclusive or not. The
#'   tibble has the `scr_debit_map` class, which is recognized by the `audit()`
#'   generic.
#'
#' @section Summaries with [`audit()`]: There is an S3 method for the
#'   [`audit()`] generic, so you can call [`audit()`] following `debit_map()`.
#'   It returns a tibble with these columns --
#'
#'   1. `incons_cases`: the number of DEBIT-inconsistent cases.
#'   2. `all_cases`: the total number of cases.
#'   3. `incons_rate`: the rate of inconsistent cases.
#'   4. `mean_x`: the mean `x` (mean) value.
#'   5. `mean_sd`: the mean `sd` value.
#'   6. `distinct_n`: the number of distinct `n` values.
#' @references Heathers, James A. J., and Brown, Nicholas J. L. 2019. DEBIT: A
#'   Simple Consistency Test For Binary Data. https://osf.io/5vb3u/.
#'
#' @export
#'
#' @examples
#' # Call `debit_map()` on binary summary
#' # data such as these:
#' pigs3
#'
#' # The `consistency` column shows
#' # whether the values to its left
#' # are DEBIT-consistent:
#' pigs3 %>%
#'   debit_map()
#'
#' # Get test summaries with `audit()`:
#' pigs3 %>%
#'   debit_map() %>%
#'   audit()
debit_map <- function(data, x = NULL, sd = NULL, n = NULL,
                      rounding = "up_or_down", threshold = 5,
                      symmetric = FALSE, show_rec = TRUE, extra = Inf) {

  # If any two arguments called right below are length > 1, they need to have
  # the same length. Otherwise, the call will fail. But even so, there will be a
  # warning that values will get paired:
  check_lengths_congruent(list(rounding, threshold, symmetric))

  # For each key argument the user actually supplied, capture it unevaluated
  # and hand it to the internal column-name helper together with `data`.
  # `missing()` is checked before the argument is overwritten:
  if (!missing(x)) {
    x <- rlang::enexpr(x)
    data <- manage_key_colnames(data, x, "mean/proportion")
  }
  if (!missing(sd)) {
    sd <- rlang::enexpr(sd)
    data <- manage_key_colnames(data, sd, "standard deviation")
  }
  if (!missing(n)) {
    n <- rlang::enexpr(n)
    data <- manage_key_colnames(data, n, "sample size")
  }

  # Check the column names of `data`:
  check_mapper_input_colnames(data, c("x", "sd", "n"), "DEBIT")
  check_tibble(data)

  # Keep the user's specifications (captured expressions, or `NULL` if none
  # were given) because `x`, `sd`, and `n` are overwritten by the respective
  # `data` columns further below:
  x_spec <- x
  sd_spec <- sd
  n_spec <- n

  # Provide a way to specify the mean (`x`) column from within a function call
  # even if the column in question is not named `x`:
  if (!is.null(x)) {
    data <- dplyr::mutate(data, x = {{ x }})
  }

  # Same with the `sd` column...
  if (!is.null(sd)) {
    data <- dplyr::mutate(data, sd = {{ sd }})
  }

  # ... and with the sample size (`n`) column:
  if (!is.null(n)) {
    data <- dplyr::mutate(data, n = {{ n }})
  }

  # Turn `x` and `sd` into the `data` columns by those names to make them more
  # easy to work with. They are not reassigned again below, so these vectors
  # are also the ones used when the output tibble is built:
  x <- data$x
  sd <- data$sd

  # With the reported means and standard deviations (`x` and `sd`) now being
  # columns in `data` (if they weren't before), some checks are in order. These
  # use internal helper functions from the utils.R file. First, since trailing
  # zeros matter for DEBIT, make sure both vectors are strings...
  if (!is.null(x)) {
    check_type(x, "character")
  }
  if (!is.null(sd)) {
    check_type(sd, "character")
  }

  # ...and second, check whether they range from 0 to 1:
  check_debit_inputs_all(x, sd)

  # Create `other_cols`, which contains any and all extra columns from `data`
  # (i.e., those which play no role in DEBIT):
  if (ncol(data) > 3L) {
    other_cols <- dplyr::select(data, -x, -sd, -n)
  } else {
    other_cols <- NULL
  }

  # Run checks and isolate the desired extra columns, as specified by the
  # `extra` argument (default is `Inf`, i.e., all extra columns).
  # NOTE(review): the roxygen entry for `extra` reads "Not currently used",
  # yet the value is forwarded here -- confirm which of the two is accurate.
  extra_cols <- manage_extra_cols(data, extra, other_cols)

  # Prepare the sample size vector for the resulting tibble (`x` and `sd` were
  # already extracted above):
  n <- data$n

  # Compute the DEBIT results row by row and bind them into one tibble. The
  # columns are matched to the arguments of `debit_table()` by name:
  results <- data %>%
    dplyr::select(sd, x, n) %>%
    purrr::pmap_dfr(
      debit_table,
      rounding = rounding, threshold = threshold,
      symmetric = symmetric
    )

  # Finally, build the output, with or without the intermediary values
  # (rounding method, boundary values, and logical information about the
  # boundary values being inclusive or not).
  # NOTE(review): inside `dplyr::mutate()`, a name that exists as a column of
  # `results` takes precedence over the local vector of the same name, so some
  # of these assignments may be no-ops; `consistency = consistency` certainly
  # looks like one. Left unchanged because the columns returned by
  # `debit_table()` are not visible here.
  if (show_rec) {
    out <- results %>%
      dplyr::mutate(
        x = x, n = n, consistency = consistency
      ) %>%
      dplyr::select(
        x, sd, n, consistency, rounding,
        sd_lower, sd_incl_lower, sd_upper,
        sd_incl_upper, x_lower, x_upper
      )
  } else {
    out <- results %>%
      dplyr::mutate(
        sd = sd, x = x, n = n, consistency = consistency
      ) %>%
      dplyr::select(x, sd, n, consistency)
  }

  # Add the extra columns, if any. An unnamed data frame passed to
  # `dplyr::mutate()` contributes each of its columns:
  if (length(extra_cols) > 0L) {
    out <- dplyr::mutate(out, extra_cols)
  }

  # Where a key column was specified by the user, drop the column of that
  # specification from the output.
  # NOTE(review): the `*_spec` objects hold whatever `rlang::enexpr()`
  # captured, which is a symbol if a bare column name was supplied. `all_of()`
  # is documented for character (or numeric) vectors -- confirm that this path
  # works as intended when `x`, `sd`, or `n` are specified.
  if (!is.null(x_spec)) {
    out <- dplyr::select(out, -dplyr::all_of(x_spec))
  }
  if (!is.null(sd_spec)) {
    out <- dplyr::select(out, -dplyr::all_of(sd_spec))
  }
  if (!is.null(n_spec)) {
    out <- dplyr::select(out, -dplyr::all_of(n_spec))
  }

  # Mark the output with the general mapper class as well as a class that
  # records the rounding specification:
  rounding_class <- glue::glue("scr_rounding_{rounding}")
  out <- add_class(out, c("scr_debit_map", rounding_class))

  out
}