Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export(validate_outcomes_are_numeric)
export(validate_outcomes_are_univariate)
export(validate_prediction_size)
export(validate_predictors_are_numeric)
export(weighted_table)
import(rlang)
import(vctrs)
importFrom(glue,glue)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# hardhat (development version)

* New `weighted_table()` for generating a weighted contingency table, similar to
`table()` (#191).

* Bumped required R version to `>= 3.4.0` to reflect tidyverse standards.

# hardhat 0.2.0
Expand Down
134 changes: 134 additions & 0 deletions R/table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#' Weighted table
#'
#' @description
#' `weighted_table()` computes a weighted contingency table based on factors
#' provided in `...` and a double vector of weights provided in `weights`. It
#' can be seen as a weighted extension to [base::table()] and an alternative
#' to [stats::xtabs()].
#'
#' `weighted_table()` always uses the _exact_ set of levels returned by
#' `levels()` when constructing the table. This results in the following
#' properties:
#'
#' - Missing values found in the factors are never included in the table unless
#' there is an explicit `NA` factor level. If needed, this can be added to a
#' factor with [base::addNA()] or `forcats::fct_expand(x, NA)`.
#'
#' - Levels found in the factors that aren't actually used in the underlying
#' data are included in the table with a value of `0`. If needed, you can
#' drop unused factor levels by re-running your factor through [factor()],
#' or by calling `forcats::fct_drop()`.
#'
#' See the examples section for more information about these properties.
#'
#' @details
#' The result of `weighted_table()` does not have a `"table"` class attached
#' to it. It is only a double array. This is because "table" objects are
#' defined as containing integer counts, but weighted tables can utilize
#' fractional weights.
#'
#' @param ... Factors of equal length to use in the weighted table. If the
#' `...` are named, those names will propagate onto the "dimnames names" of
#' the resulting table. At least one factor must be provided.
#'
#' @param weights A double vector of weights used to fill the cells of the
#' weighted table. This must be the same length as the factors provided in
#' `...`.
#'
#' @param na_remove A single `TRUE` or `FALSE` for handling whether or not
#' missing values in `weights` should be removed when summing up the weights.
#'
#' @return
#' The weighted table as an array of double values.
#'
#' @export
#' @examples
#' x <- factor(c("x", "y", "z", "x", "x", "y"))
#' y <- factor(c("a", "b", "a", "a", "b", "b"))
#' w <- c(1.5, 2, 1.1, .5, 3, 2)
#'
#' weighted_table(x = x, y = y, weights = w)
#'
#' # ---------------------------------------------------------------------------
#' # If `weights` contains missing values, then missing values will be
#' # propagated into the weighted table
#' x <- factor(c("x", "y", "y"))
#' y <- factor(c("a", "b", "b"))
#' w <- c(1, NA, 3)
#'
#' weighted_table(x = x, y = y, weights = w)
#'
#' # You can remove the missing values while summing up the weights with
#' # `na_remove = TRUE`
#' weighted_table(x = x, y = y, weights = w, na_remove = TRUE)
#'
#' # ---------------------------------------------------------------------------
#' # If there are missing values in the factors, those typically don't show
#' # up in the weighted table
#' x <- factor(c("x", NA, "y", "x"))
#' y <- factor(c("a", "b", "a", NA))
#' w <- 1:4
#'
#' weighted_table(x = x, y = y, weights = w)
#'
#' # This is because the missing values aren't considered explicit levels
#' levels(x)
#'
#' # You can force them to show up in the table by using `addNA()` ahead of time
#' # (or `forcats::fct_expand(x, NA)`)
#' x <- addNA(x, ifany = TRUE)
#' y <- addNA(y, ifany = TRUE)
#' levels(x)
#'
#' weighted_table(x = x, y = y, weights = w)
#'
#' # ---------------------------------------------------------------------------
#' # If there are levels in your factors that aren't actually used in the
#' # underlying data, then they will still show up in the table with a `0` value
#' x <- factor(c("x", "y", "x"), levels = c("x", "y", "z"))
#' y <- factor(c("a", "b", "a"), levels = c("a", "b", "c"))
#' w <- 1:3
#'
#' weighted_table(x = x, y = y, weights = w)
#'
#' # If you want to drop these empty factor levels from the result, you can
#' # rerun `factor()` ahead of time to drop them (or `forcats::fct_drop()`)
#' x <- factor(x)
#' y <- factor(y)
#' levels(x)
#'
#' weighted_table(x = x, y = y, weights = w)
weighted_table <- function(..., weights, na_remove = FALSE) {
args <- list2(...)
n_args <- length(args)

if (n_args == 0L) {
abort("At least one vector must be supplied to `...`.")
}
if (!all(map_lgl(args, is.factor))) {
abort("All elements of `...` must be factors.")
}

sizes <- list_sizes(args)
size <- sizes[[1L]]

if (!all(sizes == size)) {
abort("All elements of `...` must be the same size.")
}

weights <- vec_cast(weights, to = double())
vec_assert(weights, size = size)

if (!is_bool(na_remove)) {
abort("`na_remove` must be a single `TRUE` or `FALSE`.")
}

tapply(
X = weights,
INDEX = args,
FUN = sum,
na.rm = na_remove,
default = 0,
simplify = TRUE
)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ reference:
- standardize
- new_model
- add_intercept_column
- weighted_table
- run_mold

- title: Validation
Expand Down
107 changes: 107 additions & 0 deletions man/weighted_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions tests/testthat/_snaps/table.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# `na_remove` is validated

Code
weighted_table(x, y, weights = w, na_remove = c(TRUE, FALSE))
Error <rlang_error>
`na_remove` must be a single `TRUE` or `FALSE`.

---

Code
weighted_table(x, y, weights = w, na_remove = 1)
Error <rlang_error>
`na_remove` must be a single `TRUE` or `FALSE`.

# requires at least one `...`

Code
weighted_table(weights = w)
Error <rlang_error>
At least one vector must be supplied to `...`.

# requires all `...` to be factors

Code
weighted_table(1, weights = w)
Error <rlang_error>
All elements of `...` must be factors.

# requires all `...` to be the same size

Code
weighted_table(x, y, weights = w)
Error <rlang_error>
All elements of `...` must be the same size.

# requires all `weights` to be the same size as `...` elements

Code
weighted_table(x, y, weights = w)
Error <vctrs_error_assert_size>
`weights` must have size 3, not size 4.

# requires `weights` to be castable to double

Code
weighted_table(x, weights = "a")
Error <vctrs_error_incompatible_type>
Can't convert <character> to <double>.

Loading