Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export(new_importance_weights)
export(new_model)
export(new_recipe_blueprint)
export(new_xy_blueprint)
export(recompose)
export(refresh_blueprint)
export(run_forge)
export(run_mold)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# hardhat (development version)

* New internal `recompose()` helper (#220).

* `default_recipe_blueprint()` has gained a `strings_as_factors` argument, which
is passed on to `recipes::prep()` (#212).

Expand Down
4 changes: 2 additions & 2 deletions R/blueprint-formula-default.R
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ mold_formula_default_process_predictors <- function(blueprint, data) {

terms <- simplify_terms(framed$terms)

predictors <- recompose(predictors, blueprint$composition)
predictors <- recompose(predictors, composition = blueprint$composition)

blueprint_terms <- blueprint$terms
blueprint_terms$predictors <- terms
Expand Down Expand Up @@ -704,7 +704,7 @@ forge_formula_default_process_predictors <- function(blueprint, predictors) {
data <- reattach_factorish_columns(data, predictors, factorish_names)
}

data <- recompose(data, blueprint$composition)
data <- recompose(data, composition = blueprint$composition)

offset <- extract_offset(framed$terms, framed$data)

Expand Down
4 changes: 2 additions & 2 deletions R/blueprint-recipe-default.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ mold_recipe_default_process_predictors <- function(blueprint, data) {

predictors <- maybe_add_intercept_column(predictors, blueprint$intercept)

predictors <- recompose(predictors, blueprint$composition)
predictors <- recompose(predictors, composition = blueprint$composition)

ptype <- get_original_predictor_ptype(blueprint$recipe, data)

Expand Down Expand Up @@ -477,7 +477,7 @@ forge_recipe_default_process <- function(blueprint, predictors, outcomes, extras
forge_recipe_default_process_predictors <- function(blueprint, predictors) {
predictors <- maybe_add_intercept_column(predictors, blueprint$intercept)

predictors <- recompose(predictors, blueprint$composition)
predictors <- recompose(predictors, composition = blueprint$composition)

new_forge_process_terms(
blueprint = blueprint,
Expand Down
4 changes: 2 additions & 2 deletions R/blueprint-xy-default.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ mold_xy_default_process_predictors <- function(blueprint, x) {

x <- maybe_add_intercept_column(x, blueprint$intercept)

x <- recompose(x, blueprint$composition)
x <- recompose(x, composition = blueprint$composition)

new_mold_process_terms(
blueprint = blueprint,
Expand Down Expand Up @@ -356,7 +356,7 @@ forge_xy_default_process <- function(blueprint, predictors, outcomes, extras) {
forge_xy_default_process_predictors <- function(blueprint, predictors) {
predictors <- maybe_add_intercept_column(predictors, blueprint$intercept)

predictors <- recompose(predictors, blueprint$composition)
predictors <- recompose(predictors, composition = blueprint$composition)

new_forge_process_terms(
blueprint = blueprint,
Expand Down
2 changes: 2 additions & 0 deletions R/blueprint.R
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ check_list <- function(x,
}

check_composition <- function(composition, error_call = caller_env()) {
# `recompose()` technically also supports `"data.frame"`,
# but that is only for recipes, and we probably don't want that here
arg_match0(
arg = composition,
values = c("tibble", "matrix", "dgCMatrix"),
Expand Down
124 changes: 81 additions & 43 deletions R/recompose.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,88 @@
# This is the same as the "recomposition" at the end of recipes::bake()

# Convert `data` into the container requested by `composition`.
#
# - "tibble": `data` is returned untouched.
# - "matrix": delegated to `convert_matrix()` with `sparse = FALSE`.
# - "dgCMatrix": delegated to `convert_matrix()` with `sparse = TRUE`.
#
# Any other value (including non-string input, because the comparison uses
# `identical()`) is treated as an internal error and aborts.
recompose <- function(data, composition) {
  if (identical(composition, "tibble")) {
    return(data)
  }

  if (identical(composition, "matrix")) {
    return(convert_matrix(data, sparse = FALSE))
  }

  if (identical(composition, "dgCMatrix")) {
    return(convert_matrix(data, sparse = TRUE))
  }

  abort("Internal error: Unknown `composition` type.")
}
#' Recompose a data frame into another form
#'
#' @description
#' `recompose()` takes a data frame and converts it into one of:
#' - A tibble
#' - A data frame
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added support for `"data.frame"` specifically for recipes, but hardhat itself won't use it.

#' - A matrix
#' - A sparse matrix (using the Matrix package)
#'
#' This is an internal function used only by hardhat and recipes.
#'
#' @inheritParams rlang::args_dots_empty
#'
#' @param data A data frame.
#'
#' @param composition One of:
#' - `"tibble"` to convert to a tibble.
#' - `"data.frame"` to convert to a base data frame.
#' - `"matrix"` to convert to a matrix. All columns must be numeric.
#' - `"dgCMatrix"` to convert to a sparse matrix. All columns must be numeric,
#' and the Matrix package must be installed.
#'
#' @returns
#' The output type is determined from the `composition`.
#'
#' @export
#' @keywords internal
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marked as internal because I don't really want people using this.

#'
#' @examples
#' df <- vctrs::data_frame(x = 1)
#'
#' recompose(df)
#' recompose(df, composition = "matrix")
#'
#' # All columns must be numeric to convert to a matrix
#' df <- vctrs::data_frame(x = 1, y = "a")
#' try(recompose(df, composition = "matrix"))
recompose <- function(data,
...,
composition = "tibble") {
check_dots_empty0(...)
check_data_frame(data)

composition <- arg_match0(
arg = composition,
values = c("tibble", "data.frame", "matrix", "dgCMatrix")
)

convert_matrix <- function(x, sparse = TRUE) {
is_num <- vapply(x, is.numeric, logical(1))

if (!all(is_num)) {
num_viol <- sum(!is_num)
if (num_viol < 5) {
abort(
paste0(
"Columns (",
paste0("`", names(is_num)[!is_num], "`", collapse = ", "),
") are not numeric; cannot convert to matrix."
)
)
} else {
abort(
paste0(
num_viol,
" columns are not numeric; cannot ",
"convert to matrix."
)
)
switch(
composition,
tibble = {
coerce_to_tibble(data)
},
data.frame = {
new_data_frame(data, n = vec_size(data))
},
matrix = {
coerce_to_matrix(data)
},
dgCMatrix = {
data <- coerce_to_matrix(data)
coerce_to_sparse(data)
}
}
)
}

# At this point, all cols are numeric so we can just use as.matrix()
res <- as.matrix(x)
coerce_to_matrix <- function(data, error_call = caller_env()) {
numeric <- map_lgl(data, is.numeric)

if (sparse) {
if (!is_installed("Matrix")) {
abort("The Matrix package must be installed to use a 'dgCMatrix' `composition`")
}
res <- Matrix::Matrix(res, sparse = TRUE)
if (!all(numeric)) {
loc <- which(!numeric)
loc <- names(data)[loc]

message <- c(
"{.arg data} must only contain numeric columns.",
i = "These columns aren't numeric: {.str {loc}}."
)

cli::cli_abort(message, call = error_call)
}

res
as.matrix(data)
}

# Convert `data` to a sparse matrix with `Matrix::Matrix(sparse = TRUE)`.
#
# The Matrix package is checked for first via `check_installed()`; the
# `error_call` is forwarded so that any failure is reported against the
# caller rather than this helper.
coerce_to_sparse <- function(data, error_call = caller_env()) {
  check_installed("Matrix", call = error_call)

  sparse_data <- Matrix::Matrix(data, sparse = TRUE)
  sparse_data
}
47 changes: 47 additions & 0 deletions man/recompose.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/_snaps/forge-formula.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# can be both missing levels and have new levels

Code
mold(y ~ f, dat, blueprint = bp2)
Condition
Error in `recompose()`:
! `data` must only contain numeric columns.
i These columns aren't numeric: "f".

53 changes: 53 additions & 0 deletions tests/testthat/_snaps/recompose.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# columns must be numeric when coercing to matrix

Code
recompose(df, composition = "matrix")
Condition
Error in `recompose()`:
! `data` must only contain numeric columns.
i These columns aren't numeric: "y" and "z".

# columns must be numeric when coercing to sparse matrix

Code
recompose(df, composition = "dgCMatrix")
Condition
Error in `recompose()`:
! `data` must only contain numeric columns.
i These columns aren't numeric: "y" and "z".

# checks for data frame input

Code
recompose(1)
Condition
Error in `recompose()`:
! `data` must be a data frame, not the number 1.

# dots must be empty

Code
recompose(data.frame(), 1)
Condition
Error in `recompose()`:
! `...` must be empty.
x Problematic argument:
* ..1 = 1
i Did you forget to name an argument?

# validates `composition`

Code
recompose(data.frame(), composition = "foo")
Condition
Error in `recompose()`:
! `composition` must be one of "tibble", "data.frame", "matrix", or "dgCMatrix", not "foo".

---

Code
recompose(data.frame(), composition = 1)
Condition
Error in `recompose()`:
! `composition` must be a string or character vector.

8 changes: 4 additions & 4 deletions tests/testthat/test-forge-formula.R
Original file line number Diff line number Diff line change
Expand Up @@ -744,11 +744,11 @@ test_that("can be both missing levels and have new levels", {

bp1 <- default_formula_blueprint(indicators = "none")
bp2 <- default_formula_blueprint(indicators = "none", composition = "matrix")

x1 <- mold(y ~ f, dat, blueprint = bp1)
expect_error(
x2 <- mold(y ~ f, dat, blueprint = bp2),
"cannot convert to matrix"
)
expect_snapshot(error = TRUE, {
mold(y ~ f, dat, blueprint = bp2)
})

# Warning for the extra level
expect_warning(
Expand Down
Loading