From b927ffe51fcdafdb86d0b1dd0a81be139ec8a02e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 22 Mar 2022 23:11:10 -0400 Subject: [PATCH 01/41] initial case weight work for xy models --- R/arguments.R | 11 ++++++++++- R/fit.R | 21 ++++++++++++++++++++- R/fit_helpers.R | 2 +- R/logistic_reg_data.R | 2 +- R/misc.R | 11 +++++++++++ man/fit.Rd | 14 ++++++++++++-- 6 files changed, 55 insertions(+), 6 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index a209a0a5f..94a4c67c5 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -177,12 +177,17 @@ make_form_call <- function(object, env = NULL) { fit_call } -make_xy_call <- function(object, target) { +# TODO we need something to indicate that case weights are being used. +make_xy_call <- function(object, target, env) { fit_args <- object$method$fit$args + uses_weights <- !is.null(env$case_weights) # Get the arguments related to data: if (is.null(object$method$fit$data)) { data_args <- c(x = "x", y = "y") + if (uses_weights) { + data_args["weights"] <- "weights" + } } else { data_args <- object$method$fit$data } @@ -196,6 +201,9 @@ make_xy_call <- function(object, target) { matrix = rlang::expr(maybe_matrix(x)), rlang::abort(glue::glue("Invalid data type target: {target}.")) ) + if (uses_weights) { + object$method$fit$args[[ unname(data_args["weights"]) ]] <- rlang::expr(case_weights) + } fit_call <- make_call( fun = object$method$fit$func["fun"], @@ -268,3 +276,4 @@ min_rows <- function(num_rows, source, offset = 0) { as.integer(num_rows) } + diff --git a/R/fit.R b/R/fit.R index ca6c5928e..c495baa76 100644 --- a/R/fit.R +++ b/R/fit.R @@ -18,6 +18,8 @@ #' below). A data frame containing all relevant variables (e.g. #' outcome(s), predictors, case weights, etc). Note: when needed, a #' \emph{named argument} should be used. +#' @param case_weights A vector of numeric case weights with underlying class of +#' "`hardhat_case_weights`". See [hardhat::frequency_weights()] for example. #' @param control A named list with elements `verbosity` and #' `catch`. See [control_parsnip()]. #' @param ... Not currently used; values passed here will be @@ -101,6 +103,7 @@ fit.model_spec <- function(object, formula, data, + case_weights = NULL, control = control_parsnip(), ... ) { @@ -110,6 +113,8 @@ fit.model_spec <- if (!identical(class(control), class(control_parsnip()))) { rlang::abort("The 'control' argument should have class 'control_parsnip'.") } + check_case_weights(case_weights) + dots <- quos(...) if (length(possible_engines(object)) == 0) { @@ -129,8 +134,9 @@ fit.model_spec <- } } - if (all(c("x", "y") %in% names(dots))) + if (all(c("x", "y") %in% names(dots))) { rlang::abort("`fit.model_spec()` is for the formula methods. Use `fit_xy()` instead.") + } cl <- match.call(expand.dots = TRUE) # Create an environment with the evaluated argument objects. This will be # used when a model call is made later. @@ -138,6 +144,14 @@ fit.model_spec <- eval_env$data <- data eval_env$formula <- formula + # TODO do we convert/cast to numeric here? + # as.integer(NULL) is integer(0) + if (is.null(case_weights)) { + eval_env$case_weights <- NULL + } else { + eval_env$case_weights <- as.integer(case_weights) + } + fit_interface <- check_interface(eval_env$formula, eval_env$data, cl, object) @@ -206,6 +220,7 @@ fit_xy.model_spec <- function(object, x, y, + case_weights = NULL, control = control_parsnip(), ... ) { @@ -223,6 +238,7 @@ fit_xy.model_spec <- if (is.null(colnames(x))) { rlang::abort("'x' should have column names.") } + check_case_weights(case_weights) object <- check_mode(object, levels(y)) dots <- quos(...) if (is.null(object$engine)) { @@ -245,6 +261,9 @@ fit_xy.model_spec <- eval_env <- rlang::env() eval_env$x <- x eval_env$y <- y + eval_env$case_weights <- case_weights + + # TODO case weights: pass in eval_env not individual elements fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object) if (object$engine == "spark") diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 61b9344b0..8af0ebb68 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -88,7 +88,7 @@ xy_xy <- function(object, env, control, target = "none", ...) { # sub in arguments to actual syntax for corresponding engine object <- translate(object, engine = object$engine) - fit_call <- make_xy_call(object, target) + fit_call <- make_xy_call(object, target, env) res <- list(lvl = levels(env$y), spec = object) diff --git a/R/logistic_reg_data.R b/R/logistic_reg_data.R index db6f12389..21eb48d2f 100644 --- a/R/logistic_reg_data.R +++ b/R/logistic_reg_data.R @@ -220,7 +220,7 @@ set_fit( mode = "classification", value = list( interface = "matrix", - protect = c("x", "y", "wi"), + protect = c("x", "y"), data = c(x = "data", y = "target"), func = c(pkg = "LiblineaR", fun = "LiblineaR"), defaults = list(verbose = FALSE) diff --git a/R/misc.R b/R/misc.R index 07ae80274..c52807239 100644 --- a/R/misc.R +++ b/R/misc.R @@ -385,3 +385,14 @@ stan_conf_int <- function(object, newdata) { penalty } + + +check_case_weights <- function(x) { + if (is.null(x)) { + return(invisible(NULL)) + } + if (!inherits(x, "hardhat_case_weights")) { + rlang::abort("'case_weights' should be a single numeric vector of class 'hardhat_case_weights'.") + } + invisible(NULL) +} diff --git a/man/fit.Rd b/man/fit.Rd index 21c79f8ec..b152c7c4d 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -5,9 +5,16 @@ \alias{fit_xy.model_spec} \title{Fit a Model Specification to a Dataset} \usage{ -\method{fit}{model_spec}(object, formula, data, control = control_parsnip(), ...) +\method{fit}{model_spec}( + object, + formula, + data, + case_weights = NULL, + control = control_parsnip(), + ... +) -\method{fit_xy}{model_spec}(object, x, y, control = control_parsnip(), ...) +\method{fit_xy}{model_spec}(object, x, y, case_weights = NULL, control = control_parsnip(), ...) } \arguments{ \item{object}{An object of class \code{model_spec} that has a chosen engine @@ -22,6 +29,9 @@ below). A data frame containing all relevant variables (e.g. outcome(s), predictors, case weights, etc). Note: when needed, a \emph{named argument} should be used.} +\item{case_weights}{A vector of numeric case weights with underlying class of +"\code{hardhat_case_weights}". See \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}} for example.} + \item{control}{A named list with elements \code{verbosity} and \code{catch}. See \code{\link[=control_parsnip]{control_parsnip()}}.} From 3bd5f377a9c937e21cef8128ed97df49dc59250e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 09:04:43 -0400 Subject: [PATCH 02/41] make the env arg of eval_tidy more explicit --- R/fit.R | 10 +++++----- R/fit_helpers.R | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/fit.R b/R/fit.R index c495baa76..5cae71337 100644 --- a/R/fit.R +++ b/R/fit.R @@ -325,18 +325,18 @@ fit_xy.model_spec <- # ------------------------------------------------------------------------------ -eval_mod <- function(e, capture = FALSE, catch = FALSE, ...) { +eval_mod <- function(e, capture = FALSE, catch = FALSE, envir = NULL, ...) { if (capture) { if (catch) { - junk <- capture.output(res <- try(eval_tidy(e, ...), silent = TRUE)) + junk <- capture.output(res <- try(eval_tidy(e, env = envir, ...), silent = TRUE)) } else { - junk <- capture.output(res <- eval_tidy(e, ...)) + junk <- capture.output(res <- eval_tidy(e, env = envir, ...)) } } else { if (catch) { - res <- try(eval_tidy(e, ...), silent = TRUE) + res <- try(eval_tidy(e, env = envir, ...), silent = TRUE) } else { - res <- eval_tidy(e, ...) + res <- eval_tidy(e, env = envir, ...) } } res diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 8af0ebb68..2c51605db 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -39,7 +39,7 @@ form_form <- fit_call, capture = control$verbosity == 0, catch = control$catch, - env = env, + envir = env, ... ), gcFirst = FALSE @@ -49,7 +49,7 @@ form_form <- fit_call, capture = control$verbosity == 0, catch = control$catch, - env = env, + envir = env, ... ) elapsed <- list(elapsed = NA_real_) @@ -98,7 +98,7 @@ xy_xy <- function(object, env, control, target = "none", ...) { fit_call, capture = control$verbosity == 0, catch = control$catch, - env = env, + envir = env, ... ), gcFirst = FALSE @@ -108,7 +108,7 @@ xy_xy <- function(object, env, control, target = "none", ...) { fit_call, capture = control$verbosity == 0, catch = control$catch, - env = env, + envir = env, ... ) elapsed <- list(elapsed = NA_real_) From 73eae0b46c2476cee2f0d7769e40bc5780c50861 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 11:13:38 -0400 Subject: [PATCH 03/41] conversion of weights class to some numeric type --- R/fit.R | 10 ++-------- R/fit_helpers.R | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/R/fit.R b/R/fit.R index 5cae71337..6b7f5977f 100644 --- a/R/fit.R +++ b/R/fit.R @@ -144,13 +144,7 @@ fit.model_spec <- eval_env$data <- data eval_env$formula <- formula - # TODO do we convert/cast to numeric here? - # as.integer(NULL) is integer(0) - if (is.null(case_weights)) { - eval_env$case_weights <- NULL - } else { - eval_env$case_weights <- as.integer(case_weights) - } + eval_env$case_weights <- weights_to_numeric(case_weights) fit_interface <- check_interface(eval_env$formula, eval_env$data, cl, object) @@ -261,7 +255,7 @@ fit_xy.model_spec <- eval_env <- rlang::env() eval_env$x <- x eval_env$y <- y - eval_env$case_weights <- case_weights + eval_env$case_weights <- weights_to_numeric(case_weights) # TODO case weights: pass in eval_env not individual elements fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object) diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 2c51605db..db41bef97 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -200,3 +200,17 @@ xy_form <- function(object, env, control, ...) { res } + +weights_to_numeric <- function(x) { + if (is.null(x)) { + return(NULL) + } + + to_int <- c("hardhat_frequency_weights") + if (inherits(x, to_int)) { + x <- as.integer(x) + } else { + x <- as.numeric(x) + } + x +} From 1d8507b37e3d79dda5b21d28a4d07515dc945338 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 11:13:51 -0400 Subject: [PATCH 04/41] changes for formula call formation --- R/arguments.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/arguments.R b/R/arguments.R index 94a4c67c5..ef0bcec32 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -149,10 +149,14 @@ make_call <- function(fun, ns, args, ...) { make_form_call <- function(object, env = NULL) { fit_args <- object$method$fit$args + uses_weights <- !is.null(env$case_weights) # Get the arguments related to data: if (is.null(object$method$fit$data)) { data_args <- c(formula = "formula", data = "data") + if (uses_weights) { + data_args["weights"] <- "weights" + } } else { data_args <- object$method$fit$data } @@ -165,6 +169,13 @@ make_form_call <- function(object, env = NULL) { # sub in actual formula fit_args[[ unname(data_args["formula"]) ]] <- env$formula + # Add in case weights symbol + if (uses_weights) { + fit_args[[ unname(data_args["weights"]) ]] <- rlang::expr(case_weights) + } + + + # TODO remove weights col from data? if (object$engine == "spark") { env$x <- env$data } From b13af4163899bb8925e76ad8768980618d5359d6 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 11:31:58 -0400 Subject: [PATCH 05/41] check to see if the model can use case weights --- R/fit.R | 5 +++-- R/fit_helpers.R | 22 ++++++++++++++++++++++ R/misc.R | 6 +++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/R/fit.R b/R/fit.R index 6b7f5977f..038170090 100644 --- a/R/fit.R +++ b/R/fit.R @@ -113,7 +113,7 @@ fit.model_spec <- if (!identical(class(control), class(control_parsnip()))) { rlang::abort("The 'control' argument should have class 'control_parsnip'.") } - check_case_weights(case_weights) + check_case_weights(case_weights, object) dots <- quos(...) @@ -232,7 +232,8 @@ fit_xy.model_spec <- if (is.null(colnames(x))) { rlang::abort("'x' should have column names.") } - check_case_weights(case_weights) + check_case_weights(case_weights, object) + object <- check_mode(object, levels(y)) dots <- quos(...) if (is.null(object$engine)) { diff --git a/R/fit_helpers.R b/R/fit_helpers.R index db41bef97..9b4542d31 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -214,3 +214,25 @@ weights_to_numeric <- function(x) { } x } + +case_weights_allowed <- function(spec) { + mod_type <- class(spec)[1] + mod_eng <- spec$engine + mod_mode <- spec$mode + + model_info <- + get_from_env(paste0(mod_type, "_fit")) %>% + dplyr::filter(engine == mod_eng & mode == mod_mode) + if (nrow(model_info) != 1) { + rlang::abort( + glue::glue( + "Error in geting model information for model {mod_type} with engine {mod_eng} and mode {mod_mode}." + ) + ) + } + # If weights are used, they are protected data arguments with the canonical + # name 'weights' (although this may not be the model function's argument name). + data_args <- model_info$value[[1]]$protect + any(data_args == "weights") +} + diff --git a/R/misc.R b/R/misc.R index c52807239..c0c1140d2 100644 --- a/R/misc.R +++ b/R/misc.R @@ -387,12 +387,16 @@ stan_conf_int <- function(object, newdata) { } -check_case_weights <- function(x) { +check_case_weights <- function(x, spec) { if (is.null(x)) { return(invisible(NULL)) } if (!inherits(x, "hardhat_case_weights")) { rlang::abort("'case_weights' should be a single numeric vector of class 'hardhat_case_weights'.") } + allowed <- case_weights_allowed(spec) + if (!allowed) { + rlang::abort("Case weights are not enabled by the underlying model implementation.") + } invisible(NULL) } From b72c7ce2dbe55739d04f5107d347e340644514e5 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 15:13:32 -0400 Subject: [PATCH 06/41] change envir vector name to weights --- R/arguments.R | 8 ++++---- R/fit.R | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index ef0bcec32..5eb369117 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -149,7 +149,7 @@ make_call <- function(fun, ns, args, ...) { make_form_call <- function(object, env = NULL) { fit_args <- object$method$fit$args - uses_weights <- !is.null(env$case_weights) + uses_weights <- !is.null(env$weights) # Get the arguments related to data: if (is.null(object$method$fit$data)) { @@ -171,7 +171,7 @@ make_form_call <- function(object, env = NULL) { # Add in case weights symbol if (uses_weights) { - fit_args[[ unname(data_args["weights"]) ]] <- rlang::expr(case_weights) + fit_args[[ unname(data_args["weights"]) ]] <- rlang::expr(weights) } @@ -191,7 +191,7 @@ make_form_call <- function(object, env = NULL) { # TODO we need something to indicate that case weights are being used. make_xy_call <- function(object, target, env) { fit_args <- object$method$fit$args - uses_weights <- !is.null(env$case_weights) + uses_weights <- !is.null(env$weights) # Get the arguments related to data: if (is.null(object$method$fit$data)) { @@ -213,7 +213,7 @@ make_xy_call <- function(object, target, env) { rlang::abort(glue::glue("Invalid data type target: {target}.")) ) if (uses_weights) { - object$method$fit$args[[ unname(data_args["weights"]) ]] <- rlang::expr(case_weights) + object$method$fit$args[[ unname(data_args["weights"]) ]] <- rlang::expr(weights) } fit_call <- make_call( diff --git a/R/fit.R b/R/fit.R index 038170090..546fb8900 100644 --- a/R/fit.R +++ b/R/fit.R @@ -144,7 +144,7 @@ fit.model_spec <- eval_env$data <- data eval_env$formula <- formula - eval_env$case_weights <- weights_to_numeric(case_weights) + eval_env$weights <- weights_to_numeric(case_weights) fit_interface <- check_interface(eval_env$formula, eval_env$data, cl, object) @@ -256,7 +256,7 @@ fit_xy.model_spec <- eval_env <- rlang::env() eval_env$x <- x eval_env$y <- y - eval_env$case_weights <- weights_to_numeric(case_weights) + eval_env$weights <- weights_to_numeric(case_weights) # TODO case weights: pass in eval_env not individual elements fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object) From 3f5f6a76a0d52d975ebf948ad890d0dbf8613d55 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 15:25:02 -0400 Subject: [PATCH 07/41] update model defs for non-standard case weight arg names --- R/rand_forest_data.R | 6 ++++-- R/svm_linear_data.R | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/rand_forest_data.R b/R/rand_forest_data.R index f3f74d9f4..5ea5e6f1d 100644 --- a/R/rand_forest_data.R +++ b/R/rand_forest_data.R @@ -122,7 +122,8 @@ set_fit( mode = "classification", value = list( interface = "data.frame", - protect = c("x", "y", "case.weights"), + data = c(x = "x", y = "y", weights = "case.weights"), + protect = c("x", "y", "weights"), func = c(pkg = "ranger", fun = "ranger"), defaults = list( @@ -151,7 +152,8 @@ set_fit( mode = "regression", value = list( interface = "data.frame", - protect = c("x", "y", "case.weights"), + data = c(x = "x", y = "y", weights = "case.weights"), + protect = c("x", "y", "weights"), func = c(pkg = "ranger", fun = "ranger"), defaults = list( diff --git a/R/svm_linear_data.R b/R/svm_linear_data.R index 25da85477..d4a14602f 100644 --- a/R/svm_linear_data.R +++ b/R/svm_linear_data.R @@ -33,8 +33,8 @@ set_fit( mode = "regression", value = list( interface = "matrix", - protect = c("x", "y", "wi"), - data = c(x = "data", y = "target"), + protect = c("x", "y", "weights"), + data = c(x = "data", y = "target", weights = "wi"), func = c(pkg = "LiblineaR", fun = "LiblineaR"), defaults = list(type = 11) ) @@ -47,7 +47,8 @@ set_fit( value = list( interface = "matrix", data = c(x = "data", y = "target"), - protect = c("x", "y", "wi"), + protect = c("x", "y", "weights"), + data = c(x = "data", y = "target", weights = "wi"), func = c(pkg = "LiblineaR", fun = "LiblineaR"), defaults = list(type = 1) ) From ee75da570f2b12bb9def882b3ebe756ab8d38f3e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 23 Mar 2022 20:41:43 -0400 Subject: [PATCH 08/41] could it possibly be this easy? --- R/arguments.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/arguments.R b/R/arguments.R index 5eb369117..c3e9f9384 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -167,6 +167,13 @@ make_form_call <- function(object, env = NULL) { } # sub in actual formula + # Bit first... `lm()` and others use the original model function call to + # construct a call for `model.frame()`. That will normally fail because the + # formula has its own environment attached (usually the global environment) + # and it will look there for a vector named 'weights'. We've stashed that + # vector in the environment 'env' so we reset the reference environment in + # the formula to look in the right place for the case weights. + environment(env$formula) <- env fit_args[[ unname(data_args["formula"]) ]] <- env$formula # Add in case weights symbol From 039190606d966c18965f6d65d2baf1970ebdd08d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 24 Mar 2022 14:40:42 -0400 Subject: [PATCH 09/41] better approach to handling model.frame() issues in lm() --- R/arguments.R | 7 ------- R/fit.R | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index c3e9f9384..5eb369117 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -167,13 +167,6 @@ make_form_call <- function(object, env = NULL) { } # sub in actual formula - # Bit first... `lm()` and others use the original model function call to - # construct a call for `model.frame()`. That will normally fail because the - # formula has its own environment attached (usually the global environment) - # and it will look there for a vector named 'weights'. We've stashed that - # vector in the environment 'env' so we reset the reference environment in - # the formula to look in the right place for the case weights. - environment(env$formula) <- env fit_args[[ unname(data_args["formula"]) ]] <- env$formula # Add in case weights symbol diff --git a/R/fit.R b/R/fit.R index 546fb8900..795319049 100644 --- a/R/fit.R +++ b/R/fit.R @@ -142,9 +142,22 @@ fit.model_spec <- # used when a model call is made later. eval_env <- rlang::env() + wts <- weights_to_numeric(case_weights) + + # `lm()` and `glm()` and others use the original model function call to + # construct a call for `model.frame()`. That will normally fail because the + # formula has its own environment attached (usually the global environment) + # and it will look there for a vector named 'weights'. We've stashed that + # vector in the environment 'env' so we reset the reference environment in + # the formula to have our data objects so they can be found. + fenv <- rlang::env_clone(environment(formula)) + fenv$data <- data + fenv$weights <- wts + environment(formula) <- fenv + eval_env$data <- data eval_env$formula <- formula - eval_env$weights <- weights_to_numeric(case_weights) + eval_env$weights <- wts fit_interface <- check_interface(eval_env$formula, eval_env$data, cl, object) From 7f4676bb48c4b6ef0fdc84b5c8fd6bc2687003e2 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 24 Mar 2022 15:11:26 -0400 Subject: [PATCH 10/41] no case weights for LiblinearR (they are class weights) --- R/svm_linear_data.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/svm_linear_data.R b/R/svm_linear_data.R index d4a14602f..ed2d6da95 100644 --- a/R/svm_linear_data.R +++ b/R/svm_linear_data.R @@ -33,8 +33,8 @@ set_fit( mode = "regression", value = list( interface = "matrix", - protect = c("x", "y", "weights"), - data = c(x = "data", y = "target", weights = "wi"), + protect = c("x", "y"), + data = c(x = "data", y = "target"), func = c(pkg = "LiblineaR", fun = "LiblineaR"), defaults = list(type = 11) ) @@ -47,8 +47,8 @@ set_fit( value = list( interface = "matrix", data = c(x = "data", y = "target"), - protect = c("x", "y", "weights"), - data = c(x = "data", y = "target", weights = "wi"), + protect = c("x", "y"), + data = c(x = "data", y = "target"), func = c(pkg = "LiblineaR", fun = "LiblineaR"), defaults = list(type = 1) ) From 6ebce7b888a45bd7c4093af1bca883779b724002 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 24 Mar 2022 20:28:56 -0400 Subject: [PATCH 11/41] add and update unit tests --- R/parsnip-package.R | 2 +- tests/testthat/test-case-weights.R | 57 ++++++++++++++++++++++++++++++ tests/testthat/test_logistic_reg.R | 5 --- tests/testthat/test_rand_forest.R | 30 ++++++++-------- tests/testthat/test_svm_linear.R | 2 -- 5 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 tests/testthat/test-case-weights.R diff --git a/R/parsnip-package.R b/R/parsnip-package.R index ef002e8e5..ead9ff282 100644 --- a/R/parsnip-package.R +++ b/R/parsnip-package.R @@ -41,7 +41,7 @@ utils::globalVariables( "sub_neighbors", ".pred_class", "x", "y", "predictor_indicators", "compute_intercept", "remove_intercept", "estimate", "term", "call_info", "component", "component_id", "func", "tunable", "label", - "pkg", ".order", "item", "tunable", "has_ext" + "pkg", ".order", "item", "tunable", "has_ext", "weights" ) ) diff --git a/tests/testthat/test-case-weights.R b/tests/testthat/test-case-weights.R new file mode 100644 index 000000000..6b4f8f616 --- /dev/null +++ b/tests/testthat/test-case-weights.R @@ -0,0 +1,57 @@ + +test_that('case weights with xy method', { + + skip_if_not_installed("C50") + skip_if_not_installed("modeldata") + library(hardhat) + data("two_class_dat", package = "modeldata") + + wts <- runif(nrow(two_class_dat)) + wts <- ifelse(wts < 1/5, 0, 1) + two_class_subset <- two_class_dat[wts != 0, ] + wts <- importance_weights(wts) + + expect_error({ + set.seed(1) + C5_bst_wt_fit <- + boost_tree(trees = 5) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + fit(Class ~ ., data = two_class_dat, case_weights = wts) + }, + regexp = NA) + + expect_output( + print(C5_bst_wt_fit$fit$call), + "weights = weights" + ) +}) + + +test_that('case weights with formula method', { + + skip_if_not_installed("modeldata") + data("ames", package = "modeldata") + ames$Sale_Price <- log10(ames$Sale_Price) + + set.seed(1) + wts <- runif(nrow(ames)) + wts <- ifelse(wts < 1/5, 0L, 1L) + ames_subset <- ames[wts != 0, ] + wts <- frequency_weights(wts) + + expect_error( + lm_wt_fit <- + linear_reg() %>% + fit(Sale_Price ~ Longitude + Latitude, data = ames, case_weights = wts), + regexp = NA) + + lm_sub_fit <- + linear_reg() %>% + fit(Sale_Price ~ Longitude + Latitude, data = ames_subset) + + expect_equal(coef(lm_wt_fit$fit), coef(lm_sub_fit$fit)) +}) + + + diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 8f7e2f409..99a6ace79 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -35,7 +35,6 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), verbose = FALSE ) ) @@ -89,7 +88,6 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), cost = new_empty_quosure(1), verbose = FALSE ) @@ -115,7 +113,6 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), type = new_empty_quosure(tune()), verbose = FALSE ) @@ -146,7 +143,6 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), cost = new_empty_quosure(1), verbose = FALSE ) @@ -195,7 +191,6 @@ test_that('engine arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), bias = new_empty_quosure(0), verbose = FALSE ) diff --git a/tests/testthat/test_rand_forest.R b/tests/testthat/test_rand_forest.R index e5a73c802..6b98c48ca 100644 --- a/tests/testthat/test_rand_forest.R +++ b/tests/testthat/test_rand_forest.R @@ -18,7 +18,7 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - case.weights = expr(missing_arg()), + weights = expr(missing_arg()), mtry = expr(min_cols(~4, x)), num.threads = 1, verbose = FALSE, @@ -49,7 +49,7 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - case.weights = expr(missing_arg()), + weights = expr(missing_arg()), num.trees = new_empty_quosure(1000), num.threads = 1, verbose = FALSE, @@ -82,7 +82,7 @@ test_that('primary arguments', { list( x = expr(missing_arg()), y = expr(missing_arg()), - case.weights = expr(missing_arg()), + weights = expr(missing_arg()), min.node.size = expr(min_rows(~5, x)), num.threads = 1, verbose = FALSE, @@ -109,17 +109,19 @@ test_that('primary arguments', { test_that('engine arguments', { ranger_imp <- rand_forest(mode = "classification") - expect_equal(translate(ranger_imp %>% set_engine("ranger", importance = "impurity"))$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - case.weights = expr(missing_arg()), - importance = new_empty_quosure("impurity"), - num.threads = 1, - verbose = FALSE, - seed = expr(sample.int(10^5, 1)), - probability = TRUE - ) + expect_equal( + translate(ranger_imp %>% + set_engine("ranger", importance = "impurity"))$method$fit$args, + list( + x = expr(missing_arg()), + y = expr(missing_arg()), + weights = expr(missing_arg()), + importance = new_empty_quosure("impurity"), + num.threads = 1, + verbose = FALSE, + seed = expr(sample.int(10^5, 1)), + probability = TRUE + ) ) randomForest_votes <- rand_forest(mode = "regression") diff --git a/tests/testthat/test_svm_linear.R b/tests/testthat/test_svm_linear.R index 6e01fa8d1..07d4e37d0 100644 --- a/tests/testthat/test_svm_linear.R +++ b/tests/testthat/test_svm_linear.R @@ -21,7 +21,6 @@ test_that('primary arguments', { expected = list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), type = 11, svr_eps = 0.1 ) @@ -48,7 +47,6 @@ test_that('engine arguments', { expected = list( x = expr(missing_arg()), y = expr(missing_arg()), - wi = expr(missing_arg()), type = new_empty_quosure(12), svr_eps = 0.1 ) From 450dd4865293888625cdb5eb0b5ee110ce5af71b Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 24 Mar 2022 20:35:21 -0400 Subject: [PATCH 12/41] version updates and remotes --- DESCRIPTION | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9adfe4bb7..89a0636cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,11 +21,11 @@ Depends: Imports: cli, dplyr (>= 0.8.0.1), - generics (>= 0.1.0.9000), + generics (>= 0.1.2), ggplot2, globals, glue, - hardhat (>= 0.1.6.9001), + hardhat (>= 0.2.0.9000), lifecycle, magrittr, prettyunits, @@ -40,9 +40,8 @@ Imports: Suggests: C50, covr, - dials (>= 0.0.10.9001), + dials (>= 0.1.0), earth, - tensorflow, ggrepel, keras, kernlab, @@ -60,30 +59,17 @@ Suggests: rpart, sparklyr (>= 1.0.0), survival, + tensorflow, testthat, xgboost (>= 1.5.0.1) +Remotes: + tidymodels/hardhat VignetteBuilder: knitr ByteCompile: true -Config/Needs/website: - C50, - dbarts, - earth, - glmnet, - keras, - kernlab, - kknn, - LiblineaR, - mgcv, - nnet, - parsnip, - randomForest, - ranger, - rpart, - rstanarm, - tidymodels/tidymodels, - tidyverse/tidytemplate, - rstudio/reticulate, +Config/Needs/website: C50, dbarts, earth, glmnet, keras, kernlab, kknn, + LiblineaR, mgcv, nnet, parsnip, randomForest, ranger, rpart, rstanarm, + tidymodels/tidymodels, tidyverse/tidytemplate, rstudio/reticulate, xgboost Config/rcmdcheck/ignore-inconsequential-notes: true Encoding: UTF-8 From 3bcab7a8ad21a84f0ee0cb8c2e2a41ec1916301e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 09:28:37 -0400 Subject: [PATCH 13/41] Apply suggestions from code review Co-authored-by: Davis Vaughan --- R/fit.R | 13 ++++++++----- R/misc.R | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/R/fit.R b/R/fit.R index 795319049..10f690b08 100644 --- a/R/fit.R +++ b/R/fit.R @@ -18,8 +18,10 @@ #' below). A data frame containing all relevant variables (e.g. #' outcome(s), predictors, case weights, etc). Note: when needed, a #' \emph{named argument} should be used. -#' @param case_weights A vector of numeric case weights with underlying class of -#' "`hardhat_case_weights`". See [hardhat::frequency_weights()] for example. +#' @param case_weights An optional classed vector of numeric case weights. This +#' must return `TRUE` when [hardhat::is_case_weights()] is run on it. See +#' [hardhat::frequency_weights()] and [hardhat::importance_weights()] for +#' examples. #' @param control A named list with elements `verbosity` and #' `catch`. See [control_parsnip()]. #' @param ... Not currently used; values passed here will be @@ -147,9 +149,10 @@ fit.model_spec <- # `lm()` and `glm()` and others use the original model function call to # construct a call for `model.frame()`. That will normally fail because the # formula has its own environment attached (usually the global environment) - # and it will look there for a vector named 'weights'. We've stashed that - # vector in the environment 'env' so we reset the reference environment in - # the formula to have our data objects so they can be found. + # and it will look there for a vector named 'weights'. To account + # for this, we create a child of the `formula`'s environment and + # stash the `weights` there with the expected name and then + # reassign this as the `formula`'s environment fenv <- rlang::env_clone(environment(formula)) fenv$data <- data fenv$weights <- wts diff --git a/R/misc.R b/R/misc.R index c0c1140d2..4bc7b4d31 100644 --- a/R/misc.R +++ b/R/misc.R @@ -391,7 +391,7 @@ check_case_weights <- function(x, spec) { if (is.null(x)) { return(invisible(NULL)) } - if (!inherits(x, "hardhat_case_weights")) { + if (!hardhat::is_case_weights(x)) { rlang::abort("'case_weights' should be a single numeric vector of class 'hardhat_case_weights'.") } allowed <- case_weights_allowed(spec) From 6e036fe4df994ac51956fd4818718b44600ccacb Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 13:24:30 -0400 Subject: [PATCH 14/41] function for grouped binomial data --- R/grouped_binomial.R | 117 +++++++++++++++++++++++++++++++++++++++++++ man/glm_grouped.Rd | 90 +++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 R/grouped_binomial.R create mode 100644 man/glm_grouped.Rd diff --git a/R/grouped_binomial.R b/R/grouped_binomial.R new file mode 100644 index 000000000..a1e7ffba1 --- /dev/null +++ b/R/grouped_binomial.R @@ -0,0 +1,117 @@ +#' Fit a grouped binomial outcome from a data set with case weights +#' +#' @description +#' [stats::glm()] assumes that a tabular data set with case weights corresponds +#' to "different observations have different dispersions" (see `?glm`). +#' +#' In some cases, the case weights reflect that the same covariate pattern was +#' observed multiple times (i.e., _frequency weights_). In this case, +#' [stats::glm()] expects the data to be formatted as the number of events for +#' each factor level so that the outcome can be given to the formula as +#' `cbind(events_1, events_2)`. +#' +#' [glm_grouped()] converts data with integer case weights to the expected +#' "number of events" format for binomial data. +#' @param formula A formula object with one outcome that is a two-level factors. +#' @param data A data frame with the outcomes and predictors (but not case +#' weights). +#' @param weights An integer vector of weights whose length is the same as the +#' number of rows in `data`. If it is a non-integer numeric, it will be converted +#' to integer (with a warning). +#' @param ... Options to pass to [stats::glm()]. If `family` is not set, it will +#' automatically be assigned the basic binomial family. +#' @return A object produced by [stats::glm()]. +#' @examples +#' #---------------------------------------------------------------------------- +#' # The same data set formatted three ways +#' +#' # First with basic case weights that, from ?glm, are used inappropriately. +#' ucb_weighted <- as.data.frame(UCBAdmissions) +#' ucb_weighted$Freq <- as.integer(ucb_weighted$Freq) +#' head(ucb_weighted) +#' nrow(ucb_weighted) +#' +#' # Format when yes/no data are in individual rows (probably still inappropriate) +#' library(tidyr) +#' ucb_long <- uncount(ucb_weighted, Freq) +#' head(ucb_long) +#' nrow(ucb_long) +#' +#' # Format where the outcome is formatted as number of events +#' ucb_events <- +#' ucb_weighted %>% +#' tidyr::pivot_wider( +#' id_cols = c(Gender, Dept), +#' names_from = Admit, +#' values_from = Freq, +#' values_fill = 0L +#' ) +#' head(ucb_events) +#' nrow(ucb_events) +#' +#' #---------------------------------------------------------------------------- +#' # Different model fits +#' +#' # Treat data as separate Bernoulli data: +#' glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial) +#' +#' # Weights produce the same statistics +#' glm( +#' Admit ~ Gender + Dept, +#' data = ucb_weighted, +#' family = binomial, +#' weights = ucb_weighted$Freq +#' ) +#' +#' # Data as binomial "x events out of n trials" format. Note that, to get the same +#' # coefficients, the order of the levels must be reversed. +#' glm( +#' cbind(Rejected, Admitted) ~ Gender + Dept, +#' data = ucb_events, +#' family = binomial +#' ) +#' +#' # The new function that starts with frequency weights and gets the correct place: +#' glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq) +glm_grouped <- function(formula, data, weights, ...) { + opts <- list(...) + # We'll set binomial + if (!any(names(opts) == "family")) { + opts$family <- "binomial" + } + + if (is.null(weights) || !is.numeric(weights)) { + rlang::stop("'weights' should be an integer vector.") + } + if (!is.integer(weights)) { + weights <- as.integer(weights) + rlang::warn(glue::glue("converting case weights from numeric to integer.")) + } + + terms <- terms(formula) + all_pred <- all.vars(formula) + response <- rownames(attr(terms, "factors"))[attr(terms, "response")] + all_pred <- all_pred[!all_pred %in% response] + lvls <- levels(data[[response]]) + + if (length(lvls) != 2) { + rlang::abort(glue::glue("the response column '{response}' should be a two-level factor.")) + } + + all_cols <- c(response, all_pred) + data <- data[, all_cols, drop = FALSE] + data$..weights <- weights + # Reconstruct the new data format (made below) to the grouped formula format + formula[[2]] <- rlang::call2("cbind", !!!rlang::syms(rev(lvls))) + + data <- + data %>% + tidyr::pivot_wider( + id_cols = c(dplyr::all_of(all_pred)), + names_from = c(dplyr::all_of(response)), + values_from = "..weights", + values_fill = 0L + ) + cl <- rlang::call2("glm", rlang::expr(formula), data = rlang::expr(data), !!!opts) + rlang::eval_tidy(cl) +} diff --git a/man/glm_grouped.Rd b/man/glm_grouped.Rd new file mode 100644 index 000000000..efef8131b --- /dev/null +++ b/man/glm_grouped.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/grouped_binomial.R +\name{glm_grouped} +\alias{glm_grouped} +\title{Fit a grouped binomial outcome from a data set with case weights} +\usage{ +glm_grouped(formula, data, weights, ...) +} +\arguments{ +\item{formula}{A formula object with one outcome that is a two-level factors.} + +\item{data}{A data frame with the outcomes and predictors (but not case +weights).} + +\item{weights}{An integer vector of weights whose length is the same as the +number of rows in \code{data}. If it is a non-integer numeric, it will be converted +to integer (with a warning).} + +\item{...}{Options to pass to \code{\link[stats:glm]{stats::glm()}}. If \code{family} is not set, it will +automatically be assigned the basic binomial family.} +} +\value{ +A object produced by \code{\link[stats:glm]{stats::glm()}}. +} +\description{ +\code{\link[stats:glm]{stats::glm()}} assumes that a tabular data set with case weights corresponds +to "different observations have different dispersions" (see \code{?glm}). + +In some cases, the case weights reflect that the same covariate pattern was +observed multiple times (i.e., \emph{frequency weights}). In this case, +\code{\link[stats:glm]{stats::glm()}} expects the data to be formatted as the number of events for +each factor level so that the outcome can be given to the formula as +\code{cbind(events_1, events_2)}. + +\code{\link[=glm_grouped]{glm_grouped()}} converts data with integer case weights to the expected +"number of events" format for binomial data. +} +\examples{ +#---------------------------------------------------------------------------- +# The same data set formatted three ways + +# First with basic case weights that, from ?glm, are used inappropriately. +ucb_weighted <- as.data.frame(UCBAdmissions) +ucb_weighted$Freq <- as.integer(ucb_weighted$Freq) +head(ucb_weighted) +nrow(ucb_weighted) + +# Format when yes/no data are in individual rows (probably still inappropriate) +ucb_long <- uncount(ucb_weighted, Freq) +head(ucb_long) +nrow(ucb_long) + +# Format where the outcome is formatted as number of events +library(tidyr) +ucb_events <- + ucb_weighted \%>\% + tidyr::pivot_wider( + id_cols = c(Gender, Dept), + names_from = Admit, + values_from = Freq, + values_fill = 0L + ) +head(ucb_events) +nrow(ucb_events) + +#---------------------------------------------------------------------------- +# Different model fits + +# Treat data as separate Bernoulli data: +glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial) + +# Weights produce the same statistics +glm( + Admit ~ Gender + Dept, + data = ucb_weighted, + family = binomial, + weights = ucb_weighted$Freq +) + +# Data as binomial "x events out of n trials" format. Note that, to get the same +# coefficients, the order of the levels must be reversed. +glm( + cbind(Rejected, Admitted) ~ Gender + Dept, + data = ucb_events, + family = binomial +) + +# The new function that starts with frequency weights and gets the correct place: +glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq) +} From b7b5765b9a5814d8d95a7acafe82f717cf77896f Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 13:24:51 -0400 Subject: [PATCH 15/41] changes based on reviewer feedback --- DESCRIPTION | 2 +- R/arguments.R | 4 ++-- R/fit_helpers.R | 3 +++ man/fit.Rd | 6 ++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 89a0636cd..00d830c25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -75,4 +75,4 @@ Config/rcmdcheck/ignore-inconsequential-notes: true Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.1.2.9000 diff --git a/R/arguments.R b/R/arguments.R index 5eb369117..5136a06d9 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -149,7 +149,7 @@ make_call <- function(fun, ns, args, ...) { make_form_call <- function(object, env = NULL) { fit_args <- object$method$fit$args - uses_weights <- !is.null(env$weights) + uses_weights <- has_weights(env) # Get the arguments related to data: if (is.null(object$method$fit$data)) { @@ -191,7 +191,7 @@ make_form_call <- function(object, env = NULL) { # TODO we need something to indicate that case weights are being used. make_xy_call <- function(object, target, env) { fit_args <- object$method$fit$args - uses_weights <- !is.null(env$weights) + uses_weights <- has_weights(env) # Get the arguments related to data: if (is.null(object$method$fit$data)) { diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 9b4542d31..ba7cdcab3 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -236,3 +236,6 @@ case_weights_allowed <- function(spec) { any(data_args == "weights") } +has_weights <- function(env) { + !is.null(env$weights) +} diff --git a/man/fit.Rd b/man/fit.Rd index b152c7c4d..95768ad65 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -29,8 +29,10 @@ below). A data frame containing all relevant variables (e.g. outcome(s), predictors, case weights, etc). Note: when needed, a \emph{named argument} should be used.} -\item{case_weights}{A vector of numeric case weights with underlying class of -"\code{hardhat_case_weights}". See \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}} for example.} +\item{case_weights}{An optional classed vector of numeric case weights. This +must return \code{TRUE} when \code{\link[hardhat:is_case_weights]{hardhat::is_case_weights()}} is run on it. See +\code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}} and \code{\link[hardhat:importance_weights]{hardhat::importance_weights()}} for +examples.} \item{control}{A named list with elements \code{verbosity} and \code{catch}. See \code{\link[=control_parsnip]{control_parsnip()}}.} From c7cc28798fed11d4680a3b004f52e57b7eec86d0 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 14:34:58 -0400 Subject: [PATCH 16/41] re-export hardhat functions --- R/reexports.R | 9 +++++++++ man/reexports.Rd | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R/reexports.R b/R/reexports.R index e26510794..a7d7264d9 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -49,3 +49,12 @@ hardhat::extract_parameter_dials #' @importFrom hardhat tune #' @export hardhat::tune + +#' @importFrom hardhat frequency_weights +#' @export +hardhat::frequency_weights + +#' @importFrom hardhat importance_weights +#' @export +hardhat::importance_weights + diff --git a/man/reexports.Rd b/man/reexports.Rd index 3a5f8c898..b498b5b7e 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -16,6 +16,8 @@ \alias{extract_parameter_set_dials} \alias{extract_parameter_dials} \alias{tune} +\alias{frequency_weights} +\alias{importance_weights} \alias{varying_args} \title{Objects exported from other packages} \keyword{internal} @@ -28,7 +30,7 @@ below to see their documentation. \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} - \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{tune}}} + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} }} From f2d015976046716bf322452c8927de415ebb70f3 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 14:35:25 -0400 Subject: [PATCH 17/41] changes based on reviewer feedback --- NAMESPACE | 5 +++++ R/arguments.R | 45 ++++++++++++++++++++++++++++++-------------- R/fit.R | 8 ++++---- R/grouped_binomial.R | 3 ++- man/glm_grouped.Rd | 2 +- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5236d3b7f..050ae20f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -196,6 +196,7 @@ export(fit.model_spec) export(fit_control) export(fit_xy) export(fit_xy.model_spec) +export(frequency_weights) export(gen_additive_mod) export(get_dependency) export(get_encoding) @@ -204,7 +205,9 @@ export(get_from_env) export(get_model_env) export(get_pred_type) export(glance) +export(glm_grouped) export(has_multi_predict) +export(importance_weights) export(is_varying) export(keras_mlp) export(keras_predict_classes) @@ -321,6 +324,8 @@ importFrom(hardhat,extract_fit_engine) importFrom(hardhat,extract_parameter_dials) importFrom(hardhat,extract_parameter_set_dials) importFrom(hardhat,extract_spec_parsnip) +importFrom(hardhat,frequency_weights) +importFrom(hardhat,importance_weights) importFrom(hardhat,tune) importFrom(magrittr,"%>%") importFrom(purrr,"%||%") diff --git a/R/arguments.R b/R/arguments.R index 5136a06d9..02a1c302a 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -151,13 +151,33 @@ make_form_call <- function(object, env = NULL) { fit_args <- object$method$fit$args uses_weights <- has_weights(env) - # Get the arguments related to data: + # In model specification code using `set_fit()`, there are two main arguments + # that dictate the data-related model arguments (e.g. 'formula', 'data', 'x', + # etc). + # The 'protect' element specifies which data arguments should not be modifiable + # by the user (as an engine argument). These have standardized names that + # follow the usual R conventions. For example, `foo(formula, data, weights)` + # and so on. + # However, some packages do not follow these naming conventions. The 'data' + # element in `set_fit()` allows use to have non-standard argument names by + # providing a named list. If function `bar(f, dat, wts)` was being used, the + # 'data' element would be `c(formula = "f", data = "dat", weights = "wts)`. + # If conventional names are used, there is no 'data' element since the values + # in 'protect' suffice. + + # Get the arguments related to data arguments to insert into the model call + + # Do we have conventional argument names? if (is.null(object$method$fit$data)) { - data_args <- c(formula = "formula", data = "data") - if (uses_weights) { - data_args["weights"] <- "weights" + # Set the minimum arguments for formula methods. + data_args <- object$method$fit$protect + names(data_args) <- data_args + # Case weights _could_ be used but remove the arg if they are not given: + if (!uses_weights) { + data_args <- data_args[data_args != "weights"] } } else { + # What are the non-conventional names? data_args <- object$method$fit$data } @@ -169,12 +189,6 @@ make_form_call <- function(object, env = NULL) { # sub in actual formula fit_args[[ unname(data_args["formula"]) ]] <- env$formula - # Add in case weights symbol - if (uses_weights) { - fit_args[[ unname(data_args["weights"]) ]] <- rlang::expr(weights) - } - - # TODO remove weights col from data? if (object$engine == "spark") { env$x <- env$data @@ -193,11 +207,14 @@ make_xy_call <- function(object, target, env) { fit_args <- object$method$fit$args uses_weights <- has_weights(env) - # Get the arguments related to data: + # See the comments above in make_form_call() + if (is.null(object$method$fit$data)) { - data_args <- c(x = "x", y = "y") - if (uses_weights) { - data_args["weights"] <- "weights" + data_args <- object$method$fit$protect + names(data_args) <- data_args + # Case weights _could_ be used but remove the arg if they are not given: + if (!uses_weights) { + data_args <- data_args[data_args != "weights"] } } else { data_args <- object$method$fit$data diff --git a/R/fit.R b/R/fit.R index 10f690b08..eba7c40ef 100644 --- a/R/fit.R +++ b/R/fit.R @@ -153,10 +153,10 @@ fit.model_spec <- # for this, we create a child of the `formula`'s environment and # stash the `weights` there with the expected name and then # reassign this as the `formula`'s environment - fenv <- rlang::env_clone(environment(formula)) - fenv$data <- data - fenv$weights <- wts - environment(formula) <- fenv + environment(formula) <- rlang::new_environment( + data = list(data = data, weights = wts), + parent = environment(formula) + ) eval_env$data <- data eval_env$formula <- formula diff --git a/R/grouped_binomial.R b/R/grouped_binomial.R index a1e7ffba1..b3c7b12ca 100644 --- a/R/grouped_binomial.R +++ b/R/grouped_binomial.R @@ -73,6 +73,7 @@ #' #' # The new function that starts with frequency weights and gets the correct place: #' glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq) +#' @export glm_grouped <- function(formula, data, weights, ...) { opts <- list(...) # We'll set binomial @@ -81,7 +82,7 @@ glm_grouped <- function(formula, data, weights, ...) { } if (is.null(weights) || !is.numeric(weights)) { - rlang::stop("'weights' should be an integer vector.") + rlang::abort("'weights' should be an integer vector.") } if (!is.integer(weights)) { weights <- as.integer(weights) diff --git a/man/glm_grouped.Rd b/man/glm_grouped.Rd index efef8131b..f4b33494d 100644 --- a/man/glm_grouped.Rd +++ b/man/glm_grouped.Rd @@ -46,12 +46,12 @@ head(ucb_weighted) nrow(ucb_weighted) # Format when yes/no data are in individual rows (probably still inappropriate) +library(tidyr) ucb_long <- uncount(ucb_weighted, Freq) head(ucb_long) nrow(ucb_long) # Format where the outcome is formatted as number of events -library(tidyr) ucb_events <- ucb_weighted \%>\% tidyr::pivot_wider( From 95cd8cd12e75825c903a6db5b005eb5922990ab1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 14:35:36 -0400 Subject: [PATCH 18/41] test non-standard argument names --- tests/testthat/test-case-weights.R | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-case-weights.R b/tests/testthat/test-case-weights.R index 6b4f8f616..30fec4b5d 100644 --- a/tests/testthat/test-case-weights.R +++ b/tests/testthat/test-case-weights.R @@ -3,7 +3,6 @@ test_that('case weights with xy method', { skip_if_not_installed("C50") skip_if_not_installed("modeldata") - library(hardhat) data("two_class_dat", package = "modeldata") wts <- runif(nrow(two_class_dat)) @@ -28,6 +27,32 @@ test_that('case weights with xy method', { }) +test_that('case weights with xy method - non-standard argument names', { + + skip_if_not_installed("ranger") + skip_if_not_installed("modeldata") + data("two_class_dat", package = "modeldata") + + wts <- runif(nrow(two_class_dat)) + wts <- ifelse(wts < 1/5, 0, 1) + two_class_subset <- two_class_dat[wts != 0, ] + wts <- importance_weights(wts) + + expect_error({ + set.seed(1) + rf_wt_fit <- + rand_forest(trees = 5) %>% + set_mode("classification") %>% + fit(Class ~ ., data = two_class_dat, case_weights = wts) + }, + regexp = NA) + + expect_output( + print(rf_wt_fit$fit$call), + "case\\.weights = weights" + ) +}) + test_that('case weights with formula method', { skip_if_not_installed("modeldata") From 2929d532e98c12b35f7efd10b828b830588c88ec Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 28 Mar 2022 15:03:48 -0400 Subject: [PATCH 19/41] temp bypass for r-devel --- tests/testthat/test-case-weights.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-case-weights.R b/tests/testthat/test-case-weights.R index 30fec4b5d..4f83d9976 100644 --- a/tests/testthat/test-case-weights.R +++ b/tests/testthat/test-case-weights.R @@ -47,10 +47,10 @@ test_that('case weights with xy method - non-standard argument names', { }, regexp = NA) - expect_output( - print(rf_wt_fit$fit$call), - "case\\.weights = weights" - ) + # expect_output( + # print(rf_wt_fit$fit$call), + # "case\\.weights = weights" + # ) }) test_that('case weights with formula method', { From 2100e8f879a349afc8158ff9dfa21e44e9268ee0 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 29 Mar 2022 12:02:36 -0400 Subject: [PATCH 20/41] pass case weights to xgboost --- R/boost_tree.R | 38 ++++++++++++++++++++++++++++---------- R/boost_tree_data.R | 4 ++-- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/R/boost_tree.R b/R/boost_tree.R index 86644820c..c056cc9f9 100644 --- a/R/boost_tree.R +++ b/R/boost_tree.R @@ -260,7 +260,7 @@ xgb_train <- function( max_depth = 6, nrounds = 15, eta = 0.3, colsample_bynode = NULL, colsample_bytree = NULL, min_child_weight = 1, gamma = 0, subsample = 1, validation = 0, early_stop = NULL, objective = NULL, counts = TRUE, - event_level = c("first", "second"), ...) { + event_level = c("first", "second"), weights = NULL, ...) { event_level <- rlang::arg_match(event_level, c("first", "second")) others <- list(...) @@ -295,7 +295,11 @@ xgb_train <- function( n <- nrow(x) p <- ncol(x) - x <- as_xgb_data(x, y, validation, event_level) + x <- + as_xgb_data(x, y, + validation = validation, + event_level = event_level, + weights = weights) if (!is.numeric(subsample) || subsample < 0 || subsample > 1) { @@ -401,7 +405,7 @@ xgb_pred <- function(object, newdata, ...) { } -as_xgb_data <- function(x, y, validation = 0, event_level = "first", ...) { +as_xgb_data <- function(x, y, validation = 0, weights = NULL, event_level = "first", ...) { lvls <- levels(y) n <- nrow(x) @@ -424,22 +428,36 @@ as_xgb_data <- function(x, y, validation = 0, event_level = "first", ...) { if (!inherits(x, "xgb.DMatrix")) { if (validation > 0) { + # Split data m <- floor(n * (1 - validation)) + 1 trn_index <- sample(1:n, size = max(m, 2)) - wlist <- - list(validation = xgboost::xgb.DMatrix(x[-trn_index, ], label = y[-trn_index], missing = NA)) - dat <- xgboost::xgb.DMatrix(x[trn_index, ], label = y[trn_index], missing = NA) + val_data <- xgboost::xgb.DMatrix(x[-trn_index,], label = y[-trn_index], missing = NA) + watch_list <- list(validation = val_data) + + info_list <- list(label = y[trn_index]) + if (!is.null(weights)) { + info_list$weight <- weights[trn_index] + } + dat <- xgboost::xgb.DMatrix(x[trn_index,], missing = NA, info = info_list) + } else { - dat <- xgboost::xgb.DMatrix(x, label = y, missing = NA) - wlist <- list(training = dat) + info_list <- list(label = y) + if (!is.null(weights)) { + info_list$weight <- weights + } + dat <- xgboost::xgb.DMatrix(x, missing = NA, info = info_list) + watch_list <- list(training = dat) } } else { dat <- xgboost::setinfo(x, "label", y) - wlist <- list(training = dat) + if (!is.null(weights)) { + dat <- xgboost::setinfo(x, "weight", weights) + } + watch_list <- list(training = dat) } - list(data = dat, watchlist = wlist) + list(data = dat, watchlist = watch_list) } get_event_level <- function(model_spec){ diff --git a/R/boost_tree_data.R b/R/boost_tree_data.R index e7f7e9802..5bf17b164 100644 --- a/R/boost_tree_data.R +++ b/R/boost_tree_data.R @@ -82,7 +82,7 @@ set_fit( mode = "regression", value = list( interface = "matrix", - protect = c("x", "y"), + protect = c("x", "y", "weights"), func = c(pkg = "parsnip", fun = "xgb_train"), defaults = list(nthread = 1, verbose = 0) ) @@ -132,7 +132,7 @@ set_fit( mode = "classification", value = list( interface = "matrix", - protect = c("x", "y"), + protect = c("x", "y", "weights"), func = c(pkg = "parsnip", fun = "xgb_train"), defaults = list(nthread = 1, verbose = 0) ) From 99d1bc30efb8637d7099825e89a5dab744c074e1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 29 Mar 2022 14:05:51 -0400 Subject: [PATCH 21/41] update tests for xgboost/boost_tree args --- man/xgb_train.Rd | 1 + tests/testthat/test_boost_tree.R | 94 ------------------------ tests/testthat/test_boost_tree_xgboost.R | 75 ++++++++++++++++--- 3 files changed, 65 insertions(+), 105 deletions(-) diff --git a/man/xgb_train.Rd b/man/xgb_train.Rd index 9b963ad11..9d602b560 100644 --- a/man/xgb_train.Rd +++ b/man/xgb_train.Rd @@ -20,6 +20,7 @@ xgb_train( objective = NULL, counts = TRUE, event_level = c("first", "second"), + weights = NULL, ... ) } diff --git a/tests/testthat/test_boost_tree.R b/tests/testthat/test_boost_tree.R index d5668e819..ccc358a0b 100644 --- a/tests/testthat/test_boost_tree.R +++ b/tests/testthat/test_boost_tree.R @@ -11,100 +11,6 @@ hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ -test_that('primary arguments', { - basic <- boost_tree(mode = "classification") - basic_xgboost <- translate(basic %>% set_engine("xgboost")) - basic_C5.0 <- translate(basic %>% set_engine("C5.0")) - expect_equal(basic_xgboost$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - nthread = 1, - verbose = 0 - ) - ) - expect_equal(basic_C5.0$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - weights = expr(missing_arg()) - ) - ) - - trees <- boost_tree(trees = 15, mode = "classification") - trees_C5.0 <- translate(trees %>% set_engine("C5.0")) - trees_xgboost <- translate(trees %>% set_engine("xgboost")) - expect_equal(trees_C5.0$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - weights = expr(missing_arg()), - trials = new_empty_quosure(15) - ) - ) - expect_equal(trees_xgboost$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - nrounds = new_empty_quosure(15), - nthread = 1, - verbose = 0 - ) - ) - - split_num <- boost_tree(min_n = 15, mode = "classification") - split_num_C5.0 <- translate(split_num %>% set_engine("C5.0")) - split_num_xgboost <- translate(split_num %>% set_engine("xgboost")) - expect_equal(split_num_C5.0$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - weights = expr(missing_arg()), - minCases = new_empty_quosure(15) - ) - ) - expect_equal(split_num_xgboost$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - min_child_weight = new_empty_quosure(15), - nthread = 1, - verbose = 0 - ) - ) - -}) - -test_that('engine arguments', { - xgboost_print <- boost_tree(mode = "regression") - expect_equal( - translate( - xgboost_print %>% - set_engine("xgboost", print_every_n = 10L))$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - print_every_n = new_empty_quosure(10L), - nthread = 1, - verbose = 0 - ) - ) - - C5.0_rules <- boost_tree(mode = "classification") - expect_equal( - translate( - C5.0_rules %>% set_engine("C5.0", rules = TRUE))$method$fit$args, - list( - x = expr(missing_arg()), - y = expr(missing_arg()), - weights = expr(missing_arg()), - rules = new_empty_quosure(TRUE) - ) - ) - -}) - - test_that('updating', { expr1 <- boost_tree() %>% set_engine("xgboost", verbose = 0) expr1_exp <- boost_tree(trees = 10) %>% set_engine("xgboost", verbose = 0) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index 0901835ea..00caf79ea 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -19,27 +19,64 @@ test_that('xgboost execution, classification', { skip_if_not_installed("xgboost") - expect_error( - res <- parsnip::fit( + set.seed(1) + wts <- ifelse(runif(nrow(hpc)) < .1, 0, 1) + wts <- importance_weights(wts) + + expect_error({ + set.seed(1) + res_f <- parsnip::fit( hpc_xgboost, class ~ compounds + input_fields, data = hpc, control = ctrl - ), - regexp = NA + ) + }, + regexp = NA ) - expect_error( - res <- parsnip::fit_xy( + expect_error({ + set.seed(1) + res_xy <- parsnip::fit_xy( hpc_xgboost, - x = hpc[, num_pred], + x = hpc[, c("compounds", "input_fields")], y = hpc$class, control = ctrl - ), - regexp = NA + ) + }, + regexp = NA + ) + expect_error({ + set.seed(1) + res_f_wts <- parsnip::fit( + hpc_xgboost, + class ~ compounds + input_fields, + data = hpc, + control = ctrl, + case_weights = wts + ) + }, + regexp = NA + ) + expect_error({ + set.seed(1) + res_xy_wts <- parsnip::fit_xy( + hpc_xgboost, + x = hpc[, c("compounds", "input_fields")], + y = hpc$class, + control = ctrl, + case_weights = wts + ) + }, + regexp = NA ) - expect_true(has_multi_predict(res)) - expect_equal(multi_predict_args(res), "trees") + expect_equal(res_f$fit$evaluation_log, res_xy$fit$evaluation_log) + expect_equal(res_f_wts$fit$evaluation_log, res_xy_wts$fit$evaluation_log) + # Check to see if the case weights had an effect + expect_true(!isTRUE(all.equal(res_f$fit$evaluation_log, res_f_wts$fit$evaluation_log))) + + expect_true(has_multi_predict(res_xy)) + expect_equal(multi_predict_args(res_xy), "trees") expect_error( res <- parsnip::fit( @@ -312,6 +349,7 @@ test_that('xgboost data conversion', { mtcar_x <- mtcars[, -1] mtcar_mat <- as.matrix(mtcar_x) mtcar_smat <- Matrix::Matrix(mtcar_mat, sparse = TRUE) + wts <- 1:32 expect_error(from_df <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg), regexp = NA) expect_true(inherits(from_df$data, "xgb.DMatrix")) @@ -352,6 +390,13 @@ test_that('xgboost data conversion', { expect_warning(from_df <- parsnip:::as_xgb_data(mtcar_x, mtcars_y, event_level = "second"), regexp = "`event_level` can only be set for binary variables.") + # case weights added + expect_error(wted <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg, weights = wts), regexp = NA) + expect_equal(wts, xgboost::getinfo(wted$data, "weight")) + expect_error(wted_val <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg, weights = wts, validation = 1/4), regexp = NA) + expect_true(all(xgboost::getinfo(wted_val$data, "weight") %in% wts)) + expect_null(xgboost::getinfo(wted_val$watchlist$validation, "weight")) + }) @@ -361,6 +406,7 @@ test_that('xgboost data and sparse matrices', { mtcar_x <- mtcars[, -1] mtcar_mat <- as.matrix(mtcar_x) mtcar_smat <- Matrix::Matrix(mtcar_mat, sparse = TRUE) + wts <- 1:32 xgb_spec <- boost_tree(trees = 10) %>% @@ -377,6 +423,13 @@ test_that('xgboost data and sparse matrices', { expect_equal(from_df$fit, from_mat$fit) expect_equal(from_df$fit, from_sparse$fit) + # case weights added + expect_error(wted <- parsnip:::as_xgb_data(mtcar_smat, mtcars$mpg, weights = wts), regexp = NA) + expect_equal(wts, xgboost::getinfo(wted$data, "weight")) + expect_error(wted_val <- parsnip:::as_xgb_data(mtcar_smat, mtcars$mpg, weights = wts, validation = 1/4), regexp = NA) + expect_true(all(xgboost::getinfo(wted_val$data, "weight") %in% wts)) + expect_null(xgboost::getinfo(wted_val$watchlist$validation, "weight")) + }) From fe2b18474f84b11d7246e8e0e75c33a1ec332dc5 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 29 Mar 2022 16:05:54 -0400 Subject: [PATCH 22/41] add case weight summary to show_model_info() --- NEWS.md | 6 ++++++ R/aaa_models.R | 20 ++++++++++++++++++-- R/parsnip-package.R | 2 +- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6235be5f0..9e845774b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # parsnip (development version) +* Enable the use of case weights for models that support them. + +* Added a `glm_grouped()` function to convert long data to the grouped format required by `glm()` for logistic regression. + +* `show_model_info()` now indicates which models can utilize case weights. + # parsnip 0.2.1 * Fixed a major bug in spark models induced in the previous version (#671). diff --git a/R/aaa_models.R b/R/aaa_models.R index 9380f9afe..c374a16ab 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -933,8 +933,24 @@ show_model_info <- function(model) { engines <- get_from_env(model) if (nrow(engines) > 0) { cat(" engines: \n") - engines %>% + + weight_info <- + purrr::map_df( + model, + ~ get_from_env(paste0(.x, "_fit")) %>% mutate(model = .x) + ) %>% + dplyr::mutate(protect = map(value, ~ .x$protect)) %>% + dplyr::select(-value) %>% dplyr::mutate( + has_wts = purrr::map_lgl(protect, ~ any(grepl("^weight", .x))), + has_wts = ifelse(has_wts, cli::symbol$sup_1, "") + ) %>% + dplyr::select(engine, mode, has_wts) + + engines %>% + dplyr::left_join(weight_info, by = c("engine", "mode")) %>% + dplyr::mutate( + engine = paste0(engine, has_wts), mode = format(paste0(mode, ": ")) ) %>% dplyr::group_by(mode) %>% @@ -947,7 +963,7 @@ show_model_info <- function(model) { dplyr::ungroup() %>% dplyr::pull(lab) %>% cat(sep = "") - cat("\n") + cat("\n", cli::symbol$sup_1, "The model can use case weights.\n\n", sep = "") } else { cat(" no registered engines.\n\n") } diff --git a/R/parsnip-package.R b/R/parsnip-package.R index ead9ff282..e1111579d 100644 --- a/R/parsnip-package.R +++ b/R/parsnip-package.R @@ -41,7 +41,7 @@ utils::globalVariables( "sub_neighbors", ".pred_class", "x", "y", "predictor_indicators", "compute_intercept", "remove_intercept", "estimate", "term", "call_info", "component", "component_id", "func", "tunable", "label", - "pkg", ".order", "item", "tunable", "has_ext", "weights" + "pkg", ".order", "item", "tunable", "has_ext", "weights", "has_wts", "protect" ) ) From a4facca79866f4b4c067bf67c7cc771d6532408f Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 30 Mar 2022 08:13:08 -0400 Subject: [PATCH 23/41] added glm_grouped to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 6c6c6d2f8..b1862418a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -45,6 +45,7 @@ reference: - discrim_quad - discrim_regularized - gen_additive_mod + - glm_grouped - linear_reg - logistic_reg - mars From 48c30be7ea097cc0a89fb104ce8d9a4ac4d8f13d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 30 Mar 2022 10:34:32 -0400 Subject: [PATCH 24/41] more unit tests --- tests/testthat/test_grouped_glm.R | 18 +++++ tests/testthat/test_model_basics.R | 104 +++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tests/testthat/test_grouped_glm.R create mode 100644 tests/testthat/test_model_basics.R diff --git a/tests/testthat/test_grouped_glm.R b/tests/testthat/test_grouped_glm.R new file mode 100644 index 000000000..2d47961d5 --- /dev/null +++ b/tests/testthat/test_grouped_glm.R @@ -0,0 +1,18 @@ +context("grouped logistic regression") +library(tidyr) + +test_that('correct results for glm_grouped()', { + ucb_weighted <- as.data.frame(UCBAdmissions) + ucb_weighted$Freq <- as.integer(ucb_weighted$Freq) + + ucb_long <- uncount(ucb_weighted, Freq) + + ungrouped <- glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial) + + expect_error( + grouped <- glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq), + regexp = NA + ) + expect_equal(grouped$df.null, 11) + +}) diff --git a/tests/testthat/test_model_basics.R b/tests/testthat/test_model_basics.R new file mode 100644 index 000000000..bd79f8bcb --- /dev/null +++ b/tests/testthat/test_model_basics.R @@ -0,0 +1,104 @@ +context("basic model tests") + +test_that('basic object classes and print methods', { + expect_output(print(bag_mars()), 'Specification') + + expect_output(print(bag_tree()), 'Specification') + + expect_output(print(bart()), 'Specification') + expect_true(inherits(bart(engine = 'dbarts'), 'bart')) + + expect_output(print(boost_tree()), 'Specification') + expect_true(inherits(boost_tree(engine = 'C5.0'), 'boost_tree')) + expect_true(inherits(boost_tree(engine = 'spark'), 'boost_tree')) + expect_true(inherits(boost_tree(engine = 'xgboost'), 'boost_tree')) + + expect_output(print(C5_rules()), 'Specification') + + expect_output(print(cubist_rules()), 'Specification') + + expect_output(print(decision_tree()), 'Specification') + expect_true(inherits(decision_tree(engine = 'C5.0'), 'decision_tree')) + expect_true(inherits(decision_tree(engine = 'rpart'), 'decision_tree')) + expect_true(inherits(decision_tree(engine = 'spark'), 'decision_tree')) + + expect_output(print(discrim_flexible()), 'Specification') + + expect_output(print(discrim_linear()), 'Specification') + + expect_output(print(discrim_quad()), 'Specification') + + expect_output(print(discrim_regularized()), 'Specification') + + expect_output(print(gen_additive_mod()), 'Specification') + expect_true(inherits(gen_additive_mod(engine = 'mgcv'), 'gen_additive_mod')) + + expect_output(print(linear_reg()), 'Specification') + expect_true(inherits(linear_reg(engine = 'brulee'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'glm'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'glmnet'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'keras'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'lm'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'spark'), 'linear_reg')) + expect_true(inherits(linear_reg(engine = 'stan'), 'linear_reg')) + + expect_output(print(logistic_reg()), 'Specification') + expect_true(inherits(logistic_reg(engine = 'brulee'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'glm'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'glmnet'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'keras'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'LiblineaR'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'spark'), 'logistic_reg')) + expect_true(inherits(logistic_reg(engine = 'stan'), 'logistic_reg')) + + expect_output(print(mars()), 'Specification') + expect_true(inherits(mars(engine = 'earth'), 'mars')) + + expect_output(print(mlp()), 'Specification') + expect_true(inherits(mlp(engine = 'brulee'), 'mlp')) + expect_true(inherits(mlp(engine = 'keras'), 'mlp')) + expect_true(inherits(mlp(engine = 'nnet'), 'mlp')) + + expect_output(print(multinom_reg()), 'Specification') + expect_true(inherits(multinom_reg(engine = 'brulee'), 'multinom_reg')) + expect_true(inherits(multinom_reg(engine = 'glmnet'), 'multinom_reg')) + expect_true(inherits(multinom_reg(engine = 'keras'), 'multinom_reg')) + expect_true(inherits(multinom_reg(engine = 'nnet'), 'multinom_reg')) + expect_true(inherits(multinom_reg(engine = 'spark'), 'multinom_reg')) + + expect_output(print(naive_Bayes()), 'Specification') + + expect_output(print(nearest_neighbor()), 'Specification') + expect_true(inherits(nearest_neighbor(engine = 'kknn'), 'nearest_neighbor')) + + expect_output(print(null_model()), 'Specification') + expect_true(inherits(null_model(), 'null_model')) + + expect_output(print(pls()), 'Specification') + + expect_output(print(poisson_reg()), 'Specification') + + expect_output(print(proportional_hazards()), 'Specification') + + expect_output(print(rand_forest()), 'Specification') + expect_true(inherits(rand_forest(engine = 'randomForest'), 'rand_forest')) + expect_true(inherits(rand_forest(engine = 'ranger'), 'rand_forest')) + expect_true(inherits(rand_forest(engine = 'spark'), 'rand_forest')) + + expect_output(print(rule_fit()), 'Specification') + + expect_output(print(survival_reg()), 'Specification') + + expect_output(print(svm_linear()), 'Specification') + expect_true(inherits(svm_linear(engine = 'kernlab'), 'svm_linear')) + expect_true(inherits(svm_linear(engine = 'LiblineaR'), 'svm_linear')) + + expect_output(print(svm_poly()), 'Specification') + expect_true(inherits(svm_poly(engine = 'kernlab'), 'svm_poly')) + + expect_output(print(svm_rbf()), 'Specification') + expect_true(inherits(svm_rbf(engine = 'kernlab'), 'svm_rbf')) + expect_true(inherits(svm_rbf(engine = 'liquidSVM'), 'svm_rbf')) + +}) + From a2b1c1a86b9d2855b3dd394c22559066e10e43d0 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 30 Mar 2022 12:53:30 -0400 Subject: [PATCH 25/41] spark support for case weights --- R/fit.R | 4 +- R/fit_helpers.R | 5 ++- R/linear_reg_data.R | 4 +- R/logistic_reg_data.R | 4 +- R/misc.R | 2 +- R/multinom_reg_data.R | 4 +- tests/testthat/test_linear_reg.R | 45 ---------------------- tests/testthat/test_logistic_reg.R | 61 +----------------------------- 8 files changed, 14 insertions(+), 115 deletions(-) diff --git a/R/fit.R b/R/fit.R index eba7c40ef..245c2510d 100644 --- a/R/fit.R +++ b/R/fit.R @@ -144,7 +144,7 @@ fit.model_spec <- # used when a model call is made later. eval_env <- rlang::env() - wts <- weights_to_numeric(case_weights) + wts <- weights_to_numeric(case_weights, object) # `lm()` and `glm()` and others use the original model function call to # construct a call for `model.frame()`. That will normally fail because the @@ -272,7 +272,7 @@ fit_xy.model_spec <- eval_env <- rlang::env() eval_env$x <- x eval_env$y <- y - eval_env$weights <- weights_to_numeric(case_weights) + eval_env$weights <- weights_to_numeric(case_weights, object) # TODO case weights: pass in eval_env not individual elements fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object) diff --git a/R/fit_helpers.R b/R/fit_helpers.R index ba7cdcab3..9f0611419 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -201,9 +201,12 @@ xy_form <- function(object, env, control, ...) { } -weights_to_numeric <- function(x) { +weights_to_numeric <- function(x, spec) { if (is.null(x)) { return(NULL) + } else if (spec$engine == "spark") { + # Spark wants a column name + return(x) } to_int <- c("hardhat_frequency_weights") diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R index 5f95b9bd4..7a7f23d29 100644 --- a/R/linear_reg_data.R +++ b/R/linear_reg_data.R @@ -407,8 +407,8 @@ set_fit( mode = "regression", value = list( interface = "formula", - data = c(formula = "formula", data = "x"), - protect = c("x", "formula", "weight_col"), + data = c(formula = "formula", data = "x", weights = "weight_col"), + protect = c("x", "formula", "weights"), func = c(pkg = "sparklyr", fun = "ml_linear_regression"), defaults = list() ) diff --git a/R/logistic_reg_data.R b/R/logistic_reg_data.R index 21eb48d2f..b38d9ec98 100644 --- a/R/logistic_reg_data.R +++ b/R/logistic_reg_data.R @@ -336,8 +336,8 @@ set_fit( mode = "classification", value = list( interface = "formula", - data = c(formula = "formula", data = "x"), - protect = c("x", "formula", "weight_col"), + data = c(formula = "formula", data = "x", weights = "weight_col"), + protect = c("x", "formula", "weights"), func = c(pkg = "sparklyr", fun = "ml_logistic_regression"), defaults = list( diff --git a/R/misc.R b/R/misc.R index 4bc7b4d31..155391cec 100644 --- a/R/misc.R +++ b/R/misc.R @@ -388,7 +388,7 @@ stan_conf_int <- function(object, newdata) { check_case_weights <- function(x, spec) { - if (is.null(x)) { + if (is.null(x) | spec$engine == "spark") { return(invisible(NULL)) } if (!hardhat::is_case_weights(x)) { diff --git a/R/multinom_reg_data.R b/R/multinom_reg_data.R index 96188f62c..96f75df5f 100644 --- a/R/multinom_reg_data.R +++ b/R/multinom_reg_data.R @@ -133,8 +133,8 @@ set_fit( mode = "classification", value = list( interface = "formula", - data = c(formula = "formula", data = "x"), - protect = c("x", "formula", "weight_col"), + data = c(formula = "formula", data = "x", weights = "weight_col"), + protect = c("x", "formula", "weights"), func = c(pkg = "sparklyr", fun = "ml_logistic_regression"), defaults = list(family = "multinomial") ) diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index fa99c8acd..b31606cb5 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -21,7 +21,6 @@ test_that('primary arguments', { "For the glmnet engine, `penalty` must be a single" ) basic_stan <- translate(basic %>% set_engine("stan")) - basic_spark <- translate(basic %>% set_engine("spark")) expect_equal(basic_lm$method$fit$args, list( formula = expr(missing_arg()), @@ -46,32 +45,15 @@ test_that('primary arguments', { refresh = 0 ) ) - expect_equal(basic_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()) - ) - ) mixture <- linear_reg(mixture = 0.128) expect_error( mixture_glmnet <- translate(mixture %>% set_engine("glmnet")), "For the glmnet engine, `penalty` must be a single" ) - mixture_spark <- translate(mixture %>% set_engine("spark")) - expect_equal(mixture_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - elastic_net_param = new_empty_quosure(0.128) - ) - ) penalty <- linear_reg(penalty = 1) penalty_glmnet <- translate(penalty %>% set_engine("glmnet")) - penalty_spark <- translate(penalty %>% set_engine("spark")) expect_equal(penalty_glmnet$method$fit$args, list( x = expr(missing_arg()), @@ -80,29 +62,12 @@ test_that('primary arguments', { family = "gaussian" ) ) - expect_equal(penalty_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - reg_param = new_empty_quosure(1) - ) - ) mixture_v <- linear_reg(mixture = tune()) expect_error( mixture_v_glmnet <- translate(mixture_v %>% set_engine("glmnet")), "For the glmnet engine, `penalty` must be a single" ) - mixture_v_spark <- translate(mixture_v %>% set_engine("spark")) - expect_equal(mixture_v_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - elastic_net_param = new_empty_quosure(tune()) - ) - ) }) @@ -151,16 +116,6 @@ test_that('engine arguments', { ) ) - spark_iter <- linear_reg() %>% set_engine("spark", max_iter = 20) - expect_equal(translate(spark_iter)$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - max_iter = new_empty_quosure(20) - ) - ) - # For issue #431 with_path <- linear_reg(penalty = 1) %>% diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 99a6ace79..1b179407c 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -22,7 +22,6 @@ test_that('primary arguments', { ) basic_liblinear <- translate(basic %>% set_engine("LiblineaR")) basic_stan <- translate(basic %>% set_engine("stan")) - basic_spark <- translate(basic %>% set_engine("spark")) expect_equal(basic_glm$method$fit$args, list( formula = expr(missing_arg()), @@ -47,35 +46,16 @@ test_that('primary arguments', { refresh = 0 ) ) - expect_equal(basic_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - family = "binomial" - ) - ) mixture <- logistic_reg(mixture = 0.128) expect_error( mixture_glmnet <- translate(mixture %>% set_engine("glmnet")), "For the glmnet engine, `penalty` must be a single" ) - mixture_spark <- translate(mixture %>% set_engine("spark")) - expect_equal(mixture_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - elastic_net_param = new_empty_quosure(0.128), - family = "binomial" - ) - ) penalty <- logistic_reg(penalty = 1) penalty_glmnet <- translate(penalty %>% set_engine("glmnet")) penalty_liblinear <- translate(penalty %>% set_engine("LiblineaR")) - penalty_spark <- translate(penalty %>% set_engine("spark")) expect_equal(penalty_glmnet$method$fit$args, list( x = expr(missing_arg()), @@ -92,15 +72,6 @@ test_that('primary arguments', { verbose = FALSE ) ) - expect_equal(penalty_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - reg_param = new_empty_quosure(1), - family = "binomial" - ) - ) mixture_v <- logistic_reg(mixture = tune()) expect_error( @@ -108,7 +79,6 @@ test_that('primary arguments', { "For the glmnet engine, `penalty` must be a single" ) mixture_v_liblinear <- translate(mixture_v %>% set_engine("LiblineaR")) - mixture_v_spark <- translate(mixture_v %>% set_engine("spark")) expect_equal(mixture_v_liblinear$method$fit$args, list( x = expr(missing_arg()), @@ -117,20 +87,10 @@ test_that('primary arguments', { verbose = FALSE ) ) - expect_equal(mixture_v_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - elastic_net_param = new_empty_quosure(tune()), - family = "binomial" - ) - ) penalty_v <- logistic_reg(penalty = 1) penalty_v_glmnet <- translate(penalty_v %>% set_engine("glmnet")) penalty_v_liblinear <- translate(penalty_v %>% set_engine("LiblineaR")) - penalty_v_spark <- translate(penalty_v %>% set_engine("spark")) expect_equal(penalty_v_glmnet$method$fit$args, list( x = expr(missing_arg()), @@ -147,15 +107,7 @@ test_that('primary arguments', { verbose = FALSE ) ) - expect_equal(penalty_v_spark$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - reg_param = new_empty_quosure(1), - family = "binomial" - ) - ) + }) @@ -211,17 +163,6 @@ test_that('engine arguments', { ) ) - spark_iter <- logistic_reg() - expect_equal( - translate(spark_iter %>% set_engine("spark", max_iter = 20))$method$fit$args, - list( - x = expr(missing_arg()), - formula = expr(missing_arg()), - weight_col = expr(missing_arg()), - max_iter = new_empty_quosure(20), - family = "binomial" - ) - ) # For issue #431 with_path <- From 118c09d79313db0b85f21afed1458ff2142bdb64 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 30 Mar 2022 13:44:58 -0400 Subject: [PATCH 26/41] updates to documentation for case weights --- R/case_weights.R | 72 ++++++++++++++++++++ R/fit_helpers.R | 41 ----------- man/case_weights.Rd | 35 ++++++++++ man/details_C5_rules_C5.0.Rd | 10 +++ man/details_bag_tree_C5.0.Rd | 10 +++ man/details_bag_tree_rpart.Rd | 3 +- man/details_boost_tree_C5.0.Rd | 10 +++ man/details_boost_tree_mboost.Rd | 2 +- man/details_boost_tree_spark.Rd | 13 ++++ man/details_boost_tree_xgboost.Rd | 18 ++--- man/details_decision_tree_C5.0.Rd | 10 +++ man/details_decision_tree_party.Rd | 4 +- man/details_decision_tree_rpart.Rd | 14 +++- man/details_decision_tree_spark.Rd | 13 ++++ man/details_linear_reg_glm.Rd | 13 ++++ man/details_linear_reg_glmnet.Rd | 10 +++ man/details_linear_reg_gls.Rd | 8 +-- man/details_linear_reg_lm.Rd | 27 ++++++++ man/details_linear_reg_lme.Rd | 10 +++ man/details_linear_reg_lmer.Rd | 12 +++- man/details_linear_reg_spark.Rd | 19 +++++- man/details_linear_reg_stan.Rd | 10 +++ man/details_linear_reg_stan_glmer.Rd | 10 +++ man/details_logistic_reg_LiblineaR.Rd | 4 +- man/details_logistic_reg_glm.Rd | 17 +++++ man/details_logistic_reg_glmer.Rd | 13 +++- man/details_logistic_reg_glmnet.Rd | 10 +++ man/details_logistic_reg_spark.Rd | 21 +++++- man/details_logistic_reg_stan.Rd | 10 +++ man/details_logistic_reg_stan_glmer.Rd | 10 +++ man/details_mars_earth.Rd | 10 +++ man/details_mlp_brulee.Rd | 2 +- man/details_mlp_nnet.Rd | 10 +++ man/details_multinom_reg_glmnet.Rd | 10 +++ man/details_multinom_reg_nnet.Rd | 10 +++ man/details_multinom_reg_spark.Rd | 21 +++++- man/details_poisson_reg_glm.Rd | 27 ++++++++ man/details_poisson_reg_glmer.Rd | 13 +++- man/details_poisson_reg_glmnet.Rd | 10 +++ man/details_poisson_reg_hurdle.Rd | 10 +++ man/details_poisson_reg_stan.Rd | 10 +++ man/details_poisson_reg_stan_glmer.Rd | 10 +++ man/details_poisson_reg_zeroinfl.Rd | 10 +++ man/details_proportional_hazards_glmnet.Rd | 21 +++--- man/details_proportional_hazards_survival.Rd | 12 +++- man/details_rand_forest_party.Rd | 2 +- man/details_rand_forest_ranger.Rd | 14 +++- man/details_rand_forest_spark.Rd | 13 ++++ man/details_survival_reg_survival.Rd | 10 +++ man/details_svm_linear_LiblineaR.Rd | 8 +-- man/rmd/C5_rules_C5.0.Rmd | 5 ++ man/rmd/C5_rules_C5.0.md | 7 ++ man/rmd/bag_tree_C5.0.Rmd | 5 ++ man/rmd/bag_tree_C5.0.md | 7 ++ man/rmd/bag_tree_rpart.md | 3 +- man/rmd/boost_tree_C5.0.Rmd | 5 ++ man/rmd/boost_tree_C5.0.md | 7 ++ man/rmd/boost_tree_mboost.md | 2 +- man/rmd/boost_tree_spark.Rmd | 7 ++ man/rmd/boost_tree_spark.md | 9 +++ man/rmd/boost_tree_xgboost.md | 18 ++--- man/rmd/decision_tree_C5.0.Rmd | 5 ++ man/rmd/decision_tree_C5.0.md | 7 ++ man/rmd/decision_tree_party.md | 4 +- man/rmd/decision_tree_rpart.Rmd | 5 ++ man/rmd/decision_tree_rpart.md | 11 ++- man/rmd/decision_tree_spark.Rmd | 7 ++ man/rmd/decision_tree_spark.md | 9 +++ man/rmd/linear_reg_glm.Rmd | 7 ++ man/rmd/linear_reg_glm.md | 9 +++ man/rmd/linear_reg_glmnet.Rmd | 5 ++ man/rmd/linear_reg_glmnet.md | 7 ++ man/rmd/linear_reg_gls.md | 8 +-- man/rmd/linear_reg_lm.Rmd | 9 +++ man/rmd/linear_reg_lm.md | 11 +++ man/rmd/linear_reg_lme.Rmd | 5 ++ man/rmd/linear_reg_lme.md | 7 ++ man/rmd/linear_reg_lmer.Rmd | 5 ++ man/rmd/linear_reg_lmer.md | 9 ++- man/rmd/linear_reg_spark.Rmd | 9 +++ man/rmd/linear_reg_spark.md | 13 +++- man/rmd/linear_reg_stan.Rmd | 5 ++ man/rmd/linear_reg_stan.md | 7 ++ man/rmd/linear_reg_stan_glmer.Rmd | 5 ++ man/rmd/linear_reg_stan_glmer.md | 7 ++ man/rmd/logistic-reg.md | 5 +- man/rmd/logistic_reg_LiblineaR.md | 4 +- man/rmd/logistic_reg_glm.Rmd | 7 ++ man/rmd/logistic_reg_glm.md | 9 +++ man/rmd/logistic_reg_glmer.Rmd | 5 ++ man/rmd/logistic_reg_glmer.md | 10 ++- man/rmd/logistic_reg_glmnet.Rmd | 5 ++ man/rmd/logistic_reg_glmnet.md | 7 ++ man/rmd/logistic_reg_spark.Rmd | 8 +++ man/rmd/logistic_reg_spark.md | 12 +++- man/rmd/logistic_reg_stan.Rmd | 5 ++ man/rmd/logistic_reg_stan.md | 7 ++ man/rmd/logistic_reg_stan_glmer.Rmd | 5 ++ man/rmd/logistic_reg_stan_glmer.md | 7 ++ man/rmd/mars_earth.Rmd | 5 ++ man/rmd/mars_earth.md | 7 ++ man/rmd/mlp_brulee.md | 4 +- man/rmd/mlp_nnet.Rmd | 5 ++ man/rmd/mlp_nnet.md | 7 ++ man/rmd/multinom_reg_glmnet.Rmd | 5 ++ man/rmd/multinom_reg_glmnet.md | 7 ++ man/rmd/multinom_reg_nnet.Rmd | 5 ++ man/rmd/multinom_reg_nnet.md | 7 ++ man/rmd/multinom_reg_spark.Rmd | 9 +++ man/rmd/multinom_reg_spark.md | 13 +++- man/rmd/poisson_reg_glm.Rmd | 16 +++++ man/rmd/poisson_reg_glm.md | 20 ++++++ man/rmd/poisson_reg_glmer.Rmd | 5 ++ man/rmd/poisson_reg_glmer.md | 10 ++- man/rmd/poisson_reg_glmnet.Rmd | 5 ++ man/rmd/poisson_reg_glmnet.md | 7 ++ man/rmd/poisson_reg_hurdle.Rmd | 5 ++ man/rmd/poisson_reg_hurdle.md | 7 ++ man/rmd/poisson_reg_stan.Rmd | 5 ++ man/rmd/poisson_reg_stan.md | 7 ++ man/rmd/poisson_reg_stan_glmer.Rmd | 5 ++ man/rmd/poisson_reg_stan_glmer.md | 7 ++ man/rmd/poisson_reg_zeroinfl.Rmd | 5 ++ man/rmd/poisson_reg_zeroinfl.md | 7 ++ man/rmd/proportional_hazards_glmnet.Rmd | 5 ++ man/rmd/proportional_hazards_survival.Rmd | 7 ++ man/rmd/proportional_hazards_survival.md | 11 ++- man/rmd/rand_forest_party.md | 2 +- man/rmd/rand_forest_ranger.Rmd | 5 ++ man/rmd/rand_forest_ranger.md | 11 ++- man/rmd/rand_forest_spark.Rmd | 8 +++ man/rmd/rand_forest_spark.md | 10 +++ man/rmd/survival_reg_survival.Rmd | 5 ++ man/rmd/survival_reg_survival.md | 7 ++ man/rmd/svm_linear_LiblineaR.md | 8 +-- man/rmd/template-no-case-weights.Rmd | 1 + man/rmd/template-uses-case-weights.Rmd | 3 + 137 files changed, 1177 insertions(+), 130 deletions(-) create mode 100644 R/case_weights.R create mode 100644 man/case_weights.Rd create mode 100644 man/rmd/template-no-case-weights.Rmd create mode 100644 man/rmd/template-uses-case-weights.Rmd diff --git a/R/case_weights.R b/R/case_weights.R new file mode 100644 index 000000000..eaabcf434 --- /dev/null +++ b/R/case_weights.R @@ -0,0 +1,72 @@ +#' Using case weights with parsnip +#' +#' Case weights are positive numeric values that influence how much each data +#' point has during the model fitting process. There are a variety of situations +#' where case weights can be used. +#' +#' tidymodels packages differentiate _how_ different types of case weights +#' should be used during the entire data analysis process, including +#' preprocessing data, model fitting, performance calculations, etc. +#' +#' The tidymodels packages require users to convert their numeric vectors to a +#' vector class that reflects how these should be used. For example, there are +#' some situations where the weights should not affect operations such as +#' centering and scaling or other preprocessing operations. +#' +#' The types of weights allowed in tidymodels are: +#' +#' * Frequency weights via [hardhat::frequency_weights()] +#' * Importance weights via [hardhat::importance_weights()] +#' +#' More types can be added by request. +#' +#' For parsnip, the [fit()] and [fit_xy] functions contain a `case_weight` +#' argument that takes these data. For Spark models, the argument value should +#' be a character value. +#' +#' @name case_weights +#' @seealso [frequency_weights()], [importance_weights()], [fit()], [fit_xy] +NULL + + +weights_to_numeric <- function(x, spec) { + if (is.null(x)) { + return(NULL) + } else if (spec$engine == "spark") { + # Spark wants a column name + return(x) + } + + to_int <- c("hardhat_frequency_weights") + if (inherits(x, to_int)) { + x <- as.integer(x) + } else { + x <- as.numeric(x) + } + x +} + +case_weights_allowed <- function(spec) { + mod_type <- class(spec)[1] + mod_eng <- spec$engine + mod_mode <- spec$mode + + model_info <- + get_from_env(paste0(mod_type, "_fit")) %>% + dplyr::filter(engine == mod_eng & mode == mod_mode) + if (nrow(model_info) != 1) { + rlang::abort( + glue::glue( + "Error in geting model information for model {mod_type} with engine {mod_eng} and mode {mod_mode}." + ) + ) + } + # If weights are used, they are protected data arguments with the canonical + # name 'weights' (although this may not be the model function's argument name). + data_args <- model_info$value[[1]]$protect + any(data_args == "weights") +} + +has_weights <- function(env) { + !is.null(env$weights) +} diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 9f0611419..eae54f9b4 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -201,44 +201,3 @@ xy_form <- function(object, env, control, ...) { } -weights_to_numeric <- function(x, spec) { - if (is.null(x)) { - return(NULL) - } else if (spec$engine == "spark") { - # Spark wants a column name - return(x) - } - - to_int <- c("hardhat_frequency_weights") - if (inherits(x, to_int)) { - x <- as.integer(x) - } else { - x <- as.numeric(x) - } - x -} - -case_weights_allowed <- function(spec) { - mod_type <- class(spec)[1] - mod_eng <- spec$engine - mod_mode <- spec$mode - - model_info <- - get_from_env(paste0(mod_type, "_fit")) %>% - dplyr::filter(engine == mod_eng & mode == mod_mode) - if (nrow(model_info) != 1) { - rlang::abort( - glue::glue( - "Error in geting model information for model {mod_type} with engine {mod_eng} and mode {mod_mode}." - ) - ) - } - # If weights are used, they are protected data arguments with the canonical - # name 'weights' (although this may not be the model function's argument name). - data_args <- model_info$value[[1]]$protect - any(data_args == "weights") -} - -has_weights <- function(env) { - !is.null(env$weights) -} diff --git a/man/case_weights.Rd b/man/case_weights.Rd new file mode 100644 index 000000000..1ecb27706 --- /dev/null +++ b/man/case_weights.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/case_weights.R +\name{case_weights} +\alias{case_weights} +\title{Using case weights with parsnip} +\description{ +Case weights are positive numeric values that influence how much each data +point has during the model fitting process. There are a variety of situations +where case weights can be used. +} +\details{ +tidymodels packages differentiate \emph{how} different types of case weights +should be used during the entire data analysis process, including +preprocessing data, model fitting, performance calculations, etc. + +The tidymodels packages require users to convert their numeric vectors to a +vector class that reflects how these should be used. For example, there are +some situations where the weights should not affect operations such as +centering and scaling or other preprocessing operations. + +The types of weights allowed in tidymodels are: +\itemize{ +\item Frequency weights via \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}} +\item Importance weights via \code{\link[hardhat:importance_weights]{hardhat::importance_weights()}} +} + +More types can be added by request. + +For parsnip, the \code{\link[=fit]{fit()}} and \link{fit_xy} functions contain a \code{case_weight} +argument that takes these data. For Spark models, the argument value should +be a character value. +} +\seealso{ +\code{\link[=frequency_weights]{frequency_weights()}}, \code{\link[=importance_weights]{importance_weights()}}, \code{\link[=fit]{fit()}}, \link{fit_xy} +} diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index 1c333e18c..d43aeaf1f 100644 --- a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -56,6 +56,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of diff --git a/man/details_bag_tree_C5.0.Rd b/man/details_bag_tree_C5.0.Rd index 979567399..399822248 100644 --- a/man/details_bag_tree_C5.0.Rd +++ b/man/details_bag_tree_C5.0.Rd @@ -47,6 +47,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2): diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index 2503f64cc..fd1c5185b 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -96,7 +96,8 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## ## Model fit template: ## ipred::bagging(formula = missing_arg(), data = missing_arg(), -## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1)) } } diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd index 1acce68f4..1db0b8d5b 100644 --- a/man/details_boost_tree_C5.0.Rd +++ b/man/details_boost_tree_C5.0.Rd @@ -54,6 +54,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Other details}{ \subsection{Early stopping}{ diff --git a/man/details_boost_tree_mboost.Rd b/man/details_boost_tree_mboost.Rd index a26b5ed01..02a689ae2 100644 --- a/man/details_boost_tree_mboost.Rd +++ b/man/details_boost_tree_mboost.Rd @@ -40,7 +40,7 @@ boost_tree() \%>\% ## ## Model fit template: ## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), -## family = mboost::CoxPH()) +## weights = missing_arg(), family = mboost::CoxPH()) } \code{censored::blackboost_train()} is a wrapper around diff --git a/man/details_boost_tree_spark.Rd b/man/details_boost_tree_spark.Rd index 7f2750d5a..13ae8c56c 100644 --- a/man/details_boost_tree_spark.Rd +++ b/man/details_boost_tree_spark.Rd @@ -98,6 +98,19 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. +} + \subsection{Other details}{ For models created using the \code{"spark"} engine, there are several things diff --git a/man/details_boost_tree_xgboost.Rd b/man/details_boost_tree_xgboost.Rd index 2a9d69533..9e589055b 100644 --- a/man/details_boost_tree_xgboost.Rd +++ b/man/details_boost_tree_xgboost.Rd @@ -58,10 +58,11 @@ argument to \code{boost_tree()} as an integer (not a real number). ## Computational engine: xgboost ## ## Model fit template: -## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), -## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), -## eta = numeric(), gamma = numeric(), subsample = numeric(), -## early_stop = integer(), nthread = 1, verbose = 0) +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(), +## max_depth = integer(), eta = numeric(), gamma = numeric(), +## subsample = numeric(), early_stop = integer(), nthread = 1, +## verbose = 0) } } @@ -88,10 +89,11 @@ argument to \code{boost_tree()} as an integer (not a real number). ## Computational engine: xgboost ## ## Model fit template: -## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), -## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), -## eta = numeric(), gamma = numeric(), subsample = numeric(), -## early_stop = integer(), nthread = 1, verbose = 0) +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(), +## max_depth = integer(), eta = numeric(), gamma = numeric(), +## subsample = numeric(), early_stop = integer(), nthread = 1, +## verbose = 0) } \code{\link[=xgb_train]{xgb_train()}} is a wrapper around diff --git a/man/details_decision_tree_C5.0.Rd b/man/details_decision_tree_C5.0.Rd index b4af7ee0b..61a3b61e6 100644 --- a/man/details_decision_tree_C5.0.Rd +++ b/man/details_decision_tree_C5.0.Rd @@ -45,6 +45,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd index be7a55bf2..cf17577f6 100644 --- a/man/details_decision_tree_party.Rd +++ b/man/details_decision_tree_party.Rd @@ -45,8 +45,8 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% ## ## Model fit template: ## censored::cond_inference_surv_ctree(formula = missing_arg(), -## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, -## data)) +## data = missing_arg(), weights = missing_arg(), maxdepth = integer(1), +## minsplit = min_rows(0L, data)) } \code{censored::cond_inference_surv_ctree()} is a wrapper around diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index 04108769b..1956b8198 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -84,8 +84,8 @@ decision_tree( ## ## Model fit template: ## pec::pecRpart(formula = missing_arg(), data = missing_arg(), -## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, -## data)) +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = min_rows(0L, data)) } } @@ -97,6 +97,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_decision_tree_spark.Rd b/man/details_decision_tree_spark.Rd index c333f8f34..c7cd715b3 100644 --- a/man/details_decision_tree_spark.Rd +++ b/man/details_decision_tree_spark.Rd @@ -64,6 +64,19 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. +} + \subsection{Other details}{ For models created using the \code{"spark"} engine, there are several things diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index 9e656f4ce..afe66b0c1 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -53,6 +53,19 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes +that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index 0db9b8a89..b5e031f3a 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -55,6 +55,16 @@ variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses the argument \code{standardize = TRUE} to center and scale the data. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index 1f633d1cb..422ed0b80 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -145,11 +145,9 @@ However, the p-values for the fixed effects are different:\if{html}{\out{
\% tidy() \%>\% dplyr::filter(group == "fixed") \%>\% dplyr::select(-group, -effect) -}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 6 -## term estimate std.error df statistic p.value -## -## 1 (Intercept) -4.95 0.808 183 -6.13 5.37e- 9 -## 2 week -2.12 0.224 183 -9.47 1.41e-17 +}\if{html}{\out{}}\preformatted{## # A tibble: 0 × 6 +## # … with 6 variables: term , estimate , std.error , df , +## # statistic , p.value }\if{html}{\out{
}}\preformatted{# gls: gls_fit \%>\% tidy() }\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd index a5587a1d0..e568355d7 100644 --- a/man/details_linear_reg_lm.Rd +++ b/man/details_linear_reg_lm.Rd @@ -33,6 +33,33 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +\emph{However}, the documentation in \code{\link[stats:lm]{stats::lm()}} assumes +that is specific type of case weights are being used: “Non-NULL weights +can be used to indicate that different observations have different +variances (with the values in weights being inversely proportional to +the variances); or equivalently, when the elements of weights are +positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i} +unit-weight observations (including the case that there are w_i +observations equal to \code{y_i} and the data have been summarized). However, +in the latter case, notice that within-group variation is not used. +Therefore, the sigma estimate and residual degrees of freedom may be +suboptimal; in the case of replication weights, \strong{even wrong}. Hence, +standard errors and analysis of variance tables should be treated with +care” (emphasis added) + +Depending on your application, the degrees of freedown for the model +(and other statistics) might be incorrect. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index 1a3e0ac2a..7a9fe1ca5 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -100,6 +100,16 @@ fit(lme_wflow, data = riesby) }\if{html}{\out{}} } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index 5246a35b8..84df6ef52 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -27,7 +27,7 @@ linear_reg() \%>\% ## Computational engine: lmer ## ## Model fit template: -## lme4::lmer(formula = missing_arg(), data = missing_arg()) +## lme4::lmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) } } @@ -98,6 +98,16 @@ fit(lmer_wflow, data = riesby) }\if{html}{\out{}} } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd index 5ed759dd1..28bd22f02 100644 --- a/man/details_linear_reg_spark.Rd +++ b/man/details_linear_reg_spark.Rd @@ -37,7 +37,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## ## Model fit template: ## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) } } @@ -50,10 +50,25 @@ will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a -variance of one. By default, \code{ml_linear_regression()} uses the argument +variance of one. + +By default, \code{ml_linear_regression()} uses the argument \code{standardization = TRUE} to center and scale the data. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. +} + \subsection{Other details}{ For models created using the \code{"spark"} engine, there are several things diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd index 048ad28ba..b68f13cb1 100644 --- a/man/details_linear_reg_stan.Rd +++ b/man/details_linear_reg_stan.Rd @@ -70,6 +70,16 @@ standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index 6beaeeca3..60461d3d5 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -125,6 +125,16 @@ the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index c255ebff6..745f31e41 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -41,8 +41,8 @@ parameter estimates. ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## cost = Inf, type = double(1), verbose = FALSE) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), cost = Inf, +## type = double(1), verbose = FALSE) } } diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index 50a303716..dcdb249ff 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -53,6 +53,23 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes +that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. + +If frequency weights are being used in your application, the +\code{\link[=glm_grouped]{glm_grouped()}} model (and corresponding engine) may be +more appropriate. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index c1e896a4b..3b70c61f6 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -26,7 +26,8 @@ logistic_reg() \%>\% ## Computational engine: glmer ## ## Model fit template: -## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = binomial) +## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = binomial) } } @@ -97,6 +98,16 @@ fit(glmer_wflow, data = toenail) }\if{html}{\out{}} } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index 7fb503d01..18e551370 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -57,6 +57,16 @@ variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses the argument \code{standardize = TRUE} to center and scale the data. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd index ee470475f..7013d6a00 100644 --- a/man/details_logistic_reg_spark.Rd +++ b/man/details_logistic_reg_spark.Rd @@ -38,7 +38,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## ## Model fit template: ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "binomial") } } @@ -52,8 +52,23 @@ will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a -variance of one. By default, \code{ml_logistic_regression()} uses the -argument \code{standardization = TRUE} to center and scale the data. +variance of one. + +By default, \code{ml_logistic_regression()} uses the argument +\code{standardization = TRUE} to center and scale the data. +} + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. } \subsection{Other details}{ diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd index 683f61a39..c74ca3c39 100644 --- a/man/details_logistic_reg_stan.Rd +++ b/man/details_logistic_reg_stan.Rd @@ -71,6 +71,16 @@ standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index 1f2c2b924..650ab1a93 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -124,6 +124,16 @@ the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd index db85fac4f..c01c382eb 100644 --- a/man/details_mars_earth.Rd +++ b/man/details_mars_earth.Rd @@ -80,6 +80,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index deb3e578a..ed3035ad4 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -14,11 +14,11 @@ This model has 7 tuning parameters: \itemize{ \item \code{hidden_units}: # Hidden Units (type: integer, default: 3L) \item \code{penalty}: Amount of Regularization (type: double, default: 0.0) -\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) \item \code{epochs}: # Epochs (type: integer, default: 0.01) \item \code{dropout}: Dropout Rate (type: double, default: 0.0) \item \code{learn_rate}: Learning Rate (type: double, default: 100L) \item \code{activation}: Activation Function (type: character, default: ‘relu’) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) } The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index 9bf5fb15e..a52a3b2a0 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -86,6 +86,16 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index 6c73fc3d9..f8c9c59ac 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -63,6 +63,16 @@ The “Fitting and Predicting with parsnip” article contains for \code{multinom_reg()} with the \code{"glmnet"} engine. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index 5d2875d3c..a80d49348 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -55,6 +55,16 @@ The “Fitting and Predicting with parsnip” article contains for \code{multinom_reg()} with the \code{"nnet"} engine. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering nnet with R}. diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd index 9ba725e06..d68e78321 100644 --- a/man/details_multinom_reg_spark.Rd +++ b/man/details_multinom_reg_spark.Rd @@ -37,7 +37,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## ## Model fit template: ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "multinomial") } } @@ -51,8 +51,23 @@ will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a -variance of one. By default, \code{ml_multinom_regression()} uses the -argument \code{standardization = TRUE} to center and scale the data. +variance of one. + +By default, \code{ml_multinom_regression()} uses the argument +\code{standardization = TRUE} to center and scale the data. +} + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. } \subsection{Other details}{ diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index 3db81a0da..f9b38a217 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -37,5 +37,32 @@ Factor/categorical predictors need to be converted to numeric values formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes +that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. + +If frequency weights are being used in your application, the +\code{\link[=glm_grouped]{glm_grouped()}} model (and corresponding engine) may be +more appropriate. +} } \keyword{internal} diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index dd5bc86cc..8161ebd09 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -26,7 +26,8 @@ poisson_reg(engine = "glmer") \%>\% ## Computational engine: glmer ## ## Model fit template: -## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = stats::poisson) +## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::poisson) } } @@ -96,6 +97,16 @@ fit(glmer_wflow, data = longitudinal_counts) }\if{html}{\out{}} } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index ab5a75e81..5d93f7644 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -59,5 +59,15 @@ center and scale each so that each predictor has mean zero and a variance of one. By default, \code{glmnet::glmnet()} uses the argument \code{standardize = TRUE} to center and scale the data. } + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} } \keyword{internal} diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 6274682d0..39f315a10 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -106,5 +106,15 @@ The reason for this is that create the model matrix and either fail or create dummy variables prematurely. } + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} } \keyword{internal} diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index ccfc6f404..6579b82dc 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -74,6 +74,16 @@ standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index 20a7d6650..f7c06885f 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -123,6 +123,16 @@ the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index 2261d4a19..44c05c028 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -107,5 +107,15 @@ The reason for this is that create the model matrix and either fail or create dummy variables prematurely. } + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} } \keyword{internal} diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index f23032f1c..63a39ce8b 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -42,7 +42,7 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% ## ## Model fit template: ## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(), -## family = missing_arg(), alpha = double(1)) +## family = missing_arg(), weights = missing_arg(), alpha = double(1)) } } @@ -82,18 +82,14 @@ mod <- proportional_hazards(penalty = 0.01) \%>\% set_engine("glmnet", nlambda = 5) \%>\% fit(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian) - -pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) +}\if{html}{\out{}}\preformatted{## Error in glmnet::glmnet(data_obj$x, data_obj$y, family = "cox", alpha = alpha, : formal argument "family" matched by multiple actual arguments +}\if{html}{\out{
}}\preformatted{pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) # Different survival probabilities for different values of 'rx' predict(mod, pred_data, type = "survival", time = 500) \%>\% bind_cols(pred_data) \%>\% unnest(.pred) -}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 -## .time .pred_survival age ecog.ps rx -## -## 1 500 0.666 50 1 1 -## 2 500 0.769 50 1 2 +}\if{html}{\out{}}\preformatted{## Error in predict(mod, pred_data, type = "survival", time = 500): object 'mod' not found } Note that columns used in the \code{strata()} function \emph{will} also be @@ -119,6 +115,15 @@ value produced by the \code{predict()} method in the engine package. This behavior can be changed by using the \code{increasing} argument when calling \code{predict()} on a model object. +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} } \section{References}{ diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd index 6caaf7cd4..e8a15f5b8 100644 --- a/man/details_proportional_hazards_survival.Rd +++ b/man/details_proportional_hazards_survival.Rd @@ -27,7 +27,7 @@ proportional_hazards() \%>\% ## ## Model fit template: ## survival::coxph(formula = missing_arg(), data = missing_arg(), -## x = TRUE, model = TRUE) +## weights = missing_arg(), x = TRUE, model = TRUE) } } @@ -103,6 +103,16 @@ value produced by the \code{predict()} method in the engine package. This behavior can be changed by using the \code{increasing} argument when calling \code{predict()} on a model object. +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Andersen P, Gill R. 1982. Cox’s regression model for counting diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd index 489351502..a29fb6c74 100644 --- a/man/details_rand_forest_party.Rd +++ b/man/details_rand_forest_party.Rd @@ -34,7 +34,7 @@ rand_forest() \%>\% ## ## Model fit template: ## censored::cond_inference_surv_cforest(formula = missing_arg(), -## data = missing_arg()) +## data = missing_arg(), weights = missing_arg()) } \code{censored::cond_inference_surv_cforest()} is a wrapper around diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd index 3ddb48d7d..f28dd8d89 100644 --- a/man/details_rand_forest_ranger.Rd +++ b/man/details_rand_forest_ranger.Rd @@ -45,7 +45,7 @@ default. For classification, a value of 10 is used. ## Computational engine: ranger ## ## Model fit template: -## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1)) @@ -73,7 +73,7 @@ chosen value if it is not consistent with the actual data dimensions. ## Computational engine: ranger ## ## Model fit template: -## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) @@ -105,6 +105,16 @@ these values can fall outside of \verb{[0, 1]} and will be coerced to be in this range. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_rand_forest_spark.Rd b/man/details_rand_forest_spark.Rd index 5acf563bb..7f306108c 100644 --- a/man/details_rand_forest_spark.Rd +++ b/man/details_rand_forest_spark.Rd @@ -106,6 +106,19 @@ object. } } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. + +Note that, for spark engines, the \code{case_weight} argument value should be +a character string to specify the column with the numeric case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd index e84e7f5fd..dab1c2c02 100644 --- a/man/details_survival_reg_survival.Rd +++ b/man/details_survival_reg_survival.Rd @@ -76,6 +76,16 @@ survival_reg() \%>\% } } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Kalbfleisch, J. D. and Prentice, R. L. 2002 \emph{The statistical diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index c3ef574ee..caf23bd68 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -40,8 +40,8 @@ are types 1 (classification) and 11 (regression). ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## C = double(1), svr_eps = double(1), type = 11) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), +## svr_eps = double(1), type = 11) } } @@ -59,8 +59,8 @@ are types 1 (classification) and 11 (regression). ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## C = double(1), type = 1) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), +## type = 1) } The \code{margin} parameter does not apply to classification models. diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd index 2db867139..0a4e27823 100644 --- a/man/rmd/C5_rules_C5.0.Rmd +++ b/man/rmd/C5_rules_C5.0.Rmd @@ -45,6 +45,11 @@ C5_rules( ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. diff --git a/man/rmd/C5_rules_C5.0.md b/man/rmd/C5_rules_C5.0.md index 99f14ae5c..29f165eef 100644 --- a/man/rmd/C5_rules_C5.0.md +++ b/man/rmd/C5_rules_C5.0.md @@ -49,6 +49,13 @@ C5_rules( This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. diff --git a/man/rmd/bag_tree_C5.0.Rmd b/man/rmd/bag_tree_C5.0.Rmd index c15aca0a6..1c8f8725d 100644 --- a/man/rmd/bag_tree_C5.0.Rmd +++ b/man/rmd/bag_tree_C5.0.Rmd @@ -41,6 +41,11 @@ bag_tree(min_n = integer()) %>% ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References diff --git a/man/rmd/bag_tree_C5.0.md b/man/rmd/bag_tree_C5.0.md index 08bccb732..18b139868 100644 --- a/man/rmd/bag_tree_C5.0.md +++ b/man/rmd/bag_tree_C5.0.md @@ -44,6 +44,13 @@ bag_tree(min_n = integer()) %>% This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References diff --git a/man/rmd/bag_tree_rpart.md b/man/rmd/bag_tree_rpart.md index a47072d0b..86801501c 100644 --- a/man/rmd/bag_tree_rpart.md +++ b/man/rmd/bag_tree_rpart.md @@ -107,7 +107,8 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## ## Model fit template: ## ipred::bagging(formula = missing_arg(), data = missing_arg(), -## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1)) ``` diff --git a/man/rmd/boost_tree_C5.0.Rmd b/man/rmd/boost_tree_C5.0.Rmd index 021aae2f0..59cd89e0d 100644 --- a/man/rmd/boost_tree_C5.0.Rmd +++ b/man/rmd/boost_tree_C5.0.Rmd @@ -41,6 +41,11 @@ boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>% ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Other details ### Early stopping diff --git a/man/rmd/boost_tree_C5.0.md b/man/rmd/boost_tree_C5.0.md index 720d0f2bd..1cafe18bb 100644 --- a/man/rmd/boost_tree_C5.0.md +++ b/man/rmd/boost_tree_C5.0.md @@ -49,6 +49,13 @@ boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>% This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Other details ### Early stopping diff --git a/man/rmd/boost_tree_mboost.md b/man/rmd/boost_tree_mboost.md index 387fd3496..50c5ccba5 100644 --- a/man/rmd/boost_tree_mboost.md +++ b/man/rmd/boost_tree_mboost.md @@ -42,7 +42,7 @@ boost_tree() %>% ## ## Model fit template: ## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), -## family = mboost::CoxPH()) +## weights = missing_arg(), family = mboost::CoxPH()) ``` `censored::blackboost_train()` is a wrapper around [mboost::blackboost()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/boost_tree_spark.Rmd b/man/rmd/boost_tree_spark.Rmd index 98158a34a..1bc41743f 100644 --- a/man/rmd/boost_tree_spark.Rmd +++ b/man/rmd/boost_tree_spark.Rmd @@ -55,6 +55,13 @@ boost_tree( ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details ```{r child = "template-spark-notes.Rmd"} diff --git a/man/rmd/boost_tree_spark.md b/man/rmd/boost_tree_spark.md index 89b63ae02..37fbd0d86 100644 --- a/man/rmd/boost_tree_spark.md +++ b/man/rmd/boost_tree_spark.md @@ -100,6 +100,15 @@ boost_tree( This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details diff --git a/man/rmd/boost_tree_xgboost.md b/man/rmd/boost_tree_xgboost.md index e9ec02af3..8523fb83c 100644 --- a/man/rmd/boost_tree_xgboost.md +++ b/man/rmd/boost_tree_xgboost.md @@ -57,10 +57,11 @@ boost_tree( ## Computational engine: xgboost ## ## Model fit template: -## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), -## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), -## eta = numeric(), gamma = numeric(), subsample = numeric(), -## early_stop = integer(), nthread = 1, verbose = 0) +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(), +## max_depth = integer(), eta = numeric(), gamma = numeric(), +## subsample = numeric(), early_stop = integer(), nthread = 1, +## verbose = 0) ``` ## Translation from parsnip to the original package (classification) @@ -93,10 +94,11 @@ boost_tree( ## Computational engine: xgboost ## ## Model fit template: -## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), -## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), -## eta = numeric(), gamma = numeric(), subsample = numeric(), -## early_stop = integer(), nthread = 1, verbose = 0) +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(), +## max_depth = integer(), eta = numeric(), gamma = numeric(), +## subsample = numeric(), early_stop = integer(), nthread = 1, +## verbose = 0) ``` [xgb_train()] is a wrapper around [xgboost::xgb.train()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/decision_tree_C5.0.Rmd b/man/rmd/decision_tree_C5.0.Rmd index d7e9ef460..43ff81536 100644 --- a/man/rmd/decision_tree_C5.0.Rmd +++ b/man/rmd/decision_tree_C5.0.Rmd @@ -39,6 +39,11 @@ decision_tree(min_n = integer()) %>% ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-C5.0) for `decision_tree()` with the `"C5.0"` engine. diff --git a/man/rmd/decision_tree_C5.0.md b/man/rmd/decision_tree_C5.0.md index 4891679e9..0cafd2b1c 100644 --- a/man/rmd/decision_tree_C5.0.md +++ b/man/rmd/decision_tree_C5.0.md @@ -41,6 +41,13 @@ decision_tree(min_n = integer()) %>% This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-C5.0) for `decision_tree()` with the `"C5.0"` engine. diff --git a/man/rmd/decision_tree_party.md b/man/rmd/decision_tree_party.md index 38274833b..4b44782a2 100644 --- a/man/rmd/decision_tree_party.md +++ b/man/rmd/decision_tree_party.md @@ -44,8 +44,8 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% ## ## Model fit template: ## censored::cond_inference_surv_ctree(formula = missing_arg(), -## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, -## data)) +## data = missing_arg(), weights = missing_arg(), maxdepth = integer(1), +## minsplit = min_rows(0L, data)) ``` `censored::cond_inference_surv_ctree()` is a wrapper around [party::ctree()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd index 54bc58d78..23a6bf478 100644 --- a/man/rmd/decision_tree_rpart.Rmd +++ b/man/rmd/decision_tree_rpart.Rmd @@ -65,6 +65,11 @@ decision_tree( ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-rpart) for `decision_tree()` with the `"rpart"` engine. diff --git a/man/rmd/decision_tree_rpart.md b/man/rmd/decision_tree_rpart.md index 042f71726..f6e5dd6b2 100644 --- a/man/rmd/decision_tree_rpart.md +++ b/man/rmd/decision_tree_rpart.md @@ -99,8 +99,8 @@ decision_tree( ## ## Model fit template: ## pec::pecRpart(formula = missing_arg(), data = missing_arg(), -## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, -## data)) +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = min_rows(0L, data)) ``` ## Preprocessing requirements @@ -108,6 +108,13 @@ decision_tree( This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-rpart) for `decision_tree()` with the `"rpart"` engine. diff --git a/man/rmd/decision_tree_spark.Rmd b/man/rmd/decision_tree_spark.Rmd index 6ad5d4140..b2b7872a1 100644 --- a/man/rmd/decision_tree_spark.Rmd +++ b/man/rmd/decision_tree_spark.Rmd @@ -47,6 +47,13 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details ```{r child = "template-spark-notes.Rmd"} diff --git a/man/rmd/decision_tree_spark.md b/man/rmd/decision_tree_spark.md index 0bd0fcc7d..4fa02ec15 100644 --- a/man/rmd/decision_tree_spark.md +++ b/man/rmd/decision_tree_spark.md @@ -69,6 +69,15 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details diff --git a/man/rmd/linear_reg_glm.Rmd b/man/rmd/linear_reg_glm.Rmd index dc786e3ae..5bd2675f0 100644 --- a/man/rmd/linear_reg_glm.Rmd +++ b/man/rmd/linear_reg_glm.Rmd @@ -28,6 +28,13 @@ linear_reg() %>% ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glm) for `linear_reg()` with the `"glm"` engine. diff --git a/man/rmd/linear_reg_glm.md b/man/rmd/linear_reg_glm.md index 15410f4d5..61f12b632 100644 --- a/man/rmd/linear_reg_glm.md +++ b/man/rmd/linear_reg_glm.md @@ -53,6 +53,15 @@ linear_reg() %>% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glm) for `linear_reg()` with the `"glm"` engine. diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd index 816f8af18..be4160c81 100644 --- a/man/rmd/linear_reg_glmnet.Rmd +++ b/man/rmd/linear_reg_glmnet.Rmd @@ -43,6 +43,11 @@ linear_reg(penalty = double(1), mixture = double(1)) %>% ``` By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glmnet) for `linear_reg()` with the `"glmnet"` engine. diff --git a/man/rmd/linear_reg_glmnet.md b/man/rmd/linear_reg_glmnet.md index 377c93b1f..289843d7d 100644 --- a/man/rmd/linear_reg_glmnet.md +++ b/man/rmd/linear_reg_glmnet.md @@ -50,6 +50,13 @@ Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glmnet) for `linear_reg()` with the `"glmnet"` engine. diff --git a/man/rmd/linear_reg_gls.md b/man/rmd/linear_reg_gls.md index 68116d07f..5afb7e7c6 100644 --- a/man/rmd/linear_reg_gls.md +++ b/man/rmd/linear_reg_gls.md @@ -179,11 +179,9 @@ lme_fit %>% tidy() %>% ``` ``` -## # A tibble: 2 × 6 -## term estimate std.error df statistic p.value -## -## 1 (Intercept) -4.95 0.808 183 -6.13 5.37e- 9 -## 2 week -2.12 0.224 183 -9.47 1.41e-17 +## # A tibble: 0 × 6 +## # … with 6 variables: term , estimate , std.error , df , +## # statistic , p.value ``` ```r diff --git a/man/rmd/linear_reg_lm.Rmd b/man/rmd/linear_reg_lm.Rmd index 205c21374..125e4b774 100644 --- a/man/rmd/linear_reg_lm.Rmd +++ b/man/rmd/linear_reg_lm.Rmd @@ -20,6 +20,15 @@ linear_reg() %>% ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +_However_, the documentation in [stats::lm()] assumes that is specific type of case weights are being used: "Non-NULL weights can be used to indicate that different observations have different variances (with the values in weights being inversely proportional to the variances); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations (including the case that there are w_i observations equal to `y_i` and the data have been summarized). However, in the latter case, notice that within-group variation is not used. Therefore, the sigma estimate and residual degrees of freedom may be suboptimal; in the case of replication weights, **even wrong**. Hence, standard errors and analysis of variance tables should be treated with care" (emphasis added) + +Depending on your application, the degrees of freedown for the model (and other statistics) might be incorrect. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-lm) for `linear_reg()` with the `"lm"` engine. diff --git a/man/rmd/linear_reg_lm.md b/man/rmd/linear_reg_lm.md index d8b203c7d..25f99742b 100644 --- a/man/rmd/linear_reg_lm.md +++ b/man/rmd/linear_reg_lm.md @@ -30,6 +30,17 @@ linear_reg() %>% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +_However_, the documentation in [stats::lm()] assumes that is specific type of case weights are being used: "Non-NULL weights can be used to indicate that different observations have different variances (with the values in weights being inversely proportional to the variances); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations (including the case that there are w_i observations equal to `y_i` and the data have been summarized). However, in the latter case, notice that within-group variation is not used. Therefore, the sigma estimate and residual degrees of freedom may be suboptimal; in the case of replication weights, **even wrong**. Hence, standard errors and analysis of variance tables should be treated with care" (emphasis added) + +Depending on your application, the degrees of freedown for the model (and other statistics) might be incorrect. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-lm) for `linear_reg()` with the `"lm"` engine. diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd index 63df4ab93..d75d44e52 100644 --- a/man/rmd/linear_reg_lme.Rmd +++ b/man/rmd/linear_reg_lme.Rmd @@ -60,6 +60,11 @@ lme_wflow <- fit(lme_wflow, data = riesby) ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/linear_reg_lme.md b/man/rmd/linear_reg_lme.md index c49497d28..56be80610 100644 --- a/man/rmd/linear_reg_lme.md +++ b/man/rmd/linear_reg_lme.md @@ -89,6 +89,13 @@ lme_wflow <- fit(lme_wflow, data = riesby) ``` +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd index 976956223..2a18c9d17 100644 --- a/man/rmd/linear_reg_lmer.Rmd +++ b/man/rmd/linear_reg_lmer.Rmd @@ -60,6 +60,11 @@ lmer_wflow <- fit(lmer_wflow, data = riesby) ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/linear_reg_lmer.md b/man/rmd/linear_reg_lmer.md index ff42e214a..067ea848e 100644 --- a/man/rmd/linear_reg_lmer.md +++ b/man/rmd/linear_reg_lmer.md @@ -27,7 +27,7 @@ linear_reg() %>% ## Computational engine: lmer ## ## Model fit template: -## lme4::lmer(formula = missing_arg(), data = missing_arg()) +## lme4::lmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) ``` @@ -89,6 +89,13 @@ lmer_wflow <- fit(lmer_wflow, data = riesby) ``` +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/linear_reg_spark.Rmd b/man/rmd/linear_reg_spark.Rmd index 255bc71ba..8b8601fae 100644 --- a/man/rmd/linear_reg_spark.Rmd +++ b/man/rmd/linear_reg_spark.Rmd @@ -41,8 +41,17 @@ linear_reg(penalty = double(1), mixture = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` + By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data. + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details ```{r child = "template-spark-notes.Rmd"} diff --git a/man/rmd/linear_reg_spark.md b/man/rmd/linear_reg_spark.md index 7db4bfa74..e26938ff1 100644 --- a/man/rmd/linear_reg_spark.md +++ b/man/rmd/linear_reg_spark.md @@ -37,7 +37,7 @@ linear_reg(penalty = double(1), mixture = double(1)) %>% ## ## Model fit template: ## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) ``` ## Preprocessing requirements @@ -48,8 +48,19 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. + By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details diff --git a/man/rmd/linear_reg_stan.Rmd b/man/rmd/linear_reg_stan.Rmd index 6e9961221..804aab653 100644 --- a/man/rmd/linear_reg_stan.Rmd +++ b/man/rmd/linear_reg_stan.Rmd @@ -39,6 +39,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `linear_reg()` with the `"stan"` engine. diff --git a/man/rmd/linear_reg_stan.md b/man/rmd/linear_reg_stan.md index 0f7b8bc60..8da583a1b 100644 --- a/man/rmd/linear_reg_stan.md +++ b/man/rmd/linear_reg_stan.md @@ -50,6 +50,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `linear_reg()` with the `"stan"` engine. diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd index b2d76f8c7..bbd62de78 100644 --- a/man/rmd/linear_reg_stan_glmer.Rmd +++ b/man/rmd/linear_reg_stan_glmer.Rmd @@ -75,6 +75,11 @@ fit(glmer_wflow, data = riesby) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/linear_reg_stan_glmer.md b/man/rmd/linear_reg_stan_glmer.md index 9dd2403de..f23832798 100644 --- a/man/rmd/linear_reg_stan_glmer.md +++ b/man/rmd/linear_reg_stan_glmer.md @@ -105,6 +105,13 @@ fit(glmer_wflow, data = riesby) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/logistic-reg.md b/man/rmd/logistic-reg.md index 2bfd3343c..c98158a9a 100644 --- a/man/rmd/logistic-reg.md +++ b/man/rmd/logistic-reg.md @@ -101,8 +101,7 @@ logistic_reg() %>% ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## verbose = FALSE) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), verbose = FALSE) ``` For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1 @@ -156,7 +155,7 @@ logistic_reg() %>% ## ## Model fit template: ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), family = "binomial") +## weights = missing_arg(), family = "binomial") ``` ## keras diff --git a/man/rmd/logistic_reg_LiblineaR.md b/man/rmd/logistic_reg_LiblineaR.md index e0d308e0f..761092a85 100644 --- a/man/rmd/logistic_reg_LiblineaR.md +++ b/man/rmd/logistic_reg_LiblineaR.md @@ -36,8 +36,8 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>% ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## cost = Inf, type = double(1), verbose = FALSE) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), cost = Inf, +## type = double(1), verbose = FALSE) ``` ## Preprocessing requirements diff --git a/man/rmd/logistic_reg_glm.Rmd b/man/rmd/logistic_reg_glm.Rmd index 408b84ac0..7db0e2847 100644 --- a/man/rmd/logistic_reg_glm.Rmd +++ b/man/rmd/logistic_reg_glm.Rmd @@ -28,6 +28,13 @@ linear_reg() %>% ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glm) for `logistic_reg()` with the `"glm"` engine. diff --git a/man/rmd/logistic_reg_glm.md b/man/rmd/logistic_reg_glm.md index af17affb4..7a1cdf2c6 100644 --- a/man/rmd/logistic_reg_glm.md +++ b/man/rmd/logistic_reg_glm.md @@ -53,6 +53,15 @@ linear_reg() %>% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glm) for `logistic_reg()` with the `"glm"` engine. diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd index 4951fcf2c..278891d24 100644 --- a/man/rmd/logistic_reg_glmer.Rmd +++ b/man/rmd/logistic_reg_glmer.Rmd @@ -59,6 +59,11 @@ glmer_wflow <- fit(glmer_wflow, data = toenail) ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/logistic_reg_glmer.md b/man/rmd/logistic_reg_glmer.md index 2a374bdc7..eebcfd6c8 100644 --- a/man/rmd/logistic_reg_glmer.md +++ b/man/rmd/logistic_reg_glmer.md @@ -26,7 +26,8 @@ logistic_reg() %>% ## Computational engine: glmer ## ## Model fit template: -## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = binomial) +## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = binomial) ``` @@ -88,6 +89,13 @@ glmer_wflow <- fit(glmer_wflow, data = toenail) ``` +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd index d9ae0c970..52c3a5507 100644 --- a/man/rmd/logistic_reg_glmnet.Rmd +++ b/man/rmd/logistic_reg_glmnet.Rmd @@ -43,6 +43,11 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>% ``` By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glmnet) for `logistic_reg()` with the `"glmnet"` engine. diff --git a/man/rmd/logistic_reg_glmnet.md b/man/rmd/logistic_reg_glmnet.md index 304b7f657..b840222a7 100644 --- a/man/rmd/logistic_reg_glmnet.md +++ b/man/rmd/logistic_reg_glmnet.md @@ -50,6 +50,13 @@ Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glmnet) for `logistic_reg()` with the `"glmnet"` engine. diff --git a/man/rmd/logistic_reg_spark.Rmd b/man/rmd/logistic_reg_spark.Rmd index a202cc4fa..bf37a752f 100644 --- a/man/rmd/logistic_reg_spark.Rmd +++ b/man/rmd/logistic_reg_spark.Rmd @@ -41,8 +41,16 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` + By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details ```{r child = "template-spark-notes.Rmd"} diff --git a/man/rmd/logistic_reg_spark.md b/man/rmd/logistic_reg_spark.md index 90d98c2e1..bf4c33ad7 100644 --- a/man/rmd/logistic_reg_spark.md +++ b/man/rmd/logistic_reg_spark.md @@ -37,7 +37,7 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>% ## ## Model fit template: ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "binomial") ``` @@ -49,8 +49,18 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. + By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + ## Other details diff --git a/man/rmd/logistic_reg_stan.Rmd b/man/rmd/logistic_reg_stan.Rmd index 2bb5768ad..ee0e3c6a3 100644 --- a/man/rmd/logistic_reg_stan.Rmd +++ b/man/rmd/logistic_reg_stan.Rmd @@ -39,6 +39,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-stan) for `logistic_reg()` with the `"stan"` engine. diff --git a/man/rmd/logistic_reg_stan.md b/man/rmd/logistic_reg_stan.md index 11190f9e2..7587d8db2 100644 --- a/man/rmd/logistic_reg_stan.md +++ b/man/rmd/logistic_reg_stan.md @@ -50,6 +50,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-stan) for `logistic_reg()` with the `"stan"` engine. diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd index 0c516680d..59537f316 100644 --- a/man/rmd/logistic_reg_stan_glmer.Rmd +++ b/man/rmd/logistic_reg_stan_glmer.Rmd @@ -74,6 +74,11 @@ fit(glmer_wflow, data = toenail) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/logistic_reg_stan_glmer.md b/man/rmd/logistic_reg_stan_glmer.md index 953b552df..f14e1d0ea 100644 --- a/man/rmd/logistic_reg_stan_glmer.md +++ b/man/rmd/logistic_reg_stan_glmer.md @@ -104,6 +104,13 @@ fit(glmer_wflow, data = toenail) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/mars_earth.Rmd b/man/rmd/mars_earth.Rmd index eb58d2620..30543008f 100644 --- a/man/rmd/mars_earth.Rmd +++ b/man/rmd/mars_earth.Rmd @@ -50,6 +50,11 @@ An alternate method for using MARs for categorical outcomes can be found in [dis ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mars-earth) for `mars()` with the `"earth"` engine. diff --git a/man/rmd/mars_earth.md b/man/rmd/mars_earth.md index e8d8688be..e0d7826a6 100644 --- a/man/rmd/mars_earth.md +++ b/man/rmd/mars_earth.md @@ -80,6 +80,13 @@ An alternate method for using MARs for categorical outcomes can be found in [dis Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mars-earth) for `mars()` with the `"earth"` engine. diff --git a/man/rmd/mlp_brulee.md b/man/rmd/mlp_brulee.md index cd86bf9f4..2ec86f5d0 100644 --- a/man/rmd/mlp_brulee.md +++ b/man/rmd/mlp_brulee.md @@ -13,8 +13,6 @@ This model has 7 tuning parameters: - `penalty`: Amount of Regularization (type: double, default: 0.0) -- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) - - `epochs`: # Epochs (type: integer, default: 0.01) - `dropout`: Dropout Rate (type: double, default: 0.0) @@ -23,6 +21,8 @@ This model has 7 tuning parameters: - `activation`: Activation Function (type: character, default: 'relu') +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature the optimization method used in those packages. Both `penalty` and `dropout` should be not be used in the same model. diff --git a/man/rmd/mlp_nnet.Rmd b/man/rmd/mlp_nnet.Rmd index 176965ef5..4ba1fdd03 100644 --- a/man/rmd/mlp_nnet.Rmd +++ b/man/rmd/mlp_nnet.Rmd @@ -62,6 +62,11 @@ mlp( ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-nnet) for `mlp()` with the `"nnet"` engine. diff --git a/man/rmd/mlp_nnet.md b/man/rmd/mlp_nnet.md index 6866b1516..fab376f83 100644 --- a/man/rmd/mlp_nnet.md +++ b/man/rmd/mlp_nnet.md @@ -90,6 +90,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-nnet) for `mlp()` with the `"nnet"` engine. diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd index 178f8fd1e..fedf8ebf4 100644 --- a/man/rmd/multinom_reg_glmnet.Rmd +++ b/man/rmd/multinom_reg_glmnet.Rmd @@ -47,6 +47,11 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-glmnet) for `multinom_reg()` with the `"glmnet"` engine. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. diff --git a/man/rmd/multinom_reg_glmnet.md b/man/rmd/multinom_reg_glmnet.md index abad257f7..382a3243e 100644 --- a/man/rmd/multinom_reg_glmnet.md +++ b/man/rmd/multinom_reg_glmnet.md @@ -54,6 +54,13 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-glmnet) for `multinom_reg()` with the `"glmnet"` engine. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd index 65340d081..60529d727 100644 --- a/man/rmd/multinom_reg_nnet.Rmd +++ b/man/rmd/multinom_reg_nnet.Rmd @@ -44,6 +44,11 @@ multinom_reg(penalty = double(1)) %>% The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-nnet) for `multinom_reg()` with the `"nnet"` engine. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media diff --git a/man/rmd/multinom_reg_nnet.md b/man/rmd/multinom_reg_nnet.md index f7072b433..3735616eb 100644 --- a/man/rmd/multinom_reg_nnet.md +++ b/man/rmd/multinom_reg_nnet.md @@ -48,6 +48,13 @@ scale each so that each predictor has mean zero and a variance of one. The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-nnet) for `multinom_reg()` with the `"nnet"` engine. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media diff --git a/man/rmd/multinom_reg_spark.Rmd b/man/rmd/multinom_reg_spark.Rmd index 3f97894cb..9421a28cf 100644 --- a/man/rmd/multinom_reg_spark.Rmd +++ b/man/rmd/multinom_reg_spark.Rmd @@ -41,8 +41,17 @@ multinom_reg(penalty = double(1), mixture = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` + By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + + ## Other details ```{r child = "template-spark-notes.Rmd"} diff --git a/man/rmd/multinom_reg_spark.md b/man/rmd/multinom_reg_spark.md index 4ee148d72..7e6b9fc5d 100644 --- a/man/rmd/multinom_reg_spark.md +++ b/man/rmd/multinom_reg_spark.md @@ -37,7 +37,7 @@ multinom_reg(penalty = double(1), mixture = double(1)) %>% ## ## Model fit template: ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "multinomial") ``` @@ -49,8 +49,19 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. + By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + + ## Other details diff --git a/man/rmd/poisson_reg_glm.Rmd b/man/rmd/poisson_reg_glm.Rmd index c245b4889..0d0819fa6 100644 --- a/man/rmd/poisson_reg_glm.Rmd +++ b/man/rmd/poisson_reg_glm.Rmd @@ -24,4 +24,20 @@ poisson_reg() %>% ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + +If frequency weights are being used in your application, the [glm_grouped()] model (and corresponding engine) may be more appropriate. + + diff --git a/man/rmd/poisson_reg_glm.md b/man/rmd/poisson_reg_glm.md index 0473be15f..b107c08f3 100644 --- a/man/rmd/poisson_reg_glm.md +++ b/man/rmd/poisson_reg_glm.md @@ -35,4 +35,24 @@ poisson_reg() %>% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +_However_, the documentation in [stats::glm()] assumes that is specific type of case weights are being used:"Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM." + +If frequency weights are being used in your application, the [glm_grouped()] model (and corresponding engine) may be more appropriate. + + diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd index d330b2f33..02b6aed03 100644 --- a/man/rmd/poisson_reg_glmer.Rmd +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -58,6 +58,11 @@ glmer_wflow <- fit(glmer_wflow, data = longitudinal_counts) ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/poisson_reg_glmer.md b/man/rmd/poisson_reg_glmer.md index 6a23a873f..61cd2083b 100644 --- a/man/rmd/poisson_reg_glmer.md +++ b/man/rmd/poisson_reg_glmer.md @@ -26,7 +26,8 @@ poisson_reg(engine = "glmer") %>% ## Computational engine: glmer ## ## Model fit template: -## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = stats::poisson) +## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::poisson) ``` @@ -87,6 +88,13 @@ glmer_wflow <- fit(glmer_wflow, data = longitudinal_counts) ``` +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd index 237dae9b1..f2d2f570d 100644 --- a/man/rmd/poisson_reg_glmnet.Rmd +++ b/man/rmd/poisson_reg_glmnet.Rmd @@ -47,3 +47,8 @@ poisson_reg(penalty = double(1), mixture = double(1)) %>% ``` By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data. + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` diff --git a/man/rmd/poisson_reg_glmnet.md b/man/rmd/poisson_reg_glmnet.md index 8119746ab..8ef9e9d8b 100644 --- a/man/rmd/poisson_reg_glmnet.md +++ b/man/rmd/poisson_reg_glmnet.md @@ -54,3 +54,10 @@ Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. diff --git a/man/rmd/poisson_reg_hurdle.Rmd b/man/rmd/poisson_reg_hurdle.Rmd index 043ee62a3..c6c946744 100644 --- a/man/rmd/poisson_reg_hurdle.Rmd +++ b/man/rmd/poisson_reg_hurdle.Rmd @@ -57,3 +57,8 @@ workflow() %>% ``` The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` diff --git a/man/rmd/poisson_reg_hurdle.md b/man/rmd/poisson_reg_hurdle.md index 25c9fde6f..7dfdfb4bf 100644 --- a/man/rmd/poisson_reg_hurdle.md +++ b/man/rmd/poisson_reg_hurdle.md @@ -106,3 +106,10 @@ workflow() %>% ``` The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd index d82a3d9d9..bcead8846 100644 --- a/man/rmd/poisson_reg_stan.Rmd +++ b/man/rmd/poisson_reg_stan.Rmd @@ -43,6 +43,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine. diff --git a/man/rmd/poisson_reg_stan.md b/man/rmd/poisson_reg_stan.md index d4542bd53..73dda6395 100644 --- a/man/rmd/poisson_reg_stan.md +++ b/man/rmd/poisson_reg_stan.md @@ -54,6 +54,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine. diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd index f912e1d21..19bf1ced1 100644 --- a/man/rmd/poisson_reg_stan_glmer.Rmd +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -73,6 +73,11 @@ fit(glmer_wflow, data = longitudinal_counts) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/poisson_reg_stan_glmer.md b/man/rmd/poisson_reg_stan_glmer.md index b1724addf..3afdf95e2 100644 --- a/man/rmd/poisson_reg_stan_glmer.md +++ b/man/rmd/poisson_reg_stan_glmer.md @@ -103,6 +103,13 @@ fit(glmer_wflow, data = longitudinal_counts) For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/poisson_reg_zeroinfl.Rmd b/man/rmd/poisson_reg_zeroinfl.Rmd index b424c0ac5..3c87a9e49 100644 --- a/man/rmd/poisson_reg_zeroinfl.Rmd +++ b/man/rmd/poisson_reg_zeroinfl.Rmd @@ -57,3 +57,8 @@ workflow() %>% ``` The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` diff --git a/man/rmd/poisson_reg_zeroinfl.md b/man/rmd/poisson_reg_zeroinfl.md index 43b6b2281..afb529e35 100644 --- a/man/rmd/poisson_reg_zeroinfl.md +++ b/man/rmd/poisson_reg_zeroinfl.md @@ -107,3 +107,10 @@ workflow() %>% ``` The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd index 2d9635884..645e3c834 100644 --- a/man/rmd/proportional_hazards_glmnet.Rmd +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -88,6 +88,11 @@ Note that columns used in the `strata()` function _will_ also be estimated in th ```{r child = "template-censored-linear-predictor.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + # References - Simon N, Friedman J, Hastie T, Tibshirani R. 2011. "Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}. diff --git a/man/rmd/proportional_hazards_survival.Rmd b/man/rmd/proportional_hazards_survival.Rmd index 91065564f..f262fb032 100644 --- a/man/rmd/proportional_hazards_survival.Rmd +++ b/man/rmd/proportional_hazards_survival.Rmd @@ -44,9 +44,16 @@ Note that columns used in the `strata()` function will not be estimated in the r # Linear predictor values + ```{r child = "template-censored-linear-predictor.Rmd"} ``` + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120. diff --git a/man/rmd/proportional_hazards_survival.md b/man/rmd/proportional_hazards_survival.md index 6c2ad17e0..eec87ddda 100644 --- a/man/rmd/proportional_hazards_survival.md +++ b/man/rmd/proportional_hazards_survival.md @@ -28,7 +28,7 @@ proportional_hazards() %>% ## ## Model fit template: ## survival::coxph(formula = missing_arg(), data = missing_arg(), -## x = TRUE, model = TRUE) +## weights = missing_arg(), x = TRUE, model = TRUE) ``` ## Other details @@ -87,6 +87,7 @@ Note that columns used in the `strata()` function will not be estimated in the r # Linear predictor values + Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions. For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs. @@ -95,6 +96,14 @@ tidymodels does not treat different models differently when computing performanc This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120. diff --git a/man/rmd/rand_forest_party.md b/man/rmd/rand_forest_party.md index 719d12f26..f883edfb4 100644 --- a/man/rmd/rand_forest_party.md +++ b/man/rmd/rand_forest_party.md @@ -36,7 +36,7 @@ rand_forest() %>% ## ## Model fit template: ## censored::cond_inference_surv_cforest(formula = missing_arg(), -## data = missing_arg()) +## data = missing_arg(), weights = missing_arg()) ``` `censored::cond_inference_surv_cforest()` is a wrapper around [party::cforest()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd index 86e723fd2..2567858c0 100644 --- a/man/rmd/rand_forest_ranger.Rmd +++ b/man/rmd/rand_forest_ranger.Rmd @@ -67,6 +67,11 @@ By default, parallel processing is turned off. When tuning, it is more efficient For `ranger` confidence intervals, the intervals are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and will be coerced to be in this range. +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-ranger) for `rand_forest()` with the `"ranger"` engine. diff --git a/man/rmd/rand_forest_ranger.md b/man/rmd/rand_forest_ranger.md index 16b3cdba9..7bd425c48 100644 --- a/man/rmd/rand_forest_ranger.md +++ b/man/rmd/rand_forest_ranger.md @@ -44,7 +44,7 @@ rand_forest( ## Computational engine: ranger ## ## Model fit template: -## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1)) @@ -77,7 +77,7 @@ rand_forest( ## Computational engine: ranger ## ## Model fit template: -## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) @@ -96,6 +96,13 @@ By default, parallel processing is turned off. When tuning, it is more efficient For `ranger` confidence intervals, the intervals are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and will be coerced to be in this range. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-ranger) for `rand_forest()` with the `"ranger"` engine. diff --git a/man/rmd/rand_forest_spark.Rmd b/man/rmd/rand_forest_spark.Rmd index 471342ac5..0afe168de 100644 --- a/man/rmd/rand_forest_spark.Rmd +++ b/man/rmd/rand_forest_spark.Rmd @@ -62,6 +62,14 @@ rand_forest( ```{r child = "template-spark-notes.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/rand_forest_spark.md b/man/rmd/rand_forest_spark.md index 9253ebbab..3753b3a3f 100644 --- a/man/rmd/rand_forest_spark.md +++ b/man/rmd/rand_forest_spark.md @@ -96,6 +96,16 @@ For models created using the `"spark"` engine, there are several things to consi * There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. * To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +Note that, for spark engines, the `case_weight` argument value should be a character string to specify the column with the numeric case weights. + + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd index 4dabb03b2..a4f49d61b 100644 --- a/man/rmd/survival_reg_survival.Rmd +++ b/man/rmd/survival_reg_survival.Rmd @@ -54,6 +54,11 @@ survival_reg() %>% extract_fit_engine() ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Kalbfleisch, J. D. and Prentice, R. L. 2002 _The statistical analysis of failure time data_, Wiley. diff --git a/man/rmd/survival_reg_survival.md b/man/rmd/survival_reg_survival.md index 26b78030f..9ea328154 100644 --- a/man/rmd/survival_reg_survival.md +++ b/man/rmd/survival_reg_survival.md @@ -75,6 +75,13 @@ survival_reg() %>% ## n= 26 ``` +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Kalbfleisch, J. D. and Prentice, R. L. 2002 _The statistical analysis of failure time data_, Wiley. diff --git a/man/rmd/svm_linear_LiblineaR.md b/man/rmd/svm_linear_LiblineaR.md index 059744312..fa603db80 100644 --- a/man/rmd/svm_linear_LiblineaR.md +++ b/man/rmd/svm_linear_LiblineaR.md @@ -38,8 +38,8 @@ svm_linear( ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## C = double(1), svr_eps = double(1), type = 11) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), +## svr_eps = double(1), type = 11) ``` ## Translation from parsnip to the original package (classification) @@ -63,8 +63,8 @@ svm_linear( ## Computational engine: LiblineaR ## ## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), -## C = double(1), type = 1) +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), +## type = 1) ``` The `margin` parameter does not apply to classification models. diff --git a/man/rmd/template-no-case-weights.Rmd b/man/rmd/template-no-case-weights.Rmd new file mode 100644 index 000000000..e096d2385 --- /dev/null +++ b/man/rmd/template-no-case-weights.Rmd @@ -0,0 +1 @@ +The underlying model implementation does not allow for case weights. diff --git a/man/rmd/template-uses-case-weights.Rmd b/man/rmd/template-uses-case-weights.Rmd new file mode 100644 index 000000000..f7cf35aaa --- /dev/null +++ b/man/rmd/template-uses-case-weights.Rmd @@ -0,0 +1,3 @@ +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. From ca3e6c846867637c77aee82007ecfb7b9ee2c44f Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 30 Mar 2022 13:56:49 -0400 Subject: [PATCH 27/41] add missing topic --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index b1862418a..63c652148 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -68,6 +68,7 @@ reference: - autoplot.model_fit - add_rowindex - augment.model_fit + - case_weights - descriptors - extract-parsnip - fit.model_spec From ceb9c0b6e7efc1396952478d1d1803e5537b8e99 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 31 Mar 2022 07:07:35 -0400 Subject: [PATCH 28/41] more engine doc updates --- man/details_bag_tree_rpart.Rd | 3 +- man/details_boost_tree_mboost.Rd | 2 +- man/details_decision_tree_party.Rd | 4 +- man/details_decision_tree_rpart.Rd | 4 +- man/details_linear_reg_brulee.Rd | 5 + man/details_linear_reg_gee.Rd | 5 + man/details_linear_reg_glm.Rd | 9 +- man/details_linear_reg_keras.Rd | 5 + man/details_linear_reg_lme.Rd | 7 +- man/details_logistic_reg_brulee.Rd | 5 + man/details_logistic_reg_glm.Rd | 13 +- man/details_logistic_reg_keras.Rd | 5 + man/details_mlp_brulee.Rd | 5 + man/details_mlp_keras.Rd | 5 + man/details_naive_Bayes_klaR.Rd | 5 + man/details_nearest_neighbor_kknn.Rd | 5 + man/details_poisson_reg_glm.Rd | 9 +- man/details_proportional_hazards_glmnet.Rd | 21 +- man/details_proportional_hazards_survival.Rd | 2 +- man/details_rand_forest_party.Rd | 2 +- man/details_rule_fit_xrf.Rd | 5 + man/details_svm_linear_LiblineaR.Rd | 5 + man/details_svm_linear_kernlab.Rd | 5 + man/details_svm_poly_kernlab.Rd | 5 + man/details_svm_rbf_kernlab.Rd | 5 + man/rmd/bag_tree_rpart.md | 3 +- man/rmd/boost_tree_mboost.md | 2 +- man/rmd/decision_tree_party.md | 4 +- man/rmd/decision_tree_rpart.md | 4 +- man/rmd/linear_reg_brulee.Rmd | 5 + man/rmd/linear_reg_brulee.md | 5 + man/rmd/linear_reg_gee.Rmd | 5 + man/rmd/linear_reg_gee.md | 5 + man/rmd/linear_reg_keras.Rmd | 5 + man/rmd/linear_reg_keras.md | 5 + man/rmd/linear_reg_lme.Rmd | 2 +- man/rmd/linear_reg_lme.md | 4 +- man/rmd/logistic-reg.Rmd | 117 ----------- man/rmd/logistic-reg.md | 195 ------------------- man/rmd/logistic_reg_brulee.Rmd | 5 + man/rmd/logistic_reg_brulee.md | 5 + man/rmd/logistic_reg_keras.Rmd | 5 + man/rmd/logistic_reg_keras.md | 5 + man/rmd/mlp_brulee.Rmd | 5 + man/rmd/mlp_brulee.md | 5 + man/rmd/mlp_keras.Rmd | 5 + man/rmd/mlp_keras.md | 5 + man/rmd/naive_Bayes_klaR.Rmd | 5 + man/rmd/naive_Bayes_klaR.md | 5 + man/rmd/nearest_neighbor_kknn.Rmd | 5 + man/rmd/nearest_neighbor_kknn.md | 5 + man/rmd/proportional_hazards_survival.md | 2 +- man/rmd/rand_forest_party.md | 2 +- man/rmd/rule_fit_xrf.Rmd | 5 + man/rmd/rule_fit_xrf.md | 5 + man/rmd/svm_linear_LiblineaR.Rmd | 5 + man/rmd/svm_linear_LiblineaR.md | 5 + man/rmd/svm_linear_kernlab.Rmd | 5 + man/rmd/svm_linear_kernlab.md | 5 + man/rmd/svm_poly_kernlab.Rmd | 5 + man/rmd/svm_poly_kernlab.md | 5 + man/rmd/svm_rbf_kernlab.Rmd | 5 + man/rmd/svm_rbf_kernlab.md | 5 + 63 files changed, 261 insertions(+), 360 deletions(-) delete mode 100644 man/rmd/logistic-reg.Rmd delete mode 100644 man/rmd/logistic-reg.md diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index fd1c5185b..2503f64cc 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -96,8 +96,7 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## ## Model fit template: ## ipred::bagging(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = integer(1)) +## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) } } diff --git a/man/details_boost_tree_mboost.Rd b/man/details_boost_tree_mboost.Rd index 02a689ae2..a26b5ed01 100644 --- a/man/details_boost_tree_mboost.Rd +++ b/man/details_boost_tree_mboost.Rd @@ -40,7 +40,7 @@ boost_tree() \%>\% ## ## Model fit template: ## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), family = mboost::CoxPH()) +## family = mboost::CoxPH()) } \code{censored::blackboost_train()} is a wrapper around diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd index cf17577f6..be7a55bf2 100644 --- a/man/details_decision_tree_party.Rd +++ b/man/details_decision_tree_party.Rd @@ -45,8 +45,8 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% ## ## Model fit template: ## censored::cond_inference_surv_ctree(formula = missing_arg(), -## data = missing_arg(), weights = missing_arg(), maxdepth = integer(1), -## minsplit = min_rows(0L, data)) +## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) } \code{censored::cond_inference_surv_ctree()} is a wrapper around diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index 1956b8198..f2c9d2c48 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -84,8 +84,8 @@ decision_tree( ## ## Model fit template: ## pec::pecRpart(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = min_rows(0L, data)) +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) } } diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd index e0c7e85cf..adbc66ff7 100644 --- a/man/details_linear_reg_brulee.Rd +++ b/man/details_linear_reg_brulee.Rd @@ -67,6 +67,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd index 6256bf53a..9fcab55c2 100644 --- a/man/details_linear_reg_gee.Rd +++ b/man/details_linear_reg_gee.Rd @@ -90,6 +90,11 @@ to \code{glm()} is needed to get the rank and QR decomposition objects so that \code{predict()} can be used. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index afe66b0c1..0621ebc98 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -63,7 +63,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. \emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes -that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. +that is specific type of case weights are being used:“Non-NULL weights +can be used to indicate that different observations have different +dispersions (with the values in weights being inversely proportional to +the dispersions); or equivalently, when the elements of weights are +positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i} +unit-weight observations. For a binomial GLM prior weights are used to +give the number of trials when the response is the proportion of +successes: they would rarely be used for a Poisson GLM.” } \subsection{Examples}{ diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd index 0ad5fd777..2e15d554f 100644 --- a/man/details_linear_reg_keras.Rd +++ b/man/details_linear_reg_keras.Rd @@ -51,6 +51,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index 7a9fe1ca5..d82179744 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -102,12 +102,7 @@ fit(lme_wflow, data = riesby) \subsection{Case weights}{ -This model can utilize case weights during model fitting. To use them, -see the documentation in \link{case_weights} and the examples -on \code{tidymodels.org}. - -The \code{fit()} and \code{fit_xy()} arguments have arguments called -\code{case_weights} that expect vectors of case weights. +The underlying model implementation does not allow for case weights. } \subsection{References}{ diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd index ee103c3ea..2c99070ba 100644 --- a/man/details_logistic_reg_brulee.Rd +++ b/man/details_logistic_reg_brulee.Rd @@ -67,6 +67,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index dcdb249ff..7e7c1f859 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -63,11 +63,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. \emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes -that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. - -If frequency weights are being used in your application, the -\code{\link[=glm_grouped]{glm_grouped()}} model (and corresponding engine) may be -more appropriate. +that is specific type of case weights are being used:“Non-NULL weights +can be used to indicate that different observations have different +dispersions (with the values in weights being inversely proportional to +the dispersions); or equivalently, when the elements of weights are +positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i} +unit-weight observations. For a binomial GLM prior weights are used to +give the number of trials when the response is the proportion of +successes: they would rarely be used for a Poisson GLM.” } \subsection{Examples}{ diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd index 6bf922f0f..4f33654bd 100644 --- a/man/details_logistic_reg_keras.Rd +++ b/man/details_logistic_reg_keras.Rd @@ -53,6 +53,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index ed3035ad4..dd6a9b355 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -116,6 +116,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd index 0a5f4935c..8fb97ed09 100644 --- a/man/details_mlp_keras.Rd +++ b/man/details_mlp_keras.Rd @@ -89,6 +89,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_naive_Bayes_klaR.Rd b/man/details_naive_Bayes_klaR.Rd index 182adffa4..4c9b6cb66 100644 --- a/man/details_naive_Bayes_klaR.Rd +++ b/man/details_naive_Bayes_klaR.Rd @@ -53,6 +53,11 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd index 5b3c45205..945acaec5 100644 --- a/man/details_nearest_neighbor_kknn.Rd +++ b/man/details_nearest_neighbor_kknn.Rd @@ -88,6 +88,11 @@ The “Fitting and Predicting with parsnip” article contains for \code{nearest_neighbor()} with the \code{"kknn"} engine. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Hechenbichler K. and Schliep K.P. (2004) \href{https://epub.ub.uni-muenchen.de/1769/}{Weighted k-Nearest-Neighbor Techniques and Ordinal Classification}, Discussion diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index f9b38a217..f27aa6bdb 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -58,7 +58,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. \emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes -that is specific type of case weights are being used:Non-NULL weights can be used to indicate that different observationshave different dispersions (with the values in weights being inverselyproportional to the dispersions); or equivalently, when the elementsof weights are positive integers w_i, that each response y_i isthe mean of w_i unit-weight observations. For a binomial GLM priorweights are used to give the number of trials when the response is theproportion of successes: they would rarely be used for a Poisson GLM. +that is specific type of case weights are being used:“Non-NULL weights +can be used to indicate that different observations have different +dispersions (with the values in weights being inversely proportional to +the dispersions); or equivalently, when the elements of weights are +positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i} +unit-weight observations. For a binomial GLM prior weights are used to +give the number of trials when the response is the proportion of +successes: they would rarely be used for a Poisson GLM.” If frequency weights are being used in your application, the \code{\link[=glm_grouped]{glm_grouped()}} model (and corresponding engine) may be diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index 63a39ce8b..f23032f1c 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -42,7 +42,7 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% ## ## Model fit template: ## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(), -## family = missing_arg(), weights = missing_arg(), alpha = double(1)) +## family = missing_arg(), alpha = double(1)) } } @@ -82,14 +82,18 @@ mod <- proportional_hazards(penalty = 0.01) \%>\% set_engine("glmnet", nlambda = 5) \%>\% fit(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian) -}\if{html}{\out{}}\preformatted{## Error in glmnet::glmnet(data_obj$x, data_obj$y, family = "cox", alpha = alpha, : formal argument "family" matched by multiple actual arguments -}\if{html}{\out{
}}\preformatted{pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) + +pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) # Different survival probabilities for different values of 'rx' predict(mod, pred_data, type = "survival", time = 500) \%>\% bind_cols(pred_data) \%>\% unnest(.pred) -}\if{html}{\out{
}}\preformatted{## Error in predict(mod, pred_data, type = "survival", time = 500): object 'mod' not found +}\if{html}{\out{}}\preformatted{## # A tibble: 2 × 5 +## .time .pred_survival age ecog.ps rx +## +## 1 500 0.666 50 1 1 +## 2 500 0.769 50 1 2 } Note that columns used in the \code{strata()} function \emph{will} also be @@ -115,15 +119,6 @@ value produced by the \code{predict()} method in the engine package. This behavior can be changed by using the \code{increasing} argument when calling \code{predict()} on a model object. -\subsection{Case weights}{ - -This model can utilize case weights during model fitting. To use them, -see the documentation in \link{case_weights} and the examples -on \code{tidymodels.org}. - -The \code{fit()} and \code{fit_xy()} arguments have arguments called -\code{case_weights} that expect vectors of case weights. -} } \section{References}{ diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd index e8a15f5b8..2f9fd6931 100644 --- a/man/details_proportional_hazards_survival.Rd +++ b/man/details_proportional_hazards_survival.Rd @@ -27,7 +27,7 @@ proportional_hazards() \%>\% ## ## Model fit template: ## survival::coxph(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), x = TRUE, model = TRUE) +## x = TRUE, model = TRUE) } } diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd index a29fb6c74..489351502 100644 --- a/man/details_rand_forest_party.Rd +++ b/man/details_rand_forest_party.Rd @@ -34,7 +34,7 @@ rand_forest() \%>\% ## ## Model fit template: ## censored::cond_inference_surv_cforest(formula = missing_arg(), -## data = missing_arg(), weights = missing_arg()) +## data = missing_arg()) } \code{censored::cond_inference_surv_cforest()} is a wrapper around diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index 3febb5875..4816de6a7 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -135,6 +135,11 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Friedman and Popescu. “Predictive learning via rule ensembles.” Ann. diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index caf23bd68..26a47467d 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -84,6 +84,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd index bcd98b23b..e772b6680 100644 --- a/man/details_svm_linear_kernlab.Rd +++ b/man/details_svm_linear_kernlab.Rd @@ -81,6 +81,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd index f28b8cef5..546fe1e81 100644 --- a/man/details_svm_poly_kernlab.Rd +++ b/man/details_svm_poly_kernlab.Rd @@ -93,6 +93,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd index 076699ea5..c3ff8ad24 100644 --- a/man/details_svm_rbf_kernlab.Rd +++ b/man/details_svm_rbf_kernlab.Rd @@ -93,6 +93,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/rmd/bag_tree_rpart.md b/man/rmd/bag_tree_rpart.md index 86801501c..a47072d0b 100644 --- a/man/rmd/bag_tree_rpart.md +++ b/man/rmd/bag_tree_rpart.md @@ -107,8 +107,7 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## ## Model fit template: ## ipred::bagging(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = integer(1)) +## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) ``` diff --git a/man/rmd/boost_tree_mboost.md b/man/rmd/boost_tree_mboost.md index 50c5ccba5..387fd3496 100644 --- a/man/rmd/boost_tree_mboost.md +++ b/man/rmd/boost_tree_mboost.md @@ -42,7 +42,7 @@ boost_tree() %>% ## ## Model fit template: ## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), family = mboost::CoxPH()) +## family = mboost::CoxPH()) ``` `censored::blackboost_train()` is a wrapper around [mboost::blackboost()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/decision_tree_party.md b/man/rmd/decision_tree_party.md index 4b44782a2..38274833b 100644 --- a/man/rmd/decision_tree_party.md +++ b/man/rmd/decision_tree_party.md @@ -44,8 +44,8 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% ## ## Model fit template: ## censored::cond_inference_surv_ctree(formula = missing_arg(), -## data = missing_arg(), weights = missing_arg(), maxdepth = integer(1), -## minsplit = min_rows(0L, data)) +## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) ``` `censored::cond_inference_surv_ctree()` is a wrapper around [party::ctree()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/decision_tree_rpart.md b/man/rmd/decision_tree_rpart.md index f6e5dd6b2..b3211c724 100644 --- a/man/rmd/decision_tree_rpart.md +++ b/man/rmd/decision_tree_rpart.md @@ -99,8 +99,8 @@ decision_tree( ## ## Model fit template: ## pec::pecRpart(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = min_rows(0L, data)) +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) ``` ## Preprocessing requirements diff --git a/man/rmd/linear_reg_brulee.Rmd b/man/rmd/linear_reg_brulee.Rmd index 94e4ea598..75e55b6af 100644 --- a/man/rmd/linear_reg_brulee.Rmd +++ b/man/rmd/linear_reg_brulee.Rmd @@ -51,6 +51,11 @@ linear_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/linear_reg_brulee.md b/man/rmd/linear_reg_brulee.md index d5d7d4fc6..ee1fea6f3 100644 --- a/man/rmd/linear_reg_brulee.md +++ b/man/rmd/linear_reg_brulee.md @@ -57,6 +57,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd index 5e6fe8603..dac2c4e51 100644 --- a/man/rmd/linear_reg_gee.Rmd +++ b/man/rmd/linear_reg_gee.Rmd @@ -69,6 +69,11 @@ fit(gee_wflow, data = warpbreaks) Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. diff --git a/man/rmd/linear_reg_gee.md b/man/rmd/linear_reg_gee.md index 835542c94..01aaab16b 100644 --- a/man/rmd/linear_reg_gee.md +++ b/man/rmd/linear_reg_gee.md @@ -80,6 +80,11 @@ The `gee::gee()` function always prints out warnings and output even when `silen Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. diff --git a/man/rmd/linear_reg_keras.Rmd b/man/rmd/linear_reg_keras.Rmd index 4e2564e3a..d6caeae42 100644 --- a/man/rmd/linear_reg_keras.Rmd +++ b/man/rmd/linear_reg_keras.Rmd @@ -42,6 +42,11 @@ linear_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-keras) for `linear_reg()` with the `"keras"` engine. diff --git a/man/rmd/linear_reg_keras.md b/man/rmd/linear_reg_keras.md index e50251812..50c0cfac8 100644 --- a/man/rmd/linear_reg_keras.md +++ b/man/rmd/linear_reg_keras.md @@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-keras) for `linear_reg()` with the `"keras"` engine. diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd index d75d44e52..4d90dbee6 100644 --- a/man/rmd/linear_reg_lme.Rmd +++ b/man/rmd/linear_reg_lme.Rmd @@ -62,7 +62,7 @@ fit(lme_wflow, data = riesby) ## Case weights -```{r child = "template-uses-case-weights.Rmd"} +```{r child = "template-no-case-weights.Rmd"} ``` ## References diff --git a/man/rmd/linear_reg_lme.md b/man/rmd/linear_reg_lme.md index 56be80610..46013079c 100644 --- a/man/rmd/linear_reg_lme.md +++ b/man/rmd/linear_reg_lme.md @@ -92,9 +92,7 @@ fit(lme_wflow, data = riesby) ## Case weights -This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. - -The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +The underlying model implementation does not allow for case weights. ## References diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd deleted file mode 100644 index 021055253..000000000 --- a/man/rmd/logistic-reg.Rmd +++ /dev/null @@ -1,117 +0,0 @@ -# Engine Details - -```{r, child = "aaa.Rmd", include = FALSE} -``` - -Engines may have pre-set default arguments when executing the model fit call. -For this type of model, the template of the fit calls are below. - -## glm - -```{r glm-reg} -logistic_reg() %>% - set_engine("glm") %>% - translate() -``` - -## glmnet - -```{r glmnet-csl} -logistic_reg(penalty = 0.1) %>% - set_engine("glmnet") %>% - translate() -``` - -The glmnet engine requires a single value for the `penalty` argument (a number -or `tune()`), but the full regularization path is always fit -regardless of the value given to `penalty`. To pass in a custom sequence of -values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. -This will assign the value of the glmnet `lambda` parameter without disturbing -the value given of `logistic_reg(penalty)`. For example: - -```{r glmnet-path} -logistic_reg(penalty = .1) %>% - set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% - translate() -``` - -When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly -suggest_ that you pass in a vector for `path_values` that includes zero. See -[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. - -When using `predict()`, the single `penalty` value used for prediction is the -one specified in `logistic_reg()`. - -To predict on multiple penalties, use the `multi_predict()` function. -This function returns a tibble with a list column called `.pred` containing -all of the penalty results. - - -## LiblineaR - -```{r liblinear-reg} -logistic_reg() %>% - set_engine("LiblineaR") %>% - translate() -``` - -For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1 -(for lasso) but not other intermediate values. In the `LiblineaR` documentation, -these correspond to types 0 (L2-regularized) and 6 (L1-regularized). - -Be aware that the `LiblineaR` engine regularizes the intercept. Other -regularized regression models do not, which will result in different parameter estimates. - -## stan - -```{r stan-reg} -logistic_reg() %>% - set_engine("stan") %>% - translate() -``` - -Note that the `refresh` default prevents logging of the estimation process. -Change this value in `set_engine()` to show the logs. - -For prediction, the `stan` engine can compute posterior intervals analogous to -confidence and prediction intervals. In these instances, the units are the -original outcome and when `std_error = TRUE`, the standard deviation of the -posterior distribution (or posterior predictive distribution as appropriate) is -returned. - -## spark - -```{r spark-reg} -logistic_reg() %>% - set_engine("spark") %>% - translate() -``` - -## keras - -```{r keras-reg} -logistic_reg() %>% - set_engine("keras") %>% - translate() -``` - - -## Parameter translations - -The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters. Each engine typically has a -different default value (shown in parentheses) for each parameter. - -```{r echo = FALSE, results = "asis"} -get_defaults_logistic_reg <- function() { - tibble::tribble( - ~model, ~engine, ~parsnip, ~original, ~default, - "logistic_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"), - "logistic_reg", "LiblineaR", "mixture", "type", "0", - "logistic_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_logistic_regression", "reg_param"), - "logistic_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_logistic_regression", "elastic_net_param"), - "logistic_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), - ) -} -convert_args("logistic_reg") -``` diff --git a/man/rmd/logistic-reg.md b/man/rmd/logistic-reg.md deleted file mode 100644 index c98158a9a..000000000 --- a/man/rmd/logistic-reg.md +++ /dev/null @@ -1,195 +0,0 @@ -# Engine Details - - - - -Engines may have pre-set default arguments when executing the model fit call. -For this type of model, the template of the fit calls are below. - -## glm - - -```r -logistic_reg() %>% - set_engine("glm") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Computational engine: glm -## -## Model fit template: -## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), -## family = stats::binomial) -``` - -## glmnet - - -```r -logistic_reg(penalty = 0.1) %>% - set_engine("glmnet") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Main Arguments: -## penalty = 0.1 -## -## Computational engine: glmnet -## -## Model fit template: -## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), -## family = "binomial") -``` - -The glmnet engine requires a single value for the `penalty` argument (a number -or `tune()`), but the full regularization path is always fit -regardless of the value given to `penalty`. To pass in a custom sequence of -values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. -This will assign the value of the glmnet `lambda` parameter without disturbing -the value given of `logistic_reg(penalty)`. For example: - - -```r -logistic_reg(penalty = .1) %>% - set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Main Arguments: -## penalty = 0.1 -## -## Computational engine: glmnet -## -## Model fit template: -## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), -## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "binomial") -``` - -When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly -suggest_ that you pass in a vector for `path_values` that includes zero. See -[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. - -When using `predict()`, the single `penalty` value used for prediction is the -one specified in `logistic_reg()`. - -To predict on multiple penalties, use the `multi_predict()` function. -This function returns a tibble with a list column called `.pred` containing -all of the penalty results. - - -## LiblineaR - - -```r -logistic_reg() %>% - set_engine("LiblineaR") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Computational engine: LiblineaR -## -## Model fit template: -## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), verbose = FALSE) -``` - -For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1 -(for lasso) but not other intermediate values. In the `LiblineaR` documentation, -these correspond to types 0 (L2-regularized) and 6 (L1-regularized). - -Be aware that the `LiblineaR` engine regularizes the intercept. Other -regularized regression models do not, which will result in different parameter estimates. - -## stan - - -```r -logistic_reg() %>% - set_engine("stan") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Computational engine: stan -## -## Model fit template: -## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), family = stats::binomial, refresh = 0) -``` - -Note that the `refresh` default prevents logging of the estimation process. -Change this value in `set_engine()` to show the logs. - -For prediction, the `stan` engine can compute posterior intervals analogous to -confidence and prediction intervals. In these instances, the units are the -original outcome and when `std_error = TRUE`, the standard deviation of the -posterior distribution (or posterior predictive distribution as appropriate) is -returned. - -## spark - - -```r -logistic_reg() %>% - set_engine("spark") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Computational engine: spark -## -## Model fit template: -## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), -## weights = missing_arg(), family = "binomial") -``` - -## keras - - -```r -logistic_reg() %>% - set_engine("keras") %>% - translate() -``` - -``` -## Logistic Regression Model Specification (classification) -## -## Computational engine: keras -## -## Model fit template: -## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1, -## act = "linear") -``` - - -## Parameter translations - -The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters. Each engine typically has a -different default value (shown in parentheses) for each parameter. - - -|**parsnip** |**glmnet** |**LiblineaR** |**spark** |**keras** |**brulee** | -|:-----------|:----------|:-------------|:---------------------|:-----------|:----------| -|penalty |lambda |cost |reg_param (0) |penalty (0) |penalty | -|mixture |alpha (1) |type (0) |elastic_net_param (0) |NA |mixture | -|epochs |NA |NA |NA |NA |epochs | -|learn_rate |NA |NA |NA |NA |learn_rate | -|momentum |NA |NA |NA |NA |momentum | -|stop_iter |NA |NA |NA |NA |stop_iter | diff --git a/man/rmd/logistic_reg_brulee.Rmd b/man/rmd/logistic_reg_brulee.Rmd index 75cf83785..e6a8d2e3f 100644 --- a/man/rmd/logistic_reg_brulee.Rmd +++ b/man/rmd/logistic_reg_brulee.Rmd @@ -50,6 +50,11 @@ logistic_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/logistic_reg_brulee.md b/man/rmd/logistic_reg_brulee.md index ca79d0192..9573a98fa 100644 --- a/man/rmd/logistic_reg_brulee.md +++ b/man/rmd/logistic_reg_brulee.md @@ -56,6 +56,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/logistic_reg_keras.Rmd b/man/rmd/logistic_reg_keras.Rmd index f984508e2..47c90a887 100644 --- a/man/rmd/logistic_reg_keras.Rmd +++ b/man/rmd/logistic_reg_keras.Rmd @@ -42,6 +42,11 @@ logistic_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-keras) for `logistic_reg()` with the `"keras"` engine. diff --git a/man/rmd/logistic_reg_keras.md b/man/rmd/logistic_reg_keras.md index a51c0fba6..c98b1791a 100644 --- a/man/rmd/logistic_reg_keras.md +++ b/man/rmd/logistic_reg_keras.md @@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-keras) for `logistic_reg()` with the `"keras"` engine. diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd index 8f0dddc47..7732c39af 100644 --- a/man/rmd/mlp_brulee.Rmd +++ b/man/rmd/mlp_brulee.Rmd @@ -77,6 +77,11 @@ mlp( ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/mlp_brulee.md b/man/rmd/mlp_brulee.md index 2ec86f5d0..0290d0c08 100644 --- a/man/rmd/mlp_brulee.md +++ b/man/rmd/mlp_brulee.md @@ -119,6 +119,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/mlp_keras.Rmd b/man/rmd/mlp_keras.Rmd index 280d99994..a995f7766 100644 --- a/man/rmd/mlp_keras.Rmd +++ b/man/rmd/mlp_keras.Rmd @@ -61,6 +61,11 @@ mlp( ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-keras) for `mlp()` with the `"keras"` engine. diff --git a/man/rmd/mlp_keras.md b/man/rmd/mlp_keras.md index 628bf56f4..5adff8c9e 100644 --- a/man/rmd/mlp_keras.md +++ b/man/rmd/mlp_keras.md @@ -97,6 +97,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-keras) for `mlp()` with the `"keras"` engine. diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd index e1669e977..e77f2eff6 100644 --- a/man/rmd/naive_Bayes_klaR.Rmd +++ b/man/rmd/naive_Bayes_klaR.Rmd @@ -44,6 +44,11 @@ The columns for qualitative predictors should always be represented as factors ( ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/naive_Bayes_klaR.md b/man/rmd/naive_Bayes_klaR.md index c71f3e031..0fd324c2a 100644 --- a/man/rmd/naive_Bayes_klaR.md +++ b/man/rmd/naive_Bayes_klaR.md @@ -52,6 +52,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/nearest_neighbor_kknn.Rmd b/man/rmd/nearest_neighbor_kknn.Rmd index 2795d52e2..c0bb8a6ec 100644 --- a/man/rmd/nearest_neighbor_kknn.Rmd +++ b/man/rmd/nearest_neighbor_kknn.Rmd @@ -62,6 +62,11 @@ nearest_neighbor( The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#nearest-neighbor-kknn) for `nearest_neighbor()` with the `"kknn"` engine. +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich diff --git a/man/rmd/nearest_neighbor_kknn.md b/man/rmd/nearest_neighbor_kknn.md index f39c597bd..7af970293 100644 --- a/man/rmd/nearest_neighbor_kknn.md +++ b/man/rmd/nearest_neighbor_kknn.md @@ -88,6 +88,11 @@ scale each so that each predictor has mean zero and a variance of one. The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#nearest-neighbor-kknn) for `nearest_neighbor()` with the `"kknn"` engine. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich diff --git a/man/rmd/proportional_hazards_survival.md b/man/rmd/proportional_hazards_survival.md index eec87ddda..2c5e52fe4 100644 --- a/man/rmd/proportional_hazards_survival.md +++ b/man/rmd/proportional_hazards_survival.md @@ -28,7 +28,7 @@ proportional_hazards() %>% ## ## Model fit template: ## survival::coxph(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), x = TRUE, model = TRUE) +## x = TRUE, model = TRUE) ``` ## Other details diff --git a/man/rmd/rand_forest_party.md b/man/rmd/rand_forest_party.md index f883edfb4..719d12f26 100644 --- a/man/rmd/rand_forest_party.md +++ b/man/rmd/rand_forest_party.md @@ -36,7 +36,7 @@ rand_forest() %>% ## ## Model fit template: ## censored::cond_inference_surv_cforest(formula = missing_arg(), -## data = missing_arg(), weights = missing_arg()) +## data = missing_arg()) ``` `censored::cond_inference_surv_cforest()` is a wrapper around [party::cforest()] (and other functions) that makes it easier to run this model. diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd index 3a7ce74bd..332fa83ae 100644 --- a/man/rmd/rule_fit_xrf.Rmd +++ b/man/rmd/rule_fit_xrf.Rmd @@ -88,6 +88,11 @@ These differences will create a disparity in the values of the `penalty` argumen ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Friedman and Popescu. "Predictive learning via rule ensembles." Ann. Appl. Stat. 2 (3) 916- 954, September 2008 diff --git a/man/rmd/rule_fit_xrf.md b/man/rmd/rule_fit_xrf.md index 005106f30..9d9e5b8c5 100644 --- a/man/rmd/rule_fit_xrf.md +++ b/man/rmd/rule_fit_xrf.md @@ -137,6 +137,11 @@ These differences will create a disparity in the values of the `penalty` argumen Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Friedman and Popescu. "Predictive learning via rule ensembles." Ann. Appl. Stat. 2 (3) 916- 954, September 2008 diff --git a/man/rmd/svm_linear_LiblineaR.Rmd b/man/rmd/svm_linear_LiblineaR.Rmd index 86b74c70e..fb7419c81 100644 --- a/man/rmd/svm_linear_LiblineaR.Rmd +++ b/man/rmd/svm_linear_LiblineaR.Rmd @@ -59,6 +59,11 @@ Note that the `LiblineaR` engine does not produce class probabilities. When opti ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine. diff --git a/man/rmd/svm_linear_LiblineaR.md b/man/rmd/svm_linear_LiblineaR.md index fa603db80..1afe530f8 100644 --- a/man/rmd/svm_linear_LiblineaR.md +++ b/man/rmd/svm_linear_LiblineaR.md @@ -80,6 +80,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine. diff --git a/man/rmd/svm_linear_kernlab.Rmd b/man/rmd/svm_linear_kernlab.Rmd index 560b4b129..f3afcc31b 100644 --- a/man/rmd/svm_linear_kernlab.Rmd +++ b/man/rmd/svm_linear_kernlab.Rmd @@ -57,6 +57,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-kernlab) for `svm_linear()` with the `"kernlab"` engine. diff --git a/man/rmd/svm_linear_kernlab.md b/man/rmd/svm_linear_kernlab.md index 399e80d93..f8aadb40b 100644 --- a/man/rmd/svm_linear_kernlab.md +++ b/man/rmd/svm_linear_kernlab.md @@ -78,6 +78,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-kernlab) for `svm_linear()` with the `"kernlab"` engine. diff --git a/man/rmd/svm_poly_kernlab.Rmd b/man/rmd/svm_poly_kernlab.Rmd index 78efd4743..d5555dca2 100644 --- a/man/rmd/svm_poly_kernlab.Rmd +++ b/man/rmd/svm_poly_kernlab.Rmd @@ -61,6 +61,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-poly-kernlab) for `svm_poly()` with the `"kernlab"` engine. diff --git a/man/rmd/svm_poly_kernlab.md b/man/rmd/svm_poly_kernlab.md index 1bdf9d6b2..584d602ec 100644 --- a/man/rmd/svm_poly_kernlab.md +++ b/man/rmd/svm_poly_kernlab.md @@ -92,6 +92,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-poly-kernlab) for `svm_poly()` with the `"kernlab"` engine. diff --git a/man/rmd/svm_rbf_kernlab.Rmd b/man/rmd/svm_rbf_kernlab.Rmd index 338e9c542..88e9bc46f 100644 --- a/man/rmd/svm_rbf_kernlab.Rmd +++ b/man/rmd/svm_rbf_kernlab.Rmd @@ -61,6 +61,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-rbf-kernlab) for `svm_rbf()` with the `"kernlab"` engine. diff --git a/man/rmd/svm_rbf_kernlab.md b/man/rmd/svm_rbf_kernlab.md index 8eaa338b1..3be887803 100644 --- a/man/rmd/svm_rbf_kernlab.md +++ b/man/rmd/svm_rbf_kernlab.md @@ -86,6 +86,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-rbf-kernlab) for `svm_rbf()` with the `"kernlab"` engine. From 8a6f61cb6da52c8dd8702d04340051624833e585 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 31 Mar 2022 17:56:27 -0400 Subject: [PATCH 29/41] added more notes in engine docs --- man/rmd/bag_mars_earth.Rmd | 5 +++++ man/rmd/bag_mars_earth.md | 7 +++++++ man/rmd/bag_tree_rpart.Rmd | 5 +++++ man/rmd/bag_tree_rpart.md | 7 +++++++ man/rmd/discrim_flexible_earth.Rmd | 4 ++++ man/rmd/discrim_flexible_earth.md | 6 ++++++ man/rmd/discrim_linear_mda.Rmd | 5 +++++ man/rmd/discrim_linear_mda.md | 7 +++++++ 8 files changed, 46 insertions(+) diff --git a/man/rmd/bag_mars_earth.Rmd b/man/rmd/bag_mars_earth.Rmd index b7df34e07..143cb5b4a 100644 --- a/man/rmd/bag_mars_earth.Rmd +++ b/man/rmd/bag_mars_earth.Rmd @@ -57,6 +57,11 @@ bag_mars( ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 diff --git a/man/rmd/bag_mars_earth.md b/man/rmd/bag_mars_earth.md index 35a5db199..8bf913843 100644 --- a/man/rmd/bag_mars_earth.md +++ b/man/rmd/bag_mars_earth.md @@ -84,6 +84,13 @@ bag_mars( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 diff --git a/man/rmd/bag_tree_rpart.Rmd b/man/rmd/bag_tree_rpart.Rmd index c419455ed..390f029cc 100644 --- a/man/rmd/bag_tree_rpart.Rmd +++ b/man/rmd/bag_tree_rpart.Rmd @@ -72,6 +72,11 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ```{r child = "template-tree-split-factors.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 diff --git a/man/rmd/bag_tree_rpart.md b/man/rmd/bag_tree_rpart.md index a47072d0b..e2da8fac5 100644 --- a/man/rmd/bag_tree_rpart.md +++ b/man/rmd/bag_tree_rpart.md @@ -116,6 +116,13 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd index 76a631b65..7507c7e49 100644 --- a/man/rmd/discrim_flexible_earth.Rmd +++ b/man/rmd/discrim_flexible_earth.Rmd @@ -44,6 +44,10 @@ discrim_flexible( ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` ## References diff --git a/man/rmd/discrim_flexible_earth.md b/man/rmd/discrim_flexible_earth.md index 8fed9cda4..e81bf76e8 100644 --- a/man/rmd/discrim_flexible_earth.md +++ b/man/rmd/discrim_flexible_earth.md @@ -53,6 +53,12 @@ discrim_flexible( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. ## References diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd index 9430e2fa1..f5ce5442a 100644 --- a/man/rmd/discrim_linear_mda.Rmd +++ b/man/rmd/discrim_linear_mda.Rmd @@ -43,6 +43,11 @@ discrim_linear(penalty = numeric(0)) %>% ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal diff --git a/man/rmd/discrim_linear_mda.md b/man/rmd/discrim_linear_mda.md index 045d9502b..0bac0c65c 100644 --- a/man/rmd/discrim_linear_mda.md +++ b/man/rmd/discrim_linear_mda.md @@ -48,6 +48,13 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal From eb81af5e7701283f69c6891cf97125421e0ea70c Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 31 Mar 2022 18:03:42 -0400 Subject: [PATCH 30/41] added more notes in engine docs --- man/rmd/discrim_linear_MASS.Rmd | 5 +++ man/rmd/discrim_linear_MASS.md | 5 +++ man/rmd/discrim_linear_sda.Rmd | 5 +++ man/rmd/discrim_linear_sda.md | 5 +++ man/rmd/discrim_linear_sparsediscrim.Rmd | 5 +++ man/rmd/discrim_linear_sparsediscrim.md | 5 +++ man/rmd/discrim_quad_MASS.Rmd | 5 +++ man/rmd/discrim_quad_MASS.md | 5 +++ man/rmd/discrim_quad_sparsediscrim.Rmd | 5 +++ man/rmd/discrim_quad_sparsediscrim.md | 5 +++ man/rmd/discrim_regularized_klaR.Rmd | 5 +++ man/rmd/discrim_regularized_klaR.md | 5 +++ man/rmd/linear_reg_gls.Rmd | 3 ++ man/rmd/linear_reg_gls.md | 3 ++ man/rmd/logistic_reg_gee.Rmd | 5 +++ man/rmd/logistic_reg_gee.md | 5 +++ man/rmd/multinom_reg_brulee.Rmd | 5 +++ man/rmd/multinom_reg_brulee.md | 5 +++ man/rmd/multinom_reg_keras.Rmd | 5 +++ man/rmd/multinom_reg_keras.md | 5 +++ man/rmd/naive_Bayes_naivebayes.Rmd | 5 +++ man/rmd/naive_Bayes_naivebayes.md | 5 +++ man/rmd/nearest-neighbor.Rmd | 49 ------------------------ man/rmd/pls_mixOmics.Rmd | 7 +++- man/rmd/pls_mixOmics.md | 7 +++- man/rmd/poisson_reg_gee.Rmd | 7 +++- man/rmd/poisson_reg_gee.md | 7 +++- 27 files changed, 126 insertions(+), 57 deletions(-) delete mode 100644 man/rmd/nearest-neighbor.Rmd diff --git a/man/rmd/discrim_linear_MASS.Rmd b/man/rmd/discrim_linear_MASS.Rmd index c8f6ef6a8..db58ad2be 100644 --- a/man/rmd/discrim_linear_MASS.Rmd +++ b/man/rmd/discrim_linear_MASS.Rmd @@ -27,6 +27,11 @@ discrim_linear() %>% ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_linear_MASS.md b/man/rmd/discrim_linear_MASS.md index 234a5b64f..deb6efb09 100644 --- a/man/rmd/discrim_linear_MASS.md +++ b/man/rmd/discrim_linear_MASS.md @@ -39,6 +39,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_linear_sda.Rmd b/man/rmd/discrim_linear_sda.Rmd index 653988dd2..d979220e0 100644 --- a/man/rmd/discrim_linear_sda.Rmd +++ b/man/rmd/discrim_linear_sda.Rmd @@ -37,6 +37,11 @@ discrim_linear() %>% ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](https://arxiv.org/abs/0903.2003). diff --git a/man/rmd/discrim_linear_sda.md b/man/rmd/discrim_linear_sda.md index 9c283f3e2..fdab652d6 100644 --- a/man/rmd/discrim_linear_sda.md +++ b/man/rmd/discrim_linear_sda.md @@ -49,6 +49,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](https://arxiv.org/abs/0903.2003). diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd index cdd4c12c7..06c0cd4e7 100644 --- a/man/rmd/discrim_linear_sparsediscrim.Rmd +++ b/man/rmd/discrim_linear_sparsediscrim.Rmd @@ -49,6 +49,11 @@ discrim_linear(regularization_method = character(0)) %>% ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References diff --git a/man/rmd/discrim_linear_sparsediscrim.md b/man/rmd/discrim_linear_sparsediscrim.md index 2ea48fb2d..69eaf780b 100644 --- a/man/rmd/discrim_linear_sparsediscrim.md +++ b/man/rmd/discrim_linear_sparsediscrim.md @@ -54,6 +54,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References diff --git a/man/rmd/discrim_quad_MASS.Rmd b/man/rmd/discrim_quad_MASS.Rmd index d25fa1aee..0a1c42da8 100644 --- a/man/rmd/discrim_quad_MASS.Rmd +++ b/man/rmd/discrim_quad_MASS.Rmd @@ -27,6 +27,11 @@ discrim_quad() %>% ```{r child = "template-zv-conditional.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_quad_MASS.md b/man/rmd/discrim_quad_MASS.md index bbf121130..26fcfb940 100644 --- a/man/rmd/discrim_quad_MASS.md +++ b/man/rmd/discrim_quad_MASS.md @@ -39,6 +39,11 @@ Variance calculations are used in these computations within each outcome class. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd index 17f1e4973..650affc04 100644 --- a/man/rmd/discrim_quad_sparsediscrim.Rmd +++ b/man/rmd/discrim_quad_sparsediscrim.Rmd @@ -48,6 +48,11 @@ discrim_quad(regularization_method = character(0)) %>% ```{r child = "template-zv-conditional.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. diff --git a/man/rmd/discrim_quad_sparsediscrim.md b/man/rmd/discrim_quad_sparsediscrim.md index a3cc0d175..055b4c825 100644 --- a/man/rmd/discrim_quad_sparsediscrim.md +++ b/man/rmd/discrim_quad_sparsediscrim.md @@ -53,6 +53,11 @@ Variance calculations are used in these computations within each outcome class. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd index 582491bc8..75bfde845 100644 --- a/man/rmd/discrim_regularized_klaR.Rmd +++ b/man/rmd/discrim_regularized_klaR.Rmd @@ -51,6 +51,11 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) %> ```{r child = "template-zv-conditional.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175. diff --git a/man/rmd/discrim_regularized_klaR.md b/man/rmd/discrim_regularized_klaR.md index 96f8a93a1..e5fcc0d3e 100644 --- a/man/rmd/discrim_regularized_klaR.md +++ b/man/rmd/discrim_regularized_klaR.md @@ -59,6 +59,11 @@ Variance calculations are used in these computations within each outcome class. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175. diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index 717108cc7..88e0aa5c2 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ b/man/rmd/linear_reg_gls.Rmd @@ -119,7 +119,10 @@ lme_fit %>% tidy() %>% gls_fit %>% tidy() ``` +## Case weights +```{r child = "template-no-case-weights.Rmd"} +``` ## References diff --git a/man/rmd/linear_reg_gls.md b/man/rmd/linear_reg_gls.md index 5afb7e7c6..3290b6d02 100644 --- a/man/rmd/linear_reg_gls.md +++ b/man/rmd/linear_reg_gls.md @@ -197,8 +197,11 @@ gls_fit %>% tidy() ## 2 week -2.12 0.224 -9.47 2.26e-18 ``` +## Case weights +The underlying model implementation does not allow for case weights. + ## References - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd index 219750979..ac3a60707 100644 --- a/man/rmd/logistic_reg_gee.Rmd +++ b/man/rmd/logistic_reg_gee.Rmd @@ -69,6 +69,11 @@ fit(gee_wflow, data = toenail) Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. diff --git a/man/rmd/logistic_reg_gee.md b/man/rmd/logistic_reg_gee.md index 84f7de293..9a63f2c58 100644 --- a/man/rmd/logistic_reg_gee.md +++ b/man/rmd/logistic_reg_gee.md @@ -80,6 +80,11 @@ The `gee::gee()` function always prints out warnings and output even when `silen Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. diff --git a/man/rmd/multinom_reg_brulee.Rmd b/man/rmd/multinom_reg_brulee.Rmd index 6e30242ba..86cc2afdd 100644 --- a/man/rmd/multinom_reg_brulee.Rmd +++ b/man/rmd/multinom_reg_brulee.Rmd @@ -50,6 +50,11 @@ multinom_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/multinom_reg_brulee.md b/man/rmd/multinom_reg_brulee.md index 8cfbc5f42..20166fac6 100644 --- a/man/rmd/multinom_reg_brulee.md +++ b/man/rmd/multinom_reg_brulee.md @@ -56,6 +56,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/multinom_reg_keras.Rmd b/man/rmd/multinom_reg_keras.Rmd index f182c82f6..a5c15402b 100644 --- a/man/rmd/multinom_reg_keras.Rmd +++ b/man/rmd/multinom_reg_keras.Rmd @@ -42,6 +42,11 @@ multinom_reg(penalty = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-keras) for `multinom_reg()` with the `"keras"` engine. diff --git a/man/rmd/multinom_reg_keras.md b/man/rmd/multinom_reg_keras.md index 6a88fde7c..acea8e2fc 100644 --- a/man/rmd/multinom_reg_keras.md +++ b/man/rmd/multinom_reg_keras.md @@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-keras) for `multinom_reg()` with the `"keras"` engine. diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd b/man/rmd/naive_Bayes_naivebayes.Rmd index dc02f66e3..ec575a26f 100644 --- a/man/rmd/naive_Bayes_naivebayes.Rmd +++ b/man/rmd/naive_Bayes_naivebayes.Rmd @@ -44,6 +44,11 @@ For count data, integers can be estimated using a Poisson distribution if the ar ```{r child = "template-zv.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/naive_Bayes_naivebayes.md b/man/rmd/naive_Bayes_naivebayes.md index 41818ed2b..e015205d2 100644 --- a/man/rmd/naive_Bayes_naivebayes.md +++ b/man/rmd/naive_Bayes_naivebayes.md @@ -52,6 +52,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd deleted file mode 100644 index 47f8e110d..000000000 --- a/man/rmd/nearest-neighbor.Rmd +++ /dev/null @@ -1,49 +0,0 @@ -# Engine Details - -```{r, child = "aaa.Rmd", include = FALSE} -``` - -Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: - -## kknn - -```{r kknn-reg} -nearest_neighbor() %>% - set_engine("kknn") %>% - set_mode("regression") %>% - translate() -``` - -```{r kknn-cls} -nearest_neighbor() %>% - set_engine("kknn") %>% - set_mode("classification") %>% - translate() -``` - -For `kknn`, the underlying modeling function used is a restricted version of -`train.kknn()` and not `kknn()`. It is set up in this way so that `parsnip` can -utilize the underlying `predict.train.kknn` method to predict on new data. This -also means that a single value of that function's `kernel` argument (a.k.a -`weight_func` here) can be supplied - -For this engine, tuning over `neighbors` is very efficient since the same model -object can be used to make predictions over multiple values of `neighbors`. - -## Parameter translations - -The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters. Each engine typically has a -different default value (shown in parentheses) for each parameter. - -```{r echo = FALSE, results = "asis"} -get_defaults_nearest_neighbor <- function() { - tibble::tribble( - ~model, ~engine, ~parsnip, ~original, ~default, - "nearest_neighbor", "kknn", "weight_func", "kernel", get_arg("kknn", "train.kknn", "kernel"), - "nearest_neighbor", "kknn", "dist_power", "distance", get_arg("kknn", "train.kknn", "distance"), - ) -} -convert_args("nearest_neighbor") -``` - diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd index 1bc593da8..c12a9b581 100644 --- a/man/rmd/pls_mixOmics.Rmd +++ b/man/rmd/pls_mixOmics.Rmd @@ -71,8 +71,6 @@ This package is available via the Bioconductor repository and is not accessible remotes::install_bioc("mixOmics") ``` - - ## Preprocessing requirements ```{r child = "template-makes-dummies.Rmd"} @@ -84,6 +82,11 @@ This package is available via the Bioconductor repository and is not accessible ```{r child = "template-same-scale.Rmd"} ``` +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` + ## References - Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752. diff --git a/man/rmd/pls_mixOmics.md b/man/rmd/pls_mixOmics.md index 733b840ad..4228a894c 100644 --- a/man/rmd/pls_mixOmics.md +++ b/man/rmd/pls_mixOmics.md @@ -92,8 +92,6 @@ This package is available via the Bioconductor repository and is not accessible remotes::install_bioc("mixOmics") ``` - - ## Preprocessing requirements @@ -108,6 +106,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Case weights + + +The underlying model implementation does not allow for case weights. + ## References - Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752. diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd index 393c43e7b..db6254ed0 100644 --- a/man/rmd/poisson_reg_gee.Rmd +++ b/man/rmd/poisson_reg_gee.Rmd @@ -26,9 +26,12 @@ poisson_reg(engine = "gee") %>% There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. -## Other details +## Case weights + +```{r child = "template-no-case-weights.Rmd"} +``` -The model cannot accept case weights. +## Other details Both `gee:gee()` and `gee:geepack()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: diff --git a/man/rmd/poisson_reg_gee.md b/man/rmd/poisson_reg_gee.md index 5a37ca8b3..fff3b0503 100644 --- a/man/rmd/poisson_reg_gee.md +++ b/man/rmd/poisson_reg_gee.md @@ -37,9 +37,12 @@ poisson_reg(engine = "gee") %>% There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. -## Other details +## Case weights + -The model cannot accept case weights. +The underlying model implementation does not allow for case weights. + +## Other details Both `gee:gee()` and `gee:geepack()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: From 69c7c14214cbb5b3dd494284687d56be3f2e61fc Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 31 Mar 2022 18:05:29 -0400 Subject: [PATCH 31/41] gam weights --- R/gen_additive_mod_data.R | 4 ++-- man/rmd/gen_additive_mod_mgcv.Rmd | 5 +++++ man/rmd/gen_additive_mod_mgcv.md | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/R/gen_additive_mod_data.R b/R/gen_additive_mod_data.R index e65c33e1e..80fd1c2d7 100644 --- a/R/gen_additive_mod_data.R +++ b/R/gen_additive_mod_data.R @@ -47,7 +47,7 @@ set_fit( mode = "regression", value = list( interface = "formula", - protect = c("formula", "data"), + protect = c("formula", "data", "weights"), func = c(pkg = "mgcv", fun = "gam"), defaults = list() ) @@ -127,7 +127,7 @@ set_fit( mode = "classification", value = list( interface = "formula", - protect = c("formula", "data"), + protect = c("formula", "data", "weights"), func = c(pkg = "mgcv", fun = "gam"), defaults = list( family = quote(stats::binomial(link = "logit")) diff --git a/man/rmd/gen_additive_mod_mgcv.Rmd b/man/rmd/gen_additive_mod_mgcv.Rmd index 3c30a6531..f4126991c 100644 --- a/man/rmd/gen_additive_mod_mgcv.Rmd +++ b/man/rmd/gen_additive_mod_mgcv.Rmd @@ -64,6 +64,11 @@ The smoothness of the terms will need to be manually specified (e.g., using `s(x ```{r child = "template-makes-dummies.Rmd"} ``` +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + ## References - Ross, W. 2021. [_Generalized Additive Models in R: A Free, Interactive Course using mgcv_](https://noamross.github.io/gams-in-r-course/) diff --git a/man/rmd/gen_additive_mod_mgcv.md b/man/rmd/gen_additive_mod_mgcv.md index b78e38603..f2c72b0d1 100644 --- a/man/rmd/gen_additive_mod_mgcv.md +++ b/man/rmd/gen_additive_mod_mgcv.md @@ -100,6 +100,13 @@ The smoothness of the terms will need to be manually specified (e.g., using `s(x Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + ## References - Ross, W. 2021. [_Generalized Additive Models in R: A Free, Interactive Course using mgcv_](https://noamross.github.io/gams-in-r-course/) From c75ed66c3b155222d42f3477e473f39ab336e728 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 13 Apr 2022 10:08:15 -0400 Subject: [PATCH 32/41] doc update --- man/details_bag_mars_earth.Rd | 10 ++++++++++ man/details_bag_tree_rpart.Rd | 10 ++++++++++ man/details_discrim_flexible_earth.Rd | 10 ++++++++++ man/details_discrim_linear_MASS.Rd | 5 +++++ man/details_discrim_linear_mda.Rd | 10 ++++++++++ man/details_discrim_linear_sda.Rd | 5 +++++ man/details_discrim_linear_sparsediscrim.Rd | 5 +++++ man/details_discrim_quad_MASS.Rd | 5 +++++ man/details_discrim_quad_sparsediscrim.Rd | 5 +++++ man/details_discrim_regularized_klaR.Rd | 5 +++++ man/details_gen_additive_mod_mgcv.Rd | 10 ++++++++++ man/details_linear_reg_gls.Rd | 5 +++++ man/details_logistic_reg_gee.Rd | 5 +++++ man/details_multinom_reg_brulee.Rd | 5 +++++ man/details_multinom_reg_keras.Rd | 5 +++++ man/details_naive_Bayes_naivebayes.Rd | 5 +++++ man/details_pls_mixOmics.Rd | 5 +++++ man/details_poisson_reg_gee.Rd | 7 +++++-- man/xgb_train.Rd | 1 - 19 files changed, 115 insertions(+), 3 deletions(-) diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index 3e3312bed..47b191c4f 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -83,6 +83,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2): diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index 2503f64cc..e4b2db46a 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -108,6 +108,16 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2): diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index 2bac1fc77..9641d0e40 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -59,6 +59,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index cc2ca4162..ffb1786f9 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -44,6 +44,11 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index 1ca65a955..3a880177f 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -50,6 +50,16 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index 52857aa48..34e04d17e 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -60,6 +60,11 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index 5b2a07e30..3ba9a1f65 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -64,6 +64,11 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item \code{lda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index 375646ff3..773912945 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -45,6 +45,11 @@ unique value) within each class should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index bdac315ca..b6bbe132f 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -63,6 +63,11 @@ unique value) within each class should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item \code{qda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index 1679b4110..112212763 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -65,6 +65,11 @@ unique value) within each class should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Friedman, J (1989). Regularized Discriminant Analysis. \emph{Journal of diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index e52a7e7e5..0d2231988 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -91,6 +91,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. } +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + \subsection{References}{ \itemize{ \item Ross, W. 2021. \href{https://noamross.github.io/gams-in-r-course/}{\emph{Generalized Additive Models in R: A Free, Interactive Course using mgcv}} diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index 9685d4526..c340b5491 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -156,6 +156,11 @@ gls_fit \%>\% tidy() ## 1 (Intercept) -4.95 0.808 -6.13 3.50e- 9 ## 2 week -2.12 0.224 -9.47 2.26e-18 } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd index 8de14d748..d26d1b04c 100644 --- a/man/details_logistic_reg_gee.Rd +++ b/man/details_logistic_reg_gee.Rd @@ -90,6 +90,11 @@ to \code{glm()} is needed to get the rank and QR decomposition objects so that \code{predict()} can be used. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using diff --git a/man/details_multinom_reg_brulee.Rd b/man/details_multinom_reg_brulee.Rd index ed15244c0..b7545dcc8 100644 --- a/man/details_multinom_reg_brulee.Rd +++ b/man/details_multinom_reg_brulee.Rd @@ -66,6 +66,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd index 648209fad..3279b74a4 100644 --- a/man/details_multinom_reg_keras.Rd +++ b/man/details_multinom_reg_keras.Rd @@ -52,6 +52,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd index 118aed909..bb0941287 100644 --- a/man/details_naive_Bayes_naivebayes.Rd +++ b/man/details_naive_Bayes_naivebayes.Rd @@ -54,6 +54,11 @@ predictors (i.e., with a single unique value) should be eliminated before fitting the model. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index 36d56e03b..0ed70e557 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -104,6 +104,11 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} + \subsection{References}{ \itemize{ \item Rohart F and Gautier B and Singh A and Le Cao K-A (2017). “mixOmics: diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd index 39eceea63..5931e0b6f 100644 --- a/man/details_poisson_reg_gee.Rd +++ b/man/details_poisson_reg_gee.Rd @@ -45,9 +45,12 @@ keep the clustering/subject identifier column as factor or character next section. } -\subsection{Other details}{ +\subsection{Case weights}{ + +The underlying model implementation does not allow for case weights. +} -The model cannot accept case weights. +\subsection{Other details}{ Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that diff --git a/man/xgb_train.Rd b/man/xgb_train.Rd index 72efa77d0..204bd8f7b 100644 --- a/man/xgb_train.Rd +++ b/man/xgb_train.Rd @@ -22,7 +22,6 @@ xgb_train( objective = NULL, counts = TRUE, event_level = c("first", "second"), - weights = NULL, ... ) From bc81160240d10ae565c5c54bb6c982cfa24dc5f1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 13 Apr 2022 12:06:38 -0400 Subject: [PATCH 33/41] revert nnet case weights --- R/mlp_data.R | 4 ++-- R/multinom_reg_data.R | 2 +- man/details_mlp_nnet.Rd | 17 +++++------------ man/details_multinom_reg_nnet.Rd | 9 ++------- man/rmd/mlp_nnet.Rmd | 2 +- man/rmd/mlp_nnet.md | 14 +++++--------- man/rmd/multinom_reg_nnet.Rmd | 3 ++- man/rmd/multinom_reg_nnet.md | 5 ++--- 8 files changed, 20 insertions(+), 36 deletions(-) diff --git a/R/mlp_data.R b/R/mlp_data.R index b912bfd0c..a0e5bf0a1 100644 --- a/R/mlp_data.R +++ b/R/mlp_data.R @@ -233,7 +233,7 @@ set_fit( mode = "regression", value = list( interface = "formula", - protect = c("formula", "data", "weights"), + protect = c("formula", "data"), func = c(pkg = "nnet", fun = "nnet"), defaults = list(trace = FALSE) ) @@ -257,7 +257,7 @@ set_fit( mode = "classification", value = list( interface = "formula", - protect = c("formula", "data", "weights"), + protect = c("formula", "data"), func = c(pkg = "nnet", fun = "nnet"), defaults = list(trace = FALSE) ) diff --git a/R/multinom_reg_data.R b/R/multinom_reg_data.R index 96f75df5f..114f23e02 100644 --- a/R/multinom_reg_data.R +++ b/R/multinom_reg_data.R @@ -282,7 +282,7 @@ set_fit( mode = "classification", value = list( interface = "formula", - protect = c("formula", "data", "weights"), + protect = c("formula", "data"), func = c(pkg = "nnet", fun = "multinom"), defaults = list(trace = FALSE) ) diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index a52a3b2a0..d6120222d 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -41,9 +41,8 @@ some models, you may need to pass this value in via ## Computational engine: nnet ## ## Model fit template: -## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), -## size = integer(1), decay = double(1), maxit = integer(1), -## trace = FALSE, linout = TRUE) +## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), +## decay = double(1), maxit = integer(1), trace = FALSE, linout = TRUE) } Note that parsnip automatically sets linear activation in the last @@ -68,9 +67,8 @@ layer. ## Computational engine: nnet ## ## Model fit template: -## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), -## size = integer(1), decay = double(1), maxit = integer(1), -## trace = FALSE, linout = FALSE) +## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), +## decay = double(1), maxit = integer(1), trace = FALSE, linout = FALSE) } } @@ -88,12 +86,7 @@ variance of one. \subsection{Case weights}{ -This model can utilize case weights during model fitting. To use them, -see the documentation in \link{case_weights} and the examples -on \code{tidymodels.org}. - -The \code{fit()} and \code{fit_xy()} arguments have arguments called -\code{case_weights} that expect vectors of case weights. +The underlying model implementation does not allow for case weights. } \subsection{Examples}{ diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index a80d49348..3f175549b 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -32,7 +32,7 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty ## ## Model fit template: ## nnet::multinom(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), decay = double(1), trace = FALSE) +## decay = double(1), trace = FALSE) } } @@ -57,12 +57,7 @@ for \code{multinom_reg()} with the \code{"nnet"} engine. \subsection{Case weights}{ -This model can utilize case weights during model fitting. To use them, -see the documentation in \link{case_weights} and the examples -on \code{tidymodels.org}. - -The \code{fit()} and \code{fit_xy()} arguments have arguments called -\code{case_weights} that expect vectors of case weights. +The underlying model implementation does not allow for case weights. } \subsection{References}{ diff --git a/man/rmd/mlp_nnet.Rmd b/man/rmd/mlp_nnet.Rmd index 4ba1fdd03..1893fea42 100644 --- a/man/rmd/mlp_nnet.Rmd +++ b/man/rmd/mlp_nnet.Rmd @@ -64,7 +64,7 @@ mlp( ## Case weights -```{r child = "template-uses-case-weights.Rmd"} +```{r child = "template-no-case-weights.Rmd"} ``` ## Examples diff --git a/man/rmd/mlp_nnet.md b/man/rmd/mlp_nnet.md index fab376f83..a85ae1103 100644 --- a/man/rmd/mlp_nnet.md +++ b/man/rmd/mlp_nnet.md @@ -43,9 +43,8 @@ mlp( ## Computational engine: nnet ## ## Model fit template: -## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), -## size = integer(1), decay = double(1), maxit = integer(1), -## trace = FALSE, linout = TRUE) +## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), +## decay = double(1), maxit = integer(1), trace = FALSE, linout = TRUE) ``` Note that parsnip automatically sets linear activation in the last layer. @@ -75,9 +74,8 @@ mlp( ## Computational engine: nnet ## ## Model fit template: -## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), -## size = integer(1), decay = double(1), maxit = integer(1), -## trace = FALSE, linout = FALSE) +## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), +## decay = double(1), maxit = integer(1), trace = FALSE, linout = FALSE) ``` @@ -93,9 +91,7 @@ scale each so that each predictor has mean zero and a variance of one. ## Case weights -This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. - -The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +The underlying model implementation does not allow for case weights. ## Examples diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd index 60529d727..185c9ec77 100644 --- a/man/rmd/multinom_reg_nnet.Rmd +++ b/man/rmd/multinom_reg_nnet.Rmd @@ -46,9 +46,10 @@ The "Fitting and Predicting with parsnip" article contains [examples](https://pa ## Case weights -```{r child = "template-uses-case-weights.Rmd"} +```{r child = "template-no-case-weights.Rmd"} ``` + ## References - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media diff --git a/man/rmd/multinom_reg_nnet.md b/man/rmd/multinom_reg_nnet.md index 3735616eb..94fd309d4 100644 --- a/man/rmd/multinom_reg_nnet.md +++ b/man/rmd/multinom_reg_nnet.md @@ -32,7 +32,7 @@ multinom_reg(penalty = double(1)) %>% ## ## Model fit template: ## nnet::multinom(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), decay = double(1), trace = FALSE) +## decay = double(1), trace = FALSE) ``` ## Preprocessing requirements @@ -51,9 +51,8 @@ The "Fitting and Predicting with parsnip" article contains [examples](https://pa ## Case weights -This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. +The underlying model implementation does not allow for case weights. -The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. ## References From 0698b6d25ab2f4190ae50c4aff6207a996873aee Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 14 Apr 2022 09:50:02 -0400 Subject: [PATCH 34/41] S3 method to convert hardhat format to numeric --- NAMESPACE | 4 ++++ R/case_weights.R | 46 +++++++++++++++++++++++++++++++++++++ man/convert_case_weights.Rd | 28 ++++++++++++++++++++++ parsnip.Rproj | 1 + 4 files changed, 79 insertions(+) create mode 100644 man/convert_case_weights.Rd diff --git a/NAMESPACE b/NAMESPACE index 932dca370..b662f6ab7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,9 @@ # Generated by roxygen2: do not edit by hand S3method(augment,model_fit) +S3method(convert_case_weights,default) +S3method(convert_case_weights,hardhat_frequency_weights) +S3method(convert_case_weights,hardhat_importance_weights) S3method(extract_fit_engine,model_fit) S3method(extract_parameter_dials,model_spec) S3method(extract_parameter_set_dials,model_spec) @@ -178,6 +181,7 @@ export(check_model_doesnt_exist) export(check_model_exists) export(contr_one_hot) export(control_parsnip) +export(convert_case_weights) export(convert_stan_interval) export(ctree_train) export(cubist_rules) diff --git a/R/case_weights.R b/R/case_weights.R index eaabcf434..92b759500 100644 --- a/R/case_weights.R +++ b/R/case_weights.R @@ -28,6 +28,7 @@ #' @seealso [frequency_weights()], [importance_weights()], [fit()], [fit_xy] NULL +# ------------------------------------------------------------------------------ weights_to_numeric <- function(x, spec) { if (is.null(x)) { @@ -46,6 +47,51 @@ weights_to_numeric <- function(x, spec) { x } +#' Convert case weights to final from +#' +#' tidymodels requires case weights to have special classes. To use them in +#' model fitting or performance evaluation, they need to be converted to +#' numeric. +#' @param x A vector with class `"hardhat_case_weights"`. +#' @param where The location where they will be used: `"parsnip"` or +#' `"yardstick"`. +#' @return A numeric vector or NULL. +#' @export +convert_case_weights <- function(x, where = "parsnip", ...) { + UseMethod("convert_case_weights") +} + +#' @export +convert_case_weights.default <- function(x, where = "parsnip", ...) { + where <- rlang::arg_match0(where, c("parsnip", "yardstick")) + if (!inherits(x, "hardhat_case_weights")) { + rlang::abort("'case_weights' should be vector of class 'hardhat_case_weights'") + } + invisible(NULL) +} + +#' @export +#' @rdname convert_case_weights +convert_case_weights.hardhat_importance_weights <- + function(x, where = "parsnip", ...) { + if (where == "parsnip") { + x <- as.double(x) + } else { + x <- NULL + } + x + } + +#' @export +#' @rdname convert_case_weights +convert_case_weights.hardhat_frequency_weights <- + function(x, where = "parsnip", ...) { + as.integer(x) + } + + +# ------------------------------------------------------------------------------ + case_weights_allowed <- function(spec) { mod_type <- class(spec)[1] mod_eng <- spec$engine diff --git a/man/convert_case_weights.Rd b/man/convert_case_weights.Rd new file mode 100644 index 000000000..015e0c5cf --- /dev/null +++ b/man/convert_case_weights.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/case_weights.R +\name{convert_case_weights} +\alias{convert_case_weights} +\alias{convert_case_weights.hardhat_importance_weights} +\alias{convert_case_weights.hardhat_frequency_weights} +\title{Convert case weights to final from} +\usage{ +convert_case_weights(x, where = "parsnip", ...) + +\method{convert_case_weights}{hardhat_importance_weights}(x, where = "parsnip", ...) + +\method{convert_case_weights}{hardhat_frequency_weights}(x, where = "parsnip", ...) +} +\arguments{ +\item{x}{A vector with class \code{"hardhat_case_weights"}.} + +\item{where}{The location where they will be used: \code{"parsnip"} or +\code{"yardstick"}.} +} +\value{ +A numeric vector or NULL. +} +\description{ +tidymodels requires case weights to have special classes. To use them in +model fitting or performance evaluation, they need to be converted to +numeric. +} diff --git a/parsnip.Rproj b/parsnip.Rproj index 30e02be1a..a23ce7189 100644 --- a/parsnip.Rproj +++ b/parsnip.Rproj @@ -17,5 +17,6 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes +PackageCleanBeforeInstall: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace From 7c70d26320ae9402c7e0ae56d292556f0c9744e1 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 21 Apr 2022 12:33:23 -0400 Subject: [PATCH 35/41] Ensure that `fit_xy()` patches the formula environment with weights (#705) * Prefix everywhere we use `new_quosure()` or `empty_env()` We don't import these, so we have to do this. Tests were only working by chance because we have `library(rlang)` in some of the test files! * Ensure that `fit_xy()` patches the formula environment with weights * missing roxygen tag * avoid deprecated tests Co-authored-by: Max Kuhn --- R/case_weights.R | 19 ++++++++++ R/convert_data.R | 6 +++ R/fit.R | 14 ++----- R/fit_helpers.R | 2 +- man/convert_case_weights.Rd | 2 + tests/testthat/test-case-weights.R | 61 ++++++++++++++++++++++++++++++ tests/testthat/test_mlp.R | 1 + 7 files changed, 94 insertions(+), 11 deletions(-) diff --git a/R/case_weights.R b/R/case_weights.R index 92b759500..a4c60bc9c 100644 --- a/R/case_weights.R +++ b/R/case_weights.R @@ -47,6 +47,24 @@ weights_to_numeric <- function(x, spec) { x } +patch_formula_environment_with_case_weights <- function(formula, + data, + case_weights) { + # `lm()` and `glm()` and others use the original model function call to + # construct a call for `model.frame()`. That will normally fail because the + # formula has its own environment attached (usually the global environment) + # and it will look there for a vector named 'weights'. To account + # for this, we create a child of the `formula`'s environment and + # stash the `weights` there with the expected name and then + # reassign this as the `formula`'s environment + environment(formula) <- rlang::new_environment( + data = list(data = data, weights = case_weights), + parent = environment(formula) + ) + + formula +} + #' Convert case weights to final from #' #' tidymodels requires case weights to have special classes. To use them in @@ -55,6 +73,7 @@ weights_to_numeric <- function(x, spec) { #' @param x A vector with class `"hardhat_case_weights"`. #' @param where The location where they will be used: `"parsnip"` or #' `"yardstick"`. +#' @param ... Additional options (not currently used). #' @return A numeric vector or NULL. #' @export convert_case_weights <- function(x, where = "parsnip", ...) { diff --git a/R/convert_data.R b/R/convert_data.R index 7af0c34f3..ef8fa0673 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -252,6 +252,12 @@ if (length(weights) != nrow(x)) { rlang::abort(glue::glue("`weights` should have {nrow(x)} elements")) } + + form <- patch_formula_environment_with_case_weights( + formula = form, + data = x, + case_weights = weights + ) } res <- list( diff --git a/R/fit.R b/R/fit.R index 93f9c4b7e..6cda2e2c0 100644 --- a/R/fit.R +++ b/R/fit.R @@ -146,16 +146,10 @@ fit.model_spec <- wts <- weights_to_numeric(case_weights, object) - # `lm()` and `glm()` and others use the original model function call to - # construct a call for `model.frame()`. That will normally fail because the - # formula has its own environment attached (usually the global environment) - # and it will look there for a vector named 'weights'. To account - # for this, we create a child of the `formula`'s environment and - # stash the `weights` there with the expected name and then - # reassign this as the `formula`'s environment - environment(formula) <- rlang::new_environment( - data = list(data = data, weights = wts), - parent = environment(formula) + formula <- patch_formula_environment_with_case_weights( + formula = formula, + data = data, + case_weights = wts ) eval_env$data <- data diff --git a/R/fit_helpers.R b/R/fit_helpers.R index eae54f9b4..d4fbdf6b8 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -177,7 +177,7 @@ xy_form <- function(object, env, control, ...) { .convert_xy_to_form_fit( x = env$x, y = env$y, - weights = NULL, + weights = env$weights, y_name = "..y", remove_intercept = remove_intercept ) diff --git a/man/convert_case_weights.Rd b/man/convert_case_weights.Rd index 015e0c5cf..619770eb1 100644 --- a/man/convert_case_weights.Rd +++ b/man/convert_case_weights.Rd @@ -17,6 +17,8 @@ convert_case_weights(x, where = "parsnip", ...) \item{where}{The location where they will be used: \code{"parsnip"} or \code{"yardstick"}.} + +\item{...}{Additional options (not currently used).} } \value{ A numeric vector or NULL. diff --git a/tests/testthat/test-case-weights.R b/tests/testthat/test-case-weights.R index 4f83d9976..f2bce3656 100644 --- a/tests/testthat/test-case-weights.R +++ b/tests/testthat/test-case-weights.R @@ -24,6 +24,25 @@ test_that('case weights with xy method', { print(C5_bst_wt_fit$fit$call), "weights = weights" ) + + expect_error({ + set.seed(1) + C5_bst_wt_fit <- + boost_tree(trees = 5) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + fit_xy( + x = two_class_dat[c("A", "B")], + y = two_class_dat$Class, + case_weights = wts + ) + }, + regexp = NA) + + expect_output( + print(C5_bst_wt_fit$fit$call), + "weights = weights" + ) }) @@ -51,6 +70,19 @@ test_that('case weights with xy method - non-standard argument names', { # print(rf_wt_fit$fit$call), # "case\\.weights = weights" # ) + + expect_error({ + set.seed(1) + rf_wt_fit <- + rand_forest(trees = 5) %>% + set_mode("classification") %>% + fit_xy( + x = two_class_dat[c("A", "B")], + y = two_class_dat$Class, + case_weights = wts + ) + }, + regexp = NA) }) test_that('case weights with formula method', { @@ -78,5 +110,34 @@ test_that('case weights with formula method', { expect_equal(coef(lm_wt_fit$fit), coef(lm_sub_fit$fit)) }) +test_that('case weights with formula method that goes through `fit_xy()`', { + + skip_if_not_installed("modeldata") + data("ames", package = "modeldata") + ames$Sale_Price <- log10(ames$Sale_Price) + + set.seed(1) + wts <- runif(nrow(ames)) + wts <- ifelse(wts < 1/5, 0L, 1L) + ames_subset <- ames[wts != 0, ] + wts <- frequency_weights(wts) + + expect_error( + lm_wt_fit <- + linear_reg() %>% + fit_xy( + x = ames[c("Longitude", "Latitude")], + y = ames$Sale_Price, + case_weights = wts + ), + regexp = NA) + lm_sub_fit <- + linear_reg() %>% + fit_xy( + x = ames_subset[c("Longitude", "Latitude")], + y = ames_subset$Sale_Price + ) + expect_equal(coef(lm_wt_fit$fit), coef(lm_sub_fit$fit)) +}) diff --git a/tests/testthat/test_mlp.R b/tests/testthat/test_mlp.R index 0ade6a0ab..2169ec55d 100644 --- a/tests/testthat/test_mlp.R +++ b/tests/testthat/test_mlp.R @@ -1,3 +1,4 @@ + test_that('updating', { expr1 <- mlp(mode = "regression") %>% set_engine("nnet", Hess = FALSE, abstol = tune()) expr2 <- mlp(mode = "regression") %>% set_engine("nnet", Hess = tune()) From 2f1833263711556320576ebec9ddbaa119f554cc Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 21 Apr 2022 12:35:21 -0400 Subject: [PATCH 36/41] updated for latest roxygen2 --- man/contr_one_hot.Rd | 48 ++++++++--- man/details_C5_rules_C5.0.Rd | 10 ++- man/details_bag_mars_earth.Rd | 20 +++-- man/details_bag_tree_C5.0.Rd | 10 ++- man/details_bag_tree_rpart.Rd | 30 +++++-- man/details_bart_dbarts.Rd | 20 +++-- man/details_boost_tree_C5.0.Rd | 10 ++- man/details_boost_tree_mboost.Rd | 10 ++- man/details_boost_tree_spark.Rd | 20 +++-- man/details_boost_tree_xgboost.Rd | 20 +++-- man/details_cubist_rules_Cubist.Rd | 10 ++- man/details_decision_tree_C5.0.Rd | 10 ++- man/details_decision_tree_party.Rd | 10 ++- man/details_decision_tree_rpart.Rd | 30 +++++-- man/details_decision_tree_spark.Rd | 20 +++-- man/details_discrim_flexible_earth.Rd | 10 ++- man/details_discrim_linear_MASS.Rd | 10 ++- man/details_discrim_linear_mda.Rd | 10 ++- man/details_discrim_linear_sda.Rd | 10 ++- man/details_discrim_linear_sparsediscrim.Rd | 10 ++- man/details_discrim_quad_MASS.Rd | 10 ++- man/details_discrim_quad_sparsediscrim.Rd | 10 ++- man/details_discrim_regularized_klaR.Rd | 10 ++- man/details_gen_additive_mod_mgcv.Rd | 30 +++++-- man/details_linear_reg_brulee.Rd | 10 ++- man/details_linear_reg_gee.Rd | 22 +++-- man/details_linear_reg_glm.Rd | 20 +++-- man/details_linear_reg_glmnet.Rd | 10 ++- man/details_linear_reg_gls.Rd | 82 +++++++++++++----- man/details_linear_reg_keras.Rd | 10 ++- man/details_linear_reg_lm.Rd | 10 ++- man/details_linear_reg_lme.Rd | 18 ++-- man/details_linear_reg_lmer.Rd | 18 ++-- man/details_linear_reg_spark.Rd | 10 ++- man/details_linear_reg_stan.Rd | 10 ++- man/details_linear_reg_stan_glmer.Rd | 18 ++-- man/details_logistic_reg_LiblineaR.Rd | 10 ++- man/details_logistic_reg_brulee.Rd | 10 ++- man/details_logistic_reg_gee.Rd | 22 +++-- man/details_logistic_reg_glm.Rd | 20 +++-- man/details_logistic_reg_glmer.Rd | 18 ++-- man/details_logistic_reg_glmnet.Rd | 10 ++- man/details_logistic_reg_keras.Rd | 10 ++- man/details_logistic_reg_spark.Rd | 10 ++- man/details_logistic_reg_stan.Rd | 10 ++- man/details_logistic_reg_stan_glmer.Rd | 18 ++-- man/details_mars_earth.Rd | 20 +++-- man/details_mlp_brulee.Rd | 20 +++-- man/details_mlp_keras.Rd | 20 +++-- man/details_mlp_nnet.Rd | 20 +++-- man/details_multinom_reg_brulee.Rd | 10 ++- man/details_multinom_reg_glmnet.Rd | 10 ++- man/details_multinom_reg_keras.Rd | 10 ++- man/details_multinom_reg_nnet.Rd | 10 ++- man/details_multinom_reg_spark.Rd | 10 ++- man/details_naive_Bayes_klaR.Rd | 10 ++- man/details_naive_Bayes_naivebayes.Rd | 10 ++- man/details_nearest_neighbor_kknn.Rd | 20 +++-- man/details_pls_mixOmics.Rd | 24 ++++-- man/details_poisson_reg_gee.Rd | 22 +++-- man/details_poisson_reg_glm.Rd | 10 ++- man/details_poisson_reg_glmer.Rd | 18 ++-- man/details_poisson_reg_glmnet.Rd | 10 ++- man/details_poisson_reg_hurdle.Rd | 30 +++++-- man/details_poisson_reg_stan.Rd | 10 ++- man/details_poisson_reg_stan_glmer.Rd | 18 ++-- man/details_poisson_reg_zeroinfl.Rd | 30 +++++-- man/details_proportional_hazards_glmnet.Rd | 20 +++-- man/details_proportional_hazards_survival.Rd | 20 +++-- man/details_rand_forest_party.Rd | 10 ++- man/details_rand_forest_randomForest.Rd | 20 +++-- man/details_rand_forest_ranger.Rd | 20 +++-- man/details_rand_forest_spark.Rd | 20 +++-- man/details_rule_fit_xrf.Rd | 20 +++-- man/details_surv_reg_flexsurv.Rd | 10 ++- man/details_surv_reg_survival.Rd | 20 +++-- man/details_survival_reg_flexsurv.Rd | 10 ++- man/details_survival_reg_survival.Rd | 20 +++-- man/details_svm_linear_LiblineaR.Rd | 20 +++-- man/details_svm_linear_kernlab.Rd | 20 +++-- man/details_svm_poly_kernlab.Rd | 20 +++-- man/details_svm_rbf_kernlab.Rd | 20 +++-- man/extract-parsnip.Rd | 8 +- man/glmnet-details.Rd | 88 ++++++++++++++------ man/null_model.Rd | 18 ++-- man/parsnip_update.Rd | 4 +- man/reexports.Rd | 24 ++++++ 87 files changed, 1078 insertions(+), 440 deletions(-) diff --git a/man/contr_one_hot.Rd b/man/contr_one_hot.Rd index 79341f889..f57e589fc 100644 --- a/man/contr_one_hot.Rd +++ b/man/contr_one_hot.Rd @@ -29,31 +29,49 @@ incomplete set of indicators are created; no indicator is made for the first level of the factor. For example, \code{species} and \code{island} both have three levels but -\code{model.matrix()} creates two indicator variables for each:\if{html}{\out{
}}\preformatted{library(dplyr) +\code{model.matrix()} creates two indicator variables for each: + +\if{html}{\out{
}}\preformatted{library(dplyr) library(modeldata) data(penguins) levels(penguins$species) -}\if{html}{\out{
}}\preformatted{## [1] "Adelie" "Chinstrap" "Gentoo" -}\if{html}{\out{
}}\preformatted{levels(penguins$island) -}\if{html}{\out{
}}\preformatted{## [1] "Biscoe" "Dream" "Torgersen" -}\if{html}{\out{
}}\preformatted{model.matrix(~ species + island, data = penguins) \%>\% +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] "Adelie" "Chinstrap" "Gentoo" +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{levels(penguins$island) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] "Biscoe" "Dream" "Torgersen" +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{model.matrix(~ species + island, data = penguins) \%>\% colnames() -}\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" ## [5] "islandTorgersen" -} +}\if{html}{\out{
}} For a formula with no intercept, the first factor is expanded to indicators for \emph{all} factor levels but all other factors are expanded to -all but one (as above):\if{html}{\out{
}}\preformatted{model.matrix(~ 0 + species + island, data = penguins) \%>\% +all but one (as above): + +\if{html}{\out{
}}\preformatted{model.matrix(~ 0 + species + island, data = penguins) \%>\% colnames() -}\if{html}{\out{
}}\preformatted{## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" ## [5] "islandTorgersen" -} +}\if{html}{\out{
}} For inference, this hybrid encoding can be problematic. -To generate all indicators, use this contrast:\if{html}{\out{
}}\preformatted{# Switch out the contrast method +To generate all indicators, use this contrast: + +\if{html}{\out{
}}\preformatted{# Switch out the contrast method old_contr <- options("contrasts")$contrasts new_contr <- old_contr new_contr["unordered"] <- "contr_one_hot" @@ -61,9 +79,13 @@ options(contrasts = new_contr) model.matrix(~ species + island, data = penguins) \%>\% colnames() -}\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" ## [5] "islandBiscoe" "islandDream" "islandTorgersen" -}\if{html}{\out{
}}\preformatted{options(contrasts = old_contr) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{options(contrasts = old_contr) }\if{html}{\out{
}} Removing the intercept here does not affect the factor encodings. diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index d43aeaf1f..3182de8ca 100644 --- a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -25,7 +25,9 @@ less iterations of boosting are performed than the number requested. \subsection{Translation from parsnip to the underlying model call (classification)}{ -The \strong{rules} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(rules) +The \strong{rules} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(rules) C5_rules( trees = integer(1), @@ -34,7 +36,9 @@ C5_rules( set_engine("C5.0") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## C5.0 Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## C5.0 Model Specification (classification) ## ## Main Arguments: ## trees = integer(1) @@ -45,7 +49,7 @@ C5_rules( ## Model fit template: ## rules::c5_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## trials = integer(1), minCases = integer(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index 47b191c4f..e56102f71 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -27,11 +27,15 @@ columns. For a data frame \code{x}, the default is \subsection{Translation from parsnip to the original package (regression)}{ -The \strong{baguette} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% +The \strong{baguette} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% set_engine("earth") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (regression) ## ## Main Arguments: ## num_terms = integer(1) @@ -44,12 +48,14 @@ The \strong{baguette} extension package is required to fit this model.\if{html}{ ## baguette::bagger(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), nprune = integer(1), degree = integer(1), ## pmethod = character(1), base_model = "MARS") -} +}\if{html}{\out{
}} } \subsection{Translation from parsnip to the original package (classification)}{ -The \strong{baguette} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(baguette) +The \strong{baguette} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(baguette) bag_mars( num_terms = integer(1), @@ -59,7 +65,9 @@ bag_mars( set_engine("earth") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (classification) ## ## Main Arguments: ## num_terms = integer(1) @@ -72,7 +80,7 @@ bag_mars( ## baguette::bagger(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), nprune = integer(1), degree = integer(1), ## pmethod = character(1), base_model = "MARS") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_bag_tree_C5.0.Rd b/man/details_bag_tree_C5.0.Rd index 399822248..c6370328b 100644 --- a/man/details_bag_tree_C5.0.Rd +++ b/man/details_bag_tree_C5.0.Rd @@ -19,13 +19,17 @@ This model has 1 tuning parameters: \subsection{Translation from parsnip to the original package (classification)}{ -The \strong{baguette} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(baguette) +The \strong{baguette} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(baguette) bag_tree(min_n = integer()) \%>\% set_engine("C5.0") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) ## ## Main Arguments: ## cost_complexity = 0 @@ -36,7 +40,7 @@ bag_tree(min_n = integer()) \%>\% ## Model fit template: ## baguette::bagger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## minCases = integer(), base_model = "C5.0") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index e4b2db46a..9618fb549 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -31,13 +31,17 @@ the second level of the factor. \subsection{Translation from parsnip to the original package (classification)}{ -The \strong{baguette} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(baguette) +The \strong{baguette} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(baguette) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) ## ## Main Arguments: ## cost_complexity = double(1) @@ -50,18 +54,22 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## baguette::bagger(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), cp = double(1), maxdepth = integer(1), ## minsplit = integer(1), base_model = "CART") -} +}\if{html}{\out{
}} } \subsection{Translation from parsnip to the original package (regression)}{ -The \strong{baguette} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(baguette) +The \strong{baguette} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(baguette) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (regression) ## ## Main Arguments: ## cost_complexity = double(1) @@ -74,18 +82,22 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## baguette::bagger(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), cp = double(1), maxdepth = integer(1), ## minsplit = integer(1), base_model = "CART") -} +}\if{html}{\out{
}} } \subsection{Translation from parsnip to the original package (censored regression)}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (censored regression) ## ## Main Arguments: ## cost_complexity = double(1) @@ -97,7 +109,7 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## Model fit template: ## ipred::bagging(formula = missing_arg(), data = missing_arg(), ## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_bart_dbarts.Rd b/man/details_bart_dbarts.Rd index 68f7bf9ff..37daf89e5 100644 --- a/man/details_bart_dbarts.Rd +++ b/man/details_bart_dbarts.Rd @@ -47,7 +47,9 @@ times number of observations. } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bart( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{bart( trees = integer(1), prior_terminal_node_coef = double(1), prior_terminal_node_expo = double(1), @@ -56,7 +58,9 @@ times number of observations. set_engine("dbarts") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## BART Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## BART Model Specification (classification) ## ## Main Arguments: ## trees = integer(1) @@ -70,10 +74,12 @@ times number of observations. ## dbarts::bart(x = missing_arg(), y = missing_arg(), ntree = integer(1), ## base = double(1), power = double(1), k = double(1), verbose = FALSE, ## keeptrees = TRUE, keepcall = FALSE) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bart( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{bart( trees = integer(1), prior_terminal_node_coef = double(1), prior_terminal_node_expo = double(1), @@ -82,7 +88,9 @@ times number of observations. set_engine("dbarts") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## BART Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## BART Model Specification (regression) ## ## Main Arguments: ## trees = integer(1) @@ -96,7 +104,7 @@ times number of observations. ## dbarts::bart(x = missing_arg(), y = missing_arg(), ntree = integer(1), ## base = double(1), power = double(1), k = double(1), verbose = FALSE, ## keeptrees = TRUE, keepcall = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd index 1db0b8d5b..5e8cd42b5 100644 --- a/man/details_boost_tree_C5.0.Rd +++ b/man/details_boost_tree_C5.0.Rd @@ -24,11 +24,15 @@ The implementation of C5.0 limits the number of trees to be between 1 and 100. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) \%>\% set_engine("C5.0") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) ## ## Main Arguments: ## trees = integer() @@ -40,7 +44,7 @@ and 100. ## Model fit template: ## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## trials = integer(), minCases = integer(), sample = numeric()) -} +}\if{html}{\out{
}} \code{\link[=C5.0_train]{C5.0_train()}} is a wrapper around \code{\link[C50:C5.0]{C50::C5.0()}} that makes it easier to run this model. diff --git a/man/details_boost_tree_mboost.Rd b/man/details_boost_tree_mboost.Rd index a26b5ed01..c0855c5d0 100644 --- a/man/details_boost_tree_mboost.Rd +++ b/man/details_boost_tree_mboost.Rd @@ -28,20 +28,24 @@ is to use all predictors. \subsection{Translation from parsnip to the original package (censored regression)}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) boost_tree() \%>\% set_engine("mboost") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (censored regression) ## ## Computational engine: mboost ## ## Model fit template: ## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), ## family = mboost::CoxPH()) -} +}\if{html}{\out{
}} \code{censored::blackboost_train()} is a wrapper around \code{\link[mboost:blackboost]{mboost::blackboost()}} (and other functions) diff --git a/man/details_boost_tree_spark.Rd b/man/details_boost_tree_spark.Rd index 13ae8c56c..7d105c259 100644 --- a/man/details_boost_tree_spark.Rd +++ b/man/details_boost_tree_spark.Rd @@ -32,14 +32,18 @@ number of predictors is used and for regression, one third of the predictors are sampled. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{boost_tree( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{boost_tree( mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric() ) \%>\% set_engine("spark") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression) ## ## Main Arguments: ## mtry = integer() @@ -58,17 +62,21 @@ predictors are sampled. ## max_iter = integer(), min_instances_per_node = min_rows(integer(0), ## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(), ## subsampling_rate = numeric(), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{boost_tree( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{boost_tree( mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric() ) \%>\% set_engine("spark") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) ## ## Main Arguments: ## mtry = integer() @@ -87,7 +95,7 @@ predictors are sampled. ## max_iter = integer(), min_instances_per_node = min_rows(integer(0), ## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(), ## subsampling_rate = numeric(), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_boost_tree_xgboost.Rd b/man/details_boost_tree_xgboost.Rd index 9e589055b..f2c3b42ff 100644 --- a/man/details_boost_tree_xgboost.Rd +++ b/man/details_boost_tree_xgboost.Rd @@ -35,7 +35,9 @@ the number of columns to this type of value. The user should give the argument to \code{boost_tree()} as an integer (not a real number). } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{boost_tree( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{boost_tree( mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(), stop_iter = integer() @@ -43,7 +45,9 @@ argument to \code{boost_tree()} as an integer (not a real number). set_engine("xgboost") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression) ## ## Main Arguments: ## mtry = integer() @@ -63,10 +67,12 @@ argument to \code{boost_tree()} as an integer (not a real number). ## max_depth = integer(), eta = numeric(), gamma = numeric(), ## subsample = numeric(), early_stop = integer(), nthread = 1, ## verbose = 0) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{boost_tree( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{boost_tree( mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(), stop_iter = integer() @@ -74,7 +80,9 @@ argument to \code{boost_tree()} as an integer (not a real number). set_engine("xgboost") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification) ## ## Main Arguments: ## mtry = integer() @@ -94,7 +102,7 @@ argument to \code{boost_tree()} as an integer (not a real number). ## max_depth = integer(), eta = numeric(), gamma = numeric(), ## subsample = numeric(), early_stop = integer(), nthread = 1, ## verbose = 0) -} +}\if{html}{\out{
}} \code{\link[=xgb_train]{xgb_train()}} is a wrapper around \code{\link[xgboost:xgb.train]{xgboost::xgb.train()}} (and other functions) diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd index 29e8f9665..8e6c6f39f 100644 --- a/man/details_cubist_rules_Cubist.Rd +++ b/man/details_cubist_rules_Cubist.Rd @@ -22,7 +22,9 @@ This model has 3 tuning parameters: \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{rules} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(rules) +The \strong{rules} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(rules) cubist_rules( committees = integer(1), @@ -32,7 +34,9 @@ cubist_rules( set_engine("Cubist") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Cubist Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Cubist Model Specification (regression) ## ## Main Arguments: ## committees = integer(1) @@ -44,7 +48,7 @@ cubist_rules( ## Model fit template: ## rules::cubist_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## committees = integer(1), neighbors = integer(1), max_rules = integer(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_decision_tree_C5.0.Rd b/man/details_decision_tree_C5.0.Rd index 61a3b61e6..d52eeb99b 100644 --- a/man/details_decision_tree_C5.0.Rd +++ b/man/details_decision_tree_C5.0.Rd @@ -17,11 +17,15 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{decision_tree(min_n = integer()) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{decision_tree(min_n = integer()) \%>\% set_engine("C5.0") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) ## ## Main Arguments: ## min_n = integer() @@ -31,7 +35,7 @@ This model has 1 tuning parameters: ## Model fit template: ## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## minCases = integer(), trials = 1) -} +}\if{html}{\out{
}} \code{\link[=C5.0_train]{C5.0_train()}} is a wrapper around \code{\link[C50:C5.0]{C50::C5.0()}} that makes it easier to run this model. diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd index be7a55bf2..45cfbe069 100644 --- a/man/details_decision_tree_party.Rd +++ b/man/details_decision_tree_party.Rd @@ -29,13 +29,17 @@ evaluated for splitting. The default is to use all predictors. \subsection{Translation from parsnip to the original package (censored regression)}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% set_engine("party") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) ## ## Main Arguments: ## tree_depth = integer(1) @@ -47,7 +51,7 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% ## censored::cond_inference_surv_ctree(formula = missing_arg(), ## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, ## data)) -} +}\if{html}{\out{
}} \code{censored::cond_inference_surv_ctree()} is a wrapper around \code{\link[party:ctree]{party::ctree()}} (and other functions) that makes it diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index f2c9d2c48..bd6f831ef 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -21,11 +21,15 @@ This model has 3 tuning parameters: } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) ## ## Main Arguments: ## cost_complexity = double(1) @@ -38,14 +42,18 @@ This model has 3 tuning parameters: ## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, ## data)) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression) ## ## Main Arguments: ## cost_complexity = double(1) @@ -58,12 +66,14 @@ This model has 3 tuning parameters: ## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, ## data)) -} +}\if{html}{\out{
}} } \subsection{Translation from parsnip to the original package (censored regression)}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) decision_tree( tree_depth = integer(1), @@ -73,7 +83,9 @@ decision_tree( set_engine("rpart") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) ## ## Main Arguments: ## cost_complexity = double(1) @@ -86,7 +98,7 @@ decision_tree( ## pec::pecRpart(formula = missing_arg(), data = missing_arg(), ## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, ## data)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_decision_tree_spark.Rd b/man/details_decision_tree_spark.Rd index c7cd715b3..c1ab4a8ee 100644 --- a/man/details_decision_tree_spark.Rd +++ b/man/details_decision_tree_spark.Rd @@ -18,11 +18,15 @@ This model has 2 tuning parameters: } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% set_engine("spark") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification) ## ## Main Arguments: ## tree_depth = integer(1) @@ -34,14 +38,18 @@ This model has 2 tuning parameters: ## sparklyr::ml_decision_tree_classifier(x = missing_arg(), formula = missing_arg(), ## max_depth = integer(1), min_instances_per_node = min_rows(0L, ## x), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% set_engine("spark") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression) ## ## Main Arguments: ## tree_depth = integer(1) @@ -53,7 +61,7 @@ This model has 2 tuning parameters: ## sparklyr::ml_decision_tree_regressor(x = missing_arg(), formula = missing_arg(), ## max_depth = integer(1), min_instances_per_node = min_rows(0L, ## x), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index 9641d0e40..59fc2be41 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -28,7 +28,9 @@ intercept-only model. \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_flexible( num_terms = integer(0), @@ -36,7 +38,9 @@ discrim_flexible( prune_method = character(0) ) \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Flexible Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Flexible Discriminant Model Specification (classification) ## ## Main Arguments: ## num_terms = integer(0) @@ -48,7 +52,7 @@ discrim_flexible( ## Model fit template: ## mda::fda(formula = missing_arg(), data = missing_arg(), nprune = integer(0), ## degree = integer(0), pmethod = character(0), method = earth::earth) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index ffb1786f9..7cefe9968 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -18,18 +18,22 @@ This engine has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_linear() \%>\% set_engine("MASS") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) ## ## Computational engine: MASS ## ## Model fit template: ## MASS::lda(formula = missing_arg(), data = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index 3a880177f..0ae09e1ed 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -20,12 +20,16 @@ This model has 1 tuning parameter: \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_linear(penalty = numeric(0)) \%>\% set_engine("mda") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) ## ## Main Arguments: ## penalty = numeric(0) @@ -35,7 +39,7 @@ discrim_linear(penalty = numeric(0)) \%>\% ## Model fit template: ## mda::fda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), ## method = mda::gen.ridge, keep.fitted = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index 34e04d17e..9b0108ee7 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -34,18 +34,22 @@ This maps to \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_linear() \%>\% set_engine("sda") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) ## ## Computational engine: sda ## ## Model fit template: ## sda::sda(Xtrain = missing_arg(), L = missing_arg(), verbose = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index 3ba9a1f65..1d9edc451 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -34,12 +34,16 @@ execute, are: \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_linear(regularization_method = character(0)) \%>\% set_engine("sparsediscrim") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) ## ## Main Arguments: ## regularization_method = character(0) @@ -49,7 +53,7 @@ discrim_linear(regularization_method = character(0)) \%>\% ## Model fit template: ## discrim::fit_regularized_linear(x = missing_arg(), y = missing_arg(), ## method = character(0)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index 773912945..1341a35a2 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -18,18 +18,22 @@ This engine has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_quad() \%>\% set_engine("MASS") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) ## ## Computational engine: MASS ## ## Model fit template: ## MASS::qda(formula = missing_arg(), data = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index b6bbe132f..8bf70c2d6 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -32,12 +32,16 @@ execute, are: \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_quad(regularization_method = character(0)) \%>\% set_engine("sparsediscrim") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) ## ## Main Arguments: ## regularization_method = character(0) @@ -47,7 +51,7 @@ discrim_quad(regularization_method = character(0)) \%>\% ## Model fit template: ## discrim::fit_regularized_quad(x = missing_arg(), y = missing_arg(), ## method = character(0)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index 112212763..5d9bc0152 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -33,12 +33,16 @@ discriminant analysis (QDA) model. \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \%>\% set_engine("klaR") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Regularized Discriminant Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Regularized Discriminant Model Specification (classification) ## ## Main Arguments: ## frac_common_cov = numeric(0) @@ -49,7 +53,7 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \% ## Model fit template: ## klaR::rda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), ## gamma = numeric(0)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index 0d2231988..b8f1c39c0 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -19,11 +19,15 @@ This model has 2 tuning parameters: } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) \%>\% set_engine("mgcv") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## GAM Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## GAM Specification (regression) ## ## Main Arguments: ## select_features = logical(1) @@ -34,14 +38,18 @@ This model has 2 tuning parameters: ## Model fit template: ## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), ## gamma = numeric(1)) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) \%>\% set_engine("mgcv") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## GAM Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## GAM Specification (classification) ## ## Main Arguments: ## select_features = logical(1) @@ -52,18 +60,22 @@ This model has 2 tuning parameters: ## Model fit template: ## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), ## gamma = numeric(1), family = stats::binomial(link = "logit")) -} +}\if{html}{\out{
}} } \subsection{Model fitting}{ This model should be used with a model formula so that smooth terms can -be specified. For example:\if{html}{\out{
}}\preformatted{library(mgcv) +be specified. For example: + +\if{html}{\out{
}}\preformatted{library(mgcv) gen_additive_mod() \%>\% set_engine("mgcv") \%>\% set_mode("regression") \%>\% fit(mpg ~ wt + gear + cyl + s(disp, k = 10), data = mtcars) -}\if{html}{\out{
}}\preformatted{## parsnip model object +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## parsnip model object ## ## ## Family: gaussian @@ -76,7 +88,7 @@ gen_additive_mod() \%>\% ## 7.52 total = 11.52 ## ## GCV score: 4.225228 -} +}\if{html}{\out{
}} The smoothness of the terms will need to be manually specified (e.g., using \code{s(x, df = 10)}) in the formula. Tuning can be accomplished using diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd index adbc66ff7..313a2110d 100644 --- a/man/details_linear_reg_brulee.Rd +++ b/man/details_linear_reg_brulee.Rd @@ -39,10 +39,14 @@ no improvement before stopping. (default: 5L). } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1)) \%>\% set_engine("brulee") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Main Arguments: ## penalty = double(1) @@ -52,7 +56,7 @@ no improvement before stopping. (default: 5L). ## Model fit template: ## brulee::brulee_linear_reg(x = missing_arg(), y = missing_arg(), ## penalty = double(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd index 9fcab55c2..69e493d8a 100644 --- a/man/details_linear_reg_gee.Rd +++ b/man/details_linear_reg_gee.Rd @@ -20,20 +20,24 @@ values. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) linear_reg() \%>\% set_engine("gee") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: gee ## ## Model fit template: ## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), ## family = gaussian) -} +}\if{html}{\out{
}} \code{multilevelmod::gee_fit()} is a wrapper model around \code{gee::gee()}. } @@ -54,10 +58,14 @@ Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the -call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +call would look like: + +\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) linear_reg() \%>\% set_engine("gee", corstr = "exchangeable") \%>\% @@ -66,7 +74,9 @@ linear_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the GEE formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- linear_reg() \%>\% diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index 0621ebc98..6118cac23 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -16,23 +16,31 @@ This engine has no tuning parameters but you can set the \code{family} parameter (and/or \code{link}) as an engine argument (see below). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% set_engine("glm") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: glm ## ## Model fit template: ## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::gaussian) -} +}\if{html}{\out{
}} To use a non-default \code{family} and/or \code{link}, pass in as an argument to -\code{set_engine()}:\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\code{set_engine()}: + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% set_engine("glm", family = stats::poisson(link = "sqrt")) \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Engine-Specific Arguments: ## family = stats::poisson(link = "sqrt") @@ -42,7 +50,7 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to ## Model fit template: ## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::poisson(link = "sqrt")) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index b5e031f3a..0ce52aeab 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -25,10 +25,14 @@ value. For more details about this, and the \code{glmnet} model in general, see \link{glmnet-details}. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Main Arguments: ## penalty = 0 @@ -39,7 +43,7 @@ see \link{glmnet-details}. ## Model fit template: ## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## alpha = double(1), family = "gaussian") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index c340b5491..e6135b979 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -16,19 +16,23 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) linear_reg() \%>\% set_engine("gls") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: gls ## ## Model fit template: ## nlme::gls(formula = missing_arg(), data = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ @@ -45,7 +49,9 @@ The model can accept case weights. With parsnip, we suggest using the \emph{fixed effects} formula method when fitting, but the details of the correlation structure should be passed -to \code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) +to \code{set_engine()} since it is an irregular (but required) argument: + +\if{html}{\out{
}}\preformatted{library(tidymodels) # load nlme to be able to use the `cor*()` functions library(nlme) @@ -54,7 +60,9 @@ data("riesby") linear_reg() \%>\% set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) \%>\% fit(depr_score ~ week, data = riesby) -}\if{html}{\out{
}}\preformatted{## parsnip model object +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## parsnip model object ## ## Generalized least squares fit by REML ## Model: depr_score ~ week @@ -72,11 +80,13 @@ linear_reg() \%>\% ## 0.6820145 ## Degrees of freedom: 250 total; 248 residual ## Residual standard error: 6.868785 -} +}\if{html}{\out{
}} When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) gls_spec <- linear_reg() \%>\% @@ -104,7 +114,9 @@ effect parameters, the residual degrees of freedom are: As a result, p-values will be different. For example, we can fit the same model using different estimation methods (assuming a positive -covariance value):\if{html}{\out{
}}\preformatted{gls_fit <- +covariance value): + +\if{html}{\out{
}}\preformatted{gls_fit <- linear_reg() \%>\% set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) \%>\% fit(depr_score ~ week, data = riesby) @@ -115,47 +127,71 @@ lme_fit <- fit(depr_score ~ week, data = riesby) }\if{html}{\out{
}} -The estimated within-subject correlations are the same:\if{html}{\out{
}}\preformatted{library(ape) +The estimated within-subject correlations are the same: + +\if{html}{\out{
}}\preformatted{library(ape) # lme, use ape package: lme_within_sub <- varcomp(lme_fit$fit)/sum(varcomp(lme_fit$fit)) lme_within_sub["subject"] -}\if{html}{\out{
}}\preformatted{## subject +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## subject ## 0.6820145 -}\if{html}{\out{
}}\preformatted{# gls: +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# gls: summary(gls_fit$fit$modelStruct) -}\if{html}{\out{
}}\preformatted{## Correlation Structure: Compound symmetry +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Correlation Structure: Compound symmetry ## Formula: ~1 | subject ## Parameter estimate(s): ## Rho ## 0.6820145 -} +}\if{html}{\out{
}} + +as are the fixed effects (and their standard errors): -as are the fixed effects (and their standard errors):\if{html}{\out{
}}\preformatted{nlme::fixef(lme_fit$fit) -}\if{html}{\out{
}}\preformatted{## (Intercept) week +\if{html}{\out{
}}\preformatted{nlme::fixef(lme_fit$fit) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## (Intercept) week ## -4.953439 -2.119678 -}\if{html}{\out{
}}\preformatted{coef(gls_fit$fit) -}\if{html}{\out{
}}\preformatted{## (Intercept) week +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{coef(gls_fit$fit) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## (Intercept) week ## -4.953439 -2.119678 -} +}\if{html}{\out{
}} -However, the p-values for the fixed effects are different:\if{html}{\out{
}}\preformatted{library(broom.mixed) +However, the p-values for the fixed effects are different: + +\if{html}{\out{
}}\preformatted{library(broom.mixed) # lme: lme_fit \%>\% tidy() \%>\% dplyr::filter(group == "fixed") \%>\% dplyr::select(-group, -effect) -}\if{html}{\out{
}}\preformatted{## # A tibble: 0 × 6 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 0 × 6 ## # … with 6 variables: term , estimate , std.error , df , ## # statistic , p.value -}\if{html}{\out{
}}\preformatted{# gls: +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# gls: gls_fit \%>\% tidy() -}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 ## term estimate std.error statistic p.value ## ## 1 (Intercept) -4.95 0.808 -6.13 3.50e- 9 ## 2 week -2.12 0.224 -9.47 2.26e-18 -} +}\if{html}{\out{
}} \subsection{Case weights}{ The underlying model implementation does not allow for case weights. diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd index 2e15d554f..148ca862e 100644 --- a/man/details_linear_reg_keras.Rd +++ b/man/details_linear_reg_keras.Rd @@ -19,10 +19,14 @@ For \code{penalty}, the amount of regularization is \emph{only} L2 penalty (i.e. ridge or weight decay). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1)) \%>\% set_engine("keras") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Main Arguments: ## penalty = double(1) @@ -32,7 +36,7 @@ ridge or weight decay). ## Model fit template: ## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), ## hidden_units = 1, act = "linear") -} +}\if{html}{\out{
}} \code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd index e568355d7..4e28dfead 100644 --- a/man/details_linear_reg_lm.Rd +++ b/man/details_linear_reg_lm.Rd @@ -13,16 +13,20 @@ For this engine, there is a single mode: regression This engine has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% set_engine("lm") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: lm ## ## Model fit template: ## stats::lm(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index d82179744..c76854041 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -16,19 +16,23 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) linear_reg() \%>\% set_engine("lme") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: lme ## ## Model fit template: ## nlme::lme(fixed = missing_arg(), data = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -74,7 +78,9 @@ The model can accept case weights. With parsnip, we suggest using the \emph{fixed effects} formula method when fitting, but the random effects formula should be passed to -\code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{set_engine()} since it is an irregular (but required) argument: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") linear_reg() \%>\% @@ -84,7 +90,9 @@ linear_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) lme_spec <- linear_reg() \%>\% diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index 84df6ef52..2130a8d5b 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -16,19 +16,23 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) linear_reg() \%>\% set_engine("lmer") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: lmer ## ## Model fit template: ## lme4::lmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -72,7 +76,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") linear_reg() \%>\% @@ -82,7 +88,9 @@ linear_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) lmer_spec <- linear_reg() \%>\% diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd index 28bd22f02..776e48549 100644 --- a/man/details_linear_reg_spark.Rd +++ b/man/details_linear_reg_spark.Rd @@ -24,10 +24,14 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while \code{mixture = 0} indicates ridge regression. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("spark") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Main Arguments: ## penalty = double(1) @@ -38,7 +42,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## Model fit template: ## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(), ## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd index b68f13cb1..c61bafb88 100644 --- a/man/details_linear_reg_stan.Rd +++ b/man/details_linear_reg_stan.Rd @@ -37,17 +37,21 @@ See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and and other options. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% set_engine("stan") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: stan ## ## Model fit template: ## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::gaussian, refresh = 0) -} +}\if{html}{\out{
}} Note that the \code{refresh} default prevents logging of the estimation process. Change this value in \code{set_engine()} to show the MCMC logs. diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index 60461d3d5..7ddad52cd 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -36,20 +36,24 @@ See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more informatio \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) linear_reg() \%>\% set_engine("stan_glmer") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Computational engine: stan_glmer ## ## Model fit template: ## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::gaussian, refresh = 0) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -93,7 +97,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") linear_reg() \%>\% @@ -103,7 +109,9 @@ linear_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- linear_reg() \%>\% diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index 745f31e41..94245330c 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -29,10 +29,14 @@ regularized regression models do not, which will result in different parameter estimates. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("LiblineaR") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -43,7 +47,7 @@ parameter estimates. ## Model fit template: ## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), cost = Inf, ## type = double(1), verbose = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd index 2c99070ba..bc5c72bc5 100644 --- a/man/details_logistic_reg_brulee.Rd +++ b/man/details_logistic_reg_brulee.Rd @@ -42,10 +42,14 @@ no improvement before stopping. (default: 5L). } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1)) \%>\% set_engine("brulee") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -55,7 +59,7 @@ no improvement before stopping. (default: 5L). ## Model fit template: ## brulee::brulee_logistic_reg(x = missing_arg(), y = missing_arg(), ## penalty = double(1)) -} +}\if{html}{\out{
}} Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd index d26d1b04c..df4b6db1e 100644 --- a/man/details_logistic_reg_gee.Rd +++ b/man/details_logistic_reg_gee.Rd @@ -20,19 +20,23 @@ values. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) logistic_reg() \%>\% set_engine("gee") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Computational engine: gee ## ## Model fit template: ## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), ## family = binomial) -} +}\if{html}{\out{
}} \code{multilevelmod::gee_fit()} is a wrapper model around \code{gee::gee()}. } @@ -53,10 +57,14 @@ Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the -call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +call would look like: + +\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With \code{parsnip}, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("toenail", package = "HSAUR3") logistic_reg() \%>\% @@ -66,7 +74,9 @@ logistic_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the GEE formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- logistic_reg() \%>\% diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index 7e7c1f859..1b6715e4f 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -16,23 +16,31 @@ This engine has no tuning parameters but you can set the \code{family} parameter (and/or \code{link}) as an engine argument (see below). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% set_engine("glm") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Computational engine: glm ## ## Model fit template: ## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::binomial) -} +}\if{html}{\out{
}} To use a non-default \code{family} and/or \code{link}, pass in as an argument to -\code{set_engine()}:\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\code{set_engine()}: + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% set_engine("glm", family = stats::binomial(link = "probit")) \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Engine-Specific Arguments: ## family = stats::binomial(link = "probit") @@ -42,7 +50,7 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to ## Model fit template: ## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::binomial(link = "probit")) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index 3b70c61f6..127ce2748 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -16,19 +16,23 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) logistic_reg() \%>\% set_engine("glmer") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Computational engine: glmer ## ## Model fit template: ## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = binomial) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -72,7 +76,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("toenail", package = "HSAUR3") logistic_reg() \%>\% @@ -82,7 +88,9 @@ logistic_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- logistic_reg() \%>\% diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index 18e551370..db680323a 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -27,10 +27,14 @@ value. For more details about this, and the \code{glmnet} model in general, see \link{glmnet-details}. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Main Arguments: ## penalty = 0 @@ -41,7 +45,7 @@ see \link{glmnet-details}. ## Model fit template: ## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## alpha = double(1), family = "binomial") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd index 4f33654bd..ce6abb50f 100644 --- a/man/details_logistic_reg_keras.Rd +++ b/man/details_logistic_reg_keras.Rd @@ -21,10 +21,14 @@ For \code{penalty}, the amount of regularization is \emph{only} L2 penalty (i.e. ridge or weight decay). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1)) \%>\% set_engine("keras") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -34,7 +38,7 @@ ridge or weight decay). ## Model fit template: ## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), ## hidden_units = 1, act = "linear") -} +}\if{html}{\out{
}} \code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd index 7013d6a00..5b4c2fbe2 100644 --- a/man/details_logistic_reg_spark.Rd +++ b/man/details_logistic_reg_spark.Rd @@ -25,10 +25,14 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while \code{mixture = 0} indicates ridge regression. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("spark") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -40,7 +44,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), ## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "binomial") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd index c74ca3c39..1bbf10b3c 100644 --- a/man/details_logistic_reg_stan.Rd +++ b/man/details_logistic_reg_stan.Rd @@ -38,17 +38,21 @@ See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and and other options. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% set_engine("stan") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Computational engine: stan ## ## Model fit template: ## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::binomial, refresh = 0) -} +}\if{html}{\out{
}} Note that the \code{refresh} default prevents logging of the estimation process. Change this value in \code{set_engine()} to show the MCMC logs. diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index 650ab1a93..5e0323c44 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -36,19 +36,23 @@ See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more informatio \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) logistic_reg() \%>\% set_engine("stan_glmer") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) ## ## Computational engine: stan_glmer ## ## Model fit template: ## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::binomial, refresh = 0) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -92,7 +96,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) data("toenail", package = "HSAUR3") logistic_reg() \%>\% @@ -102,7 +108,9 @@ logistic_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- logistic_reg() \%>\% diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd index c01c382eb..cfc390da8 100644 --- a/man/details_mars_earth.Rd +++ b/man/details_mars_earth.Rd @@ -26,11 +26,15 @@ columns. For a data frame \code{x}, the default is \code{\link[earth:earth]{earth::earth()}} and the reference below). } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% set_engine("earth") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## MARS Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## MARS Model Specification (regression) ## ## Main Arguments: ## num_terms = integer(1) @@ -43,14 +47,18 @@ columns. For a data frame \code{x}, the default is ## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## nprune = integer(1), degree = integer(1), pmethod = character(1), ## keepxy = TRUE) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% set_engine("earth") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## MARS Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## MARS Model Specification (classification) ## ## Main Arguments: ## num_terms = integer(1) @@ -66,7 +74,7 @@ columns. For a data frame \code{x}, the default is ## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## nprune = integer(1), degree = integer(1), pmethod = character(1), ## glm = list(family = stats::binomial), keepxy = TRUE) -} +}\if{html}{\out{
}} An alternate method for using MARs for categorical outcomes can be found in \code{\link[=discrim_flexible]{discrim_flexible()}}. diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index dd6a9b355..3279187db 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -41,7 +41,9 @@ no improvement before stopping. (default: 5L). } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), dropout = double(1), @@ -52,7 +54,9 @@ no improvement before stopping. (default: 5L). set_engine("brulee") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) ## ## Main Arguments: ## hidden_units = integer(1) @@ -68,13 +72,15 @@ no improvement before stopping. (default: 5L). ## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), ## penalty = double(1), dropout = double(1), epochs = integer(1), ## activation = character(1), learn_rate = double(1)) -} +}\if{html}{\out{
}} Note that parsnip automatically sets linear activation in the last layer. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), dropout = double(1), @@ -85,7 +91,9 @@ layer. set_engine("brulee") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) ## ## Main Arguments: ## hidden_units = integer(1) @@ -101,7 +109,7 @@ layer. ## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), ## penalty = double(1), dropout = double(1), epochs = integer(1), ## activation = character(1), learn_rate = double(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd index 8fb97ed09..4f587bba1 100644 --- a/man/details_mlp_keras.Rd +++ b/man/details_mlp_keras.Rd @@ -21,7 +21,9 @@ This model has 5 tuning parameters: } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), dropout = double(1), @@ -31,7 +33,9 @@ This model has 5 tuning parameters: set_engine("keras") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) ## ## Main Arguments: ## hidden_units = integer(1) @@ -46,10 +50,12 @@ This model has 5 tuning parameters: ## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), ## penalty = double(1), dropout = double(1), epochs = integer(1), ## activation = character(1)) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), dropout = double(1), @@ -59,7 +65,9 @@ This model has 5 tuning parameters: set_engine("keras") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) ## ## Main Arguments: ## hidden_units = integer(1) @@ -74,7 +82,7 @@ This model has 5 tuning parameters: ## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), ## penalty = double(1), dropout = double(1), epochs = integer(1), ## activation = character(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index d6120222d..e545b0f46 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -23,7 +23,9 @@ some models, you may need to pass this value in via \code{\link[=set_engine]{set_engine()}} so that the model does not fail. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), epochs = integer(1) @@ -31,7 +33,9 @@ some models, you may need to pass this value in via set_engine("nnet") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) ## ## Main Arguments: ## hidden_units = integer(1) @@ -43,13 +47,15 @@ some models, you may need to pass this value in via ## Model fit template: ## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), ## decay = double(1), maxit = integer(1), trace = FALSE, linout = TRUE) -} +}\if{html}{\out{
}} Note that parsnip automatically sets linear activation in the last layer. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{mlp( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{mlp( hidden_units = integer(1), penalty = double(1), epochs = integer(1) @@ -57,7 +63,9 @@ layer. set_engine("nnet") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) ## ## Main Arguments: ## hidden_units = integer(1) @@ -69,7 +77,7 @@ layer. ## Model fit template: ## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1), ## decay = double(1), maxit = integer(1), trace = FALSE, linout = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_multinom_reg_brulee.Rd b/man/details_multinom_reg_brulee.Rd index b7545dcc8..38b730089 100644 --- a/man/details_multinom_reg_brulee.Rd +++ b/man/details_multinom_reg_brulee.Rd @@ -41,10 +41,14 @@ no improvement before stopping. (default: 5L). } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% set_engine("brulee") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -54,7 +58,7 @@ no improvement before stopping. (default: 5L). ## Model fit template: ## brulee::brulee_multinomial_reg(x = missing_arg(), y = missing_arg(), ## penalty = double(1)) -} +}\if{html}{\out{
}} Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index f8c9c59ac..7ee652583 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -26,10 +26,14 @@ value. For more details about this, and the \code{glmnet} model in general, see \link{glmnet-details}. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) ## ## Main Arguments: ## penalty = 0 @@ -40,7 +44,7 @@ see \link{glmnet-details}. ## Model fit template: ## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## alpha = double(1), family = "multinomial") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd index 3279b74a4..a46f4273d 100644 --- a/man/details_multinom_reg_keras.Rd +++ b/man/details_multinom_reg_keras.Rd @@ -20,10 +20,14 @@ For \code{penalty}, the amount of regularization is \emph{only} L2 penalty (i.e. ridge or weight decay). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% set_engine("keras") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -33,7 +37,7 @@ ridge or weight decay). ## Model fit template: ## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), ## hidden_units = 1, act = "linear") -} +}\if{html}{\out{
}} \code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index 3f175549b..51a01c386 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -20,10 +20,14 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay). } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% set_engine("nnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -33,7 +37,7 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty ## Model fit template: ## nnet::multinom(formula = missing_arg(), data = missing_arg(), ## decay = double(1), trace = FALSE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd index d68e78321..99c7258a2 100644 --- a/man/details_multinom_reg_spark.Rd +++ b/man/details_multinom_reg_spark.Rd @@ -24,10 +24,14 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while \code{mixture = 0} indicates ridge regression. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("spark") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) ## ## Main Arguments: ## penalty = double(1) @@ -39,7 +43,7 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while ## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), ## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1), ## family = "multinomial") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_naive_Bayes_klaR.Rd b/man/details_naive_Bayes_klaR.Rd index 4c9b6cb66..8dad0cfa8 100644 --- a/man/details_naive_Bayes_klaR.Rd +++ b/man/details_naive_Bayes_klaR.Rd @@ -22,12 +22,16 @@ Note that \code{usekernel} is always set to \code{TRUE} for the \code{klaR} engi \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% set_engine("klaR") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) ## ## Main Arguments: ## smoothness = numeric(0) @@ -38,7 +42,7 @@ naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% ## Model fit template: ## discrim::klar_bayes_wrapper(x = missing_arg(), y = missing_arg(), ## adjust = numeric(0), fL = numeric(0), usekernel = TRUE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd index bb0941287..d931f13c2 100644 --- a/man/details_naive_Bayes_naivebayes.Rd +++ b/man/details_naive_Bayes_naivebayes.Rd @@ -20,12 +20,16 @@ This model has 2 tuning parameter: \subsection{Translation from parsnip to the original package}{ -The \strong{discrim} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(discrim) +The \strong{discrim} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(discrim) naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% set_engine("naivebayes") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) ## ## Main Arguments: ## smoothness = numeric(0) @@ -36,7 +40,7 @@ naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% ## Model fit template: ## naivebayes::naive_bayes(x = missing_arg(), y = missing_arg(), ## adjust = numeric(0), laplace = numeric(0), usekernel = TRUE) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd index 945acaec5..6a645c942 100644 --- a/man/details_nearest_neighbor_kknn.Rd +++ b/man/details_nearest_neighbor_kknn.Rd @@ -20,7 +20,9 @@ default: ‘optimal’) } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{nearest_neighbor( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{nearest_neighbor( neighbors = integer(1), weight_func = character(1), dist_power = double(1) @@ -28,7 +30,9 @@ default: ‘optimal’) set_engine("kknn") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## K-Nearest Neighbor Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## K-Nearest Neighbor Model Specification (regression) ## ## Main Arguments: ## neighbors = integer(1) @@ -40,13 +44,15 @@ default: ‘optimal’) ## Model fit template: ## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), ## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1)) -} +}\if{html}{\out{
}} \code{min_rows()} will adjust the number of neighbors if the chosen value if it is not consistent with the actual data dimensions. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{nearest_neighbor( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{nearest_neighbor( neighbors = integer(1), weight_func = character(1), dist_power = double(1) @@ -54,7 +60,9 @@ it is not consistent with the actual data dimensions. set_engine("kknn") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## K-Nearest Neighbor Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## K-Nearest Neighbor Model Specification (classification) ## ## Main Arguments: ## neighbors = integer(1) @@ -66,7 +74,7 @@ it is not consistent with the actual data dimensions. ## Model fit template: ## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), ## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index 0ed70e557..c1a6b4e70 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -20,13 +20,17 @@ see below) \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{plsmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(plsmod) +The \strong{plsmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(plsmod) pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% set_engine("mixOmics") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## PLS Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## PLS Model Specification (regression) ## ## Main Arguments: ## predictor_prop = double(1) @@ -37,7 +41,7 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## Model fit template: ## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), ## ncomp = integer(1)) -} +}\if{html}{\out{
}} \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} is a function that: \itemize{ @@ -53,13 +57,17 @@ for sparse models. \subsection{Translation from parsnip to the underlying model call (classification)}{ -The \strong{plsmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(plsmod) +The \strong{plsmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(plsmod) pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% set_engine("mixOmics") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## PLS Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## PLS Model Specification (classification) ## ## Main Arguments: ## predictor_prop = double(1) @@ -70,7 +78,7 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## Model fit template: ## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), ## ncomp = integer(1)) -} +}\if{html}{\out{
}} In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same role as above but eventually targets \code{\link[mixOmics:plsda]{mixOmics::plsda()}} @@ -80,7 +88,9 @@ or \code{\link[mixOmics:splsda]{mixOmics::splsda()}} . \subsection{Installing mixOmics}{ This package is available via the Bioconductor repository and is not -accessible via CRAN. You can install using:\if{html}{\out{
}}\preformatted{ if (!require("remotes", quietly = TRUE)) \{ +accessible via CRAN. You can install using: + +\if{html}{\out{
}}\preformatted{ if (!require("remotes", quietly = TRUE)) \{ install.packages("remotes") \} diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd index 5931e0b6f..e624d36e6 100644 --- a/man/details_poisson_reg_gee.Rd +++ b/man/details_poisson_reg_gee.Rd @@ -20,19 +20,23 @@ values. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "gee") \%>\% set_engine("gee") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: gee ## ## Model fit template: ## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), ## family = stats::poisson) -} +}\if{html}{\out{
}} \code{multilevelmod::gee_fit()} is a wrapper model around \code{gee()}. } @@ -56,10 +60,14 @@ Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the -call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +call would look like: + +\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("gee", corstr = "exchangeable") \%>\% @@ -68,7 +76,9 @@ poisson_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the GEE formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- poisson_reg() \%>\% diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index f27aa6bdb..ba525fee9 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -15,19 +15,23 @@ This engine has no tuning parameters. \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{poissonreg} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(poissonreg) +The \strong{poissonreg} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(poissonreg) poisson_reg() \%>\% set_engine("glm") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: glm ## ## Model fit template: ## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::poisson) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index 8161ebd09..f3258bb5a 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -16,19 +16,23 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "glmer") \%>\% set_engine("glmer") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: glmer ## ## Model fit template: ## lme4::glmer(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), ## family = stats::poisson) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -72,7 +76,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("glmer") \%>\% @@ -81,7 +87,9 @@ poisson_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- poisson_reg() \%>\% diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index 5d93f7644..26c1e11a6 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -28,12 +28,16 @@ see \link{glmnet-details}. \subsection{Translation from parsnip to the original package}{ -The \strong{poissonreg} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(poissonreg) +The \strong{poissonreg} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(poissonreg) poisson_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Main Arguments: ## penalty = 0 @@ -44,7 +48,7 @@ poisson_reg(penalty = double(1), mixture = double(1)) \%>\% ## Model fit template: ## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## alpha = double(1), family = "poisson") -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 39f315a10..622d441c0 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -17,18 +17,22 @@ This engine has no tuning parameters. \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{poissonreg} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(poissonreg) +The \strong{poissonreg} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(poissonreg) poisson_reg() \%>\% set_engine("hurdle") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: hurdle ## ## Model fit template: ## pscl::hurdle(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing and special formulas for zero-inflated Poisson models}{ @@ -45,14 +49,18 @@ of zero counts. These sets of terms are separated by a bar. For example, infrastructure (e.g. \code{model.matrix()}) When fitting a parsnip model with this engine directly, the formula -method is required and the formula is just passed through. For example:\if{html}{\out{
}}\preformatted{library(tidymodels) +method is required and the formula is just passed through. For example: + +\if{html}{\out{
}}\preformatted{library(tidymodels) tidymodels_prefer() data("bioChemists", package = "pscl") poisson_reg() \%>\% set_engine("hurdle") \%>\% fit(art ~ fem + mar | ment, data = bioChemists) -}\if{html}{\out{
}}\preformatted{## parsnip model object +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## parsnip model object ## ## ## Call: @@ -65,12 +73,14 @@ poisson_reg() \%>\% ## Zero hurdle model coefficients (binomial with logit link): ## (Intercept) ment ## 0.24871 0.08092 -} +}\if{html}{\out{
}} However, when using a workflow, the best approach is to avoid using \code{\link[workflows:add_formula]{workflows::add_formula()}} and use \code{\link[workflows:add_variables]{workflows::add_variables()}} in -conjunction with a model formula:\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") +conjunction with a model formula: + +\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") spec <- poisson_reg() \%>\% set_engine("hurdle") @@ -79,7 +89,9 @@ workflow() \%>\% add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) \%>\% add_model(spec, formula = art ~ fem + mar | ment) \%>\% fit(data = bioChemists) -}\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ ## Preprocessor: Variables ## Model: poisson_reg() ## @@ -99,7 +111,7 @@ workflow() \%>\% ## Zero hurdle model coefficients (binomial with logit link): ## (Intercept) ment ## 0.24871 0.08092 -} +}\if{html}{\out{
}} The reason for this is that \code{\link[workflows:add_formula]{workflows::add_formula()}} will try to diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index 6579b82dc..a86d72afe 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -39,19 +39,23 @@ and other options. \subsection{Translation from parsnip to the original package}{ -The \strong{poissonreg} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(poissonreg) +The \strong{poissonreg} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(poissonreg) poisson_reg() \%>\% set_engine("stan") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: stan ## ## Model fit template: ## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::poisson) -} +}\if{html}{\out{
}} Note that the \code{refresh} default prevents logging of the estimation process. Change this value in \code{set_engine()} to show the MCMC logs. diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index f7c06885f..e87b57af6 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -36,19 +36,23 @@ See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more informatio \subsection{Translation from parsnip to the original package}{ -The \strong{multilevelmod} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(multilevelmod) +The \strong{multilevelmod} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "stan_glmer") \%>\% set_engine("stan_glmer") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: stan_glmer ## ## Model fit template: ## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), family = stats::poisson, refresh = 0) -} +}\if{html}{\out{
}} } \subsection{Predicting new samples}{ @@ -92,7 +96,9 @@ next section. The model can accept case weights. -With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting: + +\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("stan_glmer") \%>\% @@ -101,7 +107,9 @@ poisson_reg() \%>\% When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using -\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) +\code{add_variables()} then supply the typical formula when adding the model: + +\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- poisson_reg() \%>\% diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index 44c05c028..593846154 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -17,19 +17,23 @@ This engine has no tuning parameters. \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{poissonreg} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(poissonreg) +The \strong{poissonreg} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(poissonreg) poisson_reg() \%>\% set_engine("zeroinfl") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) ## ## Computational engine: zeroinfl ## ## Model fit template: ## pscl::zeroinfl(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg()) -} +}\if{html}{\out{
}} } \subsection{Preprocessing and special formulas for zero-inflated Poisson models}{ @@ -46,14 +50,18 @@ of zero counts. These sets of terms are separated by a bar. For example, infrastructure (e.g. \code{model.matrix()}) When fitting a parsnip model with this engine directly, the formula -method is required and the formula is just passed through. For example:\if{html}{\out{
}}\preformatted{library(tidymodels) +method is required and the formula is just passed through. For example: + +\if{html}{\out{
}}\preformatted{library(tidymodels) tidymodels_prefer() data("bioChemists", package = "pscl") poisson_reg() \%>\% set_engine("zeroinfl") \%>\% fit(art ~ fem + mar | ment, data = bioChemists) -}\if{html}{\out{
}}\preformatted{## parsnip model object +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## parsnip model object ## ## ## Call: @@ -66,12 +74,14 @@ poisson_reg() \%>\% ## Zero-inflation model coefficients (binomial with logit link): ## (Intercept) ment ## -0.363 -0.166 -} +}\if{html}{\out{
}} However, when using a workflow, the best approach is to avoid using \code{\link[workflows:add_formula]{workflows::add_formula()}} and use \code{\link[workflows:add_variables]{workflows::add_variables()}} in -conjunction with a model formula:\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") +conjunction with a model formula: + +\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") spec <- poisson_reg() \%>\% set_engine("zeroinfl") @@ -80,7 +90,9 @@ workflow() \%>\% add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) \%>\% add_model(spec, formula = art ~ fem + mar | ment) \%>\% fit(data = bioChemists) -}\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ ## Preprocessor: Variables ## Model: poisson_reg() ## @@ -100,7 +112,7 @@ workflow() \%>\% ## Zero-inflation model coefficients (binomial with logit link): ## (Intercept) ment ## -0.363 -0.166 -} +}\if{html}{\out{
}} The reason for this is that \code{\link[workflows:add_formula]{workflows::add_formula()}} will try to diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index f23032f1c..19753317f 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -27,12 +27,16 @@ see \link{glmnet-details}. \subsection{Translation from parsnip to the original package}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) ## ## Main Arguments: ## penalty = 0 @@ -43,7 +47,7 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% ## Model fit template: ## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(), ## family = missing_arg(), alpha = double(1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ @@ -73,7 +77,9 @@ This is different than the syntax offered by the recommended here. For example, in this model, the numeric column \code{rx} is used to estimate -two different baseline hazards for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) +two different baseline hazards for each value of the column: + +\if{html}{\out{
}}\preformatted{library(survival) library(censored) library(dplyr) library(tidyr) @@ -89,12 +95,14 @@ pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) predict(mod, pred_data, type = "survival", time = 500) \%>\% bind_cols(pred_data) \%>\% unnest(.pred) -}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 ## .time .pred_survival age ecog.ps rx ## ## 1 500 0.666 50 1 1 ## 2 500 0.769 50 1 2 -} +}\if{html}{\out{
}} Note that columns used in the \code{strata()} function \emph{will} also be estimated in the regular portion of the model (i.e., within the linear diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd index 2f9fd6931..8a5b1306f 100644 --- a/man/details_proportional_hazards_survival.Rd +++ b/man/details_proportional_hazards_survival.Rd @@ -15,20 +15,24 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) proportional_hazards() \%>\% set_engine("survival") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) ## ## Computational engine: survival ## ## Model fit template: ## survival::coxph(formula = missing_arg(), data = missing_arg(), ## x = TRUE, model = TRUE) -} +}\if{html}{\out{
}} } \subsection{Other details}{ @@ -45,14 +49,18 @@ hazard to differ between groups contained in the function. The column used inside \code{strata()} is treated as qualitative no matter its type. For example, in this model, the numeric column \code{rx} is used to estimate -two different baseline hazards for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) +two different baseline hazards for each value of the column: + +\if{html}{\out{
}}\preformatted{library(survival) proportional_hazards() \%>\% fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% extract_fit_engine() \%>\% # Two different hazards for each value of 'rx' basehaz() -}\if{html}{\out{
}}\preformatted{## hazard time strata +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## hazard time strata ## 1 0.02250134 59 rx=1 ## 2 0.05088586 115 rx=1 ## 3 0.09467873 156 rx=1 @@ -79,7 +87,7 @@ proportional_hazards() \%>\% ## 24 0.50804209 1129 rx=2 ## 25 0.50804209 1206 rx=2 ## 26 0.50804209 1227 rx=2 -} +}\if{html}{\out{
}} Note that columns used in the \code{strata()} function will not be estimated in the regular portion of the model (i.e., within the linear predictor). diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd index 489351502..b1786e02b 100644 --- a/man/details_rand_forest_party.Rd +++ b/man/details_rand_forest_party.Rd @@ -22,20 +22,24 @@ This model has 3 tuning parameters: \subsection{Translation from parsnip to the original package (censored regression)}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) rand_forest() \%>\% set_engine("party") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (censored regression) ## ## Computational engine: party ## ## Model fit template: ## censored::cond_inference_surv_cforest(formula = missing_arg(), ## data = missing_arg()) -} +}\if{html}{\out{
}} \code{censored::cond_inference_surv_cforest()} is a wrapper around \code{\link[party:cforest]{party::cforest()}} (and other functions) that makes diff --git a/man/details_rand_forest_randomForest.Rd b/man/details_rand_forest_randomForest.Rd index 8f621e72d..5f5271765 100644 --- a/man/details_rand_forest_randomForest.Rd +++ b/man/details_rand_forest_randomForest.Rd @@ -29,7 +29,9 @@ regression. default. For classification, a value of 10 is used. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -37,7 +39,9 @@ default. For classification, a value of 10 is used. set_engine("randomForest") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) ## ## Main Arguments: ## mtry = integer(1) @@ -50,13 +54,15 @@ default. For classification, a value of 10 is used. ## randomForest::randomForest(x = missing_arg(), y = missing_arg(), ## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1), ## x)) -} +}\if{html}{\out{
}} \code{min_rows()} and \code{min_cols()} will adjust the number of neighbors if the chosen value if it is not consistent with the actual data dimensions. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -64,7 +70,9 @@ chosen value if it is not consistent with the actual data dimensions. set_engine("randomForest") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) ## ## Main Arguments: ## mtry = integer(1) @@ -77,7 +85,7 @@ chosen value if it is not consistent with the actual data dimensions. ## randomForest::randomForest(x = missing_arg(), y = missing_arg(), ## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1), ## x)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd index f28dd8d89..e3cefb47e 100644 --- a/man/details_rand_forest_ranger.Rd +++ b/man/details_rand_forest_ranger.Rd @@ -27,7 +27,9 @@ below) default. For classification, a value of 10 is used. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -35,7 +37,9 @@ default. For classification, a value of 10 is used. set_engine("ranger") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) ## ## Main Arguments: ## mtry = integer(1) @@ -49,13 +53,15 @@ default. For classification, a value of 10 is used. ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} \code{min_rows()} and \code{min_cols()} will adjust the number of neighbors if the chosen value if it is not consistent with the actual data dimensions. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -63,7 +69,9 @@ chosen value if it is not consistent with the actual data dimensions. set_engine("ranger") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) ## ## Main Arguments: ## mtry = integer(1) @@ -77,7 +85,7 @@ chosen value if it is not consistent with the actual data dimensions. ## mtry = min_cols(~integer(1), x), num.trees = integer(1), ## min.node.size = min_rows(~integer(1), x), num.threads = 1, ## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) -} +}\if{html}{\out{
}} Note that a \code{ranger} probability forest is always fit (unless the \code{probability} argument is changed by the user via diff --git a/man/details_rand_forest_spark.Rd b/man/details_rand_forest_spark.Rd index 7f306108c..4a791bb6b 100644 --- a/man/details_rand_forest_spark.Rd +++ b/man/details_rand_forest_spark.Rd @@ -26,7 +26,9 @@ in \code{\link[sparklyr:ml_random_forest]{sparklyr::ml_random_forest()}} is regression. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -34,7 +36,9 @@ regression. set_engine("spark") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (regression) ## ## Main Arguments: ## mtry = integer(1) @@ -48,13 +52,15 @@ regression. ## type = "regression", feature_subset_strategy = integer(1), ## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1), ## x), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} \code{min_rows()} and \code{min_cols()} will adjust the number of neighbors if the chosen value if it is not consistent with the actual data dimensions. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{rand_forest( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{rand_forest( mtry = integer(1), trees = integer(1), min_n = integer(1) @@ -62,7 +68,9 @@ chosen value if it is not consistent with the actual data dimensions. set_engine("spark") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (classification) ## ## Main Arguments: ## mtry = integer(1) @@ -76,7 +84,7 @@ chosen value if it is not consistent with the actual data dimensions. ## type = "classification", feature_subset_strategy = integer(1), ## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1), ## x), seed = sample.int(10^5, 1)) -} +}\if{html}{\out{
}} } \subsection{Preprocessing requirements}{ diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index 4816de6a7..f4687c460 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -30,7 +30,9 @@ default: 1.0) \subsection{Translation from parsnip to the underlying model call (regression)}{ -The \strong{rules} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(rules) +The \strong{rules} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(rules) rule_fit( mtry = numeric(1), @@ -45,7 +47,9 @@ rule_fit( set_engine("xrf") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (regression) ## ## Main Arguments: ## mtry = numeric(1) @@ -64,12 +68,14 @@ rule_fit( ## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), ## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), ## subsample = numeric(1), lambda = numeric(1)) -} +}\if{html}{\out{
}} } \subsection{Translation from parsnip to the underlying model call (classification)}{ -The \strong{rules} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(rules) +The \strong{rules} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(rules) rule_fit( mtry = numeric(1), @@ -84,7 +90,9 @@ rule_fit( set_engine("xrf") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (classification) ## ## Main Arguments: ## mtry = numeric(1) @@ -103,7 +111,7 @@ rule_fit( ## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), ## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), ## subsample = numeric(1), lambda = numeric(1)) -} +}\if{html}{\out{
}} } \subsection{Differences from the xrf package}{ diff --git a/man/details_surv_reg_flexsurv.Rd b/man/details_surv_reg_flexsurv.Rd index b654470e3..ce6804418 100644 --- a/man/details_surv_reg_flexsurv.Rd +++ b/man/details_surv_reg_flexsurv.Rd @@ -16,11 +16,15 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{surv_reg(dist = character(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{surv_reg(dist = character(1)) \%>\% set_engine("flexsurv") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (regression) ## ## Main Arguments: ## dist = character(1) @@ -30,7 +34,7 @@ This model has 1 tuning parameters: ## Model fit template: ## flexsurv::flexsurvreg(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), dist = character(1)) -} +}\if{html}{\out{
}} } \subsection{Other details}{ diff --git a/man/details_surv_reg_survival.Rd b/man/details_surv_reg_survival.Rd index cc863e3c2..b57f851d8 100644 --- a/man/details_surv_reg_survival.Rd +++ b/man/details_surv_reg_survival.Rd @@ -16,11 +16,15 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{surv_reg(dist = character(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +\if{html}{\out{
}}\preformatted{surv_reg(dist = character(1)) \%>\% set_engine("survival") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (regression) ## ## Main Arguments: ## dist = character(1) @@ -30,7 +34,7 @@ This model has 1 tuning parameters: ## Model fit template: ## survival::survreg(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), dist = character(1), model = TRUE) -} +}\if{html}{\out{
}} } \subsection{Other details}{ @@ -48,12 +52,16 @@ parameter to differ between groups contained in the function. The column used inside \code{strata()} is treated as qualitative no matter its type. For example, in this model, the numeric column \code{rx} is used to estimate -two different scale parameters for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) +two different scale parameters for each value of the column: + +\if{html}{\out{
}}\preformatted{library(survival) surv_reg() \%>\% fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% extract_fit_engine() -}\if{html}{\out{
}}\preformatted{## Call: +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Call: ## survival::survreg(formula = Surv(futime, fustat) ~ age + strata(rx), ## data = data, model = TRUE) ## @@ -68,7 +76,7 @@ surv_reg() \%>\% ## Loglik(model)= -89.4 Loglik(intercept only)= -97.1 ## Chisq= 15.36 on 1 degrees of freedom, p= 8.88e-05 ## n= 26 -} +}\if{html}{\out{
}} } \subsection{References}{ diff --git a/man/details_survival_reg_flexsurv.Rd b/man/details_survival_reg_flexsurv.Rd index d50009458..92349f89e 100644 --- a/man/details_survival_reg_flexsurv.Rd +++ b/man/details_survival_reg_flexsurv.Rd @@ -18,13 +18,17 @@ This model has 1 tuning parameters: \subsection{Translation from parsnip to the original package}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) survival_reg(dist = character(1)) \%>\% set_engine("flexsurv") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) ## ## Main Arguments: ## dist = character(1) @@ -34,7 +38,7 @@ survival_reg(dist = character(1)) \%>\% ## Model fit template: ## flexsurv::flexsurvreg(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), dist = character(1)) -} +}\if{html}{\out{
}} } \subsection{Other details}{ diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd index dab1c2c02..02d130f27 100644 --- a/man/details_survival_reg_survival.Rd +++ b/man/details_survival_reg_survival.Rd @@ -18,13 +18,17 @@ This model has 1 tuning parameters: \subsection{Translation from parsnip to the original package}{ -The \strong{censored} extension package is required to fit this model.\if{html}{\out{
}}\preformatted{library(censored) +The \strong{censored} extension package is required to fit this model. + +\if{html}{\out{
}}\preformatted{library(censored) survival_reg(dist = character(1)) \%>\% set_engine("survival") \%>\% set_mode("censored regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) ## ## Main Arguments: ## dist = character(1) @@ -34,7 +38,7 @@ survival_reg(dist = character(1)) \%>\% ## Model fit template: ## survival::survreg(formula = missing_arg(), data = missing_arg(), ## weights = missing_arg(), dist = character(1), model = TRUE) -} +}\if{html}{\out{
}} } \subsection{Other details}{ @@ -53,12 +57,16 @@ parameter to differ between groups contained in the function. The column used inside \code{strata()} is treated as qualitative no matter its type. For example, in this model, the numeric column \code{rx} is used to estimate -two different scale parameters for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) +two different scale parameters for each value of the column: + +\if{html}{\out{
}}\preformatted{library(survival) survival_reg() \%>\% fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% extract_fit_engine() -}\if{html}{\out{
}}\preformatted{## Call: +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Call: ## survival::survreg(formula = Surv(futime, fustat) ~ age + strata(rx), ## data = data, model = TRUE) ## @@ -73,7 +81,7 @@ survival_reg() \%>\% ## Loglik(model)= -89.4 Loglik(intercept only)= -97.1 ## Chisq= 15.36 on 1 degrees of freedom, p= 8.88e-05 ## n= 26 -} +}\if{html}{\out{
}} } \subsection{Case weights}{ diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index 26a47467d..2e7c70f81 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -24,14 +24,18 @@ This engine fits models that are L2-regularized for L2-loss. In the are types 1 (classification) and 11 (regression). } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{svm_linear( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{svm_linear( cost = double(1), margin = double(1) ) \%>\% set_engine("LiblineaR") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression) ## ## Main Arguments: ## cost = double(1) @@ -42,16 +46,20 @@ are types 1 (classification) and 11 (regression). ## Model fit template: ## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), ## svr_eps = double(1), type = 11) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{svm_linear( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{svm_linear( cost = double(1) ) \%>\% set_engine("LiblineaR") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification) ## ## Main Arguments: ## cost = double(1) @@ -61,7 +69,7 @@ are types 1 (classification) and 11 (regression). ## Model fit template: ## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1), ## type = 1) -} +}\if{html}{\out{
}} The \code{margin} parameter does not apply to classification models. diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd index e772b6680..ac377d121 100644 --- a/man/details_svm_linear_kernlab.Rd +++ b/man/details_svm_linear_kernlab.Rd @@ -20,14 +20,18 @@ This model has 2 tuning parameters: } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{svm_linear( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{svm_linear( cost = double(1), margin = double(1) ) \%>\% set_engine("kernlab") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression) ## ## Main Arguments: ## cost = double(1) @@ -38,16 +42,20 @@ This model has 2 tuning parameters: ## Model fit template: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## epsilon = double(1), kernel = "vanilladot") -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{svm_linear( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{svm_linear( cost = double(1) ) \%>\% set_engine("kernlab") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification) ## ## Main Arguments: ## cost = double(1) @@ -57,7 +65,7 @@ This model has 2 tuning parameters: ## Model fit template: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## kernel = "vanilladot", prob.model = TRUE) -} +}\if{html}{\out{
}} The \code{margin} parameter does not apply to classification models. diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd index 546fe1e81..6896183fc 100644 --- a/man/details_svm_poly_kernlab.Rd +++ b/man/details_svm_poly_kernlab.Rd @@ -22,7 +22,9 @@ This model has 4 tuning parameters: } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{svm_poly( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{svm_poly( cost = double(1), degree = integer(1), scale_factor = double(1), @@ -31,7 +33,9 @@ This model has 4 tuning parameters: set_engine("kernlab") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (regression) ## ## Main Arguments: ## cost = double(1) @@ -45,10 +49,12 @@ This model has 4 tuning parameters: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## epsilon = double(1), kernel = "polydot", kpar = list(degree = ~integer(1), ## scale = ~double(1))) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{svm_poly( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{svm_poly( cost = double(1), degree = integer(1), scale_factor = double(1) @@ -56,7 +62,9 @@ This model has 4 tuning parameters: set_engine("kernlab") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (classification) ## ## Main Arguments: ## cost = double(1) @@ -69,7 +77,7 @@ This model has 4 tuning parameters: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## kernel = "polydot", prob.model = TRUE, kpar = list(degree = ~integer(1), ## scale = ~double(1))) -} +}\if{html}{\out{
}} The \code{margin} parameter does not apply to classification models. diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd index c3ff8ad24..640a24c8d 100644 --- a/man/details_svm_rbf_kernlab.Rd +++ b/man/details_svm_rbf_kernlab.Rd @@ -28,7 +28,9 @@ numbers so, without setting the seed before fitting, the model will not be reproducible. } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{svm_rbf( +\subsection{Translation from parsnip to the original package (regression)}{ + +\if{html}{\out{
}}\preformatted{svm_rbf( cost = double(1), rbf_sigma = double(1), margin = double(1) @@ -36,7 +38,9 @@ be reproducible. set_engine("kernlab") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (regression) ## ## Main Arguments: ## cost = double(1) @@ -48,17 +52,21 @@ be reproducible. ## Model fit template: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## epsilon = double(1), kernel = "rbfdot", kpar = list(sigma = ~double(1))) -} +}\if{html}{\out{
}} } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{svm_rbf( +\subsection{Translation from parsnip to the original package (classification)}{ + +\if{html}{\out{
}}\preformatted{svm_rbf( cost = double(1), rbf_sigma = double(1) ) \%>\% set_engine("kernlab") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (classification) ## ## Main Arguments: ## cost = double(1) @@ -69,7 +77,7 @@ be reproducible. ## Model fit template: ## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), ## kernel = "rbfdot", prob.model = TRUE, kpar = list(sigma = ~double(1))) -} +}\if{html}{\out{
}} The \code{margin} parameter does not apply to classification models. diff --git a/man/extract-parsnip.Rd b/man/extract-parsnip.Rd index 26520f85d..a9a8d8b60 100644 --- a/man/extract-parsnip.Rd +++ b/man/extract-parsnip.Rd @@ -49,10 +49,14 @@ model. There may be preprocessing operations that \code{parsnip} has executed on the data prior to giving it to the model. Bypassing these can lead to errors or silently generating incorrect predictions. -\strong{Good}:\if{html}{\out{
}}\preformatted{ parsnip_fit \%>\% predict(new_data) +\strong{Good}: + +\if{html}{\out{
}}\preformatted{ parsnip_fit \%>\% predict(new_data) }\if{html}{\out{
}} -\strong{Bad}:\if{html}{\out{
}}\preformatted{ parsnip_fit \%>\% extract_fit_engine() \%>\% predict(new_data) +\strong{Bad}: + +\if{html}{\out{
}}\preformatted{ parsnip_fit \%>\% extract_fit_engine() \%>\% predict(new_data) }\if{html}{\out{
}} } \examples{ diff --git a/man/glmnet-details.Rd b/man/glmnet-details.Rd index eb6dac1f8..f45308bd0 100644 --- a/man/glmnet-details.Rd +++ b/man/glmnet-details.Rd @@ -33,11 +33,15 @@ In tidymodels, our \code{predict()} methods are defined to make one prediction at a time. For this model, that means predictions are for a single penalty value. For this reason, models that have glmnet engines require the user to always specify a single penalty value when the model -is defined. For example, for linear regression:\if{html}{\out{
}}\preformatted{linear_reg(penalty = 1) \%>\% set_engine("glmnet") +is defined. For example, for linear regression: + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = 1) \%>\% set_engine("glmnet") }\if{html}{\out{
}} When the \code{predict()} method is called, it automatically uses the penalty -that was given when the model was defined. For example:\if{html}{\out{
}}\preformatted{library(tidymodels) +that was given when the model was defined. For example: + +\if{html}{\out{
}}\preformatted{library(tidymodels) fit <- linear_reg(penalty = 1) \%>\% @@ -46,28 +50,38 @@ fit <- # predict at penalty = 1 predict(fit, mtcars[1:3,]) -}\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 ## .pred ## ## 1 22.2 ## 2 21.5 ## 3 24.9 -} +}\if{html}{\out{
}} However, any penalty values can be predicted simultaneously using the -\code{multi_predict()} method:\if{html}{\out{
}}\preformatted{# predict at c(0.00, 0.01) +\code{multi_predict()} method: + +\if{html}{\out{
}}\preformatted{# predict at c(0.00, 0.01) multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) -}\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 ## .pred ## ## 1 ## 2 ## 3 -}\if{html}{\out{
}}\preformatted{# unnested: +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# unnested: multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) \%>\% add_rowindex() \%>\% unnest(cols = ".pred") -}\if{html}{\out{
}}\preformatted{## # A tibble: 6 × 3 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 6 × 3 ## penalty .pred .row ## ## 1 0 22.6 1 @@ -76,13 +90,17 @@ multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) \%>\% ## 4 0.01 22.1 2 ## 5 0 26.3 3 ## 6 0.01 26.3 3 -} +}\if{html}{\out{
}} \subsection{Where did \code{lambda} go?}{ -It may appear odd that the \code{lambda} value does not get used in the fit:\if{html}{\out{
}}\preformatted{linear_reg(penalty = 1) \%>\% +It may appear odd that the \code{lambda} value does not get used in the fit: + +\if{html}{\out{
}}\preformatted{linear_reg(penalty = 1) \%>\% set_engine("glmnet") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) ## ## Main Arguments: ## penalty = 1 @@ -92,7 +110,7 @@ It may appear odd that the \code{lambda} value does not get used in the fit:\if{ ## Model fit template: ## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), ## family = "gaussian") -} +}\if{html}{\out{
}} Internally, the value of \code{penalty = 1} is saved in the parsnip object and no value is set for \code{lambda}. This enables the full path to be fit @@ -119,7 +137,9 @@ model (i.e., \code{mixture = 0}), you can get the \emph{wrong coefficients} if t path does not contain zero (see \href{https://github.com/tidymodels/parsnip/issues/431#issuecomment-782883848}{issue #431}). If we want to use our own path, the argument is passed as an -engine-specific option:\if{html}{\out{
}}\preformatted{coef_path_values <- c(0, 10^seq(-5, 1, length.out = 7)) +engine-specific option: + +\if{html}{\out{
}}\preformatted{coef_path_values <- c(0, 10^seq(-5, 1, length.out = 7)) fit_ridge <- linear_reg(penalty = 1, mixture = 0) \%>\% @@ -127,16 +147,22 @@ fit_ridge <- fit(mpg ~ ., data = mtcars) all.equal(sort(fit_ridge$fit$lambda), coef_path_values) -}\if{html}{\out{
}}\preformatted{## [1] TRUE -}\if{html}{\out{
}}\preformatted{# predict at penalty = 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] TRUE +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{# predict at penalty = 1 predict(fit_ridge, mtcars[1:3,]) -}\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 3 × 1 ## .pred ## ## 1 22.1 ## 2 21.8 ## 3 26.6 -} +}\if{html}{\out{
}} } \subsection{Tidying the model object}{ @@ -149,8 +175,12 @@ coefficients for the value given by \code{penalty}. When parsnip makes a model, it gives it an extra class. Use the \code{tidy()} method on the object, it produces coefficients for the penalty that was -originally requested:\if{html}{\out{
}}\preformatted{tidy(fit) -}\if{html}{\out{
}}\preformatted{## # A tibble: 11 × 3 +originally requested: + +\if{html}{\out{
}}\preformatted{tidy(fit) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 11 × 3 ## term estimate penalty ## ## 1 (Intercept) 35.3 1 @@ -164,14 +194,18 @@ originally requested:\if{html}{\out{
}}\preformatted{ti ## 9 am 0 1 ## 10 gear 0 1 ## 11 carb 0 1 -} +}\if{html}{\out{
}} Note that there is a \code{tidy()} method for \code{glmnet} objects in the \code{broom} package. If this is used directly on the underlying \code{glmnet} object, it -returns \emph{all of coefficients on the path}:\if{html}{\out{
}}\preformatted{# Use the basic tidy() method for glmnet +returns \emph{all of coefficients on the path}: + +\if{html}{\out{
}}\preformatted{# Use the basic tidy() method for glmnet all_tidy_coefs <- broom:::tidy.glmnet(fit$fit) all_tidy_coefs -}\if{html}{\out{
}}\preformatted{## # A tibble: 640 × 5 +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 640 × 5 ## term step estimate lambda dev.ratio ## ## 1 (Intercept) 1 20.1 5.15 0 @@ -185,9 +219,13 @@ all_tidy_coefs ## 9 (Intercept) 9 30.3 2.45 0.640 ## 10 (Intercept) 10 31.1 2.23 0.673 ## # … with 630 more rows -}\if{html}{\out{
}}\preformatted{length(unique(all_tidy_coefs$lambda)) -}\if{html}{\out{
}}\preformatted{## [1] 79 -} +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{length(unique(all_tidy_coefs$lambda)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [1] 79 +}\if{html}{\out{
}} This can be nice for plots but it might not contain the penalty value that you are interested in. diff --git a/man/null_model.Rd b/man/null_model.Rd index 976e51585..dc463bdde 100644 --- a/man/null_model.Rd +++ b/man/null_model.Rd @@ -25,27 +25,35 @@ following \emph{engines}: \section{Engine Details}{ Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: -\subsection{parsnip}{\if{html}{\out{
}}\preformatted{null_model() \%>\% +\subsection{parsnip}{ + +\if{html}{\out{
}}\preformatted{null_model() \%>\% set_engine("parsnip") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Model Specification (regression) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Model Specification (regression) ## ## Computational engine: parsnip ## ## Model fit template: ## nullmodel(x = missing_arg(), y = missing_arg()) -}\if{html}{\out{
}}\preformatted{null_model() \%>\% +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{null_model() \%>\% set_engine("parsnip") \%>\% set_mode("classification") \%>\% translate() -}\if{html}{\out{
}}\preformatted{## Model Specification (classification) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Model Specification (classification) ## ## Computational engine: parsnip ## ## Model fit template: ## nullmodel(x = missing_arg(), y = missing_arg()) -} +}\if{html}{\out{
}} } } diff --git a/man/parsnip_update.Rd b/man/parsnip_update.Rd index 046a88155..2788682b1 100644 --- a/man/parsnip_update.Rd +++ b/man/parsnip_update.Rd @@ -382,9 +382,7 @@ regularization used by some of the engines.} estimation. Possible values are: "\code{diagonal}", "\code{min_distance}", "\code{shrink_cov}", and "\code{shrink_mean}" (\code{sparsediscrim} engine only).} -\item{frac_common_cov}{Numeric values between zero and one.} - -\item{frac_identity}{Numeric values between zero and one.} +\item{frac_common_cov, frac_identity}{Numeric values between zero and one.} \item{select_features}{\code{TRUE} or \code{FALSE.} If \code{TRUE}, the model has the ability to eliminate a predictor (via penalization). Increasing diff --git a/man/reexports.Rd b/man/reexports.Rd index b498b5b7e..9622b3c65 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -28,10 +28,34 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + + \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}} + \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + + \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} }} From 68e5c97771dd6c9410397d29ea5dd5488b18ae63 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 21 Apr 2022 12:37:30 -0400 Subject: [PATCH 37/41] get xgb to stop being so chatty --- tests/testthat/test_boost_tree_xgboost.R | 40 +++++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index a53e15f06..690a8a0a1 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -12,6 +12,8 @@ test_that('xgboost execution, classification', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + set.seed(1) wts <- ifelse(runif(nrow(hpc)) < .1, 0, 1) wts <- importance_weights(wts) @@ -87,6 +89,9 @@ test_that('xgboost classification prediction', { skip_if_not_installed("xgboost") library(xgboost) + + ctrl$verbosity <- 0L + xy_fit <- fit_xy( hpc_xgboost, x = hpc[, num_pred], @@ -133,6 +138,8 @@ test_that('xgboost execution, regression', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + expect_error( res <- parsnip::fit( car_basic, @@ -170,6 +177,8 @@ test_that('xgboost regression prediction', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + xy_fit <- fit_xy( car_basic, x = mtcars[, -1], @@ -199,6 +208,8 @@ test_that('xgboost regression prediction', { test_that('xgboost alternate objective', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + spec <- boost_tree() %>% set_engine("xgboost", objective = "reg:pseudohubererror") %>% @@ -213,6 +224,8 @@ test_that('submodel prediction', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + reg_fit <- boost_tree(trees = 20, mode = "regression") %>% set_engine("xgboost") %>% @@ -231,7 +244,7 @@ test_that('submodel prediction', { class_fit <- boost_tree(trees = 20, mode = "classification") %>% set_engine("xgboost") %>% - fit(churn ~ ., data = wa_churn[-(1:4), c("churn", vars)]) + fit(churn ~ ., data = wa_churn[-(1:4), c("churn", vars)], control = ctrl) x <- xgboost::xgb.DMatrix(as.matrix(wa_churn[1:4, vars])) @@ -251,6 +264,9 @@ test_that('submodel prediction', { test_that('validation sets', { skip_if_not_installed("xgboost") + + ctrl$verbosity <- 0L + expect_error( reg_fit <- boost_tree(trees = 20, mode = "regression") %>% @@ -296,6 +312,9 @@ test_that('validation sets', { test_that('early stopping', { skip_if_not_installed("xgboost") + + ctrl$verbosity <- 0L + set.seed(233456) expect_error( reg_fit <- @@ -395,6 +414,8 @@ test_that('xgboost data conversion', { test_that('xgboost data and sparse matrices', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + mtcar_x <- mtcars[, -1] mtcar_mat <- as.matrix(mtcar_x) mtcar_smat <- Matrix::Matrix(mtcar_mat, sparse = TRUE) @@ -436,6 +457,8 @@ test_that('argument checks for data dimensions', { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + data(penguins, package = "modeldata") penguins <- na.omit(penguins) @@ -448,11 +471,11 @@ test_that('argument checks for data dimensions', { penguins_dummy <- as.data.frame(penguins_dummy[, -1]) expect_warning( - f_fit <- spec %>% fit(species ~ ., data = penguins), + f_fit <- spec %>% fit(species ~ ., data = penguins, control = ctrl), "1000 samples were requested" ) expect_warning( - xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species), + xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species, control = ctrl), "1000 samples were requested" ) expect_equal(extract_fit_engine(f_fit)$params$colsample_bynode, 1) @@ -466,6 +489,8 @@ test_that("fit and prediction with `event_level`", { skip_if_not_installed("xgboost") + ctrl$verbosity <- 0L + data(penguins, package = "modeldata") penguins <- na.omit(penguins[, -c(1:2)]) @@ -491,7 +516,8 @@ test_that("fit and prediction with `event_level`", { nrounds = 10, watchlist = list("training" = xgbmat_train_1), objective = "binary:logistic", - eval_metric = "auc") + eval_metric = "auc", + verbose = 0) expect_equal(extract_fit_engine(fit_p_1)$evaluation_log, fit_xgb_1$evaluation_log) @@ -514,7 +540,8 @@ test_that("fit and prediction with `event_level`", { nrounds = 10, watchlist = list("training" = xgbmat_train_2), objective = "binary:logistic", - eval_metric = "auc") + eval_metric = "auc", + verbose = 0) expect_equal(extract_fit_engine(fit_p_2)$evaluation_log, fit_xgb_2$evaluation_log) @@ -526,6 +553,9 @@ test_that("fit and prediction with `event_level`", { test_that("count/proportion parameters", { skip_if_not_installed("xgboost") + + ctrl$verbosity <- 0L + fit1 <- boost_tree(mtry = 7, trees = 4) %>% set_engine("xgboost") %>% From 284252bacbf00a325d657c29f16704745d08e8d5 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 21 Apr 2022 13:32:42 -0400 Subject: [PATCH 38/41] update snapshots --- tests/testthat/_snaps/translate.md | 73 +++++++++++------------------- 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/tests/testthat/_snaps/translate.md b/tests/testthat/_snaps/translate.md index 9bf0cf123..e1eb66a7b 100644 --- a/tests/testthat/_snaps/translate.md +++ b/tests/testthat/_snaps/translate.md @@ -9,6 +9,9 @@ $y missing_arg() + $weights + missing_arg() + $nthread [1] 1 @@ -62,6 +65,9 @@ $y missing_arg() + $weights + missing_arg() + $print_every_n expr: ^10L @@ -105,6 +111,9 @@ $y missing_arg() + $weights + missing_arg() + $nrounds expr: ^15 @@ -148,6 +157,9 @@ $y missing_arg() + $weights + missing_arg() + $min_child_weight expr: ^15 @@ -462,7 +474,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() @@ -477,7 +489,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $max_iter @@ -519,7 +531,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $elastic_net_param @@ -539,7 +551,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $elastic_net_param @@ -611,7 +623,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $reg_param @@ -680,9 +692,6 @@ $y missing_arg() - $wi - missing_arg() - $verbose [1] FALSE @@ -698,9 +707,6 @@ $y missing_arg() - $wi - missing_arg() - $bias expr: ^0 @@ -773,7 +779,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $family @@ -791,7 +797,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $max_iter @@ -825,7 +831,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $elastic_net_param @@ -912,9 +918,6 @@ $y missing_arg() - $wi - missing_arg() - $cost expr: ^1 @@ -935,7 +938,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $reg_param @@ -969,9 +972,6 @@ $y missing_arg() - $wi - missing_arg() - $type expr: ^tune() @@ -992,7 +992,7 @@ $formula missing_arg() - $weight_col + $weights missing_arg() $elastic_net_param @@ -1127,9 +1127,6 @@ $data missing_arg() - $weights - missing_arg() - $size expr: ^4 @@ -1170,9 +1167,6 @@ $data missing_arg() - $weights - missing_arg() - $size [1] 5 @@ -1194,9 +1188,6 @@ $data missing_arg() - $weights - missing_arg() - $size [1] 5 @@ -1240,9 +1231,6 @@ $data missing_arg() - $weights - missing_arg() - $size [1] 5 @@ -1269,9 +1257,6 @@ $data missing_arg() - $weights - missing_arg() - $size expr: ^4 @@ -1593,7 +1578,7 @@ $y missing_arg() - $case.weights + $weights missing_arg() $mtry @@ -1656,7 +1641,7 @@ $y missing_arg() - $case.weights + $weights missing_arg() $num.trees @@ -1688,7 +1673,7 @@ $y missing_arg() - $case.weights + $weights missing_arg() $num.trees @@ -1765,7 +1750,7 @@ $y missing_arg() - $case.weights + $weights missing_arg() $min.node.size @@ -1903,9 +1888,6 @@ $y missing_arg() - $wi - missing_arg() - $type [1] 11 @@ -1924,9 +1906,6 @@ $y missing_arg() - $wi - missing_arg() - $type expr: ^12 From 4cb16cf43cd053bedcbe3bb3c42205e0a0019e77 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 19 May 2022 14:26:21 -0400 Subject: [PATCH 39/41] doc update --- inst/models.tsv | 2 -- man/rmd/boost_tree_lightgbm.md | 10 ++++++---- man/rmd/decision_tree_partykit.md | 6 +++--- man/rmd/gen_additive_mod_mgcv.md | 8 ++++---- man/rmd/proportional_hazards_glmnet.md | 9 ++++++++- man/rmd/rand_forest_partykit.md | 6 +++--- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/inst/models.tsv b/inst/models.tsv index 14ca78838..674052438 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -18,7 +18,6 @@ "C5_rules" "classification" "C5.0" "rules" "cubist_rules" "regression" "Cubist" "rules" "decision_tree" "censored regression" "partykit" "censored" -"decision_tree" "censored regression" "partykit" "bonsai" "decision_tree" "censored regression" "rpart" "censored" "decision_tree" "classification" "C5.0" NA "decision_tree" "classification" "partykit" "bonsai" @@ -91,7 +90,6 @@ "proportional_hazards" "censored regression" "glmnet" "censored" "proportional_hazards" "censored regression" "survival" "censored" "rand_forest" "censored regression" "partykit" "censored" -"rand_forest" "censored regression" "partykit" "bonsai" "rand_forest" "classification" "partykit" "bonsai" "rand_forest" "classification" "randomForest" NA "rand_forest" "classification" "ranger" NA diff --git a/man/rmd/boost_tree_lightgbm.md b/man/rmd/boost_tree_lightgbm.md index d98e89f8c..45883bcef 100644 --- a/man/rmd/boost_tree_lightgbm.md +++ b/man/rmd/boost_tree_lightgbm.md @@ -29,7 +29,7 @@ Note that parsnip's translation can be overridden via the `counts` argument, sup ## Translation from parsnip to the original package (regression) - +The **bonsai** extension package is required to fit this model. ```r @@ -59,12 +59,13 @@ boost_tree( ## bonsai::train_lightgbm(x = missing_arg(), y = missing_arg(), ## feature_fraction = integer(), num_iterations = integer(), ## min_data_in_leaf = integer(), max_depth = integer(), learning_rate = numeric(), -## min_gain_to_split = numeric(), verbose = -1) +## min_gain_to_split = numeric(), verbose = -1, num_threads = 0, +## seed = sample.int(10^5, 1), deterministic = TRUE) ``` ## Translation from parsnip to the original package (classification) - +The **bonsai** extension package is required to fit this model. ```r @@ -94,7 +95,8 @@ boost_tree( ## bonsai::train_lightgbm(x = missing_arg(), y = missing_arg(), ## feature_fraction = integer(), num_iterations = integer(), ## min_data_in_leaf = integer(), max_depth = integer(), learning_rate = numeric(), -## min_gain_to_split = numeric(), verbose = -1) +## min_gain_to_split = numeric(), verbose = -1, num_threads = 0, +## seed = sample.int(10^5, 1), deterministic = TRUE) ``` [train_lightgbm()] is a wrapper around [lightgbm::lgb.train()] (and other functions) that make it easier to run this model. diff --git a/man/rmd/decision_tree_partykit.md b/man/rmd/decision_tree_partykit.md index 645dfd2c1..d7ab63a0e 100644 --- a/man/rmd/decision_tree_partykit.md +++ b/man/rmd/decision_tree_partykit.md @@ -21,7 +21,7 @@ An engine-specific parameter for this model is: ## Translation from parsnip to the original package (regression) - +The **bonsai** extension package is required to fit this model. ```r @@ -50,7 +50,7 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% ## Translation from parsnip to the original package (classification) - +The **bonsai** extension package is required to fit this model. ```r @@ -81,7 +81,7 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% ## Translation from parsnip to the original package (censored regression) - +The **censored** extension package is required to fit this model. ```r diff --git a/man/rmd/gen_additive_mod_mgcv.md b/man/rmd/gen_additive_mod_mgcv.md index f2c72b0d1..9f9ec1cf9 100644 --- a/man/rmd/gen_additive_mod_mgcv.md +++ b/man/rmd/gen_additive_mod_mgcv.md @@ -35,8 +35,8 @@ gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>% ## Computational engine: mgcv ## ## Model fit template: -## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), -## gamma = numeric(1)) +## mgcv::gam(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## select = logical(1), gamma = numeric(1)) ``` ## Translation from parsnip to the original package (classification) @@ -59,8 +59,8 @@ gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>% ## Computational engine: mgcv ## ## Model fit template: -## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), -## gamma = numeric(1), family = stats::binomial(link = "logit")) +## mgcv::gam(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## select = logical(1), gamma = numeric(1), family = stats::binomial(link = "logit")) ``` ## Model fitting diff --git a/man/rmd/proportional_hazards_glmnet.md b/man/rmd/proportional_hazards_glmnet.md index 269602a7f..8e5fe8c39 100644 --- a/man/rmd/proportional_hazards_glmnet.md +++ b/man/rmd/proportional_hazards_glmnet.md @@ -43,7 +43,7 @@ proportional_hazards(penalty = double(1), mixture = double(1)) %>% ## ## Model fit template: ## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(), -## family = missing_arg(), alpha = double(1)) +## alpha = double(1)) ``` ## Preprocessing requirements @@ -107,6 +107,13 @@ tidymodels does not treat different models differently when computing performanc This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + # References - Simon N, Friedman J, Hastie T, Tibshirani R. 2011. "Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}. diff --git a/man/rmd/rand_forest_partykit.md b/man/rmd/rand_forest_partykit.md index 47c15ed8b..6fa48e2d7 100644 --- a/man/rmd/rand_forest_partykit.md +++ b/man/rmd/rand_forest_partykit.md @@ -17,7 +17,7 @@ This model has 3 tuning parameters: ## Translation from parsnip to the original package (regression) - +The **bonsai** extension package is required to fit this model. ```r @@ -41,7 +41,7 @@ rand_forest() %>% ## Translation from parsnip to the original package (classification) - +The **bonsai** extension package is required to fit this model. ```r @@ -67,7 +67,7 @@ rand_forest() %>% # Translation from parsnip to the original package (censored regression) - +The **censored** extension package is required to fit this model. ```r From f2f24a0eb2d52a8efbf2f0b795d9bd300895f38b Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Thu, 19 May 2022 14:27:42 -0400 Subject: [PATCH 40/41] missing doc entry --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 57b7fd750..5bd30c4bc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -94,6 +94,7 @@ reference: - title: Developer tools contents: - contr_one_hot + - convert_case_weights - set_new_model - maybe_matrix - min_cols From a6e7849041e4f9948478b517796b4b7b21724c72 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 6 Jun 2022 07:02:49 -0400 Subject: [PATCH 41/41] remove convert_case_weights --- NAMESPACE | 6 +---- R/case_weights.R | 44 ------------------------------------- man/convert_case_weights.Rd | 30 ------------------------- 3 files changed, 1 insertion(+), 79 deletions(-) delete mode 100644 man/convert_case_weights.Rd diff --git a/NAMESPACE b/NAMESPACE index 2e5038ae7..d0e378058 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method(augment,model_fit) -S3method(convert_case_weights,default) -S3method(convert_case_weights,hardhat_frequency_weights) -S3method(convert_case_weights,hardhat_importance_weights) S3method(extract_fit_engine,model_fit) S3method(extract_parameter_dials,model_spec) S3method(extract_parameter_set_dials,model_spec) @@ -181,7 +178,6 @@ export(check_model_doesnt_exist) export(check_model_exists) export(contr_one_hot) export(control_parsnip) -export(convert_case_weights) export(convert_stan_interval) export(ctree_train) export(cubist_rules) @@ -202,7 +198,6 @@ export(fit.model_spec) export(fit_control) export(fit_xy) export(fit_xy.model_spec) -export(frequency_weights) export(format_class) export(format_classprobs) export(format_hazard) @@ -210,6 +205,7 @@ export(format_linear_pred) export(format_num) export(format_survival) export(format_time) +export(frequency_weights) export(gen_additive_mod) export(get_dependency) export(get_encoding) diff --git a/R/case_weights.R b/R/case_weights.R index a4c60bc9c..9aba25d28 100644 --- a/R/case_weights.R +++ b/R/case_weights.R @@ -65,50 +65,6 @@ patch_formula_environment_with_case_weights <- function(formula, formula } -#' Convert case weights to final from -#' -#' tidymodels requires case weights to have special classes. To use them in -#' model fitting or performance evaluation, they need to be converted to -#' numeric. -#' @param x A vector with class `"hardhat_case_weights"`. -#' @param where The location where they will be used: `"parsnip"` or -#' `"yardstick"`. -#' @param ... Additional options (not currently used). -#' @return A numeric vector or NULL. -#' @export -convert_case_weights <- function(x, where = "parsnip", ...) { - UseMethod("convert_case_weights") -} - -#' @export -convert_case_weights.default <- function(x, where = "parsnip", ...) { - where <- rlang::arg_match0(where, c("parsnip", "yardstick")) - if (!inherits(x, "hardhat_case_weights")) { - rlang::abort("'case_weights' should be vector of class 'hardhat_case_weights'") - } - invisible(NULL) -} - -#' @export -#' @rdname convert_case_weights -convert_case_weights.hardhat_importance_weights <- - function(x, where = "parsnip", ...) { - if (where == "parsnip") { - x <- as.double(x) - } else { - x <- NULL - } - x - } - -#' @export -#' @rdname convert_case_weights -convert_case_weights.hardhat_frequency_weights <- - function(x, where = "parsnip", ...) { - as.integer(x) - } - - # ------------------------------------------------------------------------------ case_weights_allowed <- function(spec) { diff --git a/man/convert_case_weights.Rd b/man/convert_case_weights.Rd deleted file mode 100644 index 619770eb1..000000000 --- a/man/convert_case_weights.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_weights.R -\name{convert_case_weights} -\alias{convert_case_weights} -\alias{convert_case_weights.hardhat_importance_weights} -\alias{convert_case_weights.hardhat_frequency_weights} -\title{Convert case weights to final from} -\usage{ -convert_case_weights(x, where = "parsnip", ...) - -\method{convert_case_weights}{hardhat_importance_weights}(x, where = "parsnip", ...) - -\method{convert_case_weights}{hardhat_frequency_weights}(x, where = "parsnip", ...) -} -\arguments{ -\item{x}{A vector with class \code{"hardhat_case_weights"}.} - -\item{where}{The location where they will be used: \code{"parsnip"} or -\code{"yardstick"}.} - -\item{...}{Additional options (not currently used).} -} -\value{ -A numeric vector or NULL. -} -\description{ -tidymodels requires case weights to have special classes. To use them in -model fitting or performance evaluation, they need to be converted to -numeric. -}