diff --git a/DESCRIPTION b/DESCRIPTION
index 8ec090dd3..95b61d7bb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,11 +21,11 @@ Depends:
Imports:
cli,
dplyr (>= 0.8.0.1),
- generics (>= 0.1.0.9000),
+ generics (>= 0.1.2),
ggplot2,
globals,
glue,
- hardhat (>= 0.1.6.9001),
+ hardhat (>= 0.2.0.9000),
lifecycle,
magrittr,
prettyunits,
@@ -40,9 +40,8 @@ Imports:
Suggests:
C50,
covr,
- dials (>= 0.0.10.9001),
+ dials (>= 0.1.0),
earth,
- tensorflow,
ggrepel,
keras,
kernlab,
@@ -60,30 +59,17 @@ Suggests:
rpart,
sparklyr (>= 1.0.0),
survival,
+ tensorflow,
testthat (>= 3.0.0),
xgboost (>= 1.5.0.1)
+Remotes:
+ tidymodels/hardhat
VignetteBuilder:
knitr
ByteCompile: true
-Config/Needs/website:
- C50,
- dbarts,
- earth,
- glmnet,
- keras,
- kernlab,
- kknn,
- LiblineaR,
- mgcv,
- nnet,
- parsnip,
- randomForest,
- ranger,
- rpart,
- rstanarm,
- tidymodels/tidymodels,
- tidyverse/tidytemplate,
- rstudio/reticulate,
+Config/Needs/website: C50, dbarts, earth, glmnet, keras, kernlab, kknn,
+ LiblineaR, mgcv, nnet, parsnip, randomForest, ranger, rpart, rstanarm,
+ tidymodels/tidymodels, tidyverse/tidytemplate, rstudio/reticulate,
xgboost
Config/rcmdcheck/ignore-inconsequential-notes: true
Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
index 78f9f4d5e..d0e378058 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -205,6 +205,7 @@ export(format_linear_pred)
export(format_num)
export(format_survival)
export(format_time)
+export(frequency_weights)
export(gen_additive_mod)
export(get_dependency)
export(get_encoding)
@@ -213,7 +214,9 @@ export(get_from_env)
export(get_model_env)
export(get_pred_type)
export(glance)
+export(glm_grouped)
export(has_multi_predict)
+export(importance_weights)
export(is_varying)
export(keras_mlp)
export(keras_predict_classes)
@@ -333,6 +336,8 @@ importFrom(hardhat,extract_fit_engine)
importFrom(hardhat,extract_parameter_dials)
importFrom(hardhat,extract_parameter_set_dials)
importFrom(hardhat,extract_spec_parsnip)
+importFrom(hardhat,frequency_weights)
+importFrom(hardhat,importance_weights)
importFrom(hardhat,tune)
importFrom(magrittr,"%>%")
importFrom(purrr,"%||%")
diff --git a/NEWS.md b/NEWS.md
index 26ea3d841..03c53e57b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,12 @@
# parsnip (development version)
+
+* Enable the use of case weights for models that support them.
+
+* Added a `glm_grouped()` function to convert long data to the grouped format required by `glm()` for logistic regression.
+
+* `show_model_info()` now indicates which models can utilize case weights.
+
* `xgb_train()` now allows for case weights
* Added `ctree_train()` and `cforest_train()` wrappers for the functions in the partykit package. Engines for these will be added to other parsnip extension packages.
@@ -14,6 +21,7 @@
* Model type functions will now message informatively if a needed parsnip extension package is not loaded (#731).
+
# parsnip 0.2.1
* Fixed a major bug in spark models induced in the previous version (#671).
diff --git a/R/aaa_models.R b/R/aaa_models.R
index 198ca55a3..58ca6a722 100644
--- a/R/aaa_models.R
+++ b/R/aaa_models.R
@@ -927,8 +927,24 @@ show_model_info <- function(model) {
engines <- get_from_env(model)
if (nrow(engines) > 0) {
cat(" engines: \n")
- engines %>%
+
+ weight_info <-
+ purrr::map_df(
+ model,
+ ~ get_from_env(paste0(.x, "_fit")) %>% mutate(model = .x)
+ ) %>%
+ dplyr::mutate(protect = map(value, ~ .x$protect)) %>%
+ dplyr::select(-value) %>%
dplyr::mutate(
+ has_wts = purrr::map_lgl(protect, ~ any(grepl("^weight", .x))),
+ has_wts = ifelse(has_wts, cli::symbol$sup_1, "")
+ ) %>%
+ dplyr::select(engine, mode, has_wts)
+
+ engines %>%
+ dplyr::left_join(weight_info, by = c("engine", "mode")) %>%
+ dplyr::mutate(
+ engine = paste0(engine, has_wts),
mode = format(paste0(mode, ": "))
) %>%
dplyr::group_by(mode) %>%
@@ -941,7 +957,7 @@ show_model_info <- function(model) {
dplyr::ungroup() %>%
dplyr::pull(lab) %>%
cat(sep = "")
- cat("\n")
+ cat("\n", cli::symbol$sup_1, "The model can use case weights.\n\n", sep = "")
} else {
cat(" no registered engines.\n\n")
}
diff --git a/R/arguments.R b/R/arguments.R
index da570c58b..5282e6909 100644
--- a/R/arguments.R
+++ b/R/arguments.R
@@ -150,11 +150,35 @@ make_call <- function(fun, ns, args, ...) {
make_form_call <- function(object, env = NULL) {
fit_args <- object$method$fit$args
+ uses_weights <- has_weights(env)
- # Get the arguments related to data:
+ # In model specification code using `set_fit()`, there are two main arguments
+ # that dictate the data-related model arguments (e.g. 'formula', 'data', 'x',
+ # etc).
+ # The 'protect' element specifies which data arguments should not be modifiable
+ # by the user (as an engine argument). These have standardized names that
+ # follow the usual R conventions. For example, `foo(formula, data, weights)`
+ # and so on.
+ # However, some packages do not follow these naming conventions. The 'data'
+  # element in `set_fit()` allows us to have non-standard argument names by
+  # providing a named list. If function `bar(f, dat, wts)` was being used, the
+  # 'data' element would be `c(formula = "f", data = "dat", weights = "wts")`.
+ # If conventional names are used, there is no 'data' element since the values
+ # in 'protect' suffice.
+
+ # Get the arguments related to data arguments to insert into the model call
+
+ # Do we have conventional argument names?
if (is.null(object$method$fit$data)) {
- data_args <- c(formula = "formula", data = "data")
+ # Set the minimum arguments for formula methods.
+ data_args <- object$method$fit$protect
+ names(data_args) <- data_args
+ # Case weights _could_ be used but remove the arg if they are not given:
+ if (!uses_weights) {
+ data_args <- data_args[data_args != "weights"]
+ }
} else {
+ # What are the non-conventional names?
data_args <- object$method$fit$data
}
@@ -166,6 +190,7 @@ make_form_call <- function(object, env = NULL) {
# sub in actual formula
fit_args[[ unname(data_args["formula"]) ]] <- env$formula
+ # TODO remove weights col from data?
if (object$engine == "spark") {
env$x <- env$data
}
@@ -178,12 +203,20 @@ make_form_call <- function(object, env = NULL) {
fit_call
}
-make_xy_call <- function(object, target) {
+# TODO we need something to indicate that case weights are being used.
+make_xy_call <- function(object, target, env) {
fit_args <- object$method$fit$args
+ uses_weights <- has_weights(env)
+
+ # See the comments above in make_form_call()
- # Get the arguments related to data:
if (is.null(object$method$fit$data)) {
- data_args <- c(x = "x", y = "y")
+ data_args <- object$method$fit$protect
+ names(data_args) <- data_args
+ # Case weights _could_ be used but remove the arg if they are not given:
+ if (!uses_weights) {
+ data_args <- data_args[data_args != "weights"]
+ }
} else {
data_args <- object$method$fit$data
}
@@ -197,6 +230,9 @@ make_xy_call <- function(object, target) {
matrix = rlang::expr(maybe_matrix(x)),
rlang::abort(glue::glue("Invalid data type target: {target}."))
)
+ if (uses_weights) {
+ object$method$fit$args[[ unname(data_args["weights"]) ]] <- rlang::expr(weights)
+ }
fit_call <- make_call(
fun = object$method$fit$func["fun"],
@@ -269,3 +305,4 @@ min_rows <- function(num_rows, source, offset = 0) {
as.integer(num_rows)
}
+
diff --git a/R/boost_tree_data.R b/R/boost_tree_data.R
index 63c90b2d9..2ec5b03bd 100644
--- a/R/boost_tree_data.R
+++ b/R/boost_tree_data.R
@@ -82,7 +82,7 @@ set_fit(
mode = "regression",
value = list(
interface = "matrix",
- protect = c("x", "y"),
+ protect = c("x", "y", "weights"),
func = c(pkg = "parsnip", fun = "xgb_train"),
defaults = list(nthread = 1, verbose = 0)
)
@@ -132,7 +132,7 @@ set_fit(
mode = "classification",
value = list(
interface = "matrix",
- protect = c("x", "y"),
+ protect = c("x", "y", "weights"),
func = c(pkg = "parsnip", fun = "xgb_train"),
defaults = list(nthread = 1, verbose = 0)
)
diff --git a/R/case_weights.R b/R/case_weights.R
new file mode 100644
index 000000000..9aba25d28
--- /dev/null
+++ b/R/case_weights.R
@@ -0,0 +1,93 @@
+#' Using case weights with parsnip
+#'
+#' Case weights are positive numeric values that determine how much influence
+#' each data point has during the model fitting process. There are a variety
+#' of situations where case weights can be used.
+#'
+#' tidymodels packages differentiate _how_ different types of case weights
+#' should be used during the entire data analysis process, including
+#' preprocessing data, model fitting, performance calculations, etc.
+#'
+#' The tidymodels packages require users to convert their numeric vectors to a
+#' vector class that reflects how these should be used. For example, there are
+#' some situations where the weights should not affect operations such as
+#' centering and scaling or other preprocessing operations.
+#'
+#' The types of weights allowed in tidymodels are:
+#'
+#' * Frequency weights via [hardhat::frequency_weights()]
+#' * Importance weights via [hardhat::importance_weights()]
+#'
+#' More types can be added by request.
+#'
+#' For parsnip, the [fit()] and [fit_xy] functions contain a `case_weights`
+#' argument that takes these data. For Spark models, the argument value should
+#' be a character value.
+#'
+#' @name case_weights
+#' @seealso [frequency_weights()], [importance_weights()], [fit()], [fit_xy]
+NULL
+
+# ------------------------------------------------------------------------------
+
+weights_to_numeric <- function(x, spec) {
+  # Nothing to convert when no case weights were supplied.
+  if (is.null(x)) {
+    return(NULL)
+  }
+  # Spark wants a column name, so the value is handed back untouched.
+  if (spec$engine == "spark") {
+    return(x)
+  }
+
+  # Frequency weights are converted to integer; anything else to double.
+  if (inherits(x, "hardhat_frequency_weights")) {
+    return(as.integer(x))
+  }
+  as.numeric(x)
+}
+
+patch_formula_environment_with_case_weights <- function(formula,
+                                                        data,
+                                                        case_weights) {
+  # `lm()`, `glm()`, and others use the original model function call to build
+  # a call for `model.frame()`. That normally fails because a vector named
+  # 'weights' is looked up in the formula's own environment (usually the
+  # global one). So the data and weights are stashed, under the expected
+  # names, in a child of that environment, which the formula then adopts.
+  stash <- list(data = data, weights = case_weights)
+  child_env <- rlang::new_environment(
+    data = stash,
+    parent = environment(formula)
+  )
+  environment(formula) <- child_env
+
+  formula
+}
+
+# ------------------------------------------------------------------------------
+# Does the registered fit method for this spec's engine/mode accept weights?
+case_weights_allowed <- function(spec) {
+  mod_type <- class(spec)[1]
+  mod_eng <- spec$engine
+  mod_mode <- spec$mode
+
+  model_info <-
+    get_from_env(paste0(mod_type, "_fit")) %>%
+    dplyr::filter(engine == mod_eng & mode == mod_mode)
+  if (nrow(model_info) != 1) {
+    rlang::abort(
+      glue::glue(
+        "Error in getting model information for model {mod_type} with engine {mod_eng} and mode {mod_mode}."
+      )
+    )
+  }
+  # If weights are used, they are protected data arguments with the canonical
+  # name 'weights' (although this may not be the model function's argument name).
+  data_args <- model_info$value[[1]]$protect
+  any(data_args == "weights")
+}
+# TRUE when `env` holds a non-NULL `weights` element, FALSE otherwise.
+has_weights <- function(env) {
+  !is.null(env$weights)
+}
diff --git a/R/convert_data.R b/R/convert_data.R
index 7af0c34f3..ef8fa0673 100644
--- a/R/convert_data.R
+++ b/R/convert_data.R
@@ -252,6 +252,12 @@
if (length(weights) != nrow(x)) {
rlang::abort(glue::glue("`weights` should have {nrow(x)} elements"))
}
+
+ form <- patch_formula_environment_with_case_weights(
+ formula = form,
+ data = x,
+ case_weights = weights
+ )
}
res <- list(
diff --git a/R/fit.R b/R/fit.R
index f0c994012..6cda2e2c0 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -18,6 +18,10 @@
#' below). A data frame containing all relevant variables (e.g.
#' outcome(s), predictors, case weights, etc). Note: when needed, a
#' \emph{named argument} should be used.
+#' @param case_weights An optional classed vector of numeric case weights. This
+#' must return `TRUE` when [hardhat::is_case_weights()] is run on it. See
+#' [hardhat::frequency_weights()] and [hardhat::importance_weights()] for
+#' examples.
#' @param control A named list with elements `verbosity` and
#' `catch`. See [control_parsnip()].
#' @param ... Not currently used; values passed here will be
@@ -101,6 +105,7 @@ fit.model_spec <-
function(object,
formula,
data,
+ case_weights = NULL,
control = control_parsnip(),
...
) {
@@ -110,6 +115,8 @@ fit.model_spec <-
if (!identical(class(control), class(control_parsnip()))) {
rlang::abort("The 'control' argument should have class 'control_parsnip'.")
}
+ check_case_weights(case_weights, object)
+
dots <- quos(...)
if (length(possible_engines(object)) == 0) {
@@ -129,15 +136,26 @@ fit.model_spec <-
}
}
- if (all(c("x", "y") %in% names(dots)))
+ if (all(c("x", "y") %in% names(dots))) {
rlang::abort("`fit.model_spec()` is for the formula methods. Use `fit_xy()` instead.")
+ }
cl <- match.call(expand.dots = TRUE)
# Create an environment with the evaluated argument objects. This will be
# used when a model call is made later.
eval_env <- rlang::env()
+ wts <- weights_to_numeric(case_weights, object)
+
+ formula <- patch_formula_environment_with_case_weights(
+ formula = formula,
+ data = data,
+ case_weights = wts
+ )
+
eval_env$data <- data
eval_env$formula <- formula
+ eval_env$weights <- wts
+
fit_interface <-
check_interface(eval_env$formula, eval_env$data, cl, object)
@@ -206,6 +224,7 @@ fit_xy.model_spec <-
function(object,
x,
y,
+ case_weights = NULL,
control = control_parsnip(),
...
) {
@@ -223,6 +242,8 @@ fit_xy.model_spec <-
if (is.null(colnames(x))) {
rlang::abort("'x' should have column names.")
}
+ check_case_weights(case_weights, object)
+
object <- check_mode(object, levels(y))
dots <- quos(...)
if (is.null(object$engine)) {
@@ -245,6 +266,9 @@ fit_xy.model_spec <-
eval_env <- rlang::env()
eval_env$x <- x
eval_env$y <- y
+ eval_env$weights <- weights_to_numeric(case_weights, object)
+
+ # TODO case weights: pass in eval_env not individual elements
fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object)
if (object$engine == "spark")
@@ -306,18 +330,18 @@ fit_xy.model_spec <-
# ------------------------------------------------------------------------------
-eval_mod <- function(e, capture = FALSE, catch = FALSE, ...) {
+eval_mod <- function(e, capture = FALSE, catch = FALSE, envir = NULL, ...) {
if (capture) {
if (catch) {
- junk <- capture.output(res <- try(eval_tidy(e, ...), silent = TRUE))
+ junk <- capture.output(res <- try(eval_tidy(e, env = envir, ...), silent = TRUE))
} else {
- junk <- capture.output(res <- eval_tidy(e, ...))
+ junk <- capture.output(res <- eval_tidy(e, env = envir, ...))
}
} else {
if (catch) {
- res <- try(eval_tidy(e, ...), silent = TRUE)
+ res <- try(eval_tidy(e, env = envir, ...), silent = TRUE)
} else {
- res <- eval_tidy(e, ...)
+ res <- eval_tidy(e, env = envir, ...)
}
}
res
diff --git a/R/fit_helpers.R b/R/fit_helpers.R
index 61b9344b0..d4fbdf6b8 100644
--- a/R/fit_helpers.R
+++ b/R/fit_helpers.R
@@ -39,7 +39,7 @@ form_form <-
fit_call,
capture = control$verbosity == 0,
catch = control$catch,
- env = env,
+ envir = env,
...
),
gcFirst = FALSE
@@ -49,7 +49,7 @@ form_form <-
fit_call,
capture = control$verbosity == 0,
catch = control$catch,
- env = env,
+ envir = env,
...
)
elapsed <- list(elapsed = NA_real_)
@@ -88,7 +88,7 @@ xy_xy <- function(object, env, control, target = "none", ...) {
# sub in arguments to actual syntax for corresponding engine
object <- translate(object, engine = object$engine)
- fit_call <- make_xy_call(object, target)
+ fit_call <- make_xy_call(object, target, env)
res <- list(lvl = levels(env$y), spec = object)
@@ -98,7 +98,7 @@ xy_xy <- function(object, env, control, target = "none", ...) {
fit_call,
capture = control$verbosity == 0,
catch = control$catch,
- env = env,
+ envir = env,
...
),
gcFirst = FALSE
@@ -108,7 +108,7 @@ xy_xy <- function(object, env, control, target = "none", ...) {
fit_call,
capture = control$verbosity == 0,
catch = control$catch,
- env = env,
+ envir = env,
...
)
elapsed <- list(elapsed = NA_real_)
@@ -177,7 +177,7 @@ xy_form <- function(object, env, control, ...) {
.convert_xy_to_form_fit(
x = env$x,
y = env$y,
- weights = NULL,
+ weights = env$weights,
y_name = "..y",
remove_intercept = remove_intercept
)
@@ -200,3 +200,4 @@ xy_form <- function(object, env, control, ...) {
res
}
+
diff --git a/R/gen_additive_mod_data.R b/R/gen_additive_mod_data.R
index 261fa2f12..ff5f59ff2 100644
--- a/R/gen_additive_mod_data.R
+++ b/R/gen_additive_mod_data.R
@@ -47,7 +47,7 @@ set_fit(
mode = "regression",
value = list(
interface = "formula",
- protect = c("formula", "data"),
+ protect = c("formula", "data", "weights"),
func = c(pkg = "mgcv", fun = "gam"),
defaults = list()
)
@@ -127,7 +127,7 @@ set_fit(
mode = "classification",
value = list(
interface = "formula",
- protect = c("formula", "data"),
+ protect = c("formula", "data", "weights"),
func = c(pkg = "mgcv", fun = "gam"),
defaults = list(
family = quote(stats::binomial(link = "logit"))
diff --git a/R/grouped_binomial.R b/R/grouped_binomial.R
new file mode 100644
index 000000000..b3c7b12ca
--- /dev/null
+++ b/R/grouped_binomial.R
@@ -0,0 +1,118 @@
+#' Fit a grouped binomial outcome from a data set with case weights
+#'
+#' @description
+#' [stats::glm()] assumes that a tabular data set with case weights corresponds
+#' to "different observations have different dispersions" (see `?glm`).
+#'
+#' In some cases, the case weights reflect that the same covariate pattern was
+#' observed multiple times (i.e., _frequency weights_). In this case,
+#' [stats::glm()] expects the data to be formatted as the number of events for
+#' each factor level so that the outcome can be given to the formula as
+#' `cbind(events_1, events_2)`.
+#'
+#' [glm_grouped()] converts data with integer case weights to the expected
+#' "number of events" format for binomial data.
+#' @param formula A formula object with one outcome that is a two-level factor.
+#' @param data A data frame with the outcomes and predictors (but not case
+#' weights).
+#' @param weights An integer vector of weights whose length is the same as the
+#' number of rows in `data`. If it is a non-integer numeric, it will be converted
+#' to integer (with a warning).
+#' @param ... Options to pass to [stats::glm()]. If `family` is not set, it will
+#' automatically be assigned the basic binomial family.
+#' @return An object produced by [stats::glm()].
+#' @examples
+#' #----------------------------------------------------------------------------
+#' # The same data set formatted three ways
+#'
+#' # First with basic case weights that, from ?glm, are used inappropriately.
+#' ucb_weighted <- as.data.frame(UCBAdmissions)
+#' ucb_weighted$Freq <- as.integer(ucb_weighted$Freq)
+#' head(ucb_weighted)
+#' nrow(ucb_weighted)
+#'
+#' # Format when yes/no data are in individual rows (probably still inappropriate)
+#' library(tidyr)
+#' ucb_long <- uncount(ucb_weighted, Freq)
+#' head(ucb_long)
+#' nrow(ucb_long)
+#'
+#' # Format where the outcome is formatted as number of events
+#' ucb_events <-
+#' ucb_weighted %>%
+#' tidyr::pivot_wider(
+#' id_cols = c(Gender, Dept),
+#' names_from = Admit,
+#' values_from = Freq,
+#' values_fill = 0L
+#' )
+#' head(ucb_events)
+#' nrow(ucb_events)
+#'
+#' #----------------------------------------------------------------------------
+#' # Different model fits
+#'
+#' # Treat data as separate Bernoulli data:
+#' glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial)
+#'
+#' # Weights produce the same statistics
+#' glm(
+#' Admit ~ Gender + Dept,
+#' data = ucb_weighted,
+#' family = binomial,
+#' weights = ucb_weighted$Freq
+#' )
+#'
+#' # Data as binomial "x events out of n trials" format. Note that, to get the same
+#' # coefficients, the order of the levels must be reversed.
+#' glm(
+#' cbind(Rejected, Admitted) ~ Gender + Dept,
+#' data = ucb_events,
+#' family = binomial
+#' )
+#'
+#' # The new function that starts with frequency weights and gets to the correct result:
+#' glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq)
+#' @export
+glm_grouped <- function(formula, data, weights, ...) {
+  opts <- list(...)
+  # Default to the basic binomial family unless the caller supplied one.
+  if (!any(names(opts) == "family")) {
+    opts$family <- "binomial"
+  }
+
+  if (is.null(weights) || !is.numeric(weights)) {
+    rlang::abort("'weights' should be an integer vector.")
+  }
+  if (!is.integer(weights)) {
+    weights <- as.integer(weights)
+    rlang::warn(glue::glue("converting case weights from numeric to integer."))
+  }
+
+  terms <- terms(formula)
+  response <- rownames(attr(terms, "factors"))[attr(terms, "response")]
+  all_pred <- setdiff(all.vars(formula), response)
+  lvls <- levels(data[[response]])
+
+  if (length(lvls) != 2) {
+    rlang::abort(glue::glue("the response column '{response}' should be a two-level factor."))
+  }
+
+  all_cols <- c(response, all_pred)
+  data <- data[, all_cols, drop = FALSE]
+  data$..weights <- weights
+  # Reconstruct the new data format (made below) to the grouped formula format
+  formula[[2]] <- rlang::call2("cbind", !!!rlang::syms(rev(lvls)))
+
+  # Sum weights of rows sharing a covariate pattern and outcome level.
+  data <- data %>%
+    tidyr::pivot_wider(
+      id_cols = c(dplyr::all_of(all_pred)),
+      names_from = c(dplyr::all_of(response)),
+      values_from = "..weights",
+      values_fn = list(..weights = sum),
+      values_fill = 0L
+    )
+  cl <- rlang::call2("glm", rlang::expr(formula), data = rlang::expr(data), !!!opts)
+  rlang::eval_tidy(cl)
+}
diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R
index 5f95b9bd4..7a7f23d29 100644
--- a/R/linear_reg_data.R
+++ b/R/linear_reg_data.R
@@ -407,8 +407,8 @@ set_fit(
mode = "regression",
value = list(
interface = "formula",
- data = c(formula = "formula", data = "x"),
- protect = c("x", "formula", "weight_col"),
+ data = c(formula = "formula", data = "x", weights = "weight_col"),
+ protect = c("x", "formula", "weights"),
func = c(pkg = "sparklyr", fun = "ml_linear_regression"),
defaults = list()
)
diff --git a/R/logistic_reg_data.R b/R/logistic_reg_data.R
index db6f12389..b38d9ec98 100644
--- a/R/logistic_reg_data.R
+++ b/R/logistic_reg_data.R
@@ -220,7 +220,7 @@ set_fit(
mode = "classification",
value = list(
interface = "matrix",
- protect = c("x", "y", "wi"),
+ protect = c("x", "y"),
data = c(x = "data", y = "target"),
func = c(pkg = "LiblineaR", fun = "LiblineaR"),
defaults = list(verbose = FALSE)
@@ -336,8 +336,8 @@ set_fit(
mode = "classification",
value = list(
interface = "formula",
- data = c(formula = "formula", data = "x"),
- protect = c("x", "formula", "weight_col"),
+ data = c(formula = "formula", data = "x", weights = "weight_col"),
+ protect = c("x", "formula", "weights"),
func = c(pkg = "sparklyr", fun = "ml_logistic_regression"),
defaults =
list(
diff --git a/R/misc.R b/R/misc.R
index 6907b388c..aaa997063 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -438,3 +438,18 @@ stan_conf_int <- function(object, newdata) {
penalty
}
+
+# Validate `case_weights` for `spec`: abort if unusable, else invisible NULL.
+check_case_weights <- function(x, spec) {
+  # Scalar `||` with identical() so a NULL engine cannot break the guard.
+  if (is.null(x) || identical(spec$engine, "spark")) {
+    return(invisible(NULL))
+  }
+  if (!hardhat::is_case_weights(x)) {
+    rlang::abort("'case_weights' should be a single numeric vector of class 'hardhat_case_weights'.")
+  }
+  if (!case_weights_allowed(spec)) {
+    rlang::abort("Case weights are not enabled by the underlying model implementation.")
+  }
+  invisible(NULL)
+}
diff --git a/R/mlp_data.R b/R/mlp_data.R
index b912bfd0c..a0e5bf0a1 100644
--- a/R/mlp_data.R
+++ b/R/mlp_data.R
@@ -233,7 +233,7 @@ set_fit(
mode = "regression",
value = list(
interface = "formula",
- protect = c("formula", "data", "weights"),
+ protect = c("formula", "data"),
func = c(pkg = "nnet", fun = "nnet"),
defaults = list(trace = FALSE)
)
@@ -257,7 +257,7 @@ set_fit(
mode = "classification",
value = list(
interface = "formula",
- protect = c("formula", "data", "weights"),
+ protect = c("formula", "data"),
func = c(pkg = "nnet", fun = "nnet"),
defaults = list(trace = FALSE)
)
diff --git a/R/multinom_reg_data.R b/R/multinom_reg_data.R
index 96188f62c..114f23e02 100644
--- a/R/multinom_reg_data.R
+++ b/R/multinom_reg_data.R
@@ -133,8 +133,8 @@ set_fit(
mode = "classification",
value = list(
interface = "formula",
- data = c(formula = "formula", data = "x"),
- protect = c("x", "formula", "weight_col"),
+ data = c(formula = "formula", data = "x", weights = "weight_col"),
+ protect = c("x", "formula", "weights"),
func = c(pkg = "sparklyr", fun = "ml_logistic_regression"),
defaults = list(family = "multinomial")
)
@@ -282,7 +282,7 @@ set_fit(
mode = "classification",
value = list(
interface = "formula",
- protect = c("formula", "data", "weights"),
+ protect = c("formula", "data"),
func = c(pkg = "nnet", fun = "multinom"),
defaults = list(trace = FALSE)
)
diff --git a/R/parsnip-package.R b/R/parsnip-package.R
index ef002e8e5..e1111579d 100644
--- a/R/parsnip-package.R
+++ b/R/parsnip-package.R
@@ -41,7 +41,7 @@ utils::globalVariables(
"sub_neighbors", ".pred_class", "x", "y", "predictor_indicators",
"compute_intercept", "remove_intercept", "estimate", "term",
"call_info", "component", "component_id", "func", "tunable", "label",
- "pkg", ".order", "item", "tunable", "has_ext"
+ "pkg", ".order", "item", "tunable", "has_ext", "weights", "has_wts", "protect"
)
)
diff --git a/R/rand_forest_data.R b/R/rand_forest_data.R
index f3f74d9f4..5ea5e6f1d 100644
--- a/R/rand_forest_data.R
+++ b/R/rand_forest_data.R
@@ -122,7 +122,8 @@ set_fit(
mode = "classification",
value = list(
interface = "data.frame",
- protect = c("x", "y", "case.weights"),
+ data = c(x = "x", y = "y", weights = "case.weights"),
+ protect = c("x", "y", "weights"),
func = c(pkg = "ranger", fun = "ranger"),
defaults =
list(
@@ -151,7 +152,8 @@ set_fit(
mode = "regression",
value = list(
interface = "data.frame",
- protect = c("x", "y", "case.weights"),
+ data = c(x = "x", y = "y", weights = "case.weights"),
+ protect = c("x", "y", "weights"),
func = c(pkg = "ranger", fun = "ranger"),
defaults =
list(
diff --git a/R/reexports.R b/R/reexports.R
index e26510794..a7d7264d9 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -49,3 +49,12 @@ hardhat::extract_parameter_dials
#' @importFrom hardhat tune
#' @export
hardhat::tune
+
+#' @importFrom hardhat frequency_weights
+#' @export
+hardhat::frequency_weights
+
+#' @importFrom hardhat importance_weights
+#' @export
+hardhat::importance_weights
+
diff --git a/R/svm_linear_data.R b/R/svm_linear_data.R
index 25da85477..ed2d6da95 100644
--- a/R/svm_linear_data.R
+++ b/R/svm_linear_data.R
@@ -33,7 +33,7 @@ set_fit(
mode = "regression",
value = list(
interface = "matrix",
- protect = c("x", "y", "wi"),
+ protect = c("x", "y"),
data = c(x = "data", y = "target"),
func = c(pkg = "LiblineaR", fun = "LiblineaR"),
defaults = list(type = 11)
@@ -47,7 +47,7 @@ set_fit(
value = list(
interface = "matrix",
data = c(x = "data", y = "target"),
-    protect = c("x", "y", "wi"),
+    protect = c("x", "y"),
func = c(pkg = "LiblineaR", fun = "LiblineaR"),
defaults = list(type = 1)
)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 0af70134a..5bd30c4bc 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -45,6 +45,7 @@ reference:
- discrim_quad
- discrim_regularized
- gen_additive_mod
+ - glm_grouped
- linear_reg
- logistic_reg
- mars
@@ -67,6 +68,7 @@ reference:
- autoplot.model_fit
- add_rowindex
- augment.model_fit
+ - case_weights
- descriptors
- extract-parsnip
- fit.model_spec
@@ -92,6 +94,7 @@ reference:
- title: Developer tools
contents:
- contr_one_hot
+ - convert_case_weights
- set_new_model
- maybe_matrix
- min_cols
diff --git a/man/case_weights.Rd b/man/case_weights.Rd
new file mode 100644
index 000000000..1ecb27706
--- /dev/null
+++ b/man/case_weights.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/case_weights.R
+\name{case_weights}
+\alias{case_weights}
+\title{Using case weights with parsnip}
+\description{
+Case weights are positive numeric values that determine how much influence
+each data point has during the model fitting process. There are a variety
+of situations where case weights can be used.
+}
+\details{
+tidymodels packages differentiate \emph{how} different types of case weights
+should be used during the entire data analysis process, including
+preprocessing data, model fitting, performance calculations, etc.
+
+The tidymodels packages require users to convert their numeric vectors to a
+vector class that reflects how these should be used. For example, there are
+some situations where the weights should not affect operations such as
+centering and scaling or other preprocessing operations.
+
+The types of weights allowed in tidymodels are:
+\itemize{
+\item Frequency weights via \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}}
+\item Importance weights via \code{\link[hardhat:importance_weights]{hardhat::importance_weights()}}
+}
+
+More types can be added by request.
+
+For parsnip, the \code{\link[=fit]{fit()}} and \link{fit_xy} functions contain a \code{case_weights}
+argument that takes these data. For Spark models, the argument value should
+be a character value.
+}
+\seealso{
+\code{\link[=frequency_weights]{frequency_weights()}}, \code{\link[=importance_weights]{importance_weights()}}, \code{\link[=fit]{fit()}}, \link{fit_xy}
+}
diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd
index 48fc1e03d..3182de8ca 100644
--- a/man/details_C5_rules_C5.0.Rd
+++ b/man/details_C5_rules_C5.0.Rd
@@ -60,6 +60,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of
diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd
index 816ec8161..aea7cd508 100644
--- a/man/details_bag_mars_earth.Rd
+++ b/man/details_bag_mars_earth.Rd
@@ -91,6 +91,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2):
diff --git a/man/details_bag_tree_C5.0.Rd b/man/details_bag_tree_C5.0.Rd
index 85b20281c..c6370328b 100644
--- a/man/details_bag_tree_C5.0.Rd
+++ b/man/details_bag_tree_C5.0.Rd
@@ -51,6 +51,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2):
diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd
index 388344f8a..9618fb549 100644
--- a/man/details_bag_tree_rpart.Rd
+++ b/man/details_bag_tree_rpart.Rd
@@ -120,6 +120,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2):
diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd
index a95500342..5e8cd42b5 100644
--- a/man/details_boost_tree_C5.0.Rd
+++ b/man/details_boost_tree_C5.0.Rd
@@ -58,6 +58,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Other details}{
\subsection{Early stopping}{
diff --git a/man/details_boost_tree_spark.Rd b/man/details_boost_tree_spark.Rd
index 3c2d055f7..7d105c259 100644
--- a/man/details_boost_tree_spark.Rd
+++ b/man/details_boost_tree_spark.Rd
@@ -106,6 +106,19 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
+}
+
\subsection{Other details}{
For models created using the \code{"spark"} engine, there are several things
diff --git a/man/details_boost_tree_xgboost.Rd b/man/details_boost_tree_xgboost.Rd
index 86132645c..fcdb36667 100644
--- a/man/details_boost_tree_xgboost.Rd
+++ b/man/details_boost_tree_xgboost.Rd
@@ -56,10 +56,11 @@ Inf)
## Computational engine: xgboost
##
## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
-## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
-## eta = numeric(), gamma = numeric(), subsample = numeric(),
-## early_stop = integer(), nthread = 1, verbose = 0)
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(),
+## max_depth = integer(), eta = numeric(), gamma = numeric(),
+## subsample = numeric(), early_stop = integer(), nthread = 1,
+## verbose = 0)
}\if{html}{\out{}}
}
@@ -90,10 +91,11 @@ Inf)
## Computational engine: xgboost
##
## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
-## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
-## eta = numeric(), gamma = numeric(), subsample = numeric(),
-## early_stop = integer(), nthread = 1, verbose = 0)
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(),
+## max_depth = integer(), eta = numeric(), gamma = numeric(),
+## subsample = numeric(), early_stop = integer(), nthread = 1,
+## verbose = 0)
}\if{html}{\out{}}
\code{\link[=xgb_train]{xgb_train()}} is a wrapper around
diff --git a/man/details_decision_tree_C5.0.Rd b/man/details_decision_tree_C5.0.Rd
index ab1c04905..d52eeb99b 100644
--- a/man/details_decision_tree_C5.0.Rd
+++ b/man/details_decision_tree_C5.0.Rd
@@ -49,6 +49,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd
new file mode 100644
index 000000000..45cfbe069
--- /dev/null
+++ b/man/details_decision_tree_party.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/decision_tree_party.R
+\name{details_decision_tree_party}
+\alias{details_decision_tree_party}
+\title{Decision trees via party}
+\description{
+\code{\link[party:ctree]{party::ctree()}} fits a model as a set of if/then statements that creates a
+tree-based structure using hypothesis testing methods.
+}
+\details{
+For this engine, there is a single mode: censored regression
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{tree_depth}: Tree Depth (type: integer, default: see below)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 20L)
+}
+
+The \code{tree_depth} parameter defaults to \code{0} which means no restrictions
+are applied to tree depth.
+
+An engine-specific parameter for this model is:
+\itemize{
+\item \code{mtry}: the number of predictors, selected at random, that are
+evaluated for splitting. The default is to use all predictors.
+}
+}
+
+\subsection{Translation from parsnip to the original package (censored regression)}{
+
+The \strong{censored} extension package is required to fit this model.
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(censored)
+
+decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\%
+ set_engine("party") \%>\%
+ set_mode("censored regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{## Decision Tree Model Specification (censored regression)
+##
+## Main Arguments:
+## tree_depth = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: party
+##
+## Model fit template:
+## censored::cond_inference_surv_ctree(formula = missing_arg(),
+## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L,
+## data))
+}\if{html}{\out{</div>}}
+
+\code{censored::cond_inference_surv_ctree()} is a wrapper around
+\code{\link[party:ctree]{party::ctree()}} (and other functions) that makes it
+easier to run this model.
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{References}{
+\itemize{
+\item Hothorn T, Hornik K, Zeileis A. 2006. Unbiased Recursive
+Partitioning: A Conditional Inference Framework. \emph{Journal of
+Computational and Graphical Statistics}, 15(3), 651–674.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd
index 153a1d381..bd6f831ef 100644
--- a/man/details_decision_tree_rpart.Rd
+++ b/man/details_decision_tree_rpart.Rd
@@ -109,6 +109,16 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_decision_tree_spark.Rd b/man/details_decision_tree_spark.Rd
index 7731626c2..c1ab4a8ee 100644
--- a/man/details_decision_tree_spark.Rd
+++ b/man/details_decision_tree_spark.Rd
@@ -72,6 +72,19 @@ Categorical predictors can be partitioned into groups of factor levels
are not required for this model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
+}
+
\subsection{Other details}{
For models created using the \code{"spark"} engine, there are several things
diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd
index 903d47670..f319fc669 100644
--- a/man/details_discrim_flexible_earth.Rd
+++ b/man/details_discrim_flexible_earth.Rd
@@ -63,6 +63,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by
diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd
index 75ebc7140..704888d05 100644
--- a/man/details_discrim_linear_MASS.Rd
+++ b/man/details_discrim_linear_MASS.Rd
@@ -48,6 +48,11 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd
index 38236ea17..b8e2ce229 100644
--- a/man/details_discrim_linear_mda.Rd
+++ b/man/details_discrim_linear_mda.Rd
@@ -54,6 +54,16 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by
diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd
index 53630060c..6c6444460 100644
--- a/man/details_discrim_linear_sda.Rd
+++ b/man/details_discrim_linear_sda.Rd
@@ -64,6 +64,11 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics
diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd
index 9370f8382..fb0e1cbee 100644
--- a/man/details_discrim_linear_sparsediscrim.Rd
+++ b/man/details_discrim_linear_sparsediscrim.Rd
@@ -68,6 +68,11 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item \code{lda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of
diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd
index 36de36223..2b83b92c5 100644
--- a/man/details_discrim_quad_MASS.Rd
+++ b/man/details_discrim_quad_MASS.Rd
@@ -49,6 +49,11 @@ unique value) within each class should be eliminated before fitting the
model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd
index 445ca9a12..962ee6a8c 100644
--- a/man/details_discrim_quad_sparsediscrim.Rd
+++ b/man/details_discrim_quad_sparsediscrim.Rd
@@ -67,6 +67,11 @@ unique value) within each class should be eliminated before fitting the
model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item \code{qda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of
diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd
index 0290e95d0..fb946b0d1 100644
--- a/man/details_discrim_regularized_klaR.Rd
+++ b/man/details_discrim_regularized_klaR.Rd
@@ -69,6 +69,11 @@ unique value) within each class should be eliminated before fitting the
model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Friedman, J (1989). Regularized Discriminant Analysis. \emph{Journal of
diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd
index 818ab106a..4f1c0c5ae 100644
--- a/man/details_gen_additive_mod_mgcv.Rd
+++ b/man/details_gen_additive_mod_mgcv.Rd
@@ -103,6 +103,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Ross, W. 2021. \href{https://noamross.github.io/gams-in-r-course/}{\emph{Generalized Additive Models in R: A Free, Interactive Course using mgcv}}
diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd
index 1185efcec..82780893d 100644
--- a/man/details_linear_reg_brulee.Rd
+++ b/man/details_linear_reg_brulee.Rd
@@ -71,6 +71,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd
index 2a911b81a..69e493d8a 100644
--- a/man/details_linear_reg_gee.Rd
+++ b/man/details_linear_reg_gee.Rd
@@ -100,6 +100,11 @@ to \code{glm()} is needed to get the rank and QR decomposition objects so
that \code{predict()} can be used.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using
diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd
index 0d6d27f74..3987a5c68 100644
--- a/man/details_linear_reg_glm.Rd
+++ b/man/details_linear_reg_glm.Rd
@@ -61,6 +61,26 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes
+that a specific type of case weights is being used: “Non-NULL weights
+can be used to indicate that different observations have different
+dispersions (with the values in weights being inversely proportional to
+the dispersions); or equivalently, when the elements of weights are
+positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i}
+unit-weight observations. For a binomial GLM prior weights are used to
+give the number of trials when the response is the proportion of
+successes: they would rarely be used for a Poisson GLM.”
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd
index 4f5d04b96..5f6513ec5 100644
--- a/man/details_linear_reg_glmnet.Rd
+++ b/man/details_linear_reg_glmnet.Rd
@@ -59,6 +59,16 @@ variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
the argument \code{standardize = TRUE} to center and scale the data.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd
index a1a828da1..e6135b979 100644
--- a/man/details_linear_reg_gls.Rd
+++ b/man/details_linear_reg_gls.Rd
@@ -192,6 +192,11 @@ gls_fit \%>\% tidy()
## 1 (Intercept) -4.95 0.808 -6.13 3.50e- 9
## 2 week -2.12 0.224 -9.47 2.26e-18
}\if{html}{\out{}}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and
diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd
index b467794c1..bbef52386 100644
--- a/man/details_linear_reg_keras.Rd
+++ b/man/details_linear_reg_keras.Rd
@@ -55,6 +55,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd
index 165e7f0d3..515f955ea 100644
--- a/man/details_linear_reg_lm.Rd
+++ b/man/details_linear_reg_lm.Rd
@@ -37,6 +37,33 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+\emph{However}, the documentation in \code{\link[stats:lm]{stats::lm()}} assumes
+that a specific type of case weights is being used: “Non-NULL weights
+can be used to indicate that different observations have different
+variances (with the values in weights being inversely proportional to
+the variances); or equivalently, when the elements of weights are
+positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i}
+unit-weight observations (including the case that there are w_i
+observations equal to \code{y_i} and the data have been summarized). However,
+in the latter case, notice that within-group variation is not used.
+Therefore, the sigma estimate and residual degrees of freedom may be
+suboptimal; in the case of replication weights, \strong{even wrong}. Hence,
+standard errors and analysis of variance tables should be treated with
+care” (emphasis added).
+
+Depending on your application, the degrees of freedom for the model
+(and other statistics) might be incorrect.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd
index 472fbc87f..c76854041 100644
--- a/man/details_linear_reg_lme.Rd
+++ b/man/details_linear_reg_lme.Rd
@@ -108,6 +108,11 @@ fit(lme_wflow, data = riesby)
}\if{html}{\out{}}
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and
diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd
index 82b085f40..2130a8d5b 100644
--- a/man/details_linear_reg_lmer.Rd
+++ b/man/details_linear_reg_lmer.Rd
@@ -106,6 +106,16 @@ fit(lmer_wflow, data = riesby)
}\if{html}{\out{}}
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and
diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd
index ae418054f..776e48549 100644
--- a/man/details_linear_reg_spark.Rd
+++ b/man/details_linear_reg_spark.Rd
@@ -18,14 +18,10 @@ This model has 2 tuning parameters:
}
For \code{penalty}, the amount of regularization includes both the L1 penalty
-(i.e., lasso) and the L2 penalty (i.e., ridge or weight decay). As for
-\code{mixture}:
-\itemize{
-\item \code{mixture = 1} specifies a pure lasso model,
-\item \code{mixture = 0} specifies a ridge regression model, and
-\item \verb{0 < mixture < 1} specifies an elastic net model, interpolating
-lasso and ridge.
-}
+(i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
}
\subsection{Translation from parsnip to the original package}{
@@ -45,7 +41,7 @@ lasso and ridge.
##
## Model fit template:
## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1))
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1))
}\if{html}{\out{}}
}
@@ -53,15 +49,30 @@ lasso and ridge.
Factor/categorical predictors need to be converted to numeric values
(e.g., dummy or indicator variables) for this engine. When using the
-formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
-convert factor columns to indicators.
+formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip
+will convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
-variance of one. By default, \code{ml_linear_regression()} uses the argument
+variance of one.
+
+By default, \code{ml_linear_regression()} uses the argument
\code{standardization = TRUE} to center and scale the data.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
+}
+
\subsection{Other details}{
For models created using the \code{"spark"} engine, there are several things
diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd
index b92fa5dac..98652904f 100644
--- a/man/details_linear_reg_stan.Rd
+++ b/man/details_linear_reg_stan.Rd
@@ -74,6 +74,16 @@ standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd
index 54d39f4ba..7ddad52cd 100644
--- a/man/details_linear_reg_stan_glmer.Rd
+++ b/man/details_linear_reg_stan_glmer.Rd
@@ -133,6 +133,16 @@ the standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press.
diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd
index 7cc0da26b..94245330c 100644
--- a/man/details_logistic_reg_LiblineaR.Rd
+++ b/man/details_logistic_reg_LiblineaR.Rd
@@ -45,8 +45,8 @@ parameter estimates.
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## cost = Inf, type = double(1), verbose = FALSE)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), cost = Inf,
+## type = double(1), verbose = FALSE)
}\if{html}{\out{}}
}
@@ -54,8 +54,8 @@ parameter estimates.
Factor/categorical predictors need to be converted to numeric values
(e.g., dummy or indicator variables) for this engine. When using the
-formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
-convert factor columns to indicators.
+formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip
+will convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd
index d19a8d46a..0a688aa16 100644
--- a/man/details_logistic_reg_brulee.Rd
+++ b/man/details_logistic_reg_brulee.Rd
@@ -71,6 +71,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd
index 4b2137823..df4b6db1e 100644
--- a/man/details_logistic_reg_gee.Rd
+++ b/man/details_logistic_reg_gee.Rd
@@ -100,6 +100,11 @@ to \code{glm()} is needed to get the rank and QR decomposition objects so
that \code{predict()} can be used.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using
diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd
index 57cfedea6..5f2aa99b1 100644
--- a/man/details_logistic_reg_glm.Rd
+++ b/man/details_logistic_reg_glm.Rd
@@ -61,6 +61,26 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes
+that a specific type of case weights is being used: “Non-NULL weights
+can be used to indicate that different observations have different
+dispersions (with the values in weights being inversely proportional to
+the dispersions); or equivalently, when the elements of weights are
+positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i}
+unit-weight observations. For a binomial GLM prior weights are used to
+give the number of trials when the response is the proportion of
+successes: they would rarely be used for a Poisson GLM.”
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd
index 335957c85..127ce2748 100644
--- a/man/details_logistic_reg_glmer.Rd
+++ b/man/details_logistic_reg_glmer.Rd
@@ -106,6 +106,16 @@ fit(glmer_wflow, data = toenail)
}\if{html}{\out{}}
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and
diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd
index a311ad067..8793c642f 100644
--- a/man/details_logistic_reg_glmnet.Rd
+++ b/man/details_logistic_reg_glmnet.Rd
@@ -64,6 +64,16 @@ variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
the argument \code{standardize = TRUE} to center and scale the data.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd
index 4576d9f2e..78c31e115 100644
--- a/man/details_logistic_reg_keras.Rd
+++ b/man/details_logistic_reg_keras.Rd
@@ -57,6 +57,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd
index 6dbee05e8..275b5155d 100644
--- a/man/details_logistic_reg_spark.Rd
+++ b/man/details_logistic_reg_spark.Rd
@@ -46,7 +46,7 @@ lasso and ridge.
##
## Model fit template:
## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
## family = "binomial")
}\if{html}{\out{}}
}
@@ -60,8 +60,23 @@ convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
-variance of one. By default, \code{ml_logistic_regression()} uses the
-argument \code{standardization = TRUE} to center and scale the data.
+variance of one.
+
+By default, \code{ml_logistic_regression()} uses the argument
+\code{standardization = TRUE} to center and scale the data.
+}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
}
\subsection{Other details}{
diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd
index 281185d4a..a4268b0ef 100644
--- a/man/details_logistic_reg_stan.Rd
+++ b/man/details_logistic_reg_stan.Rd
@@ -75,6 +75,16 @@ standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd
index bcbfd5d58..5e0323c44 100644
--- a/man/details_logistic_reg_stan_glmer.Rd
+++ b/man/details_logistic_reg_stan_glmer.Rd
@@ -132,6 +132,16 @@ the standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press.
diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd
index 78686e205..598add896 100644
--- a/man/details_mars_earth.Rd
+++ b/man/details_mars_earth.Rd
@@ -88,6 +88,16 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd
index 8866058f4..741569ee0 100644
--- a/man/details_mlp_brulee.Rd
+++ b/man/details_mlp_brulee.Rd
@@ -124,6 +124,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd
index 8b110c03f..ae7a18979 100644
--- a/man/details_mlp_keras.Rd
+++ b/man/details_mlp_keras.Rd
@@ -97,6 +97,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd
index 538ae3662..e545b0f46 100644
--- a/man/details_mlp_nnet.Rd
+++ b/man/details_mlp_nnet.Rd
@@ -45,9 +45,8 @@ some models, you may need to pass this value in via
## Computational engine: nnet
##
## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = integer(1), decay = double(1), maxit = integer(1),
-## trace = FALSE, linout = TRUE)
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1),
+## decay = double(1), maxit = integer(1), trace = FALSE, linout = TRUE)
}\if{html}{\out{}}
Note that parsnip automatically sets linear activation in the last
@@ -76,9 +75,8 @@ layer.
## Computational engine: nnet
##
## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = integer(1), decay = double(1), maxit = integer(1),
-## trace = FALSE, linout = FALSE)
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1),
+## decay = double(1), maxit = integer(1), trace = FALSE, linout = FALSE)
}\if{html}{\out{}}
}
@@ -86,14 +84,19 @@ layer.
Factor/categorical predictors need to be converted to numeric values
(e.g., dummy or indicator variables) for this engine. When using the
-formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
-convert factor columns to indicators.
+formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip
+will convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_multinom_reg_brulee.Rd b/man/details_multinom_reg_brulee.Rd
index d47f034df..5285ca592 100644
--- a/man/details_multinom_reg_brulee.Rd
+++ b/man/details_multinom_reg_brulee.Rd
@@ -70,6 +70,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd
index 9253bab19..b51a7ca50 100644
--- a/man/details_multinom_reg_glmnet.Rd
+++ b/man/details_multinom_reg_glmnet.Rd
@@ -70,6 +70,16 @@ The “Fitting and Predicting with parsnip” article contains
for \code{multinom_reg()} with the \code{"glmnet"} engine.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd
index 203650ea8..f2e8d00d5 100644
--- a/man/details_multinom_reg_keras.Rd
+++ b/man/details_multinom_reg_keras.Rd
@@ -56,6 +56,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd
index 5bf7ea518..51a01c386 100644
--- a/man/details_multinom_reg_nnet.Rd
+++ b/man/details_multinom_reg_nnet.Rd
@@ -36,7 +36,7 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty
##
## Model fit template:
## nnet::multinom(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), decay = double(1), trace = FALSE)
+## decay = double(1), trace = FALSE)
}\if{html}{\out{}}
}
@@ -44,8 +44,8 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty
Factor/categorical predictors need to be converted to numeric values
(e.g., dummy or indicator variables) for this engine. When using the
-formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
-convert factor columns to indicators.
+formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip
+will convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
@@ -59,6 +59,11 @@ The “Fitting and Predicting with parsnip” article contains
for \code{multinom_reg()} with the \code{"nnet"} engine.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering nnet with R}.
diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd
index 8c627c245..ff7f9a1e7 100644
--- a/man/details_multinom_reg_spark.Rd
+++ b/man/details_multinom_reg_spark.Rd
@@ -45,7 +45,7 @@ lasso and ridge.
##
## Model fit template:
## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
## family = "multinomial")
}\if{html}{\out{}}
}
@@ -59,8 +59,23 @@ convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
-variance of one. By default, \code{ml_multinom_regression()} uses the
-argument \code{standardization = TRUE} to center and scale the data.
+variance of one.
+
+By default, \code{ml_logistic_regression()} uses the argument
+\code{standardization = TRUE} to center and scale the data.
+}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
}
\subsection{Other details}{
diff --git a/man/details_naive_Bayes_klaR.Rd b/man/details_naive_Bayes_klaR.Rd
index d2542bcf0..9fa60decc 100644
--- a/man/details_naive_Bayes_klaR.Rd
+++ b/man/details_naive_Bayes_klaR.Rd
@@ -58,6 +58,11 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd
index 5851999c7..ad99f199e 100644
--- a/man/details_naive_Bayes_naivebayes.Rd
+++ b/man/details_naive_Bayes_naivebayes.Rd
@@ -61,6 +61,11 @@ predictors (i.e., with a single unique value) should be eliminated
before fitting the model.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd
index 01f57c468..64911693e 100644
--- a/man/details_nearest_neighbor_kknn.Rd
+++ b/man/details_nearest_neighbor_kknn.Rd
@@ -96,6 +96,11 @@ The “Fitting and Predicting with parsnip” article contains
for \code{nearest_neighbor()} with the \code{"kknn"} engine.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Hechenbichler K. and Schliep K.P. (2004) \href{https://epub.ub.uni-muenchen.de/1769/}{Weighted k-Nearest-Neighbor Techniques and Ordinal Classification}, Discussion
diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd
index 4ef7fb7da..fd7a156a7 100644
--- a/man/details_pls_mixOmics.Rd
+++ b/man/details_pls_mixOmics.Rd
@@ -114,6 +114,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{References}{
\itemize{
\item Rohart F and Gautier B and Singh A and Le Cao K-A (2017). “mixOmics:
diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd
index 9f627f05c..e624d36e6 100644
--- a/man/details_poisson_reg_gee.Rd
+++ b/man/details_poisson_reg_gee.Rd
@@ -49,9 +49,12 @@ keep the clustering/subject identifier column as factor or character
next section.
}
-\subsection{Other details}{
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
-The model cannot accept case weights.
+\subsection{Other details}{
Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable
using an argument \code{id} that requires a vector. parsnip doesn’t work that
diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd
index 604e1b600..9f9c3d561 100644
--- a/man/details_poisson_reg_glm.Rd
+++ b/man/details_poisson_reg_glm.Rd
@@ -41,5 +41,39 @@ Factor/categorical predictors need to be converted to numeric values
formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+\emph{However}, the documentation in \code{\link[stats:glm]{stats::glm()}} assumes
+that a specific type of case weights is being used: “Non-NULL weights
+can be used to indicate that different observations have different
+dispersions (with the values in weights being inversely proportional to
+the dispersions); or equivalently, when the elements of weights are
+positive integers \code{w_i}, that each response \code{y_i} is the mean of \code{w_i}
+unit-weight observations. For a binomial GLM prior weights are used to
+give the number of trials when the response is the proportion of
+successes: they would rarely be used for a Poisson GLM.”
+
+If frequency weights are being used in your application, the
+\code{\link[=glm_grouped]{glm_grouped()}} model (and corresponding engine) may be
+more appropriate.
+}
}
\keyword{internal}
diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd
index fc4f469cd..f3258bb5a 100644
--- a/man/details_poisson_reg_glmer.Rd
+++ b/man/details_poisson_reg_glmer.Rd
@@ -105,6 +105,16 @@ fit(glmer_wflow, data = longitudinal_counts)
}\if{html}{\out{}}
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and
diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd
index 90dffa3ed..1c621d0b1 100644
--- a/man/details_poisson_reg_glmnet.Rd
+++ b/man/details_poisson_reg_glmnet.Rd
@@ -66,5 +66,15 @@ center and scale each so that each predictor has mean zero and a
variance of one. By default, \code{glmnet::glmnet()} uses the argument
\code{standardize = TRUE} to center and scale the data.
}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
}
\keyword{internal}
diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd
index 0b6588aac..7024a964d 100644
--- a/man/details_poisson_reg_hurdle.Rd
+++ b/man/details_poisson_reg_hurdle.Rd
@@ -118,5 +118,15 @@ The reason for this is that
create the model matrix and either fail or create dummy variables
prematurely.
}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
}
\keyword{internal}
diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd
index 42792d8f4..9c5084970 100644
--- a/man/details_poisson_reg_stan.Rd
+++ b/man/details_poisson_reg_stan.Rd
@@ -78,6 +78,16 @@ standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd
index c36f736e6..e87b57af6 100644
--- a/man/details_poisson_reg_stan_glmer.Rd
+++ b/man/details_poisson_reg_stan_glmer.Rd
@@ -131,6 +131,16 @@ the standard deviation of the posterior distribution (or posterior
predictive distribution as appropriate) is returned.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press.
diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd
index 8c1b963aa..0289c749a 100644
--- a/man/details_poisson_reg_zeroinfl.Rd
+++ b/man/details_poisson_reg_zeroinfl.Rd
@@ -119,5 +119,15 @@ The reason for this is that
create the model matrix and either fail or create dummy variables
prematurely.
}
+
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
}
\keyword{internal}
diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd
index c8e5bc0ac..8a5b1306f 100644
--- a/man/details_proportional_hazards_survival.Rd
+++ b/man/details_proportional_hazards_survival.Rd
@@ -111,6 +111,16 @@ value produced by the \code{predict()} method in the engine package.
This behavior can be changed by using the \code{increasing} argument when
calling \code{predict()} on a model object.
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Andersen P, Gill R. 1982. Cox’s regression model for counting
diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd
new file mode 100644
index 000000000..b1786e02b
--- /dev/null
+++ b/man/details_rand_forest_party.Rd
@@ -0,0 +1,73 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rand_forest_party.R
+\name{details_rand_forest_party}
+\alias{details_rand_forest_party}
+\title{Random forests via party}
+\description{
+\code{\link[party:cforest]{party::cforest()}} fits a model that creates a large number of decision
+trees, each independent of the others. The final prediction uses all
+predictions from the individual trees and combines them.
+}
+\details{
+For this engine, there is a single mode: censored regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{trees}: # Trees (type: integer, default: 500L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 20L)
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: 5L)
+}
+}
+
+\subsection{Translation from parsnip to the original package (censored regression)}{
+
+The \strong{censored} extension package is required to fit this model.
+
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(censored)
+
+rand_forest() \%>\%
+ set_engine("party") \%>\%
+ set_mode("censored regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{## Random Forest Model Specification (censored regression)
+##
+## Computational engine: party
+##
+## Model fit template:
+## censored::cond_inference_surv_cforest(formula = missing_arg(),
+## data = missing_arg())
+}\if{html}{\out{</div>}}
+
+\code{censored::cond_inference_surv_cforest()} is a wrapper around
+\code{\link[party:cforest]{party::cforest()}} (and other functions) that makes
+it easier to run this model.
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+
+The main interface for this model uses the formula method since the
+model specification typically involves the use of
+\code{\link[survival:Surv]{survival::Surv()}}.
+}
+
+\subsection{References}{
+\itemize{
+\item Hothorn T, Buhlmann P, Dudoit S, Molinaro A, Van der Laan MJ. 2006.
+Survival Ensembles. \emph{Biostatistics}, 7(3), 355–373.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd
index 7484b34bf..e3cefb47e 100644
--- a/man/details_rand_forest_ranger.Rd
+++ b/man/details_rand_forest_ranger.Rd
@@ -49,7 +49,7 @@ default. For classification, a value of 10 is used.
## Computational engine: ranger
##
## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
## mtry = min_cols(~integer(1), x), num.trees = integer(1),
## min.node.size = min_rows(~integer(1), x), num.threads = 1,
## verbose = FALSE, seed = sample.int(10^5, 1))
@@ -81,7 +81,7 @@ chosen value if it is not consistent with the actual data dimensions.
## Computational engine: ranger
##
## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
## mtry = min_cols(~integer(1), x), num.trees = integer(1),
## min.node.size = min_rows(~integer(1), x), num.threads = 1,
## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE)
@@ -113,6 +113,16 @@ these values can fall outside of \verb{[0, 1]} and will be coerced to be in
this range.
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_rand_forest_spark.Rd b/man/details_rand_forest_spark.Rd
index 30625f513..4a791bb6b 100644
--- a/man/details_rand_forest_spark.Rd
+++ b/man/details_rand_forest_spark.Rd
@@ -114,6 +114,19 @@ object.
}
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+
+Note that, for spark engines, the \code{case_weights} argument value should be
+a character string to specify the column with the numeric case weights.
+}
+
\subsection{References}{
\itemize{
\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd
index 07bb0c442..4f2d278b9 100644
--- a/man/details_rule_fit_xrf.Rd
+++ b/man/details_rule_fit_xrf.Rd
@@ -64,7 +64,7 @@ rule_fit(
## Computational engine: xrf
##
## Model fit template:
-## rules::xrf_fit(formula = missing_arg(), data = missing_arg(),
+## rules::xrf_fit(object = missing_arg(), data = missing_arg(),
## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1),
## max_depth = integer(1), eta = numeric(1), gamma = numeric(1),
## subsample = numeric(1), lambda = numeric(1))
@@ -107,7 +107,7 @@ rule_fit(
## Computational engine: xrf
##
## Model fit template:
-## rules::xrf_fit(formula = missing_arg(), data = missing_arg(),
+## rules::xrf_fit(object = missing_arg(), data = missing_arg(),
## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1),
## max_depth = integer(1), eta = numeric(1), gamma = numeric(1),
## subsample = numeric(1), lambda = numeric(1))
@@ -135,7 +135,6 @@ whereas \strong{xrf} uses an internal 5-fold cross-validation to determine it
(by default).
}
-\subsection{Other details}{
\subsection{Preprocessing requirements}{
Factor/categorical predictors need to be converted to numeric values
@@ -144,37 +143,9 @@ formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
convert factor columns to indicators.
}
-\subsection{Interpreting \code{mtry}}{
-
-The \code{mtry} argument denotes the number of predictors that will be
-randomly sampled at each split when creating tree models.
-
-Some engines, such as \code{"xgboost"}, \code{"xrf"}, and \code{"lightgbm"}, interpret
-their analogue to the \code{mtry} argument as the \emph{proportion} of predictors
-that will be randomly sampled at each split rather than the \emph{count}. In
-some settings, such as when tuning over preprocessors that influence the
-number of predictors, this parameterization is quite
-helpful—interpreting \code{mtry} as a proportion means that [0,1] is always
-a valid range for that parameter, regardless of input data.
-
-parsnip and its extensions accommodate this parameterization using the
-\code{counts} argument: a logical indicating whether \code{mtry} should be
-interpreted as the number of predictors that will be randomly sampled at
-each split. \code{TRUE} indicates that \code{mtry} will be interpreted in its
-sense as a count, \code{FALSE} indicates that the argument will be
-interpreted in its sense as a proportion.
-
-\code{mtry} is a main model argument for
-\code{\link[=boost_tree]{boost_tree()}} and
-\code{\link[=rand_forest]{rand_forest()}}, and thus should not have an
-engine-specific interface. So, regardless of engine, \code{counts} defaults
-to \code{TRUE}. For engines that support the proportion
-interpretation—currently \code{"xgboost"}, \code{"xrf"} (via the rules package),
-and \code{"lightgbm"} (via the bonsai package)—the user can pass the
-\code{counts = FALSE} argument to \code{set_engine()} to supply \code{mtry} values
-within [0,1].
-}
+\subsection{Case weights}{
+The underlying model implementation does not allow for case weights.
}
\subsection{References}{
diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd
index 1e162298a..02d130f27 100644
--- a/man/details_survival_reg_survival.Rd
+++ b/man/details_survival_reg_survival.Rd
@@ -84,6 +84,16 @@ survival_reg() \%>\%
}\if{html}{\out{}}
}
+\subsection{Case weights}{
+
+This model can utilize case weights during model fitting. To use them,
+see the documentation in \link{case_weights} and the examples
+on \code{tidymodels.org}.
+
+The \code{fit()} and \code{fit_xy()} functions have arguments called
+\code{case_weights} that expect vectors of case weights.
+}
+
\subsection{References}{
\itemize{
\item Kalbfleisch, J. D. and Prentice, R. L. 2002 \emph{The statistical
diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd
index 1591d0e0b..2e7c70f81 100644
--- a/man/details_svm_linear_LiblineaR.Rd
+++ b/man/details_svm_linear_LiblineaR.Rd
@@ -44,8 +44,8 @@ are types 1 (classification) and 11 (regression).
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## C = double(1), svr_eps = double(1), type = 11)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1),
+## svr_eps = double(1), type = 11)
}\if{html}{\out{}}
}
@@ -67,8 +67,8 @@ are types 1 (classification) and 11 (regression).
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## C = double(1), type = 1)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1),
+## type = 1)
}\if{html}{\out{}}
The \code{margin} parameter does not apply to classification models.
@@ -84,14 +84,19 @@ class predictions (e.g., accuracy).
Factor/categorical predictors need to be converted to numeric values
(e.g., dummy or indicator variables) for this engine. When using the
-formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will
-convert factor columns to indicators.
+formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip
+will convert factor columns to indicators.
Predictors should have the same scale. One way to achieve this is to
center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd
index 512b2dc33..64883637a 100644
--- a/man/details_svm_linear_kernlab.Rd
+++ b/man/details_svm_linear_kernlab.Rd
@@ -89,6 +89,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd
index aa694f6a1..ef38a28b9 100644
--- a/man/details_svm_poly_kernlab.Rd
+++ b/man/details_svm_poly_kernlab.Rd
@@ -101,6 +101,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd
index 5e2834eba..1cba88f36 100644
--- a/man/details_svm_rbf_kernlab.Rd
+++ b/man/details_svm_rbf_kernlab.Rd
@@ -101,6 +101,11 @@ center and scale each so that each predictor has mean zero and a
variance of one.
}
+\subsection{Case weights}{
+
+The underlying model implementation does not allow for case weights.
+}
+
\subsection{Examples}{
The “Fitting and Predicting with parsnip” article contains
diff --git a/man/fit.Rd b/man/fit.Rd
index 21c79f8ec..95768ad65 100644
--- a/man/fit.Rd
+++ b/man/fit.Rd
@@ -5,9 +5,16 @@
\alias{fit_xy.model_spec}
\title{Fit a Model Specification to a Dataset}
\usage{
-\method{fit}{model_spec}(object, formula, data, control = control_parsnip(), ...)
+\method{fit}{model_spec}(
+ object,
+ formula,
+ data,
+ case_weights = NULL,
+ control = control_parsnip(),
+ ...
+)
-\method{fit_xy}{model_spec}(object, x, y, control = control_parsnip(), ...)
+\method{fit_xy}{model_spec}(object, x, y, case_weights = NULL, control = control_parsnip(), ...)
}
\arguments{
\item{object}{An object of class \code{model_spec} that has a chosen engine
@@ -22,6 +29,11 @@ below). A data frame containing all relevant variables (e.g.
outcome(s), predictors, case weights, etc). Note: when needed, a
\emph{named argument} should be used.}
+\item{case_weights}{An optional classed vector of numeric case weights. This
+must return \code{TRUE} when \code{\link[hardhat:is_case_weights]{hardhat::is_case_weights()}} is run on it. See
+\code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}} and \code{\link[hardhat:importance_weights]{hardhat::importance_weights()}} for
+examples.}
+
\item{control}{A named list with elements \code{verbosity} and
\code{catch}. See \code{\link[=control_parsnip]{control_parsnip()}}.}
diff --git a/man/glm_grouped.Rd b/man/glm_grouped.Rd
new file mode 100644
index 000000000..f4b33494d
--- /dev/null
+++ b/man/glm_grouped.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/grouped_binomial.R
+\name{glm_grouped}
+\alias{glm_grouped}
+\title{Fit a grouped binomial outcome from a data set with case weights}
+\usage{
+glm_grouped(formula, data, weights, ...)
+}
+\arguments{
+\item{formula}{A formula object with one outcome that is a two-level factor.}
+
+\item{data}{A data frame with the outcomes and predictors (but not case
+weights).}
+
+\item{weights}{An integer vector of weights whose length is the same as the
+number of rows in \code{data}. If it is a non-integer numeric, it will be converted
+to integer (with a warning).}
+
+\item{...}{Options to pass to \code{\link[stats:glm]{stats::glm()}}. If \code{family} is not set, it will
+automatically be assigned the basic binomial family.}
+}
+\value{
+An object produced by \code{\link[stats:glm]{stats::glm()}}.
+}
+\description{
+\code{\link[stats:glm]{stats::glm()}} assumes that a tabular data set with case weights corresponds
+to "different observations have different dispersions" (see \code{?glm}).
+
+In some cases, the case weights reflect that the same covariate pattern was
+observed multiple times (i.e., \emph{frequency weights}). In this case,
+\code{\link[stats:glm]{stats::glm()}} expects the data to be formatted as the number of events for
+each factor level so that the outcome can be given to the formula as
+\code{cbind(events_1, events_2)}.
+
+\code{\link[=glm_grouped]{glm_grouped()}} converts data with integer case weights to the expected
+"number of events" format for binomial data.
+}
+\examples{
+#----------------------------------------------------------------------------
+# The same data set formatted three ways
+
+# First with basic case weights that, from ?glm, are used inappropriately.
+ucb_weighted <- as.data.frame(UCBAdmissions)
+ucb_weighted$Freq <- as.integer(ucb_weighted$Freq)
+head(ucb_weighted)
+nrow(ucb_weighted)
+
+# Format when yes/no data are in individual rows (probably still inappropriate)
+library(tidyr)
+ucb_long <- uncount(ucb_weighted, Freq)
+head(ucb_long)
+nrow(ucb_long)
+
+# Format where the outcome is formatted as number of events
+ucb_events <-
+ ucb_weighted \%>\%
+ tidyr::pivot_wider(
+ id_cols = c(Gender, Dept),
+ names_from = Admit,
+ values_from = Freq,
+ values_fill = 0L
+ )
+head(ucb_events)
+nrow(ucb_events)
+
+#----------------------------------------------------------------------------
+# Different model fits
+
+# Treat data as separate Bernoulli data:
+glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial)
+
+# Weights produce the same statistics
+glm(
+ Admit ~ Gender + Dept,
+ data = ucb_weighted,
+ family = binomial,
+ weights = ucb_weighted$Freq
+)
+
+# Data as binomial "x events out of n trials" format. Note that, to get the same
+# coefficients, the order of the levels must be reversed.
+glm(
+ cbind(Rejected, Admitted) ~ Gender + Dept,
+ data = ucb_events,
+ family = binomial
+)
+
+# The new function that starts with frequency weights and gets to the correct place:
+glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq)
+}
diff --git a/man/reexports.Rd b/man/reexports.Rd
index 3a5f8c898..9622b3c65 100644
--- a/man/reexports.Rd
+++ b/man/reexports.Rd
@@ -16,6 +16,8 @@
\alias{extract_parameter_set_dials}
\alias{extract_parameter_dials}
\alias{tune}
+\alias{frequency_weights}
+\alias{importance_weights}
\alias{varying_args}
\title{Objects exported from other packages}
\keyword{internal}
@@ -26,9 +28,33 @@ below to see their documentation.
\describe{
\item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
+ \item{generics}{\code{\link[generics]{augment}}, \code{\link[generics]{fit}}, \code{\link[generics]{fit_xy}}, \code{\link[generics]{glance}}, \code{\link[generics]{required_pkgs}}, \code{\link[generics]{tidy}}, \code{\link[generics]{varying_args}}}
+
\item{ggplot2}{\code{\link[ggplot2]{autoplot}}}
- \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{tune}}}
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
+
+ \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}}
\item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}}
}}
diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd
index 2db867139..0a4e27823 100644
--- a/man/rmd/C5_rules_C5.0.Rmd
+++ b/man/rmd/C5_rules_C5.0.Rmd
@@ -45,6 +45,11 @@ C5_rules(
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348.
diff --git a/man/rmd/C5_rules_C5.0.md b/man/rmd/C5_rules_C5.0.md
index 99f14ae5c..29f165eef 100644
--- a/man/rmd/C5_rules_C5.0.md
+++ b/man/rmd/C5_rules_C5.0.md
@@ -49,6 +49,13 @@ C5_rules(
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348.
diff --git a/man/rmd/bag_mars_earth.Rmd b/man/rmd/bag_mars_earth.Rmd
index b7df34e07..143cb5b4a 100644
--- a/man/rmd/bag_mars_earth.Rmd
+++ b/man/rmd/bag_mars_earth.Rmd
@@ -57,6 +57,11 @@ bag_mars(
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140
diff --git a/man/rmd/bag_mars_earth.md b/man/rmd/bag_mars_earth.md
index 35a5db199..8bf913843 100644
--- a/man/rmd/bag_mars_earth.md
+++ b/man/rmd/bag_mars_earth.md
@@ -84,6 +84,13 @@ bag_mars(
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140
diff --git a/man/rmd/bag_tree_C5.0.Rmd b/man/rmd/bag_tree_C5.0.Rmd
index c15aca0a6..1c8f8725d 100644
--- a/man/rmd/bag_tree_C5.0.Rmd
+++ b/man/rmd/bag_tree_C5.0.Rmd
@@ -41,6 +41,11 @@ bag_tree(min_n = integer()) %>%
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
diff --git a/man/rmd/bag_tree_C5.0.md b/man/rmd/bag_tree_C5.0.md
index 08bccb732..18b139868 100644
--- a/man/rmd/bag_tree_C5.0.md
+++ b/man/rmd/bag_tree_C5.0.md
@@ -44,6 +44,13 @@ bag_tree(min_n = integer()) %>%
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
diff --git a/man/rmd/bag_tree_rpart.Rmd b/man/rmd/bag_tree_rpart.Rmd
index c419455ed..390f029cc 100644
--- a/man/rmd/bag_tree_rpart.Rmd
+++ b/man/rmd/bag_tree_rpart.Rmd
@@ -72,6 +72,11 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140
diff --git a/man/rmd/bag_tree_rpart.md b/man/rmd/bag_tree_rpart.md
index a47072d0b..e2da8fac5 100644
--- a/man/rmd/bag_tree_rpart.md
+++ b/man/rmd/bag_tree_rpart.md
@@ -116,6 +116,13 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140
diff --git a/man/rmd/boost_tree_C5.0.Rmd b/man/rmd/boost_tree_C5.0.Rmd
index 021aae2f0..59cd89e0d 100644
--- a/man/rmd/boost_tree_C5.0.Rmd
+++ b/man/rmd/boost_tree_C5.0.Rmd
@@ -41,6 +41,11 @@ boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>%
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Other details
### Early stopping
diff --git a/man/rmd/boost_tree_C5.0.md b/man/rmd/boost_tree_C5.0.md
index 720d0f2bd..1cafe18bb 100644
--- a/man/rmd/boost_tree_C5.0.md
+++ b/man/rmd/boost_tree_C5.0.md
@@ -49,6 +49,13 @@ boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>%
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Other details
### Early stopping
diff --git a/man/rmd/boost_tree_spark.Rmd b/man/rmd/boost_tree_spark.Rmd
index 98158a34a..1bc41743f 100644
--- a/man/rmd/boost_tree_spark.Rmd
+++ b/man/rmd/boost_tree_spark.Rmd
@@ -55,6 +55,13 @@ boost_tree(
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
```{r child = "template-spark-notes.Rmd"}
diff --git a/man/rmd/boost_tree_spark.md b/man/rmd/boost_tree_spark.md
index 89b63ae02..37fbd0d86 100644
--- a/man/rmd/boost_tree_spark.md
+++ b/man/rmd/boost_tree_spark.md
@@ -100,6 +100,15 @@ boost_tree(
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
diff --git a/man/rmd/boost_tree_xgboost.md b/man/rmd/boost_tree_xgboost.md
index 213b597bc..091ea3b4d 100644
--- a/man/rmd/boost_tree_xgboost.md
+++ b/man/rmd/boost_tree_xgboost.md
@@ -55,10 +55,11 @@ boost_tree(
## Computational engine: xgboost
##
## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
-## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
-## eta = numeric(), gamma = numeric(), subsample = numeric(),
-## early_stop = integer(), nthread = 1, verbose = 0)
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(),
+## max_depth = integer(), eta = numeric(), gamma = numeric(),
+## subsample = numeric(), early_stop = integer(), nthread = 1,
+## verbose = 0)
```
## Translation from parsnip to the original package (classification)
@@ -91,10 +92,11 @@ boost_tree(
## Computational engine: xgboost
##
## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
-## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
-## eta = numeric(), gamma = numeric(), subsample = numeric(),
-## early_stop = integer(), nthread = 1, verbose = 0)
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## colsample_bynode = integer(), nrounds = integer(), min_child_weight = integer(),
+## max_depth = integer(), eta = numeric(), gamma = numeric(),
+## subsample = numeric(), early_stop = integer(), nthread = 1,
+## verbose = 0)
```
[xgb_train()] is a wrapper around [xgboost::xgb.train()] (and other functions) that makes it easier to run this model.
diff --git a/man/rmd/decision_tree_C5.0.Rmd b/man/rmd/decision_tree_C5.0.Rmd
index d7e9ef460..43ff81536 100644
--- a/man/rmd/decision_tree_C5.0.Rmd
+++ b/man/rmd/decision_tree_C5.0.Rmd
@@ -39,6 +39,11 @@ decision_tree(min_n = integer()) %>%
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-C5.0) for `decision_tree()` with the `"C5.0"` engine.
diff --git a/man/rmd/decision_tree_C5.0.md b/man/rmd/decision_tree_C5.0.md
index 4891679e9..0cafd2b1c 100644
--- a/man/rmd/decision_tree_C5.0.md
+++ b/man/rmd/decision_tree_C5.0.md
@@ -41,6 +41,13 @@ decision_tree(min_n = integer()) %>%
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-C5.0) for `decision_tree()` with the `"C5.0"` engine.
diff --git a/man/rmd/decision_tree_partykit.md b/man/rmd/decision_tree_partykit.md
index 645dfd2c1..d7ab63a0e 100644
--- a/man/rmd/decision_tree_partykit.md
+++ b/man/rmd/decision_tree_partykit.md
@@ -21,7 +21,7 @@ An engine-specific parameter for this model is:
## Translation from parsnip to the original package (regression)
-
+The **bonsai** extension package is required to fit this model.
```r
@@ -50,7 +50,7 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
## Translation from parsnip to the original package (classification)
-
+The **bonsai** extension package is required to fit this model.
```r
@@ -81,7 +81,7 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
## Translation from parsnip to the original package (censored regression)
-
+The **censored** extension package is required to fit this model.
```r
diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd
index 54bc58d78..23a6bf478 100644
--- a/man/rmd/decision_tree_rpart.Rmd
+++ b/man/rmd/decision_tree_rpart.Rmd
@@ -65,6 +65,11 @@ decision_tree(
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-rpart) for `decision_tree()` with the `"rpart"` engine.
diff --git a/man/rmd/decision_tree_rpart.md b/man/rmd/decision_tree_rpart.md
index 042f71726..b3211c724 100644
--- a/man/rmd/decision_tree_rpart.md
+++ b/man/rmd/decision_tree_rpart.md
@@ -108,6 +108,13 @@ decision_tree(
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-rpart) for `decision_tree()` with the `"rpart"` engine.
diff --git a/man/rmd/decision_tree_spark.Rmd b/man/rmd/decision_tree_spark.Rmd
index 6ad5d4140..b2b7872a1 100644
--- a/man/rmd/decision_tree_spark.Rmd
+++ b/man/rmd/decision_tree_spark.Rmd
@@ -47,6 +47,13 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
```{r child = "template-tree-split-factors.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
```{r child = "template-spark-notes.Rmd"}
diff --git a/man/rmd/decision_tree_spark.md b/man/rmd/decision_tree_spark.md
index 0bd0fcc7d..4fa02ec15 100644
--- a/man/rmd/decision_tree_spark.md
+++ b/man/rmd/decision_tree_spark.md
@@ -69,6 +69,15 @@ decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd
index 76a631b65..7507c7e49 100644
--- a/man/rmd/discrim_flexible_earth.Rmd
+++ b/man/rmd/discrim_flexible_earth.Rmd
@@ -44,6 +44,10 @@ discrim_flexible(
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
## References
diff --git a/man/rmd/discrim_flexible_earth.md b/man/rmd/discrim_flexible_earth.md
index 8fed9cda4..e81bf76e8 100644
--- a/man/rmd/discrim_flexible_earth.md
+++ b/man/rmd/discrim_flexible_earth.md
@@ -53,6 +53,12 @@ discrim_flexible(
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
## References
diff --git a/man/rmd/discrim_linear_MASS.Rmd b/man/rmd/discrim_linear_MASS.Rmd
index c8f6ef6a8..db58ad2be 100644
--- a/man/rmd/discrim_linear_MASS.Rmd
+++ b/man/rmd/discrim_linear_MASS.Rmd
@@ -27,6 +27,11 @@ discrim_linear() %>%
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/discrim_linear_MASS.md b/man/rmd/discrim_linear_MASS.md
index 234a5b64f..deb6efb09 100644
--- a/man/rmd/discrim_linear_MASS.md
+++ b/man/rmd/discrim_linear_MASS.md
@@ -39,6 +39,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd
index 9430e2fa1..f5ce5442a 100644
--- a/man/rmd/discrim_linear_mda.Rmd
+++ b/man/rmd/discrim_linear_mda.Rmd
@@ -43,6 +43,11 @@ discrim_linear(penalty = numeric(0)) %>%
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal
diff --git a/man/rmd/discrim_linear_mda.md b/man/rmd/discrim_linear_mda.md
index 045d9502b..0bac0c65c 100644
--- a/man/rmd/discrim_linear_mda.md
+++ b/man/rmd/discrim_linear_mda.md
@@ -48,6 +48,13 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal
diff --git a/man/rmd/discrim_linear_sda.Rmd b/man/rmd/discrim_linear_sda.Rmd
index 653988dd2..d979220e0 100644
--- a/man/rmd/discrim_linear_sda.Rmd
+++ b/man/rmd/discrim_linear_sda.Rmd
@@ -37,6 +37,11 @@ discrim_linear() %>%
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](https://arxiv.org/abs/0903.2003).
diff --git a/man/rmd/discrim_linear_sda.md b/man/rmd/discrim_linear_sda.md
index 9c283f3e2..fdab652d6 100644
--- a/man/rmd/discrim_linear_sda.md
+++ b/man/rmd/discrim_linear_sda.md
@@ -49,6 +49,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](https://arxiv.org/abs/0903.2003).
diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd
index cdd4c12c7..06c0cd4e7 100644
--- a/man/rmd/discrim_linear_sparsediscrim.Rmd
+++ b/man/rmd/discrim_linear_sparsediscrim.Rmd
@@ -49,6 +49,11 @@ discrim_linear(regularization_method = character(0)) %>%
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
diff --git a/man/rmd/discrim_linear_sparsediscrim.md b/man/rmd/discrim_linear_sparsediscrim.md
index 2ea48fb2d..69eaf780b 100644
--- a/man/rmd/discrim_linear_sparsediscrim.md
+++ b/man/rmd/discrim_linear_sparsediscrim.md
@@ -54,6 +54,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
diff --git a/man/rmd/discrim_quad_MASS.Rmd b/man/rmd/discrim_quad_MASS.Rmd
index d25fa1aee..0a1c42da8 100644
--- a/man/rmd/discrim_quad_MASS.Rmd
+++ b/man/rmd/discrim_quad_MASS.Rmd
@@ -27,6 +27,11 @@ discrim_quad() %>%
```{r child = "template-zv-conditional.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/discrim_quad_MASS.md b/man/rmd/discrim_quad_MASS.md
index bbf121130..26fcfb940 100644
--- a/man/rmd/discrim_quad_MASS.md
+++ b/man/rmd/discrim_quad_MASS.md
@@ -39,6 +39,11 @@ Variance calculations are used in these computations within each outcome class.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd
index 17f1e4973..650affc04 100644
--- a/man/rmd/discrim_quad_sparsediscrim.Rmd
+++ b/man/rmd/discrim_quad_sparsediscrim.Rmd
@@ -48,6 +48,11 @@ discrim_quad(regularization_method = character(0)) %>%
```{r child = "template-zv-conditional.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87.
diff --git a/man/rmd/discrim_quad_sparsediscrim.md b/man/rmd/discrim_quad_sparsediscrim.md
index a3cc0d175..055b4c825 100644
--- a/man/rmd/discrim_quad_sparsediscrim.md
+++ b/man/rmd/discrim_quad_sparsediscrim.md
@@ -53,6 +53,11 @@ Variance calculations are used in these computations within each outcome class.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87.
diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd
index 582491bc8..75bfde845 100644
--- a/man/rmd/discrim_regularized_klaR.Rmd
+++ b/man/rmd/discrim_regularized_klaR.Rmd
@@ -51,6 +51,11 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) %>
```{r child = "template-zv-conditional.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175.
diff --git a/man/rmd/discrim_regularized_klaR.md b/man/rmd/discrim_regularized_klaR.md
index 96f8a93a1..e5fcc0d3e 100644
--- a/man/rmd/discrim_regularized_klaR.md
+++ b/man/rmd/discrim_regularized_klaR.md
@@ -59,6 +59,11 @@ Variance calculations are used in these computations within each outcome class.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175.
diff --git a/man/rmd/gen_additive_mod_mgcv.Rmd b/man/rmd/gen_additive_mod_mgcv.Rmd
index 3c30a6531..f4126991c 100644
--- a/man/rmd/gen_additive_mod_mgcv.Rmd
+++ b/man/rmd/gen_additive_mod_mgcv.Rmd
@@ -64,6 +64,11 @@ The smoothness of the terms will need to be manually specified (e.g., using `s(x
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Ross, W. 2021. [_Generalized Additive Models in R: A Free, Interactive Course using mgcv_](https://noamross.github.io/gams-in-r-course/)
diff --git a/man/rmd/gen_additive_mod_mgcv.md b/man/rmd/gen_additive_mod_mgcv.md
index b78e38603..9f9ec1cf9 100644
--- a/man/rmd/gen_additive_mod_mgcv.md
+++ b/man/rmd/gen_additive_mod_mgcv.md
@@ -35,8 +35,8 @@ gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>%
## Computational engine: mgcv
##
## Model fit template:
-## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1),
-## gamma = numeric(1))
+## mgcv::gam(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## select = logical(1), gamma = numeric(1))
```
## Translation from parsnip to the original package (classification)
@@ -59,8 +59,8 @@ gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>%
## Computational engine: mgcv
##
## Model fit template:
-## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1),
-## gamma = numeric(1), family = stats::binomial(link = "logit"))
+## mgcv::gam(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## select = logical(1), gamma = numeric(1), family = stats::binomial(link = "logit"))
```
## Model fitting
@@ -100,6 +100,13 @@ The smoothness of the terms will need to be manually specified (e.g., using `s(x
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Ross, W. 2021. [_Generalized Additive Models in R: A Free, Interactive Course using mgcv_](https://noamross.github.io/gams-in-r-course/)
diff --git a/man/rmd/linear_reg_brulee.Rmd b/man/rmd/linear_reg_brulee.Rmd
index 94e4ea598..75e55b6af 100644
--- a/man/rmd/linear_reg_brulee.Rmd
+++ b/man/rmd/linear_reg_brulee.Rmd
@@ -51,6 +51,11 @@ linear_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/linear_reg_brulee.md b/man/rmd/linear_reg_brulee.md
index d5d7d4fc6..ee1fea6f3 100644
--- a/man/rmd/linear_reg_brulee.md
+++ b/man/rmd/linear_reg_brulee.md
@@ -57,6 +57,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd
index 5e6fe8603..dac2c4e51 100644
--- a/man/rmd/linear_reg_gee.Rmd
+++ b/man/rmd/linear_reg_gee.Rmd
@@ -69,6 +69,11 @@ fit(gee_wflow, data = warpbreaks)
Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used.
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22.
diff --git a/man/rmd/linear_reg_gee.md b/man/rmd/linear_reg_gee.md
index 835542c94..01aaab16b 100644
--- a/man/rmd/linear_reg_gee.md
+++ b/man/rmd/linear_reg_gee.md
@@ -80,6 +80,11 @@ The `gee::gee()` function always prints out warnings and output even when `silen
Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22.
diff --git a/man/rmd/linear_reg_glm.Rmd b/man/rmd/linear_reg_glm.Rmd
index dc786e3ae..5bd2675f0 100644
--- a/man/rmd/linear_reg_glm.Rmd
+++ b/man/rmd/linear_reg_glm.Rmd
@@ -28,6 +28,13 @@ linear_reg() %>%
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glm) for `linear_reg()` with the `"glm"` engine.
diff --git a/man/rmd/linear_reg_glm.md b/man/rmd/linear_reg_glm.md
index 15410f4d5..61f12b632 100644
--- a/man/rmd/linear_reg_glm.md
+++ b/man/rmd/linear_reg_glm.md
@@ -53,6 +53,15 @@ linear_reg() %>%
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glm) for `linear_reg()` with the `"glm"` engine.
diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd
index 816f8af18..be4160c81 100644
--- a/man/rmd/linear_reg_glmnet.Rmd
+++ b/man/rmd/linear_reg_glmnet.Rmd
@@ -43,6 +43,11 @@ linear_reg(penalty = double(1), mixture = double(1)) %>%
```
By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glmnet) for `linear_reg()` with the `"glmnet"` engine.
diff --git a/man/rmd/linear_reg_glmnet.md b/man/rmd/linear_reg_glmnet.md
index 377c93b1f..289843d7d 100644
--- a/man/rmd/linear_reg_glmnet.md
+++ b/man/rmd/linear_reg_glmnet.md
@@ -50,6 +50,13 @@ Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glmnet) for `linear_reg()` with the `"glmnet"` engine.
diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd
index 717108cc7..88e0aa5c2 100644
--- a/man/rmd/linear_reg_gls.Rmd
+++ b/man/rmd/linear_reg_gls.Rmd
@@ -119,7 +119,10 @@ lme_fit %>% tidy() %>%
gls_fit %>% tidy()
```
+## Case weights
+```{r child = "template-no-case-weights.Rmd"}
+```
## References
diff --git a/man/rmd/linear_reg_gls.md b/man/rmd/linear_reg_gls.md
index 5afb7e7c6..3290b6d02 100644
--- a/man/rmd/linear_reg_gls.md
+++ b/man/rmd/linear_reg_gls.md
@@ -197,8 +197,11 @@ gls_fit %>% tidy()
## 2 week -2.12 0.224 -9.47 2.26e-18
```
+## Case weights
+The underlying model implementation does not allow for case weights.
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/linear_reg_keras.Rmd b/man/rmd/linear_reg_keras.Rmd
index 4e2564e3a..d6caeae42 100644
--- a/man/rmd/linear_reg_keras.Rmd
+++ b/man/rmd/linear_reg_keras.Rmd
@@ -42,6 +42,11 @@ linear_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-keras) for `linear_reg()` with the `"keras"` engine.
diff --git a/man/rmd/linear_reg_keras.md b/man/rmd/linear_reg_keras.md
index e50251812..50c0cfac8 100644
--- a/man/rmd/linear_reg_keras.md
+++ b/man/rmd/linear_reg_keras.md
@@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-keras) for `linear_reg()` with the `"keras"` engine.
diff --git a/man/rmd/linear_reg_lm.Rmd b/man/rmd/linear_reg_lm.Rmd
index 205c21374..125e4b774 100644
--- a/man/rmd/linear_reg_lm.Rmd
+++ b/man/rmd/linear_reg_lm.Rmd
@@ -20,6 +20,15 @@ linear_reg() %>%
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+_However_, the documentation in [stats::lm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different variances (with the values in weights being inversely proportional to the variances); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations (including the case that there are `w_i` observations equal to `y_i` and the data have been summarized). However, in the latter case, notice that within-group variation is not used. Therefore, the sigma estimate and residual degrees of freedom may be suboptimal; in the case of replication weights, **even wrong**. Hence, standard errors and analysis of variance tables should be treated with care" (emphasis added).
+
+Depending on your application, the degrees of freedom for the model (and other statistics) might be incorrect.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-lm) for `linear_reg()` with the `"lm"` engine.
diff --git a/man/rmd/linear_reg_lm.md b/man/rmd/linear_reg_lm.md
index d8b203c7d..25f99742b 100644
--- a/man/rmd/linear_reg_lm.md
+++ b/man/rmd/linear_reg_lm.md
@@ -30,6 +30,17 @@ linear_reg() %>%
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+_However_, the documentation in [stats::lm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different variances (with the values in weights being inversely proportional to the variances); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations (including the case that there are `w_i` observations equal to `y_i` and the data have been summarized). However, in the latter case, notice that within-group variation is not used. Therefore, the sigma estimate and residual degrees of freedom may be suboptimal; in the case of replication weights, **even wrong**. Hence, standard errors and analysis of variance tables should be treated with care" (emphasis added).
+
+Depending on your application, the degrees of freedom for the model (and other statistics) might be incorrect.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-lm) for `linear_reg()` with the `"lm"` engine.
diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd
index 63df4ab93..4d90dbee6 100644
--- a/man/rmd/linear_reg_lme.Rmd
+++ b/man/rmd/linear_reg_lme.Rmd
@@ -60,6 +60,11 @@ lme_wflow <-
fit(lme_wflow, data = riesby)
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/linear_reg_lme.md b/man/rmd/linear_reg_lme.md
index c49497d28..46013079c 100644
--- a/man/rmd/linear_reg_lme.md
+++ b/man/rmd/linear_reg_lme.md
@@ -89,6 +89,11 @@ lme_wflow <-
fit(lme_wflow, data = riesby)
```
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd
index 976956223..2a18c9d17 100644
--- a/man/rmd/linear_reg_lmer.Rmd
+++ b/man/rmd/linear_reg_lmer.Rmd
@@ -60,6 +60,11 @@ lmer_wflow <-
fit(lmer_wflow, data = riesby)
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/linear_reg_lmer.md b/man/rmd/linear_reg_lmer.md
index f5a72d1d4..067ea848e 100644
--- a/man/rmd/linear_reg_lmer.md
+++ b/man/rmd/linear_reg_lmer.md
@@ -89,6 +89,13 @@ lmer_wflow <-
fit(lmer_wflow, data = riesby)
```
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/linear_reg_spark.Rmd b/man/rmd/linear_reg_spark.Rmd
index 4aa27b0ec..3a8e13868 100644
--- a/man/rmd/linear_reg_spark.Rmd
+++ b/man/rmd/linear_reg_spark.Rmd
@@ -43,8 +43,17 @@ linear_reg(penalty = double(1), mixture = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+
By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
```{r child = "template-spark-notes.Rmd"}
diff --git a/man/rmd/linear_reg_spark.md b/man/rmd/linear_reg_spark.md
index 43d67c8c8..2bd15afed 100644
--- a/man/rmd/linear_reg_spark.md
+++ b/man/rmd/linear_reg_spark.md
@@ -39,7 +39,7 @@ linear_reg(penalty = double(1), mixture = double(1)) %>%
##
## Model fit template:
## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1))
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1))
```
## Preprocessing requirements
@@ -50,8 +50,19 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+
By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
diff --git a/man/rmd/linear_reg_stan.Rmd b/man/rmd/linear_reg_stan.Rmd
index 6e9961221..804aab653 100644
--- a/man/rmd/linear_reg_stan.Rmd
+++ b/man/rmd/linear_reg_stan.Rmd
@@ -39,6 +39,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `linear_reg()` with the `"stan"` engine.
diff --git a/man/rmd/linear_reg_stan.md b/man/rmd/linear_reg_stan.md
index 0f7b8bc60..8da583a1b 100644
--- a/man/rmd/linear_reg_stan.md
+++ b/man/rmd/linear_reg_stan.md
@@ -50,6 +50,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `linear_reg()` with the `"stan"` engine.
diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd
index b2d76f8c7..bbd62de78 100644
--- a/man/rmd/linear_reg_stan_glmer.Rmd
+++ b/man/rmd/linear_reg_stan_glmer.Rmd
@@ -75,6 +75,11 @@ fit(glmer_wflow, data = riesby)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/linear_reg_stan_glmer.md b/man/rmd/linear_reg_stan_glmer.md
index 9dd2403de..f23832798 100644
--- a/man/rmd/linear_reg_stan_glmer.md
+++ b/man/rmd/linear_reg_stan_glmer.md
@@ -105,6 +105,13 @@ fit(glmer_wflow, data = riesby)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd
deleted file mode 100644
index 021055253..000000000
--- a/man/rmd/logistic-reg.Rmd
+++ /dev/null
@@ -1,117 +0,0 @@
-# Engine Details
-
-```{r, child = "aaa.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below.
-
-## glm
-
-```{r glm-reg}
-logistic_reg() %>%
- set_engine("glm") %>%
- translate()
-```
-
-## glmnet
-
-```{r glmnet-csl}
-logistic_reg(penalty = 0.1) %>%
- set_engine("glmnet") %>%
- translate()
-```
-
-The glmnet engine requires a single value for the `penalty` argument (a number
-or `tune()`), but the full regularization path is always fit
-regardless of the value given to `penalty`. To pass in a custom sequence of
-values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`.
-This will assign the value of the glmnet `lambda` parameter without disturbing
-the value given of `logistic_reg(penalty)`. For example:
-
-```{r glmnet-path}
-logistic_reg(penalty = .1) %>%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>%
- translate()
-```
-
-When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly
-suggest_ that you pass in a vector for `path_values` that includes zero. See
-[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion.
-
-When using `predict()`, the single `penalty` value used for prediction is the
-one specified in `logistic_reg()`.
-
-To predict on multiple penalties, use the `multi_predict()` function.
-This function returns a tibble with a list column called `.pred` containing
-all of the penalty results.
-
-
-## LiblineaR
-
-```{r liblinear-reg}
-logistic_reg() %>%
- set_engine("LiblineaR") %>%
- translate()
-```
-
-For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1
-(for lasso) but not other intermediate values. In the `LiblineaR` documentation,
-these correspond to types 0 (L2-regularized) and 6 (L1-regularized).
-
-Be aware that the `LiblineaR` engine regularizes the intercept. Other
-regularized regression models do not, which will result in different parameter estimates.
-
-## stan
-
-```{r stan-reg}
-logistic_reg() %>%
- set_engine("stan") %>%
- translate()
-```
-
-Note that the `refresh` default prevents logging of the estimation process.
-Change this value in `set_engine()` to show the logs.
-
-For prediction, the `stan` engine can compute posterior intervals analogous to
-confidence and prediction intervals. In these instances, the units are the
-original outcome and when `std_error = TRUE`, the standard deviation of the
-posterior distribution (or posterior predictive distribution as appropriate) is
-returned.
-
-## spark
-
-```{r spark-reg}
-logistic_reg() %>%
- set_engine("spark") %>%
- translate()
-```
-
-## keras
-
-```{r keras-reg}
-logistic_reg() %>%
- set_engine("keras") %>%
- translate()
-```
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_logistic_reg <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "logistic_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"),
- "logistic_reg", "LiblineaR", "mixture", "type", "0",
- "logistic_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_logistic_regression", "reg_param"),
- "logistic_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_logistic_regression", "elastic_net_param"),
- "logistic_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"),
- )
-}
-convert_args("logistic_reg")
-```
diff --git a/man/rmd/logistic-reg.md b/man/rmd/logistic-reg.md
deleted file mode 100644
index 2bfd3343c..000000000
--- a/man/rmd/logistic-reg.md
+++ /dev/null
@@ -1,196 +0,0 @@
-# Engine Details
-
-
-
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below.
-
-## glm
-
-
-```r
-logistic_reg() %>%
- set_engine("glm") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Computational engine: glm
-##
-## Model fit template:
-## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## family = stats::binomial)
-```
-
-## glmnet
-
-
-```r
-logistic_reg(penalty = 0.1) %>%
- set_engine("glmnet") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## family = "binomial")
-```
-
-The glmnet engine requires a single value for the `penalty` argument (a number
-or `tune()`), but the full regularization path is always fit
-regardless of the value given to `penalty`. To pass in a custom sequence of
-values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`.
-This will assign the value of the glmnet `lambda` parameter without disturbing
-the value given of `logistic_reg(penalty)`. For example:
-
-
-```r
-logistic_reg(penalty = .1) %>%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "binomial")
-```
-
-When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly
-suggest_ that you pass in a vector for `path_values` that includes zero. See
-[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion.
-
-When using `predict()`, the single `penalty` value used for prediction is the
-one specified in `logistic_reg()`.
-
-To predict on multiple penalties, use the `multi_predict()` function.
-This function returns a tibble with a list column called `.pred` containing
-all of the penalty results.
-
-
-## LiblineaR
-
-
-```r
-logistic_reg() %>%
- set_engine("LiblineaR") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Computational engine: LiblineaR
-##
-## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## verbose = FALSE)
-```
-
-For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1
-(for lasso) but not other intermediate values. In the `LiblineaR` documentation,
-these correspond to types 0 (L2-regularized) and 6 (L1-regularized).
-
-Be aware that the `LiblineaR` engine regularizes the intercept. Other
-regularized regression models do not, which will result in different parameter estimates.
-
-## stan
-
-
-```r
-logistic_reg() %>%
- set_engine("stan") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Computational engine: stan
-##
-## Model fit template:
-## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), family = stats::binomial, refresh = 0)
-```
-
-Note that the `refresh` default prevents logging of the estimation process.
-Change this value in `set_engine()` to show the logs.
-
-For prediction, the `stan` engine can compute posterior intervals analogous to
-confidence and prediction intervals. In these instances, the units are the
-original outcome and when `std_error = TRUE`, the standard deviation of the
-posterior distribution (or posterior predictive distribution as appropriate) is
-returned.
-
-## spark
-
-
-```r
-logistic_reg() %>%
- set_engine("spark") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), family = "binomial")
-```
-
-## keras
-
-
-```r
-logistic_reg() %>%
- set_engine("keras") %>%
- translate()
-```
-
-```
-## Logistic Regression Model Specification (classification)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1,
-## act = "linear")
-```
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-
-|**parsnip** |**glmnet** |**LiblineaR** |**spark** |**keras** |**brulee** |
-|:-----------|:----------|:-------------|:---------------------|:-----------|:----------|
-|penalty |lambda |cost |reg_param (0) |penalty (0) |penalty |
-|mixture |alpha (1) |type (0) |elastic_net_param (0) |NA |mixture |
-|epochs |NA |NA |NA |NA |epochs |
-|learn_rate |NA |NA |NA |NA |learn_rate |
-|momentum |NA |NA |NA |NA |momentum |
-|stop_iter |NA |NA |NA |NA |stop_iter |
diff --git a/man/rmd/logistic_reg_LiblineaR.md b/man/rmd/logistic_reg_LiblineaR.md
index e0d308e0f..761092a85 100644
--- a/man/rmd/logistic_reg_LiblineaR.md
+++ b/man/rmd/logistic_reg_LiblineaR.md
@@ -36,8 +36,8 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>%
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## cost = Inf, type = double(1), verbose = FALSE)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), cost = Inf,
+## type = double(1), verbose = FALSE)
```
## Preprocessing requirements
diff --git a/man/rmd/logistic_reg_brulee.Rmd b/man/rmd/logistic_reg_brulee.Rmd
index 75cf83785..e6a8d2e3f 100644
--- a/man/rmd/logistic_reg_brulee.Rmd
+++ b/man/rmd/logistic_reg_brulee.Rmd
@@ -50,6 +50,11 @@ logistic_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/logistic_reg_brulee.md b/man/rmd/logistic_reg_brulee.md
index ca79d0192..9573a98fa 100644
--- a/man/rmd/logistic_reg_brulee.md
+++ b/man/rmd/logistic_reg_brulee.md
@@ -56,6 +56,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd
index 219750979..ac3a60707 100644
--- a/man/rmd/logistic_reg_gee.Rmd
+++ b/man/rmd/logistic_reg_gee.Rmd
@@ -69,6 +69,11 @@ fit(gee_wflow, data = toenail)
Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used.
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22.
diff --git a/man/rmd/logistic_reg_gee.md b/man/rmd/logistic_reg_gee.md
index 84f7de293..9a63f2c58 100644
--- a/man/rmd/logistic_reg_gee.md
+++ b/man/rmd/logistic_reg_gee.md
@@ -80,6 +80,11 @@ The `gee::gee()` function always prints out warnings and output even when `silen
Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22.
diff --git a/man/rmd/logistic_reg_glm.Rmd b/man/rmd/logistic_reg_glm.Rmd
index 408b84ac0..7db0e2847 100644
--- a/man/rmd/logistic_reg_glm.Rmd
+++ b/man/rmd/logistic_reg_glm.Rmd
@@ -28,6 +28,13 @@ linear_reg() %>%
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glm) for `logistic_reg()` with the `"glm"` engine.
diff --git a/man/rmd/logistic_reg_glm.md b/man/rmd/logistic_reg_glm.md
index af17affb4..7a1cdf2c6 100644
--- a/man/rmd/logistic_reg_glm.md
+++ b/man/rmd/logistic_reg_glm.md
@@ -53,6 +53,15 @@ linear_reg() %>%
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glm) for `logistic_reg()` with the `"glm"` engine.
diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd
index 4951fcf2c..278891d24 100644
--- a/man/rmd/logistic_reg_glmer.Rmd
+++ b/man/rmd/logistic_reg_glmer.Rmd
@@ -59,6 +59,11 @@ glmer_wflow <-
fit(glmer_wflow, data = toenail)
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/logistic_reg_glmer.md b/man/rmd/logistic_reg_glmer.md
index 98a612c0f..eebcfd6c8 100644
--- a/man/rmd/logistic_reg_glmer.md
+++ b/man/rmd/logistic_reg_glmer.md
@@ -89,6 +89,13 @@ glmer_wflow <-
fit(glmer_wflow, data = toenail)
```
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd
index 50203c0d4..396b4be19 100644
--- a/man/rmd/logistic_reg_glmnet.Rmd
+++ b/man/rmd/logistic_reg_glmnet.Rmd
@@ -45,6 +45,11 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>%
```
By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glmnet) for `logistic_reg()` with the `"glmnet"` engine.
diff --git a/man/rmd/logistic_reg_glmnet.md b/man/rmd/logistic_reg_glmnet.md
index 456e38f41..fc90e59f4 100644
--- a/man/rmd/logistic_reg_glmnet.md
+++ b/man/rmd/logistic_reg_glmnet.md
@@ -52,6 +52,13 @@ Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glmnet) for `logistic_reg()` with the `"glmnet"` engine.
diff --git a/man/rmd/logistic_reg_keras.Rmd b/man/rmd/logistic_reg_keras.Rmd
index f984508e2..47c90a887 100644
--- a/man/rmd/logistic_reg_keras.Rmd
+++ b/man/rmd/logistic_reg_keras.Rmd
@@ -42,6 +42,11 @@ logistic_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-keras) for `logistic_reg()` with the `"keras"` engine.
diff --git a/man/rmd/logistic_reg_keras.md b/man/rmd/logistic_reg_keras.md
index a51c0fba6..c98b1791a 100644
--- a/man/rmd/logistic_reg_keras.md
+++ b/man/rmd/logistic_reg_keras.md
@@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-keras) for `logistic_reg()` with the `"keras"` engine.
diff --git a/man/rmd/logistic_reg_spark.Rmd b/man/rmd/logistic_reg_spark.Rmd
index 9f4cb969e..45881e057 100644
--- a/man/rmd/logistic_reg_spark.Rmd
+++ b/man/rmd/logistic_reg_spark.Rmd
@@ -43,8 +43,16 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+
By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
```{r child = "template-spark-notes.Rmd"}
diff --git a/man/rmd/logistic_reg_spark.md b/man/rmd/logistic_reg_spark.md
index 2cce86e71..feed4a39b 100644
--- a/man/rmd/logistic_reg_spark.md
+++ b/man/rmd/logistic_reg_spark.md
@@ -39,7 +39,7 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>%
##
## Model fit template:
## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
## family = "binomial")
```
@@ -51,8 +51,18 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+
By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
## Other details
diff --git a/man/rmd/logistic_reg_stan.Rmd b/man/rmd/logistic_reg_stan.Rmd
index 2bb5768ad..ee0e3c6a3 100644
--- a/man/rmd/logistic_reg_stan.Rmd
+++ b/man/rmd/logistic_reg_stan.Rmd
@@ -39,6 +39,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-stan) for `logistic_reg()` with the `"stan"` engine.
diff --git a/man/rmd/logistic_reg_stan.md b/man/rmd/logistic_reg_stan.md
index 11190f9e2..7587d8db2 100644
--- a/man/rmd/logistic_reg_stan.md
+++ b/man/rmd/logistic_reg_stan.md
@@ -50,6 +50,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-stan) for `logistic_reg()` with the `"stan"` engine.
diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd
index 0c516680d..59537f316 100644
--- a/man/rmd/logistic_reg_stan_glmer.Rmd
+++ b/man/rmd/logistic_reg_stan_glmer.Rmd
@@ -74,6 +74,11 @@ fit(glmer_wflow, data = toenail)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/logistic_reg_stan_glmer.md b/man/rmd/logistic_reg_stan_glmer.md
index 953b552df..f14e1d0ea 100644
--- a/man/rmd/logistic_reg_stan_glmer.md
+++ b/man/rmd/logistic_reg_stan_glmer.md
@@ -104,6 +104,13 @@ fit(glmer_wflow, data = toenail)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/mars_earth.Rmd b/man/rmd/mars_earth.Rmd
index eb58d2620..30543008f 100644
--- a/man/rmd/mars_earth.Rmd
+++ b/man/rmd/mars_earth.Rmd
@@ -50,6 +50,11 @@ An alternate method for using MARs for categorical outcomes can be found in [dis
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mars-earth) for `mars()` with the `"earth"` engine.
diff --git a/man/rmd/mars_earth.md b/man/rmd/mars_earth.md
index e8d8688be..e0d7826a6 100644
--- a/man/rmd/mars_earth.md
+++ b/man/rmd/mars_earth.md
@@ -80,6 +80,13 @@ An alternate method for using MARs for categorical outcomes can be found in [dis
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mars-earth) for `mars()` with the `"earth"` engine.
diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd
index 8f0dddc47..7732c39af 100644
--- a/man/rmd/mlp_brulee.Rmd
+++ b/man/rmd/mlp_brulee.Rmd
@@ -77,6 +77,11 @@ mlp(
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/mlp_brulee.md b/man/rmd/mlp_brulee.md
index 2ec86f5d0..0290d0c08 100644
--- a/man/rmd/mlp_brulee.md
+++ b/man/rmd/mlp_brulee.md
@@ -119,6 +119,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/mlp_keras.Rmd b/man/rmd/mlp_keras.Rmd
index 280d99994..a995f7766 100644
--- a/man/rmd/mlp_keras.Rmd
+++ b/man/rmd/mlp_keras.Rmd
@@ -61,6 +61,11 @@ mlp(
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-keras) for `mlp()` with the `"keras"` engine.
diff --git a/man/rmd/mlp_keras.md b/man/rmd/mlp_keras.md
index 628bf56f4..5adff8c9e 100644
--- a/man/rmd/mlp_keras.md
+++ b/man/rmd/mlp_keras.md
@@ -97,6 +97,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-keras) for `mlp()` with the `"keras"` engine.
diff --git a/man/rmd/mlp_nnet.Rmd b/man/rmd/mlp_nnet.Rmd
index 176965ef5..1893fea42 100644
--- a/man/rmd/mlp_nnet.Rmd
+++ b/man/rmd/mlp_nnet.Rmd
@@ -62,6 +62,11 @@ mlp(
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-nnet) for `mlp()` with the `"nnet"` engine.
diff --git a/man/rmd/mlp_nnet.md b/man/rmd/mlp_nnet.md
index 6866b1516..a85ae1103 100644
--- a/man/rmd/mlp_nnet.md
+++ b/man/rmd/mlp_nnet.md
@@ -43,9 +43,8 @@ mlp(
## Computational engine: nnet
##
## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = integer(1), decay = double(1), maxit = integer(1),
-## trace = FALSE, linout = TRUE)
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1),
+## decay = double(1), maxit = integer(1), trace = FALSE, linout = TRUE)
```
Note that parsnip automatically sets linear activation in the last layer.
@@ -75,9 +74,8 @@ mlp(
## Computational engine: nnet
##
## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = integer(1), decay = double(1), maxit = integer(1),
-## trace = FALSE, linout = FALSE)
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), size = integer(1),
+## decay = double(1), maxit = integer(1), trace = FALSE, linout = FALSE)
```
@@ -90,6 +88,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-nnet) for `mlp()` with the `"nnet"` engine.
diff --git a/man/rmd/multinom_reg_brulee.Rmd b/man/rmd/multinom_reg_brulee.Rmd
index 6e30242ba..86cc2afdd 100644
--- a/man/rmd/multinom_reg_brulee.Rmd
+++ b/man/rmd/multinom_reg_brulee.Rmd
@@ -50,6 +50,11 @@ multinom_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/multinom_reg_brulee.md b/man/rmd/multinom_reg_brulee.md
index 8cfbc5f42..20166fac6 100644
--- a/man/rmd/multinom_reg_brulee.md
+++ b/man/rmd/multinom_reg_brulee.md
@@ -56,6 +56,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd
index 3c70c4afc..a4a2e97ff 100644
--- a/man/rmd/multinom_reg_glmnet.Rmd
+++ b/man/rmd/multinom_reg_glmnet.Rmd
@@ -49,6 +49,11 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-glmnet) for `multinom_reg()` with the `"glmnet"` engine.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
diff --git a/man/rmd/multinom_reg_glmnet.md b/man/rmd/multinom_reg_glmnet.md
index 9604c356b..425cb10ba 100644
--- a/man/rmd/multinom_reg_glmnet.md
+++ b/man/rmd/multinom_reg_glmnet.md
@@ -56,6 +56,13 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-glmnet) for `multinom_reg()` with the `"glmnet"` engine.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
diff --git a/man/rmd/multinom_reg_keras.Rmd b/man/rmd/multinom_reg_keras.Rmd
index f182c82f6..a5c15402b 100644
--- a/man/rmd/multinom_reg_keras.Rmd
+++ b/man/rmd/multinom_reg_keras.Rmd
@@ -42,6 +42,11 @@ multinom_reg(penalty = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-keras) for `multinom_reg()` with the `"keras"` engine.
diff --git a/man/rmd/multinom_reg_keras.md b/man/rmd/multinom_reg_keras.md
index 6a88fde7c..acea8e2fc 100644
--- a/man/rmd/multinom_reg_keras.md
+++ b/man/rmd/multinom_reg_keras.md
@@ -46,6 +46,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-keras) for `multinom_reg()` with the `"keras"` engine.
diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd
index 65340d081..185c9ec77 100644
--- a/man/rmd/multinom_reg_nnet.Rmd
+++ b/man/rmd/multinom_reg_nnet.Rmd
@@ -44,6 +44,12 @@ multinom_reg(penalty = double(1)) %>%
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-nnet) for `multinom_reg()` with the `"nnet"` engine.
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
+
## References
- Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media
diff --git a/man/rmd/multinom_reg_nnet.md b/man/rmd/multinom_reg_nnet.md
index f7072b433..94fd309d4 100644
--- a/man/rmd/multinom_reg_nnet.md
+++ b/man/rmd/multinom_reg_nnet.md
@@ -32,7 +32,7 @@ multinom_reg(penalty = double(1)) %>%
##
## Model fit template:
## nnet::multinom(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), decay = double(1), trace = FALSE)
+## decay = double(1), trace = FALSE)
```
## Preprocessing requirements
@@ -48,6 +48,12 @@ scale each so that each predictor has mean zero and a variance of one.
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-nnet) for `multinom_reg()` with the `"nnet"` engine.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
+
## References
- Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media
diff --git a/man/rmd/multinom_reg_spark.Rmd b/man/rmd/multinom_reg_spark.Rmd
index fb058cd5e..209e53329 100644
--- a/man/rmd/multinom_reg_spark.Rmd
+++ b/man/rmd/multinom_reg_spark.Rmd
@@ -43,8 +43,17 @@ multinom_reg(penalty = double(1), mixture = double(1)) %>%
```{r child = "template-same-scale.Rmd"}
```
+
By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
+
## Other details
```{r child = "template-spark-notes.Rmd"}
diff --git a/man/rmd/multinom_reg_spark.md b/man/rmd/multinom_reg_spark.md
index a6709a419..35d30c7cf 100644
--- a/man/rmd/multinom_reg_spark.md
+++ b/man/rmd/multinom_reg_spark.md
@@ -39,7 +39,7 @@ multinom_reg(penalty = double(1), mixture = double(1)) %>%
##
## Model fit template:
## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## weights = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
## family = "multinomial")
```
@@ -51,8 +51,19 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+
By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
+
## Other details
diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd
index 9557cc292..a1b1970e2 100644
--- a/man/rmd/naive_Bayes_klaR.Rmd
+++ b/man/rmd/naive_Bayes_klaR.Rmd
@@ -44,6 +44,11 @@ The columns for qualitative predictors should always be represented as factors (
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/naive_Bayes_klaR.md b/man/rmd/naive_Bayes_klaR.md
index d3362e18f..961d2ad15 100644
--- a/man/rmd/naive_Bayes_klaR.md
+++ b/man/rmd/naive_Bayes_klaR.md
@@ -52,6 +52,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd b/man/rmd/naive_Bayes_naivebayes.Rmd
index cfff2bb2a..da22742d5 100644
--- a/man/rmd/naive_Bayes_naivebayes.Rmd
+++ b/man/rmd/naive_Bayes_naivebayes.Rmd
@@ -46,6 +46,11 @@ For count data, integers can be estimated using a Poisson distribution if the ar
```{r child = "template-zv.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/naive_Bayes_naivebayes.md b/man/rmd/naive_Bayes_naivebayes.md
index 5db2431d3..63fcec9a0 100644
--- a/man/rmd/naive_Bayes_naivebayes.md
+++ b/man/rmd/naive_Bayes_naivebayes.md
@@ -54,6 +54,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd
deleted file mode 100644
index 47f8e110d..000000000
--- a/man/rmd/nearest-neighbor.Rmd
+++ /dev/null
@@ -1,49 +0,0 @@
-# Engine Details
-
-```{r, child = "aaa.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below:
-
-## kknn
-
-```{r kknn-reg}
-nearest_neighbor() %>%
- set_engine("kknn") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r kknn-cls}
-nearest_neighbor() %>%
- set_engine("kknn") %>%
- set_mode("classification") %>%
- translate()
-```
-
-For `kknn`, the underlying modeling function used is a restricted version of
-`train.kknn()` and not `kknn()`. It is set up in this way so that `parsnip` can
-utilize the underlying `predict.train.kknn` method to predict on new data. This
-also means that a single value of that function's `kernel` argument (a.k.a
-`weight_func` here) can be supplied
-
-For this engine, tuning over `neighbors` is very efficient since the same model
-object can be used to make predictions over multiple values of `neighbors`.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_nearest_neighbor <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "nearest_neighbor", "kknn", "weight_func", "kernel", get_arg("kknn", "train.kknn", "kernel"),
- "nearest_neighbor", "kknn", "dist_power", "distance", get_arg("kknn", "train.kknn", "distance"),
- )
-}
-convert_args("nearest_neighbor")
-```
-
diff --git a/man/rmd/nearest_neighbor_kknn.Rmd b/man/rmd/nearest_neighbor_kknn.Rmd
index 2795d52e2..c0bb8a6ec 100644
--- a/man/rmd/nearest_neighbor_kknn.Rmd
+++ b/man/rmd/nearest_neighbor_kknn.Rmd
@@ -62,6 +62,11 @@ nearest_neighbor(
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#nearest-neighbor-kknn) for `nearest_neighbor()` with the `"kknn"` engine.
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich
diff --git a/man/rmd/nearest_neighbor_kknn.md b/man/rmd/nearest_neighbor_kknn.md
index f39c597bd..7af970293 100644
--- a/man/rmd/nearest_neighbor_kknn.md
+++ b/man/rmd/nearest_neighbor_kknn.md
@@ -88,6 +88,11 @@ scale each so that each predictor has mean zero and a variance of one.
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#nearest-neighbor-kknn) for `nearest_neighbor()` with the `"kknn"` engine.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich
diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd
index 1bc593da8..c12a9b581 100644
--- a/man/rmd/pls_mixOmics.Rmd
+++ b/man/rmd/pls_mixOmics.Rmd
@@ -71,8 +71,6 @@ This package is available via the Bioconductor repository and is not accessible
remotes::install_bioc("mixOmics")
```
-
-
## Preprocessing requirements
```{r child = "template-makes-dummies.Rmd"}
@@ -84,6 +82,11 @@ This package is available via the Bioconductor repository and is not accessible
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752.
diff --git a/man/rmd/pls_mixOmics.md b/man/rmd/pls_mixOmics.md
index 733b840ad..4228a894c 100644
--- a/man/rmd/pls_mixOmics.md
+++ b/man/rmd/pls_mixOmics.md
@@ -92,8 +92,6 @@ This package is available via the Bioconductor repository and is not accessible
remotes::install_bioc("mixOmics")
```
-
-
## Preprocessing requirements
@@ -108,6 +106,11 @@ Variance calculations are used in these computations so _zero-variance_ predicto
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## References
- Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752.
diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd
index 393c43e7b..db6254ed0 100644
--- a/man/rmd/poisson_reg_gee.Rmd
+++ b/man/rmd/poisson_reg_gee.Rmd
@@ -26,9 +26,12 @@ poisson_reg(engine = "gee") %>%
There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section.
-## Other details
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
-The model cannot accept case weights.
+## Other details
Both `gee:gee()` and `gee:geepack()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like:
diff --git a/man/rmd/poisson_reg_gee.md b/man/rmd/poisson_reg_gee.md
index 5a37ca8b3..fff3b0503 100644
--- a/man/rmd/poisson_reg_gee.md
+++ b/man/rmd/poisson_reg_gee.md
@@ -37,9 +37,12 @@ poisson_reg(engine = "gee") %>%
There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section.
-## Other details
+## Case weights
+
-The model cannot accept case weights.
+The underlying model implementation does not allow for case weights.
+
+## Other details
Both `gee:gee()` and `gee:geepack()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like:
diff --git a/man/rmd/poisson_reg_glm.Rmd b/man/rmd/poisson_reg_glm.Rmd
index c245b4889..0d0819fa6 100644
--- a/man/rmd/poisson_reg_glm.Rmd
+++ b/man/rmd/poisson_reg_glm.Rmd
@@ -24,4 +24,20 @@ poisson_reg() %>%
```{r child = "template-makes-dummies.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
+If frequency weights are being used in your application, the [glm_grouped()] model (and corresponding engine) may be more appropriate.
+
+
diff --git a/man/rmd/poisson_reg_glm.md b/man/rmd/poisson_reg_glm.md
index 0473be15f..b107c08f3 100644
--- a/man/rmd/poisson_reg_glm.md
+++ b/man/rmd/poisson_reg_glm.md
@@ -35,4 +35,24 @@ poisson_reg() %>%
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
+_However_, the documentation in [stats::glm()] assumes that a specific type of case weights is being used: "Non-NULL weights can be used to indicate that different observations have different dispersions (with the values in weights being inversely proportional to the dispersions); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations. For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes: they would rarely be used for a Poisson GLM."
+
+If frequency weights are being used in your application, the [glm_grouped()] model (and corresponding engine) may be more appropriate.
+
+
diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd
index d330b2f33..02b6aed03 100644
--- a/man/rmd/poisson_reg_glmer.Rmd
+++ b/man/rmd/poisson_reg_glmer.Rmd
@@ -58,6 +58,11 @@ glmer_wflow <-
fit(glmer_wflow, data = longitudinal_counts)
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/poisson_reg_glmer.md b/man/rmd/poisson_reg_glmer.md
index 4d50e5f44..61cd2083b 100644
--- a/man/rmd/poisson_reg_glmer.md
+++ b/man/rmd/poisson_reg_glmer.md
@@ -88,6 +88,13 @@ glmer_wflow <-
fit(glmer_wflow, data = longitudinal_counts)
```
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## References
- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY
diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd
index 665e05776..6f48a4c73 100644
--- a/man/rmd/poisson_reg_glmnet.Rmd
+++ b/man/rmd/poisson_reg_glmnet.Rmd
@@ -49,3 +49,8 @@ poisson_reg(penalty = double(1), mixture = double(1)) %>%
```
By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data.
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
diff --git a/man/rmd/poisson_reg_glmnet.md b/man/rmd/poisson_reg_glmnet.md
index 465102622..3171eaabe 100644
--- a/man/rmd/poisson_reg_glmnet.md
+++ b/man/rmd/poisson_reg_glmnet.md
@@ -56,3 +56,10 @@ Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
diff --git a/man/rmd/poisson_reg_hurdle.Rmd b/man/rmd/poisson_reg_hurdle.Rmd
index 043ee62a3..c6c946744 100644
--- a/man/rmd/poisson_reg_hurdle.Rmd
+++ b/man/rmd/poisson_reg_hurdle.Rmd
@@ -57,3 +57,8 @@ workflow() %>%
```
The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely.
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
diff --git a/man/rmd/poisson_reg_hurdle.md b/man/rmd/poisson_reg_hurdle.md
index 25c9fde6f..7dfdfb4bf 100644
--- a/man/rmd/poisson_reg_hurdle.md
+++ b/man/rmd/poisson_reg_hurdle.md
@@ -106,3 +106,10 @@ workflow() %>%
```
The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd
index d82a3d9d9..bcead8846 100644
--- a/man/rmd/poisson_reg_stan.Rmd
+++ b/man/rmd/poisson_reg_stan.Rmd
@@ -43,6 +43,11 @@ Note that the `refresh` default prevents logging of the estimation process. Chan
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine.
diff --git a/man/rmd/poisson_reg_stan.md b/man/rmd/poisson_reg_stan.md
index d4542bd53..73dda6395 100644
--- a/man/rmd/poisson_reg_stan.md
+++ b/man/rmd/poisson_reg_stan.md
@@ -54,6 +54,13 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine.
diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd
index f912e1d21..19bf1ced1 100644
--- a/man/rmd/poisson_reg_stan_glmer.Rmd
+++ b/man/rmd/poisson_reg_stan_glmer.Rmd
@@ -73,6 +73,11 @@ fit(glmer_wflow, data = longitudinal_counts)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/poisson_reg_stan_glmer.md b/man/rmd/poisson_reg_stan_glmer.md
index b1724addf..3afdf95e2 100644
--- a/man/rmd/poisson_reg_stan_glmer.md
+++ b/man/rmd/poisson_reg_stan_glmer.md
@@ -103,6 +103,13 @@ fit(glmer_wflow, data = longitudinal_counts)
For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## References
- McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/poisson_reg_zeroinfl.Rmd b/man/rmd/poisson_reg_zeroinfl.Rmd
index b424c0ac5..3c87a9e49 100644
--- a/man/rmd/poisson_reg_zeroinfl.Rmd
+++ b/man/rmd/poisson_reg_zeroinfl.Rmd
@@ -57,3 +57,8 @@ workflow() %>%
```
The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely.
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
diff --git a/man/rmd/poisson_reg_zeroinfl.md b/man/rmd/poisson_reg_zeroinfl.md
index 43b6b2281..afb529e35 100644
--- a/man/rmd/poisson_reg_zeroinfl.md
+++ b/man/rmd/poisson_reg_zeroinfl.md
@@ -107,3 +107,10 @@ workflow() %>%
```
The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd
index a27721529..4c91b66f3 100644
--- a/man/rmd/proportional_hazards_glmnet.Rmd
+++ b/man/rmd/proportional_hazards_glmnet.Rmd
@@ -90,6 +90,11 @@ Note that columns used in the `strata()` function _will_ also be estimated in th
```{r child = "template-censored-linear-predictor.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
# References
- Simon N, Friedman J, Hastie T, Tibshirani R. 2011. "Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}.
diff --git a/man/rmd/proportional_hazards_glmnet.md b/man/rmd/proportional_hazards_glmnet.md
index 269602a7f..8e5fe8c39 100644
--- a/man/rmd/proportional_hazards_glmnet.md
+++ b/man/rmd/proportional_hazards_glmnet.md
@@ -43,7 +43,7 @@ proportional_hazards(penalty = double(1), mixture = double(1)) %>%
##
## Model fit template:
## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(),
-## family = missing_arg(), alpha = double(1))
+## alpha = double(1))
```
## Preprocessing requirements
@@ -107,6 +107,13 @@ tidymodels does not treat different models differently when computing performanc
This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
# References
- Simon N, Friedman J, Hastie T, Tibshirani R. 2011. "Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}.
diff --git a/man/rmd/proportional_hazards_survival.Rmd b/man/rmd/proportional_hazards_survival.Rmd
index 91065564f..f262fb032 100644
--- a/man/rmd/proportional_hazards_survival.Rmd
+++ b/man/rmd/proportional_hazards_survival.Rmd
@@ -44,9 +44,16 @@ Note that columns used in the `strata()` function will not be estimated in the r
# Linear predictor values
+
```{r child = "template-censored-linear-predictor.Rmd"}
```
+
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120.
diff --git a/man/rmd/proportional_hazards_survival.md b/man/rmd/proportional_hazards_survival.md
index 6c2ad17e0..2c5e52fe4 100644
--- a/man/rmd/proportional_hazards_survival.md
+++ b/man/rmd/proportional_hazards_survival.md
@@ -87,6 +87,7 @@ Note that columns used in the `strata()` function will not be estimated in the r
# Linear predictor values
+
Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions.
For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs.
@@ -95,6 +96,14 @@ tidymodels does not treat different models differently when computing performanc
This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object.
+
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120.
diff --git a/man/rmd/rand_forest_partykit.md b/man/rmd/rand_forest_partykit.md
index 47c15ed8b..6fa48e2d7 100644
--- a/man/rmd/rand_forest_partykit.md
+++ b/man/rmd/rand_forest_partykit.md
@@ -17,7 +17,7 @@ This model has 3 tuning parameters:
## Translation from parsnip to the original package (regression)
-
+The **bonsai** extension package is required to fit this model.
```r
@@ -41,7 +41,7 @@ rand_forest() %>%
## Translation from parsnip to the original package (classification)
-
+The **bonsai** extension package is required to fit this model.
```r
@@ -67,7 +67,7 @@ rand_forest() %>%
# Translation from parsnip to the original package (censored regression)
-
+The **censored** extension package is required to fit this model.
```r
diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd
index 86e723fd2..2567858c0 100644
--- a/man/rmd/rand_forest_ranger.Rmd
+++ b/man/rmd/rand_forest_ranger.Rmd
@@ -67,6 +67,11 @@ By default, parallel processing is turned off. When tuning, it is more efficient
For `ranger` confidence intervals, the intervals are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and will be coerced to be in this range.
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-ranger) for `rand_forest()` with the `"ranger"` engine.
diff --git a/man/rmd/rand_forest_ranger.md b/man/rmd/rand_forest_ranger.md
index 16b3cdba9..7bd425c48 100644
--- a/man/rmd/rand_forest_ranger.md
+++ b/man/rmd/rand_forest_ranger.md
@@ -44,7 +44,7 @@ rand_forest(
## Computational engine: ranger
##
## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
## mtry = min_cols(~integer(1), x), num.trees = integer(1),
## min.node.size = min_rows(~integer(1), x), num.threads = 1,
## verbose = FALSE, seed = sample.int(10^5, 1))
@@ -77,7 +77,7 @@ rand_forest(
## Computational engine: ranger
##
## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## ranger::ranger(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
## mtry = min_cols(~integer(1), x), num.trees = integer(1),
## min.node.size = min_rows(~integer(1), x), num.threads = 1,
## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE)
@@ -96,6 +96,13 @@ By default, parallel processing is turned off. When tuning, it is more efficient
For `ranger` confidence intervals, the intervals are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and will be coerced to be in this range.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-ranger) for `rand_forest()` with the `"ranger"` engine.
diff --git a/man/rmd/rand_forest_spark.Rmd b/man/rmd/rand_forest_spark.Rmd
index 471342ac5..0afe168de 100644
--- a/man/rmd/rand_forest_spark.Rmd
+++ b/man/rmd/rand_forest_spark.Rmd
@@ -62,6 +62,14 @@ rand_forest(
```{r child = "template-spark-notes.Rmd"}
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
+Note that, for Spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/rand_forest_spark.md b/man/rmd/rand_forest_spark.md
index 9253ebbab..3753b3a3f 100644
--- a/man/rmd/rand_forest_spark.md
+++ b/man/rmd/rand_forest_spark.md
@@ -96,6 +96,16 @@ For models created using the `"spark"` engine, there are several things to consi
* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns.
* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object.
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
+Note that, for Spark engines, the `case_weights` argument value should be a character string to specify the column with the numeric case weights.
+
+
## References
- Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd
index 1a1a67e32..08fb5d462 100644
--- a/man/rmd/rule_fit_xrf.Rmd
+++ b/man/rmd/rule_fit_xrf.Rmd
@@ -95,6 +95,11 @@ These differences will create a disparity in the values of the `penalty` argumen
```{r child = "template-mtry-prop.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## References
- Friedman and Popescu. "Predictive learning via rule ensembles." Ann. Appl. Stat. 2 (3) 916- 954, September 2008
diff --git a/man/rmd/rule_fit_xrf.md b/man/rmd/rule_fit_xrf.md
index c1a2e1c61..9d9e5b8c5 100644
--- a/man/rmd/rule_fit_xrf.md
+++ b/man/rmd/rule_fit_xrf.md
@@ -65,7 +65,7 @@ rule_fit(
## Computational engine: xrf
##
## Model fit template:
-## rules::xrf_fit(formula = missing_arg(), data = missing_arg(),
+## rules::xrf_fit(object = missing_arg(), data = missing_arg(),
## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1),
## max_depth = integer(1), eta = numeric(1), gamma = numeric(1),
## subsample = numeric(1), lambda = numeric(1))
@@ -111,7 +111,7 @@ rule_fit(
## Computational engine: xrf
##
## Model fit template:
-## rules::xrf_fit(formula = missing_arg(), data = missing_arg(),
+## rules::xrf_fit(object = missing_arg(), data = missing_arg(),
## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1),
## max_depth = integer(1), eta = numeric(1), gamma = numeric(1),
## subsample = numeric(1), lambda = numeric(1))
@@ -132,23 +132,15 @@ Also, there are several configuration differences in how `xrf()` is fit between
These differences will create a disparity in the values of the `penalty` argument that **glmnet** uses. Also, **rules** can also set `penalty` whereas **xrf** uses an internal 5-fold cross-validation to determine it (by default).
-## Other details
-
-### Preprocessing requirements
+## Preprocessing requirements
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
-### Interpreting `mtry`
-
-
-The `mtry` argument denotes the number of predictors that will be randomly sampled at each split when creating tree models.
-
-Some engines, such as `"xgboost"`, `"xrf"`, and `"lightgbm"`, interpret their analogue to the `mtry` argument as the _proportion_ of predictors that will be randomly sampled at each split rather than the _count_. In some settings, such as when tuning over preprocessors that influence the number of predictors, this parameterization is quite helpful---interpreting `mtry` as a proportion means that $[0, 1]$ is always a valid range for that parameter, regardless of input data.
+## Case weights
-parsnip and its extensions accommodate this parameterization using the `counts` argument: a logical indicating whether `mtry` should be interpreted as the number of predictors that will be randomly sampled at each split. `TRUE` indicates that `mtry` will be interpreted in its sense as a count, `FALSE` indicates that the argument will be interpreted in its sense as a proportion.
-`mtry` is a main model argument for \\code{\\link[=boost_tree]{boost_tree()}} and \\code{\\link[=rand_forest]{rand_forest()}}, and thus should not have an engine-specific interface. So, regardless of engine, `counts` defaults to `TRUE`. For engines that support the proportion interpretation---currently `"xgboost"`, `"xrf"` (via the rules package), and `"lightgbm"` (via the bonsai package)---the user can pass the `counts = FALSE` argument to `set_engine()` to supply `mtry` values within $[0, 1]$.
+The underlying model implementation does not allow for case weights.
## References
diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd
index 4dabb03b2..a4f49d61b 100644
--- a/man/rmd/survival_reg_survival.Rmd
+++ b/man/rmd/survival_reg_survival.Rmd
@@ -54,6 +54,11 @@ survival_reg() %>%
extract_fit_engine()
```
+## Case weights
+
+```{r child = "template-uses-case-weights.Rmd"}
+```
+
## References
- Kalbfleisch, J. D. and Prentice, R. L. 2002 _The statistical analysis of failure time data_, Wiley.
diff --git a/man/rmd/survival_reg_survival.md b/man/rmd/survival_reg_survival.md
index 26b78030f..9ea328154 100644
--- a/man/rmd/survival_reg_survival.md
+++ b/man/rmd/survival_reg_survival.md
@@ -75,6 +75,13 @@ survival_reg() %>%
## n= 26
```
+## Case weights
+
+
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights.
+
## References
- Kalbfleisch, J. D. and Prentice, R. L. 2002 _The statistical analysis of failure time data_, Wiley.
diff --git a/man/rmd/svm_linear_LiblineaR.Rmd b/man/rmd/svm_linear_LiblineaR.Rmd
index 86b74c70e..fb7419c81 100644
--- a/man/rmd/svm_linear_LiblineaR.Rmd
+++ b/man/rmd/svm_linear_LiblineaR.Rmd
@@ -59,6 +59,11 @@ Note that the `LiblineaR` engine does not produce class probabilities. When opti
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine.
diff --git a/man/rmd/svm_linear_LiblineaR.md b/man/rmd/svm_linear_LiblineaR.md
index 059744312..1afe530f8 100644
--- a/man/rmd/svm_linear_LiblineaR.md
+++ b/man/rmd/svm_linear_LiblineaR.md
@@ -38,8 +38,8 @@ svm_linear(
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## C = double(1), svr_eps = double(1), type = 11)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1),
+## svr_eps = double(1), type = 11)
```
## Translation from parsnip to the original package (classification)
@@ -63,8 +63,8 @@ svm_linear(
## Computational engine: LiblineaR
##
## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## C = double(1), type = 1)
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), C = double(1),
+## type = 1)
```
The `margin` parameter does not apply to classification models.
@@ -80,6 +80,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine.
diff --git a/man/rmd/svm_linear_kernlab.Rmd b/man/rmd/svm_linear_kernlab.Rmd
index 560b4b129..f3afcc31b 100644
--- a/man/rmd/svm_linear_kernlab.Rmd
+++ b/man/rmd/svm_linear_kernlab.Rmd
@@ -57,6 +57,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-kernlab) for `svm_linear()` with the `"kernlab"` engine.
diff --git a/man/rmd/svm_linear_kernlab.md b/man/rmd/svm_linear_kernlab.md
index 399e80d93..f8aadb40b 100644
--- a/man/rmd/svm_linear_kernlab.md
+++ b/man/rmd/svm_linear_kernlab.md
@@ -78,6 +78,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-kernlab) for `svm_linear()` with the `"kernlab"` engine.
diff --git a/man/rmd/svm_poly_kernlab.Rmd b/man/rmd/svm_poly_kernlab.Rmd
index 78efd4743..d5555dca2 100644
--- a/man/rmd/svm_poly_kernlab.Rmd
+++ b/man/rmd/svm_poly_kernlab.Rmd
@@ -61,6 +61,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-poly-kernlab) for `svm_poly()` with the `"kernlab"` engine.
diff --git a/man/rmd/svm_poly_kernlab.md b/man/rmd/svm_poly_kernlab.md
index 1bdf9d6b2..584d602ec 100644
--- a/man/rmd/svm_poly_kernlab.md
+++ b/man/rmd/svm_poly_kernlab.md
@@ -92,6 +92,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-poly-kernlab) for `svm_poly()` with the `"kernlab"` engine.
diff --git a/man/rmd/svm_rbf_kernlab.Rmd b/man/rmd/svm_rbf_kernlab.Rmd
index 338e9c542..88e9bc46f 100644
--- a/man/rmd/svm_rbf_kernlab.Rmd
+++ b/man/rmd/svm_rbf_kernlab.Rmd
@@ -61,6 +61,11 @@ Note that the `"kernlab"` engine does not naturally estimate class probabilities
```{r child = "template-same-scale.Rmd"}
```
+## Case weights
+
+```{r child = "template-no-case-weights.Rmd"}
+```
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-rbf-kernlab) for `svm_rbf()` with the `"kernlab"` engine.
diff --git a/man/rmd/svm_rbf_kernlab.md b/man/rmd/svm_rbf_kernlab.md
index 8eaa338b1..3be887803 100644
--- a/man/rmd/svm_rbf_kernlab.md
+++ b/man/rmd/svm_rbf_kernlab.md
@@ -86,6 +86,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm
Predictors should have the same scale. One way to achieve this is to center and
scale each so that each predictor has mean zero and a variance of one.
+## Case weights
+
+
+The underlying model implementation does not allow for case weights.
+
## Examples
The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-rbf-kernlab) for `svm_rbf()` with the `"kernlab"` engine.
diff --git a/man/rmd/template-no-case-weights.Rmd b/man/rmd/template-no-case-weights.Rmd
new file mode 100644
index 000000000..e096d2385
--- /dev/null
+++ b/man/rmd/template-no-case-weights.Rmd
@@ -0,0 +1 @@
+The underlying model implementation does not allow for case weights.
diff --git a/man/rmd/template-uses-case-weights.Rmd b/man/rmd/template-uses-case-weights.Rmd
new file mode 100644
index 000000000..f7cf35aaa
--- /dev/null
+++ b/man/rmd/template-uses-case-weights.Rmd
@@ -0,0 +1,3 @@
+This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`.
+
+The `fit()` and `fit_xy()` functions have arguments called `case_weights` that expect vectors of case weights.
diff --git a/tests/testthat/_snaps/translate.md b/tests/testthat/_snaps/translate.md
index 9bf0cf123..e1eb66a7b 100644
--- a/tests/testthat/_snaps/translate.md
+++ b/tests/testthat/_snaps/translate.md
@@ -9,6 +9,9 @@
$y
missing_arg()
+ $weights
+ missing_arg()
+
$nthread
[1] 1
@@ -62,6 +65,9 @@
$y
missing_arg()
+ $weights
+ missing_arg()
+
$print_every_n
expr: ^10L
@@ -105,6 +111,9 @@
$y
missing_arg()
+ $weights
+ missing_arg()
+
$nrounds
expr: ^15
@@ -148,6 +157,9 @@
$y
missing_arg()
+ $weights
+ missing_arg()
+
$min_child_weight
expr: ^15
@@ -462,7 +474,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
@@ -477,7 +489,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$max_iter
@@ -519,7 +531,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$elastic_net_param
@@ -539,7 +551,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$elastic_net_param
@@ -611,7 +623,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$reg_param
@@ -680,9 +692,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$verbose
[1] FALSE
@@ -698,9 +707,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$bias
expr: ^0
@@ -773,7 +779,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$family
@@ -791,7 +797,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$max_iter
@@ -825,7 +831,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$elastic_net_param
@@ -912,9 +918,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$cost
expr: ^1
@@ -935,7 +938,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$reg_param
@@ -969,9 +972,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$type
expr: ^tune()
@@ -992,7 +992,7 @@
$formula
missing_arg()
- $weight_col
+ $weights
missing_arg()
$elastic_net_param
@@ -1127,9 +1127,6 @@
$data
missing_arg()
- $weights
- missing_arg()
-
$size
expr: ^4
@@ -1170,9 +1167,6 @@
$data
missing_arg()
- $weights
- missing_arg()
-
$size
[1] 5
@@ -1194,9 +1188,6 @@
$data
missing_arg()
- $weights
- missing_arg()
-
$size
[1] 5
@@ -1240,9 +1231,6 @@
$data
missing_arg()
- $weights
- missing_arg()
-
$size
[1] 5
@@ -1269,9 +1257,6 @@
$data
missing_arg()
- $weights
- missing_arg()
-
$size
expr: ^4
@@ -1593,7 +1578,7 @@
$y
missing_arg()
- $case.weights
+ $weights
missing_arg()
$mtry
@@ -1656,7 +1641,7 @@
$y
missing_arg()
- $case.weights
+ $weights
missing_arg()
$num.trees
@@ -1688,7 +1673,7 @@
$y
missing_arg()
- $case.weights
+ $weights
missing_arg()
$num.trees
@@ -1765,7 +1750,7 @@
$y
missing_arg()
- $case.weights
+ $weights
missing_arg()
$min.node.size
@@ -1903,9 +1888,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$type
[1] 11
@@ -1924,9 +1906,6 @@
$y
missing_arg()
- $wi
- missing_arg()
-
$type
expr: ^12
diff --git a/tests/testthat/test-case-weights.R b/tests/testthat/test-case-weights.R
new file mode 100644
index 000000000..f2bce3656
--- /dev/null
+++ b/tests/testthat/test-case-weights.R
@@ -0,0 +1,143 @@
+
+test_that('case weights with xy method', {
+  # Weights must be accepted by both fit() and fit_xy() and reach the C5.0 call.
+  skip_if_not_installed("C50")
+  skip_if_not_installed("modeldata")
+  data("two_class_dat", package = "modeldata")
+
+  # Seed the draw so the 0/1 weights are identical on every run.
+  set.seed(1)
+  wts <- ifelse(runif(nrow(two_class_dat)) < 1/5, 0, 1)
+  wts <- importance_weights(wts)
+
+  expect_error({
+    set.seed(1)
+    C5_bst_wt_fit <-
+      boost_tree(trees = 5) %>%
+      set_engine("C5.0") %>%
+      set_mode("classification") %>%
+      fit(Class ~ ., data = two_class_dat, case_weights = wts)
+  },
+  regexp = NA)
+
+  expect_output(
+    print(C5_bst_wt_fit$fit$call),
+    "weights = weights"
+  )
+
+  expect_error({
+    set.seed(1)
+    C5_bst_wt_fit <-
+      boost_tree(trees = 5) %>%
+      set_engine("C5.0") %>%
+      set_mode("classification") %>%
+      fit_xy(
+        x = two_class_dat[c("A", "B")],
+        y = two_class_dat$Class,
+        case_weights = wts
+      )
+  },
+  regexp = NA)
+
+  expect_output(
+    print(C5_bst_wt_fit$fit$call),
+    "weights = weights"
+  )
+})
+
+
+test_that('case weights with xy method - non-standard argument names', {
+  # Weights must be accepted by both fit() and fit_xy() for rand_forest().
+  skip_if_not_installed("ranger")
+  skip_if_not_installed("modeldata")
+  data("two_class_dat", package = "modeldata")
+
+  # Seed the draw so the 0/1 weights are identical on every run.
+  set.seed(1)
+  wts <- ifelse(runif(nrow(two_class_dat)) < 1/5, 0, 1)
+  wts <- importance_weights(wts)
+
+  expect_error({
+    set.seed(1)
+    rf_wt_fit <-
+      rand_forest(trees = 5) %>%
+      set_mode("classification") %>%
+      fit(Class ~ ., data = two_class_dat, case_weights = wts)
+  },
+  regexp = NA)
+  # NOTE(review): the call check below is disabled; confirm whether to restore it.
+  # expect_output(
+  #   print(rf_wt_fit$fit$call),
+  #   "case\\.weights = weights"
+  # )
+
+  expect_error({
+    set.seed(1)
+    rf_wt_fit <-
+      rand_forest(trees = 5) %>%
+      set_mode("classification") %>%
+      fit_xy(
+        x = two_class_dat[c("A", "B")],
+        y = two_class_dat$Class,
+        case_weights = wts
+      )
+  },
+  regexp = NA)
+})
+
+test_that('case weights with formula method', {
+  # A 0/1 frequency-weighted fit should match a fit on the rows with weight 1.
+  skip_if_not_installed("modeldata")
+  data("ames", package = "modeldata")
+  ames$Sale_Price <- log10(ames$Sale_Price)
+
+  set.seed(1)
+  keep <- ifelse(runif(nrow(ames)) < 1/5, 0L, 1L)
+  kept_rows <- ames[keep != 0, ]
+  freq_wts <- frequency_weights(keep)
+
+  expect_error(
+    weighted_fit <-
+      fit(
+        linear_reg(), Sale_Price ~ Longitude + Latitude,
+        data = ames, case_weights = freq_wts
+      ),
+    regexp = NA)
+
+  subset_fit <-
+    fit(linear_reg(), Sale_Price ~ Longitude + Latitude, data = kept_rows)
+
+  expect_equal(coef(weighted_fit$fit), coef(subset_fit$fit))
+})
+
+test_that('case weights with formula method that goes through `fit_xy()`', {
+  # A 0/1 frequency-weighted fit_xy() fit should match a fit on the kept rows.
+  skip_if_not_installed("modeldata")
+  data("ames", package = "modeldata")
+  ames$Sale_Price <- log10(ames$Sale_Price)
+  predictors <- c("Longitude", "Latitude")
+
+  set.seed(1)
+  keep <- ifelse(runif(nrow(ames)) < 1/5, 0L, 1L)
+  kept_rows <- ames[keep != 0, ]
+  freq_wts <- frequency_weights(keep)
+
+  expect_error(
+    weighted_fit <-
+      fit_xy(
+        linear_reg(),
+        x = ames[predictors],
+        y = ames$Sale_Price,
+        case_weights = freq_wts
+      ),
+    regexp = NA)
+
+  subset_fit <-
+    fit_xy(
+      linear_reg(),
+      x = kept_rows[predictors],
+      y = kept_rows$Sale_Price
+    )
+
+  expect_equal(coef(weighted_fit$fit), coef(subset_fit$fit))
+})
diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R
index b7147b07a..690a8a0a1 100644
--- a/tests/testthat/test_boost_tree_xgboost.R
+++ b/tests/testthat/test_boost_tree_xgboost.R
@@ -12,27 +12,66 @@ test_that('xgboost execution, classification', {
skip_if_not_installed("xgboost")
- expect_error(
- res <- parsnip::fit(
+ ctrl$verbosity <- 0L
+
+ set.seed(1)
+ wts <- ifelse(runif(nrow(hpc)) < .1, 0, 1)
+ wts <- importance_weights(wts)
+
+ expect_error({
+ set.seed(1)
+ res_f <- parsnip::fit(
hpc_xgboost,
class ~ compounds + input_fields,
data = hpc,
control = ctrl
- ),
- regexp = NA
+ )
+ },
+ regexp = NA
)
- expect_error(
- res <- parsnip::fit_xy(
+ expect_error({
+ set.seed(1)
+ res_xy <- parsnip::fit_xy(
hpc_xgboost,
- x = hpc[, num_pred],
+ x = hpc[, c("compounds", "input_fields")],
y = hpc$class,
control = ctrl
- ),
- regexp = NA
+ )
+ },
+ regexp = NA
+ )
+ expect_error({
+ set.seed(1)
+ res_f_wts <- parsnip::fit(
+ hpc_xgboost,
+ class ~ compounds + input_fields,
+ data = hpc,
+ control = ctrl,
+ case_weights = wts
+ )
+ },
+ regexp = NA
+ )
+ expect_error({
+ set.seed(1)
+ res_xy_wts <- parsnip::fit_xy(
+ hpc_xgboost,
+ x = hpc[, c("compounds", "input_fields")],
+ y = hpc$class,
+ control = ctrl,
+ case_weights = wts
+ )
+ },
+ regexp = NA
)
- expect_true(has_multi_predict(res))
- expect_equal(multi_predict_args(res), "trees")
+ expect_equal(res_f$fit$evaluation_log, res_xy$fit$evaluation_log)
+ expect_equal(res_f_wts$fit$evaluation_log, res_xy_wts$fit$evaluation_log)
+ # Check to see if the case weights had an effect
+ expect_true(!isTRUE(all.equal(res_f$fit$evaluation_log, res_f_wts$fit$evaluation_log)))
+
+ expect_true(has_multi_predict(res_xy))
+ expect_equal(multi_predict_args(res_xy), "trees")
expect_error(
res <- parsnip::fit(
@@ -50,6 +89,9 @@ test_that('xgboost classification prediction', {
skip_if_not_installed("xgboost")
library(xgboost)
+
+ ctrl$verbosity <- 0L
+
xy_fit <- fit_xy(
hpc_xgboost,
x = hpc[, num_pred],
@@ -96,6 +138,8 @@ test_that('xgboost execution, regression', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
expect_error(
res <- parsnip::fit(
car_basic,
@@ -133,6 +177,8 @@ test_that('xgboost regression prediction', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
xy_fit <- fit_xy(
car_basic,
x = mtcars[, -1],
@@ -162,6 +208,8 @@ test_that('xgboost regression prediction', {
test_that('xgboost alternate objective', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
spec <-
boost_tree() %>%
set_engine("xgboost", objective = "reg:pseudohubererror") %>%
@@ -176,6 +224,8 @@ test_that('submodel prediction', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
reg_fit <-
boost_tree(trees = 20, mode = "regression") %>%
set_engine("xgboost") %>%
@@ -194,7 +244,7 @@ test_that('submodel prediction', {
class_fit <-
boost_tree(trees = 20, mode = "classification") %>%
set_engine("xgboost") %>%
- fit(churn ~ ., data = wa_churn[-(1:4), c("churn", vars)])
+ fit(churn ~ ., data = wa_churn[-(1:4), c("churn", vars)], control = ctrl)
x <- xgboost::xgb.DMatrix(as.matrix(wa_churn[1:4, vars]))
@@ -214,6 +264,9 @@ test_that('submodel prediction', {
test_that('validation sets', {
skip_if_not_installed("xgboost")
+
+ ctrl$verbosity <- 0L
+
expect_error(
reg_fit <-
boost_tree(trees = 20, mode = "regression") %>%
@@ -259,6 +312,9 @@ test_that('validation sets', {
test_that('early stopping', {
skip_if_not_installed("xgboost")
+
+ ctrl$verbosity <- 0L
+
set.seed(233456)
expect_error(
reg_fit <-
@@ -304,6 +360,7 @@ test_that('xgboost data conversion', {
mtcar_x <- mtcars[, -1]
mtcar_mat <- as.matrix(mtcar_x)
mtcar_smat <- Matrix::Matrix(mtcar_mat, sparse = TRUE)
+ wts <- 1:32
expect_error(from_df <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg), regexp = NA)
expect_true(inherits(from_df$data, "xgb.DMatrix"))
@@ -344,15 +401,25 @@ test_that('xgboost data conversion', {
expect_warning(from_df <- parsnip:::as_xgb_data(mtcar_x, mtcars_y, event_level = "second"),
regexp = "`event_level` can only be set for binary variables.")
+ # case weights added
+ expect_error(wted <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg, weights = wts), regexp = NA)
+ expect_equal(wts, xgboost::getinfo(wted$data, "weight"))
+ expect_error(wted_val <- parsnip:::as_xgb_data(mtcar_x, mtcars$mpg, weights = wts, validation = 1/4), regexp = NA)
+ expect_true(all(xgboost::getinfo(wted_val$data, "weight") %in% wts))
+ expect_null(xgboost::getinfo(wted_val$watchlist$validation, "weight"))
+
})
test_that('xgboost data and sparse matrices', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
mtcar_x <- mtcars[, -1]
mtcar_mat <- as.matrix(mtcar_x)
mtcar_smat <- Matrix::Matrix(mtcar_mat, sparse = TRUE)
+ wts <- 1:32
xgb_spec <-
boost_tree(trees = 10) %>%
@@ -374,6 +441,13 @@ test_that('xgboost data and sparse matrices', {
expect_equal(extract_fit_engine(from_df), extract_fit_engine(from_mat), ignore_function_env = TRUE)
expect_equal(extract_fit_engine(from_df), extract_fit_engine(from_sparse), ignore_function_env = TRUE)
+ # case weights added
+ expect_error(wted <- parsnip:::as_xgb_data(mtcar_smat, mtcars$mpg, weights = wts), regexp = NA)
+ expect_equal(wts, xgboost::getinfo(wted$data, "weight"))
+ expect_error(wted_val <- parsnip:::as_xgb_data(mtcar_smat, mtcars$mpg, weights = wts, validation = 1/4), regexp = NA)
+ expect_true(all(xgboost::getinfo(wted_val$data, "weight") %in% wts))
+ expect_null(xgboost::getinfo(wted_val$watchlist$validation, "weight"))
+
})
@@ -383,6 +457,8 @@ test_that('argument checks for data dimensions', {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
data(penguins, package = "modeldata")
penguins <- na.omit(penguins)
@@ -395,11 +471,11 @@ test_that('argument checks for data dimensions', {
penguins_dummy <- as.data.frame(penguins_dummy[, -1])
expect_warning(
- f_fit <- spec %>% fit(species ~ ., data = penguins),
+ f_fit <- spec %>% fit(species ~ ., data = penguins, control = ctrl),
"1000 samples were requested"
)
expect_warning(
- xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species),
+ xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species, control = ctrl),
"1000 samples were requested"
)
expect_equal(extract_fit_engine(f_fit)$params$colsample_bynode, 1)
@@ -413,6 +489,8 @@ test_that("fit and prediction with `event_level`", {
skip_if_not_installed("xgboost")
+ ctrl$verbosity <- 0L
+
data(penguins, package = "modeldata")
penguins <- na.omit(penguins[, -c(1:2)])
@@ -438,7 +516,8 @@ test_that("fit and prediction with `event_level`", {
nrounds = 10,
watchlist = list("training" = xgbmat_train_1),
objective = "binary:logistic",
- eval_metric = "auc")
+ eval_metric = "auc",
+ verbose = 0)
expect_equal(extract_fit_engine(fit_p_1)$evaluation_log, fit_xgb_1$evaluation_log)
@@ -461,7 +540,8 @@ test_that("fit and prediction with `event_level`", {
nrounds = 10,
watchlist = list("training" = xgbmat_train_2),
objective = "binary:logistic",
- eval_metric = "auc")
+ eval_metric = "auc",
+ verbose = 0)
expect_equal(extract_fit_engine(fit_p_2)$evaluation_log, fit_xgb_2$evaluation_log)
@@ -473,6 +553,9 @@ test_that("fit and prediction with `event_level`", {
test_that("count/proportion parameters", {
skip_if_not_installed("xgboost")
+
+ ctrl$verbosity <- 0L
+
fit1 <-
boost_tree(mtry = 7, trees = 4) %>%
set_engine("xgboost") %>%
diff --git a/tests/testthat/test_grouped_glm.R b/tests/testthat/test_grouped_glm.R
new file mode 100644
index 000000000..2d47961d5
--- /dev/null
+++ b/tests/testthat/test_grouped_glm.R
@@ -0,0 +1,18 @@
+# Tests for glm_grouped(): a binomial glm fit to data stored as counts.
+# tidyr is called through its namespace so this file attaches nothing globally.
+
+test_that('correct results for glm_grouped()', {
+  ucb_weighted <- as.data.frame(UCBAdmissions)
+  ucb_weighted$Freq <- as.integer(ucb_weighted$Freq)
+
+  # Expand the counts so that each row is repeated `Freq` times.
+  ucb_long <- tidyr::uncount(ucb_weighted, Freq)
+  ungrouped <- glm(Admit ~ Gender + Dept, data = ucb_long, family = binomial)
+
+  expect_error(
+    grouped <- glm_grouped(Admit ~ Gender + Dept, data = ucb_weighted, weights = ucb_weighted$Freq),
+    regexp = NA
+  )
+  # NOTE(review): `ungrouped` is never compared with `grouped`; confirm whether a coefficient check was intended.
+  expect_equal(grouped$df.null, 11)
+})
diff --git a/tests/testthat/test_mlp.R b/tests/testthat/test_mlp.R
index 035086ec9..da4e639d7 100644
--- a/tests/testthat/test_mlp.R
+++ b/tests/testthat/test_mlp.R
@@ -1,3 +1,4 @@
+
test_that('updating', {
expect_snapshot(
mlp(mode = "classification", hidden_units = 2) %>%
diff --git a/tests/testthat/test_model_basics.R b/tests/testthat/test_model_basics.R
new file mode 100644
index 000000000..bd79f8bcb
--- /dev/null
+++ b/tests/testthat/test_model_basics.R
@@ -0,0 +1,104 @@
+# Basic checks: each model specification prints, and each engine-specific constructor call returns an object of the model's class.
+
+test_that('basic object classes and print methods', {
+  expect_output(print(bag_mars()), 'Specification')
+
+  expect_output(print(bag_tree()), 'Specification')
+
+  expect_output(print(bart()), 'Specification')
+  expect_s3_class(bart(engine = 'dbarts'), 'bart')
+
+  expect_output(print(boost_tree()), 'Specification')
+  expect_s3_class(boost_tree(engine = 'C5.0'), 'boost_tree')
+  expect_s3_class(boost_tree(engine = 'spark'), 'boost_tree')
+  expect_s3_class(boost_tree(engine = 'xgboost'), 'boost_tree')
+
+  expect_output(print(C5_rules()), 'Specification')
+
+  expect_output(print(cubist_rules()), 'Specification')
+
+  expect_output(print(decision_tree()), 'Specification')
+  expect_s3_class(decision_tree(engine = 'C5.0'), 'decision_tree')
+  expect_s3_class(decision_tree(engine = 'rpart'), 'decision_tree')
+  expect_s3_class(decision_tree(engine = 'spark'), 'decision_tree')
+
+  expect_output(print(discrim_flexible()), 'Specification')
+
+  expect_output(print(discrim_linear()), 'Specification')
+
+  expect_output(print(discrim_quad()), 'Specification')
+
+  expect_output(print(discrim_regularized()), 'Specification')
+
+  expect_output(print(gen_additive_mod()), 'Specification')
+  expect_s3_class(gen_additive_mod(engine = 'mgcv'), 'gen_additive_mod')
+
+  expect_output(print(linear_reg()), 'Specification')
+  expect_s3_class(linear_reg(engine = 'brulee'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'glm'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'glmnet'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'keras'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'lm'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'spark'), 'linear_reg')
+  expect_s3_class(linear_reg(engine = 'stan'), 'linear_reg')
+
+  expect_output(print(logistic_reg()), 'Specification')
+  expect_s3_class(logistic_reg(engine = 'brulee'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'glm'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'glmnet'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'keras'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'LiblineaR'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'spark'), 'logistic_reg')
+  expect_s3_class(logistic_reg(engine = 'stan'), 'logistic_reg')
+
+  expect_output(print(mars()), 'Specification')
+  expect_s3_class(mars(engine = 'earth'), 'mars')
+
+  expect_output(print(mlp()), 'Specification')
+  expect_s3_class(mlp(engine = 'brulee'), 'mlp')
+  expect_s3_class(mlp(engine = 'keras'), 'mlp')
+  expect_s3_class(mlp(engine = 'nnet'), 'mlp')
+
+  expect_output(print(multinom_reg()), 'Specification')
+  expect_s3_class(multinom_reg(engine = 'brulee'), 'multinom_reg')
+  expect_s3_class(multinom_reg(engine = 'glmnet'), 'multinom_reg')
+  expect_s3_class(multinom_reg(engine = 'keras'), 'multinom_reg')
+  expect_s3_class(multinom_reg(engine = 'nnet'), 'multinom_reg')
+  expect_s3_class(multinom_reg(engine = 'spark'), 'multinom_reg')
+
+  expect_output(print(naive_Bayes()), 'Specification')
+
+  expect_output(print(nearest_neighbor()), 'Specification')
+  expect_s3_class(nearest_neighbor(engine = 'kknn'), 'nearest_neighbor')
+
+  expect_output(print(null_model()), 'Specification')
+  expect_s3_class(null_model(), 'null_model')
+
+  expect_output(print(pls()), 'Specification')
+
+  expect_output(print(poisson_reg()), 'Specification')
+
+  expect_output(print(proportional_hazards()), 'Specification')
+
+  expect_output(print(rand_forest()), 'Specification')
+  expect_s3_class(rand_forest(engine = 'randomForest'), 'rand_forest')
+  expect_s3_class(rand_forest(engine = 'ranger'), 'rand_forest')
+  expect_s3_class(rand_forest(engine = 'spark'), 'rand_forest')
+
+  expect_output(print(rule_fit()), 'Specification')
+
+  expect_output(print(survival_reg()), 'Specification')
+
+  expect_output(print(svm_linear()), 'Specification')
+  expect_s3_class(svm_linear(engine = 'kernlab'), 'svm_linear')
+  expect_s3_class(svm_linear(engine = 'LiblineaR'), 'svm_linear')
+
+  expect_output(print(svm_poly()), 'Specification')
+  expect_s3_class(svm_poly(engine = 'kernlab'), 'svm_poly')
+
+  expect_output(print(svm_rbf()), 'Specification')
+  expect_s3_class(svm_rbf(engine = 'kernlab'), 'svm_rbf')
+  expect_s3_class(svm_rbf(engine = 'liquidSVM'), 'svm_rbf')
+
+})
+
diff --git a/tests/testthat/test_rand_forest.R b/tests/testthat/test_rand_forest.R
index 96212a8ae..e74b5e7c7 100644
--- a/tests/testthat/test_rand_forest.R
+++ b/tests/testthat/test_rand_forest.R
@@ -1,3 +1,4 @@
+
test_that('updating', {
expect_snapshot(
rand_forest(mode = "regression", mtry = 2) %>%
diff --git a/tests/testthat/test_svm_linear.R b/tests/testthat/test_svm_linear.R
index c81f0e43b..e4de476bd 100644
--- a/tests/testthat/test_svm_linear.R
+++ b/tests/testthat/test_svm_linear.R
@@ -1,6 +1,7 @@
hpc <- hpc_data[1:150, c(2:5, 8)]
# ------------------------------------------------------------------------------
+
test_that('updating', {
expect_snapshot(
svm_linear(mode = "regression", cost = 2) %>%