From 8fee11dceb80c25bf23b32066adcf330ce7e871f Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Fri, 30 Sep 2022 14:09:15 -0400 Subject: [PATCH 1/3] added some details about each case type of case weights --- R/case-weights.R | 15 +++++++++++++++ man/importance_weights.Rd | 9 +++++++++ man/new_importance_weights.Rd | 7 +++++++ 3 files changed, 31 insertions(+) diff --git a/R/case-weights.R b/R/case-weights.R index 3b8f1652..fc478fc9 100644 --- a/R/case-weights.R +++ b/R/case-weights.R @@ -9,6 +9,15 @@ #' allowed. #' #' @param x A double vector. +#' @details +#' +#' Importance weights focus on how much each row of the data set should +#' influence model estimation. These can be based on data or arbitrarily set to +#' achieve some goal. +#' +#' In tidymodels, importance weights only affect the model estimation and +#' _supervised_ recipes steps. They are not used with yardstick functions for +#' calculating measures of model performance. #' #' @return A new importance weights vector. #' @@ -41,6 +50,12 @@ importance_weights <- function(x) { #' #' @param x A double vector. #' +#' @details +#' Frequency weights are integers that denote how many times a particular row of +#' data has been observed. They help compress redundant rows into a single entry. +#' +#' In tidymodels, frequency weights are used for all parts of the preprocessing, +#' model fitting, and performance estimation operations. #' @return A new importance weights vector. #' #' @export diff --git a/man/importance_weights.Rd b/man/importance_weights.Rd index 56030b40..1a71613e 100644 --- a/man/importance_weights.Rd +++ b/man/importance_weights.Rd @@ -20,6 +20,15 @@ to apply a context dependent weight to your observations. Importance weights are supplied as a non-negative double vector, where fractional values are allowed. } +\details{ +Importance weights focus on how much each row of the data set should +influence model estimation. These can be based on data or arbitrarily set to +achieve some goal. + +In tidymodels, importance weights only affect the model estimation and +\emph{supervised} recipes steps. They are not used with yardstick functions for +calculating measures of model performance. +} \examples{ importance_weights(c(1.5, 2.3, 10)) } diff --git a/man/new_importance_weights.Rd b/man/new_importance_weights.Rd index a6076149..ddae2c62 100644 --- a/man/new_importance_weights.Rd +++ b/man/new_importance_weights.Rd @@ -23,6 +23,13 @@ A new importance weights vector. a new importance weights vector. Generally, you should use \code{\link[=importance_weights]{importance_weights()}} instead. } +\details{ +Frequency weights are integers that denote how many times a particular row of +data has been observed. They help compress redundant rows into a single entry. + +In tidymodels, frequency weights are used for all parts of the preprocessing, +model fitting, and performance estimation operations. +} \examples{ new_importance_weights() new_importance_weights(c(1.5, 2.3, 10)) From a3ddf5572fca4e724d344d5323e46618e29814e9 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 28 Oct 2022 14:15:40 -0400 Subject: [PATCH 2/3] Tweak placement of documentation --- R/case-weights.R | 18 ++++++++++-------- man/frequency_weights.Rd | 8 ++++++++ man/new_importance_weights.Rd | 7 ------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/R/case-weights.R b/R/case-weights.R index fc478fc9..762c73ac 100644 --- a/R/case-weights.R +++ b/R/case-weights.R @@ -8,9 +8,7 @@ #' are supplied as a non-negative double vector, where fractional values are #' allowed. #' -#' @param x A double vector. #' @details -#' #' Importance weights focus on how much each row of the data set should #' influence model estimation. These can be based on data or arbitrarily set to #' achieve some goal. @@ -19,6 +17,8 @@ #' _supervised_ recipes steps. They are not used with yardstick functions for #' calculating measures of model performance. #' +#' @param x A double vector. +#' #' @return A new importance weights vector. #' #' @seealso @@ -50,12 +50,6 @@ importance_weights <- function(x) { #' #' @param x A double vector. #' -#' @details -#' Frequency weights are integers that denote how many times a particular row of -#' data has been observed. They help compress redundant rows into a single entry. -#' -#' In tidymodels, frequency weights are used for all parts of the preprocessing, -#' model fitting, and performance estimation operations. #' @return A new importance weights vector. #' #' @export @@ -132,6 +126,14 @@ vec_ptype_abbr.hardhat_importance_weights <- function(x, ...) { #' are supplied as a non-negative integer vector, where only whole numbers are #' allowed. #' +#' @details +#' Frequency weights are integers that denote how many times a particular row of +#' the data has been observed. They help compress redundant rows into a single +#' entry. +#' +#' In tidymodels, frequency weights are used for all parts of the preprocessing, +#' model fitting, and performance estimation operations. +#' #' @param x An integer vector. #' #' @return A new frequency weights vector. diff --git a/man/frequency_weights.Rd b/man/frequency_weights.Rd index a830251b..6bbd1f1c 100644 --- a/man/frequency_weights.Rd +++ b/man/frequency_weights.Rd @@ -20,6 +20,14 @@ to compactly repeat an observation a set number of times. Frequency weights are supplied as a non-negative integer vector, where only whole numbers are allowed. } +\details{ +Frequency weights are integers that denote how many times a particular row of +the data has been observed. They help compress redundant rows into a single +entry. + +In tidymodels, frequency weights are used for all parts of the preprocessing, +model fitting, and performance estimation operations. +} \examples{ # Record that the first observation has 10 replicates, the second has 12 # replicates, and so on diff --git a/man/new_importance_weights.Rd b/man/new_importance_weights.Rd index ddae2c62..a6076149 100644 --- a/man/new_importance_weights.Rd +++ b/man/new_importance_weights.Rd @@ -23,13 +23,6 @@ A new importance weights vector. a new importance weights vector. Generally, you should use \code{\link[=importance_weights]{importance_weights()}} instead. } -\details{ -Frequency weights are integers that denote how many times a particular row of -data has been observed. They help compress redundant rows into a single entry. - -In tidymodels, frequency weights are used for all parts of the preprocessing, -model fitting, and performance estimation operations. -} \examples{ new_importance_weights() new_importance_weights(c(1.5, 2.3, 10)) From e1f0440b2492620a9b8700570df1b7cbb056d118 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 28 Oct 2022 14:17:08 -0400 Subject: [PATCH 3/3] NEWS bullet --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 8c4ad86d..ec5b4471 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # hardhat (development version) +* Added more documentation about importance and frequency weights in + `?importance_weights()` and `?frequency_weights()` (#214). + # hardhat 1.2.0 * We have reverted the change made in hardhat 1.0.0 that caused recipe