From 46280d074f8f7202f631b57d8ead7f277d5094e8 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:10:03 -0500 Subject: [PATCH 01/65] include md instead of Rmd --- R/C5_rules_C5.0.R | 11 +++++++++ R/aaa_models.R | 2 +- R/bag_mars_earth.R | 10 +++++++++ R/bag_tree_C5.0.R | 10 +++++++++ R/bag_tree_rpart 2.R | 9 ++++++++ R/bag_tree_rpart.R | 10 +++++++++ R/bart_dbarts.R | 2 +- R/boost_tree_C5.0.R | 2 +- R/boost_tree_mboost.R | 11 +++++++++ R/boost_tree_spark.R | 2 +- R/boost_tree_xgboost.R | 2 +- R/contr_one_hot.R | 2 +- R/cubist_and_rulefit.R | 7 ++++++ R/cubist_rules_Cubist.R | 11 +++++++++ R/decision_tree_C5.0.R | 2 +- R/decision_tree_party.R | 10 +++++++++ R/decision_tree_rpart.R | 2 +- R/decision_tree_spark.R | 2 +- R/discrim_flexible_earth.R | 11 +++++++++ R/discrim_linear_MASS.R | 12 ++++++++++ R/discrim_linear_mda.R | 11 +++++++++ R/discrim_linear_sparsediscrim.R | 11 +++++++++ R/discrim_quad_MASS.R | 12 ++++++++++ R/discrim_quad_sparsediscrim.R | 11 +++++++++ R/discrim_regularized_klaR.R | 13 +++++++++++ R/gen_additive_mod_mgcv.R | 2 +- R/glmnet_details.R | 2 +- R/knit_engine_docs.R | 37 +++++++++++++++++++++++++++++++ R/linear_reg_gee.R | 10 +++++++++ R/linear_reg_glmnet.R | 2 +- R/linear_reg_keras.R | 2 +- R/linear_reg_lm.R | 2 +- R/linear_reg_lmer.R | 10 +++++++++ R/linear_reg_spark.R | 2 +- R/linear_reg_stan.R | 2 +- R/linear_reg_stan_glmer.R | 10 +++++++++ R/logistic_reg_LiblineaR.R | 2 +- R/logistic_reg_gee.R | 10 +++++++++ R/logistic_reg_glm.R | 2 +- R/logistic_reg_glmer.R | 10 +++++++++ R/logistic_reg_glmnet.R | 2 +- R/logistic_reg_keras.R | 2 +- R/logistic_reg_spark.R | 2 +- R/logistic_reg_stan.R | 2 +- R/logistic_reg_stan_glmer.R | 10 +++++++++ R/mars_earth.R | 2 +- R/mlp_keras.R | 2 +- R/mlp_nnet.R | 2 +- R/multinom_reg_glmnet.R | 2 +- R/multinom_reg_keras.R | 2 +- R/multinom_reg_nnet.R | 2 +- R/multinom_reg_spark.R | 2 +- R/naive_Bayes_klaR.R | 10 +++++++++ R/naive_Bayes_naivebayes.R | 10 +++++++++ R/nearest_neighbor_kknn.R | 2 +- 
R/nullmodel.R | 2 +- R/pls_mixOmics.R | 9 ++++++++ R/poisson_reg_gee.R | 10 +++++++++ R/poisson_reg_glm.R | 9 ++++++++ R/poisson_reg_glmer.R | 10 +++++++++ R/poisson_reg_glmnet.R | 10 +++++++++ R/poisson_reg_hurdle.R | 11 +++++++++ R/poisson_reg_stan.R | 10 +++++++++ R/poisson_reg_stan_glmer.R | 10 +++++++++ R/poisson_reg_zeroinfl.R | 11 +++++++++ R/proportional_hazards_glmnet.R | 9 ++++++++ R/proportional_hazards_survival.R | 9 ++++++++ R/rand_forest_party.R | 11 +++++++++ R/rand_forest_randomForest.R | 2 +- R/rand_forest_ranger.R | 2 +- R/rand_forest_spark.R | 2 +- R/rule_fit_xrf.R | 11 +++++++++ R/surv_reg_flexsurv.R | 2 +- R/surv_reg_survival.R | 2 +- R/survival_reg_flexsurv.R | 9 ++++++++ R/survival_reg_survival.R | 9 ++++++++ R/svm_linear_LiblineaR.R | 2 +- R/svm_linear_kernlab.R | 2 +- R/svm_poly_kernlab.R | 2 +- R/svm_rbf_kernlab.R | 2 +- 80 files changed, 475 insertions(+), 40 deletions(-) create mode 100644 R/C5_rules_C5.0.R create mode 100644 R/bag_mars_earth.R create mode 100644 R/bag_tree_C5.0.R create mode 100644 R/bag_tree_rpart 2.R create mode 100644 R/bag_tree_rpart.R create mode 100644 R/boost_tree_mboost.R create mode 100644 R/cubist_and_rulefit.R create mode 100644 R/cubist_rules_Cubist.R create mode 100644 R/decision_tree_party.R create mode 100644 R/discrim_flexible_earth.R create mode 100644 R/discrim_linear_MASS.R create mode 100644 R/discrim_linear_mda.R create mode 100644 R/discrim_linear_sparsediscrim.R create mode 100644 R/discrim_quad_MASS.R create mode 100644 R/discrim_quad_sparsediscrim.R create mode 100644 R/discrim_regularized_klaR.R create mode 100644 R/knit_engine_docs.R create mode 100644 R/linear_reg_gee.R create mode 100644 R/linear_reg_lmer.R create mode 100644 R/linear_reg_stan_glmer.R create mode 100644 R/logistic_reg_gee.R create mode 100644 R/logistic_reg_glmer.R create mode 100644 R/logistic_reg_stan_glmer.R create mode 100644 R/naive_Bayes_klaR.R create mode 100644 R/naive_Bayes_naivebayes.R create mode 100644 
R/pls_mixOmics.R create mode 100644 R/poisson_reg_gee.R create mode 100644 R/poisson_reg_glm.R create mode 100644 R/poisson_reg_glmer.R create mode 100644 R/poisson_reg_glmnet.R create mode 100644 R/poisson_reg_hurdle.R create mode 100644 R/poisson_reg_stan.R create mode 100644 R/poisson_reg_stan_glmer.R create mode 100644 R/poisson_reg_zeroinfl.R create mode 100644 R/proportional_hazards_glmnet.R create mode 100644 R/proportional_hazards_survival.R create mode 100644 R/rand_forest_party.R create mode 100644 R/rule_fit_xrf.R create mode 100644 R/survival_reg_flexsurv.R create mode 100644 R/survival_reg_survival.R diff --git a/R/C5_rules_C5.0.R b/R/C5_rules_C5.0.R new file mode 100644 index 000000000..448e0c27a --- /dev/null +++ b/R/C5_rules_C5.0.R @@ -0,0 +1,11 @@ +#' C5.0 rule-based classification models +#' +#' [C50::C5.0()] fits model that derives feature rules from a tree for +#' prediction. A single tree or boosted ensemble can be used. [rules::c5_fit()] +#' is a wrapper around this function. +#' +#' @includeRmd man/rmd/C5_rules_C5.0.md details +#' +#' @name details_C5_rules_C5.0 +#' @keywords internal +NULL diff --git a/R/aaa_models.R b/R/aaa_models.R index a1cacc660..8d28259fe 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -1084,7 +1084,7 @@ get_encoding <- function(model) { #' 1. Create an engine-specific R file in the `R` directory with the name #' `{model}_{engine}.R` (e.g. `boost_tree_C5.0.R`). This has a small amount of #' documentation, as well as the directives "`@name details_{model}_{engine}`" -#' and "`@includeRmd man/rmd/{model}_{engine}.Rmd details`". +#' and "`@includeRmd man/rmd/{model}_{engine}.md details`". #' #' 2. Copy the file in \pkg{parsnip} that is in `man/rmd/setup.Rmd` and put #' it in the same place in your package. 
diff --git a/R/bag_mars_earth.R b/R/bag_mars_earth.R new file mode 100644 index 000000000..cb1787ab5 --- /dev/null +++ b/R/bag_mars_earth.R @@ -0,0 +1,10 @@ +#' Bagged MARS via earth +#' +#' [baguette::bagger()] creates an collection of MARS models forming an +#' ensemble. All models in the ensemble are combined to produce a final prediction. +#' +#' @includeRmd man/rmd/bag_mars_earth.md details +#' +#' @name details_bag_mars_earth +#' @keywords internal +NULL diff --git a/R/bag_tree_C5.0.R b/R/bag_tree_C5.0.R new file mode 100644 index 000000000..c54943b3f --- /dev/null +++ b/R/bag_tree_C5.0.R @@ -0,0 +1,10 @@ +#' Bagged trees via C5.0 +#' +#' [baguette::bagger()] creates an collection of decision trees forming an +#' ensemble. All trees in the ensemble are combined to produce a final prediction. +#' +#' @includeRmd man/rmd/bag_tree_C5.0.md details +#' +#' @name details_bag_tree_C5.0 +#' @keywords internal +NULL diff --git a/R/bag_tree_rpart 2.R b/R/bag_tree_rpart 2.R new file mode 100644 index 000000000..00aaed488 --- /dev/null +++ b/R/bag_tree_rpart 2.R @@ -0,0 +1,9 @@ +#' Ensembles of CART decision trees +#' +#' [ipred::bagging()] fits an ensemble of decision trees, using the `rpart` package. +#' +#' @includeRmd man/rmd/bag_tree_rpart.md details +#' +#' @name details_bag_tree_rpart +#' @keywords internal +NULL diff --git a/R/bag_tree_rpart.R b/R/bag_tree_rpart.R new file mode 100644 index 000000000..935d49d9f --- /dev/null +++ b/R/bag_tree_rpart.R @@ -0,0 +1,10 @@ +#' Bagged trees via rpart +#' +#' [baguette::bagger()] creates an collection of decision trees forming an +#' ensemble. All trees in the ensemble are combined to produce a final prediction. 
+#' +#' @includeRmd man/rmd/bag_tree_rpart.md details +#' +#' @name details_bag_tree_rpart +#' @keywords internal +NULL diff --git a/R/bart_dbarts.R b/R/bart_dbarts.R index 63e2c9d7d..c81ea6a45 100644 --- a/R/bart_dbarts.R +++ b/R/bart_dbarts.R @@ -3,7 +3,7 @@ #' [dbarts::bart()] creates an ensemble of tree-based model whose training #' and assembly is determined using Bayesian analysis. #' -#' @includeRmd man/rmd/bart_dbarts.Rmd details +#' @includeRmd man/rmd/bart_dbarts.md details #' #' @name details_bart_dbarts #' @keywords internal diff --git a/R/boost_tree_C5.0.R b/R/boost_tree_C5.0.R index 439d643f9..7675acb2b 100644 --- a/R/boost_tree_C5.0.R +++ b/R/boost_tree_C5.0.R @@ -4,7 +4,7 @@ #' ensemble. Each tree depends on the results of previous trees. All trees in #' the ensemble are combined to produce a final prediction. #' -#' @includeRmd man/rmd/boost_tree_C5.0.Rmd details +#' @includeRmd man/rmd/boost_tree_C5.0.md details #' #' @name details_boost_tree_C5.0 #' @keywords internal diff --git a/R/boost_tree_mboost.R b/R/boost_tree_mboost.R new file mode 100644 index 000000000..7bdfd3d30 --- /dev/null +++ b/R/boost_tree_mboost.R @@ -0,0 +1,11 @@ +#' Boosted trees +#' +#' [mboost::blackboost()] fits a series of decision trees forming an ensemble. +#' Each tree depends on the results of previous trees. All trees in the +#' ensemble are combined to produce a final prediction. +#' +#' @includeRmd man/rmd/boost_tree_mboost.md details +#' +#' @name details_boost_tree_mboost +#' @keywords internal +NULL diff --git a/R/boost_tree_spark.R b/R/boost_tree_spark.R index c66b04485..13930de3a 100644 --- a/R/boost_tree_spark.R +++ b/R/boost_tree_spark.R @@ -4,7 +4,7 @@ #' forming an ensemble. Each tree depends on the results of previous trees. #' All trees in the ensemble are combined to produce a final prediction. 
#' -#' @includeRmd man/rmd/boost_tree_spark.Rmd details +#' @includeRmd man/rmd/boost_tree_spark.md details #' #' @name details_boost_tree_spark #' @keywords internal diff --git a/R/boost_tree_xgboost.R b/R/boost_tree_xgboost.R index 6cb4c88c5..d6cab8a1c 100644 --- a/R/boost_tree_xgboost.R +++ b/R/boost_tree_xgboost.R @@ -4,7 +4,7 @@ #' ensemble. Each tree depends on the results of previous trees. All trees in #' the ensemble are combined to produce a final prediction. #' -#' @includeRmd man/rmd/boost_tree_xgboost.Rmd details +#' @includeRmd man/rmd/boost_tree_xgboost.md details #' #' @name details_boost_tree_xgboost #' @keywords internal diff --git a/R/contr_one_hot.R b/R/contr_one_hot.R index 5876b6d07..f4bed3711 100644 --- a/R/contr_one_hot.R +++ b/R/contr_one_hot.R @@ -9,7 +9,7 @@ #' @param sparse This argument is for backwards compatibility and only the #' default of `FALSE` is supported. #' -#' @includeRmd man/rmd/one-hot.Rmd details +#' @includeRmd man/rmd/one-hot.md details #' #' @return A diagonal matrix that is `n`-by-`n`. #' diff --git a/R/cubist_and_rulefit.R b/R/cubist_and_rulefit.R new file mode 100644 index 000000000..4c0326584 --- /dev/null +++ b/R/cubist_and_rulefit.R @@ -0,0 +1,7 @@ +#' Comparing Cubist and RuleFit +#' +#' @includeRmd man/rmd/cubist-and-rulefit.Rmd +#' +#' @name cubist_and_rulefit +#' @keywords internal +NULL diff --git a/R/cubist_rules_Cubist.R b/R/cubist_rules_Cubist.R new file mode 100644 index 000000000..a24e1cf97 --- /dev/null +++ b/R/cubist_rules_Cubist.R @@ -0,0 +1,11 @@ +#' Cubist rule-based regression models +#' +#' [Cubist::cubist()] fits a model that derives simple feature rules from a tree +#' ensemble and uses creates regression models within each rule. +#' [rules::cubist_fit()] is a wrapper around this function. 
+#' +#' @includeRmd man/rmd/cubist_rules_Cubist.md details +#' +#' @name details_cubist_rules_Cubist +#' @keywords internal +NULL diff --git a/R/decision_tree_C5.0.R b/R/decision_tree_C5.0.R index b495f4e3a..6c6b35a35 100644 --- a/R/decision_tree_C5.0.R +++ b/R/decision_tree_C5.0.R @@ -3,7 +3,7 @@ #' [C50::C5.0()] fits a model as a set of `if/then` statements that #' creates a tree-based structure. #' -#' @includeRmd man/rmd/decision_tree_C5.0.Rmd details +#' @includeRmd man/rmd/decision_tree_C5.0.md details #' #' @name details_decision_tree_C5.0 #' @keywords internal diff --git a/R/decision_tree_party.R b/R/decision_tree_party.R new file mode 100644 index 000000000..610ec49c3 --- /dev/null +++ b/R/decision_tree_party.R @@ -0,0 +1,10 @@ +#' Decision trees via party +#' +#' [party::ctree()] fits a model as a set of if/then statements that creates a +#' tree-based structure using hypothesis testing methods. +#' +#' @includeRmd man/rmd/decision_tree_party.md details +#' +#' @name details_decision_tree_party +#' @keywords internal +NULL diff --git a/R/decision_tree_rpart.R b/R/decision_tree_rpart.R index 0aec47624..c5addf457 100644 --- a/R/decision_tree_rpart.R +++ b/R/decision_tree_rpart.R @@ -3,7 +3,7 @@ #' [rpart::rpart()] fits a model as a set of `if/then` statements that #' creates a tree-based structure. #' -#' @includeRmd man/rmd/decision_tree_rpart.Rmd details +#' @includeRmd man/rmd/decision_tree_rpart.md details #' #' @name details_decision_tree_rpart #' @keywords internal diff --git a/R/decision_tree_spark.R b/R/decision_tree_spark.R index 55bf23390..d0d99cbfe 100644 --- a/R/decision_tree_spark.R +++ b/R/decision_tree_spark.R @@ -3,7 +3,7 @@ #' [sparklyr::ml_decision_tree()] fits a model as a set of `if/then` #' statements that creates a tree-based structure. 
#' -#' @includeRmd man/rmd/decision_tree_spark.Rmd details +#' @includeRmd man/rmd/decision_tree_spark.md details #' #' @name details_decision_tree_spark #' @keywords internal diff --git a/R/discrim_flexible_earth.R b/R/discrim_flexible_earth.R new file mode 100644 index 000000000..90b7c2258 --- /dev/null +++ b/R/discrim_flexible_earth.R @@ -0,0 +1,11 @@ +#' Flexible discriminant analysis via earth +#' +#' [mda::fda()] (in conjunction with [earth::earth()] can fit a nonlinear +#' discriminant analysis model that uses nonlinear features created using +#' multivariate adaptive regression splines (MARS). +#' +#' @includeRmd man/rmd/discrim_flexible_earth.md details +#' +#' @name details_discrim_flexible_earth +#' @keywords internal +NULL diff --git a/R/discrim_linear_MASS.R b/R/discrim_linear_MASS.R new file mode 100644 index 000000000..8797ccc38 --- /dev/null +++ b/R/discrim_linear_MASS.R @@ -0,0 +1,12 @@ +#' Linear discriminant analysis via MASS +#' +#' [MASS::lda()] fits a model that estimates a multivariate +#' distribution for the predictors separately for the data in each class +#' (Gaussian with a common covariance matrix). Bayes' theorem is used +#' to compute the probability of each class, given the predictor values. +#' +#' @includeRmd man/rmd/discrim_linear_MASS.md details +#' +#' @name details_discrim_linear_MASS +#' @keywords internal +NULL diff --git a/R/discrim_linear_mda.R b/R/discrim_linear_mda.R new file mode 100644 index 000000000..962c503b4 --- /dev/null +++ b/R/discrim_linear_mda.R @@ -0,0 +1,11 @@ +#' Linear discriminant analysis via flexible discriminant analysis +#' +#' [mda::fda()] (in conjunction with [mda::gen.ridge()] can fit a linear +#' discriminant analysis model that penalizes the predictor coefficients with a +#' quadratic penalty (i.e., a ridge or weight decay approach). 
+#' +#' @includeRmd man/rmd/discrim_linear_mda.md details +#' +#' @name details_discrim_linear_mda +#' @keywords internal +NULL diff --git a/R/discrim_linear_sparsediscrim.R b/R/discrim_linear_sparsediscrim.R new file mode 100644 index 000000000..6c0fd859f --- /dev/null +++ b/R/discrim_linear_sparsediscrim.R @@ -0,0 +1,11 @@ +#' Linear discriminant analysis via regularization +#' +#' Functions in the `sparsediscrim` package fit different types of linear +#' discriminant analysis model that regularize the estimates (like the mean or +#' covariance). +#' +#' @includeRmd man/rmd/discrim_linear_sparsediscrim.md details +#' +#' @name details_discrim_linear_sparsediscrim +#' @keywords internal +NULL diff --git a/R/discrim_quad_MASS.R b/R/discrim_quad_MASS.R new file mode 100644 index 000000000..420325c83 --- /dev/null +++ b/R/discrim_quad_MASS.R @@ -0,0 +1,12 @@ +#' Quadratic discriminant analysis via MASS +#' +#' [MASS::qda()] fits a model that estimates a multivariate +#' distribution for the predictors separately for the data in each class +#' (Gaussian with separate covariance matrices). Bayes' theorem is used +#' to compute the probability of each class, given the predictor values. +#' +#' @includeRmd man/rmd/discrim_quad_MASS.md details +#' +#' @name details_discrim_quad_MASS +#' @keywords internal +NULL diff --git a/R/discrim_quad_sparsediscrim.R b/R/discrim_quad_sparsediscrim.R new file mode 100644 index 000000000..1e98c7f74 --- /dev/null +++ b/R/discrim_quad_sparsediscrim.R @@ -0,0 +1,11 @@ +#' Quadratic discriminant analysis via regularization +#' +#' Functions in the `sparsediscrim` package fit different types of quadratic +#' discriminant analysis model that regularize the estimates (like the mean or +#' covariance). 
+#' +#' @includeRmd man/rmd/discrim_quad_sparsediscrim.md details +#' +#' @name details_discrim_quad_sparsediscrim +#' @keywords internal +NULL diff --git a/R/discrim_regularized_klaR.R b/R/discrim_regularized_klaR.R new file mode 100644 index 000000000..2b63fc95e --- /dev/null +++ b/R/discrim_regularized_klaR.R @@ -0,0 +1,13 @@ +#' Regularized discriminant analysis via klaR +#' +#' [klaR::rda()] fits a a model that estimates a multivariate +#' distribution for the predictors separately for the data in each class. The +#' structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' +#' theorem is used to compute the probability of each class, given the +#' predictor values. +#' +#' @includeRmd man/rmd/discrim_regularized_klaR.md details +#' +#' @name details_discrim_regularized_klaR +#' @keywords internal +NULL diff --git a/R/gen_additive_mod_mgcv.R b/R/gen_additive_mod_mgcv.R index a0daf3bc3..7842238ca 100644 --- a/R/gen_additive_mod_mgcv.R +++ b/R/gen_additive_mod_mgcv.R @@ -3,7 +3,7 @@ #' [mgcv::gam()] fits a generalized linear model with additive smoother terms #' for continuous predictors. #' -#' @includeRmd man/rmd/gen_additive_mod_mgcv.Rmd details +#' @includeRmd man/rmd/gen_additive_mod_mgcv.md details #' #' @name details_gen_additive_mod_mgcv #' @keywords internal diff --git a/R/glmnet_details.R b/R/glmnet_details.R index f1436df41..820f165bb 100644 --- a/R/glmnet_details.R +++ b/R/glmnet_details.R @@ -3,7 +3,7 @@ #' glmnet is a popular statistical model for regularized generalized linear #' models. These notes reflect common questions about this particular model. 
#' -#' @includeRmd man/rmd/glmnet-details.Rmd details +#' @includeRmd man/rmd/glmnet-details.md details #' #' @name glmnet-details #' @keywords internal diff --git a/R/knit_engine_docs.R b/R/knit_engine_docs.R new file mode 100644 index 000000000..29eae97d3 --- /dev/null +++ b/R/knit_engine_docs.R @@ -0,0 +1,37 @@ +#' Knit engine-specific documentation +#' @param pattern A regular expression to specify which files to knit. The +#' default knits all engine documentation files. +#' @param ... Options to pass to [knitr::knit()] such as `quiet = TRUE`. +#' @return A tibble with column `file` for the file name and `result` (a +#' character vector that echos the output file name or, when there is +#' a failure, the error message). +#' @keywords internal +#' @export +knit_engine_docs <- function(pattern = NULL, ...) { + rmd_files <- list.files("man/rmd", pattern = "\\.Rmd", full.names = TRUE) + + if (!is.null(pattern)) { + target_exists <- grepl(pattern, rmd_files) + files <- rmd_files[target_exists] + } else { + files <- rmd_files[!grepl("(template-)|(setup\\.)", rmd_files)] + } + outputs <- gsub("Rmd$", "md", files) + + res <- purrr::map2(files, outputs, ~ try(knitr::knit(.x, .y), silent = TRUE)) + res <- purrr::map_chr(res, as.character) + tibble::tibble(file = basename(files), result = res) +} + +# TODO +# - add a function to list extensions included here +# - move all details files to parsnip +# - simplify code to find model files +# - add is_installed() to set code with all extra dependencies +# - list models by mode + + +extensions <- function(x) { + c("baguette", "censored", "discrim", "multilevelmod", "plsmod", + "poissonreg", "rules") +} diff --git a/R/linear_reg_gee.R b/R/linear_reg_gee.R new file mode 100644 index 000000000..608aa5fd6 --- /dev/null +++ b/R/linear_reg_gee.R @@ -0,0 +1,10 @@ +#' Linear regression via generalized estimating equations (GEE) +#' +#' `gee::gee()` uses generalized least squares to fit different types of models +#' that have errors 
that are not independent. +#' +#' @includeRmd man/rmd/linear_reg_gee.md details +#' +#' @name details_linear_reg_gee +#' @keywords internal +NULL diff --git a/R/linear_reg_glmnet.R b/R/linear_reg_glmnet.R index 1b22b70bc..30c30f731 100644 --- a/R/linear_reg_glmnet.R +++ b/R/linear_reg_glmnet.R @@ -2,7 +2,7 @@ #' #' [glmnet::glmnet()] uses regularized least squares to fit models with numeric outcomes. #' -#' @includeRmd man/rmd/linear_reg_glmnet.Rmd details +#' @includeRmd man/rmd/linear_reg_glmnet.md details #' #' @name details_linear_reg_glmnet #' @keywords internal diff --git a/R/linear_reg_keras.R b/R/linear_reg_keras.R index 4f05b859c..9d86e47d2 100644 --- a/R/linear_reg_keras.R +++ b/R/linear_reg_keras.R @@ -2,7 +2,7 @@ #' #' This model uses regularized least squares to fit models with numeric outcomes. #' -#' @includeRmd man/rmd/linear_reg_keras.Rmd details +#' @includeRmd man/rmd/linear_reg_keras.md details #' #' @name details_linear_reg_keras #' @keywords internal diff --git a/R/linear_reg_lm.R b/R/linear_reg_lm.R index bd5054e28..8f087743c 100644 --- a/R/linear_reg_lm.R +++ b/R/linear_reg_lm.R @@ -2,7 +2,7 @@ #' #' [stats::lm()] uses ordinary least squares to fit models with numeric outcomes. #' -#' @includeRmd man/rmd/linear_reg_lm.Rmd details +#' @includeRmd man/rmd/linear_reg_lm.md details #' #' @name details_linear_reg_lm #' @keywords internal diff --git a/R/linear_reg_lmer.R b/R/linear_reg_lmer.R new file mode 100644 index 000000000..e086af111 --- /dev/null +++ b/R/linear_reg_lmer.R @@ -0,0 +1,10 @@ +#' Linear regression via mixed models +#' +#' The `lmer` engine estimates fixed and random effect regression parameters +#' using maximum likelihood (or restricted maximum likelihood) estimation. 
+#' +#' @includeRmd man/rmd/linear_reg_lmer.md details +#' +#' @name details_linear_reg_lmer +#' @keywords internal +NULL diff --git a/R/linear_reg_spark.R b/R/linear_reg_spark.R index ab7dd2706..5d21918b9 100644 --- a/R/linear_reg_spark.R +++ b/R/linear_reg_spark.R @@ -3,7 +3,7 @@ #' [sparklyr::ml_linear_regression()] uses regularized least squares to fit #' models with numeric outcomes. #' -#' @includeRmd man/rmd/linear_reg_spark.Rmd details +#' @includeRmd man/rmd/linear_reg_spark.md details #' #' @name details_linear_reg_spark #' @keywords internal diff --git a/R/linear_reg_stan.R b/R/linear_reg_stan.R index f6ff4251d..1c77437d4 100644 --- a/R/linear_reg_stan.R +++ b/R/linear_reg_stan.R @@ -2,7 +2,7 @@ #' #' The `"stan"` engine estimates regression parameters using Bayesian estimation. #' -#' @includeRmd man/rmd/linear_reg_stan.Rmd details +#' @includeRmd man/rmd/linear_reg_stan.md details #' #' @name details_linear_reg_stan #' @keywords internal diff --git a/R/linear_reg_stan_glmer.R b/R/linear_reg_stan_glmer.R new file mode 100644 index 000000000..23a76eb78 --- /dev/null +++ b/R/linear_reg_stan_glmer.R @@ -0,0 +1,10 @@ +#' Linear regression via hierarchical Bayesian methods +#' +#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' Bayesian estimation. +#' +#' @includeRmd man/rmd/linear_reg_stan_glmer.md details +#' +#' @name details_linear_reg_stan_glmer +#' @keywords internal +NULL diff --git a/R/logistic_reg_LiblineaR.R b/R/logistic_reg_LiblineaR.R index b414e41f2..2bdc983f4 100644 --- a/R/logistic_reg_LiblineaR.R +++ b/R/logistic_reg_LiblineaR.R @@ -4,7 +4,7 @@ #' linear combination of the predictors is used to model the log odds of an #' event. 
#' -#' @includeRmd man/rmd/logistic_reg_LiblineaR.Rmd details +#' @includeRmd man/rmd/logistic_reg_LiblineaR.md details #' #' @name details_logistic_reg_LiblineaR #' @keywords internal diff --git a/R/logistic_reg_gee.R b/R/logistic_reg_gee.R new file mode 100644 index 000000000..3050ab2c4 --- /dev/null +++ b/R/logistic_reg_gee.R @@ -0,0 +1,10 @@ +#' Logistic regression via generalized estimating equations (GEE) +#' +#' `gee::gee()` uses generalized least squares to fit different types of models +#' that have errors that are not independent. +#' +#' @includeRmd man/rmd/logistic_reg_gee.md details +#' +#' @name details_logistic_reg_gee +#' @keywords internal +NULL diff --git a/R/logistic_reg_glm.R b/R/logistic_reg_glm.R index b44a2fe00..bc173a7ae 100644 --- a/R/logistic_reg_glm.R +++ b/R/logistic_reg_glm.R @@ -4,7 +4,7 @@ #' linear combination of the predictors is used to model the log odds of an #' event. #' -#' @includeRmd man/rmd/logistic_reg_glm.Rmd details +#' @includeRmd man/rmd/logistic_reg_glm.md details #' #' @name details_logistic_reg_glm #' @keywords internal diff --git a/R/logistic_reg_glmer.R b/R/logistic_reg_glmer.R new file mode 100644 index 000000000..af14a12fe --- /dev/null +++ b/R/logistic_reg_glmer.R @@ -0,0 +1,10 @@ +#' Logistic regression via mixed models +#' +#' The `glmer` engine estimates fixed and random effect regression parameters +#' using maximum likelihood (or restricted maximum likelihood) estimation. +#' +#' @includeRmd man/rmd/logistic_reg_glmer.md details +#' +#' @name details_logistic_reg_glmer +#' @keywords internal +NULL diff --git a/R/logistic_reg_glmnet.R b/R/logistic_reg_glmnet.R index af927b7a6..70143b693 100644 --- a/R/logistic_reg_glmnet.R +++ b/R/logistic_reg_glmnet.R @@ -4,7 +4,7 @@ #' linear combination of the predictors is used to model the log odds of an #' event. 
#' -#' @includeRmd man/rmd/logistic_reg_glmnet.Rmd details +#' @includeRmd man/rmd/logistic_reg_glmnet.md details #' #' @name details_logistic_reg_glmnet #' @keywords internal diff --git a/R/logistic_reg_keras.R b/R/logistic_reg_keras.R index 54ca11a52..13f145878 100644 --- a/R/logistic_reg_keras.R +++ b/R/logistic_reg_keras.R @@ -4,7 +4,7 @@ #' linear combination of the predictors is used to model the log odds of an #' event. #' -#' @includeRmd man/rmd/logistic_reg_keras.Rmd details +#' @includeRmd man/rmd/logistic_reg_keras.md details #' #' @name details_logistic_reg_keras #' @keywords internal diff --git a/R/logistic_reg_spark.R b/R/logistic_reg_spark.R index 316db1a65..d73c38c8b 100644 --- a/R/logistic_reg_spark.R +++ b/R/logistic_reg_spark.R @@ -4,7 +4,7 @@ #' binary outcomes. A linear combination of the predictors is used to model the #' log odds of an event. #' -#' @includeRmd man/rmd/logistic_reg_spark.Rmd details +#' @includeRmd man/rmd/logistic_reg_spark.md details #' #' @name details_logistic_reg_spark #' @keywords internal diff --git a/R/logistic_reg_stan.R b/R/logistic_reg_stan.R index 08b07f6ef..f497c7d5c 100644 --- a/R/logistic_reg_stan.R +++ b/R/logistic_reg_stan.R @@ -4,7 +4,7 @@ #' A linear combination of the predictors is used to model the log odds of an #' event. #' -#' @includeRmd man/rmd/logistic_reg_stan.Rmd details +#' @includeRmd man/rmd/logistic_reg_stan.md details #' #' @name details_logistic_reg_stan #' @keywords internal diff --git a/R/logistic_reg_stan_glmer.R b/R/logistic_reg_stan_glmer.R new file mode 100644 index 000000000..9e227a96d --- /dev/null +++ b/R/logistic_reg_stan_glmer.R @@ -0,0 +1,10 @@ +#' Logistic regression via hierarchical Bayesian methods +#' +#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' Bayesian estimation. 
+#' +#' @includeRmd man/rmd/logistic_reg_stan_glmer.md details +#' +#' @name details_logistic_reg_stan_glmer +#' @keywords internal +NULL diff --git a/R/mars_earth.R b/R/mars_earth.R index 18ad8cd1c..ee6dc56af 100644 --- a/R/mars_earth.R +++ b/R/mars_earth.R @@ -4,7 +4,7 @@ #' some predictors. These features resemble hinge functions and the result is #' a model that is a segmented regression in small dimensions. #' -#' @includeRmd man/rmd/mars_earth.Rmd details +#' @includeRmd man/rmd/mars_earth.md details #' #' @name details_mars_earth #' @keywords internal diff --git a/R/mlp_keras.R b/R/mlp_keras.R index 0c3917f1b..9af724445 100644 --- a/R/mlp_keras.R +++ b/R/mlp_keras.R @@ -2,7 +2,7 @@ #' #' [keras_mlp()] fits a single layer, feed-forward neural network. #' -#' @includeRmd man/rmd/mlp_keras.Rmd details +#' @includeRmd man/rmd/mlp_keras.md details #' #' @name details_mlp_keras #' @keywords internal diff --git a/R/mlp_nnet.R b/R/mlp_nnet.R index 774ae7f57..4ee34afd7 100644 --- a/R/mlp_nnet.R +++ b/R/mlp_nnet.R @@ -2,7 +2,7 @@ #' #' [nnet::nnet()] fits a single layer, feed-forward neural network. #' -#' @includeRmd man/rmd/mlp_nnet.Rmd details +#' @includeRmd man/rmd/mlp_nnet.md details #' #' @name details_mlp_nnet #' @keywords internal diff --git a/R/multinom_reg_glmnet.R b/R/multinom_reg_glmnet.R index 3ddb98120..f40be0f38 100644 --- a/R/multinom_reg_glmnet.R +++ b/R/multinom_reg_glmnet.R @@ -3,7 +3,7 @@ #' [glmnet::glmnet()] fits a model that uses linear predictors to predict #' multiclass data using the multinomial distribution. 
#' -#' @includeRmd man/rmd/multinom_reg_glmnet.Rmd details +#' @includeRmd man/rmd/multinom_reg_glmnet.md details #' #' @name details_multinom_reg_glmnet #' @keywords internal diff --git a/R/multinom_reg_keras.R b/R/multinom_reg_keras.R index dfe0a396e..7003a38ef 100644 --- a/R/multinom_reg_keras.R +++ b/R/multinom_reg_keras.R @@ -3,7 +3,7 @@ #' [keras_mlp()] fits a model that uses linear predictors to predict #' multiclass data using the multinomial distribution. #' -#' @includeRmd man/rmd/multinom_reg_keras.Rmd details +#' @includeRmd man/rmd/multinom_reg_keras.md details #' #' @name details_multinom_reg_keras #' @keywords internal diff --git a/R/multinom_reg_nnet.R b/R/multinom_reg_nnet.R index 049f3d355..fd9932654 100644 --- a/R/multinom_reg_nnet.R +++ b/R/multinom_reg_nnet.R @@ -3,7 +3,7 @@ #' [nnet::multinom()] fits a model that uses linear predictors to predict #' multiclass data using the multinomial distribution. #' -#' @includeRmd man/rmd/multinom_reg_nnet.Rmd details +#' @includeRmd man/rmd/multinom_reg_nnet.md details #' #' @name details_multinom_reg_nnet #' @keywords internal diff --git a/R/multinom_reg_spark.R b/R/multinom_reg_spark.R index 2cb5adc4e..c6a8f6663 100644 --- a/R/multinom_reg_spark.R +++ b/R/multinom_reg_spark.R @@ -3,7 +3,7 @@ #' [sparklyr::ml_logistic_regression()] fits a model that uses linear #' predictors to predict multiclass data using the multinomial distribution. #' -#' @includeRmd man/rmd/multinom_reg_spark.Rmd details +#' @includeRmd man/rmd/multinom_reg_spark.md details #' #' @name details_multinom_reg_spark #' @keywords internal diff --git a/R/naive_Bayes_klaR.R b/R/naive_Bayes_klaR.R new file mode 100644 index 000000000..b5e254bf6 --- /dev/null +++ b/R/naive_Bayes_klaR.R @@ -0,0 +1,10 @@ +#' Naive Bayes models via klaR +#' +#' [klaR::NaiveBayes()] fits a model that uses Bayes' theorem to compute the +#' probability of each class, given the predictor values. 
+#' +#' @includeRmd man/rmd/naive_Bayes_klaR.md details +#' +#' @name details_naive_Bayes_klaR +#' @keywords internal +NULL diff --git a/R/naive_Bayes_naivebayes.R b/R/naive_Bayes_naivebayes.R new file mode 100644 index 000000000..67cfe8274 --- /dev/null +++ b/R/naive_Bayes_naivebayes.R @@ -0,0 +1,10 @@ +#' Naive Bayes models via naivebayes +#' +#' [naivebayes::naive_bayes()] fits a model that uses Bayes' theorem to compute +#' the probability of each class, given the predictor values. +#' +#' @includeRmd man/rmd/naive_Bayes_naivebayes.md details +#' +#' @name details_naive_Bayes_naivebayes +#' @keywords internal +NULL diff --git a/R/nearest_neighbor_kknn.R b/R/nearest_neighbor_kknn.R index bd00e391c..ed04e2373 100644 --- a/R/nearest_neighbor_kknn.R +++ b/R/nearest_neighbor_kknn.R @@ -3,7 +3,7 @@ #' [kknn::train.kknn()] fits a model that uses the `K` most similar data points #' from the training set to predict new samples. #' -#' @includeRmd man/rmd/nearest_neighbor_kknn.Rmd details +#' @includeRmd man/rmd/nearest_neighbor_kknn.md details #' #' @name details_nearest_neighbor_kknn #' @keywords internal diff --git a/R/nullmodel.R b/R/nullmodel.R index ec804c2ce..6ef7c6228 100644 --- a/R/nullmodel.R +++ b/R/nullmodel.R @@ -137,7 +137,7 @@ predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) { #' \item \pkg{R}: `"parsnip"` #' } #' -#' @includeRmd man/rmd/null-model.Rmd details +#' @includeRmd man/rmd/null-model.md details #' #' @seealso [fit.model_spec()] #' @examples diff --git a/R/pls_mixOmics.R b/R/pls_mixOmics.R new file mode 100644 index 000000000..76e0624c8 --- /dev/null +++ b/R/pls_mixOmics.R @@ -0,0 +1,9 @@ +#' Partial least squares via mixOmics +#' +#' The mixOmics package can fit several different types of PLS models. 
+#' +#' @includeRmd man/rmd/pls_mixOmics.md details +#' +#' @name details_pls_mixOmics +#' @keywords internal +NULL diff --git a/R/poisson_reg_gee.R b/R/poisson_reg_gee.R new file mode 100644 index 000000000..c8f407ba5 --- /dev/null +++ b/R/poisson_reg_gee.R @@ -0,0 +1,10 @@ +#' Poisson regression via generalized estimating equations (GEE) +#' +#' `gee::gee()` uses generalized least squares to fit different types of models +#' that have errors that are not independent. +#' +#' @includeRmd man/rmd/poisson_reg_gee.md details +#' +#' @name details_poisson_reg_gee +#' @keywords internal +NULL diff --git a/R/poisson_reg_glm.R b/R/poisson_reg_glm.R new file mode 100644 index 000000000..772c3f14b --- /dev/null +++ b/R/poisson_reg_glm.R @@ -0,0 +1,9 @@ +#' Poisson regression via glm +#' +#' [stats::glm()] uses maximum likelihood to fit a model for count data. +#' +#' @includeRmd man/rmd/poisson_reg_glm.md details +#' +#' @name details_poisson_reg_glm +#' @keywords internal +NULL diff --git a/R/poisson_reg_glmer.R b/R/poisson_reg_glmer.R new file mode 100644 index 000000000..26b0bddd7 --- /dev/null +++ b/R/poisson_reg_glmer.R @@ -0,0 +1,10 @@ +#' Poisson regression via mixed models +#' +#' The `glmer` engine estimates fixed and random effect regression parameters +#' using maximum likelihood (or restricted maximum likelihood) estimation. +#' +#' @includeRmd man/rmd/poisson_reg_glmer.md details +#' +#' @name details_poisson_reg_glmer +#' @keywords internal +NULL diff --git a/R/poisson_reg_glmnet.R b/R/poisson_reg_glmnet.R new file mode 100644 index 000000000..5358b0fc3 --- /dev/null +++ b/R/poisson_reg_glmnet.R @@ -0,0 +1,10 @@ +#' Poisson regression via glmnet +#' +#' `glmnet::glmnet()` uses penalized maximum likelihood to fit a model for +#' count data. 
+#' +#' @includeRmd man/rmd/poisson_reg_glmnet.md details +#' +#' @name details_poisson_reg_glmnet +#' @keywords internal +NULL diff --git a/R/poisson_reg_hurdle.R b/R/poisson_reg_hurdle.R new file mode 100644 index 000000000..8ad3a2c36 --- /dev/null +++ b/R/poisson_reg_hurdle.R @@ -0,0 +1,11 @@ +#' Poisson regression via pscl +#' +#' [pscl::hurdle()] uses maximum likelihood estimation to fit a model for +#' count data that has separate model terms for predicting the counts and for +#' predicting the probability of a zero count. +#' +#' @includeRmd man/rmd/poisson_reg_hurdle.md details +#' +#' @name details_poisson_reg_hurdle +#' @keywords internal +NULL diff --git a/R/poisson_reg_stan.R b/R/poisson_reg_stan.R new file mode 100644 index 000000000..bff455f37 --- /dev/null +++ b/R/poisson_reg_stan.R @@ -0,0 +1,10 @@ +#' Poisson regression via stan +#' +#' [rstanarm::stan_glm()] uses Bayesian estimation to fit a model for +#' count data. +#' +#' @includeRmd man/rmd/poisson_reg_stan.md details +#' +#' @name details_poisson_reg_stan +#' @keywords internal +NULL diff --git a/R/poisson_reg_stan_glmer.R b/R/poisson_reg_stan_glmer.R new file mode 100644 index 000000000..166014335 --- /dev/null +++ b/R/poisson_reg_stan_glmer.R @@ -0,0 +1,10 @@ +#' Poisson regression via hierarchical Bayesian methods +#' +#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' Bayesian estimation. +#' +#' @includeRmd man/rmd/poisson_reg_stan_glmer.md details +#' +#' @name details_poisson_reg_stan_glmer +#' @keywords internal +NULL diff --git a/R/poisson_reg_zeroinfl.R b/R/poisson_reg_zeroinfl.R new file mode 100644 index 000000000..f2ab1d392 --- /dev/null +++ b/R/poisson_reg_zeroinfl.R @@ -0,0 +1,11 @@ +#' Poisson regression via pscl +#' +#' [pscl::zeroinfl()] uses maximum likelihood estimation to fit a model for +#' count data that has separate model terms for predicting the counts and for +#' predicting the probability of a zero count. 
+#' +#' @includeRmd man/rmd/poisson_reg_zeroinfl.md details +#' +#' @name details_poisson_reg_zeroinfl +#' @keywords internal +NULL diff --git a/R/proportional_hazards_glmnet.R b/R/proportional_hazards_glmnet.R new file mode 100644 index 000000000..00b510ef7 --- /dev/null +++ b/R/proportional_hazards_glmnet.R @@ -0,0 +1,9 @@ +#' Proportional hazards regression +#' +#' [glmnet::glmnet()] fits a regularized Cox proportional hazards model. +#' +#' @includeRmd man/rmd/proportional_hazards_glmnet.md details +#' +#' @name details_proportional_hazards_glmnet +#' @keywords internal +NULL diff --git a/R/proportional_hazards_survival.R b/R/proportional_hazards_survival.R new file mode 100644 index 000000000..39acf58e9 --- /dev/null +++ b/R/proportional_hazards_survival.R @@ -0,0 +1,9 @@ +#' Proportional hazards regression +#' +#' [survival::coxph()] fits a Cox proportional hazards model. +#' +#' @includeRmd man/rmd/proportional_hazards_survival.md details +#' +#' @name details_proportional_hazards_survival +#' @keywords internal +NULL diff --git a/R/rand_forest_party.R b/R/rand_forest_party.R new file mode 100644 index 000000000..ca9a8eb06 --- /dev/null +++ b/R/rand_forest_party.R @@ -0,0 +1,11 @@ +#' Random forests via party +#' +#' [party::cforest()] fits a model that creates a large number of decision +#' trees, each independent of the others. The final prediction uses all +#' predictions from the individual trees and combines them. +#' +#' @includeRmd man/rmd/rand_forest_party.md details +#' +#' @name details_rand_forest_party +#' @keywords internal +NULL diff --git a/R/rand_forest_randomForest.R b/R/rand_forest_randomForest.R index df3322c5e..9d044be8e 100644 --- a/R/rand_forest_randomForest.R +++ b/R/rand_forest_randomForest.R @@ -4,7 +4,7 @@ #' decision trees, each independent of the others. The final prediction uses all #' predictions from the individual trees and combines them. 
#' -#' @includeRmd man/rmd/rand_forest_randomForest.Rmd details +#' @includeRmd man/rmd/rand_forest_randomForest.md details #' #' @name details_rand_forest_randomForest #' @keywords internal diff --git a/R/rand_forest_ranger.R b/R/rand_forest_ranger.R index 44c8ca72a..7eb4942a4 100644 --- a/R/rand_forest_ranger.R +++ b/R/rand_forest_ranger.R @@ -4,7 +4,7 @@ #' trees, each independent of the others. The final prediction uses all #' predictions from the individual trees and combines them. #' -#' @includeRmd man/rmd/rand_forest_ranger.Rmd details +#' @includeRmd man/rmd/rand_forest_ranger.md details #' #' @name details_rand_forest_ranger #' @keywords internal diff --git a/R/rand_forest_spark.R b/R/rand_forest_spark.R index 0cb69438d..ec0dacfab 100644 --- a/R/rand_forest_spark.R +++ b/R/rand_forest_spark.R @@ -4,7 +4,7 @@ #' decision trees, each independent of the others. The final prediction uses all #' predictions from the individual trees and combines them. #' -#' @includeRmd man/rmd/rand_forest_spark.Rmd details +#' @includeRmd man/rmd/rand_forest_spark.md details #' #' @name details_rand_forest_spark #' @keywords internal diff --git a/R/rule_fit_xrf.R b/R/rule_fit_xrf.R new file mode 100644 index 000000000..46df5ebc7 --- /dev/null +++ b/R/rule_fit_xrf.R @@ -0,0 +1,11 @@ +#' RuleFit models via xrf +#' +#' [xrf::xrf()] fits a model that derives simple feature rules from a tree +#' ensemble and uses them as features to a regularized model. [rules::xrf_fit()] +#' is a wrapper around this function. +#' +#' @includeRmd man/rmd/rule_fit_xrf.md details +#' +#' @name details_rule_fit_xrf +#' @keywords internal +NULL diff --git a/R/surv_reg_flexsurv.R b/R/surv_reg_flexsurv.R index af5b37e3b..39404798f 100644 --- a/R/surv_reg_flexsurv.R +++ b/R/surv_reg_flexsurv.R @@ -2,7 +2,7 @@ #' #' [flexsurv::flexsurvreg()] fits a parametric survival model. 
#' -#' @includeRmd man/rmd/surv_reg_flexsurv.Rmd details +#' @includeRmd man/rmd/surv_reg_flexsurv.md details #' #' @name details_surv_reg_flexsurv #' @keywords internal diff --git a/R/surv_reg_survival.R b/R/surv_reg_survival.R index 6d1879347..e4b24a35d 100644 --- a/R/surv_reg_survival.R +++ b/R/surv_reg_survival.R @@ -2,7 +2,7 @@ #' #' [survival::survreg()] fits a parametric survival model. #' -#' @includeRmd man/rmd/surv_reg_survival.Rmd details +#' @includeRmd man/rmd/surv_reg_survival.md details #' #' @name details_surv_reg_survival #' @keywords internal diff --git a/R/survival_reg_flexsurv.R b/R/survival_reg_flexsurv.R new file mode 100644 index 000000000..98df28eb7 --- /dev/null +++ b/R/survival_reg_flexsurv.R @@ -0,0 +1,9 @@ +#' Parametric survival regression +#' +#' [flexsurv::flexsurvreg()] fits a parametric survival model. +#' +#' @includeRmd man/rmd/survival_reg_flexsurv.md details +#' +#' @name details_survival_reg_flexsurv +#' @keywords internal +NULL diff --git a/R/survival_reg_survival.R b/R/survival_reg_survival.R new file mode 100644 index 000000000..2df45c4e5 --- /dev/null +++ b/R/survival_reg_survival.R @@ -0,0 +1,9 @@ +#' Parametric survival regression +#' +#' [survival::survreg()] fits a parametric survival model. +#' +#' @includeRmd man/rmd/survival_reg_survival.md details +#' +#' @name details_survival_reg_survival +#' @keywords internal +NULL diff --git a/R/svm_linear_LiblineaR.R b/R/svm_linear_LiblineaR.R index 8a65e8403..8e210f911 100644 --- a/R/svm_linear_LiblineaR.R +++ b/R/svm_linear_LiblineaR.R @@ -5,7 +5,7 @@ #' For regression, the model optimizes a robust loss function that is only #' affected by very large model residuals. 
#' -#' @includeRmd man/rmd/svm_linear_LiblineaR.Rmd details +#' @includeRmd man/rmd/svm_linear_LiblineaR.md details #' #' @name details_svm_linear_LiblineaR #' @keywords internal diff --git a/R/svm_linear_kernlab.R b/R/svm_linear_kernlab.R index dd781d447..a380e3130 100644 --- a/R/svm_linear_kernlab.R +++ b/R/svm_linear_kernlab.R @@ -5,7 +5,7 @@ #' For regression, the model optimizes a robust loss function that is only #' affected by very large model residuals. #' -#' @includeRmd man/rmd/svm_linear_kernlab.Rmd details +#' @includeRmd man/rmd/svm_linear_kernlab.md details #' #' @name details_svm_linear_kernlab #' @keywords internal diff --git a/R/svm_poly_kernlab.R b/R/svm_poly_kernlab.R index b9c694232..0ac76609d 100644 --- a/R/svm_poly_kernlab.R +++ b/R/svm_poly_kernlab.R @@ -5,7 +5,7 @@ #' For regression, the model optimizes a robust loss function that is only #' affected by very large model residuals. #' -#' @includeRmd man/rmd/svm_poly_kernlab.Rmd details +#' @includeRmd man/rmd/svm_poly_kernlab.md details #' #' @name details_svm_poly_kernlab #' @keywords internal diff --git a/R/svm_rbf_kernlab.R b/R/svm_rbf_kernlab.R index 92ec43554..9b989bd36 100644 --- a/R/svm_rbf_kernlab.R +++ b/R/svm_rbf_kernlab.R @@ -5,7 +5,7 @@ #' For regression, the model optimizes a robust loss function that is only #' affected by very large model residuals. 
#' -#' @includeRmd man/rmd/svm_rbf_kernlab.Rmd details +#' @includeRmd man/rmd/svm_rbf_kernlab.md details #' #' @name details_svm_rbf_kernlab #' @keywords internal From 2435b84e3b36c0d1e9591be173884f53a3a3aa37 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:10:26 -0500 Subject: [PATCH 02/65] don't commit engine md files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9d682298c..f45c8601f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ derby.log logs/* revdep/* docs* +man/rmd/*.md From dc1921a48fd1e478a63bec81bf42aca47521a8ca Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:27:48 -0500 Subject: [PATCH 03/65] remove extra files --- R/cubist_and_rulefit.R | 7 - R/knit_engine_docs.R | 5 +- man/details_C5_rules_C5.0.Rd | 68 ++++++ man/details_bag_mars_earth.Rd | 89 ++++++++ man/details_bag_tree_C5.0.Rd | 55 +++++ man/details_bag_tree_rpart.Rd | 209 +++++++++++++++++++ man/details_boost_tree_mboost.Rd | 64 ++++++ man/details_cubist_rules_Cubist.Rd | 67 ++++++ man/details_decision_tree_party.Rd | 71 +++++++ man/details_decision_tree_rpart.Rd | 3 +- man/details_discrim_flexible_earth.Rd | 64 ++++++ man/details_discrim_linear_MASS.Rd | 51 +++++ man/details_discrim_linear_mda.Rd | 58 +++++ man/details_discrim_linear_sparsediscrim.Rd | 82 ++++++++ man/details_discrim_quad_MASS.Rd | 52 +++++ man/details_discrim_quad_sparsediscrim.Rd | 78 +++++++ man/details_discrim_regularized_klaR.Rd | 74 +++++++ man/details_gen_additive_mod_mgcv.Rd | 2 +- man/details_linear_reg_gee.Rd | 97 +++++++++ man/details_linear_reg_lmer.Rd | 116 ++++++++++ man/details_linear_reg_stan_glmer.Rd | 133 ++++++++++++ man/details_logistic_reg_gee.Rd | 97 +++++++++ man/details_logistic_reg_glmer.Rd | 115 ++++++++++ man/details_logistic_reg_stan_glmer.Rd | 132 ++++++++++++ man/details_naive_Bayes_klaR.Rd | 59 ++++++ man/details_naive_Bayes_naivebayes.Rd | 60 ++++++ man/details_pls_mixOmics.Rd | 98 +++++++++ 
man/details_poisson_reg_gee.Rd | 96 +++++++++ man/details_poisson_reg_glm.Rd | 38 ++++ man/details_poisson_reg_glmer.Rd | 114 ++++++++++ man/details_poisson_reg_glmnet.Rd | 62 ++++++ man/details_poisson_reg_hurdle.Rd | 108 ++++++++++ man/details_poisson_reg_stan.Rd | 87 ++++++++ man/details_poisson_reg_stan_glmer.Rd | 131 ++++++++++++ man/details_poisson_reg_zeroinfl.Rd | 109 ++++++++++ man/details_proportional_hazards_glmnet.Rd | 135 ++++++++++++ man/details_proportional_hazards_survival.Rd | 111 ++++++++++ man/details_rand_forest_party.Rd | 65 ++++++ man/details_rule_fit_xrf.Rd | 137 ++++++++++++ man/details_survival_reg_flexsurv.Rd | 54 +++++ man/details_survival_reg_survival.Rd | 81 +++++++ man/doc-tools.Rd | 2 +- man/knit_engine_docs.Rd | 21 ++ 43 files changed, 3343 insertions(+), 14 deletions(-) delete mode 100644 R/cubist_and_rulefit.R create mode 100644 man/details_C5_rules_C5.0.Rd create mode 100644 man/details_bag_mars_earth.Rd create mode 100644 man/details_bag_tree_C5.0.Rd create mode 100644 man/details_bag_tree_rpart.Rd create mode 100644 man/details_boost_tree_mboost.Rd create mode 100644 man/details_cubist_rules_Cubist.Rd create mode 100644 man/details_decision_tree_party.Rd create mode 100644 man/details_discrim_flexible_earth.Rd create mode 100644 man/details_discrim_linear_MASS.Rd create mode 100644 man/details_discrim_linear_mda.Rd create mode 100644 man/details_discrim_linear_sparsediscrim.Rd create mode 100644 man/details_discrim_quad_MASS.Rd create mode 100644 man/details_discrim_quad_sparsediscrim.Rd create mode 100644 man/details_discrim_regularized_klaR.Rd create mode 100644 man/details_linear_reg_gee.Rd create mode 100644 man/details_linear_reg_lmer.Rd create mode 100644 man/details_linear_reg_stan_glmer.Rd create mode 100644 man/details_logistic_reg_gee.Rd create mode 100644 man/details_logistic_reg_glmer.Rd create mode 100644 man/details_logistic_reg_stan_glmer.Rd create mode 100644 man/details_naive_Bayes_klaR.Rd create mode 100644 
man/details_naive_Bayes_naivebayes.Rd create mode 100644 man/details_pls_mixOmics.Rd create mode 100644 man/details_poisson_reg_gee.Rd create mode 100644 man/details_poisson_reg_glm.Rd create mode 100644 man/details_poisson_reg_glmer.Rd create mode 100644 man/details_poisson_reg_glmnet.Rd create mode 100644 man/details_poisson_reg_hurdle.Rd create mode 100644 man/details_poisson_reg_stan.Rd create mode 100644 man/details_poisson_reg_stan_glmer.Rd create mode 100644 man/details_poisson_reg_zeroinfl.Rd create mode 100644 man/details_proportional_hazards_glmnet.Rd create mode 100644 man/details_proportional_hazards_survival.Rd create mode 100644 man/details_rand_forest_party.Rd create mode 100644 man/details_rule_fit_xrf.Rd create mode 100644 man/details_survival_reg_flexsurv.Rd create mode 100644 man/details_survival_reg_survival.Rd create mode 100644 man/knit_engine_docs.Rd diff --git a/R/cubist_and_rulefit.R b/R/cubist_and_rulefit.R deleted file mode 100644 index 4c0326584..000000000 --- a/R/cubist_and_rulefit.R +++ /dev/null @@ -1,7 +0,0 @@ -#' Comparing Cubist and RuleFit -#' -#' @includeRmd man/rmd/cubist-and-rulefit.Rmd -#' -#' @name cubist_and_rulefit -#' @keywords internal -NULL diff --git a/R/knit_engine_docs.R b/R/knit_engine_docs.R index 29eae97d3..15de1424c 100644 --- a/R/knit_engine_docs.R +++ b/R/knit_engine_docs.R @@ -1,13 +1,12 @@ #' Knit engine-specific documentation #' @param pattern A regular expression to specify which files to knit. The #' default knits all engine documentation files. -#' @param ... Options to pass to [knitr::knit()] such as `quiet = TRUE`. #' @return A tibble with column `file` for the file name and `result` (a #' character vector that echos the output file name or, when there is #' a failure, the error message). #' @keywords internal #' @export -knit_engine_docs <- function(pattern = NULL, ...) 
{ +knit_engine_docs <- function(pattern = NULL) { rmd_files <- list.files("man/rmd", pattern = "\\.Rmd", full.names = TRUE) if (!is.null(pattern)) { @@ -24,8 +23,6 @@ knit_engine_docs <- function(pattern = NULL, ...) { } # TODO -# - add a function to list extensions included here -# - move all details files to parsnip # - simplify code to find model files # - add is_installed() to set code with all extra dependencies # - list models by mode diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd new file mode 100644 index 000000000..4b00c7ae1 --- /dev/null +++ b/man/details_C5_rules_C5.0.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/C5_rules_C5.0.R +\name{details_C5_rules_C5.0} +\alias{details_C5_rules_C5.0} +\title{C5.0 rule-based classification models} +\description{ +\code{\link[C50:C5.0]{C50::C5.0()}} fits model that derives feature rules from a tree for +prediction. A single tree or boosted ensemble can be used. \code{\link[rules:rules-internal]{rules::c5_fit()}} +is a wrapper around this function. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameters: +\itemize{ +\item \code{trees}: # Trees (type: integer, default: 1L) +} + +Note that C5.0 has a tool for \emph{early stopping} during boosting where +less iterations of boosting are performed than the number requested. +\code{C5_rules()} turns this feature off (although it can be re-enabled using +\code{\link[C50:C5.0Control]{C50::C5.0Control()}}). +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{C5_rules( + trees = integer(1), + min_n = integer(1) +) \%>\% + set_engine("C5.0") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## C5.0 Model Specification (classification) +## +## Main Arguments: +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: C5.0 +## +## Model fit template: +## rules::c5_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## trials = integer(1), minCases = integer(1)) +} +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of +the 5th Australian Joint Conference On Artificial Intelligence, +pp. 343-348. +\item Quinlan R (1993).“Combining Instance-Based and Model-Based +Learning.” Proceedings of the Tenth International Conference on +Machine Learning, pp. 236-243. +\item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd new file mode 100644 index 000000000..b196134d2 --- /dev/null +++ b/man/details_bag_mars_earth.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bag_mars_earth.R +\name{details_bag_mars_earth} +\alias{details_bag_mars_earth} +\title{Bagged MARS via earth} +\description{ +\code{\link[baguette:bagger]{baguette::bagger()}} creates an collection of MARS models forming an +ensemble. All models in the ensemble are combined to produce a final prediction. 
+} +\details{ +For this engine, there are multiple modes: classification and regression +\subsection{Tuning Parameters}{ + +This model has 3 tuning parameters: +\itemize{ +\item \code{prod_degree}: Degree of Interaction (type: integer, default: 1L) +\item \code{prune_method}: Pruning Method (type: character, default: +‘backward’) +\item \code{num_terms}: # Model Terms (type: integer, default: see below) +} + +The default value of \code{num_terms} depends on the number of predictor +columns. For a data frame \code{x}, the default is +\code{min(200, max(20, 2 * ncol(x))) + 1} (see +\code{\link[earth:earth]{earth::earth()}} and the reference below). +} + +\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% + set_engine("earth") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (regression) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Computational engine: earth +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), nprune = integer(1), degree = integer(1), +## pmethod = character(1), base_model = "MARS") +} +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% + set_engine("earth") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged MARS Model Specification (classification) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Computational engine: earth +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), nprune = integer(1), degree = integer(1), +## pmethod = character(1), base_model = "MARS") +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. +} + +\subsection{References}{ +\itemize{ +\item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2): +123-140 +\item Friedman, J. 1991. “Multivariate Adaptive Regression Splines.” \emph{The +Annals of Statistics}, vol. 19, no. 1, pp. 1-67. +\item Milborrow, S. \href{http://www.milbo.org/doc/earth-notes.pdf}{“Notes on the earth package.”} +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_bag_tree_C5.0.Rd b/man/details_bag_tree_C5.0.Rd new file mode 100644 index 000000000..e9c80a4a8 --- /dev/null +++ b/man/details_bag_tree_C5.0.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bag_tree_C5.0.R +\name{details_bag_tree_C5.0} +\alias{details_bag_tree_C5.0} +\title{Bagged trees via C5.0} +\description{ +\code{\link[baguette:bagger]{baguette::bagger()}} creates an collection of decision trees forming an +ensemble. All trees in the ensemble are combined to produce a final prediction. 
+} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameters: +\itemize{ +\item \code{min_n}: Minimal Node Size (type: integer, default: 2L) +} +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(min_n = integer()) \%>\% + set_engine("C5.0") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = 0 +## min_n = integer() +## +## Computational engine: C5.0 +## +## Model fit template: +## baguette::bagger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## minCases = integer(), base_model = "C5.0") +} +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Breiman, L. 1996. “Bagging predictors”. Machine Learning. 24 (2): +123-140 +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd new file mode 100644 index 000000000..3f903052e --- /dev/null +++ b/man/details_bag_tree_rpart.Rd @@ -0,0 +1,209 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bag_tree_rpart 2.R, R/bag_tree_rpart.R +\name{details_bag_tree_rpart} +\alias{details_bag_tree_rpart} +\title{Ensembles of CART decision trees} +\description{ +\code{\link[ipred:bagging]{ipred::bagging()}} fits an ensemble of decision trees, using the \code{rpart} package. + +\code{\link[baguette:bagger]{baguette::bagger()}} creates an collection of decision trees forming an +ensemble. All trees in the ensemble are combined to produce a final prediction. 
+} +\details{ +For this engine, there are multiple modes: classification, regression, +and censored regression +\subsection{Tuning Parameters}{ + +This model has 4 tuning parameters: +\itemize{ +\item \code{class_cost}: Class Cost (type: double, default: (see below)) +\item \code{tree_depth}: Tree Depth (type: integer, default: 30L) +\item \code{min_n}: Minimal Node Size (type: integer, default: 2L) +\item \code{cost_complexity}: Cost-Complexity Parameter (type: double, default: +0.01) +} + +For the \code{class_cost} parameter, the value can be a non-negative scalar +for a class cost (where a cost of 1 means no extra cost). This is useful +for when the first level of the outcome factor is the minority class. If +this is not the case, values between zero and one can be used to bias to +the second level of the factor. +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +} +} + +\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +} +} + +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## ipred::bagging(formula = missing_arg(), data = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) +} +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2): +123-140 +\item Hothorn T, Lausen B, Benner A, Radespiel-Troeger M. 2004. Bagging +Survival Trees. \emph{Statistics in Medicine}, 23(1), 77–91. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} + +For this engine, there are multiple modes: classification, regression, +and censored regression +\subsection{Tuning Parameters}{ + +This model has 4 tuning parameters: +\itemize{ +\item \code{class_cost}: Class Cost (type: double, default: (see below)) +\item \code{tree_depth}: Tree Depth (type: integer, default: 30L) +\item \code{min_n}: Minimal Node Size (type: integer, default: 2L) +\item \code{cost_complexity}: Cost-Complexity Parameter (type: double, default: +0.01) +} + +For the \code{class_cost} parameter, the value can be a non-negative scalar +for a class cost (where a cost of 1 means no extra cost). This is useful +for when the first level of the outcome factor is the minority class. If +this is not the case, values between zero and one can be used to bias to +the second level of the factor. +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +} +} + +\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +} +} + +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## ipred::bagging(formula = missing_arg(), data = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) +} +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2): +123-140 +\item Hothorn T, Lausen B, Benner A, Radespiel-Troeger M. 2004. Bagging +Survival Trees. \emph{Statistics in Medicine}, 23(1), 77–91. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_boost_tree_mboost.Rd b/man/details_boost_tree_mboost.Rd new file mode 100644 index 000000000..7dace18ff --- /dev/null +++ b/man/details_boost_tree_mboost.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/boost_tree_mboost.R +\name{details_boost_tree_mboost} +\alias{details_boost_tree_mboost} +\title{Boosted trees} +\description{ +\code{\link[mboost:blackboost]{mboost::blackboost()}} fits a series of decision trees forming an ensemble. +Each tree depends on the results of previous trees. All trees in the +ensemble are combined to produce a final prediction. 
+} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 5 tuning parameters: +\itemize{ +\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see +below) +\item \code{trees}: # Trees (type: integer, default: 100L) +\item \code{tree_depth}: Tree Depth (type: integer, default: 2L) +\item \code{min_n}: Minimal Node Size (type: integer, default: 10L) +\item \code{loss_reduction}: Minimum Loss Reduction (type: double, default: 0) +} + +The \code{mtry} parameter is related to the number of predictors. The default +is to use all predictors. +} + +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{boost_tree() \%>\% + set_engine("mboost") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (censored regression) +## +## Computational engine: mboost +## +## Model fit template: +## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), +## family = mboost::CoxPH()) +} + +\code{\link[=blackboost_train]{blackboost_train()}} is a wrapper around +\code{\link[mboost:blackboost]{mboost::blackboost()}} (and other functions) +that makes it easier to run this model. +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Buehlmann P, Hothorn T. 2007. Boosting algorithms: regularization, +prediction and model fitting. \emph{Statistical Science}, 22(4), 477–505. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd new file mode 100644 index 000000000..a8a524e56 --- /dev/null +++ b/man/details_cubist_rules_Cubist.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cubist_rules_Cubist.R +\name{details_cubist_rules_Cubist} +\alias{details_cubist_rules_Cubist} +\title{Cubist rule-based regression models} +\description{ +\code{\link[Cubist:cubist.default]{Cubist::cubist()}} fits a model that derives simple feature rules from a tree +ensemble and creates regression models within each rule. +\code{\link[rules:rules-internal]{rules::cubist_fit()}} is a wrapper around this function. 
+} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has 3 tuning parameters: +\itemize{ +\item \code{committees}: # Committees (type: integer, default: 1L) +\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 0L) +\item \code{max_rules}: Max. Rules (type: integer, default: NA_integer) +} +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{cubist_rules( + committees = integer(1), + neighbors = integer(1), + max_rules = integer(1) +) \%>\% + set_engine("Cubist") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Cubist Model Specification (regression) +## +## Main Arguments: +## committees = integer(1) +## neighbors = integer(1) +## max_rules = integer(1) +## +## Computational engine: Cubist +## +## Model fit template: +## rules::cubist_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## committees = integer(1), neighbors = integer(1), max_rules = integer(1)) +} +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of +the 5th Australian Joint Conference On Artificial Intelligence, +pp. 343-348. +\item Quinlan R (1993).“Combining Instance-Based and Model-Based +Learning.” Proceedings of the Tenth International Conference on +Machine Learning, pp. 236-243. +\item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd new file mode 100644 index 000000000..483b12d16 --- /dev/null +++ b/man/details_decision_tree_party.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/decision_tree_party.R +\name{details_decision_tree_party} +\alias{details_decision_tree_party} +\title{Decision trees via party} +\description{ +\code{\link[party:ctree]{party::ctree()}} fits a model as a set of if/then statements that creates a +tree-based structure using hypothesis testing methods. 
+} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{tree_depth}: Tree Depth (type: integer, default: see below) +\item \code{min_n}: Minimal Node Size (type: integer, default: 20L) +} + +The \code{tree_depth} parameter defaults to \code{0} which means no restrictions +are applied to tree depth. + +An engine specific parameter for this model is: +\itemize{ +\item \code{mtry}: the number of predictors, selected at random, that are +evaluated for splitting. The default is to use all predictors. +} +} + +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% + set_engine("party") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: party +## +## Model fit template: +## censored::cond_inference_surv_ctree(formula = missing_arg(), +## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) +} + +\code{\link[=cond_inference_surv_ctree]{cond_inference_surv_ctree()}} is a +wrapper around \code{\link[party:ctree]{party::ctree()}} (and other functions) +that makes it easier to run this model. +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{References}{ +\itemize{ +\item Hothorn T, Hornik K, Zeileis A. 2006. Unbiased Recursive +Partitioning: A Conditional Inference Framework. \emph{Journal of +Computational and Graphical Statistics}, 15(3), 651–674. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index 0aa68f2e0..30c09081a 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -8,7 +8,8 @@ creates a tree-based structure. 
} \details{ -For this engine, there are multiple modes: classification and regression +For this engine, there are multiple modes: classification, regression, +and censored regression \subsection{Tuning Parameters}{ This model has 3 tuning parameters: diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd new file mode 100644 index 000000000..ac79c993a --- /dev/null +++ b/man/details_discrim_flexible_earth.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_flexible_earth.R +\name{details_discrim_flexible_earth} +\alias{details_discrim_flexible_earth} +\title{Flexible discriminant analysis via earth} +\description{ +\code{\link[mda:fda]{mda::fda()}} (in conjunction with \code{\link[earth:earth]{earth::earth()}}) can fit a nonlinear +discriminant analysis model that uses nonlinear features created using +multivariate adaptive regression splines (MARS). +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 3 tuning parameters: +\itemize{ +\item \code{num_terms}: # Model Terms (type: integer, default: (see below)) +\item \code{prod_degree}: Degree of Interaction (type: integer, default: 1L) +\item \code{prune_method}: Pruning Method (type: character, default: +‘backward’) +} + +The default value of \code{num_terms} depends on the number of columns (\code{p}): +\code{min(200, max(20, 2 * p)) + 1}. Note that \code{num_terms = 1} is an +intercept-only model. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_flexible(num_terms = integer(0), prod_degree = integer(0), prune_method = character(0)) \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Flexible Discriminant Model Specification (classification) +## +## Main Arguments: +## num_terms = integer(0) +## prod_degree = integer(0) +## prune_method = character(0) +## +## Computational engine: earth +## +## Model fit template: +## mda::fda(formula = missing_arg(), data = missing_arg(), nprune = integer(0), +## degree = integer(0), pmethod = character(0), method = earth::earth) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. +} + +\subsection{References}{ +\itemize{ +\item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by +Optimal Scoring, \emph{Journal of the American Statistical Association}, +89:428, 1255-1270 +\item Friedman (1991). Multivariate Adaptive Regression Splines. \emph{The +Annals of Statistics}, 19(1), 1-67. +} +} +} +\keyword{internal} diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd new file mode 100644 index 000000000..d3be2a696 --- /dev/null +++ b/man/details_discrim_linear_MASS.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_linear_MASS.R +\name{details_discrim_linear_MASS} +\alias{details_discrim_linear_MASS} +\title{Linear discriminant analysis via MASS} +\description{ +\code{\link[MASS:lda]{MASS::lda()}} fits a model that estimates a multivariate +distribution for the predictors separately for the data in each class +(Gaussian with a common covariance matrix). Bayes' theorem is used +to compute the probability of each class, given the predictor values. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. 
+} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear() \%>\% + set_engine("MASS") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +## +## Computational engine: MASS +## +## Model fit template: +## MASS::lda(formula = missing_arg(), data = missing_arg()) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd new file mode 100644 index 000000000..ec9b74f90 --- /dev/null +++ b/man/details_discrim_linear_mda.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_linear_mda.R +\name{details_discrim_linear_mda} +\alias{details_discrim_linear_mda} +\title{Linear discriminant analysis via flexible discriminant analysis} +\description{ +\code{\link[mda:fda]{mda::fda()}} (in conjunction with \code{\link[mda:gen.ridge]{mda::gen.ridge()}}) can fit a linear +discriminant analysis model that penalizes the predictor coefficients with a +quadratic penalty (i.e., a ridge or weight decay approach). +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameter: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: 1.0) +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear(penalty = numeric(0)) \%>\% + set_engine("mda") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +## +## Main Arguments: +## penalty = numeric(0) +## +## Computational engine: mda +## +## Model fit template: +## mda::fda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), +## method = mda::gen.ridge, keep.fitted = FALSE) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by +Optimal Scoring, \emph{Journal of the American Statistical Association}, +89:428, 1255-1270 +} +} +} +\keyword{internal} diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd new file mode 100644 index 000000000..012a47465 --- /dev/null +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_linear_sparsediscrim.R +\name{details_discrim_linear_sparsediscrim} +\alias{details_discrim_linear_sparsediscrim} +\title{Linear discriminant analysis via regularization} +\description{ +Functions in the \code{sparsediscrim} package fit different types of linear +discriminant analysis model that regularize the estimates (like the mean or +covariance). 
+} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameter: +\itemize{ +\item \code{regularization_method}: Regularization Method (type: character, +default: ‘diagonal’) +} + +The possible values of this parameter, and the functions that they +execute, are: +\itemize{ +\item \code{"diagonal"}: +\code{\link[sparsediscrim:lda_diag]{sparsediscrim::lda_diag()}} +\item \code{"min_distance"}: +\code{\link[sparsediscrim:lda_emp_bayes_eigen]{sparsediscrim::lda_emp_bayes_eigen()}} +\item \code{"shrink_mean"}: +\code{\link[sparsediscrim:lda_shrink_mean]{sparsediscrim::lda_shrink_mean()}} +\item \code{"shrink_cov"}: +\code{\link[sparsediscrim:lda_shrink_cov]{sparsediscrim::lda_shrink_cov()}} +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear(regularization_method = character(0)) \%>\% + set_engine("sparsediscrim") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +## +## Main Arguments: +## regularization_method = character(0) +## +## Computational engine: sparsediscrim +## +## Model fit template: +## discrim::fit_regularized_linear(x = missing_arg(), y = missing_arg(), +## method = character(0)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item \code{lda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of +Discrimination Methods for the Classification of Tumors Using Gene +Expression Data, \emph{Journal of the American Statistical Association}, +97:457, 77-87. +\item \code{lda_shrink_mean()}: Tong, Chen, Zhao, Improved mean estimation and +its application to diagonal discriminant analysis, \emph{Bioinformatics}, +Volume 28, Issue 4, 15 February 2012, Pages 531-537. +\item \code{lda_shrink_cov()}: Pang, Tong and Zhao (2009), Shrinkage-based +Diagonal Discriminant Analysis and Its Applications in +High-Dimensional Data. \emph{Biometrics}, 65, 1021-1029. +\item \code{lda_emp_bayes_eigen()}: Srivistava and Kubokawa (2007), Comparison +of Discrimination Methods for High Dimensional Data, \emph{Journal of the +Japan Statistical Society}, 37:1, 123-134. 
+} +} +} +\keyword{internal} diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd new file mode 100644 index 000000000..a70983cdf --- /dev/null +++ b/man/details_discrim_quad_MASS.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_quad_MASS.R +\name{details_discrim_quad_MASS} +\alias{details_discrim_quad_MASS} +\title{Quadratic discriminant analysis via MASS} +\description{ +\code{\link[MASS:qda]{MASS::qda()}} fits a model that estimates a multivariate +distribution for the predictors separately for the data in each class +(Gaussian with separate covariance matrices). Bayes' theorem is used +to compute the probability of each class, given the predictor values. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_quad() \%>\% + set_engine("MASS") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) +## +## Computational engine: MASS +## +## Model fit template: +## MASS::qda(formula = missing_arg(), data = missing_arg()) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations within each outcome +class. For this reason, \emph{zero-variance} predictors (i.e., with a single +unique value) within each class should be eliminated before fitting the +model. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd new file mode 100644 index 000000000..085a69a8e --- /dev/null +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_quad_sparsediscrim.R +\name{details_discrim_quad_sparsediscrim} +\alias{details_discrim_quad_sparsediscrim} +\title{Quadratic discriminant analysis via regularization} +\description{ +Functions in the \code{sparsediscrim} package fit different types of quadratic +discriminant analysis model that regularize the estimates (like the mean or +covariance). 
+} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameter: +\itemize{ +\item \code{regularization_method}: Regularization Method (type: character, +default: ‘diagonal’) +} + +The possible values of this parameter, and the functions that they +execute, are: +\itemize{ +\item \code{"diagonal"}: +\code{\link[sparsediscrim:qda_diag]{sparsediscrim::qda_diag()}} +\item \code{"shrink_mean"}: +\code{\link[sparsediscrim:qda_shrink_mean]{sparsediscrim::qda_shrink_mean()}} +\item \code{"shrink_cov"}: +\code{\link[sparsediscrim:qda_shrink_cov]{sparsediscrim::qda_shrink_cov()}} +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_quad(regularization_method = character(0)) \%>\% + set_engine("sparsediscrim") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) +## +## Main Arguments: +## regularization_method = character(0) +## +## Computational engine: sparsediscrim +## +## Model fit template: +## discrim::fit_regularized_quad(x = missing_arg(), y = missing_arg(), +## method = character(0)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations within each outcome +class. For this reason, \emph{zero-variance} predictors (i.e., with a single +unique value) within each class should be eliminated before fitting the +model. +} + +\subsection{References}{ +\itemize{ +\item \code{qda_diag()}: Dudoit, Fridlyand and Speed (2002) Comparison of +Discrimination Methods for the Classification of Tumors Using Gene +Expression Data, \emph{Journal of the American Statistical Association}, +97:457, 77-87. +\item \code{qda_shrink_mean()}: Tong, Chen, Zhao, Improved mean estimation and +its application to diagonal discriminant analysis, \emph{Bioinformatics}, +Volume 28, Issue 4, 15 February 2012, Pages 531-537. +\item \code{qda_shrink_cov()}: Pang, Tong and Zhao (2009), Shrinkage-based +Diagonal Discriminant Analysis and Its Applications in +High-Dimensional Data. \emph{Biometrics}, 65, 1021-1029. 
+} +} +} +\keyword{internal} diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd new file mode 100644 index 000000000..b73b96857 --- /dev/null +++ b/man/details_discrim_regularized_klaR.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_regularized_klaR.R +\name{details_discrim_regularized_klaR} +\alias{details_discrim_regularized_klaR} +\title{Regularized discriminant analysis via klaR} +\description{ +\code{\link[klaR:rda]{klaR::rda()}} fits a model that estimates a multivariate +distribution for the predictors separately for the data in each class. The +structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' +theorem is used to compute the probability of each class, given the +predictor values. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{frac_common_cov}: Fraction of the Common Covariance Matrix (type: +double, default: (see below)) +\item \code{frac_identity}: Fraction of the Identity Matrix (type: double, +default: (see below)) +} + +Some special cases for the RDA model: +\itemize{ +\item \code{frac_identity = 0} and \code{frac_common_cov = 1} is a linear +discriminant analysis (LDA) model. +\item \code{frac_identity = 0} and \code{frac_common_cov = 0} is a quadratic +discriminant analysis (QDA) model. +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \%>\% + set_engine("klaR") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Regularized Discriminant Model Specification (classification) +## +## Main Arguments: +## frac_common_cov = numeric(0) +## frac_identity = numeric(0) +## +## Computational engine: klaR +## +## Model fit template: +## klaR::rda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), +## gamma = numeric(0)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations within each outcome +class. For this reason, \emph{zero-variance} predictors (i.e., with a single +unique value) within each class should be eliminated before fitting the +model. +} + +\subsection{References}{ +\itemize{ +\item Friedman, J (1989). Regularized Discriminant Analysis. \emph{Journal of +the American Statistical Association}, 84, 165-175. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. 
+} +} +} +\keyword{internal} diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index ea1364a66..9a8d037eb 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -65,7 +65,7 @@ gen_additive_mod() \%>\% fit(mpg ~ wt + gear + cyl + s(disp, k = 10), data = mtcars) }\if{html}{\out{}}\preformatted{## parsnip model object ## -## Fit time: 21ms +## Fit time: 23ms ## ## Family: gaussian ## Link function: identity diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd new file mode 100644 index 000000000..f74e2f8b7 --- /dev/null +++ b/man/details_linear_reg_gee.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_gee.R +\name{details_linear_reg_gee} +\alias{details_linear_reg_gee} +\title{Linear regression via generalized estimating equations (GEE)} +\description{ +\code{gee::gee()} uses generalized least squares to fit different types of models +that have errors that are not independent. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no formal tuning parameters. It might be beneficial to +determine the appropriate correlation structure to use. However, this +typically does not affect the predicted value of the model but does have +an effect on the inferential results and parameter covariance values. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("gee") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = gaussian) +} + +\code{multilevelmod::gee_fit()} is a wrapper around \code{gee()}. +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model cannot accept case weights. + +Both \code{gee::gee()} and \code{geepack::geeglm()} specify the id/cluster variable +using an argument \code{id} that requires a vector. parsnip doesn’t work that +way so we enable this model to be fit using an artificial function called +\code{id_var()} to be used in the formula. So, in the original package, the +call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +}\if{html}{\out{
}} + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) + +linear_reg() \%>\% + set_engine("gee", corstr = "exchangeable") \%>\% + fit(breaks ~ tension + id_var(wool), data = warpbreaks) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +gee_spec <- + linear_reg() \%>\% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = breaks, predictors = c(tension, wool)) \%>\% + add_model(gee_spec, formula = breaks ~ tension + id_var(wool)) + +fit(gee_wflow, data = warpbreaks) +}\if{html}{\out{
}} + +\code{gee()} always prints out warnings and output even when \code{silent = TRUE}. +When using the \code{gee} engine, it will never produce output, even if +\code{silent = FALSE}. + +Also, because of issues with the \code{gee()} function, a supplementary call +to \code{glm()} is needed to get the rank and QR decomposition objects so +that \code{predict()} can be used. +} + +\subsection{References}{ +\itemize{ +\item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using +generalized linear models. \emph{Biometrika}, 73 13–22. +\item Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for +discrete and continuous outcomes. \emph{Biometrics}, 42 121–130. +} +} +} +\keyword{internal} diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd new file mode 100644 index 000000000..1f3b758b4 --- /dev/null +++ b/man/details_linear_reg_lmer.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_lmer.R +\name{details_linear_reg_lmer} +\alias{details_linear_reg_lmer} +\title{Linear regression via mixed models} +\description{ +The \code{lmer} engine estimates fixed and random effect regression parameters +using maximum likelihood (or restricted maximum likelihood) estimation. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("lmer") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Computational engine: lmer +## +## Model fit template: +## lme4::lmer(formula = missing_arg(), data = missing_arg()) +} +} + +\subsection{Predicting new samples}{ + +This model can use subject-specific coefficient estimates to make +predictions (i.e. partial pooling). For example, this equation shows the +linear predictor (\emph{η}) for a random intercept: + +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} + +where \emph{i} denotes the \code{i}th independent experimental unit +(e.g. subject). When the model has seen subject \code{i}, it can use that +subject’s data to adjust the \emph{population} intercept to be more specific +to that subjects results. + +What happens when data are being predicted for a subject that was not +used in the model fit? In that case, this package uses \emph{only} the +population parameter estimates for prediction: + +\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂**x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}} + +Depending on what covariates are in the model, this might have the +effect of making the same prediction for all new samples. The population +parameters are the “best estimate” for a subject that was not included +in the model fit. + +The tidymodels framework deliberately constrains predictions for new +data to not use the training set or other data (to prevent information +leakage). +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. 
+} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("riesby") + +linear_reg() \%>\% + set_engine("lmer") \%>\% + fit(depr_score ~ week + (1|subject), data = riesby) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +lmer_spec <- + linear_reg() \%>\% + set_engine("lmer") + +lmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) \%>\% + add_model(lmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(lmer_wflow, data = riesby) +}\if{html}{\out{
}} +} + +\subsection{References}{ +\itemize{ +\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and +S-PLUS}. Springer, New York, NY +\item West, K, Band Welch, and A Galecki. 2014. \emph{Linear Mixed Models: A +Practical Guide Using Statistical Software}. CRC Press. +\item Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for +statistical modelling in fisheries biology. \emph{ICES Journal of Marine +Science}, Volume 72, Issue 5, Pages 1245–1256. +\item Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, +Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. \emph{A brief +introduction to mixed effects modelling and multi-model inference in +ecology}. PeerJ 6:e4794. +\item DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through +Data Simulation. 2021. \emph{Advances in Methods and Practices in +Psychological Science}. +} +} +} +\keyword{internal} diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd new file mode 100644 index 000000000..5202bc329 --- /dev/null +++ b/man/details_linear_reg_stan_glmer.Rd @@ -0,0 +1,133 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_stan_glmer.R +\name{details_linear_reg_stan_glmer} +\alias{details_linear_reg_stan_glmer} +\title{Linear regression via hierarchical Bayesian methods} +\description{ +The \code{stan_glmer} engine estimates hierarchical regression parameters using +Bayesian estimation. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Important engine-specific options}{ + +Some relevant arguments that can be passed to \code{set_engine()}: +\itemize{ +\item \code{chains}: A positive integer specifying the number of Markov chains. +The default is 4. +\item \code{iter}: A positive integer specifying the number of iterations for +each chain (including warmup). The default is 2000. 
+\item \code{seed}: The seed for random number generation. +\item \code{cores}: Number of cores to use when executing the chains in +parallel. +\item \code{prior}: The prior distribution for the (non-hierarchical) +regression coefficients. +\item \code{prior_intercept}: The prior distribution for the intercept (after +centering all predictors). +} + +See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("stan_glmer") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Computational engine: stan_glmer +## +## Model fit template: +## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::gaussian, refresh = 0) +} +} + +\subsection{Predicting new samples}{ + +This model can use subject-specific coefficient estimates to make +predictions (i.e. partial pooling). For example, this equation shows the +linear predictor (\emph{η}) for a random intercept: + +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} + +where \emph{i} denotes the \code{i}th independent experimental unit +(e.g. subject). When the model has seen subject \code{i}, it can use that +subject’s data to adjust the \emph{population} intercept to be more specific +to that subjects results. + +What happens when data are being predicted for a subject that was not +used in the model fit? In that case, this package uses \emph{only} the +population parameter estimates for prediction: + +\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂**x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}} + +Depending on what covariates are in the model, this might have the +effect of making the same prediction for all new samples. The population +parameters are the “best estimate” for a subject that was not included +in the model fit. + +The tidymodels framework deliberately constrains predictions for new +data to not use the training set or other data (to prevent information +leakage). +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. 
However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("riesby") + +linear_reg() \%>\% + set_engine("stan_glmer") \%>\% + fit(depr_score ~ week + (1|subject), data = riesby) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +glmer_spec <- + linear_reg() \%>\% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) \%>\% + add_model(glmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(glmer_wflow, data = riesby) +}\if{html}{\out{
}} + +For prediction, the \code{stan_glmer} engine can compute posterior intervals +analogous to confidence and prediction intervals. In these instances, +the units are the original outcome and when \code{std_error = TRUE}, the +standard deviation of the posterior distribution (or posterior +predictive distribution as appropriate) is returned. +} + +\subsection{References}{ +\itemize{ +\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. +\item Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using +Stan: A tutorial for psychologists, linguists, and cognitive +scientists, arXiv:1506.06201. +} +} +} +\keyword{internal} diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd new file mode 100644 index 000000000..957ea6286 --- /dev/null +++ b/man/details_logistic_reg_gee.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logistic_reg_gee.R +\name{details_logistic_reg_gee} +\alias{details_logistic_reg_gee} +\title{Logistic regression via generalized estimating equations (GEE)} +\description{ +\code{gee::gee()} uses generalized least squares to fit different types of models +that have errors that are not independent. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has no formal tuning parameters. It might be beneficial to +determine the appropriate correlation structure to use. However, this +typically does not affect the predicted value of the model but does have +an effect on the inferential results and parameter covariance values. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% + set_engine("gee") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = binomial) +} + +\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee()}. +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model cannot accept case weights. + +Both \code{gee::gee()} and \code{geepack::geeglm()} specify the id/cluster variable +using an argument \code{id} that requires a vector. parsnip doesn’t work that +way so we enable this model to be fit using an artificial function called +\code{id_var()} to be used in the formula. So, in the original package, the +call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +}\if{html}{\out{
}} + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() \%>\% + set_engine("gee", corstr = "exchangeable") \%>\% + fit(outcome ~ treatment * visit + id_var(patientID), data = toenail) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +gee_spec <- + logistic_reg() \%>\% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) \%>\% + add_model(gee_spec, formula = outcome ~ treatment * visit + id_var(patientID)) + +fit(gee_wflow, data = toenail) +}\if{html}{\out{
}} + +\code{gee()} always prints out warnings and output even when \code{silent = TRUE}. +When using the \code{gee} engine, it will never produce output, even if +\code{silent = FALSE}. + +Also, because of issues with the \code{gee()} function, a supplementary call +to \code{glm()} is needed to get the rank and QR decomposition objects so +that \code{predict()} can be used. +} + +\subsection{References}{ +\itemize{ +\item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using +generalized linear models. \emph{Biometrika}, 73 13–22. +\item Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for +discrete and continuous outcomes. \emph{Biometrics}, 42 121–130. +} +} +} +\keyword{internal} diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd new file mode 100644 index 000000000..98535aafc --- /dev/null +++ b/man/details_logistic_reg_glmer.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logistic_reg_glmer.R +\name{details_logistic_reg_glmer} +\alias{details_logistic_reg_glmer} +\title{Logistic regression via mixed models} +\description{ +The \code{glmer} engine estimates fixed and random effect regression parameters +using maximum likelihood (or restricted maximum likelihood) estimation. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% + set_engine("glmer") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +## +## Computational engine: glmer +## +## Model fit template: +## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = binomial) +} +} + +\subsection{Predicting new samples}{ + +This model can use subject-specific coefficient estimates to make +predictions (i.e. partial pooling). For example, this equation shows the +linear predictor (\emph{η}) for a random intercept: + +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} + +where \emph{i} denotes the \code{i}th independent experimental unit +(e.g. subject). When the model has seen subject \code{i}, it can use that +subject’s data to adjust the \emph{population} intercept to be more specific +to that subject’s results. + +What happens when data are being predicted for a subject that was not +used in the model fit? In that case, this package uses \emph{only} the +population parameter estimates for prediction: + +\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}} + +Depending on what covariates are in the model, this might have the +effect of making the same prediction for all new samples. The population +parameters are the “best estimate” for a subject that was not included +in the model fit. + +The tidymodels framework deliberately constrains predictions for new +data to not use the training set or other data (to prevent information +leakage). +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. 
+} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() \%>\% + set_engine("glmer") \%>\% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +glmer_spec <- + logistic_reg() \%>\% + set_engine("glmer") + +glmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) \%>\% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +}\if{html}{\out{
}} +} + +\subsection{References}{ +\itemize{ +\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and +S-PLUS}. Springer, New York, NY +\item West, K, Band Welch, and A Galecki. 2014. \emph{Linear Mixed Models: A +Practical Guide Using Statistical Software}. CRC Press. +\item Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for +statistical modelling in fisheries biology. \emph{ICES Journal of Marine +Science}, Volume 72, Issue 5, Pages 1245–1256. +\item Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, +Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. \emph{A brief +introduction to mixed effects modelling and multi-model inference in +ecology}. PeerJ 6:e4794. +\item DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through +Data Simulation. 2021. \emph{Advances in Methods and Practices in +Psychological Science}. +} +} +} +\keyword{internal} diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd new file mode 100644 index 000000000..14c5e2fdc --- /dev/null +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logistic_reg_stan_glmer.R +\name{details_logistic_reg_stan_glmer} +\alias{details_logistic_reg_stan_glmer} +\title{Logistic regression via hierarchical Bayesian methods} +\description{ +The \code{stan_glmer} engine estimates hierarchical regression parameters using +Bayesian estimation. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Important engine-specific options}{ + +Some relevant arguments that can be passed to \code{set_engine()}: +\itemize{ +\item \code{chains}: A positive integer specifying the number of Markov chains. +The default is 4. +\item \code{iter}: A positive integer specifying the number of iterations for +each chain (including warmup). 
The default is 2000. +\item \code{seed}: The seed for random number generation. +\item \code{cores}: Number of cores to use when executing the chains in +parallel. +\item \code{prior}: The prior distribution for the (non-hierarchical) +regression coefficients. +\item \code{prior_intercept}: The prior distribution for the intercept (after +centering all predictors). +} + +See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% + set_engine("stan_glmer") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +## +## Computational engine: stan_glmer +## +## Model fit template: +## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::binomial, refresh = 0) +} +} + +\subsection{Predicting new samples}{ + +This model can use subject-specific coefficient estimates to make +predictions (i.e. partial pooling). For example, this equation shows the +linear predictor (\emph{η}) for a random intercept: + +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} + +where \emph{i} denotes the \code{i}th independent experimental unit +(e.g. subject). When the model has seen subject \code{i}, it can use that +subject’s data to adjust the \emph{population} intercept to be more specific +to that subjects results. + +What happens when data are being predicted for a subject that was not +used in the model fit? In that case, this package uses \emph{only} the +population parameter estimates for prediction: + +\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂**x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}} + +Depending on what covariates are in the model, this might have the +effect of making the same prediction for all new samples. The population +parameters are the “best estimate” for a subject that was not included +in the model fit. + +The tidymodels framework deliberately constrains predictions for new +data to not use the training set or other data (to prevent information +leakage). +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. 
However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() \%>\% + set_engine("stan_glmer") \%>\% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +glmer_spec <- + logistic_reg() \%>\% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) \%>\% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +}\if{html}{\out{
}} + +For prediction, the \code{stan_glmer} engine can compute posterior intervals +analogous to confidence and prediction intervals. In these instances, +the units are the original outcome and when \code{std_error = TRUE}, the +standard deviation of the posterior distribution (or posterior +predictive distribution as appropriate) is returned. +} + +\subsection{References}{ +\itemize{ +\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. +\item Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using +Stan: A tutorial for psychologists, linguists, and cognitive +scientists, arXiv:1506.06201. +} +} +} +\keyword{internal} diff --git a/man/details_naive_Bayes_klaR.Rd b/man/details_naive_Bayes_klaR.Rd new file mode 100644 index 000000000..ace2d74f0 --- /dev/null +++ b/man/details_naive_Bayes_klaR.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/naive_Bayes_klaR.R +\name{details_naive_Bayes_klaR} +\alias{details_naive_Bayes_klaR} +\title{Naive Bayes models via klaR} +\description{ +\code{\link[klaR:NaiveBayes]{klaR::NaiveBayes()}} fits a model that uses Bayes' theorem to compute the +probability of each class, given the predictor values. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{smoothness}: Kernel Smoothness (type: double, default: 1.0) +\item \code{Laplace}: Laplace Correction (type: double, default: 0.0) +} + +Note that \code{usekernel} is always set to \code{TRUE} for the \code{klaR} engine. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% + set_engine("klaR") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) +## +## Main Arguments: +## smoothness = numeric(0) +## Laplace = numeric(0) +## +## Computational engine: klaR +## +## Model fit template: +## discrim::klar_bayes_wrapper(x = missing_arg(), y = missing_arg(), +## adjust = numeric(0), fL = numeric(0), usekernel = TRUE) +} +} + +\subsection{Preprocessing requirements}{ + +The columns for qualitative predictors should always be represented as +factors (as opposed to dummy/indicator variables). When the predictors +are factors, the underlying code treats them as multinomial data and +appropriately computes their conditional distributions. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd new file mode 100644 index 000000000..105bb2b87 --- /dev/null +++ b/man/details_naive_Bayes_naivebayes.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/naive_Bayes_naivebayes.R +\name{details_naive_Bayes_naivebayes} +\alias{details_naive_Bayes_naivebayes} +\title{Naive Bayes models via naivebayes} +\description{ +\code{\link[naivebayes:naive_bayes]{naivebayes::naive_bayes()}} fits a model that uses Bayes' theorem to compute +the probability of each class, given the predictor values. 
+} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{smoothness}: Kernel Smoothness (type: double, default: 1.0) +\item \code{Laplace}: Laplace Correction (type: double, default: 0.0) +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% + set_engine("naivebayes") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) +## +## Main Arguments: +## smoothness = numeric(0) +## Laplace = numeric(0) +## +## Computational engine: naivebayes +## +## Model fit template: +## naivebayes::naive_bayes(x = missing_arg(), y = missing_arg(), +## adjust = numeric(0), laplace = numeric(0), usekernel = TRUE) +} +} + +\subsection{Preprocessing requirements}{ + +The columns for qualitative predictors should always be represented as +factors (as opposed to dummy/indicator variables). When the predictors +are factors, the underlying code treats them as multinomial data and +appropriately computes their conditional distributions. + +For count data, integers can be estimated using a Poisson distribution +if the argument \code{usepoisson = TRUE} is passed as an engine argument. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd new file mode 100644 index 000000000..5f9695b06 --- /dev/null +++ b/man/details_pls_mixOmics.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls_mixOmics.R +\name{details_pls_mixOmics} +\alias{details_pls_mixOmics} +\title{Partial least squares via mixOmics} +\description{ +The mixOmics package can fit several different types of PLS models. 
+} +\details{ +For this engine, there are multiple modes: classification and regression +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{predictor_prop}: Proportion of Predictors (type: double, default: +see below) +\item \code{num_comp}: # Components (type: integer, default: 2L) +} +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% + set_engine("mixOmics") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## PLS Model Specification (regression) +## +## Main Arguments: +## predictor_prop = double(1) +## num_comp = integer(1) +## +## Computational engine: mixOmics +## +## Model fit template: +## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), +## ncomp = integer(1)) +} + +\code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} is a function that +\itemize{ +\item Determines the number of predictors in the data. +\item Adjusts \code{num_comp} if the value is larger than the number of +factors. +\item Determines whether sparsity is required based on the value of +\code{predictor_prop}. +\item Sets the \code{keepX} argument of \code{\link[mixOmics:spls]{mixOmics::spls()}} +for sparse models. +} +} + +\subsection{Translation from parsnip to the underlying model call (classification)}{\if{html}{\out{
}}\preformatted{pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% + set_engine("mixOmics") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## PLS Model Specification (classification) +## +## Main Arguments: +## predictor_prop = double(1) +## num_comp = integer(1) +## +## Computational engine: mixOmics +## +## Model fit template: +## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), +## ncomp = integer(1)) +} + +In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same +role as above but eventually targets +\code{\link[mixOmics:plsda]{mixOmics::plsda()}} or +\code{\link[mixOmics:splsda]{mixOmics::splsda()}} . +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. +} + +\subsection{References}{ +\itemize{ +\item Rohart F and Gautier B and Singh A and Le Cao K-A (2017). “mixOmics: +An R package for ’omics feature selection and multiple data +integration.” PLoS computational biology, 13(11), e1005752. 
+} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd new file mode 100644 index 000000000..41669f543 --- /dev/null +++ b/man/details_poisson_reg_gee.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_gee.R +\name{details_poisson_reg_gee} +\alias{details_poisson_reg_gee} +\title{Poisson regression via generalized estimating equations (GEE)} +\description{ +\code{gee::gee()} uses generalized least squares to fit different types of models +that have errors that are not independent. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no formal tuning parameters. It might be beneficial to +determine the appropriate correlation structure to use. However, this +typically does not affect the predicted value of the model but does have +an effect on the inferential results and parameter covariance values. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "gee") \%>\% + set_engine("gee") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = stats::poisson) +} + +\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee()}. +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model cannot accept case weights. + +Both \code{gee::gee()} and \code{geepack::geeglm()} specify the id/cluster variable +using an argument \code{id} that requires a vector. parsnip doesn’t work that +way so we enable this model to be fit using an artificial function called +\code{id_var()} to be used in the formula. So, in the original package, the +call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +}\if{html}{\out{
}} + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) + +poisson_reg() \%>\% + set_engine("gee", corstr = "exchangeable") \%>\% + fit(y ~ time + x + id_var(subject), data = longitudinal_counts) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +gee_spec <- + poisson_reg() \%>\% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) \%>\% + add_model(gee_spec, formula = y ~ time + x + id_var(subject)) + +fit(gee_wflow, data = longitudinal_counts) +}\if{html}{\out{
}} + +\code{gee()} always prints out warnings and output even when \code{silent = TRUE}. +When using the \code{gee} engine, it will never produce output, even if +\code{silent = FALSE}. + +Also, because of issues with the \code{gee()} function, a supplementary call +to \code{glm()} is needed to get the rank and QR decomposition objects so +that \code{predict()} can be used. +} + +\subsection{References}{ +\itemize{ +\item Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using +generalized linear models. \emph{Biometrika}, 73 13–22. +\item Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for +discrete and continuous outcomes. \emph{Biometrics}, 42 121–130. +} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd new file mode 100644 index 000000000..d3e8422ab --- /dev/null +++ b/man/details_poisson_reg_glm.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_glm.R +\name{details_poisson_reg_glm} +\alias{details_poisson_reg_glm} +\title{Poisson regression via glm} +\description{ +\code{\link[stats:glm]{stats::glm()}} uses maximum likelihood to fit a model for count data. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% + set_engine("glm") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::poisson) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd new file mode 100644 index 000000000..ae2b3d898 --- /dev/null +++ b/man/details_poisson_reg_glmer.Rd @@ -0,0 +1,114 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_glmer.R +\name{details_poisson_reg_glmer} +\alias{details_poisson_reg_glmer} +\title{Poisson regression via mixed models} +\description{ +The \code{glmer} engine estimates fixed and random effect regression parameters +using maximum likelihood (or restricted maximum likelihood) estimation. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "glmer") \%>\% + set_engine("glmer") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression)
+##
+## Computational engine: glmer
+##
+## Model fit template:
+## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = stats::poisson)
+}
+}
+
+\subsection{Predicting new samples}{
+
+This model can use subject-specific coefficient estimates to make
+predictions (i.e. partial pooling). For example, this equation shows the
+linear predictor (\emph{η}) for a random intercept:
+
+\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}}
+
+where \emph{i} denotes the \code{i}th independent experimental unit
+(e.g. subject). When the model has seen subject \code{i}, it can use that
+subject’s data to adjust the \emph{population} intercept to be more specific
+to that subject’s results.
+
+What happens when data are being predicted for a subject that was not
+used in the model fit? In that case, this package uses \emph{only} the
+population parameter estimates for prediction:
+
+\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}}
+
+Depending on what covariates are in the model, this might have the
+effect of making the same prediction for all new samples. The population
+parameters are the “best estimate” for a subject that was not included
+in the model fit.
+
+The tidymodels framework deliberately constrains predictions for new
+data to not use the training set or other data (to prevent information
+leakage).
+}
+
+\subsection{Preprocessing requirements}{
+
+There are no specific preprocessing needs. However, it is helpful to
+keep the clustering/subject identifier column as factor or character
+(instead of making them into dummy variables). See the examples in the
+next section.
+} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) + +poisson_reg() \%>\% + set_engine("glmer") \%>\% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +glmer_spec <- + poisson_reg() \%>\% + set_engine("glmer") + +glmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) \%>\% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +}\if{html}{\out{
}} +} + +\subsection{References}{ +\itemize{ +\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and +S-PLUS}. Springer, New York, NY +\item West, K, Band Welch, and A Galecki. 2014. \emph{Linear Mixed Models: A +Practical Guide Using Statistical Software}. CRC Press. +\item Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for +statistical modelling in fisheries biology. \emph{ICES Journal of Marine +Science}, Volume 72, Issue 5, Pages 1245–1256. +\item Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, +Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. \emph{A brief +introduction to mixed effects modelling and multi-model inference in +ecology}. PeerJ 6:e4794. +\item DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through +Data Simulation. 2021. \emph{Advances in Methods and Practices in +Psychological Science}. +} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd new file mode 100644 index 000000000..f9bf14c05 --- /dev/null +++ b/man/details_poisson_reg_glmnet.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_glmnet.R +\name{details_poisson_reg_glmnet} +\alias{details_poisson_reg_glmnet} +\title{Poisson regression via glmnet} +\description{ +\code{glmnet::glmnet()} uses penalized maximum likelihood to fit a model for +count data. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: see +below) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 1.0) +} + +A value of \code{mixture = 1} corresponds to a pure lasso model, while +\code{mixture = 0} indicates ridge regression. + +The \code{penalty} parameter has no default and requires a single numeric +value. 
For more details about this, and the \code{glmnet} model in general, +see \link{glmnet-details}. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(penalty = double(1), mixture = double(1)) \%>\% + set_engine("glmnet") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## alpha = double(1), family = "poisson") +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. + +By default, \code{glmnet::glmnet()} uses the argument \code{standardize = TRUE} to +center and scale the data. +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd new file mode 100644 index 000000000..985c1ef75 --- /dev/null +++ b/man/details_poisson_reg_hurdle.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_hurdle.R +\name{details_poisson_reg_hurdle} +\alias{details_poisson_reg_hurdle} +\title{Poisson regression via pscl} +\description{ +\code{\link[pscl:hurdle]{pscl::hurdle()}} uses maximum likelihood estimation to fit a model for +count data that has separate model terms for predicting the counts and for +predicting the probability of a zero count. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% + set_engine("hurdle") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Computational engine: hurdle +## +## Model fit template: +## pscl::hurdle(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) +} +} + +\subsection{Preprocessing and special formulas for zero-inflated Poisson models}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +For this particular model, a special formula is used to specify which +columns affect the counts and which affect the model for the probability +of zero counts. These sets of terms are separated by a bar. For example, +\code{y ~ x | z}. This type of formula is not used by the base R +infrastructure (e.g. \code{model.matrix()}) + +When fitting a parsnip model with this engine directly, the formula +method is required and the formula is just passed through. For example:\if{html}{\out{
}}\preformatted{library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() \%>\% + set_engine("hurdle") \%>\% + fit(art ~ fem + mar | ment, data = bioChemists) +}\if{html}{\out{
}}\preformatted{## parsnip model object +## +## Fit time: 16ms +## +## Call: +## pscl::hurdle(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (truncated poisson with log link): +## (Intercept) femWomen marMarried +## 0.847598 -0.237351 0.008846 +## +## Zero hurdle model coefficients (binomial with logit link): +## (Intercept) ment +## 0.24871 0.08092 +} + +However, when using a workflow, the best approach is to avoid using +\code{\link[workflows:add_formula]{workflows::add_formula()}} and use +\code{\link[workflows:add_variables]{workflows::add_variables()}} in +conjunction with a model formula:\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") +spec <- + poisson_reg() \%>\% + set_engine("hurdle") + +workflow() \%>\% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) \%>\% + add_model(spec, formula = art ~ fem + mar | ment) \%>\% + fit(data = bioChemists) +}\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +## Preprocessor: Variables +## Model: poisson_reg() +## +## ── Preprocessor ──────────────────────────────────────────────────────────────── +## Outcomes: c(art) +## Predictors: c(fem, mar, ment) +## +## ── Model ─────────────────────────────────────────────────────────────────────── +## +## Call: +## pscl::hurdle(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (truncated poisson with log link): +## (Intercept) femWomen marMarried +## 0.847598 -0.237351 0.008846 +## +## Zero hurdle model coefficients (binomial with logit link): +## (Intercept) ment +## 0.24871 0.08092 +} + +The reason for this is that +\code{\link[workflows:add_formula]{workflows::add_formula()}} will try to +create the model matrix and either fail or create dummy variables +prematurely. +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd new file mode 100644 index 000000000..20b343b43 --- /dev/null +++ b/man/details_poisson_reg_stan.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_stan.R +\name{details_poisson_reg_stan} +\alias{details_poisson_reg_stan} +\title{Poisson regression via stan} +\description{ +\code{\link[rstanarm:stan_glm]{rstanarm::stan_glm()}} uses Bayesian estimation to fit a model for +count data. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Important engine-specific options}{ + +Some relevant arguments that can be passed to \code{set_engine()}: +\itemize{ +\item \code{chains}: A positive integer specifying the number of Markov chains. +The default is 4. +\item \code{iter}: A positive integer specifying the number of iterations for +each chain (including warmup). The default is 2000. 
+\item \code{seed}: The seed for random number generation. +\item \code{cores}: Number of cores to use when executing the chains in +parallel. +\item \code{prior}: The prior distribution for the (non-hierarchical) +regression coefficients. The \code{"stan"} engine does not fit any +hierarchical terms. +\item \code{prior_intercept}: The prior distribution for the intercept (after +centering all predictors). +} + +See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and +\code{\link[rstanarm:priors]{rstanarm::priors()}} for more information on these +and other options. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% + set_engine("stan") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Computational engine: stan +## +## Model fit template: +## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::poisson) +} + +Note that the \code{refresh} default prevents logging of the estimation +process. Change this value in \code{set_engine()} to show the MCMC logs. +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. +} + +\subsection{Other details}{ + +For prediction, the \code{"stan"} engine can compute posterior intervals +analogous to confidence and prediction intervals. In these instances, +the units are the original outcome and when \code{std_error = TRUE}, the +standard deviation of the posterior distribution (or posterior +predictive distribution as appropriate) is returned. +} + +\subsection{Examples}{ + +The “Fitting and Predicting with parsnip” article contains +\href{https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan}{examples} +for \code{poisson_reg()} with the \code{"stan"} engine. +} + +\subsection{References}{ +\itemize{ +\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. 
+} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd new file mode 100644 index 000000000..5bec962ae --- /dev/null +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -0,0 +1,131 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_stan_glmer.R +\name{details_poisson_reg_stan_glmer} +\alias{details_poisson_reg_stan_glmer} +\title{Poisson regression via hierarchical Bayesian methods} +\description{ +The \code{stan_glmer} engine estimates hierarchical regression parameters using +Bayesian estimation. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Important engine-specific options}{ + +Some relevant arguments that can be passed to \code{set_engine()}: +\itemize{ +\item \code{chains}: A positive integer specifying the number of Markov chains. +The default is 4. +\item \code{iter}: A positive integer specifying the number of iterations for +each chain (including warmup). The default is 2000. +\item \code{seed}: The seed for random number generation. +\item \code{cores}: Number of cores to use when executing the chains in +parallel. +\item \code{prior}: The prior distribution for the (non-hierarchical) +regression coefficients. +\item \code{prior_intercept}: The prior distribution for the intercept (after +centering all predictors). +} + +See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "stan_glmer") \%>\% + set_engine("stan_glmer") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression)
+##
+## Computational engine: stan_glmer
+##
+## Model fit template:
+## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(),
+## weights = missing_arg(), family = stats::poisson, refresh = 0)
+}
+}
+
+\subsection{Predicting new samples}{
+
+This model can use subject-specific coefficient estimates to make
+predictions (i.e. partial pooling). For example, this equation shows the
+linear predictor (\emph{η}) for a random intercept:
+
+\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}}
+
+where \emph{i} denotes the \code{i}th independent experimental unit
+(e.g. subject). When the model has seen subject \code{i}, it can use that
+subject’s data to adjust the \emph{population} intercept to be more specific
+to that subject’s results.
+
+What happens when data are being predicted for a subject that was not
+used in the model fit? In that case, this package uses \emph{only} the
+population parameter estimates for prediction:
+
+\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}}
+
+Depending on what covariates are in the model, this might have the
+effect of making the same prediction for all new samples. The population
+parameters are the “best estimate” for a subject that was not included
+in the model fit.
+
+The tidymodels framework deliberately constrains predictions for new
+data to not use the training set or other data (to prevent information
+leakage).
+}
+
+\subsection{Preprocessing requirements}{
+
+There are no specific preprocessing needs.
However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) + +poisson_reg() \%>\% + set_engine("stan_glmer") \%>\% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +glmer_spec <- + poisson_reg() \%>\% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) \%>\% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +}\if{html}{\out{
}} + +For prediction, the \code{stan_glmer} engine can compute posterior intervals +analogous to confidence and prediction intervals. In these instances, +the units are the original outcome and when \code{std_error = TRUE}, the +standard deviation of the posterior distribution (or posterior +predictive distribution as appropriate) is returned. +} + +\subsection{References}{ +\itemize{ +\item McElreath, R. 2020 \emph{Statistical Rethinking}. CRC Press. +\item Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using +Stan: A tutorial for psychologists, linguists, and cognitive +scientists, arXiv:1506.06201. +} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd new file mode 100644 index 000000000..0027d43ce --- /dev/null +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -0,0 +1,109 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/poisson_reg_zeroinfl.R +\name{details_poisson_reg_zeroinfl} +\alias{details_poisson_reg_zeroinfl} +\title{Poisson regression via pscl} +\description{ +\code{\link[pscl:zeroinfl]{pscl::zeroinfl()}} uses maximum likelihood estimation to fit a model for +count data that has separate model terms for predicting the counts and for +predicting the probability of a zero count. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% + set_engine("zeroinfl") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) +## +## Computational engine: zeroinfl +## +## Model fit template: +## pscl::zeroinfl(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg()) +} +} + +\subsection{Preprocessing and special formulas for zero-inflated Poisson models}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +For this particular model, a special formula is used to specify which +columns affect the counts and which affect the model for the probability +of zero counts. These sets of terms are separated by a bar. For example, +\code{y ~ x | z}. This type of formula is not used by the base R +infrastructure (e.g. \code{model.matrix()}) + +When fitting a parsnip model with this engine directly, the formula +method is required and the formula is just passed through. For example:\if{html}{\out{
}}\preformatted{library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() \%>\% + set_engine("zeroinfl") \%>\% + fit(art ~ fem + mar | ment, data = bioChemists) +}\if{html}{\out{
}}\preformatted{## parsnip model object +## +## Fit time: 20ms +## +## Call: +## pscl::zeroinfl(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (poisson with log link): +## (Intercept) femWomen marMarried +## 0.82840 -0.21365 0.02576 +## +## Zero-inflation model coefficients (binomial with logit link): +## (Intercept) ment +## -0.363 -0.166 +} + +However, when using a workflow, the best approach is to avoid using +\code{\link[workflows:add_formula]{workflows::add_formula()}} and use +\code{\link[workflows:add_variables]{workflows::add_variables()}} in +conjunction with a model formula:\if{html}{\out{
}}\preformatted{data("bioChemists", package = "pscl") +spec <- + poisson_reg() \%>\% + set_engine("zeroinfl") + +workflow() \%>\% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) \%>\% + add_model(spec, formula = art ~ fem + mar | ment) \%>\% + fit(data = bioChemists) +}\if{html}{\out{
}}\preformatted{## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +## Preprocessor: Variables +## Model: poisson_reg() +## +## ── Preprocessor ──────────────────────────────────────────────────────────────── +## Outcomes: c(art) +## Predictors: c(fem, mar, ment) +## +## ── Model ─────────────────────────────────────────────────────────────────────── +## +## Call: +## pscl::zeroinfl(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (poisson with log link): +## (Intercept) femWomen marMarried +## 0.82840 -0.21365 0.02576 +## +## Zero-inflation model coefficients (binomial with logit link): +## (Intercept) ment +## -0.363 -0.166 +} + +The reason for this is that +\code{\link[workflows:add_formula]{workflows::add_formula()}} will try to +create the model matrix and either fail or create dummy variables +prematurely. +} +} +\keyword{internal} diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd new file mode 100644 index 000000000..523769edf --- /dev/null +++ b/man/details_proportional_hazards_glmnet.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proportional_hazards_glmnet.R +\name{details_proportional_hazards_glmnet} +\alias{details_proportional_hazards_glmnet} +\title{Proportional hazards regression} +\description{ +\code{\link[glmnet:glmnet]{glmnet::glmnet()}} fits a regularized Cox proportional hazards model. +} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: see +below) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 1.0) +} + +A value of \code{mixture = 1} corresponds to a pure lasso model, while +\code{mixture = 0} indicates ridge regression. 
+ +The \code{penalty} parameter has no default and requires a single numeric +value. For more details about this, and the \code{glmnet} model in general, +see \link{glmnet-details}. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% + set_engine("glmnet") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression)
+##
+## Main Arguments:
+## penalty = 0
+## mixture = double(1)
+##
+## Computational engine: glmnet
+##
+## Model fit template:
+## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(),
+## family = missing_arg(), alpha = double(1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
+the argument \code{standardize = TRUE} to center and scale the data.
+}
+
+\subsection{Other details}{
+
+The model does not fit an intercept.
+
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}} does not use the formula
+interface but, for consistency, this package requires a model formula.
+
+The model formula can include \emph{special} terms, such as
+\code{\link[survival:strata]{survival::strata()}}. This allows the baseline
+hazard to differ between groups contained in the function. The column
+used inside \code{strata()} is treated as qualitative no matter its type.
+This is different than the syntax offered by the
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}} package (i.e.,
+\code{\link[glmnet:stratifySurv]{glmnet::stratifySurv()}}) which is not
+recommended here.
+
+For example, in this model, the numeric column \code{rx} is used to estimate
+two different baseline hazards for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) +library(censored) +library(dplyr) +library(tidyr) + +mod <- + proportional_hazards(penalty = 0.01) \%>\% + set_engine("glmnet", nlambda = 5) \%>\% + fit(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian) + +pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) + +# Different survival probabilities for different values of 'rx' +predict(mod, pred_data, type = "survival", time = 500) \%>\% + bind_cols(pred_data) \%>\% + unnest(.pred) +}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 +## .time .pred_survival age ecog.ps rx +## +## 1 500 0.666 50 1 1 +## 2 500 0.769 50 1 2 +} + +Note that columns used in the \code{strata()} function \emph{will} also be +estimated in the regular portion of the model (i.e., within the linear +predictor). +} +} +\section{Linear predictor values}{ +Since risk regression and parametric survival models are modeling +different characteristics (e.g. relative hazard versus event time), +their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor \emph{increases with +time}. For proportional hazards models the linear predictor \emph{decreases +with time} (since hazard is increasing). As such, the linear predictors +for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing +performance metrics. To standardize across model types, the default for +proportional hazards models is to have \emph{increasing values with time}. As +a result, the sign of the linear predictor will be the opposite of the +value produced by the \code{predict()} method in the package. + +This behavior can be changed by using the \code{increasing} argument when +calling \code{predict()} on a model object. +} + +\section{References}{ +\itemize{ +\item Simon N, Friedman J, Hastie T, Tibshirani R. 2011. “Regularization +Paths for Cox’s Proportional Hazards Model via Coordinate Descent.” +\emph{Journal of Statistical Software}, Articles 39 (5): 1–13. . +\item Hastie T, Tibshirani R, Wainwright M. 2015. \emph{Statistical Learning +with Sparsity}. CRC Press. +\item Kuhn M, Johnson K. 2013. \emph{Applied Predictive Modeling}. Springer. 
+} +} + +\keyword{internal} diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd new file mode 100644 index 000000000..6ee69341a --- /dev/null +++ b/man/details_proportional_hazards_survival.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/proportional_hazards_survival.R +\name{details_proportional_hazards_survival} +\alias{details_proportional_hazards_survival} +\title{Proportional hazards regression} +\description{ +\code{\link[survival:coxph]{survival::coxph()}} fits a Cox proportional hazards model. +} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{proportional_hazards() \%>\% + set_engine("survival") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) +## +## Computational engine: survival +## +## Model fit template: +## survival::coxph(formula = missing_arg(), data = missing_arg(), +## x = TRUE, model = TRUE) +} +} + +\subsection{Other details}{ + +The model does not fit an intercept. + +The main interface for this model uses the formula method since the +model specification typically involves the use of +\code{\link[survival:Surv]{survival::Surv()}}. + +The model formula can include \emph{special} terms, such as +\code{\link[survival:strata]{survival::strata()}}. This allows the baseline +hazard to differ between groups contained in the function. The column +used inside \code{strata()} is treated as qualitative no matter its type. + +For example, in this model, the numeric column \code{rx} is used to estimate +two different baseline hazards for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) + +proportional_hazards() \%>\% + fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% + extract_fit_engine() \%>\% + # Two different hazards for each value of 'rx' + basehaz() +}\if{html}{\out{
}}\preformatted{## hazard time strata +## 1 0.02250134 59 rx=1 +## 2 0.05088586 115 rx=1 +## 3 0.09467873 156 rx=1 +## 4 0.14809975 268 rx=1 +## 5 0.30670509 329 rx=1 +## 6 0.46962698 431 rx=1 +## 7 0.46962698 448 rx=1 +## 8 0.46962698 477 rx=1 +## 9 1.07680229 638 rx=1 +## 10 1.07680229 803 rx=1 +## 11 1.07680229 855 rx=1 +## 12 1.07680229 1040 rx=1 +## 13 1.07680229 1106 rx=1 +## 14 0.05843331 353 rx=2 +## 15 0.12750063 365 rx=2 +## 16 0.12750063 377 rx=2 +## 17 0.12750063 421 rx=2 +## 18 0.23449656 464 rx=2 +## 19 0.35593895 475 rx=2 +## 20 0.50804209 563 rx=2 +## 21 0.50804209 744 rx=2 +## 22 0.50804209 769 rx=2 +## 23 0.50804209 770 rx=2 +## 24 0.50804209 1129 rx=2 +## 25 0.50804209 1206 rx=2 +## 26 0.50804209 1227 rx=2 +} + +Note that columns used in the \code{strata()} function will not be estimated +in the regular portion of the model (i.e., within the linear predictor). +} +} +\section{Linear predictor values}{ +Since risk regression and parametric survival models are modeling +different characteristics (e.g. relative hazard versus event time), +their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor \emph{increases with +time}. For proportional hazards models the linear predictor \emph{decreases +with time} (since hazard is increasing). As such, the linear predictors +for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing +performance metrics. To standardize across model types, the default for +proportional hazards models is to have \emph{increasing values with time}. As +a result, the sign of the linear predictor will be the opposite of the +value produced by the \code{predict()} method in the package. + +This behavior can be changed by using the \code{increasing} argument when +calling \code{predict()} on a model object. +\subsection{References}{ +\itemize{ +\item Andersen P, Gill R. 1982. 
Cox’s regression model for counting +processes, a large sample study. \emph{Annals of Statistics} 10, +1100-1120. +} +} +} + +\keyword{internal} diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd new file mode 100644 index 000000000..1c0633b6e --- /dev/null +++ b/man/details_rand_forest_party.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rand_forest_party.R +\name{details_rand_forest_party} +\alias{details_rand_forest_party} +\title{Random forests via party} +\description{ +\code{\link[party:cforest]{party::cforest()}} fits a model that creates a large number of decision +trees, each independent of the others. The final prediction uses all +predictions from the individual trees and combines them. +} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 3 tuning parameters: +\itemize{ +\item \code{trees}: # Trees (type: integer, default: 500L) +\item \code{min_n}: Minimal Node Size (type: integer, default: 20L) +\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: 5L) +} +} + +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{rand_forest() \%>\% + set_engine("party") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Random Forest Model Specification (censored regression) +## +## Computational engine: party +## +## Model fit template: +## censored::cond_inference_surv_cforest(formula = missing_arg(), +## data = missing_arg()) +} + +\code{\link[=cond_inference_surv_cforest]{cond_inference_surv_cforest()}} is a +wrapper around \code{\link[party:cforest]{party::cforest()}} (and other +functions) that makes it easier to run this model. +} + +\subsection{Preprocessing requirements}{ + +This engine does not require any special encoding of the predictors. +Categorical predictors can be partitioned into groups of factor levels +(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables +are not required for this model. +} + +\subsection{Other details}{ + +The main interface for this model uses the formula method since the +model specification typically involved the use of +\code{\link[survival:Surv]{survival::Surv()}}. +} + +\subsection{References}{ +\itemize{ +\item Hothorn T, Buhlmann P, Dudoit S, Molinaro A, Van der Laan MJ. 2006. +Survival Ensembles. \emph{Biostatistics}, 7(3), 355–373. +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd new file mode 100644 index 000000000..8d2223429 --- /dev/null +++ b/man/details_rule_fit_xrf.Rd @@ -0,0 +1,137 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rule_fit_xrf.R +\name{details_rule_fit_xrf} +\alias{details_rule_fit_xrf} +\title{RuleFit models via xrf} +\description{ +\code{\link[xrf:xrf]{xrf::xrf()}} fits a model that derives simple feature rules from a tree +ensemble and uses them as features to a regularized model. \code{\link[rules:rules-internal]{rules::xrf_fit()}} +is a wrapper around this function. 
+} +\details{ +For this engine, there are multiple modes: classification and regression +\subsection{Tuning Parameters}{ + +This model has 8 tuning parameters: +\itemize{ +\item \code{mtry}: Proportion Randomly Selected Predictors (type: double, +default: 1.0) +\item \code{trees}: # Trees (type: integer, default: 15L) +\item \code{min_n}: Minimal Node Size (type: integer, default: 1L) +\item \code{tree_depth}: Tree Depth (type: integer, default: 6L) +\item \code{learn_rate}: Learning Rate (type: double, default: 0.3) +\item \code{loss_reduction}: Minimum Loss Reduction (type: double, default: +0.0) +\item \code{sample_size}: Proportion Observations Sampled (type: double, +default: 1.0) +\item \code{penalty}: Amount of Regularization (type: double, default: 0.1) +} +} + +\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) \%>\% + set_engine("xrf") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (regression) +## +## Main Arguments: +## mtry = numeric(1) +## trees = integer(1) +## min_n = integer(1) +## tree_depth = integer(1) +## learn_rate = numeric(1) +## loss_reduction = numeric(1) +## sample_size = numeric(1) +## penalty = numeric(1) +## +## Computational engine: xrf +## +## Model fit template: +## rules::xrf_fit(object = missing_arg(), data = missing_arg(), +## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), +## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), +## subsample = numeric(1), lambda = numeric(1)) +} +} + +\subsection{Translation from parsnip to the underlying model call (classification)}{\if{html}{\out{
}}\preformatted{rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) \%>\% + set_engine("xrf") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## RuleFit Model Specification (classification) +## +## Main Arguments: +## mtry = numeric(1) +## trees = integer(1) +## min_n = integer(1) +## tree_depth = integer(1) +## learn_rate = numeric(1) +## loss_reduction = numeric(1) +## sample_size = numeric(1) +## penalty = numeric(1) +## +## Computational engine: xrf +## +## Model fit template: +## rules::xrf_fit(object = missing_arg(), data = missing_arg(), +## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), +## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), +## subsample = numeric(1), lambda = numeric(1)) +} +} + +\subsection{Differences from the xrf package}{ + +Note that, per the documentation in \code{?xrf}, transformations of the +response variable are not supported. To use these with \code{rule_fit()}, we +recommend using a recipe instead of the formula method. + +Also, there are several configuration differences in how \code{xrf()} is fit +between that package and the wrapper used in \code{rules}. Some differences +in default values are: +\itemize{ +\item \code{trees}: \code{xrf}: 100, \code{rules}: 15 +\item \code{max_depth}: \code{xrf}: 3, \code{rules}: 6 +} + +These differences will create a difference in the values of the +\code{penalty} argument that \code{glmnet} uses. Also, \code{rules} can also set \code{penalty} +whereas \code{xrf} uses an internal 5-fold cross-validation to determine it (by +default). +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. +} + +\subsection{References}{ +\itemize{ +\item Friedman and Popescu. “Predictive learning via rule ensembles.” Ann. +Appl. Stat. 
2 (3) 916- 954, September 2008 +} +} +} +\keyword{internal} diff --git a/man/details_survival_reg_flexsurv.Rd b/man/details_survival_reg_flexsurv.Rd new file mode 100644 index 000000000..733c95851 --- /dev/null +++ b/man/details_survival_reg_flexsurv.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/survival_reg_flexsurv.R +\name{details_survival_reg_flexsurv} +\alias{details_survival_reg_flexsurv} +\title{Parametric survival regression} +\description{ +\code{\link[flexsurv:flexsurvreg]{flexsurv::flexsurvreg()}} fits a parametric survival model. +} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameters: +\itemize{ +\item \code{dist}: Distribution (type: character, default: ‘weibull’) +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{survival_reg(dist = character(1)) \%>\% + set_engine("flexsurv") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) +## +## Main Arguments: +## dist = character(1) +## +## Computational engine: flexsurv +## +## Model fit template: +## flexsurv::flexsurvreg(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), dist = character(1)) +} +} + +\subsection{Other details}{ + +The main interface for this model uses the formula method since the +model specification typically involved the use of +\code{\link[survival:Surv]{survival::Surv()}}. + +For this engine, stratification cannot be specified via +\code{\link[=strata]{strata()}}, please see the documentation of the +\code{\link{flexsurv}} package for alternative specifications. +} + +\subsection{References}{ +\itemize{ +\item Jackson, C. 2016. \code{flexsurv}: A Platform for Parametric Survival +Modeling in R. \emph{Journal of Statistical Software}, 70(8), 1 - 33. +} +} +} +\keyword{internal} diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd new file mode 100644 index 000000000..ca88a90e2 --- /dev/null +++ b/man/details_survival_reg_survival.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/survival_reg_survival.R +\name{details_survival_reg_survival} +\alias{details_survival_reg_survival} +\title{Parametric survival regression} +\description{ +\code{\link[survival:survreg]{survival::survreg()}} fits a parametric survival model. +} +\details{ +For this engine, there is a single mode: censored regression +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameters: +\itemize{ +\item \code{dist}: Distribution (type: character, default: ‘weibull’) +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{survival_reg(dist = character(1)) \%>\% + set_engine("survival") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Parametric Survival Regression Model Specification (censored regression) +## +## Main Arguments: +## dist = character(1) +## +## Computational engine: survival +## +## Model fit template: +## survival::survreg(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), dist = character(1), model = TRUE) +} +} + +\subsection{Other details}{ + +Note that \code{model = TRUE} is needed to produce quantile predictions when +there is a stratification variable and can be overridden in other cases. + +The main interface for this model uses the formula method since the +model specification typically involves the use of +\code{\link[survival:Surv]{survival::Surv()}}. + +The model formula can include \emph{special} terms, such as +\code{\link[survival:strata]{survival::strata()}}. This allows the model scale +parameter to differ between groups contained in the function. The column +used inside \code{strata()} is treated as qualitative no matter its type. + +For example, in this model, the numeric column \code{rx} is used to estimate +two different scale parameters for each value of the column:\if{html}{\out{
}}\preformatted{library(survival) + +survival_reg() \%>\% + fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% + extract_fit_engine() +}\if{html}{\out{
}}\preformatted{## Call: +## survival::survreg(formula = Surv(futime, fustat) ~ age + strata(rx), +## data = data, model = TRUE) +## +## Coefficients: +## (Intercept) age +## 12.8734120 -0.1033569 +## +## Scale: +## rx=1 rx=2 +## 0.7695509 0.4703602 +## +## Loglik(model)= -89.4 Loglik(intercept only)= -97.1 +## Chisq= 15.36 on 1 degrees of freedom, p= 8.88e-05 +## n= 26 +} +} + +\subsection{References}{ +\itemize{ +\item Kalbfleisch, J. D. and Prentice, R. L. 2002 \emph{The statistical +analysis of failure time data}, Wiley. +} +} +} +\keyword{internal} diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index ff03c08f6..48b11fd01 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -52,7 +52,7 @@ To enable this, the process for a package developer is to: \item Create an engine-specific R file in the \code{R} directory with the name \verb{\{model\}_\{engine\}.R} (e.g. \code{boost_tree_C5.0.R}). This has a small amount of documentation, as well as the directives "\verb{@name details_\{model\}_\{engine\}}" -and "\verb{@includeRmd man/rmd/\{model\}_\{engine\}.Rmd details}". +and "\verb{@includeRmd man/rmd/\{model\}_\{engine\}.md details}". \item Copy the file in \pkg{parsnip} that is in \code{man/rmd/setup.Rmd} and put it in the same place in your package. \item Write your own \verb{man/rmd/\{model\}_\{engine\}.Rmd} file. This can include diff --git a/man/knit_engine_docs.Rd b/man/knit_engine_docs.Rd new file mode 100644 index 000000000..753b8c4f9 --- /dev/null +++ b/man/knit_engine_docs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/knit_engine_docs.R +\name{knit_engine_docs} +\alias{knit_engine_docs} +\title{Knit engine-specific documentation} +\usage{ +knit_engine_docs(pattern = NULL) +} +\arguments{ +\item{pattern}{A regular expression to specify which files to knit. 
The +default knits all engine documentation files.} +} +\value{ +A tibble with column \code{file} for the file name and \code{result} (a +character vector that echos the output file name or, when there is +a failure, the error message). +} +\description{ +Knit engine-specific documentation +} +\keyword{internal} From 9e894f9d2a2f7fdfd68c96ff5c9f8a6f1fc54108 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:28:13 -0500 Subject: [PATCH 04/65] additional templates --- man/rmd/template-zv-conditional.Rmd | 3 +++ man/rmd/template-zv.Rmd | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 man/rmd/template-zv-conditional.Rmd create mode 100644 man/rmd/template-zv.Rmd diff --git a/man/rmd/template-zv-conditional.Rmd b/man/rmd/template-zv-conditional.Rmd new file mode 100644 index 000000000..40182f1ca --- /dev/null +++ b/man/rmd/template-zv-conditional.Rmd @@ -0,0 +1,3 @@ +Variance calculations are used in these computations within each outcome class. For this reason, _zero-variance_ predictors (i.e., with a single unique value) within each class should be eliminated before fitting the model. + + diff --git a/man/rmd/template-zv.Rmd b/man/rmd/template-zv.Rmd new file mode 100644 index 000000000..d9436b240 --- /dev/null +++ b/man/rmd/template-zv.Rmd @@ -0,0 +1,3 @@ +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. 
+ + From 8a393aa8ed0e353d00e6074a3f3a97751607b0a6 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:28:30 -0500 Subject: [PATCH 05/65] move some argument code to a function --- NAMESPACE | 1 + man/rmd/C5_rules_C5.0.Rmd | 50 +++++++++++ man/rmd/bag_mars_earth.Rmd | 59 +++++++++++++ man/rmd/bag_tree_C5.0.Rmd | 46 ++++++++++ man/rmd/bag_tree_rpart.Rmd | 69 +++++++++++++++ man/rmd/bart_dbarts.Rmd | 13 +-- man/rmd/boost_tree_C5.0.Rmd | 13 +-- man/rmd/boost_tree_mboost.Rmd | 48 ++++++++++ man/rmd/boost_tree_spark.Rmd | 14 +-- man/rmd/boost_tree_xgboost.Rmd | 13 +-- man/rmd/cubist_rules_Cubist.Rmd | 50 +++++++++++ man/rmd/decision_tree_C5.0.Rmd | 13 +-- man/rmd/decision_tree_party.Rmd | 52 +++++++++++ man/rmd/decision_tree_rpart.Rmd | 13 +-- man/rmd/decision_tree_spark.Rmd | 13 +-- man/rmd/discrim_flexible_earth.Rmd | 45 ++++++++++ man/rmd/discrim_linear_MASS.Rmd | 28 ++++++ man/rmd/discrim_linear_mda.Rmd | 45 ++++++++++ man/rmd/discrim_linear_sparsediscrim.Rmd | 57 ++++++++++++ man/rmd/discrim_quad_MASS.Rmd | 28 ++++++ man/rmd/discrim_quad_sparsediscrim.Rmd | 53 +++++++++++ man/rmd/discrim_regularized_klaR.Rmd | 54 ++++++++++++ man/rmd/example_mlm.Rmd | 41 +++++++++ man/rmd/gen_additive_mod_mgcv.Rmd | 13 +-- man/rmd/linear_reg_gee.Rmd | 73 ++++++++++++++++ man/rmd/linear_reg_glmnet.Rmd | 13 +-- man/rmd/linear_reg_keras.Rmd | 13 +-- man/rmd/linear_reg_lmer.Rmd | 70 +++++++++++++++ man/rmd/linear_reg_spark.Rmd | 13 +-- man/rmd/linear_reg_stan_glmer.Rmd | 78 +++++++++++++++++ man/rmd/logistic_reg_LiblineaR.Rmd | 13 +-- man/rmd/logistic_reg_gee.Rmd | 73 ++++++++++++++++ man/rmd/logistic_reg_glmer.Rmd | 69 +++++++++++++++ man/rmd/logistic_reg_glmnet.Rmd | 13 +-- man/rmd/logistic_reg_keras.Rmd | 13 +-- man/rmd/logistic_reg_spark.Rmd | 13 +-- man/rmd/logistic_reg_stan_glmer.Rmd | 77 ++++++++++++++++ man/rmd/mars_earth.Rmd | 13 +-- man/rmd/mlp_keras.Rmd | 13 +-- man/rmd/mlp_nnet.Rmd | 13 +-- man/rmd/multinom_reg_glmnet.Rmd | 13 +-- man/rmd/multinom_reg_keras.Rmd 
| 13 +-- man/rmd/multinom_reg_nnet.Rmd | 13 +-- man/rmd/multinom_reg_spark.Rmd | 13 +-- man/rmd/naive_Bayes_klaR.Rmd | 45 ++++++++++ man/rmd/naive_Bayes_naivebayes.Rmd | 45 ++++++++++ man/rmd/nearest_neighbor_kknn.Rmd | 13 +-- man/rmd/no-pooling.Rmd | 20 +++++ man/rmd/pls_mixOmics.Rmd | 68 +++++++++++++++ man/rmd/poission-reg-engine.Rmd | 50 +++++++++++ man/rmd/poisson_reg_gee.Rmd | 72 +++++++++++++++ man/rmd/poisson_reg_glm.Rmd | 23 +++++ man/rmd/poisson_reg_glmer.Rmd | 68 +++++++++++++++ man/rmd/poisson_reg_glmnet.Rmd | 46 ++++++++++ man/rmd/poisson_reg_hurdle.Rmd | 55 ++++++++++++ man/rmd/poisson_reg_stan.Rmd | 48 ++++++++++ man/rmd/poisson_reg_stan_glmer.Rmd | 76 ++++++++++++++++ man/rmd/poisson_reg_zeroinfl.Rmd | 55 ++++++++++++ man/rmd/proportional_hazards_glmnet.Rmd | 102 ++++++++++++++++++++++ man/rmd/proportional_hazards_survival.Rmd | 54 ++++++++++++ man/rmd/rand_forest_party.Rmd | 51 +++++++++++ man/rmd/rand_forest_randomForest.Rmd | 13 +-- man/rmd/rand_forest_ranger.Rmd | 13 +-- man/rmd/rand_forest_spark.Rmd | 13 +-- man/rmd/rule_fit_xrf.Rmd | 83 ++++++++++++++++++ man/rmd/setup.Rmd | 37 +++++++- man/rmd/surv_reg_flexsurv.Rmd | 13 +-- man/rmd/surv_reg_survival.Rmd | 13 +-- man/rmd/survival_reg_flexsurv.Rmd | 43 +++++++++ man/rmd/survival_reg_survival.Rmd | 55 ++++++++++++ man/rmd/svm_linear_LiblineaR.Rmd | 13 +-- man/rmd/svm_linear_kernlab.Rmd | 13 +-- man/rmd/svm_poly_kernlab.Rmd | 13 +-- man/rmd/svm_rbf_kernlab.Rmd | 13 +-- man/rmd/tidy-example.Rmd | 56 ++++++++++++ 75 files changed, 2347 insertions(+), 388 deletions(-) create mode 100644 man/rmd/C5_rules_C5.0.Rmd create mode 100644 man/rmd/bag_mars_earth.Rmd create mode 100644 man/rmd/bag_tree_C5.0.Rmd create mode 100644 man/rmd/bag_tree_rpart.Rmd create mode 100644 man/rmd/boost_tree_mboost.Rmd create mode 100644 man/rmd/cubist_rules_Cubist.Rmd create mode 100644 man/rmd/decision_tree_party.Rmd create mode 100644 man/rmd/discrim_flexible_earth.Rmd create mode 100644 man/rmd/discrim_linear_MASS.Rmd 
create mode 100644 man/rmd/discrim_linear_mda.Rmd create mode 100644 man/rmd/discrim_linear_sparsediscrim.Rmd create mode 100644 man/rmd/discrim_quad_MASS.Rmd create mode 100644 man/rmd/discrim_quad_sparsediscrim.Rmd create mode 100644 man/rmd/discrim_regularized_klaR.Rmd create mode 100644 man/rmd/example_mlm.Rmd create mode 100644 man/rmd/linear_reg_gee.Rmd create mode 100644 man/rmd/linear_reg_lmer.Rmd create mode 100644 man/rmd/linear_reg_stan_glmer.Rmd create mode 100644 man/rmd/logistic_reg_gee.Rmd create mode 100644 man/rmd/logistic_reg_glmer.Rmd create mode 100644 man/rmd/logistic_reg_stan_glmer.Rmd create mode 100644 man/rmd/naive_Bayes_klaR.Rmd create mode 100644 man/rmd/naive_Bayes_naivebayes.Rmd create mode 100644 man/rmd/no-pooling.Rmd create mode 100644 man/rmd/pls_mixOmics.Rmd create mode 100644 man/rmd/poission-reg-engine.Rmd create mode 100644 man/rmd/poisson_reg_gee.Rmd create mode 100644 man/rmd/poisson_reg_glm.Rmd create mode 100644 man/rmd/poisson_reg_glmer.Rmd create mode 100644 man/rmd/poisson_reg_glmnet.Rmd create mode 100644 man/rmd/poisson_reg_hurdle.Rmd create mode 100644 man/rmd/poisson_reg_stan.Rmd create mode 100644 man/rmd/poisson_reg_stan_glmer.Rmd create mode 100644 man/rmd/poisson_reg_zeroinfl.Rmd create mode 100644 man/rmd/proportional_hazards_glmnet.Rmd create mode 100644 man/rmd/proportional_hazards_survival.Rmd create mode 100644 man/rmd/rand_forest_party.Rmd create mode 100644 man/rmd/rule_fit_xrf.Rmd create mode 100644 man/rmd/survival_reg_flexsurv.Rmd create mode 100644 man/rmd/survival_reg_survival.Rmd create mode 100644 man/rmd/tidy-example.Rmd diff --git a/NAMESPACE b/NAMESPACE index 69abb870a..8813a7eab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -201,6 +201,7 @@ export(glance) export(has_multi_predict) export(is_varying) export(keras_mlp) +export(knit_engine_docs) export(linear_reg) export(logistic_reg) export(make_call) diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd new file mode 100644 index 
000000000..743c35b62 --- /dev/null +++ b/man/rmd/C5_rules_C5.0.Rmd @@ -0,0 +1,50 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("C5_rules", "C5.0")` + +## Tuning Parameters + +```{r C5.0-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("trees", "min_n"), + default = c("1L", "2L")) + +param <- + C5_rules() %>% + set_engine("C5.0") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r C5.0-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Note that C5.0 has a tool for _early stopping_ during boosting where less iterations of boosting are performed than the number requested. `C5_rules()` turns this feature off (although it can be re-enabled using [C50::C5.0Control()]). + +## Translation from parsnip to the underlying model call (regression) + +```{r C5.0-cls} +C5_rules( + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## References + + - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. + + - Quinlan R (1993)."Combining Instance-Based and Model-Based Learning." Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. + + - Kuhn M and Johnson K (2013). _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/bag_mars_earth.Rmd b/man/rmd/bag_mars_earth.Rmd new file mode 100644 index 000000000..4cfae5d17 --- /dev/null +++ b/man/rmd/bag_mars_earth.Rmd @@ -0,0 +1,59 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("bag_mars", "earth")` + +## Tuning Parameters + +```{r earth-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("num_terms", "prod_degree", "prune_method"), + default = c("see below", "1L", "'backward'")) + +param <- + bag_mars() %>% + set_engine("earth") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r earth-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The default value of `num_terms` depends on the number of predictor columns. For a data frame `x`, the default is `min(200, max(20, 2 * ncol(x))) + 1` (see [earth::earth()] and the reference below). + +## Translation from parsnip to the original package (regression) + +```{r earth-reg} +bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% + set_engine("earth") %>% + set_mode("regression") %>% + translate() +``` + +## Translation from parsnip to the original package (classification) + +```{r earth-cls} +bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% + set_engine("earth") %>% + set_mode("classification") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +## References + + - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Friedman, J. 1991. "Multivariate Adaptive Regression Splines." _The Annals of Statistics_, vol. 19, no. 1, pp. 1-67. + + - Milborrow, S. ["Notes on the earth package."](http://www.milbo.org/doc/earth-notes.pdf) + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
+ diff --git a/man/rmd/bag_tree_C5.0.Rmd b/man/rmd/bag_tree_C5.0.Rmd new file mode 100644 index 000000000..fea6a118e --- /dev/null +++ b/man/rmd/bag_tree_C5.0.Rmd @@ -0,0 +1,46 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("bag_tree", "C5.0")` + +## Tuning Parameters + +```{r C5.0-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("min_n"), + default = c("2L")) + +param <- + bag_tree() %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r C5.0-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package (classification) + +```{r C5.0-cls} +bag_tree(min_n = integer()) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + + +## References + + - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. + diff --git a/man/rmd/bag_tree_rpart.Rmd b/man/rmd/bag_tree_rpart.Rmd new file mode 100644 index 000000000..06148a6ed --- /dev/null +++ b/man/rmd/bag_tree_rpart.Rmd @@ -0,0 +1,69 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("bag_tree", "rpart")` + +## Tuning Parameters + +```{r rpart-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("tree_depth", "min_n", "cost_complexity", "class_cost"), + default = c("30L", "2L", "0.01", "(see below)")) + +param <- + bag_tree() %>% + set_engine("rpart") %>% + set_mode("regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r rpart-param-list, echo = FALSE, results = "asis"} +param$item +``` + +For the `class_cost` parameter, the value can be a non-negative scalar for a class cost (where a cost of 1 means no extra cost). 
This is useful for when the first level of the outcome factor is the minority class. If this is not the case, values between zero and one can be used to bias to the second level of the factor. + + +## Translation from parsnip to the original package (classification) + +```{r rpart-cls} +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("classification") %>% + translate() +``` + + +## Translation from parsnip to the original package (regression) + +```{r rpart-reg} +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("regression") %>% + translate() +``` + +## Translation from parsnip to the original package (censored regression) + +```{r rpart-creg} +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("censored regression") %>% + translate() +``` + + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## References + + - Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Hothorn T, Lausen B, Benner A, Radespiel-Troeger M. 2004. Bagging Survival Trees. _Statistics in Medicine_, 23(1), 77–91. + + - Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. 
diff --git a/man/rmd/bart_dbarts.Rmd b/man/rmd/bart_dbarts.Rmd index 25bf63028..43e4d9b82 100644 --- a/man/rmd/bart_dbarts.Rmd +++ b/man/rmd/bart_dbarts.Rmd @@ -16,18 +16,7 @@ param <- bart() %>% set_engine("dbarts") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/boost_tree_C5.0.Rmd b/man/rmd/boost_tree_C5.0.Rmd index 00080349f..8a5de29e8 100644 --- a/man/rmd/boost_tree_C5.0.Rmd +++ b/man/rmd/boost_tree_C5.0.Rmd @@ -14,18 +14,7 @@ param <- boost_tree() %>% set_engine("C5.0") %>% set_mode("classification") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/boost_tree_mboost.Rmd b/man/rmd/boost_tree_mboost.Rmd new file mode 100644 index 000000000..191423088 --- /dev/null +++ b/man/rmd/boost_tree_mboost.Rmd @@ -0,0 +1,48 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("boost_tree", "mboost")` + +## Tuning Parameters + +```{r mboost-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("mtry", "trees", "tree_depth", "min_n", "loss_reduction"), + default = c("see below", "100L", "2L", "10L", "0")) + +param <- + 
boost_tree() %>% + set_engine("mboost") %>% + set_mode("censored regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r mboost-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The `mtry` parameter is related to the number of predictors. The default is to use all predictors. + +## Translation from parsnip to the original package (censored regression) + +```{r mboost-creg} +boost_tree() %>% + set_engine("mboost") %>% + set_mode("censored regression") %>% + translate() +``` + +[blackboost_train()] is a wrapper around [mboost::blackboost()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## References + + - Buehlmann P, Hothorn T. 2007. Boosting algorithms: regularization, prediction and model fitting. _Statistical Science_, 22(4), 477–505. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/boost_tree_spark.Rmd b/man/rmd/boost_tree_spark.Rmd index 8d257b332..52e97e138 100644 --- a/man/rmd/boost_tree_spark.Rmd +++ b/man/rmd/boost_tree_spark.Rmd @@ -15,19 +15,7 @@ param <- boost_tree() %>% set_engine("spark") %>% set_mode("regression") %>% - tunable() %>% - dplyr::filter(name != "stop_iter") %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/boost_tree_xgboost.Rmd b/man/rmd/boost_tree_xgboost.Rmd index 178c7c1ad..788ee363d 100644 --- a/man/rmd/boost_tree_xgboost.Rmd +++ b/man/rmd/boost_tree_xgboost.Rmd @@ -15,18 +15,7 @@ param <- boost_tree() %>% set_engine("xgboost") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/cubist_rules_Cubist.Rmd b/man/rmd/cubist_rules_Cubist.Rmd new file mode 100644 index 000000000..4aa6ccc4a --- /dev/null +++ b/man/rmd/cubist_rules_Cubist.Rmd @@ -0,0 +1,50 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("cubist_rules", "Cubist")` + +## Tuning Parameters + +```{r cubist-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("committees", "neighbors", "max_rules"), 
+ default = c("1L", "0L", "NA_integer_")) + +param <- + cubist_rules() %>% + set_engine("Cubist") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r cubist-param-list, echo = FALSE, results = "asis"} +param$item +``` + + +## Translation from parsnip to the underlying model call (regression) + +```{r cubist-reg} +cubist_rules( + committees = integer(1), + neighbors = integer(1), + max_rules = integer(1) +) %>% + set_engine("Cubist") %>% + set_mode("regression") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## References + + - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. + + - Quinlan R (1993). "Combining Instance-Based and Model-Based Learning." Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. + + - Kuhn M and Johnson K (2013). _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/decision_tree_C5.0.Rmd b/man/rmd/decision_tree_C5.0.Rmd index 41b786baa..533c80e5c 100644 --- a/man/rmd/decision_tree_C5.0.Rmd +++ b/man/rmd/decision_tree_C5.0.Rmd @@ -14,18 +14,7 @@ param <- decision_tree() %>% set_engine("C5.0") %>% set_mode("classification") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/decision_tree_party.Rmd b/man/rmd/decision_tree_party.Rmd new file mode 100644 index 000000000..1a08459d8 --- /dev/null +++ b/man/rmd/decision_tree_party.Rmd @@ -0,0 +1,52 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("decision_tree", "party")` + +## Tuning Parameters + +```{r party-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("tree_depth", "min_n"), + default = c("see below", "20L")) + +param <- + decision_tree() %>% + set_engine("party") %>% + set_mode("censored regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r party-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The `tree_depth` parameter defaults to `0` which means no restrictions are applied to tree depth. + +An engine specific parameter for this model is: + + * `mtry`: the number of predictors, selected at random, that are evaluated for splitting. The default is to use all predictors. 
+ +## Translation from parsnip to the original package (censored regression) + +```{r party-creg} +decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% + set_engine("party") %>% + set_mode("censored regression") %>% + translate() +``` + +[cond_inference_surv_ctree()] is a wrapper around [party::ctree()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## References + + - Hothorn T, Hornik K, Zeileis A. 2006. Unbiased Recursive Partitioning: A Conditional Inference Framework. _Journal of Computational and Graphical Statistics_, 15(3), 651–674. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd index 3de385dad..3a55c3374 100644 --- a/man/rmd/decision_tree_rpart.Rmd +++ b/man/rmd/decision_tree_rpart.Rmd @@ -14,18 +14,7 @@ param <- decision_tree() %>% set_engine("rpart") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/decision_tree_spark.Rmd b/man/rmd/decision_tree_spark.Rmd index 22627438a..0c56e3e3a 100644 --- a/man/rmd/decision_tree_spark.Rmd +++ b/man/rmd/decision_tree_spark.Rmd @@ -14,18 +14,7 @@ param <- decision_tree() %>% set_engine("spark") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type 
= purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd new file mode 100644 index 000000000..d801ac80e --- /dev/null +++ b/man/rmd/discrim_flexible_earth.Rmd @@ -0,0 +1,45 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_flexible", "earth")` + +## Tuning Parameters + +```{r earth-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("num_terms", "prod_degree", "prune_method"), + default = c("(see below)", "1L", "'backward'")) + +param <- + discrim_flexible() %>% + set_engine("earth") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r earth-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The default value of `num_terms` depends on the number of columns (`p`): `min(200, max(20, 2 * p)) + 1`. Note that `num_terms = 1` is an intercept-only model. + +## Translation from parsnip to the original package + +```{r earth-cls} +discrim_flexible(num_terms = integer(0), prod_degree = integer(0), prune_method = character(0)) %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + + +## References + + - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal + Scoring, _Journal of the American Statistical Association_, 89:428, 1255-1270 + + - Friedman (1991). Multivariate Adaptive Regression Splines. _The Annals of Statistics_, 19(1), 1-67. 
diff --git a/man/rmd/discrim_linear_MASS.Rmd b/man/rmd/discrim_linear_MASS.Rmd new file mode 100644 index 000000000..04b417107 --- /dev/null +++ b/man/rmd/discrim_linear_MASS.Rmd @@ -0,0 +1,28 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_linear", "MASS")` + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +```{r mass-cls} +discrim_linear() %>% + set_engine("MASS") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd new file mode 100644 index 000000000..24bfe3f6e --- /dev/null +++ b/man/rmd/discrim_linear_mda.Rmd @@ -0,0 +1,45 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_linear", "mda")` + +## Tuning Parameters + + +```{r mda-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty"), + default = c("1.0")) + +param <- + discrim_linear() %>% + set_engine("mda") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r mda-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package + +```{r mda-cls} +discrim_linear(penalty = numeric(0)) %>% + set_engine("mda") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv.Rmd"} +``` + +## References + + - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal + Scoring, _Journal of the American Statistical Association_, 89:428, 1255-1270 diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd new file mode 100644 index 000000000..d8a673bdc --- /dev/null +++ 
b/man/rmd/discrim_linear_sparsediscrim.Rmd @@ -0,0 +1,57 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_linear", "sparsediscrim")` + +## Tuning Parameters + +```{r sparsediscrim-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("regularization_method"), + default = c("'diagonal'")) + +param <- + discrim_linear() %>% + set_engine("sparsediscrim") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r mda-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The possible values of this parameter, and the functions that they execute, are: + +* `"diagonal"`: [sparsediscrim::lda_diag()] +* `"min_distance"`: [sparsediscrim::lda_emp_bayes_eigen()] +* `"shrink_mean"`: [sparsediscrim::lda_shrink_mean()] +* `"shrink_cov"`: [sparsediscrim::lda_shrink_cov()] + +## Translation from parsnip to the original package + +```{r sparsediscrim-cls} +discrim_linear(regularization_method = character(0)) %>% + set_engine("sparsediscrim") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv.Rmd"} +``` + +## References + + + - `lda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. + + - `lda_shrink_mean()`: Tong, Chen, Zhao, Improved mean estimation and its application to diagonal discriminant analysis, _Bioinformatics_, Volume 28, Issue 4, 15 February 2012, Pages 531-537. + + - `lda_shrink_cov()`: Pang, Tong and Zhao (2009), Shrinkage-based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data. _Biometrics_, 65, 1021-1029. + + - `lda_emp_bayes_eigen()`: Srivistava and Kubokawa (2007), Comparison of Discrimination Methods for High Dimensional Data, _Journal of the Japan Statistical Society_, 37:1, 123-134. 
diff --git a/man/rmd/discrim_quad_MASS.Rmd b/man/rmd/discrim_quad_MASS.Rmd new file mode 100644 index 000000000..801b4ae58 --- /dev/null +++ b/man/rmd/discrim_quad_MASS.Rmd @@ -0,0 +1,28 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_quad", "MASS")` + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +```{r mass-cls} +discrim_quad() %>% + set_engine("MASS") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv-conditional.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd new file mode 100644 index 000000000..971ae0790 --- /dev/null +++ b/man/rmd/discrim_quad_sparsediscrim.Rmd @@ -0,0 +1,53 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_quad", "sparsediscrim")` + +## Tuning Parameters + +```{r sparsediscrim-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("regularization_method"), + default = c("'diagonal'")) + +param <- + discrim_quad() %>% + set_engine("sparsediscrim") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r mda-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The possible values of this parameter, and the functions that they execute, are: + +* `"diagonal"`: [sparsediscrim::qda_diag()] +* `"shrink_mean"`: [sparsediscrim::qda_shrink_mean()] +* `"shrink_cov"`: [sparsediscrim::qda_shrink_cov()] + +## Translation from parsnip to the original package + +```{r sparsediscrim-cls} +discrim_quad(regularization_method = character(0)) %>% + set_engine("sparsediscrim") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = 
"template-zv-conditional.Rmd"} +``` + +## References + + - `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. + + - `qda_shrink_mean()`: Tong, Chen, Zhao, Improved mean estimation and its application to diagonal discriminant analysis, _Bioinformatics_, Volume 28, Issue 4, 15 February 2012, Pages 531-537. + + - `qda_shrink_cov()`: Pang, Tong and Zhao (2009), Shrinkage-based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data. _Biometrics_, 65, 1021-1029. diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd new file mode 100644 index 000000000..7533b027d --- /dev/null +++ b/man/rmd/discrim_regularized_klaR.Rmd @@ -0,0 +1,54 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_regularized", "klaR")` + +## Tuning Parameters + + +```{r klaR-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("frac_identity", "frac_common_cov"), + default = c("(see below)", "(see below)")) + +param <- + discrim_regularized() %>% + set_engine("klaR") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r klaR-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Some special cases for the RDA model: + +* `frac_identity = 0` and `frac_common_cov = 1` is a linear discriminant analysis (LDA) model. + +* `frac_identity = 0` and `frac_common_cov = 0` is a quadratic discriminant analysis (QDA) model. 
+ + + +## Translation from parsnip to the original package + +```{r klaR-cls} +discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) %>% + set_engine("klaR") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv-conditional.Rmd"} +``` + +## References + + - Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/example_mlm.Rmd b/man/rmd/example_mlm.Rmd new file mode 100644 index 000000000..a84188ba0 --- /dev/null +++ b/man/rmd/example_mlm.Rmd @@ -0,0 +1,41 @@ +```{r quiet-load, include = FALSE} +library(tidymodels) +library(multilevelmod) +library(poissonreg) # currently required for poisson_reg() +``` + +```{r, message = FALSE, warning = FALSE} +library(tidymodels) +library(multilevelmod) +library(poissonreg) # currently required for poisson_reg() + +# The lme4 package is required for this model. 
+ +tidymodels_prefer() + +# Split out two subjects to show how prediction works +data_train <- + longitudinal_counts %>% + filter(!(subject %in% c("1", "2"))) + +data_new <- + longitudinal_counts %>% + filter(subject %in% c("1", "2")) + +# Fit the model +count_mod <- + poisson_reg() %>% + set_engine("glmer") %>% + fit(y ~ time + x + (1 | subject), data = data_train) +``` + +```{r} +count_mod +``` + +When making predictions, the basic `predict()` method does the trick: + +```{r} +count_mod %>% predict(data_new) +``` + diff --git a/man/rmd/gen_additive_mod_mgcv.Rmd b/man/rmd/gen_additive_mod_mgcv.Rmd index 5a300da7f..8baf43bf0 100644 --- a/man/rmd/gen_additive_mod_mgcv.Rmd +++ b/man/rmd/gen_additive_mod_mgcv.Rmd @@ -14,18 +14,7 @@ defaults <- param <- gen_additive_mod() %>% set_engine("mgcv") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd new file mode 100644 index 000000000..6e730a901 --- /dev/null +++ b/man/rmd/linear_reg_gee.Rmd @@ -0,0 +1,73 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "gee")` + +## Tuning Parameters + +This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. 
+ +## Translation from parsnip to the original package + +```{r gee-csl} +linear_reg() %>% + set_engine("gee") %>% + set_mode("regression") %>% + translate() +``` + +`multilevelmod::gee_fit()` is a wrapper around `gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model cannot accept case weights. + +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +linear_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(breaks ~ tension + id_var(wool), data = warpbreaks) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + linear_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = breaks, predictors = c(tension, wool)) %>% + add_model(gee_spec, formula = breaks ~ tension + id_var(wool)) + +fit(gee_wflow, data = warpbreaks) +``` + +`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. 
+ +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. + diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd index f15d1eab5..40eb9f3a1 100644 --- a/man/rmd/linear_reg_glmnet.Rmd +++ b/man/rmd/linear_reg_glmnet.Rmd @@ -13,18 +13,7 @@ defaults <- param <- linear_reg() %>% set_engine("glmnet") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/linear_reg_keras.Rmd b/man/rmd/linear_reg_keras.Rmd index 4d7ab6742..fe6acbe1b 100644 --- a/man/rmd/linear_reg_keras.Rmd +++ b/man/rmd/linear_reg_keras.Rmd @@ -13,18 +13,7 @@ defaults <- param <- linear_reg() %>% set_engine("keras") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has one tuning parameter: diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd new 
file mode 100644 index 000000000..60171a300 --- /dev/null +++ b/man/rmd/linear_reg_lmer.Rmd @@ -0,0 +1,70 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "lmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r lmer-csl} +linear_reg() %>% + set_engine("lmer") %>% + set_mode("regression") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("lmer") %>% + fit(depr_score ~ week + (1|subject), data = riesby) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +lmer_spec <- + linear_reg() %>% + set_engine("lmer") + +lmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(lmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(lmer_wflow, data = riesby) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. 
+ + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. + diff --git a/man/rmd/linear_reg_spark.Rmd b/man/rmd/linear_reg_spark.Rmd index 08c503683..fc97ae633 100644 --- a/man/rmd/linear_reg_spark.Rmd +++ b/man/rmd/linear_reg_spark.Rmd @@ -13,18 +13,7 @@ defaults <- param <- linear_reg() %>% set_engine("spark") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd new file mode 100644 index 000000000..e869f1d19 --- /dev/null +++ b/man/rmd/linear_reg_stan_glmer.Rmd @@ -0,0 +1,78 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "stan_glmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. 
+ * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. + +## Translation from parsnip to the original package + +```{r stan_glmer-csl} +linear_reg() %>% + set_engine("stan_glmer") %>% + set_mode("regression") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("stan_glmer") %>% + fit(depr_score ~ week + (1|subject), data = riesby) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + linear_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(glmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(glmer_wflow, data = riesby) +``` + +For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. 
+ + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. diff --git a/man/rmd/logistic_reg_LiblineaR.Rmd b/man/rmd/logistic_reg_LiblineaR.Rmd index 5332ee1d4..47fc53783 100644 --- a/man/rmd/logistic_reg_LiblineaR.Rmd +++ b/man/rmd/logistic_reg_LiblineaR.Rmd @@ -13,18 +13,7 @@ defaults <- param <- logistic_reg() %>% set_engine("LiblineaR") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd new file mode 100644 index 000000000..869bcc25f --- /dev/null +++ b/man/rmd/logistic_reg_gee.Rmd @@ -0,0 +1,73 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("logistic_reg", "gee")` + +## Tuning Parameters + +This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. + +## Translation from parsnip to the original package + +```{r gee-csl} +logistic_reg() %>% + set_engine("gee") %>% + translate() +``` + +`multilevelmod::gee_fit()` is a wrapper model around `gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. 
+ +## Other details + +The model cannot accept case weights. + +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(outcome ~ treatment * visit + id_var(patientID), data = toenail) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + logistic_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(gee_spec, formula = outcome ~ treatment * visit + id_var(patientID)) + +fit(gee_wflow, data = toenail) +``` + +`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. + +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. 
+ diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd new file mode 100644 index 000000000..776482737 --- /dev/null +++ b/man/rmd/logistic_reg_glmer.Rmd @@ -0,0 +1,69 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("logistic_reg", "glmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r lmer-csl} +logistic_reg() %>% + set_engine("glmer") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("glmer") %>% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + logistic_reg() %>% + set_engine("glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 
2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. + diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd index 2f27623ee..96c822c07 100644 --- a/man/rmd/logistic_reg_glmnet.Rmd +++ b/man/rmd/logistic_reg_glmnet.Rmd @@ -13,18 +13,7 @@ defaults <- param <- logistic_reg() %>% set_engine("glmnet") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/logistic_reg_keras.Rmd b/man/rmd/logistic_reg_keras.Rmd index 69172d1dd..1ec3c7d06 100644 --- a/man/rmd/logistic_reg_keras.Rmd +++ b/man/rmd/logistic_reg_keras.Rmd @@ -13,18 +13,7 @@ defaults <- param <- logistic_reg() %>% set_engine("keras") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + 
make_parameter_list(defaults) ``` This model has one tuning parameter: diff --git a/man/rmd/logistic_reg_spark.Rmd b/man/rmd/logistic_reg_spark.Rmd index 428b1645d..779d88fc8 100644 --- a/man/rmd/logistic_reg_spark.Rmd +++ b/man/rmd/logistic_reg_spark.Rmd @@ -13,18 +13,7 @@ defaults <- param <- logistic_reg() %>% set_engine("spark") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd new file mode 100644 index 000000000..b10b8a81f --- /dev/null +++ b/man/rmd/logistic_reg_stan_glmer.Rmd @@ -0,0 +1,77 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("logistic_reg", "stan_glmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. 
+ +## Translation from parsnip to the original package + +```{r stan_glmer-cls} +logistic_reg() %>% + set_engine("stan_glmer") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("stan_glmer") %>% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + logistic_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +``` + +For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. + + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. 
diff --git a/man/rmd/mars_earth.Rmd b/man/rmd/mars_earth.Rmd index e29e05d6d..17a6c8e03 100644 --- a/man/rmd/mars_earth.Rmd +++ b/man/rmd/mars_earth.Rmd @@ -13,18 +13,7 @@ defaults <- param <- mars() %>% set_engine("earth") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/mlp_keras.Rmd b/man/rmd/mlp_keras.Rmd index e237af08f..f8f480383 100644 --- a/man/rmd/mlp_keras.Rmd +++ b/man/rmd/mlp_keras.Rmd @@ -13,18 +13,7 @@ defaults <- param <- mlp() %>% set_engine("keras") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/mlp_nnet.Rmd b/man/rmd/mlp_nnet.Rmd index 3486f4921..5bcb2770e 100644 --- a/man/rmd/mlp_nnet.Rmd +++ b/man/rmd/mlp_nnet.Rmd @@ -13,18 +13,7 @@ defaults <- param <- mlp() %>% set_engine("nnet") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, 
default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd index 998121d07..0a4f8227d 100644 --- a/man/rmd/multinom_reg_glmnet.Rmd +++ b/man/rmd/multinom_reg_glmnet.Rmd @@ -13,18 +13,7 @@ defaults <- param <- multinom_reg() %>% set_engine("glmnet") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/multinom_reg_keras.Rmd b/man/rmd/multinom_reg_keras.Rmd index 3475f4409..91cc8a5b3 100644 --- a/man/rmd/multinom_reg_keras.Rmd +++ b/man/rmd/multinom_reg_keras.Rmd @@ -13,18 +13,7 @@ defaults <- param <- multinom_reg() %>% set_engine("keras") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has one tuning parameter: diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd index 9cd571b76..f04888b0d 100644 --- a/man/rmd/multinom_reg_nnet.Rmd +++ b/man/rmd/multinom_reg_nnet.Rmd @@ -13,18 +13,7 @@ defaults <- param <- multinom_reg() %>% set_engine("nnet") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = 
purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/multinom_reg_spark.Rmd b/man/rmd/multinom_reg_spark.Rmd index d9fb28f03..69d75c63f 100644 --- a/man/rmd/multinom_reg_spark.Rmd +++ b/man/rmd/multinom_reg_spark.Rmd @@ -13,18 +13,7 @@ defaults <- param <- multinom_reg() %>% set_engine("spark") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd new file mode 100644 index 000000000..f498f9f72 --- /dev/null +++ b/man/rmd/naive_Bayes_klaR.Rmd @@ -0,0 +1,45 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("naive_Bayes", "klaR")` + +## Tuning Parameters + + +```{r klaR-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("smoothness", "Laplace"), + default = c("1.0", "0.0")) + +param <- + naive_Bayes() %>% + set_engine("klaR") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r klaR-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Note that `usekernel` is always set to `TRUE` for the `klaR` engine. 
+ +## Translation from parsnip to the original package + +```{r klaR-cls} +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% + set_engine("klaR") %>% + translate() +``` + +## Preprocessing requirements + +The columns for qualitative predictors should always be represented as factors (as opposed to dummy/indicator variables). When the predictors are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. + +```{r child = "template-zv.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd b/man/rmd/naive_Bayes_naivebayes.Rmd new file mode 100644 index 000000000..35ead379e --- /dev/null +++ b/man/rmd/naive_Bayes_naivebayes.Rmd @@ -0,0 +1,45 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("naive_Bayes", "naivebayes")` + +## Tuning Parameters + + +```{r naivebayes-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("smoothness", "Laplace"), + default = c("1.0", "0.0")) + +param <- + naive_Bayes() %>% + set_engine("naivebayes") %>% +make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r naivebayes-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package + +```{r naivebayes-cls} +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% + set_engine("naivebayes") %>% + translate() +``` + +## Preprocessing requirements + +The columns for qualitative predictors should always be represented as factors (as opposed to dummy/indicator variables). When the predictors are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. + +For count data, integers can be estimated using a Poisson distribution if the augment `usepoisson = TRUE` is passed as an engine argument. 
+ +```{r child = "template-zv.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/nearest_neighbor_kknn.Rmd b/man/rmd/nearest_neighbor_kknn.Rmd index 838c92474..a9b7b4342 100644 --- a/man/rmd/nearest_neighbor_kknn.Rmd +++ b/man/rmd/nearest_neighbor_kknn.Rmd @@ -13,18 +13,7 @@ defaults <- param <- nearest_neighbor() %>% set_engine("kknn") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/no-pooling.Rmd b/man/rmd/no-pooling.Rmd new file mode 100644 index 000000000..52568f095 --- /dev/null +++ b/man/rmd/no-pooling.Rmd @@ -0,0 +1,20 @@ +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. 
The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd new file mode 100644 index 000000000..f68320e59 --- /dev/null +++ b/man/rmd/pls_mixOmics.Rmd @@ -0,0 +1,68 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("pls", "mixOmics")` + +## Tuning Parameters + +```{r mixOmics-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("num_comp", "predictor_prop"), + default = c("2L", "see below")) + +param <- + pls() %>% + set_engine("mixOmics") %>% + set_mode("regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r mixOmics-param-list, echo = FALSE, results = "asis"} +param$item +``` + + +## Translation from parsnip to the underlying model call (regression) + +```{r mixOmics-reg} +pls(num_comp = integer(1), predictor_prop = double(1)) %>% + set_engine("mixOmics") %>% + set_mode("regression") %>% + translate() +``` + +[plsmod::pls_fit()] is a function that + + - Determines the number of predictors in the data. + - Adjusts `num_comp` if the value is larger than the number of factors. + - Determines whether sparsity is required based on the value of `predictor_prop`. + - Sets the `keepX` argument of [mixOmics::spls()] for sparse models. + +## Translation from parsnip to the underlying model call (classification) + +```{r mixOmics-cls} +pls(num_comp = integer(1), predictor_prop = double(1)) %>% + set_engine("mixOmics") %>% + set_mode("classification") %>% + translate() +``` + +In this case, [plsmod::pls_fit()] has the same role as above but eventually targets [mixOmics::plsda()] or [mixOmics::splsda()] . 
+ +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +## References + + - Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752. + diff --git a/man/rmd/poission-reg-engine.Rmd b/man/rmd/poission-reg-engine.Rmd new file mode 100644 index 000000000..75a6f81c0 --- /dev/null +++ b/man/rmd/poission-reg-engine.Rmd @@ -0,0 +1,50 @@ +# Engine Details + +```{r startup, include = FALSE} +library(poissonreg) +``` + +Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are: + +\pkg{glm} + +```{r glm} +poisson_reg() %>% + set_engine("glm") %>% + translate() +``` + +\pkg{zeroinfl} + +```{r zeroinfl} +poisson_reg() %>% + set_engine("zeroinfl") %>% + translate() +``` + +\pkg{hurdle} + +```{r hurdle} +poisson_reg() %>% + set_engine("hurdle") %>% + translate() +``` + + +\pkg{glmnet} + +```{r glmnet} +poisson_reg() %>% + set_engine("glmnet") %>% + translate() +``` + + +\pkg{stan} + +```{r stan} +poisson_reg() %>% + set_engine("stan") %>% + translate() +``` + diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd new file mode 100644 index 000000000..e9d782b7b --- /dev/null +++ b/man/rmd/poisson_reg_gee.Rmd @@ -0,0 +1,72 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "gee")` + +## Tuning Parameters + +This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. 
+ +## Translation from parsnip to the original package + +```{r gee-csl} +poisson_reg(engine = "gee") %>% + set_engine("gee") %>% + translate() +``` + +`multilevelmod::gee_fit()` is a wrapper model around `gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model cannot accept case weights. + +Both `gee:gee(a)` and `gee:geepack()` specifies the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(y ~ time + x + id_var(subject), data = longitudinal_counts) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + poisson_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(gee_spec, formula = y ~ time + x + id_var(subject)) + +fit(gee_wflow, data = longitudinal_counts) +``` + +`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. 
+ +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. + diff --git a/man/rmd/poisson_reg_glm.Rmd b/man/rmd/poisson_reg_glm.Rmd new file mode 100644 index 000000000..6a0d0ffcd --- /dev/null +++ b/man/rmd/poisson_reg_glm.Rmd @@ -0,0 +1,23 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "glm")` + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the underlying model call (regression) + +```{r glm-reg} +poisson_reg() %>% + set_engine("glm") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + + diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd new file mode 100644 index 000000000..52f1945a2 --- /dev/null +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -0,0 +1,68 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "glmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r lmer-csl} +poisson_reg(engine = "glmer") %>% + set_engine("glmer") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. 
+ +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("glmer") %>% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + poisson_reg() %>% + set_engine("glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. 
+ diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd new file mode 100644 index 000000000..0f54a2526 --- /dev/null +++ b/man/rmd/poisson_reg_glmnet.Rmd @@ -0,0 +1,46 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "glmnet")` + +## Tuning Parameters + +```{r glmnet-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty", "mixture"), + default = c("see below", "1.0")) + +param <- +poisson_reg() %>% + set_engine("glmnet") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r glmnet-param-list, echo = FALSE, results = "asis"} +param$item +``` + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details]. + +## Translation from parsnip to the original package + +```{r glmnet-csl} +poisson_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data. + diff --git a/man/rmd/poisson_reg_hurdle.Rmd b/man/rmd/poisson_reg_hurdle.Rmd new file mode 100644 index 000000000..d3dfeabf5 --- /dev/null +++ b/man/rmd/poisson_reg_hurdle.Rmd @@ -0,0 +1,55 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "hurdle")` + +## Tuning Parameters + +This engine has no tuning parameters. 
+ +## Translation from parsnip to the underlying model call (regression) + +```{r hurdle-reg} +poisson_reg() %>% + set_engine("hurdle") %>% + translate() +``` + +## Preprocessing and special formulas for zero-inflated Poisson models + +```{r child = "template-makes-dummies.Rmd"} +``` + +For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability of zero counts. These sets of terms are separated by a bar. For example, `y ~ x | z`. This type of formula is not used by the base R infrastructure (e.g. `model.matrix()`) + +When fitting a parsnip model with this engine directly, the formula method is required and the formula is just passed through. For example: + +```{r, include = FALSE} +library(tidymodels) +``` + +```{r, message = FALSE} +library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() %>% + set_engine("hurdle") %>% + fit(art ~ fem + mar | ment, data = bioChemists) +``` + +However, when using a workflow, the best approach is to avoid using [workflows::add_formula()] and use [workflows::add_variables()] in conjunction with a model formula: + +```{r} +data("bioChemists", package = "pscl") +spec <- + poisson_reg() %>% + set_engine("hurdle") + +workflow() %>% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) %>% + add_model(spec, formula = art ~ fem + mar | ment) %>% + fit(data = bioChemists) +``` + +The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd new file mode 100644 index 000000000..d96387b77 --- /dev/null +++ b/man/rmd/poisson_reg_stan.Rmd @@ -0,0 +1,48 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "stan")` + +## Tuning Parameters + +This engine has no tuning parameters. 
+ +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. The `"stan"` engine does not fit any hierarchical terms. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options. + +## Translation from parsnip to the original package + +```{r stan-csl} +poisson_reg() %>% + set_engine("stan") %>% + translate() +``` + +Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs. + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +## Other details + +For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. 
diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd new file mode 100644 index 000000000..c3738c990 --- /dev/null +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -0,0 +1,76 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "stan_glmer")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. + +## Translation from parsnip to the original package + +```{r stan_glmer-cls} +poisson_reg(engine = "stan_glmer") %>% + set_engine("stan_glmer") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("stan_glmer") %>% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. 
In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + poisson_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +``` + +For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. + + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. diff --git a/man/rmd/poisson_reg_zeroinfl.Rmd b/man/rmd/poisson_reg_zeroinfl.Rmd new file mode 100644 index 000000000..eb6641aa1 --- /dev/null +++ b/man/rmd/poisson_reg_zeroinfl.Rmd @@ -0,0 +1,55 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("poisson_reg", "zeroinfl")` + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the underlying model call (regression) + +```{r zeroinfl-reg} +poisson_reg() %>% + set_engine("zeroinfl") %>% + translate() +``` + +## Preprocessing and special formulas for zero-inflated Poisson models + +```{r child = "template-makes-dummies.Rmd"} +``` + +For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability of zero counts. These sets of terms are separated by a bar. For example, `y ~ x | z`. 
This type of formula is not used by the base R infrastructure (e.g. `model.matrix()`) + +When fitting a parsnip model with this engine directly, the formula method is required and the formula is just passed through. For example: + +```{r, include = FALSE} +library(tidymodels) +``` + +```{r, message = FALSE} +library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() %>% + set_engine("zeroinfl") %>% + fit(art ~ fem + mar | ment, data = bioChemists) +``` + +However, when using a workflow, the best approach is to avoid using [workflows::add_formula()] and use [workflows::add_variables()] in conjunction with a model formula: + +```{r} +data("bioChemists", package = "pscl") +spec <- + poisson_reg() %>% + set_engine("zeroinfl") + +workflow() %>% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) %>% + add_model(spec, formula = art ~ fem + mar | ment) %>% + fit(data = bioChemists) +``` + +The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd new file mode 100644 index 000000000..9ea68fe17 --- /dev/null +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -0,0 +1,102 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("proportional_hazards", "glmnet")` + +## Tuning Parameters + +```{r glmnet-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty", "mixture"), + default = c("see below", "1.0")) + +param <- +proportional_hazards() %>% + set_engine("glmnet") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r glmnet-param-list, echo = FALSE, results = "asis"} +param$item +``` + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. 
+ +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [parsnip::glmnet-details]. + +## Translation from parsnip to the original package + +```{r glmnet-creg} +proportional_hazards(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` +By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. + + +## Other details + +The model does not fit an intercept. + +[glmnet::glmnet()] does not use the formula interface but, for consistency, this package requires a model formula. + +The model formula can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is not recommended here. 
+ +For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: + +```{r, include = FALSE} +library(survival) +library(censored) +library(dplyr) +library(tidyr) +``` + +```{r, warning = FALSE} +library(survival) +library(censored) +library(dplyr) +library(tidyr) + +mod <- + proportional_hazards(penalty = 0.01) %>% + set_engine("glmnet", nlambda = 5) %>% + fit(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian) + +pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) + +# Different survival probabilities for different values of 'rx' +predict(mod, pred_data, type = "survival", time = 500) %>% + bind_cols(pred_data) %>% + unnest(.pred) +``` + +Note that columns used in the `strata()` function _will_ also be estimated in the regular portion of the model (i.e., within the linear predictor). + +# Linear predictor values + +Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing performance metrics. To standardize across model types, the default for proportional hazards models is to have _increasing values with time_. As a result, the sign of the linear predictor will be the opposite of the value produced by the `predict()` method in the \pkg{survival} package. + +This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. + +# References + + - Simon N, Friedman J, Hastie T, Tibshirani R. 2011. 
"Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}. + + - Hastie T, Tibshirani R, Wainwright M. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn M, Johnson K. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/proportional_hazards_survival.Rmd b/man/rmd/proportional_hazards_survival.Rmd new file mode 100644 index 000000000..3e77d09ad --- /dev/null +++ b/man/rmd/proportional_hazards_survival.Rmd @@ -0,0 +1,54 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("proportional_hazards", "survival")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r survival-creg} +proportional_hazards() %>% + set_engine("survival") %>% + set_mode("censored regression") %>% + translate() +``` + +## Other details + +The model does not fit an intercept. + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +The model formula can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. + +For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: + +```{r} +library(survival) + +proportional_hazards() %>% + fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) %>% + extract_fit_engine() %>% + # Two different hazards for each value of 'rx' + basehaz() +``` + +Note that columns used in the `strata()` function will not be estimated in the regular portion of the model (i.e., within the linear predictor). 
+ + +# Linear predictor values + +Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing performance metrics. To standardize across model types, the default for proportional hazards models is to have _increasing values with time_. As a result, the sign of the linear predictor will be the opposite of the value produced by the `predict()` method in the \pkg{survival} package. + +This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. + +## References + +- Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120. 
diff --git a/man/rmd/rand_forest_party.Rmd b/man/rmd/rand_forest_party.Rmd new file mode 100644 index 000000000..61b0aa2f2 --- /dev/null +++ b/man/rmd/rand_forest_party.Rmd @@ -0,0 +1,51 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("rand_forest", "party")` + +## Tuning Parameters + +```{r party-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("trees", "min_n", "mtry"), + default = c("500L", "20L", "5L")) + +param <- + rand_forest() %>% + set_engine("party") %>% + set_mode("censored regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r party-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package (censored regression) + +```{r party-creg} +rand_forest() %>% + set_engine("party") %>% + set_mode("censored regression") %>% + translate() +``` + +[cond_inference_surv_cforest()] is a wrapper around [party::cforest()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + +```{r child = "template-tree-split-factors.Rmd"} +``` + +## Other details + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + + +## References + + - Hothorn T, Buhlmann P, Dudoit S, Molinaro A, Van der Laan MJ. 2006. Survival Ensembles. _Biostatistics_, 7(3), 355–373. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/rand_forest_randomForest.Rmd b/man/rmd/rand_forest_randomForest.Rmd index 676395004..9e085c9f7 100644 --- a/man/rmd/rand_forest_randomForest.Rmd +++ b/man/rmd/rand_forest_randomForest.Rmd @@ -13,18 +13,7 @@ defaults <- param <- rand_forest() %>% set_engine("randomForest") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd index bea7a4835..5aa69b6b7 100644 --- a/man/rmd/rand_forest_ranger.Rmd +++ b/man/rmd/rand_forest_ranger.Rmd @@ -13,18 +13,7 @@ defaults <- param <- rand_forest() %>% set_engine("ranger") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/rand_forest_spark.Rmd b/man/rmd/rand_forest_spark.Rmd index e10f26ae2..88ed7ca01 100644 --- a/man/rmd/rand_forest_spark.Rmd +++ b/man/rmd/rand_forest_spark.Rmd @@ -13,18 +13,7 @@ defaults <- param <- rand_forest() %>% set_engine("spark") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = 
purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd new file mode 100644 index 000000000..130eb8c3c --- /dev/null +++ b/man/rmd/rule_fit_xrf.Rmd @@ -0,0 +1,83 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("rule_fit", "xrf")` + +## Tuning Parameters + +```{r xrf-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("tree_depth", "trees", "learn_rate", "mtry", "min_n", "loss_reduction", "sample_size", "penalty"), + default = c("6L", "15L", "0.3", "1.0", "1L", "0.0", "1.0", "0.1")) + +param <- + rule_fit() %>% + set_engine("xrf") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r xrf-param-list, echo = FALSE, results = "asis"} +param$item +``` + + +## Translation from parsnip to the underlying model call (regression) + +```{r xrf-reg} +rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) %>% + set_engine("xrf") %>% + set_mode("regression") %>% + translate() +``` + +## Translation from parsnip to the underlying model call (classification) + +```{r xrf-cls} +rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) %>% + set_engine("xrf") %>% + set_mode("classification") %>% + translate() +``` + +## Differences from the xrf package + +Note that, per the documentation in `?xrf`, transformations of the response variable are not supported. 
To +use these with `rule_fit()`, we recommend using a recipe instead of the formula method. + +Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in `rules`. Some differences in default values are: + +- `trees`: `xrf`: 100, `rules`: 15 +- `max_depth`: `xrf`: 3, `rules`: 6 + + +These differences will create a difference in the values of the `penalty` argument that `glmnet` uses. Also, \pkg{rules} can set `penalty` whereas \pkg{xrf} uses an internal 5-fold cross-validation to determine it (by default). + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +## References + + - Friedman and Popescu. "Predictive learning via rule ensembles." Ann. Appl. Stat. 2 (3) 916-954, September 2008 + 
dplyr::mutate( + item = + glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") + ) +} convert_args <- function(model_name) { envir <- get_model_env() @@ -52,6 +80,9 @@ get_dials <- function(x) { rlang::eval_tidy(cl) } +# ------------------------------------------------------------------------------ +# Write text about modes + descr_models <- function(mod, eng) { res <- get_from_env(mod) %>% dplyr::filter(engine == eng) %>% diff --git a/man/rmd/surv_reg_flexsurv.Rmd b/man/rmd/surv_reg_flexsurv.Rmd index 8708f1c35..5da1f8c44 100644 --- a/man/rmd/surv_reg_flexsurv.Rmd +++ b/man/rmd/surv_reg_flexsurv.Rmd @@ -14,18 +14,7 @@ param <- surv_reg() %>% set_engine("flexsurv") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/surv_reg_survival.Rmd b/man/rmd/surv_reg_survival.Rmd index 3480239db..85c1608a1 100644 --- a/man/rmd/surv_reg_survival.Rmd +++ b/man/rmd/surv_reg_survival.Rmd @@ -14,18 +14,7 @@ param <- surv_reg() %>% set_engine("survival") %>% set_mode("regression") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - dplyr::mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git 
a/man/rmd/survival_reg_flexsurv.Rmd b/man/rmd/survival_reg_flexsurv.Rmd new file mode 100644 index 000000000..121123e92 --- /dev/null +++ b/man/rmd/survival_reg_flexsurv.Rmd @@ -0,0 +1,43 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("survival_reg", "flexsurv")` + +## Tuning Parameters + +```{r flexsurv-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("dist"), + default = c("'weibull'")) + +param <- + survival_reg() %>% + set_engine("flexsurv") %>% + set_mode("censored regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r flexsurv-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package + +```{r flexsurv-creg} +survival_reg(dist = character(1)) %>% + set_engine("flexsurv") %>% + set_mode("censored regression") %>% + translate() +``` + +## Other details + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +For this engine, stratification cannot be specified via [`strata()`], please see the documentation of the [`flexsurv`] package for alternative specifications. + +## References + +- Jackson, C. 2016. `flexsurv`: A Platform for Parametric Survival Modeling in R. _Journal of Statistical Software_, 70(8), 1 - 33. 
diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd new file mode 100644 index 000000000..27c04cb7b --- /dev/null +++ b/man/rmd/survival_reg_survival.Rmd @@ -0,0 +1,55 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("survival_reg", "survival")` + +## Tuning Parameters + +```{r survival-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("dist"), + default = c("'weibull'")) + +param <- + survival_reg() %>% + set_engine("survival") %>% + set_mode("censored regression") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r survival-param-list, echo = FALSE, results = "asis"} +param$item +``` + +## Translation from parsnip to the original package + +```{r survival-creg} +survival_reg(dist = character(1)) %>% + set_engine("survival") %>% + set_mode("censored regression") %>% + translate() +``` + +## Other details + +Note that `model = TRUE` is needed to produce quantile predictions when there is a stratification variable and can be overridden in other cases. + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +The model formula can include _special_ terms, such as [survival::strata()]. This allows the model scale parameter to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. + +For example, in this model, the numeric column `rx` is used to estimate two different scale parameters for each value of the column: + +```{r} +library(survival) + +survival_reg() %>% + fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) %>% + extract_fit_engine() +``` + +## References + +- Kalbfleisch, J. D. and Prentice, R. L. 2002 _The statistical analysis of failure time data_, Wiley. 
diff --git a/man/rmd/svm_linear_LiblineaR.Rmd b/man/rmd/svm_linear_LiblineaR.Rmd index 7d68e91df..336cdd149 100644 --- a/man/rmd/svm_linear_LiblineaR.Rmd +++ b/man/rmd/svm_linear_LiblineaR.Rmd @@ -13,18 +13,7 @@ defaults <- param <- svm_linear() %>% set_engine("LiblineaR") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/svm_linear_kernlab.Rmd b/man/rmd/svm_linear_kernlab.Rmd index 9c7cec545..14f75f6e3 100644 --- a/man/rmd/svm_linear_kernlab.Rmd +++ b/man/rmd/svm_linear_kernlab.Rmd @@ -13,18 +13,7 @@ defaults <- param <- svm_linear() %>% set_engine("kernlab") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/svm_poly_kernlab.Rmd b/man/rmd/svm_poly_kernlab.Rmd index 179d3f157..739afec4f 100644 --- a/man/rmd/svm_poly_kernlab.Rmd +++ b/man/rmd/svm_poly_kernlab.Rmd @@ -13,18 +13,7 @@ defaults <- param <- svm_poly() %>% set_engine("kernlab") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) 
- ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/svm_rbf_kernlab.Rmd b/man/rmd/svm_rbf_kernlab.Rmd index b62abaf5f..0def3231f 100644 --- a/man/rmd/svm_rbf_kernlab.Rmd +++ b/man/rmd/svm_rbf_kernlab.Rmd @@ -13,18 +13,7 @@ defaults <- param <- svm_rbf() %>% set_engine("kernlab") %>% - tunable() %>% - dplyr::select(-source, -component, -component_id, parsnip = name) %>% - dplyr::mutate( - dials = purrr::map(call_info, get_dials), - label = purrr::map_chr(dials, ~ .x$label), - type = purrr::map_chr(dials, ~ .x$type) - ) %>% - dplyr::full_join(defaults, by = "parsnip") %>% - mutate( - item = - glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n") - ) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/tidy-example.Rmd b/man/rmd/tidy-example.Rmd new file mode 100644 index 000000000..3ebbb0538 --- /dev/null +++ b/man/rmd/tidy-example.Rmd @@ -0,0 +1,56 @@ +## An example + +```{r, warnings = FALSE} +library(dplyr) + +data(ames, package = "modeldata") + +ames <- + ames %>% + mutate(Sale_Price = log10(ames$Sale_Price), + Gr_Liv_Area = log10(ames$Gr_Liv_Area)) + +# ------------------------------------------------------------------------------ + +cb_fit <- + cubist_rules(committees = 10) %>% + set_engine("Cubist") %>% + fit(Sale_Price ~ Neighborhood + Longitude + Latitude + Gr_Liv_Area + Central_Air, + data = ames) + +cb_res <- tidy(cb_fit) +cb_res + +cb_res$estimate[[1]] +cb_res$statistic[[1]] + +# ------------------------------------------------------------------------------ + +library(recipes) + +xrf_reg_mod <- + rule_fit(trees = 10, penalty = .001) %>% + set_engine("xrf") %>% + set_mode("regression") + +# Make dummy variables since xgboost will not +ames_rec <- + recipe(Sale_Price ~ 
Neighborhood + Longitude + Latitude + + Gr_Liv_Area + Central_Air, + data = ames) %>% + step_dummy(Neighborhood, Central_Air) %>% + step_zv(all_predictors()) + +ames_processed <- prep(ames_rec) %>% bake(new_data = NULL) + +set.seed(1) +xrf_reg_fit <- + xrf_reg_mod %>% + fit(Sale_Price ~ ., data = ames_processed) + +xrf_rule_res <- tidy(xrf_reg_fit) +xrf_rule_res$rule[nrow(xrf_rule_res)] %>% rlang::parse_expr() + +xrf_col_res <- tidy(xrf_reg_fit, unit = "columns") +xrf_col_res +``` From e62decf647725ab32708e32e0d5060c07c1de0d5 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:31:34 -0500 Subject: [PATCH 06/65] move legacy code to generate engine doc lists and seealso --- R/aaa_models.R | 215 ------------------------------------------ R/knit_engine_docs.R | 220 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+), 215 deletions(-) diff --git a/R/aaa_models.R b/R/aaa_models.R index 8d28259fe..c3ccd8abf 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -1046,218 +1046,3 @@ get_encoding <- function(model) { } res } - -#' Tools for dynamically documenting packages -#' -#' @description -#' These are functions used to create dynamic documentation in Rd files -#' based on which parsnip-related packages are loaded by the user. -#' -#' These functions can be used to make dynamic lists of documentation help -#' files. \pkg{parsnip} uses these along with files in `man/rmd` which -#' contain expanded documentation for specific model/engine combinations. -#' [find_engine_files()] looks for files that have the pattern -#' `details_{model}_{engine}.Rd` to link to. These files are generated by files -#' named `man/rmd/{model}_{engine}.Rmd`. `make_engine_list()` creates a -#' list seen at the top of the model Rd files while `make_seealso_list()` -#' populates the list seen in "See Also" below. See the details section. -#' -#' @param mod A character string for the model file (e.g. 
"linear_reg") -#' @param pkg A character string for the package where the function is invoked. -#' @return -#' `make_engine_list()` returns a character string that creates a -#' bulleted list of links to more specific help files. -#' -#' `make_seealso_list()` returns a formatted character string of links. -#' -#' `find_engine_files()` returns a tibble. -#' @details -#' The \pkg{parsnip} documentation is generated _dynamically_. Part of the Rd -#' file populates a list of engines that depends on what packages are loaded -#' *at the time that the man file is loaded*. For example, if -#' another package has a new engine for `linear_reg()`, the -#' `parsnip::linear_reg()` help can show a link to a detailed help page in the -#' other package. -#' -#' To enable this, the process for a package developer is to: -#' -#' 1. Create an engine-specific R file in the `R` directory with the name -#' `{model}_{engine}.R` (e.g. `boost_tree_C5.0.R`). This has a small amount of -#' documentation, as well as the directives "`@name details_{model}_{engine}`" -#' and "`@includeRmd man/rmd/{model}_{engine}.md details`". -#' -#' 2. Copy the file in \pkg{parsnip} that is in `man/rmd/setup.Rmd` and put -#' it in the same place in your package. -#' -#' 3. Write your own `man/rmd/{model}_{engine}.Rmd` file. This can include -#' packages that are not listed in the DESCRIPTION file. Those are only -#' required when the documentation file is created locally (probably using -#' [devtools::document()]). -#' -#' 4. Run [devtools::document()] so that the Rmd content is included in the -#' Rd file. -#' -#' The examples in \pkg{parsnip} can provide guidance for how to organize -#' technical information about the models. 
-#' @name doc-tools -#' @keywords internal -#' @export -#' @examples -#' find_engine_files("linear_reg") -#' cat(make_engine_list("linear_reg")) -find_engine_files <- function(mod, pkg = "parsnip") { - - requireNamespace(pkg, quietly = TRUE) - # Get available topics - topic_names <- search_for_engine_docs(mod) - if (length(topic_names) == 0) { - return(character(0)) - } - - # Subset for our model function - eng <- strsplit(topic_names, "_") - eng <- purrr::map_chr(eng, ~ .x[length(.x)]) - eng <- tibble::tibble(engine = eng, topic = topic_names) - - # Combine them to keep the order in which they were registered - all_eng <- get_from_env(mod) %>% dplyr::distinct(engine) - all_eng$.order <- 1:nrow(all_eng) - eng <- dplyr::left_join(eng, all_eng, by = "engine") - eng <- eng[order(eng$.order),] - - # Determine and label default engine - default <- get_default_engine(mod, pkg) - eng$default <- ifelse(eng$engine == default, " (default)", "") - - eng -} - -#' @export -#' @rdname doc-tools -make_engine_list <- function(mod, pkg = "parsnip") { - eng <- find_engine_files(mod, pkg) - - if (length(eng) == 0) { - return("No engines were found within the currently loaded packages.\n\n") - } else { - main <- paste("The engine-specific pages for this model are listed ", - "below and contain the details:\n\n") - } - - res <- - glue::glue(" \\item \\code{\\link[|eng$topic|]{|eng$engine|} |eng$default| }", - .open = "|", .close = "|") - - res <- paste0(main, "\\itemize{\n", paste0(res, collapse = "\n"), "\n}") - res -} - -get_default_engine <- function(mod, pkg= "parsnip") { - cl <- rlang::call2(mod, .ns = pkg) - rlang::eval_tidy(cl)$engine -} - -#' @export -#' @rdname doc-tools -make_seealso_list <- function(mod, pkg= "parsnip") { - requireNamespace(pkg, quietly = TRUE) - eng <- find_engine_files(mod, pkg) - - main <- c("\\code{\\link[=fit.model_spec]{fit()}}", - "\\code{\\link[=set_engine]{set_engine()}}", - "\\code{\\link[=update]{update()}}") - - if (length(eng) == 0) { - 
return(paste0(main, collapse = ", ")) - } - - res <- - glue::glue("\\code{\\link[|eng$topic|]{|eng$engine| engine details}}", - .open = "|", .close = "|") - - if (pkg != "parsnip") { - main <- NULL - } - paste0(c(main, res), collapse = ", ") -} - -# These will never have documentation and we can avoid searching them. -excl_pkgs <- - c("C50", "Cubist", "earth", "flexsurv", "forecast", "glmnet", - "keras", "kernlab", "kknn", "klaR", "LiblineaR", "liquidSVM", - "magrittr", "MASS", "mda", "mixOmics", "naivebayes", "nnet", - "prophet", "pscl", "randomForest", "ranger", "rpart", "rstanarm", - "sparklyr", "stats", "survival", "xgboost", "xrf") - -search_for_engine_docs <- function(mod) { - all_deps <- get_from_env(paste0(mod, "_pkgs")) - all_deps <- unlist(all_deps$pkg) - all_deps <- unique(c("parsnip", all_deps)) - - all_deps <- all_deps[!(all_deps %in% excl_pkgs)] - res <- purrr::map(all_deps, find_details_topics, mod = mod) - res <- unique(unlist(res)) - res -} - -find_details_topics <- function(pkg, mod) { - meta_loc <- system.file("Meta/Rd.rds", package = pkg) - meta_loc <- meta_loc[meta_loc != ""] - if (length(meta_loc) > 0) { - topic_names <- readRDS(meta_loc)$Name - res <- grep(paste0("details_", mod), topic_names, value = TRUE) - if (length(res) > 0) { - res <- paste0(pkg, ":", res) - } - } else { - res <- character(0) - } - res -} - - -# For use in `set_engine()` docs -generate_set_engine_bullets <- function() { - env <- get_model_env() - models <- env$models - info <- rlang::env_get_list(env, models) - - model_engines <- purrr::map(info, get_sorted_unique_engines) - - model_prefixes <- glue::glue( - "\\code{\\link[=.{models}.]{.{models}.()}}:", - .open = ".{", - .close = "}." 
- ) - - bullets <- purrr::map2( - .x = model_prefixes, - .y = model_engines, - .f = combine_prefix_with_engines - ) - - bullets <- glue::glue("\\item {bullets}") - bullets <- glue::glue_collapse(bullets, sep = "\n") - bullets <- paste("\\itemize{", bullets, "}", sep = "\n") - - bullets -} - -sort_c <- function(x) { - withr::with_collate("C", sort(x)) -} -get_sorted_unique_engines <- function(x) { - engines <- x$engine - engines <- unique(engines) - engines <- sort_c(engines) - engines -} -combine_prefix_with_engines <- function(prefix, engines) { - if (length(engines) == 0L) { - engines <- "No engines currently available" - } else { - engines <- glue::glue_collapse(engines, sep = ", ") - } - - glue::glue("{prefix} {engines}") -} diff --git a/R/knit_engine_docs.R b/R/knit_engine_docs.R index 15de1424c..b24e4d277 100644 --- a/R/knit_engine_docs.R +++ b/R/knit_engine_docs.R @@ -27,8 +27,228 @@ knit_engine_docs <- function(pattern = NULL) { # - add is_installed() to set code with all extra dependencies # - list models by mode +# ------------------------------------------------------------------------------ extensions <- function(x) { c("baguette", "censored", "discrim", "multilevelmod", "plsmod", "poissonreg", "rules") } + +# ------------------------------------------------------------------------------ + + +#' Tools for dynamically documenting packages +#' +#' @description +#' These are functions used to create dynamic documentation in Rd files +#' based on which parsnip-related packages are loaded by the user. +#' +#' These functions can be used to make dynamic lists of documentation help +#' files. \pkg{parsnip} uses these along with files in `man/rmd` which +#' contain expanded documentation for specific model/engine combinations. +#' [find_engine_files()] looks for files that have the pattern +#' `details_{model}_{engine}.Rd` to link to. These files are generated by files +#' named `man/rmd/{model}_{engine}.Rmd`. 
`make_engine_list()` creates a +#' list seen at the top of the model Rd files while `make_seealso_list()` +#' populates the list seen in "See Also" below. See the details section. +#' +#' @param mod A character string for the model file (e.g. "linear_reg") +#' @param pkg A character string for the package where the function is invoked. +#' @return +#' `make_engine_list()` returns a character string that creates a +#' bulleted list of links to more specific help files. +#' +#' `make_seealso_list()` returns a formatted character string of links. +#' +#' `find_engine_files()` returns a tibble. +#' @details +#' The \pkg{parsnip} documentation is generated _dynamically_. Part of the Rd +#' file populates a list of engines that depends on what packages are loaded +#' *at the time that the man file is loaded*. For example, if +#' another package has a new engine for `linear_reg()`, the +#' `parsnip::linear_reg()` help can show a link to a detailed help page in the +#' other package. +#' +#' To enable this, the process for a package developer is to: +#' +#' 1. Create an engine-specific R file in the `R` directory with the name +#' `{model}_{engine}.R` (e.g. `boost_tree_C5.0.R`). This has a small amount of +#' documentation, as well as the directives "`@name details_{model}_{engine}`" +#' and "`@includeRmd man/rmd/{model}_{engine}.md details`". +#' +#' 2. Copy the file in \pkg{parsnip} that is in `man/rmd/setup.Rmd` and put +#' it in the same place in your package. +#' +#' 3. Write your own `man/rmd/{model}_{engine}.Rmd` file. This can include +#' packages that are not listed in the DESCRIPTION file. Those are only +#' required when the documentation file is created locally (probably using +#' [devtools::document()]). +#' +#' 4. Run [devtools::document()] so that the Rmd content is included in the +#' Rd file. +#' +#' The examples in \pkg{parsnip} can provide guidance for how to organize +#' technical information about the models. 
+#' @name doc-tools +#' @keywords internal +#' @export +#' @examples +#' find_engine_files("linear_reg") +#' cat(make_engine_list("linear_reg")) +find_engine_files <- function(mod, pkg = "parsnip") { + + requireNamespace(pkg, quietly = TRUE) + # Get available topics + topic_names <- search_for_engine_docs(mod) + if (length(topic_names) == 0) { + return(character(0)) + } + + # Subset for our model function + eng <- strsplit(topic_names, "_") + eng <- purrr::map_chr(eng, ~ .x[length(.x)]) + eng <- tibble::tibble(engine = eng, topic = topic_names) + + # Combine them to keep the order in which they were registered + all_eng <- get_from_env(mod) %>% dplyr::distinct(engine) + all_eng$.order <- 1:nrow(all_eng) + eng <- dplyr::left_join(eng, all_eng, by = "engine") + eng <- eng[order(eng$.order),] + + # Determine and label default engine + default <- get_default_engine(mod, pkg) + eng$default <- ifelse(eng$engine == default, " (default)", "") + + eng +} + +#' @export +#' @rdname doc-tools +make_engine_list <- function(mod, pkg = "parsnip") { + eng <- find_engine_files(mod, pkg) + + if (length(eng) == 0) { + return("No engines were found within the currently loaded packages.\n\n") + } else { + main <- paste("The engine-specific pages for this model are listed ", + "below and contain the details:\n\n") + } + + res <- + glue::glue(" \\item \\code{\\link[|eng$topic|]{|eng$engine|} |eng$default| }", + .open = "|", .close = "|") + + res <- paste0(main, "\\itemize{\n", paste0(res, collapse = "\n"), "\n}") + res +} + +get_default_engine <- function(mod, pkg= "parsnip") { + cl <- rlang::call2(mod, .ns = pkg) + rlang::eval_tidy(cl)$engine +} + +#' @export +#' @rdname doc-tools +make_seealso_list <- function(mod, pkg= "parsnip") { + requireNamespace(pkg, quietly = TRUE) + eng <- find_engine_files(mod, pkg) + + main <- c("\\code{\\link[=fit.model_spec]{fit()}}", + "\\code{\\link[=set_engine]{set_engine()}}", + "\\code{\\link[=update]{update()}}") + + if (length(eng) == 0) { + 
return(paste0(main, collapse = ", ")) + } + + res <- + glue::glue("\\code{\\link[|eng$topic|]{|eng$engine| engine details}}", + .open = "|", .close = "|") + + if (pkg != "parsnip") { + main <- NULL + } + paste0(c(main, res), collapse = ", ") +} + +# These will never have documentation and we can avoid searching them. +excl_pkgs <- + c("C50", "Cubist", "earth", "flexsurv", "forecast", "glmnet", + "keras", "kernlab", "kknn", "klaR", "LiblineaR", "liquidSVM", + "magrittr", "MASS", "mda", "mixOmics", "naivebayes", "nnet", + "prophet", "pscl", "randomForest", "ranger", "rpart", "rstanarm", + "sparklyr", "stats", "survival", "xgboost", "xrf") + +search_for_engine_docs <- function(mod) { + all_deps <- get_from_env(paste0(mod, "_pkgs")) + all_deps <- unlist(all_deps$pkg) + all_deps <- unique(c("parsnip", all_deps)) + + all_deps <- all_deps[!(all_deps %in% excl_pkgs)] + res <- purrr::map(all_deps, find_details_topics, mod = mod) + res <- unique(unlist(res)) + res +} + +find_details_topics <- function(pkg, mod) { + meta_loc <- system.file("Meta/Rd.rds", package = pkg) + meta_loc <- meta_loc[meta_loc != ""] + if (length(meta_loc) > 0) { + topic_names <- readRDS(meta_loc)$Name + res <- grep(paste0("details_", mod), topic_names, value = TRUE) + if (length(res) > 0) { + res <- paste0(pkg, ":", res) + } + } else { + res <- character(0) + } + res +} + +# For use in `set_engine()` docs +generate_set_engine_bullets <- function() { + env <- get_model_env() + models <- env$models + info <- rlang::env_get_list(env, models) + + model_engines <- purrr::map(info, get_sorted_unique_engines) + + model_prefixes <- glue::glue( + "\\code{\\link[=.{models}.]{.{models}.()}}:", + .open = ".{", + .close = "}." 
+ ) + + bullets <- purrr::map2( + .x = model_prefixes, + .y = model_engines, + .f = combine_prefix_with_engines + ) + + bullets <- glue::glue("\\item {bullets}") + bullets <- glue::glue_collapse(bullets, sep = "\n") + bullets <- paste("\\itemize{", bullets, "}", sep = "\n") + + bullets +} + +sort_c <- function(x) { + withr::with_collate("C", sort(x)) +} +get_sorted_unique_engines <- function(x) { + engines <- x$engine + engines <- unique(engines) + engines <- sort_c(engines) + engines +} +combine_prefix_with_engines <- function(prefix, engines) { + if (length(engines) == 0L) { + engines <- "No engines currently available" + } else { + engines <- glue::glue_collapse(engines, sep = ", ") + } + + glue::glue("{prefix} {engines}") +} + + From e810e0ddb3da733baca088f883b83761e294de97 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:31:51 -0500 Subject: [PATCH 07/65] rename file --- R/{knit_engine_docs.R => engine_docs.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{knit_engine_docs.R => engine_docs.R} (100%) diff --git a/R/knit_engine_docs.R b/R/engine_docs.R similarity index 100% rename from R/knit_engine_docs.R rename to R/engine_docs.R From 0607739c36070e5b39764de8ab9642daf72d9864 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 12:51:05 -0500 Subject: [PATCH 08/65] refactor code since all files reside within parsnip --- R/engine_docs.R | 71 ++++++++++++++++++++--------------------- man/doc-tools.Rd | 6 ++-- man/knit_engine_docs.Rd | 2 +- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index b24e4d277..c8d5a3538 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -95,11 +95,9 @@ extensions <- function(x) { #' @examples #' find_engine_files("linear_reg") #' cat(make_engine_list("linear_reg")) -find_engine_files <- function(mod, pkg = "parsnip") { - - requireNamespace(pkg, quietly = TRUE) +find_engine_files <- function(mod) { # Get available topics - topic_names <- 
search_for_engine_docs(mod) + topic_names <- find_details_topics(mod) if (length(topic_names) == 0) { return(character(0)) } @@ -116,33 +114,53 @@ find_engine_files <- function(mod, pkg = "parsnip") { eng <- eng[order(eng$.order),] # Determine and label default engine - default <- get_default_engine(mod, pkg) + default <- get_default_engine(mod) eng$default <- ifelse(eng$engine == default, " (default)", "") + # reorder based on default and name + non_defaults <- dplyr::filter(eng, !grepl("default", default)) + non_defaults <- + non_defaults %>% + dplyr::arrange(tolower(engine)) %>% + dplyr::mutate(.order = dplyr::row_number() + 1) + eng <- + dplyr::filter(eng, grepl("default", default)) %>% + dplyr::bind_rows(non_defaults) + eng } #' @export #' @rdname doc-tools -make_engine_list <- function(mod, pkg = "parsnip") { - eng <- find_engine_files(mod, pkg) +make_engine_list <- function(mod) { + eng <- find_engine_files(mod) if (length(eng) == 0) { return("No engines were found within the currently loaded packages.\n\n") } else { main <- paste("The engine-specific pages for this model are listed ", - "below and contain the details:\n\n") + "below by mode. 
These contain further details:\n\n") } - res <- - glue::glue(" \\item \\code{\\link[|eng$topic|]{|eng$engine|} |eng$default| }", - .open = "|", .close = "|") - - res <- paste0(main, "\\itemize{\n", paste0(res, collapse = "\n"), "\n}") + modes <- get_from_env(mod) + eng <- + dplyr::full_join(eng, modes, by = "engine") %>% + dplyr::mutate( + item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|} |default| }", + .open = "|", .close = "|") + ) %>% + dplyr::group_nest(mode) %>% + dplyr::arrange(desc(mode)) %>% + dplyr::mutate( + items = purrr::map_chr(data, ~ paste0(.x$item, collapse = "\n")), + items = paste0(mode, ":\n\n\\itemize{\n", items, "\n}") + ) + + res <- paste0(main, paste0(eng$items, collapse = "\n\n")) res } -get_default_engine <- function(mod, pkg= "parsnip") { +get_default_engine <- function(mod, pkg = "parsnip") { cl <- rlang::call2(mod, .ns = pkg) rlang::eval_tidy(cl)$engine } @@ -151,7 +169,7 @@ get_default_engine <- function(mod, pkg= "parsnip") { #' @rdname doc-tools make_seealso_list <- function(mod, pkg= "parsnip") { requireNamespace(pkg, quietly = TRUE) - eng <- find_engine_files(mod, pkg) + eng <- find_engine_files(mod) main <- c("\\code{\\link[=fit.model_spec]{fit()}}", "\\code{\\link[=set_engine]{set_engine()}}", @@ -171,26 +189,7 @@ make_seealso_list <- function(mod, pkg= "parsnip") { paste0(c(main, res), collapse = ", ") } -# These will never have documentation and we can avoid searching them. 
-excl_pkgs <- - c("C50", "Cubist", "earth", "flexsurv", "forecast", "glmnet", - "keras", "kernlab", "kknn", "klaR", "LiblineaR", "liquidSVM", - "magrittr", "MASS", "mda", "mixOmics", "naivebayes", "nnet", - "prophet", "pscl", "randomForest", "ranger", "rpart", "rstanarm", - "sparklyr", "stats", "survival", "xgboost", "xrf") - -search_for_engine_docs <- function(mod) { - all_deps <- get_from_env(paste0(mod, "_pkgs")) - all_deps <- unlist(all_deps$pkg) - all_deps <- unique(c("parsnip", all_deps)) - - all_deps <- all_deps[!(all_deps %in% excl_pkgs)] - res <- purrr::map(all_deps, find_details_topics, mod = mod) - res <- unique(unlist(res)) - res -} - -find_details_topics <- function(pkg, mod) { +find_details_topics <- function(mod, pkg = "parsnip") { meta_loc <- system.file("Meta/Rd.rds", package = pkg) meta_loc <- meta_loc[meta_loc != ""] if (length(meta_loc) > 0) { @@ -202,7 +201,7 @@ find_details_topics <- function(pkg, mod) { } else { res <- character(0) } - res + unique(res) } # For use in `set_engine()` docs diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index 48b11fd01..ce77744a7 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aaa_models.R +% Please edit documentation in R/engine_docs.R \name{doc-tools} \alias{doc-tools} \alias{find_engine_files} @@ -7,9 +7,9 @@ \alias{make_seealso_list} \title{Tools for dynamically documenting packages} \usage{ -find_engine_files(mod, pkg = "parsnip") +find_engine_files(mod) -make_engine_list(mod, pkg = "parsnip") +make_engine_list(mod) make_seealso_list(mod, pkg = "parsnip") } diff --git a/man/knit_engine_docs.Rd b/man/knit_engine_docs.Rd index 753b8c4f9..3315d0eb2 100644 --- a/man/knit_engine_docs.Rd +++ b/man/knit_engine_docs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/knit_engine_docs.R +% Please edit documentation in R/engine_docs.R \name{knit_engine_docs} 
\alias{knit_engine_docs} \title{Knit engine-specific documentation} From 5717bef8321715eeb7f97421a92006d4a824a2a5 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Nov 2021 17:39:05 -0500 Subject: [PATCH 09/65] better printing of the engine list --- R/engine_docs.R | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index c8d5a3538..634fed656 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -23,9 +23,9 @@ knit_engine_docs <- function(pattern = NULL) { } # TODO -# - simplify code to find model files -# - add is_installed() to set code with all extra dependencies -# - list models by mode +# - In Rmd, state which packages have engine code e.g. "The parsnip package +# contains rpart engines for classification and regression and the censored package +# contains an rpart engine for censored regression". # ------------------------------------------------------------------------------ @@ -107,12 +107,6 @@ find_engine_files <- function(mod) { eng <- purrr::map_chr(eng, ~ .x[length(.x)]) eng <- tibble::tibble(engine = eng, topic = topic_names) - # Combine them to keep the order in which they were registered - all_eng <- get_from_env(mod) %>% dplyr::distinct(engine) - all_eng$.order <- 1:nrow(all_eng) - eng <- dplyr::left_join(eng, all_eng, by = "engine") - eng <- eng[order(eng$.order),] - # Determine and label default engine default <- get_default_engine(mod) eng$default <- ifelse(eng$engine == default, " (default)", "") @@ -125,6 +119,7 @@ find_engine_files <- function(mod) { dplyr::mutate(.order = dplyr::row_number() + 1) eng <- dplyr::filter(eng, grepl("default", default)) %>% + dplyr::mutate(.order = 1) %>% dplyr::bind_rows(non_defaults) eng @@ -136,27 +131,28 @@ make_engine_list <- function(mod) { eng <- find_engine_files(mod) if (length(eng) == 0) { - return("No engines were found within the currently loaded packages.\n\n") + return("No engines were found for this 
model.\n\n") } else { - main <- paste("The engine-specific pages for this model are listed ", - "below by mode. These contain further details:\n\n") + modes <- get_from_env(paste0(mod, "_modes")) + modes <- modes[modes != "unknown"] + modes <- glue::glue_collapse(modes, sep = ", ", last = " and ") + modes <- glue::glue("\\code{|mod|()} can fit |modes| models.", + .open = "|", .close = "|") + main <- glue::glue("The engine-specific pages for this model are listed ", + "below. These contain further details:\n\n") } - modes <- get_from_env(mod) eng <- - dplyr::full_join(eng, modes, by = "engine") %>% + eng %>% dplyr::mutate( - item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|} |default| }", + item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|}|default|}", .open = "|", .close = "|") ) %>% - dplyr::group_nest(mode) %>% - dplyr::arrange(desc(mode)) %>% - dplyr::mutate( - items = purrr::map_chr(data, ~ paste0(.x$item, collapse = "\n")), - items = paste0(mode, ":\n\n\\itemize{\n", items, "\n}") - ) + dplyr::distinct(item) - res <- paste0(main, paste0(eng$items, collapse = "\n\n")) + items <- glue::glue_collapse(eng$item, sep = "\n") + res <- glue::glue("|main|\n\\itemize{\n|items|\n}\n\n |modes|}", + .open = "|", .close = "|") res } From b7038cb7e88354be725d0dd323f186100621acac Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 17 Nov 2021 15:05:27 -0500 Subject: [PATCH 10/65] fix bug in param object call --- man/rmd/setup.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd index 72c555507..986c6926c 100644 --- a/man/rmd/setup.Rmd +++ b/man/rmd/setup.Rmd @@ -73,10 +73,11 @@ get_arg <- function(ns, f, arg) { get_dials <- function(x) { if (any(names(x) == "range")) { - cl <- rlang::call2(x$fun, .ns = x$pgk, range = x$range) + cl <- rlang::call2(x$fun, .ns = x$pkg, range = x$range) } else { - cl <- rlang::call2(x$fun, .ns = x$pgk) + cl <- rlang::call2(x$fun, .ns = x$pkg) } + rlang::eval_tidy(cl) } 
From 4604d433b14aa66563c103d59ebf408eb46bd75d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 17 Nov 2021 15:05:42 -0500 Subject: [PATCH 11/65] avoid deprecation warning --- man/details_surv_reg_flexsurv.Rd | 9 ++------- man/details_surv_reg_survival.Rd | 14 +++----------- man/rmd/surv_reg_flexsurv.Rmd | 2 +- man/rmd/surv_reg_survival.Rmd | 2 +- 4 files changed, 7 insertions(+), 20 deletions(-) diff --git a/man/details_surv_reg_flexsurv.Rd b/man/details_surv_reg_flexsurv.Rd index 30fa7a1e7..e39e6d032 100644 --- a/man/details_surv_reg_flexsurv.Rd +++ b/man/details_surv_reg_flexsurv.Rd @@ -8,9 +8,7 @@ } \details{ For this engine, there is a single mode: regression -\subsection{Tuning Parameters}{\preformatted{## Warning: `surv_reg()` was deprecated in parsnip 0.1.6. -## Please use `survival_reg()` instead. -} +\subsection{Tuning Parameters}{ This model has 1 tuning parameters: \itemize{ @@ -22,10 +20,7 @@ This model has 1 tuning parameters: set_engine("flexsurv") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{}}\preformatted{## Warning: `surv_reg()` was deprecated in parsnip 0.1.6. -## Please use `survival_reg()` instead. - -## Parametric Survival Regression Model Specification (regression) +}\if{html}{\out{}}\preformatted{## Parametric Survival Regression Model Specification (regression) ## ## Main Arguments: ## dist = character(1) diff --git a/man/details_surv_reg_survival.Rd b/man/details_surv_reg_survival.Rd index c43244bcf..cc863e3c2 100644 --- a/man/details_surv_reg_survival.Rd +++ b/man/details_surv_reg_survival.Rd @@ -8,9 +8,7 @@ } \details{ For this engine, there is a single mode: regression -\subsection{Tuning Parameters}{\preformatted{## Warning: `surv_reg()` was deprecated in parsnip 0.1.6. -## Please use `survival_reg()` instead. 
-} +\subsection{Tuning Parameters}{ This model has 1 tuning parameters: \itemize{ @@ -22,10 +20,7 @@ This model has 1 tuning parameters: set_engine("survival") \%>\% set_mode("regression") \%>\% translate() -}\if{html}{\out{}}\preformatted{## Warning: `surv_reg()` was deprecated in parsnip 0.1.6. -## Please use `survival_reg()` instead. - -## Parametric Survival Regression Model Specification (regression) +}\if{html}{\out{}}\preformatted{## Parametric Survival Regression Model Specification (regression) ## ## Main Arguments: ## dist = character(1) @@ -58,10 +53,7 @@ two different scale parameters for each value of the column:\if{html}{\out{
\% fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) \%>\% extract_fit_engine() -}\if{html}{\out{
}}\preformatted{## Warning: `surv_reg()` was deprecated in parsnip 0.1.6. -## Please use `survival_reg()` instead. - -## Call: +}\if{html}{\out{}}\preformatted{## Call: ## survival::survreg(formula = Surv(futime, fustat) ~ age + strata(rx), ## data = data, model = TRUE) ## diff --git a/man/rmd/surv_reg_flexsurv.Rmd b/man/rmd/surv_reg_flexsurv.Rmd index 5da1f8c44..be8f93d93 100644 --- a/man/rmd/surv_reg_flexsurv.Rmd +++ b/man/rmd/surv_reg_flexsurv.Rmd @@ -26,7 +26,7 @@ param$item ## Translation from parsnip to the original package -```{r flexsurv-reg} +```{r flexsurv-reg, warning = FALSE} surv_reg(dist = character(1)) %>% set_engine("flexsurv") %>% set_mode("regression") %>% diff --git a/man/rmd/surv_reg_survival.Rmd b/man/rmd/surv_reg_survival.Rmd index 85c1608a1..ddfe53e85 100644 --- a/man/rmd/surv_reg_survival.Rmd +++ b/man/rmd/surv_reg_survival.Rmd @@ -25,7 +25,7 @@ param$item ## Translation from parsnip to the original package -```{r survival-reg} +```{r survival-reg, warning = FALSE} surv_reg(dist = character(1)) %>% set_engine("survival") %>% set_mode("regression") %>% From c77ec5f6f359b3653596aca78494936db9aab157 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 17 Nov 2021 15:10:27 -0500 Subject: [PATCH 12/65] added missing mode --- man/details_decision_tree_rpart.Rd | 20 ++++++++++++++++++++ man/rmd/decision_tree_rpart.Rmd | 10 ++++++++++ 2 files changed, 30 insertions(+) diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index 30c09081a..fe0d4fab0 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -61,6 +61,26 @@ This model has 3 tuning parameters: } } +\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% + set_engine("rpart") \%>\% + set_mode("censored regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## pec::pecRpart(formula = missing_arg(), data = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) +} +} + \subsection{Preprocessing requirements}{ This engine does not require any special encoding of the predictors. diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd index 3a55c3374..dba34e7f3 100644 --- a/man/rmd/decision_tree_rpart.Rmd +++ b/man/rmd/decision_tree_rpart.Rmd @@ -42,6 +42,16 @@ decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = dou translate() ``` + +## Translation from parsnip to the original package (censored regression) + +```{r rpart-cens-reg} +decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("censored regression") %>% + translate() +``` + ## Preprocessing requirements ```{r child = "template-tree-split-factors.Rmd"} From 0dc7c76b5b8979ec276278c4c60458400214a2f9 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sat, 11 Dec 2021 18:16:55 -0500 Subject: [PATCH 13/65] add files for SDA model --- R/discrim_linear_sda.R | 10 +++++ man/details_discrim_linear_sda.Rd | 68 +++++++++++++++++++++++++++++++ man/rmd/discrim_linear_sda.Rmd | 38 +++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 R/discrim_linear_sda.R create mode 100644 man/details_discrim_linear_sda.Rd create mode 100644 man/rmd/discrim_linear_sda.Rmd diff --git a/R/discrim_linear_sda.R b/R/discrim_linear_sda.R new file mode 100644 index 000000000..8a1172970 --- /dev/null +++ b/R/discrim_linear_sda.R @@ -0,0 +1,10 @@ +#' Linear discriminant analysis via James-Stein-type shrinkage estimation +#' +#' [sda::sda()] can fit a linear discriminant analysis model that 
can fit models +#' between classical discriminant analysis and diagonal discriminant analysis. +#' +#' @includeRmd man/rmd/discrim_linear_sda.md details +#' +#' @name details_discrim_linear_sda +#' @keywords internal +NULL diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd new file mode 100644 index 000000000..d38275e79 --- /dev/null +++ b/man/details_discrim_linear_sda.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discrim_linear_sda.R +\name{details_discrim_linear_sda} +\alias{details_discrim_linear_sda} +\title{Linear discriminant analysis via James-Stein-type shrinkage estimation} +\description{ +\code{\link[sda:sda]{sda::sda()}} can fit a linear discriminant analysis model that can fit models +between classical discriminant analysis and diagonal discriminant analysis. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. + +However, there are a few engine-specific parameters that can be set or +optimized when calling \code{\link[=set_engine]{set_engine()}}: +\itemize{ +\item \code{lambda}: the shrinkage parameters for the correlation matrix. This +maps to the parameter +\code{\link[dials:shrinkage_correlation]{dials::shrinkage_correlation()}}. +\item \code{lambda.var}: the shrinkage parameters for the predictor variances. +This maps to +\code{\link[dials:shrinkage_correlation]{dials::shrinkage_variance()}}. +\item \code{lambda.freqs}: the shrinkage parameters for the class frequencies. +This maps to +\code{\link[dials:shrinkage_correlation]{dials::shrinkage_frequencies()}}. +\item \code{diagonal}: a logical to make the model covariance diagonal or not. +This maps to +\code{\link[dials:shrinkage_correlation]{dials::diagonal_covariance()}}. +} +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear() \%>\% + set_engine("sda") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) +## +## Computational engine: sda +## +## Model fit template: +## sda::sda(Xtrain = missing_arg(), L = missing_arg(), verbose = FALSE) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Variance calculations are used in these computations so \emph{zero-variance} +predictors (i.e., with a single unique value) should be eliminated +before fitting the model. +} + +\subsection{References}{ +\itemize{ +\item Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics +prediction problems using cat scores and false non-discovery rate +control. Ann. Appl. Stat. 4: 503-519. +\href{http://arxiv.org/abs/0903.2003}{Preprint}. +} +} +} +\keyword{internal} diff --git a/man/rmd/discrim_linear_sda.Rmd b/man/rmd/discrim_linear_sda.Rmd new file mode 100644 index 000000000..28d69396d --- /dev/null +++ b/man/rmd/discrim_linear_sda.Rmd @@ -0,0 +1,38 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("discrim_linear", "sda")` + +## Tuning Parameters + +This engine has no tuning parameter arguments in [discrim_linear()]. + +However, there are a few engine-specific parameters that can be set or optimized when calling [set_engine()]: + +* `lambda`: the shrinkage parameters for the correlation matrix. This maps to the \pkg{dials} parameter [dials::shrinkage_correlation()]. + +* `lambda.var`: the shrinkage parameters for the predictor variances. This maps to [dials::shrinkage_variance()]. + +* `lambda.freqs`: the shrinkage parameters for the class frequencies. This maps to [dials::shrinkage_frequencies()]. + +* `diagonal`: a logical to make the model covariance diagonal or not. This maps to [dials::diagonal_covariance()]. 
+ +## Translation from parsnip to the original package + +```{r sda-cls} +discrim_linear() %>% + set_engine("sda") %>% + translate() +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-zv.Rmd"} +``` + +## References + + - Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](http://arxiv.org/abs/0903.2003). From e3d8e6e1efc3f95456e26548f7a6222b1360f954 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sat, 11 Dec 2021 18:37:39 -0500 Subject: [PATCH 14/65] new multilevelmod engine docs --- R/linear_reg_gls.R | 10 ++ R/linear_reg_lme.R | 10 ++ man/details_linear_reg_gls.Rd | 193 ++++++++++++++++++++++++++++++++++ man/details_linear_reg_lme.Rd | 118 +++++++++++++++++++++ man/rmd/linear_reg_gls.Rmd | 121 +++++++++++++++++++++ man/rmd/linear_reg_lme.Rmd | 70 ++++++++++++ 6 files changed, 522 insertions(+) create mode 100644 R/linear_reg_gls.R create mode 100644 R/linear_reg_lme.R create mode 100644 man/details_linear_reg_gls.Rd create mode 100644 man/details_linear_reg_lme.Rd create mode 100644 man/rmd/linear_reg_gls.Rmd create mode 100644 man/rmd/linear_reg_lme.Rmd diff --git a/R/linear_reg_gls.R b/R/linear_reg_gls.R new file mode 100644 index 000000000..c2884f164 --- /dev/null +++ b/R/linear_reg_gls.R @@ -0,0 +1,10 @@ +#' Linear regression via generalized least squares +#' +#' The `gls` engine estimates linear regression for models where the rows of the +#' data are not indpendent. 
+#' +#' @includeRmd man/rmd/linear_reg_gls.md details +#' +#' @name details_linear_reg_gls +#' @keywords internal +NULL diff --git a/R/linear_reg_lme.R b/R/linear_reg_lme.R new file mode 100644 index 000000000..46c792133 --- /dev/null +++ b/R/linear_reg_lme.R @@ -0,0 +1,10 @@ +#' Linear regression via mixed models +#' +#' The `lme` engine estimates fixed and random effect regression parameters +#' using maximum likelihood (or restricted maximum likelihood) estimation. +#' +#' @includeRmd man/rmd/linear_reg_lme.md details +#' +#' @name details_linear_reg_lme +#' @keywords internal +NULL diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd new file mode 100644 index 000000000..eda828299 --- /dev/null +++ b/man/details_linear_reg_gls.Rd @@ -0,0 +1,193 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_gls.R +\name{details_linear_reg_gls} +\alias{details_linear_reg_gls} +\title{Linear regression via generalized least squares} +\description{ +The \code{gls} engine estimates linear regression for models where the rows of the +data are not indpendent. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("gls") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Computational engine: gls +## +## Model fit template: +## nlme::gls(formula = missing_arg(), data = missing_arg()) +} +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. +} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the \emph{fixed effects} formula method when +fitting but the details of the correlation structure should be passed to +\code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) +}\if{html}{\out{
}}\preformatted{## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.4 ── + +## ✓ broom 0.7.9 ✓ tibble 3.1.6 +## ✓ ggplot2 3.3.5 ✓ tidyr 1.1.4 +## ✓ infer 1.0.0 ✓ workflows 0.2.4.9000 +## ✓ purrr 0.3.4 ✓ workflowsets 0.1.0 +## ✓ recipes 0.1.17.9000 ✓ yardstick 0.0.8 +## ✓ rsample 0.1.0 + +## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── +## x purrr::discard() masks scales::discard() +## x tidyr::expand() masks Matrix::expand() +## x dplyr::filter() masks stats::filter() +## x dplyr::lag() masks stats::lag() +## x dials::max_rules() masks rules::max_rules() +## x tidyr::pack() masks Matrix::pack() +## x recipes::step() masks stats::step() +## x tidyr::unpack() masks Matrix::unpack() +## x recipes::update() masks Matrix::update(), stats::update() +## • Learn how to get started at https://www.tidymodels.org/start/ +}\if{html}{\out{
}}\preformatted{# load nlme to be able to use the `cor*()` functions +library(nlme) +}\if{html}{\out{
}}\preformatted{## +## Attaching package: 'nlme' + +## The following object is masked from 'package:dplyr': +## +## collapse +}\if{html}{\out{
}}\preformatted{data("riesby") + +linear_reg() \%>\% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) \%>\% + fit(depr_score ~ week, data = riesby) +}\if{html}{\out{
}}\preformatted{## parsnip model object +## +## Generalized least squares fit by REML +## Model: depr_score ~ week +## Data: data +## Log-restricted-likelihood: -765.0148 +## +## Coefficients: +## (Intercept) week +## -4.953439 -2.119678 +## +## Correlation Structure: Compound symmetry +## Formula: ~1 | subject +## Parameter estimate(s): +## Rho +## 0.6820145 +## Degrees of freedom: 250 total; 248 residual +## Residual standard error: 6.868785 +} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +gls_spec <- + linear_reg() \%>\% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) + +gls_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) \%>\% + add_model(gls_spec, formula = depr_score ~ week) + +fit(gls_wflow, data = riesby) +}\if{html}{\out{
}} +} +} +\section{Degrees of freedom}{ +Note that \code{\link[nlme:lme]{nlme::lme()}} and \code{\link[nlme:gls]{nlme::gls()}} +can fit the same model but will count degrees of freedom differently. If +there are \code{n} data points, \code{p} fixed effects parameters, and \code{q} random +effect parameters, the residual degrees of freedom are: +\itemize{ +\item \code{lme}: n - p - q +\item \code{gls}: n - p +} + +As a result, p-values will be different. For example, we can fit the +same model using different estimation methods (assuming a positive +covariance value):\if{html}{\out{
}}\preformatted{gls_fit <- + linear_reg() \%>\% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) \%>\% + fit(depr_score ~ week, data = riesby) + +lme_fit <- + linear_reg() \%>\% + set_engine("lme", random = ~ 1 | subject) \%>\% + fit(depr_score ~ week, data = riesby) +}\if{html}{\out{
}} + +The estimated within-subject correlations are the same:\if{html}{\out{
}}\preformatted{library(ape) +}\if{html}{\out{
}}\preformatted{## +## Attaching package: 'ape' + +## The following object is masked from 'package:rsample': +## +## complement +}\if{html}{\out{
}}\preformatted{# lme, use ape package: +lme_within_sub <- varcomp(lme_fit$fit)/sum(varcomp(lme_fit$fit)) +lme_within_sub["subject"] +}\if{html}{\out{
}}\preformatted{## subject +## 0.6820145 +}\if{html}{\out{
}}\preformatted{# gls: +summary(gls_fit$fit$modelStruct) +}\if{html}{\out{
}}\preformatted{## Correlation Structure: Compound symmetry +## Formula: ~1 | subject +## Parameter estimate(s): +## Rho +## 0.6820145 +} + +as are the fixed effects (and their standard errors):\if{html}{\out{
}}\preformatted{nlme::fixef(lme_fit$fit) +}\if{html}{\out{
}}\preformatted{## (Intercept) week +## -4.953439 -2.119678 +}\if{html}{\out{
}}\preformatted{coef(gls_fit$fit) +}\if{html}{\out{
}}\preformatted{## (Intercept) week +## -4.953439 -2.119678 +} + +However, the p-values for the fixed effects are different:\if{html}{\out{
}}\preformatted{library(broom.mixed) + +# lme: +lme_fit \%>\% tidy() \%>\% filter(group == "fixed") \%>\% select(-group, -effect) +}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 6 +## term estimate std.error df statistic p.value +## +## 1 (Intercept) -4.95 0.808 183 -6.13 5.37e- 9 +## 2 week -2.12 0.224 183 -9.47 1.41e-17 +}\if{html}{\out{
}}\preformatted{# gls: +gls_fit \%>\% tidy() +}\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 5 +## term estimate std.error statistic p.value +## +## 1 (Intercept) -4.95 0.808 -6.13 3.50e- 9 +## 2 week -2.12 0.224 -9.47 2.26e-18 +} +\subsection{References}{ +\itemize{ +\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and +S-PLUS}. Springer, New York, NY +} +} +} + +\keyword{internal} diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd new file mode 100644 index 000000000..c0cc5f360 --- /dev/null +++ b/man/details_linear_reg_lme.Rd @@ -0,0 +1,118 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_lme.R +\name{details_linear_reg_lme} +\alias{details_linear_reg_lme} +\title{Linear regression via mixed models} +\description{ +The \code{lme} engine estimates fixed and random effect regression parameters +using maximum likelihood (or restricted maximum likelihood) estimation. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("lme") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Computational engine: lme +## +## Model fit template: +## nlme::lme(fixed = missing_arg(), data = missing_arg()) +} +} + +\subsection{Predicting new samples}{ + +This model can use subject-specific coefficient estimates to make +predictions (i.e. partial pooling). For example, this equation shows the +linear predictor (\emph{η}) for a random intercept: + +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} + +where \emph{i} denotes the \code{i}th independent experimental unit +(e.g. subject). When the model has seen subject \code{i}, it can use that +subject’s data to adjust the \emph{population} intercept to be more specific +to that subjects results. + +What happens when data are being predicted for a subject that was not +used in the model fit? In that case, this package uses \emph{only} the +population parameter estimates for prediction: + +\emph{η̂}\if{html}{\out{}}\emph{i}′\if{html}{\out{}} = \emph{β̂}\if{html}{\out{}}0\if{html}{\out{}} + \emph{β̂**x}\if{html}{\out{}}\emph{i}′1\if{html}{\out{}} + +Depending on what covariates are in the model, this might have the +effect of making the same prediction for all new samples. The population +parameters are the “best estimate” for a subject that was not included +in the model fit. + +The tidymodels framework deliberately constrains predictions for new +data to not use the training set or other data (to prevent information +leakage). +} + +\subsection{Preprocessing requirements}{ + +There are no specific preprocessing needs. However, it is helpful to +keep the clustering/subject identifier column as factor or character +(instead of making them into dummy variables). See the examples in the +next section. 
+} + +\subsection{Other details}{ + +The model can accept case weights. + +With \code{parsnip}, we suggest using the \emph{fixed effects} formula method when +fitting but the random effects formula should be passed to +\code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) +data("riesby") + +linear_reg() \%>\% + set_engine("lme", random = ~ 1|subject) \%>\% + fit(depr_score ~ week, data = riesby) +}\if{html}{\out{
}} + +When using the general tidymodels infrastructure, it may be better to +use a workflow. In this case, you can add the appropriate columns using +\code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) + +lme_spec <- + linear_reg() \%>\% + set_engine("lme", random = ~ 1|subject) + +lme_wflow <- + workflow() \%>\% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) \%>\% + add_model(lme_spec, formula = depr_score ~ week) + +fit(lme_wflow, data = riesby) +}\if{html}{\out{
}} +} + +\subsection{References}{ +\itemize{ +\item J Pinheiro, and D Bates. 2000. \emph{Mixed-effects models in S and +S-PLUS}. Springer, New York, NY +\item West, K, Band Welch, and A Galecki. 2014. \emph{Linear Mixed Models: A +Practical Guide Using Statistical Software}. CRC Press. +\item Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for +statistical modelling in fisheries biology. \emph{ICES Journal of Marine +Science}, Volume 72, Issue 5, Pages 1245–1256. +\item Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, +Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. \emph{A brief +introduction to mixed effects modelling and multi-model inference in +ecology}. PeerJ 6:e4794. +\item DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through +Data Simulation. 2021. \emph{Advances in Methods and Practices in +Psychological Science}. +} +} +} +\keyword{internal} diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd new file mode 100644 index 000000000..c41937186 --- /dev/null +++ b/man/rmd/linear_reg_gls.Rmd @@ -0,0 +1,121 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "gls")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r gls-csl} +linear_reg() %>% + set_engine("gls") %>% + set_mode("regression") %>% + translate() +``` + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. 
+ +With `parsnip`, we suggest using the _fixed effects_ formula method when fitting but the details of the correlation structure should be passed to `set_engine()` since it is an irregular (but required) argument: + +```{r} +library(tidymodels) +# load nlme to be able to use the `cor*()` functions +library(nlme) + +data("riesby") + +linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) %>% + fit(depr_score ~ week, data = riesby) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +gls_spec <- + linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) + +gls_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(gls_spec, formula = depr_score ~ week) + +fit(gls_wflow, data = riesby) +``` + +# Degrees of freedom + +Note that [nlme::lme()] and [nlme::gls()] can fit the same model but will count degrees of freedom differently. If there are `n` data points, `p` fixed effects parameters, and `q` random effect parameters, the residual degrees of freedom are: + +* `lme`: n - p - q +* `gls`: n - p + +As a result, p-values will be different. 
For example, we can fit the same model using different estimation methods (assuming a positive covariance value): + +```{r} +gls_fit <- + linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) %>% + fit(depr_score ~ week, data = riesby) + +lme_fit <- + linear_reg() %>% + set_engine("lme", random = ~ 1 | subject) %>% + fit(depr_score ~ week, data = riesby) +``` + +The estimated within-subject correlations are the same: + +```{r} +library(ape) + +# lme, use ape package: +lme_within_sub <- varcomp(lme_fit$fit)/sum(varcomp(lme_fit$fit)) +lme_within_sub["subject"] + +# gls: +summary(gls_fit$fit$modelStruct) +``` + +as are the fixed effects (and their standard errors): + +```{r} +nlme::fixef(lme_fit$fit) +coef(gls_fit$fit) +``` + +However, the p-values for the fixed effects are different: + +```{r, include = FALSE} +library(broom.mixed) +``` +```{r} +library(broom.mixed) + +# lme: +lme_fit %>% tidy() %>% filter(group == "fixed") %>% select(-group, -effect) + +# gls: +gls_fit %>% tidy() +``` + + + +## References + +- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd new file mode 100644 index 000000000..29ec56cb1 --- /dev/null +++ b/man/rmd/linear_reg_lme.Rmd @@ -0,0 +1,70 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "lme")` + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +```{r lme-csl} +linear_reg() %>% + set_engine("lme") %>% + set_mode("regression") %>% + translate() +``` + +```{r child = "no-pooling.Rmd"} +``` + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. 
+ +## Other details + +The model can accept case weights. + +With `parsnip`, we suggest using the _fixed effects_ formula method when fitting but the random effects formula should be passed to `set_engine()` since it is an irregular (but required) argument: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("lme", random = ~ 1|subject) %>% + fit(depr_score ~ week, data = riesby) +``` + +When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +lme_spec <- + linear_reg() %>% + set_engine("lme", random = ~ 1|subject) + +lme_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(lme_spec, formula = depr_score ~ week) + +fit(lme_wflow, data = riesby) +``` + +## References + +- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + +- West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + +- Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + +- Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + +- DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. 
+ From c22426629e8a0dd8c702c3d8179dd5d4af2d60dc Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Sat, 11 Dec 2021 19:27:27 -0500 Subject: [PATCH 15/65] prototypes for engine extension package notes --- NAMESPACE | 1 + R/engine_docs.R | 38 ++++++++++++++++++++++++++++++++++++++ R/linear_reg.R | 6 ++++-- man/doc-tools.Rd | 3 +++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8813a7eab..24953c522 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -225,6 +225,7 @@ export(null_model) export(null_value) export(nullmodel) export(parsnip_addin) +export(pkg_extension_note) export(pls) export(poisson_reg) export(pred_value_template) diff --git a/R/engine_docs.R b/R/engine_docs.R index 634fed656..622ab93de 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -246,4 +246,42 @@ combine_prefix_with_engines <- function(prefix, engines) { glue::glue("{prefix} {engines}") } +keep_ext <- function(x, pkgs) { + x <- x[x %in% pkgs] + if (length(x) > 0) { + x <- paste0("\\pkg{", x, "}") + x <- glue::glue_collapse(x, sep = ", ", last = " and ") + x <- paste0(" (may require: ", x, ")") + } else { + x <- "" + } + x +} + +get_extension_pkgs <- function(mod) { + deps <- + get_from_env(paste0(mod, "_pkgs")) %>% + dplyr::mutate(ext = purrr::map_chr(pkg, keep_ext, parsnip:::extensions())) + dplyr::select(deps, engine, ext) +} +#' @export +#' @rdname doc-tools +pkg_extension_note <- function(mod) { + ext_pkgs <- tibble::tibble(pkg = parsnip:::extensions()) + deps <- + get_from_env(paste0(mod, "_pkgs")) %>% + tidyr::unnest(cols = c(pkg)) %>% + dplyr::inner_join(ext_pkgs, by = "pkg") %>% + dplyr::arrange(tolower(engine)) %>% + dplyr::mutate( + pkg = paste0("\\pkg{", pkg, "}"), + engine = paste0("`", engine, "`") + ) %>% + dplyr::group_nest(pkg) %>% + dplyr::mutate(note = purrr::map_chr(data, ~ glue::glue_collapse(.x$engine, sep = ", ", last = " and "))) %>% + dplyr::mutate(note = glue::glue("Note that engine(s) {note} may require extension package {pkg}.\n\n")) %>% 
+ purrr::pluck("note") + + paste(deps, collapse = "") +} diff --git a/R/linear_reg.R b/R/linear_reg.R index 58f1cb9cc..ef6d64fa4 100644 --- a/R/linear_reg.R +++ b/R/linear_reg.R @@ -5,11 +5,13 @@ #' `linear_reg()` defines a model that can predict numeric values from #' predictors using a linear function. #' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' There are different ways to fit this model. The method of estimation is +#' chosen by setting the model _engine_. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")} #' +#' \Sexpr[stage=render,results=rd]{parsnip:::pkg_extension_note("linear_reg")} +#' #' More information on how \pkg{parsnip} is used for modeling is at #' \url{https://www.tidymodels.org/}. #' diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index ce77744a7..1c3e7d869 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -5,6 +5,7 @@ \alias{find_engine_files} \alias{make_engine_list} \alias{make_seealso_list} +\alias{pkg_extension_note} \title{Tools for dynamically documenting packages} \usage{ find_engine_files(mod) @@ -12,6 +13,8 @@ find_engine_files(mod) make_engine_list(mod) make_seealso_list(mod, pkg = "parsnip") + +pkg_extension_note(mod) } \arguments{ \item{mod}{A character string for the model file (e.g. "linear_reg")} From 546cded0a829f790e0237e801817f9d72db822ff Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Fri, 17 Dec 2021 14:43:00 -0500 Subject: [PATCH 16/65] some brulee files --- R/mlp_brulee.R | 9 +++++ man/rmd/mlp_brulee.Rmd | 82 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 R/mlp_brulee.R create mode 100644 man/rmd/mlp_brulee.Rmd diff --git a/R/mlp_brulee.R b/R/mlp_brulee.R new file mode 100644 index 000000000..29571ae83 --- /dev/null +++ b/R/mlp_brulee.R @@ -0,0 +1,9 @@ +#' Multilayer perceptron via brulee +#' +#' [brulee::brulee_mlp()] fits a neural networks. 
+#' +#' @includeRmd man/rmd/mlp_brulee.md details +#' +#' @name details_mlp_brulee +#' @keywords internal +NULL diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd new file mode 100644 index 000000000..263e9c807 --- /dev/null +++ b/man/rmd/mlp_brulee.Rmd @@ -0,0 +1,82 @@ +```{r, child = "setup.Rmd", include = FALSE} +``` + +`r descr_models("mlp", "brulee")` + +## Tuning Parameters + +```{r brulee-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("hidden_units", "penalty", "dropout", "epochs", "learn_rate", "activation"), + default = c("3L", "0.0", "0.0", "100L", "0.01", "'relu'")) + +param <- + mlp() %>% + set_engine("brulee") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r brulee-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Both `penalty` and `dropout` should be avoided in the same model. + +Other engine arguments of interest: + + - `batch_size()`: An integer for the number of training set points in each batch. + - `class_weights()`: Numeric class weights. See [brulee::brulee_mlp()]. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). + + +## Translation from parsnip to the original package (regression) + +```{r brulee-reg} +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) %>% + set_engine("brulee") %>% + set_mode("regression") %>% + translate() +``` + +Note that parsnip automatically sets linear activation in the last layer. 
+ +## Translation from parsnip to the original package (classification) + +```{r brulee-cls} +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) %>% + set_engine("brulee") %>% + set_mode("classification") %>% + translate() +``` + + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + + + From 9e6c22da44cce6d31a3af3f89a6a946d2082fd2d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 20 Dec 2021 11:40:33 -0500 Subject: [PATCH 17/65] add a file with engine/model information --- NAMESPACE | 1 + R/engine_docs.R | 57 ++++++++++++++++++- inst/models.tsv | 104 ++++++++++++++++++++++++++++++++++ man/update_model_info_file.Rd | 28 +++++++++ 4 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 inst/models.tsv create mode 100644 man/update_model_info_file.Rd diff --git a/NAMESPACE b/NAMESPACE index 24953c522..f7daf12ca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -282,6 +282,7 @@ export(translate.default) export(update_dot_check) export(update_engine_parameters) export(update_main_parameters) +export(update_model_info_file) export(varying) export(varying_args) export(xgb_train) diff --git a/R/engine_docs.R b/R/engine_docs.R index 622ab93de..d602480bf 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -36,6 +36,49 @@ extensions <- function(x) { # ------------------------------------------------------------------------------ +#' Save information about models +#' @description +#' This function writes a tab delimited file to the package to capture +#' information about the known models. This information includes packages in +#' the tidymodels GitHub repository as well as packages that are know to work +#' well with tidymodels packages (e.g. \pkg{tune}, etc.). 
There are likely +#' other model definitions in other extension packages that are not included +#' here that do not follow the +#' [model implementation guidelines](https://tidymodels.github.io/model-implementation-principles) +#' or do not work with packages other than \pkg{parsnip}. +#' +#' These data are used to document engines for each model function man page. +#' @keywords internal +#' @param path A character string for the location of the tab delimited file. +#' @details +#' It is highly recommended that the known parsnip extension packages are loaded. +#' The unexported \pkg{parsnip} function `extensions()` will list these. +#' @export +update_model_info_file <- function(path = "inst/models.tsv") { + mods <- get_from_env("models") + info <- + purrr::map_dfr(mods, ~ get_from_env(.x) %>% dplyr::mutate(model = .x)) %>% + dplyr::arrange(model, mode, engine) %>% + dplyr::select(model, mode, engine) + exts <- + purrr::map_dfr( + mods, + ~ get_from_env(paste0(.x, "_pkgs")) %>% dplyr::mutate(model = .x) + ) %>% + tidyr::unnest(cols = "pkg") %>% + dplyr::inner_join(tibble::tibble(pkg = extensions()), by = "pkg") %>% + dplyr::distinct(engine, model) %>% + dplyr::mutate(extension = TRUE) + info <- + dplyr::left_join(info, exts, by = c("model", "engine")) %>% + dplyr::mutate(extension = ifelse(is.na(extension), FALSE, extension)) + + csv <- utils::write.table(info, file = path, row.names = FALSE, sep = "\t") + invisible(info) +} + +# ------------------------------------------------------------------------------ + +#' Tools for dynamically documenting packages #' @@ -142,10 +185,22 @@ make_engine_list <- function(mod) { "below. 
These contain further details:\n\n") } + exts <- + read.delim(system.file("models.tsv", package = "parsnip")) %>% + dplyr::filter(model == mod) %>% + dplyr::group_by(engine) %>% + dplyr::summarize(extension = any(extension)) %>% + dplyr::mutate( + extension = ifelse(extension, " (may require a parsnip extension package)", "") + ) + eng <- dplyr::left_join(eng, exts, by = "engine") + + eng <- eng %>% + dplyr::arrange(.order) %>% dplyr::mutate( - item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|}|default|}", + item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|}|default||extension|}", .open = "|", .close = "|") ) %>% dplyr::distinct(item) diff --git a/inst/models.tsv b/inst/models.tsv new file mode 100644 index 000000000..b614c4cef --- /dev/null +++ b/inst/models.tsv @@ -0,0 +1,104 @@ +"model" "mode" "engine" "extension" +"bag_mars" "classification" "earth" TRUE +"bag_mars" "regression" "earth" TRUE +"bag_tree" "censored regression" "rpart" TRUE +"bag_tree" "classification" "C5.0" TRUE +"bag_tree" "classification" "rpart" TRUE +"bag_tree" "regression" "rpart" TRUE +"bart" "classification" "dbarts" FALSE +"bart" "regression" "dbarts" FALSE +"boost_tree" "censored regression" "mboost" TRUE +"boost_tree" "classification" "C5.0" FALSE +"boost_tree" "classification" "spark" FALSE +"boost_tree" "classification" "xgboost" FALSE +"boost_tree" "regression" "spark" FALSE +"boost_tree" "regression" "xgboost" FALSE +"C5_rules" "classification" "C5.0" TRUE +"cubist_rules" "regression" "Cubist" TRUE +"decision_tree" "censored regression" "party" TRUE +"decision_tree" "censored regression" "rpart" TRUE +"decision_tree" "classification" "C5.0" FALSE +"decision_tree" "classification" "rpart" TRUE +"decision_tree" "classification" "spark" FALSE +"decision_tree" "regression" "rpart" TRUE +"decision_tree" "regression" "spark" FALSE +"discrim_flexible" "classification" "earth" TRUE +"discrim_linear" "classification" "MASS" TRUE +"discrim_linear" "classification" "mda" 
TRUE +"discrim_linear" "classification" "sda" TRUE +"discrim_linear" "classification" "sparsediscrim" TRUE +"discrim_quad" "classification" "MASS" TRUE +"discrim_quad" "classification" "sparsediscrim" TRUE +"discrim_regularized" "classification" "klaR" TRUE +"gen_additive_mod" "classification" "mgcv" FALSE +"gen_additive_mod" "regression" "mgcv" FALSE +"linear_reg" "regression" "gee" TRUE +"linear_reg" "regression" "glmnet" FALSE +"linear_reg" "regression" "gls" TRUE +"linear_reg" "regression" "keras" FALSE +"linear_reg" "regression" "lm" FALSE +"linear_reg" "regression" "lme" TRUE +"linear_reg" "regression" "lmer" TRUE +"linear_reg" "regression" "spark" FALSE +"linear_reg" "regression" "stan" FALSE +"linear_reg" "regression" "stan_glmer" TRUE +"logistic_reg" "classification" "gee" TRUE +"logistic_reg" "classification" "glm" FALSE +"logistic_reg" "classification" "glmer" TRUE +"logistic_reg" "classification" "glmnet" FALSE +"logistic_reg" "classification" "keras" FALSE +"logistic_reg" "classification" "LiblineaR" FALSE +"logistic_reg" "classification" "spark" FALSE +"logistic_reg" "classification" "stan" FALSE +"logistic_reg" "classification" "stan_glmer" TRUE +"mars" "classification" "earth" FALSE +"mars" "regression" "earth" FALSE +"mlp" "classification" "keras" FALSE +"mlp" "classification" "nnet" FALSE +"mlp" "regression" "keras" FALSE +"mlp" "regression" "nnet" FALSE +"multinom_reg" "classification" "glmnet" FALSE +"multinom_reg" "classification" "keras" FALSE +"multinom_reg" "classification" "nnet" FALSE +"multinom_reg" "classification" "spark" FALSE +"naive_Bayes" "classification" "klaR" TRUE +"naive_Bayes" "classification" "naivebayes" TRUE +"nearest_neighbor" "classification" "kknn" FALSE +"nearest_neighbor" "regression" "kknn" FALSE +"null_model" "classification" "parsnip" FALSE +"null_model" "regression" "parsnip" FALSE +"pls" "classification" "mixOmics" TRUE +"pls" "regression" "mixOmics" TRUE +"poisson_reg" "regression" "gee" TRUE +"poisson_reg" 
"regression" "glm" TRUE +"poisson_reg" "regression" "glmer" TRUE +"poisson_reg" "regression" "glmnet" TRUE +"poisson_reg" "regression" "hurdle" TRUE +"poisson_reg" "regression" "stan" TRUE +"poisson_reg" "regression" "stan_glmer" TRUE +"poisson_reg" "regression" "zeroinfl" TRUE +"proportional_hazards" "censored regression" "glmnet" TRUE +"proportional_hazards" "censored regression" "survival" TRUE +"rand_forest" "censored regression" "party" TRUE +"rand_forest" "classification" "randomForest" FALSE +"rand_forest" "classification" "ranger" FALSE +"rand_forest" "classification" "spark" FALSE +"rand_forest" "regression" "randomForest" FALSE +"rand_forest" "regression" "ranger" FALSE +"rand_forest" "regression" "spark" FALSE +"rule_fit" "classification" "xrf" TRUE +"rule_fit" "regression" "xrf" TRUE +"surv_reg" "regression" "flexsurv" FALSE +"surv_reg" "regression" "survival" FALSE +"survival_reg" "censored regression" "flexsurv" TRUE +"survival_reg" "censored regression" "survival" TRUE +"svm_linear" "classification" "kernlab" FALSE +"svm_linear" "classification" "LiblineaR" FALSE +"svm_linear" "regression" "kernlab" FALSE +"svm_linear" "regression" "LiblineaR" FALSE +"svm_poly" "classification" "kernlab" FALSE +"svm_poly" "regression" "kernlab" FALSE +"svm_rbf" "classification" "kernlab" FALSE +"svm_rbf" "classification" "liquidSVM" FALSE +"svm_rbf" "regression" "kernlab" FALSE +"svm_rbf" "regression" "liquidSVM" FALSE diff --git a/man/update_model_info_file.Rd b/man/update_model_info_file.Rd new file mode 100644 index 000000000..c91d9953e --- /dev/null +++ b/man/update_model_info_file.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/engine_docs.R +\name{update_model_info_file} +\alias{update_model_info_file} +\title{Save information about models} +\usage{ +update_model_info_file(path = "inst/models.tsv") +} +\arguments{ +\item{path}{A character string for the location of the tab delimited file.} +} +\description{ 
+This function writes a tab delimited file to the package to capture +information about the known models. This information includes packages in +the tidymodels GitHub repository as well as packages that are known to work +well with tidymodels packages (e.g. \pkg{tune}, etc.). There are likely +other model definitions in other extension packages that are not included +here that do not follow the +\href{https://tidymodels.github.io/model-implementation-principles}{model implementation guidelines} +or do not work with packages other than \pkg{parsnip}. + +These data are used to document engines for each model function man page. +} +\details{ +It is highly recommended that the known parsnip extension packages are loaded. +The unexported \pkg{parsnip} function \code{extensions()} will list these. +} +\keyword{internal} From 547aca1833fc4bd89288dfba419bf11df6a8321b Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 20 Dec 2021 12:12:26 -0500 Subject: [PATCH 18/65] keep extension package name --- R/engine_docs.R | 17 ++-- inst/models.tsv | 211 +++++++++++++++++++++++----------------- man/rmd/setup.Rmd | 12 +++ 3 files changed, 126 insertions(+), 114 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index d602480bf..5dbdaa269 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -29,7 +29,7 @@ knit_engine_docs <- function(pattern = NULL) { # ------------------------------------------------------------------------------ -extensions <- function(x) { +extensions <- function() { c("baguette", "censored", "discrim", "multilevelmod", "plsmod", "poissonreg", "rules") } @@ -66,12 +66,9 @@ update_model_info_file <- function(path = "inst/models.tsv") { ~ get_from_env(paste0(.x, "_pkgs")) %>% dplyr::mutate(model = .x) ) %>% tidyr::unnest(cols = "pkg") %>% - dplyr::inner_join(tibble::tibble(pkg = extensions()), by = "pkg") %>% - dplyr::distinct(engine, model) %>% - dplyr::mutate(extension = TRUE) - info <- - dplyr::left_join(info, exts, by = c("model", "engine")) %>% -
dplyr::mutate(extension = ifelse(is.na(extension), FALSE, extension)) + dplyr::inner_join(tibble::tibble(pkg = extensions()), by = "pkg") + + info <- dplyr::left_join(info, exts, by = c("model", "engine")) csv <- utils::write.table(info, file = path, row.names = FALSE, sep = "\t") invisible(info) @@ -189,9 +186,9 @@ make_engine_list <- function(mod) { read.delim(system.file("models.tsv", package = "parsnip")) %>% dplyr::filter(model == mod) %>% dplyr::group_by(engine) %>% - dplyr::summarize(extension = any(extension)) %>% + dplyr::summarize(extensions = sum(!is.na(pkg))) %>% dplyr::mutate( - extension = ifelse(extension, " (may require a parsnip extension package)", "") + has_ext = ifelse(extensions > 0, " (may require a parsnip extension package)", "") ) eng <- dplyr::left_join(eng, exts, by = "engine") @@ -200,7 +197,7 @@ make_engine_list <- function(mod) { eng %>% dplyr::arrange(.order) %>% dplyr::mutate( - item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|}|default||extension|}", + item = glue::glue(" \\item \\code{\\link[|topic|]{|engine|}|default||has_ext|}", .open = "|", .close = "|") ) %>% dplyr::distinct(item) diff --git a/inst/models.tsv b/inst/models.tsv index b614c4cef..0db9469f4 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -1,104 +1,107 @@ -"model" "mode" "engine" "extension" -"bag_mars" "classification" "earth" TRUE -"bag_mars" "regression" "earth" TRUE -"bag_tree" "censored regression" "rpart" TRUE -"bag_tree" "classification" "C5.0" TRUE -"bag_tree" "classification" "rpart" TRUE -"bag_tree" "regression" "rpart" TRUE -"bart" "classification" "dbarts" FALSE -"bart" "regression" "dbarts" FALSE -"boost_tree" "censored regression" "mboost" TRUE -"boost_tree" "classification" "C5.0" FALSE -"boost_tree" "classification" "spark" FALSE -"boost_tree" "classification" "xgboost" FALSE -"boost_tree" "regression" "spark" FALSE -"boost_tree" "regression" "xgboost" FALSE -"C5_rules" "classification" "C5.0" TRUE -"cubist_rules" "regression" "Cubist" 
TRUE -"decision_tree" "censored regression" "party" TRUE -"decision_tree" "censored regression" "rpart" TRUE -"decision_tree" "classification" "C5.0" FALSE -"decision_tree" "classification" "rpart" TRUE -"decision_tree" "classification" "spark" FALSE -"decision_tree" "regression" "rpart" TRUE -"decision_tree" "regression" "spark" FALSE -"discrim_flexible" "classification" "earth" TRUE -"discrim_linear" "classification" "MASS" TRUE -"discrim_linear" "classification" "mda" TRUE -"discrim_linear" "classification" "sda" TRUE -"discrim_linear" "classification" "sparsediscrim" TRUE -"discrim_quad" "classification" "MASS" TRUE -"discrim_quad" "classification" "sparsediscrim" TRUE -"discrim_regularized" "classification" "klaR" TRUE -"gen_additive_mod" "classification" "mgcv" FALSE -"gen_additive_mod" "regression" "mgcv" FALSE -"linear_reg" "regression" "gee" TRUE -"linear_reg" "regression" "glmnet" FALSE -"linear_reg" "regression" "gls" TRUE -"linear_reg" "regression" "keras" FALSE -"linear_reg" "regression" "lm" FALSE -"linear_reg" "regression" "lme" TRUE -"linear_reg" "regression" "lmer" TRUE -"linear_reg" "regression" "spark" FALSE -"linear_reg" "regression" "stan" FALSE -"linear_reg" "regression" "stan_glmer" TRUE -"logistic_reg" "classification" "gee" TRUE -"logistic_reg" "classification" "glm" FALSE -"logistic_reg" "classification" "glmer" TRUE -"logistic_reg" "classification" "glmnet" FALSE -"logistic_reg" "classification" "keras" FALSE -"logistic_reg" "classification" "LiblineaR" FALSE -"logistic_reg" "classification" "spark" FALSE -"logistic_reg" "classification" "stan" FALSE -"logistic_reg" "classification" "stan_glmer" TRUE -"mars" "classification" "earth" FALSE -"mars" "regression" "earth" FALSE -"mlp" "classification" "keras" FALSE -"mlp" "classification" "nnet" FALSE -"mlp" "regression" "keras" FALSE -"mlp" "regression" "nnet" FALSE -"multinom_reg" "classification" "glmnet" FALSE -"multinom_reg" "classification" "keras" FALSE -"multinom_reg" "classification" 
"nnet" FALSE -"multinom_reg" "classification" "spark" FALSE -"naive_Bayes" "classification" "klaR" TRUE -"naive_Bayes" "classification" "naivebayes" TRUE -"nearest_neighbor" "classification" "kknn" FALSE -"nearest_neighbor" "regression" "kknn" FALSE -"null_model" "classification" "parsnip" FALSE -"null_model" "regression" "parsnip" FALSE -"pls" "classification" "mixOmics" TRUE -"pls" "regression" "mixOmics" TRUE -"poisson_reg" "regression" "gee" TRUE -"poisson_reg" "regression" "glm" TRUE -"poisson_reg" "regression" "glmer" TRUE -"poisson_reg" "regression" "glmnet" TRUE -"poisson_reg" "regression" "hurdle" TRUE -"poisson_reg" "regression" "stan" TRUE -"poisson_reg" "regression" "stan_glmer" TRUE -"poisson_reg" "regression" "zeroinfl" TRUE -"proportional_hazards" "censored regression" "glmnet" TRUE -"proportional_hazards" "censored regression" "survival" TRUE -"rand_forest" "censored regression" "party" TRUE -"rand_forest" "classification" "randomForest" FALSE -"rand_forest" "classification" "ranger" FALSE -"rand_forest" "classification" "spark" FALSE -"rand_forest" "regression" "randomForest" FALSE -"rand_forest" "regression" "ranger" FALSE -"rand_forest" "regression" "spark" FALSE -"rule_fit" "classification" "xrf" TRUE -"rule_fit" "regression" "xrf" TRUE -"surv_reg" "regression" "flexsurv" FALSE -"surv_reg" "regression" "survival" FALSE -"survival_reg" "censored regression" "flexsurv" TRUE -"survival_reg" "censored regression" "survival" TRUE -"svm_linear" "classification" "kernlab" FALSE -"svm_linear" "classification" "LiblineaR" FALSE -"svm_linear" "regression" "kernlab" FALSE -"svm_linear" "regression" "LiblineaR" FALSE -"svm_poly" "classification" "kernlab" FALSE -"svm_poly" "regression" "kernlab" FALSE -"svm_rbf" "classification" "kernlab" FALSE -"svm_rbf" "classification" "liquidSVM" FALSE -"svm_rbf" "regression" "kernlab" FALSE -"svm_rbf" "regression" "liquidSVM" FALSE +"model" "mode" "engine" "pkg" +"bag_mars" "classification" "earth" "baguette" 
+"bag_mars" "regression" "earth" "baguette" +"bag_tree" "censored regression" "rpart" "censored" +"bag_tree" "censored regression" "rpart" "baguette" +"bag_tree" "classification" "C5.0" "baguette" +"bag_tree" "classification" "rpart" "censored" +"bag_tree" "classification" "rpart" "baguette" +"bag_tree" "regression" "rpart" "censored" +"bag_tree" "regression" "rpart" "baguette" +"bart" "classification" "dbarts" NA +"bart" "regression" "dbarts" NA +"boost_tree" "censored regression" "mboost" "censored" +"boost_tree" "classification" "C5.0" NA +"boost_tree" "classification" "spark" NA +"boost_tree" "classification" "xgboost" NA +"boost_tree" "regression" "spark" NA +"boost_tree" "regression" "xgboost" NA +"C5_rules" "classification" "C5.0" "rules" +"cubist_rules" "regression" "Cubist" "rules" +"decision_tree" "censored regression" "party" "censored" +"decision_tree" "censored regression" "rpart" "censored" +"decision_tree" "classification" "C5.0" NA +"decision_tree" "classification" "rpart" "censored" +"decision_tree" "classification" "spark" NA +"decision_tree" "regression" "rpart" "censored" +"decision_tree" "regression" "spark" NA +"discrim_flexible" "classification" "earth" "discrim" +"discrim_linear" "classification" "MASS" "discrim" +"discrim_linear" "classification" "mda" "discrim" +"discrim_linear" "classification" "sda" "discrim" +"discrim_linear" "classification" "sparsediscrim" "discrim" +"discrim_quad" "classification" "MASS" "discrim" +"discrim_quad" "classification" "sparsediscrim" "discrim" +"discrim_regularized" "classification" "klaR" "discrim" +"gen_additive_mod" "classification" "mgcv" NA +"gen_additive_mod" "regression" "mgcv" NA +"linear_reg" "regression" "gee" "multilevelmod" +"linear_reg" "regression" "glmnet" NA +"linear_reg" "regression" "gls" "multilevelmod" +"linear_reg" "regression" "keras" NA +"linear_reg" "regression" "lm" NA +"linear_reg" "regression" "lme" "multilevelmod" +"linear_reg" "regression" "lmer" "multilevelmod" +"linear_reg" 
"regression" "spark" NA +"linear_reg" "regression" "stan" NA +"linear_reg" "regression" "stan_glmer" "multilevelmod" +"logistic_reg" "classification" "gee" "multilevelmod" +"logistic_reg" "classification" "glm" NA +"logistic_reg" "classification" "glmer" "multilevelmod" +"logistic_reg" "classification" "glmnet" NA +"logistic_reg" "classification" "keras" NA +"logistic_reg" "classification" "LiblineaR" NA +"logistic_reg" "classification" "spark" NA +"logistic_reg" "classification" "stan" NA +"logistic_reg" "classification" "stan_glmer" "multilevelmod" +"mars" "classification" "earth" NA +"mars" "regression" "earth" NA +"mlp" "classification" "keras" NA +"mlp" "classification" "nnet" NA +"mlp" "regression" "keras" NA +"mlp" "regression" "nnet" NA +"multinom_reg" "classification" "glmnet" NA +"multinom_reg" "classification" "keras" NA +"multinom_reg" "classification" "nnet" NA +"multinom_reg" "classification" "spark" NA +"naive_Bayes" "classification" "klaR" "discrim" +"naive_Bayes" "classification" "naivebayes" "discrim" +"nearest_neighbor" "classification" "kknn" NA +"nearest_neighbor" "regression" "kknn" NA +"null_model" "classification" "parsnip" NA +"null_model" "regression" "parsnip" NA +"pls" "classification" "mixOmics" "plsmod" +"pls" "regression" "mixOmics" "plsmod" +"poisson_reg" "regression" "gee" "multilevelmod" +"poisson_reg" "regression" "glm" "poissonreg" +"poisson_reg" "regression" "glmer" "multilevelmod" +"poisson_reg" "regression" "glmnet" "poissonreg" +"poisson_reg" "regression" "hurdle" "poissonreg" +"poisson_reg" "regression" "stan" "poissonreg" +"poisson_reg" "regression" "stan_glmer" "multilevelmod" +"poisson_reg" "regression" "zeroinfl" "poissonreg" +"proportional_hazards" "censored regression" "glmnet" "censored" +"proportional_hazards" "censored regression" "survival" "censored" +"rand_forest" "censored regression" "party" "censored" +"rand_forest" "classification" "randomForest" NA +"rand_forest" "classification" "ranger" NA +"rand_forest" 
"classification" "spark" NA +"rand_forest" "regression" "randomForest" NA +"rand_forest" "regression" "ranger" NA +"rand_forest" "regression" "spark" NA +"rule_fit" "classification" "xrf" "rules" +"rule_fit" "regression" "xrf" "rules" +"surv_reg" "regression" "flexsurv" NA +"surv_reg" "regression" "survival" NA +"survival_reg" "censored regression" "flexsurv" "censored" +"survival_reg" "censored regression" "survival" "censored" +"svm_linear" "classification" "kernlab" NA +"svm_linear" "classification" "LiblineaR" NA +"svm_linear" "regression" "kernlab" NA +"svm_linear" "regression" "LiblineaR" NA +"svm_poly" "classification" "kernlab" NA +"svm_poly" "regression" "kernlab" NA +"svm_rbf" "classification" "kernlab" NA +"svm_rbf" "classification" "liquidSVM" NA +"svm_rbf" "regression" "kernlab" NA +"svm_rbf" "regression" "liquidSVM" NA diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd index 986c6926c..1910098db 100644 --- a/man/rmd/setup.Rmd +++ b/man/rmd/setup.Rmd @@ -15,6 +15,18 @@ check_pkg_for_docs(parsnip:::extensions()) # ------------------------------------------------------------------------------ # Code to get information about main arguments and format the results to print +make_mode_list <- function(mod, eng) { + modes <- c("regression", "classification", "censored regression") + exts <- + read.delim(system.file("models.tsv", package = "parsnip")) %>% + dplyr::filter(model == mod & engine == eng) %>% + dplyr::mutate(mode = factor(mode, levels = modes)) %>% + dplyr::arrange(mode) + + # Need to get mode-specific dependencies + exts +} + make_parameter_list <- function(x, defaults) { x %>% tune::tunable() %>% From 24197004c1e832fbeee2af4ad5e20bdbb31d27e1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 11:37:19 -0500 Subject: [PATCH 19/65] use superscripted numbers for default engine and extensions --- R/engine_docs.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index 
5dbdaa269..4beeb2ba4 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -149,7 +149,7 @@ find_engine_files <- function(mod) { # Determine and label default engine default <- get_default_engine(mod) - eng$default <- ifelse(eng$engine == default, " (default)", "") + eng$default <- ifelse(eng$engine == default, cli::symbol$sup_1, "") # reorder based on default and name non_defaults <- dplyr::filter(eng, !grepl("default", default)) @@ -158,7 +158,7 @@ find_engine_files <- function(mod) { dplyr::arrange(tolower(engine)) %>% dplyr::mutate(.order = dplyr::row_number() + 1) eng <- - dplyr::filter(eng, grepl("default", default)) %>% + dplyr::filter(eng, default != "") %>% dplyr::mutate(.order = 1) %>% dplyr::bind_rows(non_defaults) @@ -176,7 +176,7 @@ make_engine_list <- function(mod) { modes <- get_from_env(paste0(mod, "_modes")) modes <- modes[modes != "unknown"] modes <- glue::glue_collapse(modes, sep = ", ", last = " and ") - modes <- glue::glue("\\code{|mod|()} can fit |modes| models.", + modes <- glue::glue("\\code{|mod|()} can fit |modes| models.", .open = "|", .close = "|") main <- glue::glue("The engine-specific pages for this model are listed ", "below. 
These contain further details:\n\n") @@ -188,7 +188,7 @@ make_engine_list <- function(mod) { dplyr::group_by(engine) %>% dplyr::summarize(extensions = sum(!is.na(pkg))) %>% dplyr::mutate( - has_ext = ifelse(extensions > 0, " (may require a parsnip extension package)", "") + has_ext = ifelse(extensions > 0, cli::symbol$sup_2, "") ) eng <- dplyr::left_join(eng, exts, by = "engine") @@ -202,8 +202,14 @@ make_engine_list <- function(mod) { ) %>% dplyr::distinct(item) + notes <- paste0("\n", cli::symbol$sup_1, " The default engine.") + if (any(exts$has_ext != "")) { + notes <- paste0(notes, " ", cli::symbol$sup_2, " May require a parsnip extension package.") + } + + items <- glue::glue_collapse(eng$item, sep = "\n") - res <- glue::glue("|main|\n\\itemize{\n|items|\n}\n\n |modes|}", + res <- glue::glue("|main|\n\\itemize{\n|items|\n}\n\n |notes|\n\n|modes|", .open = "|", .close = "|") res } From 6cc5e3d111a5891441578925d48081eacf21054e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 12:43:05 -0500 Subject: [PATCH 20/65] Refactor the engine and mode information in the model files --- R/bag_mars.R | 5 +---- R/bag_tree.R | 6 ++---- R/bart.R | 6 ++---- R/boost_tree.R | 6 ++---- R/c5_rules.R | 6 ++---- R/cubist_rules.R | 6 ++---- R/decision_tree.R | 6 ++---- R/discrim_flexible.R | 3 --- R/discrim_flexible_earth.R | 3 ++- R/discrim_linear.R | 6 ++---- R/discrim_quad.R | 6 ++---- R/discrim_regularized.R | 5 +---- R/engine_docs.R | 22 +++++++++++--------- R/gen_additive_mod.R | 6 ++---- R/linear_reg.R | 7 +------ R/logistic_reg.R | 5 +---- R/mars.R | 6 ++---- R/mlp.R | 6 ++---- R/multinom_reg.R | 6 ++---- R/naive_Bayes.R | 6 ++---- R/nearest_neighbor.R | 3 --- R/nullmodel.R | 2 +- R/pls.R | 4 +--- R/poisson_reg.R | 5 +---- R/proportional_hazards.R | 7 +++---- R/rand_forest.R | 6 ++---- R/rule_fit.R | 6 ++---- R/surv_reg.R | 4 +--- R/survival_reg.R | 6 ++---- R/svm_linear.R | 13 ++++-------- R/svm_poly.R | 14 +++++-------- R/svm_rbf.R | 14 +++++-------- 
man/C5_rules.Rd | 6 ++---- man/bag_mars.Rd | 5 +---- man/bag_tree.Rd | 6 ++---- man/bart.Rd | 6 ++---- man/boost_tree.Rd | 6 ++---- man/cubist_rules.Rd | 6 ++---- man/decision_tree.Rd | 6 ++---- man/details_discrim_flexible_earth.Rd | 3 ++- man/details_discrim_linear_sda.Rd | 3 ++- man/details_linear_reg_gls.Rd | 29 ++------------------------- man/details_mlp_brulee.Rd | 20 ++++++++++++++++++ man/details_poisson_reg_hurdle.Rd | 1 - man/details_poisson_reg_zeroinfl.Rd | 1 - man/discrim_flexible.Rd | 3 --- man/discrim_linear.Rd | 6 ++---- man/discrim_quad.Rd | 6 ++---- man/discrim_regularized.Rd | 5 +---- man/gen_additive_mod.Rd | 6 ++---- man/linear_reg.Rd | 5 +---- man/logistic_reg.Rd | 5 +---- man/mars.Rd | 6 ++---- man/mlp.Rd | 6 ++---- man/multinom_reg.Rd | 6 ++---- man/naive_Bayes.Rd | 6 ++---- man/nearest_neighbor.Rd | 3 --- man/null_model.Rd | 2 +- man/pls.Rd | 4 +--- man/poisson_reg.Rd | 4 +--- man/proportional_hazards.Rd | 7 +++---- man/rand_forest.Rd | 6 ++---- man/rule_fit.Rd | 6 ++---- man/surv_reg.Rd | 3 --- man/survival_reg.Rd | 6 ++---- man/svm_linear.Rd | 13 ++++-------- man/svm_poly.Rd | 14 +++++-------- man/svm_rbf.Rd | 14 +++++-------- 68 files changed, 154 insertions(+), 298 deletions(-) create mode 100644 man/details_mlp_brulee.Rd diff --git a/R/bag_mars.R b/R/bag_mars.R index 369160872..970dac731 100644 --- a/R/bag_mars.R +++ b/R/bag_mars.R @@ -5,10 +5,7 @@ #' `bag_mars()` defines an ensemble of generalized linear models that use #' artificial features for some predictors. These features resemble hinge #' functions and the result is a model that is a segmented regression in small -#' dimensions. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' dimensions. The function can fit classification and regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_mars")} #' diff --git a/R/bag_tree.R b/R/bag_tree.R index 9a6a045f5..ed4c1d3a3 100644 --- a/R/bag_tree.R +++ b/R/bag_tree.R @@ -2,10 +2,8 @@ #' #' @description #' -#' `bag_tree()` defines an ensemble of decision trees. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' `bag_tree()` defines an ensemble of decision trees. The function can fit +#' classification, regression, and censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_tree")} #' diff --git a/R/bart.R b/R/bart.R index 06377be09..5e51bc12c 100644 --- a/R/bart.R +++ b/R/bart.R @@ -3,10 +3,8 @@ #' @description #' #' `bart()` defines a tree ensemble model that uses Bayesian analysis to -#' assemble the ensemble. -#' -#' There are different ways to fit this model. See the engine-specific pages -#' for more details: +#' assemble the ensemble. The function can fit classification and regression +#' models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bart")} #' diff --git a/R/boost_tree.R b/R/boost_tree.R index b6e0fa4e4..bb8f3d3e1 100644 --- a/R/boost_tree.R +++ b/R/boost_tree.R @@ -6,10 +6,8 @@ #' #' `boost_tree()` defines a model that creates a series of decision trees #' forming an ensemble. Each tree depends on the results of previous trees. -#' All trees in the ensemble are combined to produce a final prediction. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' All trees in the ensemble are combined to produce a final prediction. The +#' function can fit classification, regression, and censored regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")} #' diff --git a/R/c5_rules.R b/R/c5_rules.R index c388a357c..b32ac08df 100644 --- a/R/c5_rules.R +++ b/R/c5_rules.R @@ -5,10 +5,8 @@ #' #' @description #' `C5_rules()` defines a model that derives feature rules from a tree for -#' prediction. A single tree or boosted ensemble can be used. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' prediction. A single tree or boosted ensemble can be used. The function can +#' fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("C5_rules")} #' diff --git a/R/cubist_rules.R b/R/cubist_rules.R index 1df73bbfc..88452dc98 100644 --- a/R/cubist_rules.R +++ b/R/cubist_rules.R @@ -2,10 +2,8 @@ #' #' @description #' `cubist_rules()` defines a model that derives simple feature rules from a tree -#' ensemble and creates regression models within each rule. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' ensemble and creates regression models within each rule. The function can fit +#' regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("cubist_rules")} #' diff --git a/R/decision_tree.R b/R/decision_tree.R index 8d4c480e1..ed44cc277 100644 --- a/R/decision_tree.R +++ b/R/decision_tree.R @@ -4,10 +4,8 @@ #' #' @description #' `decision_tree()` defines a model as a set of `if/then` statements that -#' creates a tree-based structure. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' creates a tree-based structure. The function can fit classification, +#' regression, and censored regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")} #' diff --git a/R/discrim_flexible.R b/R/discrim_flexible.R index 89f865366..668f7d1cb 100644 --- a/R/discrim_flexible.R +++ b/R/discrim_flexible.R @@ -6,9 +6,6 @@ #' that can use nonlinear features created using multivariate adaptive #' regression splines (MARS). #' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. -#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_flexible")} #' #' More information on how \pkg{parsnip} is used for modeling is at diff --git a/R/discrim_flexible_earth.R b/R/discrim_flexible_earth.R index 90b7c2258..e271527ca 100644 --- a/R/discrim_flexible_earth.R +++ b/R/discrim_flexible_earth.R @@ -2,7 +2,8 @@ #' #' [mda::fda()] (in conjunction with [earth::earth()] can fit a nonlinear #' discriminant analysis model that uses nonlinear features created using -#' multivariate adaptive regression splines (MARS). +#' multivariate adaptive regression splines (MARS). The function can fit +#' classification models. #' #' @includeRmd man/rmd/discrim_flexible_earth.md details #' diff --git a/R/discrim_linear.R b/R/discrim_linear.R index 3fdbfd29e..281104964 100644 --- a/R/discrim_linear.R +++ b/R/discrim_linear.R @@ -5,10 +5,8 @@ #' `discrim_linear()` defines a model that estimates a multivariate #' distribution for the predictors separately for the data in each class #' (usually Gaussian with a common covariance matrix). Bayes' theorem is used -#' to compute the probability of each class, given the predictor values. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' to compute the probability of each class, given the predictor values. The +#' function can fit classification models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_linear")} #' diff --git a/R/discrim_quad.R b/R/discrim_quad.R index ceb0c374b..ba999a4a4 100644 --- a/R/discrim_quad.R +++ b/R/discrim_quad.R @@ -5,10 +5,8 @@ #' `discrim_quad()` defines a model that estimates a multivariate #' distribution for the predictors separately for the data in each class #' (usually Gaussian with separate covariance matrices). Bayes' theorem is used -#' to compute the probability of each class, given the predictor values. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' to compute the probability of each class, given the predictor values. The +#' function can fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_quad")} #' diff --git a/R/discrim_regularized.R b/R/discrim_regularized.R index eca51739f..397b02372 100644 --- a/R/discrim_regularized.R +++ b/R/discrim_regularized.R @@ -6,10 +6,7 @@ #' distribution for the predictors separately for the data in each class. The #' structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' #' theorem is used to compute the probability of each class, given the -#' predictor values. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' predictor values. The function can fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_regularized")} #' diff --git a/R/engine_docs.R b/R/engine_docs.R index 4beeb2ba4..ab9517a10 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -171,15 +171,19 @@ make_engine_list <- function(mod) { eng <- find_engine_files(mod) if (length(eng) == 0) { - return("No engines were found for this model.\n\n") + return( + paste( + "There are different ways to fit this model. The method of estimation is ", + "chosen by setting the model \\emph{engine}. 
No engines were found for ", + "this model.\n\n" + ) + ) } else { - modes <- get_from_env(paste0(mod, "_modes")) - modes <- modes[modes != "unknown"] - modes <- glue::glue_collapse(modes, sep = ", ", last = " and ") - modes <- glue::glue("\\code{|mod|()} can fit |modes| models.", - .open = "|", .close = "|") - main <- glue::glue("The engine-specific pages for this model are listed ", - "below. These contain further details:\n\n") + main <- paste( + "There are different ways to fit this model. The method of estimation is ", + "chosen by setting the model \\emph{engine}. The engine-specific pages ", + "for this model are listed below.\n\n" + ) } exts <- @@ -209,7 +213,7 @@ make_engine_list <- function(mod) { items <- glue::glue_collapse(eng$item, sep = "\n") - res <- glue::glue("|main|\n\\itemize{\n|items|\n}\n\n |notes|\n\n|modes|", + res <- glue::glue("|main|\n\\itemize{\n|items|\n}\n\n |notes|", .open = "|", .close = "|") res } diff --git a/R/gen_additive_mod.R b/R/gen_additive_mod.R index 34caf0ad0..06003d04e 100644 --- a/R/gen_additive_mod.R +++ b/R/gen_additive_mod.R @@ -2,10 +2,8 @@ #' #' @description #' `gen_additive_mod()` defines a model that can use smoothed functions of -#' numeric predictors in a generalized linear model. -#' -#' There are different ways to fit this model. See the engine-specific pages -#' for more details +#' numeric predictors in a generalized linear model. The function can fit +#' classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("gen_additive_mod")} #' diff --git a/R/linear_reg.R b/R/linear_reg.R index ef6d64fa4..453544c10 100644 --- a/R/linear_reg.R +++ b/R/linear_reg.R @@ -3,15 +3,10 @@ #' @description #' #' `linear_reg()` defines a model that can predict numeric values from -#' predictors using a linear function. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' predictors using a linear function. 
The function can fit regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")} #' -#' \Sexpr[stage=render,results=rd]{parsnip:::pkg_extension_note("linear_reg")} -#' #' More information on how \pkg{parsnip} is used for modeling is at #' \url{https://www.tidymodels.org/}. #' diff --git a/R/logistic_reg.R b/R/logistic_reg.R index 3a6c24550..e16cba54f 100644 --- a/R/logistic_reg.R +++ b/R/logistic_reg.R @@ -3,10 +3,7 @@ #' @description #' [logistic_reg()] defines a generalized linear model for binary outcomes. A #' linear combination of the predictors is used to model the log odds of an -#' event. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' event. The function can fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")} #' diff --git a/R/mars.R b/R/mars.R index e64968a18..70bb909f4 100644 --- a/R/mars.R +++ b/R/mars.R @@ -4,10 +4,8 @@ #' #' `mars()` defines a generalized linear model that uses artificial features for #' some predictors. These features resemble hinge functions and the result is -#' a model that is a segmented regression in small dimensions. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' a model that is a segmented regression in small dimensions. The function can +#' fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")} #' diff --git a/R/mlp.R b/R/mlp.R index 2a72a9015..13b6f5dec 100644 --- a/R/mlp.R +++ b/R/mlp.R @@ -2,10 +2,8 @@ #' #' @description #' `mlp()` defines a multilayer perceptron model (a.k.a. a single layer, -#' feed-forward neural network). -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' feed-forward neural network). 
The function can fit classification and +#' regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")} #' diff --git a/R/multinom_reg.R b/R/multinom_reg.R index 5e60c8042..65c644f15 100644 --- a/R/multinom_reg.R +++ b/R/multinom_reg.R @@ -3,10 +3,8 @@ #' @description #' #' `multinom_reg()` defines a model that uses linear predictors to predict -#' multiclass data using the multinomial distribution. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' multiclass data using the multinomial distribution. The function can fit +#' classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")} #' diff --git a/R/naive_Bayes.R b/R/naive_Bayes.R index 441785aa9..347eb1a07 100644 --- a/R/naive_Bayes.R +++ b/R/naive_Bayes.R @@ -3,10 +3,8 @@ #' @description #' #' `naive_Bayes()` defines a model that uses Bayes' theorem to compute the -#' probability of each class, given the predictor values. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' probability of each class, given the predictor values. The function can fit +#' classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("naive_Bayes")} #' diff --git a/R/nearest_neighbor.R b/R/nearest_neighbor.R index ee5f4624d..0e3ff2491 100644 --- a/R/nearest_neighbor.R +++ b/R/nearest_neighbor.R @@ -5,9 +5,6 @@ #' `nearest_neighbor()` defines a model that uses the `K` most similar data #' points from the training set to predict new samples. #' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. 
-#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")} #' #' More information on how \pkg{parsnip} is used for modeling is at diff --git a/R/nullmodel.R b/R/nullmodel.R index 6ef7c6228..6b7c88beb 100644 --- a/R/nullmodel.R +++ b/R/nullmodel.R @@ -128,7 +128,7 @@ predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) { #' Null model #' #' `null_model()` defines a simple, non-informative model. It doesn't have any -#' main arguments. +#' main arguments. The function can fit classification and regression models. #' #' @inheritParams boost_tree #' @details The model can be created using the `fit()` function using the diff --git a/R/pls.R b/R/pls.R index be17fbbae..e1bf1d089 100644 --- a/R/pls.R +++ b/R/pls.R @@ -3,9 +3,7 @@ #' @description #' `pls()` defines a partial least squares model that uses latent variables to #' model the data. It is similar to a supervised version of principal component. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' The function can fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("pls")} #' diff --git a/R/poisson_reg.R b/R/poisson_reg.R index c554ac191..616b11b2b 100644 --- a/R/poisson_reg.R +++ b/R/poisson_reg.R @@ -3,10 +3,7 @@ #' @description #' #' `poisson_reg()` defines a generalized linear model for count data that follow -#' a Poisson distribution. - -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' a Poisson distribution. The function can fit regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} #' diff --git a/R/proportional_hazards.R b/R/proportional_hazards.R index 25da23753..5d8e2b7a6 100644 --- a/R/proportional_hazards.R +++ b/R/proportional_hazards.R @@ -1,10 +1,9 @@ #' Proportional hazards regression #' #' @description -#' `proportional_hazards()` defines a proportional hazards model. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' `proportional_hazards()` defines a technique that models the hazard function +#' as a multiplicative function of covariates times a baseline hazard. The +#' function can fit censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("proportional_hazards")} #' diff --git a/R/rand_forest.R b/R/rand_forest.R index 97bc5279d..fcbf80ed0 100644 --- a/R/rand_forest.R +++ b/R/rand_forest.R @@ -4,10 +4,8 @@ #' #' `rand_forest()` defines a model that creates a large number of decision #' trees, each independent of the others. The final prediction uses all -#' predictions from the individual trees and combines them. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' predictions from the individual trees and combines them. The function can fit +#' classification, regression, and censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")} #' diff --git a/R/rule_fit.R b/R/rule_fit.R index 9f39afa7a..a90218e30 100644 --- a/R/rule_fit.R +++ b/R/rule_fit.R @@ -2,10 +2,8 @@ #' #' @description #' `rule_fit()` defines a model that derives simple feature rules from a tree -#' ensemble and uses them as features in a regularized model. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' ensemble and uses them as features in a regularized model. 
The function can +#' fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rule_fit")} #' diff --git a/R/surv_reg.R b/R/surv_reg.R index 9d94bbe2c..c65d19392 100644 --- a/R/surv_reg.R +++ b/R/surv_reg.R @@ -7,9 +7,7 @@ #' `"censored regression"` mode. #' #' `surv_reg()` defines a parametric survival model. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("surv_reg")} #' diff --git a/R/survival_reg.R b/R/survival_reg.R index e8584956b..35772fa8c 100644 --- a/R/survival_reg.R +++ b/R/survival_reg.R @@ -1,10 +1,8 @@ #' Parametric survival regression #' #' @description -#' `survival_reg()` defines a parametric survival model. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' `survival_reg()` defines a parametric survival model. The function can fit +#' censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("survival_reg")} #' diff --git a/R/svm_linear.R b/R/svm_linear.R index 15939526b..6e3efea9f 100644 --- a/R/svm_linear.R +++ b/R/svm_linear.R @@ -3,15 +3,10 @@ #' @description #' #' `svm_linear()` defines a support vector machine model. For classification, -#' the model tries to maximize the width of the margin between classes. -#' For regression, the model optimizes a robust loss function that is only -#' affected by very large model residuals. -#' -#' This SVM model uses a linear function to create the decision boundary or -#' regression line. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' the model tries to maximize the width of the margin between classes (using a +#' linear class boundary).
For regression, the model optimizes a robust loss +#' function that is only affected by very large model residuals and uses a +#' linear fit. The function can fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")} #' diff --git a/R/svm_poly.R b/R/svm_poly.R index 4b7fe3735..2ba82ce36 100644 --- a/R/svm_poly.R +++ b/R/svm_poly.R @@ -3,15 +3,11 @@ #' @description #' #' `svm_poly()` defines a support vector machine model. For classification, -#' the model tries to maximize the width of the margin between classes. -#' For regression, the model optimizes a robust loss function that is only -#' affected by very large model residuals. -#' -#' This SVM model uses a nonlinear function, specifically a polynomial function, -#' to create the decision boundary or regression line. -#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' the model tries to maximize the width of the margin between classes (using a +#' polynomial class boundary). For regression, the model optimizes a robust loss +#' function that is only affected by very large model residuals (via polynomial +#' functions of the predictors). The function can fit classification and +#' regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")} #' diff --git a/R/svm_rbf.R b/R/svm_rbf.R index 6a8fd9ad6..53ba52433 100644 --- a/R/svm_rbf.R +++ b/R/svm_rbf.R @@ -3,15 +3,11 @@ #' @description #' #' `svm_rbf()` defines a support vector machine model. For classification, -#' the model tries to maximize the width of the margin between classes. -#' For regression, the model optimizes a robust loss function that is only -#' affected by very large model residuals. -#' -#' This SVM model uses a nonlinear function, specifically the radial basis function, -#' to create the decision boundary or regression line. 
-#' -#' There are different ways to fit this model. The method of estimation is -#' chosen by setting the model _engine_. +#' the model tries to maximize the width of the margin between classes using a +#' nonlinear class boundary. For regression, the model optimizes a robust loss +#' function that is only affected by very large model residuals and uses +#' nonlinear functions of the predictors. The function can fit classification +#' and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_rbf")} #' diff --git a/man/C5_rules.Rd b/man/C5_rules.Rd index 196c7528b..b653f059b 100644 --- a/man/C5_rules.Rd +++ b/man/C5_rules.Rd @@ -21,10 +21,8 @@ to use for fitting.} } \description{ \code{C5_rules()} defines a model that derives feature rules from a tree for -prediction. A single tree or boosted ensemble can be used. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +prediction. A single tree or boosted ensemble can be used. The function can +fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("C5_rules")} diff --git a/man/bag_mars.Rd b/man/bag_mars.Rd index 404ac43fc..41bd83cc2 100644 --- a/man/bag_mars.Rd +++ b/man/bag_mars.Rd @@ -31,10 +31,7 @@ to use for fitting.} \code{bag_mars()} defines an ensemble of generalized linear models that use artificial features for some predictors. These features resemble hinge functions and the result is a model that is a segmented regression in small -dimensions. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +dimensions. The function can fit classification and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_mars")} diff --git a/man/bag_tree.Rd b/man/bag_tree.Rd index fc239f375..97607148a 100644 --- a/man/bag_tree.Rd +++ b/man/bag_tree.Rd @@ -35,10 +35,8 @@ and one can be used to bias to the second level of the factor.} to use for fitting.} } \description{ -\code{bag_tree()} defines an ensemble of decision trees. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +\code{bag_tree()} defines an ensemble of decision trees. The function can fit +classification, regression, and censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_tree")} diff --git a/man/bart.Rd b/man/bart.Rd index a15d45cc4..9d2f76f83 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -43,10 +43,8 @@ on the logit scale). The default value is 2.} } \description{ \code{bart()} defines a tree ensemble model that uses Bayesian analysis to -assemble the ensemble. - -There are different ways to fit this model. See the engine-specific pages -for more details: +assemble the ensemble. The function can fit classification and regression +models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bart")} diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index 6780e361d..91ed9cf4e 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -54,10 +54,8 @@ stopping (specific engines only).} \description{ \code{boost_tree()} defines a model that creates a series of decision trees forming an ensemble. Each tree depends on the results of previous trees. -All trees in the ensemble are combined to produce a final prediction. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +All trees in the ensemble are combined to produce a final prediction. The +function can fit classification, regression, and censored regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")} diff --git a/man/cubist_rules.Rd b/man/cubist_rules.Rd index 2bb2b34eb..2d99fa74a 100644 --- a/man/cubist_rules.Rd +++ b/man/cubist_rules.Rd @@ -29,10 +29,8 @@ to use for fitting.} } \description{ \code{cubist_rules()} defines a model that derives simple feature rules from a tree -ensemble and creates regression models within each rule. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +ensemble and creates regression models within each rule. The function can fit +regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("cubist_rules")} diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index 82caffb71..5a9c2ab28 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -30,10 +30,8 @@ in a node that are required for the node to be split further.} } \description{ \code{decision_tree()} defines a model as a set of \verb{if/then} statements that -creates a tree-based structure. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +creates a tree-based structure. The function can fit classification, +regression, and censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")} diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index ac79c993a..f36a2e36c 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -6,7 +6,8 @@ \description{ \code{\link[mda:fda]{mda::fda()}} (in conjunction with \code{\link[earth:earth]{earth::earth()}} can fit a nonlinear discriminant analysis model that uses nonlinear features created using -multivariate adaptive regression splines (MARS). +multivariate adaptive regression splines (MARS). The function can fit +classification models. 
} \details{ For this engine, there is a single mode: classification diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index d38275e79..a7411c573 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -11,7 +11,8 @@ between classical discriminant analysis and diagonal discriminant analysis. For this engine, there is a single mode: classification \subsection{Tuning Parameters}{ -This engine has no tuning parameters. +This engine has no tuning parameter arguments in +\code{\link[=discrim_linear]{discrim_linear()}}. However, there are a few engine-specific parameters that can be set or optimized when calling \code{\link[=set_engine]{set_engine()}}: diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index eda828299..f072cc789 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -42,35 +42,10 @@ The model can accept case weights. With \code{parsnip}, we suggest using the \emph{fixed effects} formula method when fitting but the details of the correlation structure should be passed to \code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) -}\if{html}{\out{
}}\preformatted{## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.4 ── - -## ✓ broom 0.7.9 ✓ tibble 3.1.6 -## ✓ ggplot2 3.3.5 ✓ tidyr 1.1.4 -## ✓ infer 1.0.0 ✓ workflows 0.2.4.9000 -## ✓ purrr 0.3.4 ✓ workflowsets 0.1.0 -## ✓ recipes 0.1.17.9000 ✓ yardstick 0.0.8 -## ✓ rsample 0.1.0 - -## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ── -## x purrr::discard() masks scales::discard() -## x tidyr::expand() masks Matrix::expand() -## x dplyr::filter() masks stats::filter() -## x dplyr::lag() masks stats::lag() -## x dials::max_rules() masks rules::max_rules() -## x tidyr::pack() masks Matrix::pack() -## x recipes::step() masks stats::step() -## x tidyr::unpack() masks Matrix::unpack() -## x recipes::update() masks Matrix::update(), stats::update() -## • Learn how to get started at https://www.tidymodels.org/start/ -}\if{html}{\out{
}}\preformatted{# load nlme to be able to use the `cor*()` functions +# load nlme to be able to use the `cor*()` functions library(nlme) -}\if{html}{\out{
}}\preformatted{## -## Attaching package: 'nlme' -## The following object is masked from 'package:dplyr': -## -## collapse -}\if{html}{\out{
}}\preformatted{data("riesby") +data("riesby") linear_reg() \%>\% set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) \%>\% diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd new file mode 100644 index 000000000..f685b74e7 --- /dev/null +++ b/man/details_mlp_brulee.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mlp_brulee.R +\name{details_mlp_brulee} +\alias{details_mlp_brulee} +\title{Multilayer perceptron via brulee} +\description{ +\code{\link[brulee:brulee_mlp]{brulee::brulee_mlp()}} fits a neural networks. +} +\details{ +For this engine, there are multiple modes: classification and regression +\subsection{Tuning Parameters}{\preformatted{## Error: Tibble columns must have compatible sizes. +## * Size 6: Existing data. +## * Size 8: Column `default`. +## ℹ Only values of size one are recycled. + +## Error in is.data.frame(y): object 'defaults' not found +} +} +} +\keyword{internal} diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 985c1ef75..404cfa033 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -51,7 +51,6 @@ poisson_reg() \%>\% fit(art ~ fem + mar | ment, data = bioChemists) }\if{html}{\out{
}}\preformatted{## parsnip model object ## -## Fit time: 16ms ## ## Call: ## pscl::hurdle(formula = art ~ fem + mar | ment, data = data) diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index 0027d43ce..9646f812d 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -52,7 +52,6 @@ poisson_reg() \%>\% fit(art ~ fem + mar | ment, data = bioChemists) }\if{html}{\out{}}\preformatted{## parsnip model object ## -## Fit time: 20ms ## ## Call: ## pscl::zeroinfl(formula = art ~ fem + mar | ment, data = data) diff --git a/man/discrim_flexible.Rd b/man/discrim_flexible.Rd index 7da1984c6..b99df1595 100644 --- a/man/discrim_flexible.Rd +++ b/man/discrim_flexible.Rd @@ -32,9 +32,6 @@ to use for fitting.} that can use nonlinear features created using multivariate adaptive regression splines (MARS). -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. - \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_flexible")} More information on how \pkg{parsnip} is used for modeling is at diff --git a/man/discrim_linear.Rd b/man/discrim_linear.Rd index b11ff6343..9724c0dec 100644 --- a/man/discrim_linear.Rd +++ b/man/discrim_linear.Rd @@ -29,10 +29,8 @@ to use for fitting.} \code{discrim_linear()} defines a model that estimates a multivariate distribution for the predictors separately for the data in each class (usually Gaussian with a common covariance matrix). Bayes' theorem is used -to compute the probability of each class, given the predictor values. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +to compute the probability of each class, given the predictor values. The +function can fit classification models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_linear")} diff --git a/man/discrim_quad.Rd b/man/discrim_quad.Rd index f3d2e8c85..e0a09e3a8 100644 --- a/man/discrim_quad.Rd +++ b/man/discrim_quad.Rd @@ -25,10 +25,8 @@ to use for fitting.} \code{discrim_quad()} defines a model that estimates a multivariate distribution for the predictors separately for the data in each class (usually Gaussian with separate covariance matrices). Bayes' theorem is used -to compute the probability of each class, given the predictor values. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +to compute the probability of each class, given the predictor values. The +function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_quad")} diff --git a/man/discrim_regularized.Rd b/man/discrim_regularized.Rd index 13097a997..f88c1ed75 100644 --- a/man/discrim_regularized.Rd +++ b/man/discrim_regularized.Rd @@ -26,10 +26,7 @@ to use for fitting.} distribution for the predictors separately for the data in each class. The structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' theorem is used to compute the probability of each class, given the -predictor values. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +predictor values. The function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_regularized")} diff --git a/man/gen_additive_mod.Rd b/man/gen_additive_mod.Rd index 974d75cda..120b78d12 100644 --- a/man/gen_additive_mod.Rd +++ b/man/gen_additive_mod.Rd @@ -28,10 +28,8 @@ to use for fitting.} } \description{ \code{gen_additive_mod()} defines a model that can use smoothed functions of -numeric predictors in a generalized linear model. - -There are different ways to fit this model. 
See the engine-specific pages -for more details +numeric predictors in a generalized linear model. The function can fit +classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("gen_additive_mod")} diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index 72ce094c6..20892bc5e 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -24,10 +24,7 @@ ridge regression is being used (specific engines only).} } \description{ \code{linear_reg()} defines a model that can predict numeric values from -predictors using a linear function. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +predictors using a linear function. The function can fit regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")} diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index 2505176b4..ffde94e7e 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -34,10 +34,7 @@ For \code{LiblineaR} models, \code{mixture} must be exactly 0 or 1 only.} \description{ \code{\link[=logistic_reg]{logistic_reg()}} defines a generalized linear model for binary outcomes. A linear combination of the predictors is used to model the log odds of an -event. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +event. The function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")} diff --git a/man/mars.Rd b/man/mars.Rd index f5b4af675..529cf9e1d 100644 --- a/man/mars.Rd +++ b/man/mars.Rd @@ -30,10 +30,8 @@ final model, including the intercept.} \description{ \code{mars()} defines a generalized linear model that uses artificial features for some predictors. These features resemble hinge functions and the result is -a model that is a segmented regression in small dimensions. - -There are different ways to fit this model. 
The method of estimation is -chosen by setting the model \emph{engine}. +a model that is a segmented regression in small dimensions. The function can +fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")} diff --git a/man/mlp.Rd b/man/mlp.Rd index e13d1b708..a606b7542 100644 --- a/man/mlp.Rd +++ b/man/mlp.Rd @@ -44,10 +44,8 @@ from iteration-to-iteration (specific engines only).} } \description{ \code{mlp()} defines a multilayer perceptron model (a.k.a. a single layer, -feed-forward neural network). - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +feed-forward neural network). The function can fit classification and +regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")} diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index 855367a36..e8d44e51a 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -32,10 +32,8 @@ ridge regression is being used. (specific engines only).} } \description{ \code{multinom_reg()} defines a model that uses linear predictors to predict -multiclass data using the multinomial distribution. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +multiclass data using the multinomial distribution. The function can fit +classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")} diff --git a/man/naive_Bayes.Rd b/man/naive_Bayes.Rd index a0802d7f5..85a3b9169 100644 --- a/man/naive_Bayes.Rd +++ b/man/naive_Bayes.Rd @@ -29,10 +29,8 @@ to use for fitting.} } \description{ \code{naive_Bayes()} defines a model that uses Bayes' theorem to compute the -probability of each class, given the predictor values. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +probability of each class, given the predictor values. 
The function can fit +classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("naive_Bayes")} diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index e79f9ae02..370c0e779 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -36,9 +36,6 @@ calculating Minkowski distance.} \code{nearest_neighbor()} defines a model that uses the \code{K} most similar data points from the training set to predict new samples. -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. - \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")} More information on how \pkg{parsnip} is used for modeling is at diff --git a/man/null_model.Rd b/man/null_model.Rd index 3c0ea95bc..55d943ca9 100644 --- a/man/null_model.Rd +++ b/man/null_model.Rd @@ -13,7 +13,7 @@ Possible values for this model are "unknown", "regression", or } \description{ \code{null_model()} defines a simple, non-informative model. It doesn't have any -main arguments. +main arguments. The function can fit classification and regression models. } \details{ The model can be created using the \code{fit()} function using the diff --git a/man/pls.Rd b/man/pls.Rd index 76a47666a..554573303 100644 --- a/man/pls.Rd +++ b/man/pls.Rd @@ -28,9 +28,7 @@ to use for fitting.} \description{ \code{pls()} defines a partial least squares model that uses latent variables to model the data. It is similar to a supervised version of principal component. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +The function can fit classification and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("pls")} diff --git a/man/poisson_reg.Rd b/man/poisson_reg.Rd index 60d65a949..6ae9f8a72 100644 --- a/man/poisson_reg.Rd +++ b/man/poisson_reg.Rd @@ -28,9 +28,7 @@ to use for fitting.} } \description{ \code{poisson_reg()} defines a generalized linear model for count data that follow -a Poisson distribution. -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +a Poisson distribution. The function can fit regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} diff --git a/man/proportional_hazards.Rd b/man/proportional_hazards.Rd index bb5e17d9f..d7ddfdff2 100644 --- a/man/proportional_hazards.Rd +++ b/man/proportional_hazards.Rd @@ -27,10 +27,9 @@ proportion of L1 regularization (i.e. lasso) in the model. When ridge regression is being used (specific engines only).} } \description{ -\code{proportional_hazards()} defines a proportional hazards model. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +\code{proportional_hazards()} defines a technique that models the hazard function +as a multiplicative function of covariates times a baseline hazard. The +function can fit censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("proportional_hazards")} diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index 523410fb3..d798109fc 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -32,10 +32,8 @@ in a node that are required for the node to be split further.} \description{ \code{rand_forest()} defines a model that creates a large number of decision trees, each independent of the others. The final prediction uses all -predictions from the individual trees and combines them. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. 
+predictions from the individual trees and combines them. The function can fit +classification, regression, and censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")} diff --git a/man/rule_fit.Rd b/man/rule_fit.Rd index 81d3db8e7..1c5160a5a 100644 --- a/man/rule_fit.Rd +++ b/man/rule_fit.Rd @@ -52,10 +52,8 @@ to use for fitting.} } \description{ \code{rule_fit()} defines a model that derives simple feature rules from a tree -ensemble and uses them as features in a regularized model. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +ensemble and uses them as features in a regularized model. The function can +fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rule_fit")} diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd index 871dd7445..2b39dc077 100644 --- a/man/surv_reg.Rd +++ b/man/surv_reg.Rd @@ -24,9 +24,6 @@ This function is soft-deprecated in favor of \code{survival_reg()} which uses th \code{surv_reg()} defines a parametric survival model. -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. - \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("surv_reg")} More information on how \pkg{parsnip} is used for modeling is at diff --git a/man/survival_reg.Rd b/man/survival_reg.Rd index 234093c37..4e16afc9e 100644 --- a/man/survival_reg.Rd +++ b/man/survival_reg.Rd @@ -17,10 +17,8 @@ to use for fitting.} outcome. The default is "weibull".} } \description{ -\code{survival_reg()} defines a parametric survival model. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +\code{survival_reg()} defines a parametric survival model. The function can fit +censored regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("survival_reg")} diff --git a/man/svm_linear.Rd b/man/svm_linear.Rd index 1b2407186..9743c8166 100644 --- a/man/svm_linear.Rd +++ b/man/svm_linear.Rd @@ -22,15 +22,10 @@ loss function (regression only)} } \description{ \code{svm_linear()} defines a support vector machine model. For classification, -the model tries to maximize the width of the margin between classes. -For regression, the model optimizes a robust loss function that is only -affected by very large model residuals. - -This SVM model uses a linear function to create the decision boundary or -regression line. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +the model tries to maximize the width of the margin between classes (using a +linear class boundary). For regression, the model optimizes a robust loss +function that is only affected by very large model residuals and uses a +linear fit. The function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")} diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd index fe088124f..31d764bda 100644 --- a/man/svm_poly.Rd +++ b/man/svm_poly.Rd @@ -33,15 +33,11 @@ loss function (regression only)} } \description{ \code{svm_poly()} defines a support vector machine model. For classification, -the model tries to maximize the width of the margin between classes. -For regression, the model optimizes a robust loss function that is only -affected by very large model residuals. - -This SVM model uses a nonlinear function, specifically a polynomial function, -to create the decision boundary or regression line. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +the model tries to maximize the width of the margin between classes (using a +polynomial class boundary). 
For regression, the model optimizes a robust loss +function that is only affected by very large model residuals (via polynomial +functions of the predictors). The function can fit classification and +regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")} diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd index 6e31bbc57..e77e33827 100644 --- a/man/svm_rbf.Rd +++ b/man/svm_rbf.Rd @@ -31,15 +31,11 @@ loss function (regression only)} } \description{ \code{svm_rbf()} defines a support vector machine model. For classification, -the model tries to maximize the width of the margin between classes. -For regression, the model optimizes a robust loss function that is only -affected by very large model residuals. - -This SVM model uses a nonlinear function, specifically the radial basis function, -to create the decision boundary or regression line. - -There are different ways to fit this model. The method of estimation is -chosen by setting the model \emph{engine}. +the model tries to maximize the width of the margin between classes using a +nonlinear class boundary. For regression, the model optimizes a robust loss +function that is only affected by very large model residuals and uses +nonlinear functions of the predictors. The function can fit classification +and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_rbf")} From 2777cf0b3304ac4758b92acc19e0d9870e1f739d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 12:55:52 -0500 Subject: [PATCH 21/65] remove files no longer used --- man/rmd/poission-reg-engine.Rmd | 50 --------------------------------- man/rmd/setup.Rmd | 4 +++ 2 files changed, 4 insertions(+), 50 deletions(-) delete mode 100644 man/rmd/poission-reg-engine.Rmd diff --git a/man/rmd/poission-reg-engine.Rmd b/man/rmd/poission-reg-engine.Rmd deleted file mode 100644 index 75a6f81c0..000000000 --- a/man/rmd/poission-reg-engine.Rmd +++ /dev/null @@ -1,50 +0,0 @@ -# Engine Details - -```{r startup, include = FALSE} -library(poissonreg) -``` - -Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are: - -\pkg{glm} - -```{r glm} -poisson_reg() %>% - set_engine("glm") %>% - translate() -``` - -\pkg{zeroinfl} - -```{r zeroinfl} -poisson_reg() %>% - set_engine("zeroinfl") %>% - translate() -``` - -\pkg{hurdle} - -```{r hurdle} -poisson_reg() %>% - set_engine("hurdle") %>% - translate() -``` - - -\pkg{glmnet} - -```{r glmnet} -poisson_reg() %>% - set_engine("glmnet") %>% - translate() -``` - - -\pkg{stan} - -```{r stan} -poisson_reg() %>% - set_engine("stan") %>% - translate() -``` - diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd index 1910098db..8a20f645e 100644 --- a/man/rmd/setup.Rmd +++ b/man/rmd/setup.Rmd @@ -1,4 +1,6 @@ ```{r, include = FALSE} +library(parsnip) + # ------------------------------------------------------------------------------ # These are required to build md docs for parsnip and extensions @@ -12,6 +14,7 @@ rmd_pkgs <- c("tune", "glue", "dplyr", "parsnip", "dials", "glmnet", "Cubist", " check_pkg_for_docs(rmd_pkgs) check_pkg_for_docs(parsnip:::extensions()) +stop("setup") # ------------------------------------------------------------------------------ # Code to get information about main 
arguments and format the results to print @@ -111,4 +114,5 @@ descr_models <- function(mod, eng) { } options(width = 80) + ``` From b527cea16af2cc38726c75cf362cb3e3d2569476 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 13:01:00 -0500 Subject: [PATCH 22/65] temp remove check for extenstion pkgs --- man/rmd/setup.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd index 8a20f645e..b7069db8c 100644 --- a/man/rmd/setup.Rmd +++ b/man/rmd/setup.Rmd @@ -12,9 +12,9 @@ check_pkg_for_docs <- function(x){ rmd_pkgs <- c("tune", "glue", "dplyr", "parsnip", "dials", "glmnet", "Cubist", "xrf") check_pkg_for_docs(rmd_pkgs) -check_pkg_for_docs(parsnip:::extensions()) +# check_pkg_for_docs(parsnip:::extensions()) + -stop("setup") # ------------------------------------------------------------------------------ # Code to get information about main arguments and format the results to print From 756ab01bd46e175bf1c08810fafbd968e253d8d4 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 13:04:06 -0500 Subject: [PATCH 23/65] move setup.Rmd to aaa.Rmd --- man/rmd/C5_rules_C5.0.Rmd | 2 +- man/rmd/{setup.Rmd => aaa.Rmd} | 0 man/rmd/bag_mars_earth.Rmd | 2 +- man/rmd/bag_tree_C5.0.Rmd | 2 +- man/rmd/bag_tree_rpart.Rmd | 2 +- man/rmd/bart_dbarts.Rmd | 2 +- man/rmd/boost_tree_C5.0.Rmd | 2 +- man/rmd/boost_tree_mboost.Rmd | 2 +- man/rmd/boost_tree_spark.Rmd | 2 +- man/rmd/boost_tree_xgboost.Rmd | 2 +- man/rmd/cubist_rules_Cubist.Rmd | 2 +- man/rmd/decision_tree_C5.0.Rmd | 2 +- man/rmd/decision_tree_party.Rmd | 2 +- man/rmd/decision_tree_rpart.Rmd | 2 +- man/rmd/decision_tree_spark.Rmd | 2 +- man/rmd/discrim_flexible_earth.Rmd | 2 +- man/rmd/discrim_linear_MASS.Rmd | 2 +- man/rmd/discrim_linear_mda.Rmd | 2 +- man/rmd/discrim_linear_sda.Rmd | 2 +- man/rmd/discrim_linear_sparsediscrim.Rmd | 2 +- man/rmd/discrim_quad_MASS.Rmd | 2 +- man/rmd/discrim_quad_sparsediscrim.Rmd | 2 +- man/rmd/discrim_regularized_klaR.Rmd 
| 2 +- man/rmd/gen_additive_mod_mgcv.Rmd | 2 +- man/rmd/glmnet-details.Rmd | 2 +- man/rmd/linear_reg_gee.Rmd | 2 +- man/rmd/linear_reg_glm.Rmd | 2 +- man/rmd/linear_reg_glmnet.Rmd | 2 +- man/rmd/linear_reg_gls.Rmd | 2 +- man/rmd/linear_reg_keras.Rmd | 2 +- man/rmd/linear_reg_lm.Rmd | 2 +- man/rmd/linear_reg_lme.Rmd | 2 +- man/rmd/linear_reg_lmer.Rmd | 2 +- man/rmd/linear_reg_spark.Rmd | 2 +- man/rmd/linear_reg_stan.Rmd | 2 +- man/rmd/linear_reg_stan_glmer.Rmd | 2 +- man/rmd/logistic-reg.Rmd | 2 +- man/rmd/logistic_reg_LiblineaR.Rmd | 2 +- man/rmd/logistic_reg_gee.Rmd | 2 +- man/rmd/logistic_reg_glm.Rmd | 2 +- man/rmd/logistic_reg_glmer.Rmd | 2 +- man/rmd/logistic_reg_glmnet.Rmd | 2 +- man/rmd/logistic_reg_keras.Rmd | 2 +- man/rmd/logistic_reg_spark.Rmd | 2 +- man/rmd/logistic_reg_stan.Rmd | 2 +- man/rmd/logistic_reg_stan_glmer.Rmd | 2 +- man/rmd/mars_earth.Rmd | 2 +- man/rmd/mlp_brulee.Rmd | 2 +- man/rmd/mlp_keras.Rmd | 2 +- man/rmd/mlp_nnet.Rmd | 2 +- man/rmd/multinom_reg_glmnet.Rmd | 2 +- man/rmd/multinom_reg_keras.Rmd | 2 +- man/rmd/multinom_reg_nnet.Rmd | 2 +- man/rmd/multinom_reg_spark.Rmd | 2 +- man/rmd/naive_Bayes_klaR.Rmd | 2 +- man/rmd/naive_Bayes_naivebayes.Rmd | 2 +- man/rmd/nearest-neighbor.Rmd | 2 +- man/rmd/nearest_neighbor_kknn.Rmd | 2 +- man/rmd/pls_mixOmics.Rmd | 2 +- man/rmd/poisson_reg_gee.Rmd | 2 +- man/rmd/poisson_reg_glm.Rmd | 2 +- man/rmd/poisson_reg_glmer.Rmd | 2 +- man/rmd/poisson_reg_glmnet.Rmd | 2 +- man/rmd/poisson_reg_hurdle.Rmd | 2 +- man/rmd/poisson_reg_stan.Rmd | 2 +- man/rmd/poisson_reg_stan_glmer.Rmd | 2 +- man/rmd/poisson_reg_zeroinfl.Rmd | 2 +- man/rmd/proportional_hazards_glmnet.Rmd | 2 +- man/rmd/proportional_hazards_survival.Rmd | 2 +- man/rmd/rand_forest_party.Rmd | 2 +- man/rmd/rand_forest_randomForest.Rmd | 2 +- man/rmd/rand_forest_ranger.Rmd | 2 +- man/rmd/rand_forest_spark.Rmd | 2 +- man/rmd/rule_fit_xrf.Rmd | 2 +- man/rmd/surv_reg_flexsurv.Rmd | 2 +- man/rmd/surv_reg_survival.Rmd | 2 +- man/rmd/survival_reg_flexsurv.Rmd 
| 2 +- man/rmd/survival_reg_survival.Rmd | 2 +- man/rmd/svm_linear_LiblineaR.Rmd | 2 +- man/rmd/svm_linear_kernlab.Rmd | 2 +- man/rmd/svm_poly_kernlab.Rmd | 2 +- man/rmd/svm_rbf_kernlab.Rmd | 2 +- 82 files changed, 81 insertions(+), 81 deletions(-) rename man/rmd/{setup.Rmd => aaa.Rmd} (100%) diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd index 743c35b62..9dd46029e 100644 --- a/man/rmd/C5_rules_C5.0.Rmd +++ b/man/rmd/C5_rules_C5.0.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("C5_rules", "C5.0")` diff --git a/man/rmd/setup.Rmd b/man/rmd/aaa.Rmd similarity index 100% rename from man/rmd/setup.Rmd rename to man/rmd/aaa.Rmd diff --git a/man/rmd/bag_mars_earth.Rmd b/man/rmd/bag_mars_earth.Rmd index 4cfae5d17..08e4bedf1 100644 --- a/man/rmd/bag_mars_earth.Rmd +++ b/man/rmd/bag_mars_earth.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("bag_mars", "earth")` diff --git a/man/rmd/bag_tree_C5.0.Rmd b/man/rmd/bag_tree_C5.0.Rmd index fea6a118e..47cf492ce 100644 --- a/man/rmd/bag_tree_C5.0.Rmd +++ b/man/rmd/bag_tree_C5.0.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("bag_tree", "C5.0")` diff --git a/man/rmd/bag_tree_rpart.Rmd b/man/rmd/bag_tree_rpart.Rmd index 06148a6ed..e53671282 100644 --- a/man/rmd/bag_tree_rpart.Rmd +++ b/man/rmd/bag_tree_rpart.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("bag_tree", "rpart")` diff --git a/man/rmd/bart_dbarts.Rmd b/man/rmd/bart_dbarts.Rmd index 43e4d9b82..3589b5675 100644 --- a/man/rmd/bart_dbarts.Rmd +++ b/man/rmd/bart_dbarts.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("bart", "dbarts")` diff --git 
a/man/rmd/boost_tree_C5.0.Rmd b/man/rmd/boost_tree_C5.0.Rmd index 8a5de29e8..021aae2f0 100644 --- a/man/rmd/boost_tree_C5.0.Rmd +++ b/man/rmd/boost_tree_C5.0.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("boost_tree", "C5.0")` diff --git a/man/rmd/boost_tree_mboost.Rmd b/man/rmd/boost_tree_mboost.Rmd index 191423088..bcf3d66af 100644 --- a/man/rmd/boost_tree_mboost.Rmd +++ b/man/rmd/boost_tree_mboost.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("boost_tree", "mboost")` diff --git a/man/rmd/boost_tree_spark.Rmd b/man/rmd/boost_tree_spark.Rmd index 52e97e138..98158a34a 100644 --- a/man/rmd/boost_tree_spark.Rmd +++ b/man/rmd/boost_tree_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("boost_tree", "spark")`. However, multiclass classification is not supported yet. 
diff --git a/man/rmd/boost_tree_xgboost.Rmd b/man/rmd/boost_tree_xgboost.Rmd index 788ee363d..a51e1c0cd 100644 --- a/man/rmd/boost_tree_xgboost.Rmd +++ b/man/rmd/boost_tree_xgboost.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("boost_tree", "xgboost")` diff --git a/man/rmd/cubist_rules_Cubist.Rmd b/man/rmd/cubist_rules_Cubist.Rmd index 4aa6ccc4a..e1477b8e1 100644 --- a/man/rmd/cubist_rules_Cubist.Rmd +++ b/man/rmd/cubist_rules_Cubist.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("cubist_rules", "Cubist")` diff --git a/man/rmd/decision_tree_C5.0.Rmd b/man/rmd/decision_tree_C5.0.Rmd index 533c80e5c..d7e9ef460 100644 --- a/man/rmd/decision_tree_C5.0.Rmd +++ b/man/rmd/decision_tree_C5.0.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("decision_tree", "C5.0")` diff --git a/man/rmd/decision_tree_party.Rmd b/man/rmd/decision_tree_party.Rmd index 1a08459d8..e3f53d9b1 100644 --- a/man/rmd/decision_tree_party.Rmd +++ b/man/rmd/decision_tree_party.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("decision_tree", "party")` diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd index dba34e7f3..1dc40f084 100644 --- a/man/rmd/decision_tree_rpart.Rmd +++ b/man/rmd/decision_tree_rpart.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("decision_tree", "rpart")` diff --git a/man/rmd/decision_tree_spark.Rmd b/man/rmd/decision_tree_spark.Rmd index 0c56e3e3a..6ad5d4140 100644 --- a/man/rmd/decision_tree_spark.Rmd +++ b/man/rmd/decision_tree_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` 
`r descr_models("decision_tree", "spark")` diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd index d801ac80e..df6b89a42 100644 --- a/man/rmd/discrim_flexible_earth.Rmd +++ b/man/rmd/discrim_flexible_earth.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_flexible", "earth")` diff --git a/man/rmd/discrim_linear_MASS.Rmd b/man/rmd/discrim_linear_MASS.Rmd index 04b417107..136bc2aeb 100644 --- a/man/rmd/discrim_linear_MASS.Rmd +++ b/man/rmd/discrim_linear_MASS.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_linear", "MASS")` diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd index 24bfe3f6e..52641788b 100644 --- a/man/rmd/discrim_linear_mda.Rmd +++ b/man/rmd/discrim_linear_mda.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_linear", "mda")` diff --git a/man/rmd/discrim_linear_sda.Rmd b/man/rmd/discrim_linear_sda.Rmd index 28d69396d..1efe67c7b 100644 --- a/man/rmd/discrim_linear_sda.Rmd +++ b/man/rmd/discrim_linear_sda.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_linear", "sda")` diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd index d8a673bdc..c5e32997e 100644 --- a/man/rmd/discrim_linear_sparsediscrim.Rmd +++ b/man/rmd/discrim_linear_sparsediscrim.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_linear", "sparsediscrim")` diff --git a/man/rmd/discrim_quad_MASS.Rmd b/man/rmd/discrim_quad_MASS.Rmd index 801b4ae58..b097ade1a 100644 --- a/man/rmd/discrim_quad_MASS.Rmd +++ b/man/rmd/discrim_quad_MASS.Rmd @@ -1,4 +1,4 @@ 
-```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_quad", "MASS")` diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd index 971ae0790..afd32266d 100644 --- a/man/rmd/discrim_quad_sparsediscrim.Rmd +++ b/man/rmd/discrim_quad_sparsediscrim.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_quad", "sparsediscrim")` diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd index 7533b027d..684bb8443 100644 --- a/man/rmd/discrim_regularized_klaR.Rmd +++ b/man/rmd/discrim_regularized_klaR.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("discrim_regularized", "klaR")` diff --git a/man/rmd/gen_additive_mod_mgcv.Rmd b/man/rmd/gen_additive_mod_mgcv.Rmd index 8baf43bf0..3c30a6531 100644 --- a/man/rmd/gen_additive_mod_mgcv.Rmd +++ b/man/rmd/gen_additive_mod_mgcv.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("gen_additive_mod", "mgcv")` diff --git a/man/rmd/glmnet-details.Rmd b/man/rmd/glmnet-details.Rmd index d79b82d5f..3e96c1314 100644 --- a/man/rmd/glmnet-details.Rmd +++ b/man/rmd/glmnet-details.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` # tidymodels and glmnet diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd index 6e730a901..029f4f93e 100644 --- a/man/rmd/linear_reg_gee.Rmd +++ b/man/rmd/linear_reg_gee.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "gee")` diff --git a/man/rmd/linear_reg_glm.Rmd b/man/rmd/linear_reg_glm.Rmd index a5895ffe4..dc786e3ae 100644 --- a/man/rmd/linear_reg_glm.Rmd +++ 
b/man/rmd/linear_reg_glm.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "glm")` diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd index 40eb9f3a1..816f8af18 100644 --- a/man/rmd/linear_reg_glmnet.Rmd +++ b/man/rmd/linear_reg_glmnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "glmnet")` diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index c41937186..e5fe19c3c 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ b/man/rmd/linear_reg_gls.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "gls")` diff --git a/man/rmd/linear_reg_keras.Rmd b/man/rmd/linear_reg_keras.Rmd index fe6acbe1b..4e2564e3a 100644 --- a/man/rmd/linear_reg_keras.Rmd +++ b/man/rmd/linear_reg_keras.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "keras")` diff --git a/man/rmd/linear_reg_lm.Rmd b/man/rmd/linear_reg_lm.Rmd index 9d90608c7..205c21374 100644 --- a/man/rmd/linear_reg_lm.Rmd +++ b/man/rmd/linear_reg_lm.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "lm")` diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd index 29ec56cb1..8950fac0a 100644 --- a/man/rmd/linear_reg_lme.Rmd +++ b/man/rmd/linear_reg_lme.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "lme")` diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd index 60171a300..86a4fcbce 100644 --- a/man/rmd/linear_reg_lmer.Rmd +++ b/man/rmd/linear_reg_lmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", 
include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "lmer")` diff --git a/man/rmd/linear_reg_spark.Rmd b/man/rmd/linear_reg_spark.Rmd index fc97ae633..255bc71ba 100644 --- a/man/rmd/linear_reg_spark.Rmd +++ b/man/rmd/linear_reg_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "spark")` diff --git a/man/rmd/linear_reg_stan.Rmd b/man/rmd/linear_reg_stan.Rmd index 2ad553e91..6e9961221 100644 --- a/man/rmd/linear_reg_stan.Rmd +++ b/man/rmd/linear_reg_stan.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "stan")` diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd index e869f1d19..4837d0b82 100644 --- a/man/rmd/linear_reg_stan_glmer.Rmd +++ b/man/rmd/linear_reg_stan_glmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("linear_reg", "stan_glmer")` diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd index 174183546..021055253 100644 --- a/man/rmd/logistic-reg.Rmd +++ b/man/rmd/logistic-reg.Rmd @@ -1,6 +1,6 @@ # Engine Details -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` Engines may have pre-set default arguments when executing the model fit call. 
diff --git a/man/rmd/logistic_reg_LiblineaR.Rmd b/man/rmd/logistic_reg_LiblineaR.Rmd index 47fc53783..f2bdfcfa9 100644 --- a/man/rmd/logistic_reg_LiblineaR.Rmd +++ b/man/rmd/logistic_reg_LiblineaR.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "LiblineaR")` diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd index 869bcc25f..6ebcc4e59 100644 --- a/man/rmd/logistic_reg_gee.Rmd +++ b/man/rmd/logistic_reg_gee.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "gee")` diff --git a/man/rmd/logistic_reg_glm.Rmd b/man/rmd/logistic_reg_glm.Rmd index 2d4fce64c..408b84ac0 100644 --- a/man/rmd/logistic_reg_glm.Rmd +++ b/man/rmd/logistic_reg_glm.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "glm")` diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd index 776482737..0c0a70d92 100644 --- a/man/rmd/logistic_reg_glmer.Rmd +++ b/man/rmd/logistic_reg_glmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "glmer")` diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd index 96c822c07..d9ae0c970 100644 --- a/man/rmd/logistic_reg_glmnet.Rmd +++ b/man/rmd/logistic_reg_glmnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "glmnet")` diff --git a/man/rmd/logistic_reg_keras.Rmd b/man/rmd/logistic_reg_keras.Rmd index 1ec3c7d06..f984508e2 100644 --- a/man/rmd/logistic_reg_keras.Rmd +++ b/man/rmd/logistic_reg_keras.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r 
descr_models("logistic_reg", "keras")` diff --git a/man/rmd/logistic_reg_spark.Rmd b/man/rmd/logistic_reg_spark.Rmd index 779d88fc8..a202cc4fa 100644 --- a/man/rmd/logistic_reg_spark.Rmd +++ b/man/rmd/logistic_reg_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "spark")` diff --git a/man/rmd/logistic_reg_stan.Rmd b/man/rmd/logistic_reg_stan.Rmd index 7160fb896..2bb5768ad 100644 --- a/man/rmd/logistic_reg_stan.Rmd +++ b/man/rmd/logistic_reg_stan.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "stan")` diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd index b10b8a81f..cdd812681 100644 --- a/man/rmd/logistic_reg_stan_glmer.Rmd +++ b/man/rmd/logistic_reg_stan_glmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("logistic_reg", "stan_glmer")` diff --git a/man/rmd/mars_earth.Rmd b/man/rmd/mars_earth.Rmd index 17a6c8e03..eb58d2620 100644 --- a/man/rmd/mars_earth.Rmd +++ b/man/rmd/mars_earth.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("mars", "earth")` diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd index 263e9c807..a9193fb6c 100644 --- a/man/rmd/mlp_brulee.Rmd +++ b/man/rmd/mlp_brulee.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("mlp", "brulee")` diff --git a/man/rmd/mlp_keras.Rmd b/man/rmd/mlp_keras.Rmd index f8f480383..280d99994 100644 --- a/man/rmd/mlp_keras.Rmd +++ b/man/rmd/mlp_keras.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("mlp", "keras")` diff --git a/man/rmd/mlp_nnet.Rmd 
b/man/rmd/mlp_nnet.Rmd index 5bcb2770e..176965ef5 100644 --- a/man/rmd/mlp_nnet.Rmd +++ b/man/rmd/mlp_nnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("mlp", "nnet")` diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd index 0a4f8227d..178f8fd1e 100644 --- a/man/rmd/multinom_reg_glmnet.Rmd +++ b/man/rmd/multinom_reg_glmnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("multinom_reg", "glmnet")` diff --git a/man/rmd/multinom_reg_keras.Rmd b/man/rmd/multinom_reg_keras.Rmd index 91cc8a5b3..f182c82f6 100644 --- a/man/rmd/multinom_reg_keras.Rmd +++ b/man/rmd/multinom_reg_keras.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("multinom_reg", "keras")` diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd index f04888b0d..65340d081 100644 --- a/man/rmd/multinom_reg_nnet.Rmd +++ b/man/rmd/multinom_reg_nnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("multinom_reg", "nnet")` diff --git a/man/rmd/multinom_reg_spark.Rmd b/man/rmd/multinom_reg_spark.Rmd index 69d75c63f..3f97894cb 100644 --- a/man/rmd/multinom_reg_spark.Rmd +++ b/man/rmd/multinom_reg_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("multinom_reg", "spark")` diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd index f498f9f72..78e832d55 100644 --- a/man/rmd/naive_Bayes_klaR.Rmd +++ b/man/rmd/naive_Bayes_klaR.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("naive_Bayes", "klaR")` diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd 
b/man/rmd/naive_Bayes_naivebayes.Rmd index 35ead379e..ad1eb9300 100644 --- a/man/rmd/naive_Bayes_naivebayes.Rmd +++ b/man/rmd/naive_Bayes_naivebayes.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("naive_Bayes", "naivebayes")` diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd index 14d0dd352..47f8e110d 100644 --- a/man/rmd/nearest-neighbor.Rmd +++ b/man/rmd/nearest-neighbor.Rmd @@ -1,6 +1,6 @@ # Engine Details -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: diff --git a/man/rmd/nearest_neighbor_kknn.Rmd b/man/rmd/nearest_neighbor_kknn.Rmd index a9b7b4342..2795d52e2 100644 --- a/man/rmd/nearest_neighbor_kknn.Rmd +++ b/man/rmd/nearest_neighbor_kknn.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("nearest_neighbor", "kknn")` diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd index f68320e59..79b76bbb5 100644 --- a/man/rmd/pls_mixOmics.Rmd +++ b/man/rmd/pls_mixOmics.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("pls", "mixOmics")` diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd index e9d782b7b..a6caf6988 100644 --- a/man/rmd/poisson_reg_gee.Rmd +++ b/man/rmd/poisson_reg_gee.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "gee")` diff --git a/man/rmd/poisson_reg_glm.Rmd b/man/rmd/poisson_reg_glm.Rmd index 6a0d0ffcd..d0211ed21 100644 --- a/man/rmd/poisson_reg_glm.Rmd +++ b/man/rmd/poisson_reg_glm.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", 
include = FALSE} ``` `r descr_models("poisson_reg", "glm")` diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd index 52f1945a2..f49a1cbcc 100644 --- a/man/rmd/poisson_reg_glmer.Rmd +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "glmer")` diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd index 0f54a2526..03fd29f4f 100644 --- a/man/rmd/poisson_reg_glmnet.Rmd +++ b/man/rmd/poisson_reg_glmnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "glmnet")` diff --git a/man/rmd/poisson_reg_hurdle.Rmd b/man/rmd/poisson_reg_hurdle.Rmd index d3dfeabf5..6531002d7 100644 --- a/man/rmd/poisson_reg_hurdle.Rmd +++ b/man/rmd/poisson_reg_hurdle.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "hurdle")` diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd index d96387b77..921cbe692 100644 --- a/man/rmd/poisson_reg_stan.Rmd +++ b/man/rmd/poisson_reg_stan.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "stan")` diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd index c3738c990..8a848ea65 100644 --- a/man/rmd/poisson_reg_stan_glmer.Rmd +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "stan_glmer")` diff --git a/man/rmd/poisson_reg_zeroinfl.Rmd b/man/rmd/poisson_reg_zeroinfl.Rmd index eb6641aa1..8dc998469 100644 --- a/man/rmd/poisson_reg_zeroinfl.Rmd +++ b/man/rmd/poisson_reg_zeroinfl.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = 
FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("poisson_reg", "zeroinfl")` diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd index 9ea68fe17..07e2a2c73 100644 --- a/man/rmd/proportional_hazards_glmnet.Rmd +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("proportional_hazards", "glmnet")` diff --git a/man/rmd/proportional_hazards_survival.Rmd b/man/rmd/proportional_hazards_survival.Rmd index 3e77d09ad..697aeb824 100644 --- a/man/rmd/proportional_hazards_survival.Rmd +++ b/man/rmd/proportional_hazards_survival.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("proportional_hazards", "survival")` diff --git a/man/rmd/rand_forest_party.Rmd b/man/rmd/rand_forest_party.Rmd index 61b0aa2f2..75541af9d 100644 --- a/man/rmd/rand_forest_party.Rmd +++ b/man/rmd/rand_forest_party.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("rand_forest", "party")` diff --git a/man/rmd/rand_forest_randomForest.Rmd b/man/rmd/rand_forest_randomForest.Rmd index 9e085c9f7..438b605df 100644 --- a/man/rmd/rand_forest_randomForest.Rmd +++ b/man/rmd/rand_forest_randomForest.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("rand_forest", "randomForest")` diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd index 5aa69b6b7..86e723fd2 100644 --- a/man/rmd/rand_forest_ranger.Rmd +++ b/man/rmd/rand_forest_ranger.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("rand_forest", "ranger")` diff --git a/man/rmd/rand_forest_spark.Rmd b/man/rmd/rand_forest_spark.Rmd index 
88ed7ca01..471342ac5 100644 --- a/man/rmd/rand_forest_spark.Rmd +++ b/man/rmd/rand_forest_spark.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("rand_forest", "spark")` diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd index 130eb8c3c..18bcb371c 100644 --- a/man/rmd/rule_fit_xrf.Rmd +++ b/man/rmd/rule_fit_xrf.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("rule_fit", "xrf")` diff --git a/man/rmd/surv_reg_flexsurv.Rmd b/man/rmd/surv_reg_flexsurv.Rmd index be8f93d93..967d62103 100644 --- a/man/rmd/surv_reg_flexsurv.Rmd +++ b/man/rmd/surv_reg_flexsurv.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("surv_reg", "survival")` diff --git a/man/rmd/surv_reg_survival.Rmd b/man/rmd/surv_reg_survival.Rmd index ddfe53e85..d9713650d 100644 --- a/man/rmd/surv_reg_survival.Rmd +++ b/man/rmd/surv_reg_survival.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("surv_reg", "survival")` diff --git a/man/rmd/survival_reg_flexsurv.Rmd b/man/rmd/survival_reg_flexsurv.Rmd index 121123e92..c7cc48ee5 100644 --- a/man/rmd/survival_reg_flexsurv.Rmd +++ b/man/rmd/survival_reg_flexsurv.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("survival_reg", "flexsurv")` diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd index 27c04cb7b..56d942701 100644 --- a/man/rmd/survival_reg_survival.Rmd +++ b/man/rmd/survival_reg_survival.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("survival_reg", "survival")` diff --git a/man/rmd/svm_linear_LiblineaR.Rmd 
b/man/rmd/svm_linear_LiblineaR.Rmd index 336cdd149..86b74c70e 100644 --- a/man/rmd/svm_linear_LiblineaR.Rmd +++ b/man/rmd/svm_linear_LiblineaR.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("svm_linear", "LiblineaR")` diff --git a/man/rmd/svm_linear_kernlab.Rmd b/man/rmd/svm_linear_kernlab.Rmd index 14f75f6e3..560b4b129 100644 --- a/man/rmd/svm_linear_kernlab.Rmd +++ b/man/rmd/svm_linear_kernlab.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("svm_linear", "kernlab")` diff --git a/man/rmd/svm_poly_kernlab.Rmd b/man/rmd/svm_poly_kernlab.Rmd index 739afec4f..78efd4743 100644 --- a/man/rmd/svm_poly_kernlab.Rmd +++ b/man/rmd/svm_poly_kernlab.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("svm_poly", "kernlab")` diff --git a/man/rmd/svm_rbf_kernlab.Rmd b/man/rmd/svm_rbf_kernlab.Rmd index 0def3231f..338e9c542 100644 --- a/man/rmd/svm_rbf_kernlab.Rmd +++ b/man/rmd/svm_rbf_kernlab.Rmd @@ -1,4 +1,4 @@ -```{r, child = "setup.Rmd", include = FALSE} +```{r, child = "aaa.Rmd", include = FALSE} ``` `r descr_models("svm_rbf", "kernlab")` From 1693ad402804db2bca3885bff750a822f44c5211 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 13:14:24 -0500 Subject: [PATCH 24/65] remove unused engine doc functions --- NAMESPACE | 1 - R/engine_docs.R | 39 --------------------------------------- man/doc-tools.Rd | 3 --- 3 files changed, 43 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 81b8e15c6..75920e7fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -226,7 +226,6 @@ export(null_model) export(null_value) export(nullmodel) export(parsnip_addin) -export(pkg_extension_note) export(pls) export(poisson_reg) export(pred_value_template) diff --git a/R/engine_docs.R b/R/engine_docs.R index ab9517a10..bca8120f6 100644 --- a/R/engine_docs.R 
+++ b/R/engine_docs.R @@ -308,42 +308,3 @@ combine_prefix_with_engines <- function(prefix, engines) { glue::glue("{prefix} {engines}") } -keep_ext <- function(x, pkgs) { - x <- x[x %in% pkgs] - if (length(x) > 0) { - x <- paste0("\\pkg{", x, "}") - x <- glue::glue_collapse(x, sep = ", ", last = " and ") - x <- paste0(" (may require: ", x, ")") - } else { - x <- "" - } - x -} - -get_extension_pkgs <- function(mod) { - deps <- - get_from_env(paste0(mod, "_pkgs")) %>% - dplyr::mutate(ext = purrr::map_chr(pkg, keep_ext, parsnip:::extensions())) - dplyr::select(deps, engine, ext) -} - -#' @export -#' @rdname doc-tools -pkg_extension_note <- function(mod) { - ext_pkgs <- tibble::tibble(pkg = parsnip:::extensions()) - deps <- - get_from_env(paste0(mod, "_pkgs")) %>% - tidyr::unnest(cols = c(pkg)) %>% - dplyr::inner_join(ext_pkgs, by = "pkg") %>% - dplyr::arrange(tolower(engine)) %>% - dplyr::mutate( - pkg = paste0("\\pkg{", pkg, "}"), - engine = paste0("`", engine, "`") - ) %>% - dplyr::group_nest(pkg) %>% - dplyr::mutate(note = purrr::map_chr(data, ~ glue::glue_collapse(.x$engine, sep = ", ", last = " and "))) %>% - dplyr::mutate(note = glue::glue("Note that engine(s) {note} may require extension package {pkg}.\n\n")) %>% - purrr::pluck("note") - - paste(deps, collapse = "") -} diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index 1c3e7d869..ce77744a7 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -5,7 +5,6 @@ \alias{find_engine_files} \alias{make_engine_list} \alias{make_seealso_list} -\alias{pkg_extension_note} \title{Tools for dynamically documenting packages} \usage{ find_engine_files(mod) @@ -13,8 +12,6 @@ find_engine_files(mod) make_engine_list(mod) make_seealso_list(mod, pkg = "parsnip") - -pkg_extension_note(mod) } \arguments{ \item{mod}{A character string for the model file (e.g. 
"linear_reg")} From 7eb977dae7650efa8cf5006acd3d34953ed71ddb Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 13:24:54 -0500 Subject: [PATCH 25/65] added a note with PR number --- man/rmd/aaa.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/rmd/aaa.Rmd b/man/rmd/aaa.Rmd index b7069db8c..346864fa0 100644 --- a/man/rmd/aaa.Rmd +++ b/man/rmd/aaa.Rmd @@ -26,7 +26,7 @@ make_mode_list <- function(mod, eng) { dplyr::mutate(mode = factor(mode, levels = modes)) %>% dplyr::arrange(mode) - # Need to get mode-specific dependencies + # Need to get mode-specific dependencies (see tidymodels/parsnip#629) exts } From c4b6227f9627aa4b033607009b4116148aeff804 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 3 Jan 2022 18:40:32 -0500 Subject: [PATCH 26/65] more info on models with extension packages --- DESCRIPTION | 1 + R/aaa.R | 2 +- R/engine_docs.R | 2 +- R/linear_reg_glm.R | 2 +- man/details_bag_mars_earth.Rd | 16 +++++++- man/details_bag_tree_C5.0.Rd | 7 +++- man/details_bag_tree_rpart.Rd | 42 +++++++++++++++++--- man/details_cubist_rules_Cubist.Rd | 7 +++- man/details_decision_tree_party.Rd | 7 +++- man/details_decision_tree_rpart.Rd | 11 ++++- man/details_discrim_flexible_earth.Rd | 11 ++++- man/details_discrim_linear_MASS.Rd | 7 +++- man/details_discrim_linear_mda.Rd | 7 +++- man/details_discrim_linear_sda.Rd | 7 +++- man/details_discrim_linear_sparsediscrim.Rd | 7 +++- man/details_discrim_quad_MASS.Rd | 7 +++- man/details_discrim_quad_sparsediscrim.Rd | 7 +++- man/details_discrim_regularized_klaR.Rd | 7 +++- man/details_linear_reg_gee.Rd | 7 +++- man/details_linear_reg_gls.Rd | 7 +++- man/details_linear_reg_lme.Rd | 7 +++- man/details_linear_reg_lmer.Rd | 7 +++- man/details_logistic_reg_gee.Rd | 7 +++- man/details_logistic_reg_glmer.Rd | 7 +++- man/details_naive_Bayes_klaR.Rd | 7 +++- man/details_naive_Bayes_naivebayes.Rd | 7 +++- man/details_pls_mixOmics.Rd | 14 ++++++- man/details_poisson_reg_gee.Rd | 7 +++- 
man/details_poisson_reg_glm.Rd | 7 +++- man/details_poisson_reg_glmer.Rd | 7 +++- man/details_poisson_reg_glmnet.Rd | 7 +++- man/details_poisson_reg_hurdle.Rd | 7 +++- man/details_poisson_reg_stan.Rd | 7 +++- man/details_poisson_reg_stan_glmer.Rd | 7 +++- man/details_poisson_reg_zeroinfl.Rd | 7 +++- man/details_proportional_hazards_glmnet.Rd | 7 +++- man/details_proportional_hazards_survival.Rd | 7 +++- man/details_rand_forest_party.Rd | 7 +++- man/details_rule_fit_xrf.Rd | 16 ++++++-- man/details_survival_reg_flexsurv.Rd | 7 +++- man/details_survival_reg_survival.Rd | 7 +++- man/rmd/aaa.Rmd | 31 +++++++++++++-- man/rmd/bag_mars_earth.Rmd | 12 +++++- man/rmd/bag_tree_C5.0.Rmd | 4 ++ man/rmd/bag_tree_rpart.Rmd | 12 ++++++ man/rmd/cubist_rules_Cubist.Rmd | 4 ++ man/rmd/decision_tree_party.Rmd | 4 ++ man/rmd/decision_tree_rpart.Rmd | 10 ++++- man/rmd/discrim_flexible_earth.Rmd | 10 ++++- man/rmd/discrim_linear_MASS.Rmd | 4 ++ man/rmd/discrim_linear_mda.Rmd | 4 ++ man/rmd/discrim_linear_sda.Rmd | 4 ++ man/rmd/discrim_linear_sparsediscrim.Rmd | 4 ++ man/rmd/discrim_quad_MASS.Rmd | 4 ++ man/rmd/discrim_quad_sparsediscrim.Rmd | 4 ++ man/rmd/discrim_regularized_klaR.Rmd | 4 ++ man/rmd/linear_reg_gee.Rmd | 4 ++ man/rmd/linear_reg_gls.Rmd | 4 ++ man/rmd/linear_reg_lme.Rmd | 4 ++ man/rmd/linear_reg_lmer.Rmd | 6 ++- man/rmd/logistic_reg_gee.Rmd | 4 ++ man/rmd/logistic_reg_glmer.Rmd | 4 ++ man/rmd/naive_Bayes_klaR.Rmd | 4 ++ man/rmd/naive_Bayes_naivebayes.Rmd | 4 ++ man/rmd/pls_mixOmics.Rmd | 8 ++++ man/rmd/poisson_reg_gee.Rmd | 4 ++ man/rmd/poisson_reg_glm.Rmd | 4 ++ man/rmd/poisson_reg_glmer.Rmd | 4 ++ man/rmd/poisson_reg_glmnet.Rmd | 4 ++ man/rmd/poisson_reg_hurdle.Rmd | 4 ++ man/rmd/poisson_reg_stan.Rmd | 4 ++ man/rmd/poisson_reg_stan_glmer.Rmd | 4 ++ man/rmd/poisson_reg_zeroinfl.Rmd | 4 ++ man/rmd/proportional_hazards_glmnet.Rmd | 4 ++ man/rmd/proportional_hazards_survival.Rmd | 4 ++ man/rmd/rand_forest_party.Rmd | 4 ++ man/rmd/rule_fit_xrf.Rmd | 11 ++++- 
man/rmd/survival_reg_flexsurv.Rmd | 4 ++ man/rmd/survival_reg_survival.Rmd | 4 ++ 79 files changed, 497 insertions(+), 57 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8eb26c758..e0b616dcf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,6 +18,7 @@ BugReports: https://github.com/tidymodels/parsnip/issues Depends: R (>= 2.10) Imports: + cli, dplyr (>= 0.8.0.1), generics (>= 0.1.0.9000), globals, diff --git a/R/aaa.R b/R/aaa.R index 3e65883b8..6f935668a 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -91,7 +91,7 @@ utils::globalVariables( "max_terms", "max_tree", "model", "name", "num_terms", "penalty", "trees", "sub_neighbors", ".pred_class", "x", "y", "predictor_indicators", "compute_intercept", "remove_intercept", "estimate", "term", - "call_info", "component", "component_id", "func") + "call_info", "component", "component_id", "func", "pkg", ".order", "item") ) # nocov end diff --git a/R/engine_docs.R b/R/engine_docs.R index bca8120f6..c3227b80e 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -187,7 +187,7 @@ make_engine_list <- function(mod) { } exts <- - read.delim(system.file("models.tsv", package = "parsnip")) %>% + utils::read.delim(system.file("models.tsv", package = "parsnip")) %>% dplyr::filter(model == mod) %>% dplyr::group_by(engine) %>% dplyr::summarize(extensions = sum(!is.na(pkg))) %>% diff --git a/R/linear_reg_glm.R b/R/linear_reg_glm.R index 3e3db0152..7884fe18d 100644 --- a/R/linear_reg_glm.R +++ b/R/linear_reg_glm.R @@ -4,7 +4,7 @@ #' linear combination of the predictors is used to model the numeric outcome #' via a link function. #' -#' @includeRmd man/rmd/linear_reg_glm.Rmd details +#' @includeRmd man/rmd/linear_reg_glm.md details #' #' @name details_linear_reg_glm #' @keywords internal diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index b196134d2..9d1b63095 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -25,7 +25,10 @@ columns. 
For a data frame \code{x}, the default is \code{\link[earth:earth]{earth::earth()}} and the reference below). } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{baguette}.\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% set_engine("earth") \%>\% set_mode("regression") \%>\% translate() @@ -45,7 +48,16 @@ columns. For a data frame \code{x}, the default is } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_mars( + num_terms = integer(1), + prod_degree = integer(1), + prune_method = character(1) +) \%>\% set_engine("earth") \%>\% set_mode("classification") \%>\% translate() diff --git a/man/details_bag_tree_C5.0.Rd b/man/details_bag_tree_C5.0.Rd index e9c80a4a8..8c00081f1 100644 --- a/man/details_bag_tree_C5.0.Rd +++ b/man/details_bag_tree_C5.0.Rd @@ -17,7 +17,12 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(min_n = integer()) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_tree(min_n = integer()) \%>\% set_engine("C5.0") \%>\% set_mode("classification") \%>\% translate() diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index 3f903052e..369a74a6f 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -30,7 +30,12 @@ this is not the case, values between zero and one can be used to bias to the second level of the factor. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("classification") \%>\% translate() @@ -50,7 +55,12 @@ the second level of the factor. } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("regression") \%>\% translate() @@ -70,7 +80,12 @@ the second level of the factor. } } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(censored) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("censored regression") \%>\% translate() @@ -128,7 +143,12 @@ this is not the case, values between zero and one can be used to bias to the second level of the factor. } -\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (classification)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("classification") \%>\% translate() @@ -148,7 +168,12 @@ the second level of the factor. } } -\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (regression)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("regression") \%>\% translate() @@ -168,7 +193,12 @@ the second level of the factor. } } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There are parsnip extension packages required to fit this model to this +mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(censored) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd index a8a524e56..b56984a82 100644 --- a/man/details_cubist_rules_Cubist.Rd +++ b/man/details_cubist_rules_Cubist.Rd @@ -20,7 +20,12 @@ This model has 3 tuning parameters: } } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{cubist_rules( +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{rules}.\if{html}{\out{
}}\preformatted{library(rules) + +cubist_rules( committees = integer(1), neighbors = integer(1), max_rules = integer(1) diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd index 483b12d16..93a349fac 100644 --- a/man/details_decision_tree_party.Rd +++ b/man/details_decision_tree_party.Rd @@ -27,7 +27,12 @@ evaluated for splitting. The default is to use all predictors. } } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\% set_engine("party") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd index fe0d4fab0..8bffbbf61 100644 --- a/man/details_decision_tree_rpart.Rd +++ b/man/details_decision_tree_rpart.Rd @@ -61,7 +61,16 @@ This model has 3 tuning parameters: } } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +decision_tree( + tree_depth = integer(1), + min_n = integer(1), + cost_complexity = double(1) +) \%>\% set_engine("rpart") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index f36a2e36c..b70091fd6 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -26,7 +26,16 @@ The default value of \code{num_terms} depends on the number of columns (\code{p} intercept-only model. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_flexible(num_terms = integer(0), prod_degree = integer(0), prune_method = character(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_flexible( + num_terms = integer(0), + prod_degree = integer(0), + prune_method = character(0) +) \%>\% translate() }\if{html}{\out{
}}\preformatted{## Flexible Discriminant Model Specification (classification) ## diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index d3be2a696..428e9d1d3 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -16,7 +16,12 @@ For this engine, there is a single mode: classification This engine has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_linear() \%>\% set_engine("MASS") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index ec9b74f90..b26442b43 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -18,7 +18,12 @@ This model has 1 tuning parameter: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear(penalty = numeric(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_linear(penalty = numeric(0)) \%>\% set_engine("mda") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index a7411c573..7fbc118b3 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -32,7 +32,12 @@ This maps to } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_linear() \%>\% set_engine("sda") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index 012a47465..584b5f2ad 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -32,7 +32,12 @@ execute, are: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_linear(regularization_method = character(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_linear(regularization_method = character(0)) \%>\% set_engine("sparsediscrim") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Linear Discriminant Model Specification (classification) diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index a70983cdf..94d0f9117 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -16,7 +16,12 @@ For this engine, there is a single mode: classification This engine has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_quad() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_quad() \%>\% set_engine("MASS") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index 085a69a8e..9f4df2921 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -30,7 +30,12 @@ execute, are: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_quad(regularization_method = character(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_quad(regularization_method = character(0)) \%>\% set_engine("sparsediscrim") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Quadratic Discriminant Model Specification (classification) diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index b73b96857..0626055ea 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -31,7 +31,12 @@ discriminant analysis (QDA) model. } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \%>\% set_engine("klaR") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Regularized Discriminant Model Specification (classification) diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd index f74e2f8b7..85aa8fc6b 100644 --- a/man/details_linear_reg_gee.Rd +++ b/man/details_linear_reg_gee.Rd @@ -17,7 +17,12 @@ typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +linear_reg() \%>\% set_engine("gee") \%>\% set_mode("regression") \%>\% translate() diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index f072cc789..84dcc7f0f 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -14,7 +14,12 @@ For this engine, there is a single mode: regression This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +linear_reg() \%>\% set_engine("gls") \%>\% set_mode("regression") \%>\% translate() diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index c0cc5f360..a5ed1c5ee 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -14,7 +14,12 @@ For this engine, there is a single mode: regression This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +linear_reg() \%>\% set_engine("lme") \%>\% set_mode("regression") \%>\% translate() diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index 1f3b758b4..e2ea8de39 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -14,7 +14,12 @@ For this engine, there is a single mode: regression This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +linear_reg() \%>\% set_engine("lmer") \%>\% set_mode("regression") \%>\% translate() diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd index 957ea6286..b6cee8428 100644 --- a/man/details_logistic_reg_gee.Rd +++ b/man/details_logistic_reg_gee.Rd @@ -17,7 +17,12 @@ typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +logistic_reg() \%>\% set_engine("gee") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index 98535aafc..f0318a394 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -14,7 +14,12 @@ For this engine, there is a single mode: classification This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +logistic_reg() \%>\% set_engine("glmer") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) diff --git a/man/details_naive_Bayes_klaR.Rd b/man/details_naive_Bayes_klaR.Rd index ace2d74f0..c967156c0 100644 --- a/man/details_naive_Bayes_klaR.Rd +++ b/man/details_naive_Bayes_klaR.Rd @@ -20,7 +20,12 @@ This model has 2 tuning parameter: Note that \code{usekernel} is always set to \code{TRUE} for the \code{klaR} engine. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% set_engine("klaR") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd index 105bb2b87..66ec9e3cd 100644 --- a/man/details_naive_Bayes_naivebayes.Rd +++ b/man/details_naive_Bayes_naivebayes.Rd @@ -18,7 +18,12 @@ This model has 2 tuning parameter: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{discrim}.\if{html}{\out{
}}\preformatted{library(discrim) + +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) \%>\% set_engine("naivebayes") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Naive Bayes Model Specification (classification) diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index 5f9695b06..85b4b1378 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -18,7 +18,12 @@ see below) } } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{plsmod}.\if{html}{\out{
}}\preformatted{library(plsmod) + +pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% set_engine("mixOmics") \%>\% set_mode("regression") \%>\% translate() @@ -47,7 +52,12 @@ for sparse models. } } -\subsection{Translation from parsnip to the underlying model call (classification)}{\if{html}{\out{
}}\preformatted{pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% +\subsection{Translation from parsnip to the underlying model call (classification)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{plsmod}.\if{html}{\out{
}}\preformatted{library(plsmod) + +pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% set_engine("mixOmics") \%>\% set_mode("classification") \%>\% translate() diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd index 41669f543..58326fd78 100644 --- a/man/details_poisson_reg_gee.Rd +++ b/man/details_poisson_reg_gee.Rd @@ -17,7 +17,12 @@ typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "gee") \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +poisson_reg(engine = "gee") \%>\% set_engine("gee") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index d3e8422ab..c481c62df 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -13,7 +13,12 @@ For this engine, there is a single mode: regression This engine has no tuning parameters. } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{poissonreg}.\if{html}{\out{
}}\preformatted{library(poissonreg) + +poisson_reg() \%>\% set_engine("glm") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index ae2b3d898..7cb64c27c 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -14,7 +14,12 @@ For this engine, there is a single mode: regression This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "glmer") \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +poisson_reg(engine = "glmer") \%>\% set_engine("glmer") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index f9bf14c05..9278ca50a 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -26,7 +26,12 @@ value. For more details about this, and the \code{glmnet} model in general, see \link{glmnet-details}. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{poissonreg}.\if{html}{\out{
}}\preformatted{library(poissonreg) + +poisson_reg(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 404cfa033..22841351f 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -15,7 +15,12 @@ For this engine, there is a single mode: regression This engine has no tuning parameters. } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{poissonreg}.\if{html}{\out{
}}\preformatted{library(poissonreg) + +poisson_reg() \%>\% set_engine("hurdle") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index 20b343b43..836d05572 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -37,7 +37,12 @@ See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and and other options. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{poissonreg}.\if{html}{\out{
}}\preformatted{library(poissonreg) + +poisson_reg() \%>\% set_engine("stan") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index 5bec962ae..5c0d68904 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -34,7 +34,12 @@ centering all predictors). See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{poisson_reg(engine = "stan_glmer") \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +poisson_reg(engine = "stan_glmer") \%>\% set_engine("stan_glmer") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index 9646f812d..db965e59b 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -15,7 +15,12 @@ For this engine, there is a single mode: regression This engine has no tuning parameters. } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{poisson_reg() \%>\% +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{poissonreg}.\if{html}{\out{
}}\preformatted{library(poissonreg) + +poisson_reg() \%>\% set_engine("zeroinfl") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Poisson Regression Model Specification (regression) diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index 523769edf..9d1d191f0 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -25,7 +25,12 @@ value. For more details about this, and the \code{glmnet} model in general, see \link{glmnet-details}. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% set_engine("glmnet") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Proportional Hazards Model Specification (censored regression) diff --git a/man/details_proportional_hazards_survival.Rd b/man/details_proportional_hazards_survival.Rd index 6ee69341a..5d6e57a2e 100644 --- a/man/details_proportional_hazards_survival.Rd +++ b/man/details_proportional_hazards_survival.Rd @@ -13,7 +13,12 @@ For this engine, there is a single mode: censored regression This model has no tuning parameters. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{proportional_hazards() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +proportional_hazards() \%>\% set_engine("survival") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_rand_forest_party.Rd b/man/details_rand_forest_party.Rd index 1c0633b6e..d5fb9b486 100644 --- a/man/details_rand_forest_party.Rd +++ b/man/details_rand_forest_party.Rd @@ -20,7 +20,12 @@ This model has 3 tuning parameters: } } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{rand_forest() \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +rand_forest() \%>\% set_engine("party") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index 8d2223429..f1ed436f4 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -28,7 +28,12 @@ default: 1.0) } } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{rule_fit( +\subsection{Translation from parsnip to the underlying model call (regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{rules}.\if{html}{\out{
}}\preformatted{library(rules) + +rule_fit( mtry = numeric(1), trees = integer(1), min_n = integer(1), @@ -63,7 +68,12 @@ default: 1.0) } } -\subsection{Translation from parsnip to the underlying model call (classification)}{\if{html}{\out{
}}\preformatted{rule_fit( +\subsection{Translation from parsnip to the underlying model call (classification)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{rules}.\if{html}{\out{
}}\preformatted{library(rules) + +rule_fit( mtry = numeric(1), trees = integer(1), min_n = integer(1), @@ -108,7 +118,7 @@ Also, there are several configuration differences in how \code{xrf()} is fit between that package and the wrapper used in \code{rules}. Some differences in default values are: \itemize{ -\item \code{trees}: \verb{xrf: 100,}rules`: 15 +\item \code{trees}: \code{xrf}: 100, \code{rules}: 15 \item \code{max_depth}: \code{xrf}: 3, \code{rules}: 6 } diff --git a/man/details_survival_reg_flexsurv.Rd b/man/details_survival_reg_flexsurv.Rd index 733c95851..2937973dc 100644 --- a/man/details_survival_reg_flexsurv.Rd +++ b/man/details_survival_reg_flexsurv.Rd @@ -16,7 +16,12 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{survival_reg(dist = character(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +survival_reg(dist = character(1)) \%>\% set_engine("flexsurv") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd index ca88a90e2..56584b040 100644 --- a/man/details_survival_reg_survival.Rd +++ b/man/details_survival_reg_survival.Rd @@ -16,7 +16,12 @@ This model has 1 tuning parameters: } } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{survival_reg(dist = character(1)) \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +survival_reg(dist = character(1)) \%>\% set_engine("survival") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/rmd/aaa.Rmd b/man/rmd/aaa.Rmd index 346864fa0..5ba7fa8f1 100644 --- a/man/rmd/aaa.Rmd +++ b/man/rmd/aaa.Rmd @@ -12,7 +12,7 @@ check_pkg_for_docs <- function(x){ rmd_pkgs <- c("tune", "glue", "dplyr", "parsnip", "dials", "glmnet", "Cubist", "xrf") check_pkg_for_docs(rmd_pkgs) -# check_pkg_for_docs(parsnip:::extensions()) +check_pkg_for_docs(parsnip:::extensions()) # ------------------------------------------------------------------------------ @@ -21,7 +21,7 @@ check_pkg_for_docs(rmd_pkgs) make_mode_list <- function(mod, eng) { modes <- c("regression", "classification", "censored regression") exts <- - read.delim(system.file("models.tsv", package = "parsnip")) %>% + utils::read.delim(system.file("models.tsv", package = "parsnip")) %>% dplyr::filter(model == mod & engine == eng) %>% dplyr::mutate(mode = factor(mode, levels = modes)) %>% dplyr::arrange(mode) @@ -113,6 +113,31 @@ descr_models <- function(mod, eng) { paste("For this engine, there", txt, knitr::combine_words(res)) } -options(width = 80) +uses_extension <- function(mod, eng, mod_mode) { + exts <- + utils::read.delim(system.file("models.tsv", package = "parsnip")) %>% + dplyr::filter( + model == mod & + engine == eng & + mode == mod_mode & + pkg %in% parsnip:::extensions() + ) %>% + dplyr::distinct(pkg) %>% + purrr::pluck("pkg") + + num_ext <- length(exts) + if (num_ext > 0) { + res <- paste0("**", exts, "**", collapse = ", ") + x <- + ifelse(num_ext > 1, + "There are parsnip extension packages ", + "There is a parsnip extension package ") + res <- paste0(x, "required to fit this model to this mode: ", res, ".") + } else { + res <- "" + } + res +} +options(width = 80) ``` diff --git a/man/rmd/bag_mars_earth.Rmd b/man/rmd/bag_mars_earth.Rmd index 08e4bedf1..b7df34e07 100644 --- a/man/rmd/bag_mars_earth.Rmd +++ 
b/man/rmd/bag_mars_earth.Rmd @@ -26,6 +26,8 @@ The default value of `num_terms` depends on the number of predictor columns. For ## Translation from parsnip to the original package (regression) +`r uses_extension("bag_mars", "earth", "regression")` + ```{r earth-reg} bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% set_engine("earth") %>% @@ -35,8 +37,16 @@ bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = charac ## Translation from parsnip to the original package (classification) +`r uses_extension("bag_mars", "earth", "classification")` + ```{r earth-cls} -bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% +library(baguette) + +bag_mars( + num_terms = integer(1), + prod_degree = integer(1), + prune_method = character(1) +) %>% set_engine("earth") %>% set_mode("classification") %>% translate() diff --git a/man/rmd/bag_tree_C5.0.Rmd b/man/rmd/bag_tree_C5.0.Rmd index 47cf492ce..c15aca0a6 100644 --- a/man/rmd/bag_tree_C5.0.Rmd +++ b/man/rmd/bag_tree_C5.0.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package (classification) +`r uses_extension("bag_tree", "C5.0", "classification")` + ```{r C5.0-cls} +library(baguette) + bag_tree(min_n = integer()) %>% set_engine("C5.0") %>% set_mode("classification") %>% diff --git a/man/rmd/bag_tree_rpart.Rmd b/man/rmd/bag_tree_rpart.Rmd index e53671282..c419455ed 100644 --- a/man/rmd/bag_tree_rpart.Rmd +++ b/man/rmd/bag_tree_rpart.Rmd @@ -28,7 +28,11 @@ For the `class_cost` parameter, the value can be a non-negative scalar for a cla ## Translation from parsnip to the original package (classification) +`r uses_extension("bag_tree", "rpart", "classification")` + ```{r rpart-cls} +library(baguette) + bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% set_engine("rpart") %>% set_mode("classification") %>% @@ -38,7 +42,11 @@ bag_tree(tree_depth = integer(1), min_n = 
integer(1), cost_complexity = double(1 ## Translation from parsnip to the original package (regression) +`r uses_extension("bag_tree", "rpart", "regression")` + ```{r rpart-reg} +library(baguette) + bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% set_engine("rpart") %>% set_mode("regression") %>% @@ -47,7 +55,11 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 ## Translation from parsnip to the original package (censored regression) +`r uses_extension("bag_tree", "rpart", "censored regression")` + ```{r rpart-creg} +library(censored) + bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% set_engine("rpart") %>% set_mode("censored regression") %>% diff --git a/man/rmd/cubist_rules_Cubist.Rmd b/man/rmd/cubist_rules_Cubist.Rmd index e1477b8e1..dd5fe8362 100644 --- a/man/rmd/cubist_rules_Cubist.Rmd +++ b/man/rmd/cubist_rules_Cubist.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("cubist_rules", "Cubist", "regression")` + ```{r cubist-reg} +library(rules) + cubist_rules( committees = integer(1), neighbors = integer(1), diff --git a/man/rmd/decision_tree_party.Rmd b/man/rmd/decision_tree_party.Rmd index e3f53d9b1..9beac0bc6 100644 --- a/man/rmd/decision_tree_party.Rmd +++ b/man/rmd/decision_tree_party.Rmd @@ -31,7 +31,11 @@ An engine specific parameter for this model is: ## Translation from parsnip to the original package (censored regression) +`r uses_extension("decision_tree", "party", "censored regression")` + ```{r party-creg} +library(censored) + decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% set_engine("party") %>% set_mode("censored regression") %>% diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd index 1dc40f084..54bc58d78 100644 --- a/man/rmd/decision_tree_rpart.Rmd +++ b/man/rmd/decision_tree_rpart.Rmd @@ -45,8 +45,16 @@ 
decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = dou ## Translation from parsnip to the original package (censored regression) +`r uses_extension("decision_tree", "rpart", "censored regression")` + ```{r rpart-cens-reg} -decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% +library(censored) + +decision_tree( + tree_depth = integer(1), + min_n = integer(1), + cost_complexity = double(1) +) %>% set_engine("rpart") %>% set_mode("censored regression") %>% translate() diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd index df6b89a42..e09e2fdbe 100644 --- a/man/rmd/discrim_flexible_earth.Rmd +++ b/man/rmd/discrim_flexible_earth.Rmd @@ -26,8 +26,16 @@ The default value of `num_terms` depends on the number of columns (`p`): `min(20 ## Translation from parsnip to the original package +`r uses_extension("discrim_flexible", "earth", "classification")` + ```{r earth-cls} -discrim_flexible(num_terms = integer(0), prod_degree = integer(0), prune_method = character(0)) %>% +library(discrim) + +discrim_flexible( + num_terms = integer(0), + prod_degree = integer(0), + prune_method = character(0) +) %>% translate() ``` diff --git a/man/rmd/discrim_linear_MASS.Rmd b/man/rmd/discrim_linear_MASS.Rmd index 136bc2aeb..c8f6ef6a8 100644 --- a/man/rmd/discrim_linear_MASS.Rmd +++ b/man/rmd/discrim_linear_MASS.Rmd @@ -9,7 +9,11 @@ This engine has no tuning parameters. 
## Translation from parsnip to the original package +`r uses_extension("discrim_linear", "MASS", "classification")` + ```{r mass-cls} +library(discrim) + discrim_linear() %>% set_engine("MASS") %>% translate() diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd index 52641788b..e931a4617 100644 --- a/man/rmd/discrim_linear_mda.Rmd +++ b/man/rmd/discrim_linear_mda.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package +`r uses_extension("discrim_linear", "mda", "classification")` + ```{r mda-cls} +library(discrim) + discrim_linear(penalty = numeric(0)) %>% set_engine("mda") %>% translate() diff --git a/man/rmd/discrim_linear_sda.Rmd b/man/rmd/discrim_linear_sda.Rmd index 1efe67c7b..55149ec81 100644 --- a/man/rmd/discrim_linear_sda.Rmd +++ b/man/rmd/discrim_linear_sda.Rmd @@ -19,7 +19,11 @@ However, there are a few engine-specific parameters that can be set or optimized ## Translation from parsnip to the original package +`r uses_extension("discrim_linear", "sda", "classification")` + ```{r sda-cls} +library(discrim) + discrim_linear() %>% set_engine("sda") %>% translate() diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd index c5e32997e..5b6d3bd5d 100644 --- a/man/rmd/discrim_linear_sparsediscrim.Rmd +++ b/man/rmd/discrim_linear_sparsediscrim.Rmd @@ -31,7 +31,11 @@ The possible values of this parameter, and the functions that they execute, are: ## Translation from parsnip to the original package +`r uses_extension("discrim_linear", "sparsediscrim", "classification")` + ```{r sparsediscrim-cls} +library(discrim) + discrim_linear(regularization_method = character(0)) %>% set_engine("sparsediscrim") %>% translate() diff --git a/man/rmd/discrim_quad_MASS.Rmd b/man/rmd/discrim_quad_MASS.Rmd index b097ade1a..d25fa1aee 100644 --- a/man/rmd/discrim_quad_MASS.Rmd +++ b/man/rmd/discrim_quad_MASS.Rmd @@ -9,7 +9,11 @@ This engine has no tuning parameters. 
## Translation from parsnip to the original package +`r uses_extension("discrim_quad", "MASS", "classification")` + ```{r mass-cls} +library(discrim) + discrim_quad() %>% set_engine("MASS") %>% translate() diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd index afd32266d..2d8c6aaa0 100644 --- a/man/rmd/discrim_quad_sparsediscrim.Rmd +++ b/man/rmd/discrim_quad_sparsediscrim.Rmd @@ -30,7 +30,11 @@ The possible values of this parameter, and the functions that they execute, are: ## Translation from parsnip to the original package +`r uses_extension("discrim_quad", "sparsediscrim", "classification")` + ```{r sparsediscrim-cls} +library(discrim) + discrim_quad(regularization_method = character(0)) %>% set_engine("sparsediscrim") %>% translate() diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd index 684bb8443..cc1f0e836 100644 --- a/man/rmd/discrim_regularized_klaR.Rmd +++ b/man/rmd/discrim_regularized_klaR.Rmd @@ -33,7 +33,11 @@ Some special cases for the RDA model: ## Translation from parsnip to the original package +`r uses_extension("discrim_regularized", "klaR", "classification")` + ```{r klaR-cls} +library(discrim) + discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) %>% set_engine("klaR") %>% translate() diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd index 029f4f93e..5a54c7e9f 100644 --- a/man/rmd/linear_reg_gee.Rmd +++ b/man/rmd/linear_reg_gee.Rmd @@ -9,7 +9,11 @@ This model has no formal tuning parameters. 
It might be beneficial to determine ## Translation from parsnip to the original package +`r uses_extension("linear_reg", "gee", "regression")` + ```{r gee-csl} +library(multilevelmod) + linear_reg() %>% set_engine("gee") %>% set_mode("regression") %>% diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index e5fe19c3c..cb5334466 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ b/man/rmd/linear_reg_gls.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. ## Translation from parsnip to the original package +`r uses_extension("linear_reg", "gls", "regression")` + ```{r gls-csl} +library(multilevelmod) + linear_reg() %>% set_engine("gls") %>% set_mode("regression") %>% diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd index 8950fac0a..7f4eb5071 100644 --- a/man/rmd/linear_reg_lme.Rmd +++ b/man/rmd/linear_reg_lme.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. ## Translation from parsnip to the original package +`r uses_extension("linear_reg", "lme", "regression")` + ```{r lme-csl} +library(multilevelmod) + linear_reg() %>% set_engine("lme") %>% set_mode("regression") %>% diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd index 86a4fcbce..742a2da8e 100644 --- a/man/rmd/linear_reg_lmer.Rmd +++ b/man/rmd/linear_reg_lmer.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. ## Translation from parsnip to the original package -```{r lmer-csl} +`r uses_extension("linear_reg", "lmer", "regression")` + +```{r lmer-reg} +library(multilevelmod) + linear_reg() %>% set_engine("lmer") %>% set_mode("regression") %>% diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd index 6ebcc4e59..d2603ad7f 100644 --- a/man/rmd/logistic_reg_gee.Rmd +++ b/man/rmd/logistic_reg_gee.Rmd @@ -9,7 +9,11 @@ This model has no formal tuning parameters. 
It might be beneficial to determine ## Translation from parsnip to the original package +`r uses_extension("logistic_reg", "gee", "classification")` + ```{r gee-csl} +library(multilevelmod) + logistic_reg() %>% set_engine("gee") %>% translate() diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd index 0c0a70d92..3f86442e2 100644 --- a/man/rmd/logistic_reg_glmer.Rmd +++ b/man/rmd/logistic_reg_glmer.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. ## Translation from parsnip to the original package +`r uses_extension("logistic_reg", "glmer", "classification")` + ```{r lmer-csl} +library(multilevelmod) + logistic_reg() %>% set_engine("glmer") %>% translate() diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd index 78e832d55..05eba966f 100644 --- a/man/rmd/naive_Bayes_klaR.Rmd +++ b/man/rmd/naive_Bayes_klaR.Rmd @@ -27,7 +27,11 @@ Note that `usekernel` is always set to `TRUE` for the `klaR` engine. ## Translation from parsnip to the original package +`r uses_extension("naive_Bayes", "klaR", "classification")` + ```{r klaR-cls} +library(discrim) + naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% set_engine("klaR") %>% translate() diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd b/man/rmd/naive_Bayes_naivebayes.Rmd index ad1eb9300..73291a43c 100644 --- a/man/rmd/naive_Bayes_naivebayes.Rmd +++ b/man/rmd/naive_Bayes_naivebayes.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package +`r uses_extension("naive_Bayes", "naivebayes", "classification")` + ```{r naivebayes-cls} +library(discrim) + naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% set_engine("naivebayes") %>% translate() diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd index 79b76bbb5..c9725279d 100644 --- a/man/rmd/pls_mixOmics.Rmd +++ b/man/rmd/pls_mixOmics.Rmd @@ -26,7 +26,11 @@ param$item ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("pls", 
"mixOmics", "regression")` + ```{r mixOmics-reg} +library(plsmod) + pls(num_comp = integer(1), predictor_prop = double(1)) %>% set_engine("mixOmics") %>% set_mode("regression") %>% @@ -42,7 +46,11 @@ pls(num_comp = integer(1), predictor_prop = double(1)) %>% ## Translation from parsnip to the underlying model call (classification) +`r uses_extension("pls", "mixOmics", "classification")` + ```{r mixOmics-cls} +library(plsmod) + pls(num_comp = integer(1), predictor_prop = double(1)) %>% set_engine("mixOmics") %>% set_mode("classification") %>% diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd index a6caf6988..967c82cea 100644 --- a/man/rmd/poisson_reg_gee.Rmd +++ b/man/rmd/poisson_reg_gee.Rmd @@ -9,7 +9,11 @@ This model has no formal tuning parameters. It might be beneficial to determine ## Translation from parsnip to the original package +`r uses_extension("poisson_reg", "gee", "regression")` + ```{r gee-csl} +library(multilevelmod) + poisson_reg(engine = "gee") %>% set_engine("gee") %>% translate() diff --git a/man/rmd/poisson_reg_glm.Rmd b/man/rmd/poisson_reg_glm.Rmd index d0211ed21..c245b4889 100644 --- a/man/rmd/poisson_reg_glm.Rmd +++ b/man/rmd/poisson_reg_glm.Rmd @@ -9,7 +9,11 @@ This engine has no tuning parameters. ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("poisson_reg", "glm", "regression")` + ```{r glm-reg} +library(poissonreg) + poisson_reg() %>% set_engine("glm") %>% translate() diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd index f49a1cbcc..bfe797ac4 100644 --- a/man/rmd/poisson_reg_glmer.Rmd +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. 
## Translation from parsnip to the original package +`r uses_extension("poisson_reg", "glmer", "regression")` + ```{r lmer-csl} +library(multilevelmod) + poisson_reg(engine = "glmer") %>% set_engine("glmer") %>% translate() diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd index 03fd29f4f..ffe7bd20e 100644 --- a/man/rmd/poisson_reg_glmnet.Rmd +++ b/man/rmd/poisson_reg_glmnet.Rmd @@ -28,7 +28,11 @@ The `penalty` parameter has no default and requires a single numeric value. For ## Translation from parsnip to the original package +`r uses_extension("poisson_reg", "glmnet", "regression")` + ```{r glmnet-csl} +library(poissonreg) + poisson_reg(penalty = double(1), mixture = double(1)) %>% set_engine("glmnet") %>% translate() diff --git a/man/rmd/poisson_reg_hurdle.Rmd b/man/rmd/poisson_reg_hurdle.Rmd index 6531002d7..043ee62a3 100644 --- a/man/rmd/poisson_reg_hurdle.Rmd +++ b/man/rmd/poisson_reg_hurdle.Rmd @@ -9,7 +9,11 @@ This engine has no tuning parameters. ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("poisson_reg", "hurdle", "regression")` + ```{r hurdle-reg} +library(poissonreg) + poisson_reg() %>% set_engine("hurdle") %>% translate() diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd index 921cbe692..b1166d615 100644 --- a/man/rmd/poisson_reg_stan.Rmd +++ b/man/rmd/poisson_reg_stan.Rmd @@ -22,7 +22,11 @@ See [rstan::sampling()] and [rstanarm::priors()] for more information on these a ## Translation from parsnip to the original package +`r uses_extension("poisson_reg", "stan", "regression")` + ```{r stan-csl} +library(poissonreg) + poisson_reg() %>% set_engine("stan") %>% translate() diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd index 8a848ea65..fafda6a83 100644 --- a/man/rmd/poisson_reg_stan_glmer.Rmd +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -22,7 +22,11 @@ See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. 
## Translation from parsnip to the original package +`r uses_extension("poisson_reg", "stan_glmer", "regression")` + ```{r stan_glmer-cls} +library(poissonreg) + poisson_reg(engine = "stan_glmer") %>% set_engine("stan_glmer") %>% translate() diff --git a/man/rmd/poisson_reg_zeroinfl.Rmd b/man/rmd/poisson_reg_zeroinfl.Rmd index 8dc998469..b424c0ac5 100644 --- a/man/rmd/poisson_reg_zeroinfl.Rmd +++ b/man/rmd/poisson_reg_zeroinfl.Rmd @@ -9,7 +9,11 @@ This engine has no tuning parameters. ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("poisson_reg", "zeroinfl", "regression")` + ```{r zeroinfl-reg} +library(poissonreg) + poisson_reg() %>% set_engine("zeroinfl") %>% translate() diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd index 07e2a2c73..bd0ae7126 100644 --- a/man/rmd/proportional_hazards_glmnet.Rmd +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -28,7 +28,11 @@ The `penalty` parameter has no default and requires a single numeric value. For ## Translation from parsnip to the original package +`r uses_extension("proportional_hazards", "glmnet", "censored regression")` + ```{r glmnet-creg} +library(censored) + proportional_hazards(penalty = double(1), mixture = double(1)) %>% set_engine("glmnet") %>% translate() diff --git a/man/rmd/proportional_hazards_survival.Rmd b/man/rmd/proportional_hazards_survival.Rmd index 697aeb824..e3027247f 100644 --- a/man/rmd/proportional_hazards_survival.Rmd +++ b/man/rmd/proportional_hazards_survival.Rmd @@ -9,7 +9,11 @@ This model has no tuning parameters. 
## Translation from parsnip to the original package +`r uses_extension("proportional_hazards", "survival", "censored regression")` + ```{r survival-creg} +library(censored) + proportional_hazards() %>% set_engine("survival") %>% set_mode("censored regression") %>% diff --git a/man/rmd/rand_forest_party.Rmd b/man/rmd/rand_forest_party.Rmd index 75541af9d..68e0fddd4 100644 --- a/man/rmd/rand_forest_party.Rmd +++ b/man/rmd/rand_forest_party.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package (censored regression) +`r uses_extension("rand_forest", "party", "censored regression")` + ```{r party-creg} +library(censored) + rand_forest() %>% set_engine("party") %>% set_mode("censored regression") %>% diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd index 18bcb371c..206744b2f 100644 --- a/man/rmd/rule_fit_xrf.Rmd +++ b/man/rmd/rule_fit_xrf.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the underlying model call (regression) +`r uses_extension("rule_fit", "xrf", "regression")` + ```{r xrf-reg} +library(rules) + rule_fit( mtry = numeric(1), trees = integer(1), @@ -43,7 +47,12 @@ rule_fit( ## Translation from parsnip to the underlying model call (classification) +`r uses_extension("rule_fit", "xrf", "classification")` + + ```{r xrf-cls} +library(rules) + rule_fit( mtry = numeric(1), trees = integer(1), @@ -66,7 +75,7 @@ use these with `rule_fit()`, we recommend using a recipe instead of the formula Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in `rules`. 
Some differences in default values are: -- `trees`: `xrf: 100, `rules`: 15 +- `trees`: `xrf`: 100, `rules`: 15 - `max_depth`: `xrf`: 3, `rules`: 6 diff --git a/man/rmd/survival_reg_flexsurv.Rmd b/man/rmd/survival_reg_flexsurv.Rmd index c7cc48ee5..b0ed8bc3f 100644 --- a/man/rmd/survival_reg_flexsurv.Rmd +++ b/man/rmd/survival_reg_flexsurv.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package +`r uses_extension("survival_reg", "flexsurv", "censored regression")` + ```{r flexsurv-creg} +library(censored) + survival_reg(dist = character(1)) %>% set_engine("flexsurv") %>% set_mode("censored regression") %>% diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd index 56d942701..e20681ef9 100644 --- a/man/rmd/survival_reg_survival.Rmd +++ b/man/rmd/survival_reg_survival.Rmd @@ -25,7 +25,11 @@ param$item ## Translation from parsnip to the original package +`r uses_extension("survival_reg", "survival", "censored regression")` + ```{r survival-creg} +library(censored) + survival_reg(dist = character(1)) %>% set_engine("survival") %>% set_mode("censored regression") %>% From b5c0f8c462ef44f99b81b3fcd5c9d38334d8d840 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 10:50:03 -0500 Subject: [PATCH 27/65] add a link to the "find" page for models --- man-roxygen/spec-references.R | 2 +- man/C5_rules.Rd | 2 +- man/bag_mars.Rd | 2 +- man/bag_tree.Rd | 2 +- man/bart.Rd | 2 +- man/boost_tree.Rd | 2 +- man/cubist_rules.Rd | 2 +- man/decision_tree.Rd | 2 +- man/discrim_flexible.Rd | 2 +- man/discrim_linear.Rd | 2 +- man/discrim_quad.Rd | 2 +- man/discrim_regularized.Rd | 2 +- man/gen_additive_mod.Rd | 2 +- man/linear_reg.Rd | 2 +- man/logistic_reg.Rd | 2 +- man/mars.Rd | 2 +- man/mlp.Rd | 2 +- man/multinom_reg.Rd | 2 +- man/naive_Bayes.Rd | 2 +- man/nearest_neighbor.Rd | 2 +- man/pls.Rd | 2 +- man/poisson_reg.Rd | 2 +- man/proportional_hazards.Rd | 2 +- man/rand_forest.Rd | 2 +- man/rule_fit.Rd | 2 +- 
man/surv_reg.Rd | 2 +- man/survival_reg.Rd | 2 +- man/svm_linear.Rd | 2 +- man/svm_poly.Rd | 2 +- man/svm_rbf.Rd | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) diff --git a/man-roxygen/spec-references.R b/man-roxygen/spec-references.R index c1ddbbd1e..e47ceda3e 100644 --- a/man-roxygen/spec-references.R +++ b/man-roxygen/spec-references.R @@ -1 +1 @@ -#' @references \url{https://www.tidymodels.org}, [_Tidy Modeling with R_](https://www.tmwr.org/) +#' @references \url{https://www.tidymodels.org}, [_Tidy Modeling with R_](https://www.tmwr.org/), [searchable list of parsnip models](https://www.tidymodels.org/find/parsnip/) diff --git a/man/C5_rules.Rd b/man/C5_rules.Rd index b653f059b..c067f38c1 100644 --- a/man/C5_rules.Rd +++ b/man/C5_rules.Rd @@ -53,7 +53,7 @@ C5_rules() Quinlan R (1993). \emph{C4.5: Programs for Machine Learning}. Morgan Kaufmann Publishers. -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \code{\link[C50:C5.0]{C50::C5.0()}}, \code{\link[C50:C5.0Control]{C50::C5.0Control()}}, diff --git a/man/bag_mars.Rd b/man/bag_mars.Rd index 41bd83cc2..ea8efee77 100644 --- a/man/bag_mars.Rd +++ b/man/bag_mars.Rd @@ -46,7 +46,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bag_mars")} diff --git a/man/bag_tree.Rd b/man/bag_tree.Rd index 97607148a..01c84bed3 100644 --- a/man/bag_tree.Rd +++ b/man/bag_tree.Rd @@ -51,7 +51,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bag_tree")} diff --git a/man/bart.Rd b/man/bart.Rd index 9d2f76f83..80da4841d 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -90,7 +90,7 @@ prior_test(coef = c(0.05, 0.5, .95), expo = c(1/2, 1, 2)) \%>\% facet_wrap(~ expo) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bart")} diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index 91ed9cf4e..3e1bd4c5a 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -75,7 +75,7 @@ show_engines("boost_tree") boost_tree(mode = "classification", trees = 20) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, 
\href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("boost_tree")}, diff --git a/man/cubist_rules.Rd b/man/cubist_rules.Rd index 2d99fa74a..7fe715af2 100644 --- a/man/cubist_rules.Rd +++ b/man/cubist_rules.Rd @@ -76,7 +76,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index 5a9c2ab28..d156ac437 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -51,7 +51,7 @@ show_engines("decision_tree") decision_tree(mode = "classification", tree_depth = 5) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("decision_tree")} diff --git a/man/discrim_flexible.Rd b/man/discrim_flexible.Rd index b99df1595..d980a16e0 100644 --- a/man/discrim_flexible.Rd +++ b/man/discrim_flexible.Rd @@ -45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_flexible")} diff --git a/man/discrim_linear.Rd b/man/discrim_linear.Rd index 9724c0dec..0b4b09b1f 100644 --- a/man/discrim_linear.Rd +++ b/man/discrim_linear.Rd @@ -45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_linear")} diff --git a/man/discrim_quad.Rd b/man/discrim_quad.Rd index e0a09e3a8..30354d97f 100644 --- a/man/discrim_quad.Rd +++ b/man/discrim_quad.Rd @@ -41,7 +41,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_quad")} diff --git a/man/discrim_regularized.Rd b/man/discrim_regularized.Rd index f88c1ed75..20a260818 100644 --- a/man/discrim_regularized.Rd +++ b/man/discrim_regularized.Rd @@ -56,7 +56,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} Friedman, J (1989). Regularized Discriminant Analysis. \emph{Journal of the American Statistical Association}, 84, 165-175. diff --git a/man/gen_additive_mod.Rd b/man/gen_additive_mod.Rd index 120b78d12..df5e779b7 100644 --- a/man/gen_additive_mod.Rd +++ b/man/gen_additive_mod.Rd @@ -50,7 +50,7 @@ gen_additive_mod() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("gen_additive_mod")} diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index 20892bc5e..5c8b24a23 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -44,7 +44,7 @@ show_engines("linear_reg") linear_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("linear_reg")} diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index ffde94e7e..728d11f30 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -54,7 +54,7 @@ show_engines("logistic_reg") logistic_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, 
\href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("logistic_reg")} diff --git a/man/mars.Rd b/man/mars.Rd index 529cf9e1d..e8f3581e1 100644 --- a/man/mars.Rd +++ b/man/mars.Rd @@ -51,7 +51,7 @@ show_engines("mars") mars(mode = "regression", num_terms = 5) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mars")} diff --git a/man/mlp.Rd b/man/mlp.Rd index a606b7542..07f445f43 100644 --- a/man/mlp.Rd +++ b/man/mlp.Rd @@ -65,7 +65,7 @@ show_engines("mlp") mlp(mode = "classification", penalty = 0.01) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mlp")} diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index e8d44e51a..4c97de1a6 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -53,7 +53,7 @@ show_engines("multinom_reg") multinom_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("multinom_reg")} diff --git a/man/naive_Bayes.Rd b/man/naive_Bayes.Rd index 85a3b9169..9620683bb 100644 --- a/man/naive_Bayes.Rd +++ b/man/naive_Bayes.Rd @@ 
-45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("naive_Bayes")} diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index 370c0e779..61778d3b5 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -55,7 +55,7 @@ nearest_neighbor(neighbors = 11) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("nearest_neighbor")} diff --git a/man/pls.Rd b/man/pls.Rd index 554573303..dc3527ba5 100644 --- a/man/pls.Rd +++ b/man/pls.Rd @@ -43,7 +43,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("pls")} diff --git a/man/poisson_reg.Rd b/man/poisson_reg.Rd index 6ae9f8a72..d94cd8ad0 100644 --- a/man/poisson_reg.Rd +++ b/man/poisson_reg.Rd @@ -43,7 +43,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("poisson_reg")} diff --git a/man/proportional_hazards.Rd b/man/proportional_hazards.Rd index d7ddfdff2..71a344980 100644 --- a/man/proportional_hazards.Rd +++ b/man/proportional_hazards.Rd @@ -55,7 +55,7 @@ show_engines("proportional_hazards") proportional_hazards(mode = "censored regression") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("proportional_hazards")} diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index d798109fc..dafb9cbe1 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -53,7 +53,7 @@ show_engines("rand_forest") rand_forest(mode = "classification", trees = 2000) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rand_forest")} diff --git a/man/rule_fit.Rd b/man/rule_fit.Rd index 1c5160a5a..43958dde8 100644 --- a/man/rule_fit.Rd +++ b/man/rule_fit.Rd @@ -83,7 +83,7 @@ rule_fit() Friedman, J. H., and Popescu, B. E. (2008). "Predictive learning via rule ensembles." \emph{The Annals of Applied Statistics}, 2(3), 916-954. 
-\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \code{\link[xrf:xrf.formula]{xrf::xrf.formula()}}, \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rule_fit")} diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd index 2b39dc077..1c2da015f 100644 --- a/man/surv_reg.Rd +++ b/man/surv_reg.Rd @@ -46,7 +46,7 @@ show_engines("surv_reg") surv_reg(mode = "regression", dist = "weibull") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("surv_reg")} diff --git a/man/survival_reg.Rd b/man/survival_reg.Rd index 4e16afc9e..f2d48bebd 100644 --- a/man/survival_reg.Rd +++ b/man/survival_reg.Rd @@ -42,7 +42,7 @@ show_engines("survival_reg") survival_reg(mode = "censored regression", dist = "weibull") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("survival_reg")} diff --git a/man/svm_linear.Rd b/man/svm_linear.Rd index 9743c8166..5f9dcbf22 100644 --- a/man/svm_linear.Rd +++ b/man/svm_linear.Rd @@ -45,7 +45,7 @@ show_engines("svm_linear") svm_linear(mode = "classification") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, 
\href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_linear")} diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd index 31d764bda..8ef4ad19c 100644 --- a/man/svm_poly.Rd +++ b/man/svm_poly.Rd @@ -57,7 +57,7 @@ show_engines("svm_poly") svm_poly(mode = "classification", degree = 1.2) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_poly")} diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd index e77e33827..1d7982fae 100644 --- a/man/svm_rbf.Rd +++ b/man/svm_rbf.Rd @@ -55,7 +55,7 @@ show_engines("svm_rbf") svm_rbf(mode = "classification", rbf_sigma = 0.2) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_rbf")} From a93b7f8e73e244d4d81c1ebe00f1ae9acb804d52 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 11:07:38 -0500 Subject: [PATCH 28/65] updated model database --- inst/models.tsv | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/models.tsv b/inst/models.tsv index 0db9469f4..3ac928450 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -35,7 +35,9 @@ "discrim_regularized" "classification" "klaR" "discrim" "gen_additive_mod" "classification" "mgcv" NA "gen_additive_mod" "regression" "mgcv" NA +"linear_reg" "regression" "brulee" NA "linear_reg" "regression" "gee" 
"multilevelmod" +"linear_reg" "regression" "glm" NA "linear_reg" "regression" "glmnet" NA "linear_reg" "regression" "gls" "multilevelmod" "linear_reg" "regression" "keras" NA @@ -45,6 +47,7 @@ "linear_reg" "regression" "spark" NA "linear_reg" "regression" "stan" NA "linear_reg" "regression" "stan_glmer" "multilevelmod" +"logistic_reg" "classification" "brulee" NA "logistic_reg" "classification" "gee" "multilevelmod" "logistic_reg" "classification" "glm" NA "logistic_reg" "classification" "glmer" "multilevelmod" @@ -56,10 +59,13 @@ "logistic_reg" "classification" "stan_glmer" "multilevelmod" "mars" "classification" "earth" NA "mars" "regression" "earth" NA +"mlp" "classification" "brulee" NA "mlp" "classification" "keras" NA "mlp" "classification" "nnet" NA +"mlp" "regression" "brulee" NA "mlp" "regression" "keras" NA "mlp" "regression" "nnet" NA +"multinom_reg" "classification" "brulee" NA "multinom_reg" "classification" "glmnet" NA "multinom_reg" "classification" "keras" NA "multinom_reg" "classification" "nnet" NA From c252ee3c9e0f2864dee52d6b94e2508e810a060c Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 11:07:58 -0500 Subject: [PATCH 29/65] fix bug related to engine names with underscores --- R/engine_docs.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index c3227b80e..f59b7ffd3 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -143,8 +143,8 @@ find_engine_files <- function(mod) { } # Subset for our model function - eng <- strsplit(topic_names, "_") - eng <- purrr::map_chr(eng, ~ .x[length(.x)]) + prefix <- paste0("parsnip:details_", mod, "_") + eng <- gsub(prefix, "", topic_names) eng <- tibble::tibble(engine = eng, topic = topic_names) # Determine and label default engine @@ -152,7 +152,7 @@ find_engine_files <- function(mod) { eng$default <- ifelse(eng$engine == default, cli::symbol$sup_1, "") # reorder based on default and name - non_defaults <- dplyr::filter(eng, 
!grepl("default", default)) + non_defaults <- dplyr::filter(eng, default == "") non_defaults <- non_defaults %>% dplyr::arrange(tolower(engine)) %>% From 4432811d89f94f1dc5d7ea96c4a5260f9cec0382 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 11:51:08 -0500 Subject: [PATCH 30/65] brulee engine files --- R/linear_reg_brulee.R | 10 +++ R/logistic_reg_brulee.R | 11 +++ man/details_linear_reg_brulee.Rd | 72 +++++++++++++++++++ man/details_logistic_reg_brulee.Rd | 77 ++++++++++++++++++++ man/details_mlp_brulee.Rd | 111 +++++++++++++++++++++++++++-- man/rmd/linear_reg_brulee.Rmd | 54 ++++++++++++++ man/rmd/logistic_reg_brulee.Rmd | 57 +++++++++++++++ man/rmd/mlp_brulee.Rmd | 5 +- 8 files changed, 390 insertions(+), 7 deletions(-) create mode 100644 R/linear_reg_brulee.R create mode 100644 R/logistic_reg_brulee.R create mode 100644 man/details_linear_reg_brulee.Rd create mode 100644 man/details_logistic_reg_brulee.Rd create mode 100644 man/rmd/linear_reg_brulee.Rmd create mode 100644 man/rmd/logistic_reg_brulee.Rmd diff --git a/R/linear_reg_brulee.R b/R/linear_reg_brulee.R new file mode 100644 index 000000000..c65e31f75 --- /dev/null +++ b/R/linear_reg_brulee.R @@ -0,0 +1,10 @@ +#' Linear regression via brulee +#' +#' [brulee::brulee_linear_reg()] uses ordinary least squares to fit models with +#' numeric outcomes. +#' +#' @includeRmd man/rmd/linear_reg_brulee.md details +#' +#' @name details_linear_reg_brulee +#' @keywords internal +NULL diff --git a/R/logistic_reg_brulee.R b/R/logistic_reg_brulee.R new file mode 100644 index 000000000..3b1784246 --- /dev/null +++ b/R/logistic_reg_brulee.R @@ -0,0 +1,11 @@ +#' Logistic regression via brulee +#' +#' [brulee::brulee_logistic_reg()] fits a generalized linear model for binary +#' outcomes. A linear combination of the predictors is used to model the log +#' odds of an event. 
+#' +#' @includeRmd man/rmd/logistic_reg_brulee.md details +#' +#' @name details_logistic_reg_brulee +#' @keywords internal +NULL diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd new file mode 100644 index 000000000..36bc2d425 --- /dev/null +++ b/man/details_linear_reg_brulee.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_brulee.R +\name{details_linear_reg_brulee} +\alias{details_linear_reg_brulee} +\title{Linear regression via brulee} +\description{ +\code{\link[brulee:brulee_linear_reg]{brulee::brulee_linear_reg()}} uses ordinary least squares to fit models with +numeric outcomes. +} +\details{ +For this engine, there is a single mode: regression +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameter: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: 0.001) +} + +Other engine arguments of interest: +\itemize{ +\item \code{optimizer()}: The optimization method. See +\code{\link[brulee:brulee_linear_reg]{brulee::brulee_linear_reg()}}. +\item \code{epochs()}: An integer for the number of passes through the training +set. +\item \code{learn_rate()}: A number used to accelerate the gradient descent +process. +\item \code{momentum()}: A number used to use historical gradient information +during optimization (\code{optimizer = "SGD"} only). +\item \code{batch_size()}: An integer for the number of training set points in +each batch. +\item \code{stop_iter()}: A non-negative integer for how many iterations with +no improvement before stopping. (default: 5L). +} +} + +\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{linear_reg(penalty = double(1)) \%>\% + set_engine("brulee") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_linear_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd new file mode 100644 index 000000000..e8a5e70c5 --- /dev/null +++ b/man/details_logistic_reg_brulee.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logistic_reg_brulee.R +\name{details_logistic_reg_brulee} +\alias{details_logistic_reg_brulee} +\title{Logistic regression via brulee} +\description{ +\code{\link[brulee:brulee_logistic_reg]{brulee::brulee_logistic_reg()}} fits a generalized linear model for binary +outcomes. A linear combination of the predictors is used to model the log +odds of an event. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 1 tuning parameter: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: 0.001) +} + +Both \code{penalty} and \code{dropout} should not be used in the same model. + +Other engine arguments of interest: +\itemize{ +\item \code{optimizer()}: The optimization method. 
See +\code{\link[brulee:brulee_linear_reg]{brulee::brulee_linear_reg()}}. +\item \code{epochs()}: An integer for the number of passes through the training +set. +\item \code{learn_rate()}: A number used to accelerate the gradient descent +process. +\item \code{momentum()}: A number used to use historical gradient information +during optimization (\code{optimizer = "SGD"} only). +\item \code{batch_size()}: An integer for the number of training set points in +each batch. +\item \code{stop_iter()}: A non-negative integer for how many iterations with +no improvement before stopping. (default: 5L). +\item \code{class_weights()}: Numeric class weights. See +\code{\link[brulee:brulee_logistic_reg]{brulee::brulee_logistic_reg()}}. +} +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{logistic_reg(penalty = double(1)) \%>\% + set_engine("brulee") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_logistic_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index f685b74e7..4b26bc211 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -8,12 +8,113 @@ } \details{ For this engine, there are multiple modes: classification and regression -\subsection{Tuning Parameters}{\preformatted{## Error: Tibble columns must have compatible sizes. -## * Size 6: Existing data. -## * Size 8: Column `default`. -## ℹ Only values of size one are recycled. 
+\subsection{Tuning Parameters}{ -## Error in is.data.frame(y): object 'defaults' not found +This model has 6 tuning parameters: +\itemize{ +\item \code{hidden_units}: # Hidden Units (type: integer, default: 3L) +\item \code{penalty}: Amount of Regularization (type: double, default: 0.0) +\item \code{epochs}: # Epochs (type: integer, default: 100L) +\item \code{dropout}: Dropout Rate (type: double, default: 0.0) +\item \code{learn_rate}: Learning Rate (type: double, default: 0.01) +\item \code{activation}: Activation Function (type: character, default: ‘relu’) +} + +Both \code{penalty} and \code{dropout} should not be used in the same model. + +Other engine arguments of interest: +\itemize{ +\item \code{momentum()}: A number used to use historical gradient information +during optimization. +\item \code{batch_size()}: An integer for the number of training set points in +each batch. +\item \code{class_weights()}: Numeric class weights. See +\code{\link[brulee:brulee_mlp]{brulee::brulee_mlp()}}. +\item \code{stop_iter()}: A non-negative integer for how many iterations with +no improvement before stopping. (default: 5L). +} +} + +\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{
}}\preformatted{mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) \%>\% + set_engine("brulee") \%>\% + set_mode("regression") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (regression) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## learn_rate = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1), learn_rate = double(1)) +} + +Note that parsnip automatically sets linear activation in the last +layer. +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) \%>\% + set_engine("brulee") \%>\% + set_mode("classification") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Single Layer Neural Network Specification (classification) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## learn_rate = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1), learn_rate = double(1)) +} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. } } } diff --git a/man/rmd/linear_reg_brulee.Rmd b/man/rmd/linear_reg_brulee.Rmd new file mode 100644 index 000000000..9cbba1e40 --- /dev/null +++ b/man/rmd/linear_reg_brulee.Rmd @@ -0,0 +1,54 @@ +```{r, child = "aaa.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "brulee")` + +## Tuning Parameters + +```{r brulee-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty"), + default = c( "0.001")) + +param <- + linear_reg() %>% + set_engine("brulee") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r brulee-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_linear_reg()]. + - `epochs()`: An integer for the number of passes through the training set. 
+ - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to use historical gradient information during optimization (`optimizer = "SGD"` only). + - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). + + +## Translation from parsnip to the original package (regression) + +```{r brulee-reg} +linear_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/logistic_reg_brulee.Rmd b/man/rmd/logistic_reg_brulee.Rmd new file mode 100644 index 000000000..9fc36259a --- /dev/null +++ b/man/rmd/logistic_reg_brulee.Rmd @@ -0,0 +1,57 @@ +```{r, child = "aaa.Rmd", include = FALSE} +``` + +`r descr_models("logistic_reg", "brulee")` + +## Tuning Parameters + +```{r brulee-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty"), + default = c( "0.001")) + +param <- + logistic_reg() %>% + set_engine("brulee") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameter: + +```{r brulee-param-list, echo = FALSE, results = "asis"} +param$item +``` + +Both `penalty` and `dropout` should not be used in the same model. + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_linear_reg()]. + - `epochs()`: An integer for the number of passes through the training set. + - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to use historical gradient information during optimization (`optimizer = "SGD"` only). 
+ - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). + - `class_weights()`: Numeric class weights. See [brulee::brulee_logistic_reg()]. + + +## Translation from parsnip to the original package (classification) + +```{r brulee-cls} +logistic_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd index a9193fb6c..05a486ed2 100644 --- a/man/rmd/mlp_brulee.Rmd +++ b/man/rmd/mlp_brulee.Rmd @@ -8,7 +8,7 @@ ```{r brulee-param-info, echo = FALSE} defaults <- tibble::tibble(parsnip = c("hidden_units", "penalty", "dropout", "epochs", "learn_rate", "activation"), - default = c("3L", "0.0", "0.0", "0.01", "100L", "0.0", "0.01", "'relu'")) + default = c("3L", "0.0", "0.0", "100L", "0.01", "'relu'")) param <- mlp() %>% @@ -22,10 +22,11 @@ This model has `r nrow(param)` tuning parameters: param$item ``` -Both `penalty` and `dropout` should be used in the same model. +Both `penalty` and `dropout` should not be used in the same model. Other engine arguments of interest: + - `momentum()`: A number used to use historical gradient information during optimization. - `batch_size()`: An integer for the number of training set points in each batch. - `class_weights()`: Numeric class weights. See [brulee::brulee_mlp()]. - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). 
From 094ab597723b721ec326e6f66471a9dbf2a8d389 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 12:28:08 -0500 Subject: [PATCH 31/65] combined replicate files --- R/bag_tree_rpart 2.R | 9 --------- R/bag_tree_rpart.R | 5 +++-- 2 files changed, 3 insertions(+), 11 deletions(-) delete mode 100644 R/bag_tree_rpart 2.R diff --git a/R/bag_tree_rpart 2.R b/R/bag_tree_rpart 2.R deleted file mode 100644 index 00aaed488..000000000 --- a/R/bag_tree_rpart 2.R +++ /dev/null @@ -1,9 +0,0 @@ -#' Ensembles of CART decision trees -#' -#' [ipred::bagging()] fits an ensemble of decision trees, using the `rpart` package. -#' -#' @includeRmd man/rmd/bag_tree_rpart.md details -#' -#' @name details_bag_tree_rpart -#' @keywords internal -NULL diff --git a/R/bag_tree_rpart.R b/R/bag_tree_rpart.R index 935d49d9f..e83f8f12f 100644 --- a/R/bag_tree_rpart.R +++ b/R/bag_tree_rpart.R @@ -1,7 +1,8 @@ #' Bagged trees via rpart #' -#' [baguette::bagger()] creates an collection of decision trees forming an -#' ensemble. All trees in the ensemble are combined to produce a final prediction. +#' [baguette::bagger()] and [ipred::bagging()] create collections of decision +#' trees forming an ensemble. All trees in the ensemble are combined to produce +#' a final prediction. 
#' #' @includeRmd man/rmd/bag_tree_rpart.md details #' From 355f1a4455674ec2b8e20397361ba7206afae751 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 12:42:33 -0500 Subject: [PATCH 32/65] Wrote a readme document and added notes about it in engine files --- R/C5_rules_C5.0.R | 2 + R/bag_mars_earth.R | 2 + R/bag_tree_C5.0.R | 2 + R/bag_tree_rpart.R | 2 + R/bart_dbarts.R | 2 + R/boost_tree_C5.0.R | 2 + R/boost_tree_mboost.R | 2 + R/boost_tree_spark.R | 2 + R/boost_tree_xgboost.R | 2 + R/cubist_rules_Cubist.R | 2 + R/decision_tree_C5.0.R | 2 + R/decision_tree_party.R | 2 + R/decision_tree_rpart.R | 2 + R/decision_tree_spark.R | 2 + R/discrim_flexible_earth.R | 2 + R/discrim_linear_MASS.R | 2 + R/discrim_linear_mda.R | 2 + R/discrim_linear_sda.R | 2 + R/discrim_linear_sparsediscrim.R | 2 + R/discrim_quad_MASS.R | 2 + R/discrim_quad_sparsediscrim.R | 2 + R/discrim_regularized_klaR.R | 2 + R/linear_reg_brulee.R | 2 + R/linear_reg_gee.R | 2 + R/linear_reg_glm.R | 2 + R/linear_reg_glmnet.R | 2 + R/linear_reg_gls.R | 2 + R/linear_reg_keras.R | 2 + R/linear_reg_lm.R | 2 + R/linear_reg_lme.R | 2 + R/linear_reg_lmer.R | 2 + R/linear_reg_spark.R | 2 + R/linear_reg_stan.R | 2 + R/linear_reg_stan_glmer.R | 2 + R/logistic_reg_LiblineaR.R | 2 + R/logistic_reg_brulee.R | 2 + R/logistic_reg_gee.R | 2 + R/logistic_reg_glm.R | 2 + R/logistic_reg_glmer.R | 2 + R/logistic_reg_glmnet.R | 2 + R/logistic_reg_keras.R | 2 + R/logistic_reg_spark.R | 2 + R/logistic_reg_stan.R | 2 + R/logistic_reg_stan_glmer.R | 2 + R/mars_earth.R | 2 + R/mlp_brulee.R | 2 + R/mlp_keras.R | 2 + R/mlp_nnet.R | 2 + R/multinom_reg_glmnet.R | 2 + R/multinom_reg_keras.R | 2 + R/multinom_reg_nnet.R | 2 + R/multinom_reg_spark.R | 2 + R/naive_Bayes_klaR.R | 2 + R/naive_Bayes_naivebayes.R | 2 + R/nearest_neighbor_kknn.R | 2 + R/pls_mixOmics.R | 2 + R/poisson_reg_gee.R | 2 + R/poisson_reg_glm.R | 2 + R/poisson_reg_glmer.R | 2 + R/poisson_reg_glmnet.R | 2 + R/poisson_reg_hurdle.R | 2 + R/poisson_reg_stan.R | 2 + 
R/poisson_reg_stan_glmer.R | 2 + R/poisson_reg_zeroinfl.R | 2 + R/proportional_hazards_glmnet.R | 2 + R/proportional_hazards_survival.R | 2 + R/rand_forest_party.R | 2 + R/rand_forest_randomForest.R | 2 + R/rand_forest_ranger.R | 2 + R/rand_forest_spark.R | 2 + R/rule_fit_xrf.R | 2 + R/survival_reg_flexsurv.R | 2 + R/survival_reg_survival.R | 2 + R/svm_linear_LiblineaR.R | 2 + R/svm_linear_kernlab.R | 2 + R/svm_poly_kernlab.R | 2 + R/svm_rbf_kernlab.R | 2 + man/details_bag_tree_rpart.Rd | 124 ++---------------------------- 78 files changed, 159 insertions(+), 119 deletions(-) diff --git a/R/C5_rules_C5.0.R b/R/C5_rules_C5.0.R index 448e0c27a..4e800581a 100644 --- a/R/C5_rules_C5.0.R +++ b/R/C5_rules_C5.0.R @@ -9,3 +9,5 @@ #' @name details_C5_rules_C5.0 #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/bag_mars_earth.R b/R/bag_mars_earth.R index cb1787ab5..9f48413ae 100644 --- a/R/bag_mars_earth.R +++ b/R/bag_mars_earth.R @@ -8,3 +8,5 @@ #' @name details_bag_mars_earth #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/bag_tree_C5.0.R b/R/bag_tree_C5.0.R index c54943b3f..5f4f591d6 100644 --- a/R/bag_tree_C5.0.R +++ b/R/bag_tree_C5.0.R @@ -8,3 +8,5 @@ #' @name details_bag_tree_C5.0 #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/bag_tree_rpart.R b/R/bag_tree_rpart.R index e83f8f12f..407d99ec3 100644 --- a/R/bag_tree_rpart.R +++ b/R/bag_tree_rpart.R @@ -9,3 +9,5 @@ #' @name details_bag_tree_rpart #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/bart_dbarts.R b/R/bart_dbarts.R index c81ea6a45..8f393d86c 100644 --- a/R/bart_dbarts.R +++ b/R/bart_dbarts.R @@ -8,3 +8,5 @@ #' @name details_bart_dbarts #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are 
processed diff --git a/R/boost_tree_C5.0.R b/R/boost_tree_C5.0.R index 7675acb2b..3bbd52949 100644 --- a/R/boost_tree_C5.0.R +++ b/R/boost_tree_C5.0.R @@ -9,3 +9,5 @@ #' @name details_boost_tree_C5.0 #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/boost_tree_mboost.R b/R/boost_tree_mboost.R index 7bdfd3d30..0aab174b6 100644 --- a/R/boost_tree_mboost.R +++ b/R/boost_tree_mboost.R @@ -9,3 +9,5 @@ #' @name details_boost_tree_mboost #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/boost_tree_spark.R b/R/boost_tree_spark.R index 13930de3a..95cdbd672 100644 --- a/R/boost_tree_spark.R +++ b/R/boost_tree_spark.R @@ -9,3 +9,5 @@ #' @name details_boost_tree_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/boost_tree_xgboost.R b/R/boost_tree_xgboost.R index d6cab8a1c..781a0ff7b 100644 --- a/R/boost_tree_xgboost.R +++ b/R/boost_tree_xgboost.R @@ -9,3 +9,5 @@ #' @name details_boost_tree_xgboost #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/cubist_rules_Cubist.R b/R/cubist_rules_Cubist.R index a24e1cf97..6f1efebd8 100644 --- a/R/cubist_rules_Cubist.R +++ b/R/cubist_rules_Cubist.R @@ -9,3 +9,5 @@ #' @name details_cubist_rules_Cubist #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/decision_tree_C5.0.R b/R/decision_tree_C5.0.R index 6c6b35a35..5dfa3dee1 100644 --- a/R/decision_tree_C5.0.R +++ b/R/decision_tree_C5.0.R @@ -8,3 +8,5 @@ #' @name details_decision_tree_C5.0 #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/decision_tree_party.R b/R/decision_tree_party.R index 610ec49c3..8a8dc067e 100644 --- a/R/decision_tree_party.R +++ 
b/R/decision_tree_party.R @@ -8,3 +8,5 @@ #' @name details_decision_tree_party #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/decision_tree_rpart.R b/R/decision_tree_rpart.R index c5addf457..bbe1e11bd 100644 --- a/R/decision_tree_rpart.R +++ b/R/decision_tree_rpart.R @@ -8,3 +8,5 @@ #' @name details_decision_tree_rpart #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/decision_tree_spark.R b/R/decision_tree_spark.R index d0d99cbfe..144f2351b 100644 --- a/R/decision_tree_spark.R +++ b/R/decision_tree_spark.R @@ -8,3 +8,5 @@ #' @name details_decision_tree_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_flexible_earth.R b/R/discrim_flexible_earth.R index e271527ca..1b87e2f8d 100644 --- a/R/discrim_flexible_earth.R +++ b/R/discrim_flexible_earth.R @@ -10,3 +10,5 @@ #' @name details_discrim_flexible_earth #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_linear_MASS.R b/R/discrim_linear_MASS.R index 8797ccc38..a3c7cfd91 100644 --- a/R/discrim_linear_MASS.R +++ b/R/discrim_linear_MASS.R @@ -10,3 +10,5 @@ #' @name details_discrim_linear_MASS #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_linear_mda.R b/R/discrim_linear_mda.R index 962c503b4..4365db3a3 100644 --- a/R/discrim_linear_mda.R +++ b/R/discrim_linear_mda.R @@ -9,3 +9,5 @@ #' @name details_discrim_linear_mda #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_linear_sda.R b/R/discrim_linear_sda.R index 8a1172970..817cb5b4c 100644 --- a/R/discrim_linear_sda.R +++ b/R/discrim_linear_sda.R @@ -8,3 +8,5 @@ #' @name details_discrim_linear_sda #' @keywords internal 
NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_linear_sparsediscrim.R b/R/discrim_linear_sparsediscrim.R index 6c0fd859f..8ad80ea84 100644 --- a/R/discrim_linear_sparsediscrim.R +++ b/R/discrim_linear_sparsediscrim.R @@ -9,3 +9,5 @@ #' @name details_discrim_linear_sparsediscrim #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_quad_MASS.R b/R/discrim_quad_MASS.R index 420325c83..7a9829fd9 100644 --- a/R/discrim_quad_MASS.R +++ b/R/discrim_quad_MASS.R @@ -10,3 +10,5 @@ #' @name details_discrim_quad_MASS #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_quad_sparsediscrim.R b/R/discrim_quad_sparsediscrim.R index 1e98c7f74..06851aa7a 100644 --- a/R/discrim_quad_sparsediscrim.R +++ b/R/discrim_quad_sparsediscrim.R @@ -9,3 +9,5 @@ #' @name details_discrim_quad_sparsediscrim #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/discrim_regularized_klaR.R b/R/discrim_regularized_klaR.R index 2b63fc95e..b0e75c088 100644 --- a/R/discrim_regularized_klaR.R +++ b/R/discrim_regularized_klaR.R @@ -11,3 +11,5 @@ #' @name details_discrim_regularized_klaR #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_brulee.R b/R/linear_reg_brulee.R index c65e31f75..7515d66a7 100644 --- a/R/linear_reg_brulee.R +++ b/R/linear_reg_brulee.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_brulee #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_gee.R b/R/linear_reg_gee.R index 608aa5fd6..edf8aa47f 100644 --- a/R/linear_reg_gee.R +++ b/R/linear_reg_gee.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_gee #' @keywords internal NULL + +# See man/rmd/README.md for a 
description of how these files are processed diff --git a/R/linear_reg_glm.R b/R/linear_reg_glm.R index 7884fe18d..a458e49ff 100644 --- a/R/linear_reg_glm.R +++ b/R/linear_reg_glm.R @@ -9,3 +9,5 @@ #' @name details_linear_reg_glm #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_glmnet.R b/R/linear_reg_glmnet.R index 30c30f731..0e338e705 100644 --- a/R/linear_reg_glmnet.R +++ b/R/linear_reg_glmnet.R @@ -7,3 +7,5 @@ #' @name details_linear_reg_glmnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_gls.R b/R/linear_reg_gls.R index c2884f164..6b15f4a4b 100644 --- a/R/linear_reg_gls.R +++ b/R/linear_reg_gls.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_gls #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_keras.R b/R/linear_reg_keras.R index 9d86e47d2..5845113cd 100644 --- a/R/linear_reg_keras.R +++ b/R/linear_reg_keras.R @@ -7,3 +7,5 @@ #' @name details_linear_reg_keras #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_lm.R b/R/linear_reg_lm.R index 8f087743c..c34d00db8 100644 --- a/R/linear_reg_lm.R +++ b/R/linear_reg_lm.R @@ -7,3 +7,5 @@ #' @name details_linear_reg_lm #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_lme.R b/R/linear_reg_lme.R index 46c792133..f56f3b273 100644 --- a/R/linear_reg_lme.R +++ b/R/linear_reg_lme.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_lme #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_lmer.R b/R/linear_reg_lmer.R index e086af111..bf805104d 100644 --- a/R/linear_reg_lmer.R +++ b/R/linear_reg_lmer.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_lmer #' 
@keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_spark.R b/R/linear_reg_spark.R index 5d21918b9..88ffad626 100644 --- a/R/linear_reg_spark.R +++ b/R/linear_reg_spark.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_stan.R b/R/linear_reg_stan.R index 1c77437d4..cad16f8b7 100644 --- a/R/linear_reg_stan.R +++ b/R/linear_reg_stan.R @@ -7,3 +7,5 @@ #' @name details_linear_reg_stan #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/linear_reg_stan_glmer.R b/R/linear_reg_stan_glmer.R index 23a76eb78..2dc2ec555 100644 --- a/R/linear_reg_stan_glmer.R +++ b/R/linear_reg_stan_glmer.R @@ -8,3 +8,5 @@ #' @name details_linear_reg_stan_glmer #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_LiblineaR.R b/R/logistic_reg_LiblineaR.R index 2bdc983f4..dcc1c1c55 100644 --- a/R/logistic_reg_LiblineaR.R +++ b/R/logistic_reg_LiblineaR.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_LiblineaR #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_brulee.R b/R/logistic_reg_brulee.R index 3b1784246..ffdc92777 100644 --- a/R/logistic_reg_brulee.R +++ b/R/logistic_reg_brulee.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_brulee #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_gee.R b/R/logistic_reg_gee.R index 3050ab2c4..d97be886c 100644 --- a/R/logistic_reg_gee.R +++ b/R/logistic_reg_gee.R @@ -8,3 +8,5 @@ #' @name details_logistic_reg_gee #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git 
a/R/logistic_reg_glm.R b/R/logistic_reg_glm.R index bc173a7ae..96e37871d 100644 --- a/R/logistic_reg_glm.R +++ b/R/logistic_reg_glm.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_glm #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_glmer.R b/R/logistic_reg_glmer.R index af14a12fe..f241a1717 100644 --- a/R/logistic_reg_glmer.R +++ b/R/logistic_reg_glmer.R @@ -8,3 +8,5 @@ #' @name details_logistic_reg_glmer #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_glmnet.R b/R/logistic_reg_glmnet.R index 70143b693..56e38d083 100644 --- a/R/logistic_reg_glmnet.R +++ b/R/logistic_reg_glmnet.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_glmnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_keras.R b/R/logistic_reg_keras.R index 13f145878..4845f16b1 100644 --- a/R/logistic_reg_keras.R +++ b/R/logistic_reg_keras.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_keras #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_spark.R b/R/logistic_reg_spark.R index d73c38c8b..b9bc3c324 100644 --- a/R/logistic_reg_spark.R +++ b/R/logistic_reg_spark.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_stan.R b/R/logistic_reg_stan.R index f497c7d5c..637b69483 100644 --- a/R/logistic_reg_stan.R +++ b/R/logistic_reg_stan.R @@ -9,3 +9,5 @@ #' @name details_logistic_reg_stan #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/logistic_reg_stan_glmer.R b/R/logistic_reg_stan_glmer.R index 9e227a96d..2649591a8 100644 --- a/R/logistic_reg_stan_glmer.R +++ 
b/R/logistic_reg_stan_glmer.R @@ -8,3 +8,5 @@ #' @name details_logistic_reg_stan_glmer #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/mars_earth.R b/R/mars_earth.R index ee6dc56af..9462f941c 100644 --- a/R/mars_earth.R +++ b/R/mars_earth.R @@ -9,3 +9,5 @@ #' @name details_mars_earth #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/mlp_brulee.R b/R/mlp_brulee.R index 29571ae83..787b7d9cf 100644 --- a/R/mlp_brulee.R +++ b/R/mlp_brulee.R @@ -7,3 +7,5 @@ #' @name details_mlp_brulee #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/mlp_keras.R b/R/mlp_keras.R index 9af724445..4838a13fe 100644 --- a/R/mlp_keras.R +++ b/R/mlp_keras.R @@ -7,3 +7,5 @@ #' @name details_mlp_keras #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/mlp_nnet.R b/R/mlp_nnet.R index 4ee34afd7..625ece227 100644 --- a/R/mlp_nnet.R +++ b/R/mlp_nnet.R @@ -7,3 +7,5 @@ #' @name details_mlp_nnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/multinom_reg_glmnet.R b/R/multinom_reg_glmnet.R index f40be0f38..76a7d2ed7 100644 --- a/R/multinom_reg_glmnet.R +++ b/R/multinom_reg_glmnet.R @@ -8,3 +8,5 @@ #' @name details_multinom_reg_glmnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/multinom_reg_keras.R b/R/multinom_reg_keras.R index 7003a38ef..334da6b3e 100644 --- a/R/multinom_reg_keras.R +++ b/R/multinom_reg_keras.R @@ -8,3 +8,5 @@ #' @name details_multinom_reg_keras #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/multinom_reg_nnet.R b/R/multinom_reg_nnet.R index fd9932654..1d9d92741 100644 --- 
a/R/multinom_reg_nnet.R +++ b/R/multinom_reg_nnet.R @@ -8,3 +8,5 @@ #' @name details_multinom_reg_nnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/multinom_reg_spark.R b/R/multinom_reg_spark.R index c6a8f6663..c9c01530c 100644 --- a/R/multinom_reg_spark.R +++ b/R/multinom_reg_spark.R @@ -8,3 +8,5 @@ #' @name details_multinom_reg_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/naive_Bayes_klaR.R b/R/naive_Bayes_klaR.R index b5e254bf6..42d403e9c 100644 --- a/R/naive_Bayes_klaR.R +++ b/R/naive_Bayes_klaR.R @@ -8,3 +8,5 @@ #' @name details_naive_Bayes_klaR #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/naive_Bayes_naivebayes.R b/R/naive_Bayes_naivebayes.R index 67cfe8274..b3c1b981c 100644 --- a/R/naive_Bayes_naivebayes.R +++ b/R/naive_Bayes_naivebayes.R @@ -8,3 +8,5 @@ #' @name details_naive_Bayes_naivebayes #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/nearest_neighbor_kknn.R b/R/nearest_neighbor_kknn.R index ed04e2373..5c25c45ad 100644 --- a/R/nearest_neighbor_kknn.R +++ b/R/nearest_neighbor_kknn.R @@ -8,3 +8,5 @@ #' @name details_nearest_neighbor_kknn #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/pls_mixOmics.R b/R/pls_mixOmics.R index 76e0624c8..03efc62fb 100644 --- a/R/pls_mixOmics.R +++ b/R/pls_mixOmics.R @@ -7,3 +7,5 @@ #' @name details_pls_mixOmics #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_gee.R b/R/poisson_reg_gee.R index c8f407ba5..ed83cfe54 100644 --- a/R/poisson_reg_gee.R +++ b/R/poisson_reg_gee.R @@ -8,3 +8,5 @@ #' @name details_poisson_reg_gee #' @keywords internal NULL + +# See man/rmd/README.md 
for a description of how these files are processed diff --git a/R/poisson_reg_glm.R b/R/poisson_reg_glm.R index 772c3f14b..59fd66dc2 100644 --- a/R/poisson_reg_glm.R +++ b/R/poisson_reg_glm.R @@ -7,3 +7,5 @@ #' @name details_poisson_reg_glm #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_glmer.R b/R/poisson_reg_glmer.R index 26b0bddd7..1b22ed1df 100644 --- a/R/poisson_reg_glmer.R +++ b/R/poisson_reg_glmer.R @@ -8,3 +8,5 @@ #' @name details_poisson_reg_glmer #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_glmnet.R b/R/poisson_reg_glmnet.R index 5358b0fc3..bf40cb76f 100644 --- a/R/poisson_reg_glmnet.R +++ b/R/poisson_reg_glmnet.R @@ -8,3 +8,5 @@ #' @name details_poisson_reg_glmnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_hurdle.R b/R/poisson_reg_hurdle.R index 8ad3a2c36..1f1426cf4 100644 --- a/R/poisson_reg_hurdle.R +++ b/R/poisson_reg_hurdle.R @@ -9,3 +9,5 @@ #' @name details_poisson_reg_hurdle #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_stan.R b/R/poisson_reg_stan.R index bff455f37..937d097ad 100644 --- a/R/poisson_reg_stan.R +++ b/R/poisson_reg_stan.R @@ -8,3 +8,5 @@ #' @name details_poisson_reg_stan #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_stan_glmer.R b/R/poisson_reg_stan_glmer.R index 166014335..98082762f 100644 --- a/R/poisson_reg_stan_glmer.R +++ b/R/poisson_reg_stan_glmer.R @@ -8,3 +8,5 @@ #' @name details_poisson_reg_stan_glmer #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/poisson_reg_zeroinfl.R b/R/poisson_reg_zeroinfl.R index f2ab1d392..4b6016cc0 100644 
--- a/R/poisson_reg_zeroinfl.R +++ b/R/poisson_reg_zeroinfl.R @@ -9,3 +9,5 @@ #' @name details_poisson_reg_zeroinfl #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/proportional_hazards_glmnet.R b/R/proportional_hazards_glmnet.R index 00b510ef7..b017038ec 100644 --- a/R/proportional_hazards_glmnet.R +++ b/R/proportional_hazards_glmnet.R @@ -7,3 +7,5 @@ #' @name details_proportional_hazards_glmnet #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/proportional_hazards_survival.R b/R/proportional_hazards_survival.R index 39acf58e9..a3a7fe367 100644 --- a/R/proportional_hazards_survival.R +++ b/R/proportional_hazards_survival.R @@ -7,3 +7,5 @@ #' @name details_proportional_hazards_survival #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/rand_forest_party.R b/R/rand_forest_party.R index ca9a8eb06..5fe37cfc6 100644 --- a/R/rand_forest_party.R +++ b/R/rand_forest_party.R @@ -9,3 +9,5 @@ #' @name details_rand_forest_party #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/rand_forest_randomForest.R b/R/rand_forest_randomForest.R index 9d044be8e..36fd3a9ca 100644 --- a/R/rand_forest_randomForest.R +++ b/R/rand_forest_randomForest.R @@ -9,3 +9,5 @@ #' @name details_rand_forest_randomForest #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/rand_forest_ranger.R b/R/rand_forest_ranger.R index 7eb4942a4..80160198e 100644 --- a/R/rand_forest_ranger.R +++ b/R/rand_forest_ranger.R @@ -9,3 +9,5 @@ #' @name details_rand_forest_ranger #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/rand_forest_spark.R b/R/rand_forest_spark.R index ec0dacfab..07684c29e 100644 --- 
a/R/rand_forest_spark.R +++ b/R/rand_forest_spark.R @@ -9,3 +9,5 @@ #' @name details_rand_forest_spark #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/rule_fit_xrf.R b/R/rule_fit_xrf.R index 46df5ebc7..5a45e36e2 100644 --- a/R/rule_fit_xrf.R +++ b/R/rule_fit_xrf.R @@ -9,3 +9,5 @@ #' @name details_rule_fit_xrf #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/survival_reg_flexsurv.R b/R/survival_reg_flexsurv.R index 98df28eb7..b7f62aff0 100644 --- a/R/survival_reg_flexsurv.R +++ b/R/survival_reg_flexsurv.R @@ -7,3 +7,5 @@ #' @name details_survival_reg_flexsurv #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/survival_reg_survival.R b/R/survival_reg_survival.R index 2df45c4e5..5b4ed13f3 100644 --- a/R/survival_reg_survival.R +++ b/R/survival_reg_survival.R @@ -7,3 +7,5 @@ #' @name details_survival_reg_survival #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/svm_linear_LiblineaR.R b/R/svm_linear_LiblineaR.R index 8e210f911..76ccb1071 100644 --- a/R/svm_linear_LiblineaR.R +++ b/R/svm_linear_LiblineaR.R @@ -10,3 +10,5 @@ #' @name details_svm_linear_LiblineaR #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/svm_linear_kernlab.R b/R/svm_linear_kernlab.R index a380e3130..1300fc542 100644 --- a/R/svm_linear_kernlab.R +++ b/R/svm_linear_kernlab.R @@ -10,3 +10,5 @@ #' @name details_svm_linear_kernlab #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/R/svm_poly_kernlab.R b/R/svm_poly_kernlab.R index 0ac76609d..31a214821 100644 --- a/R/svm_poly_kernlab.R +++ b/R/svm_poly_kernlab.R @@ -10,3 +10,5 @@ #' @name details_svm_poly_kernlab #' @keywords internal NULL + 
+# See man/rmd/README.md for a description of how these files are processed diff --git a/R/svm_rbf_kernlab.R b/R/svm_rbf_kernlab.R index 9b989bd36..694181052 100644 --- a/R/svm_rbf_kernlab.R +++ b/R/svm_rbf_kernlab.R @@ -10,3 +10,5 @@ #' @name details_svm_rbf_kernlab #' @keywords internal NULL + +# See man/rmd/README.md for a description of how these files are processed diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index 369a74a6f..7db27487e 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -1,13 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bag_tree_rpart 2.R, R/bag_tree_rpart.R +% Please edit documentation in R/bag_tree_rpart.R \name{details_bag_tree_rpart} \alias{details_bag_tree_rpart} -\title{Ensembles of CART decision trees} +\title{Bagged trees via rpart} \description{ -\code{\link[ipred:bagging]{ipred::bagging()}} fits an ensemble of decision trees, using the \code{rpart} package. - -\code{\link[baguette:bagger]{baguette::bagger()}} creates an collection of decision trees forming an -ensemble. All trees in the ensemble are combined to produce a final prediction. +\code{\link[baguette:bagger]{baguette::bagger()}} and \code{\link[ipred:bagging]{ipred::bagging()}} create collections of decision +trees forming an ensemble. All trees in the ensemble are combined to produce +a final prediction. } \details{ For this engine, there are multiple modes: classification, regression, @@ -112,119 +111,6 @@ Categorical predictors can be partitioned into groups of factor levels are not required for this model. } -\subsection{References}{ -\itemize{ -\item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2): -123-140 -\item Hothorn T, Lausen B, Benner A, Radespiel-Troeger M. 2004. Bagging -Survival Trees. \emph{Statistics in Medicine}, 23(1), 77–91. -\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. -Springer. 
-} -} - -For this engine, there are multiple modes: classification, regression, -and censored regression -\subsection{Tuning Parameters}{ - -This model has 4 tuning parameters: -\itemize{ -\item \code{class_cost}: Class Cost (type: double, default: (see below)) -\item \code{tree_depth}: Tree Depth (type: integer, default: 30L) -\item \code{min_n}: Minimal Node Size (type: integer, default: 2L) -\item \code{cost_complexity}: Cost-Complexity Parameter (type: double, default: -0.01) -} - -For the \code{class_cost} parameter, the value can be a non-negative scalar -for a class cost (where a cost of 1 means no extra cost). This is useful -for when the first level of the outcome factor is the minority class. If -this is not the case, values between zero and one can be used to bias to -the second level of the factor. -} - -\subsection{Translation from parsnip to the original package (classification)}{ - -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) - -bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% - set_engine("rpart") \%>\% - set_mode("classification") \%>\% - translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (classification) -## -## Main Arguments: -## cost_complexity = double(1) -## tree_depth = integer(1) -## min_n = integer(1) -## -## Computational engine: rpart -## -## Model fit template: -## baguette::bagger(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = integer(1), base_model = "CART") -} -} - -\subsection{Translation from parsnip to the original package (regression)}{ - -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) - -bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% - set_engine("rpart") \%>\% - set_mode("regression") \%>\% - translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (regression) -## -## Main Arguments: -## cost_complexity = double(1) -## tree_depth = integer(1) -## min_n = integer(1) -## -## Computational engine: rpart -## -## Model fit template: -## baguette::bagger(formula = missing_arg(), data = missing_arg(), -## weights = missing_arg(), cp = double(1), maxdepth = integer(1), -## minsplit = integer(1), base_model = "CART") -} -} - -\subsection{Translation from parsnip to the original package (censored regression)}{ - -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(censored) - -bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% - set_engine("rpart") \%>\% - set_mode("censored regression") \%>\% - translate() -}\if{html}{\out{
}}\preformatted{## Bagged Decision Tree Model Specification (censored regression) -## -## Main Arguments: -## cost_complexity = double(1) -## tree_depth = integer(1) -## min_n = integer(1) -## -## Computational engine: rpart -## -## Model fit template: -## ipred::bagging(formula = missing_arg(), data = missing_arg(), -## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) -} -} - -\subsection{Preprocessing requirements}{ - -This engine does not require any special encoding of the predictors. -Categorical predictors can be partitioned into groups of factor levels -(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables -are not required for this model. -} - \subsection{References}{ \itemize{ \item Breiman L. 1996. “Bagging predictors”. Machine Learning. 24 (2): From 793d02e065ff3879706ce13647625635b5d3dd3e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 14:35:28 -0500 Subject: [PATCH 33/65] more code cleanup --- R/engine_docs.R | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index f59b7ffd3..07d7812bf 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -22,11 +22,6 @@ knit_engine_docs <- function(pattern = NULL) { tibble::tibble(file = basename(files), result = res) } -# TODO -# - In Rmd, state which packages have engine code e.g. "The parsnip package -# contains rpart engines for classification and regression and the censored package -# contains an rpart engine for censored regression". 
- # ------------------------------------------------------------------------------ extensions <- function() { @@ -292,19 +287,3 @@ generate_set_engine_bullets <- function() { sort_c <- function(x) { withr::with_collate("C", sort(x)) } -get_sorted_unique_engines <- function(x) { - engines <- x$engine - engines <- unique(engines) - engines <- sort_c(engines) - engines -} -combine_prefix_with_engines <- function(prefix, engines) { - if (length(engines) == 0L) { - engines <- "No engines currently available" - } else { - engines <- glue::glue_collapse(engines, sep = ", ") - } - - glue::glue("{prefix} {engines}") -} - From 5574bb8e8faa0ec59d83172199152e127e11c7b2 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 14:47:12 -0500 Subject: [PATCH 34/65] fix some extension package doc issues --- man/details_C5_rules_C5.0.Rd | 7 ++++++- man/details_boost_tree_mboost.Rd | 7 ++++++- man/details_linear_reg_gls.Rd | 7 +------ man/details_linear_reg_stan_glmer.Rd | 7 ++++++- man/details_logistic_reg_stan_glmer.Rd | 7 ++++++- man/details_poisson_reg_gee.Rd | 2 +- man/details_poisson_reg_glmer.Rd | 2 +- man/details_poisson_reg_stan_glmer.Rd | 2 +- man/rmd/C5_rules_C5.0.Rmd | 6 +++++- man/rmd/boost_tree_mboost.Rmd | 4 ++++ man/rmd/linear_reg_stan_glmer.Rmd | 4 ++++ man/rmd/logistic_reg_stan_glmer.Rmd | 4 ++++ man/rmd/poisson_reg_gee.Rmd | 2 +- man/rmd/poisson_reg_glmer.Rmd | 2 +- man/rmd/poisson_reg_stan_glmer.Rmd | 2 +- 15 files changed, 48 insertions(+), 17 deletions(-) diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index 4b00c7ae1..8584e9455 100644 --- a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -23,7 +23,12 @@ less iterations of boosting are performed than the number requested. \code{\link[C50:C5.0Control]{C50::C5.0Control()}}). } -\subsection{Translation from parsnip to the underlying model call (regression)}{\if{html}{\out{
}}\preformatted{C5_rules( +\subsection{Translation from parsnip to the underlying model call (classification)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{rules}.\if{html}{\out{
}}\preformatted{library(rules) + +C5_rules( trees = integer(1), min_n = integer(1) ) \%>\% diff --git a/man/details_boost_tree_mboost.Rd b/man/details_boost_tree_mboost.Rd index 7dace18ff..84c568cb3 100644 --- a/man/details_boost_tree_mboost.Rd +++ b/man/details_boost_tree_mboost.Rd @@ -26,7 +26,12 @@ The \code{mtry} parameter is related to the number of predictors. The default is to use all predictors. } -\subsection{Translation from parsnip to the original package (censored regression)}{\if{html}{\out{
}}\preformatted{boost_tree() \%>\% +\subsection{Translation from parsnip to the original package (censored regression)}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) + +boost_tree() \%>\% set_engine("mboost") \%>\% set_mode("censored regression") \%>\% translate() diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index 84dcc7f0f..8c2e0dfcc 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -117,13 +117,8 @@ lme_fit <- }\if{html}{\out{
}} The estimated within-subject correlations are the same:\if{html}{\out{
}}\preformatted{library(ape) -}\if{html}{\out{
}}\preformatted{## -## Attaching package: 'ape' -## The following object is masked from 'package:rsample': -## -## complement -}\if{html}{\out{
}}\preformatted{# lme, use ape package: +# lme, use ape package: lme_within_sub <- varcomp(lme_fit$fit)/sum(varcomp(lme_fit$fit)) lme_within_sub["subject"] }\if{html}{\out{
}}\preformatted{## subject diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index 5202bc329..119c54b19 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -34,7 +34,12 @@ centering all predictors). See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +linear_reg() \%>\% set_engine("stan_glmer") \%>\% set_mode("regression") \%>\% translate() diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index 14c5e2fdc..348446ce7 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -34,7 +34,12 @@ centering all predictors). See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more information. } -\subsection{Translation from parsnip to the original package}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% +\subsection{Translation from parsnip to the original package}{ + +There is a parsnip extension package required to fit this model to this +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) + +logistic_reg() \%>\% set_engine("stan_glmer") \%>\% translate() }\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd index 58326fd78..4fb21d049 100644 --- a/man/details_poisson_reg_gee.Rd +++ b/man/details_poisson_reg_gee.Rd @@ -20,7 +20,7 @@ an effect on the inferential results and parameter covariance values. \subsection{Translation from parsnip to the original package}{ There is a parsnip extension package required to fit this model to this -mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(poissonreg) +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "gee") \%>\% set_engine("gee") \%>\% diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index 7cb64c27c..11183c2df 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -17,7 +17,7 @@ This model has no tuning parameters. \subsection{Translation from parsnip to the original package}{ There is a parsnip extension package required to fit this model to this -mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(poissonreg) +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "glmer") \%>\% set_engine("glmer") \%>\% diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index 5c0d68904..dc3e575b0 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -37,7 +37,7 @@ See \code{?rstanarm::stan_glmer} and \code{?rstan::sampling} for more informatio \subsection{Translation from parsnip to the original package}{ There is a parsnip extension package required to fit this model to this -mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(poissonreg) +mode: \strong{multilevelmod}.\if{html}{\out{
}}\preformatted{library(multilevelmod) poisson_reg(engine = "stan_glmer") \%>\% set_engine("stan_glmer") \%>\% diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd index 9dd46029e..7f9d33d5f 100644 --- a/man/rmd/C5_rules_C5.0.Rmd +++ b/man/rmd/C5_rules_C5.0.Rmd @@ -24,9 +24,13 @@ param$item Note that C5.0 has a tool for _early stopping_ during boosting where less iterations of boosting are performed than the number requested. `C5_rules()` turns this feature off (although it can be re-enabled using [C50::C5.0Control()]). -## Translation from parsnip to the underlying model call (regression) +## Translation from parsnip to the underlying model call (classification) + +`r uses_extension("C5_rules", "C5.0", "classification")` ```{r C5.0-cls} +library(rules) + C5_rules( trees = integer(1), min_n = integer(1) diff --git a/man/rmd/boost_tree_mboost.Rmd b/man/rmd/boost_tree_mboost.Rmd index bcf3d66af..0d2ddabfd 100644 --- a/man/rmd/boost_tree_mboost.Rmd +++ b/man/rmd/boost_tree_mboost.Rmd @@ -27,7 +27,11 @@ The `mtry` parameter is related to the number of predictors. The default is to u ## Translation from parsnip to the original package (censored regression) +`r uses_extension("boost_tree", "mboost", "censored regression")` + ```{r mboost-creg} +library(censored) + boost_tree() %>% set_engine("mboost") %>% set_mode("censored regression") %>% diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd index 4837d0b82..787618b2b 100644 --- a/man/rmd/linear_reg_stan_glmer.Rmd +++ b/man/rmd/linear_reg_stan_glmer.Rmd @@ -22,7 +22,11 @@ See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. 
## Translation from parsnip to the original package +`r uses_extension("linear_reg", "stan_glmer", "regression")` + ```{r stan_glmer-csl} +library(multilevelmod) + linear_reg() %>% set_engine("stan_glmer") %>% set_mode("regression") %>% diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd index cdd812681..4617eabdd 100644 --- a/man/rmd/logistic_reg_stan_glmer.Rmd +++ b/man/rmd/logistic_reg_stan_glmer.Rmd @@ -22,7 +22,11 @@ See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. ## Translation from parsnip to the original package +`r uses_extension("logistic_reg", "stan_glmer", "classification")` + ```{r stan_glmer-cls} +library(multilevelmod) + logistic_reg() %>% set_engine("stan_glmer") %>% translate() diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd index 967c82cea..a8d8a3b2e 100644 --- a/man/rmd/poisson_reg_gee.Rmd +++ b/man/rmd/poisson_reg_gee.Rmd @@ -12,7 +12,7 @@ This model has no formal tuning parameters. It might be beneficial to determine `r uses_extension("poisson_reg", "gee", "regression")` ```{r gee-csl} -library(poissonreg) +library(multilevelmod) poisson_reg(engine = "gee") %>% set_engine("gee") %>% diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd index bfe797ac4..b7c65ce3b 100644 --- a/man/rmd/poisson_reg_glmer.Rmd +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -12,7 +12,7 @@ This model has no tuning parameters. `r uses_extension("poisson_reg", "glmer", "regression")` ```{r lmer-csl} -library(poissonreg) +library(multilevelmod) poisson_reg(engine = "glmer") %>% set_engine("glmer") %>% diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd index fafda6a83..9002a2a58 100644 --- a/man/rmd/poisson_reg_stan_glmer.Rmd +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -25,7 +25,7 @@ See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. 
`r uses_extension("poisson_reg", "stan_glmer", "regression")` ```{r stan_glmer-cls} -library(poissonreg) +library(multilevelmod) poisson_reg(engine = "stan_glmer") %>% set_engine("stan_glmer") %>% From 212fa374821b632d501f696c8da902cbd072e455 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 14:55:54 -0500 Subject: [PATCH 35/65] add a readme for the documentation system --- inst/README-DOCS.md | 107 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 inst/README-DOCS.md diff --git a/inst/README-DOCS.md b/inst/README-DOCS.md new file mode 100644 index 000000000..b77bcc14f --- /dev/null +++ b/inst/README-DOCS.md @@ -0,0 +1,107 @@ +# About the parsnip documentation system + +parsnip uses a hierarchy to describe different models: + + - The _model type_ defines the model equation + - Example: `"linear_reg"` is for models that predict a numeric outcome using a linear combination of predictors and coefficients. + + - The _model mode_ reflects the usage of a model, usually defined by the outcome type. + - Values: `"classification"`, `"regression"`, and `"censored regression"`. + + - The _model engine_ is a designation of _how_ the model should be fit. + + - This is often a package or function name (e.g. `"ranger"` for the ranger package). + +There are extension packages that use the parsnip model functions to define _new engines_. For example, the poissonreg package has engines for the `poisson_reg()` function. + +There are many combinations of type/engine/mode available in parsnip. We try to keep track of these values for packages that have their model definitions in parsnip and fully adhere to the tidymodels APIs. A tab-delimited file with these values is in the package (called `models.tsv`). + +## Main function and engine documentation + +Each modeling function defined in parsnip has a documentation file (with extension `Rd`). 
+ +Also, each combination of engine and model type has a corresponding Rd file (a.k.a the "engine-specific" documentation files). The list of known engines is also shown in the Rd file for the main function. + + +## Creating the engine-specific Rd files + +We'll use an example with `poisson_reg()` and the `"zeroinfl"` engine. + +Each model/engine combination has its own Rd file with a naming convention reflecting the contents (`poisson_reg_zeroinfl.R`). + +This file has a description of the type of model and the underlying function that is used for that engine: + +> `[pscl::zeroinfl()]` uses maximum likelihood estimation to fit a model for count data that has separate model terms for predicting the counts and for predicting the probability of a zero count. + +Next comes an indication that a specific _markdown_ file should be included: + +> `@includeRmd man/rmd/poisson_reg_zeroinfl.md details` + +as well as a directive for the Rd file name: + +> `@name details_poisson_reg_zeroinfl` + +The engine markdown file (`poisson_reg_zeroinfl.md`) is made by the developer offline. + +## Creating the engine-specific md files + +These files are created by corresponding Rmd files contained in `parsnip/man/rmd/`. There are Rmd files for the engines defined in parsnip as well as the extension packages listed by `parsnip:::extensions()`. + +Each Rmd file imports `parsnip/man/rmd/aaa.Rmd`. This file defines a few functions and loads some specific packages. + +The Rmd files use packages that are not formally parsnip dependencies (these are listed in `aaa.Rmd`). It also requires the parsnip extension packages defined in `parsnip:::extensions()`. + +The Rmd files have a consistent structure and there are numerous examples of these files in the package. The main sections are: + + - The list of possible engines. + - The list of tuning parameters, if any, and other arguments of interest. 
+ - Details about how parsnip translates the function call to the call for the underlying model function. + - Other details (e.g. preprocessing requirements, etc.) + +To convert the Rmd files to md, the function `knit_engine_docs()` is used to generate the md files. After this, using `devtools::document()` will create the engine specific `Rd` files. + +To test the results, do a hard restart of the R session (i.e., do not use `load_all()`). + +## The main function Rd files + +These files determine the engine specific Rd files specific for the function and enumerates their values in a bulleted list. For example, `poisson_reg.R` has the line: + +``` +#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} +``` + +This finds the relevant engine Rd files and creates the corresponding Rd markup: + +``` +There are different ways to fit this model. The method of estimation is +chosen by setting the model \emph{engine}. The engine-specific pages +for this model are listed below. + +\itemize{ + \item \code{\link[parsnip:details_poisson_reg_glm]{glm}¹²} + \item \code{\link[parsnip:details_poisson_reg_gee]{gee}²} + \item \code{\link[parsnip:details_poisson_reg_glmer]{glmer}²} + \item \code{\link[parsnip:details_poisson_reg_glmnet]{glmnet}²} + \item \code{\link[parsnip:details_poisson_reg_hurdle]{hurdle}²} + \item \code{\link[parsnip:details_poisson_reg_stan]{stan}²} + \item \code{\link[parsnip:details_poisson_reg_stan_glmer]{stan_glmer}²} + \item \code{\link[parsnip:details_poisson_reg_zeroinfl]{zeroinfl}²} +} + +¹ The default engine. ² May require a parsnip extension package. +``` + +There is a similar line at the bottom of the files that creates the _See Also_ list: + +``` +#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("poisson_reg")} +``` + +## Generating the model flat file + +As previously mentioned, the package contains a file `models.tsv`. The create this file: + +1. Load the packages listed in `parsnip:::extensions()`. 
+2. Run `parsnip::update_model_info_file()`. + +Note that the file should never have fewer lines that the current version. From dfe90db17cc231899f3725a60941a1f0cec3918e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 14:56:08 -0500 Subject: [PATCH 36/65] change path to readme doc --- R/C5_rules_C5.0.R | 2 +- R/bag_mars_earth.R | 2 +- R/bag_tree_C5.0.R | 2 +- R/bag_tree_rpart.R | 2 +- R/bart_dbarts.R | 2 +- R/boost_tree_C5.0.R | 2 +- R/boost_tree_mboost.R | 2 +- R/boost_tree_spark.R | 2 +- R/boost_tree_xgboost.R | 2 +- R/cubist_rules_Cubist.R | 2 +- R/decision_tree_C5.0.R | 2 +- R/decision_tree_party.R | 2 +- R/decision_tree_rpart.R | 2 +- R/decision_tree_spark.R | 2 +- R/discrim_flexible_earth.R | 2 +- R/discrim_linear_MASS.R | 2 +- R/discrim_linear_mda.R | 2 +- R/discrim_linear_sda.R | 2 +- R/discrim_linear_sparsediscrim.R | 2 +- R/discrim_quad_MASS.R | 2 +- R/discrim_quad_sparsediscrim.R | 2 +- R/discrim_regularized_klaR.R | 2 +- R/linear_reg_brulee.R | 2 +- R/linear_reg_gee.R | 2 +- R/linear_reg_glm.R | 2 +- R/linear_reg_glmnet.R | 2 +- R/linear_reg_gls.R | 2 +- R/linear_reg_keras.R | 2 +- R/linear_reg_lm.R | 2 +- R/linear_reg_lme.R | 2 +- R/linear_reg_lmer.R | 2 +- R/linear_reg_spark.R | 2 +- R/linear_reg_stan.R | 2 +- R/linear_reg_stan_glmer.R | 2 +- R/logistic_reg_LiblineaR.R | 2 +- R/logistic_reg_brulee.R | 2 +- R/logistic_reg_gee.R | 2 +- R/logistic_reg_glm.R | 2 +- R/logistic_reg_glmer.R | 2 +- R/logistic_reg_glmnet.R | 2 +- R/logistic_reg_keras.R | 2 +- R/logistic_reg_spark.R | 2 +- R/logistic_reg_stan.R | 2 +- R/logistic_reg_stan_glmer.R | 2 +- R/mars_earth.R | 2 +- R/mlp_brulee.R | 2 +- R/mlp_keras.R | 2 +- R/mlp_nnet.R | 2 +- R/multinom_reg_glmnet.R | 2 +- R/multinom_reg_keras.R | 2 +- R/multinom_reg_nnet.R | 2 +- R/multinom_reg_spark.R | 2 +- R/naive_Bayes_klaR.R | 2 +- R/naive_Bayes_naivebayes.R | 2 +- R/nearest_neighbor_kknn.R | 2 +- R/pls_mixOmics.R | 2 +- R/poisson_reg_gee.R | 2 +- R/poisson_reg_glm.R | 2 +- R/poisson_reg_glmer.R | 2 
+- R/poisson_reg_glmnet.R | 2 +- R/poisson_reg_hurdle.R | 2 +- R/poisson_reg_stan.R | 2 +- R/poisson_reg_stan_glmer.R | 2 +- R/poisson_reg_zeroinfl.R | 2 +- R/proportional_hazards_glmnet.R | 2 +- R/proportional_hazards_survival.R | 2 +- R/rand_forest_party.R | 2 +- R/rand_forest_randomForest.R | 2 +- R/rand_forest_ranger.R | 2 +- R/rand_forest_spark.R | 2 +- R/rule_fit_xrf.R | 2 +- R/survival_reg_flexsurv.R | 2 +- R/survival_reg_survival.R | 2 +- R/svm_linear_LiblineaR.R | 2 +- R/svm_linear_kernlab.R | 2 +- R/svm_poly_kernlab.R | 2 +- R/svm_rbf_kernlab.R | 2 +- 77 files changed, 77 insertions(+), 77 deletions(-) diff --git a/R/C5_rules_C5.0.R b/R/C5_rules_C5.0.R index 4e800581a..ed745759f 100644 --- a/R/C5_rules_C5.0.R +++ b/R/C5_rules_C5.0.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/bag_mars_earth.R b/R/bag_mars_earth.R index 9f48413ae..d99eb783f 100644 --- a/R/bag_mars_earth.R +++ b/R/bag_mars_earth.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/bag_tree_C5.0.R b/R/bag_tree_C5.0.R index 5f4f591d6..d1b8aefeb 100644 --- a/R/bag_tree_C5.0.R +++ b/R/bag_tree_C5.0.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/bag_tree_rpart.R b/R/bag_tree_rpart.R index 407d99ec3..53d400c3e 100644 --- a/R/bag_tree_rpart.R +++ b/R/bag_tree_rpart.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/bart_dbarts.R b/R/bart_dbarts.R 
index 8f393d86c..de348dbd6 100644 --- a/R/bart_dbarts.R +++ b/R/bart_dbarts.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/boost_tree_C5.0.R b/R/boost_tree_C5.0.R index 3bbd52949..ec72ad267 100644 --- a/R/boost_tree_C5.0.R +++ b/R/boost_tree_C5.0.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/boost_tree_mboost.R b/R/boost_tree_mboost.R index 0aab174b6..e736b45fc 100644 --- a/R/boost_tree_mboost.R +++ b/R/boost_tree_mboost.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/boost_tree_spark.R b/R/boost_tree_spark.R index 95cdbd672..264d0060c 100644 --- a/R/boost_tree_spark.R +++ b/R/boost_tree_spark.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/boost_tree_xgboost.R b/R/boost_tree_xgboost.R index 781a0ff7b..c39e61fb6 100644 --- a/R/boost_tree_xgboost.R +++ b/R/boost_tree_xgboost.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/cubist_rules_Cubist.R b/R/cubist_rules_Cubist.R index 6f1efebd8..f548ac9e8 100644 --- a/R/cubist_rules_Cubist.R +++ b/R/cubist_rules_Cubist.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these 
files are processed diff --git a/R/decision_tree_C5.0.R b/R/decision_tree_C5.0.R index 5dfa3dee1..7a8b5b626 100644 --- a/R/decision_tree_C5.0.R +++ b/R/decision_tree_C5.0.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/decision_tree_party.R b/R/decision_tree_party.R index 8a8dc067e..37ba92df7 100644 --- a/R/decision_tree_party.R +++ b/R/decision_tree_party.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/decision_tree_rpart.R b/R/decision_tree_rpart.R index bbe1e11bd..3953431da 100644 --- a/R/decision_tree_rpart.R +++ b/R/decision_tree_rpart.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/decision_tree_spark.R b/R/decision_tree_spark.R index 144f2351b..de5912c62 100644 --- a/R/decision_tree_spark.R +++ b/R/decision_tree_spark.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_flexible_earth.R b/R/discrim_flexible_earth.R index 1b87e2f8d..6c8d146ea 100644 --- a/R/discrim_flexible_earth.R +++ b/R/discrim_flexible_earth.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_linear_MASS.R b/R/discrim_linear_MASS.R index a3c7cfd91..9b6e1eba0 100644 --- a/R/discrim_linear_MASS.R +++ b/R/discrim_linear_MASS.R @@ -11,4 +11,4 @@ #' @keywords internal 
NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_linear_mda.R b/R/discrim_linear_mda.R index 4365db3a3..e1253b4ba 100644 --- a/R/discrim_linear_mda.R +++ b/R/discrim_linear_mda.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_linear_sda.R b/R/discrim_linear_sda.R index 817cb5b4c..69a589dfd 100644 --- a/R/discrim_linear_sda.R +++ b/R/discrim_linear_sda.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_linear_sparsediscrim.R b/R/discrim_linear_sparsediscrim.R index 8ad80ea84..ed99b5f0b 100644 --- a/R/discrim_linear_sparsediscrim.R +++ b/R/discrim_linear_sparsediscrim.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_quad_MASS.R b/R/discrim_quad_MASS.R index 7a9829fd9..4d98ed3e9 100644 --- a/R/discrim_quad_MASS.R +++ b/R/discrim_quad_MASS.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/discrim_quad_sparsediscrim.R b/R/discrim_quad_sparsediscrim.R index 06851aa7a..2582d292d 100644 --- a/R/discrim_quad_sparsediscrim.R +++ b/R/discrim_quad_sparsediscrim.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git 
a/R/discrim_regularized_klaR.R b/R/discrim_regularized_klaR.R index b0e75c088..9998182b8 100644 --- a/R/discrim_regularized_klaR.R +++ b/R/discrim_regularized_klaR.R @@ -12,4 +12,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_brulee.R b/R/linear_reg_brulee.R index 7515d66a7..1d7815dd9 100644 --- a/R/linear_reg_brulee.R +++ b/R/linear_reg_brulee.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_gee.R b/R/linear_reg_gee.R index edf8aa47f..ed67b5a60 100644 --- a/R/linear_reg_gee.R +++ b/R/linear_reg_gee.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_glm.R b/R/linear_reg_glm.R index a458e49ff..8a93cbf8c 100644 --- a/R/linear_reg_glm.R +++ b/R/linear_reg_glm.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_glmnet.R b/R/linear_reg_glmnet.R index 0e338e705..76a33662f 100644 --- a/R/linear_reg_glmnet.R +++ b/R/linear_reg_glmnet.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_gls.R b/R/linear_reg_gls.R index 6b15f4a4b..ae9f739ae 100644 --- a/R/linear_reg_gls.R +++ b/R/linear_reg_gls.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See 
inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_keras.R b/R/linear_reg_keras.R index 5845113cd..e3f4c5164 100644 --- a/R/linear_reg_keras.R +++ b/R/linear_reg_keras.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_lm.R b/R/linear_reg_lm.R index c34d00db8..840f9a09c 100644 --- a/R/linear_reg_lm.R +++ b/R/linear_reg_lm.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_lme.R b/R/linear_reg_lme.R index f56f3b273..0fe58478d 100644 --- a/R/linear_reg_lme.R +++ b/R/linear_reg_lme.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_lmer.R b/R/linear_reg_lmer.R index bf805104d..745a0e6ad 100644 --- a/R/linear_reg_lmer.R +++ b/R/linear_reg_lmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_spark.R b/R/linear_reg_spark.R index 88ffad626..31db6a93b 100644 --- a/R/linear_reg_spark.R +++ b/R/linear_reg_spark.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_stan.R b/R/linear_reg_stan.R index cad16f8b7..caad47d50 100644 --- a/R/linear_reg_stan.R +++ b/R/linear_reg_stan.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these 
files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/linear_reg_stan_glmer.R b/R/linear_reg_stan_glmer.R index 2dc2ec555..f6c8d7c52 100644 --- a/R/linear_reg_stan_glmer.R +++ b/R/linear_reg_stan_glmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_LiblineaR.R b/R/logistic_reg_LiblineaR.R index dcc1c1c55..356bcc889 100644 --- a/R/logistic_reg_LiblineaR.R +++ b/R/logistic_reg_LiblineaR.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_brulee.R b/R/logistic_reg_brulee.R index ffdc92777..f02fa8862 100644 --- a/R/logistic_reg_brulee.R +++ b/R/logistic_reg_brulee.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_gee.R b/R/logistic_reg_gee.R index d97be886c..3fd05cd50 100644 --- a/R/logistic_reg_gee.R +++ b/R/logistic_reg_gee.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_glm.R b/R/logistic_reg_glm.R index 96e37871d..939c14b13 100644 --- a/R/logistic_reg_glm.R +++ b/R/logistic_reg_glm.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_glmer.R b/R/logistic_reg_glmer.R index f241a1717..ade514a1b 100644 --- a/R/logistic_reg_glmer.R +++ 
b/R/logistic_reg_glmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_glmnet.R b/R/logistic_reg_glmnet.R index 56e38d083..c1a9406bd 100644 --- a/R/logistic_reg_glmnet.R +++ b/R/logistic_reg_glmnet.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_keras.R b/R/logistic_reg_keras.R index 4845f16b1..792b4e34c 100644 --- a/R/logistic_reg_keras.R +++ b/R/logistic_reg_keras.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_spark.R b/R/logistic_reg_spark.R index b9bc3c324..726f9dd59 100644 --- a/R/logistic_reg_spark.R +++ b/R/logistic_reg_spark.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_stan.R b/R/logistic_reg_stan.R index 637b69483..22775ad89 100644 --- a/R/logistic_reg_stan.R +++ b/R/logistic_reg_stan.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/logistic_reg_stan_glmer.R b/R/logistic_reg_stan_glmer.R index 2649591a8..192a1eff1 100644 --- a/R/logistic_reg_stan_glmer.R +++ b/R/logistic_reg_stan_glmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed 
diff --git a/R/mars_earth.R b/R/mars_earth.R index 9462f941c..69dcc66f5 100644 --- a/R/mars_earth.R +++ b/R/mars_earth.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/mlp_brulee.R b/R/mlp_brulee.R index 787b7d9cf..f2b522f5e 100644 --- a/R/mlp_brulee.R +++ b/R/mlp_brulee.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/mlp_keras.R b/R/mlp_keras.R index 4838a13fe..21affaf64 100644 --- a/R/mlp_keras.R +++ b/R/mlp_keras.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/mlp_nnet.R b/R/mlp_nnet.R index 625ece227..42891fd28 100644 --- a/R/mlp_nnet.R +++ b/R/mlp_nnet.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/multinom_reg_glmnet.R b/R/multinom_reg_glmnet.R index 76a7d2ed7..d6efee201 100644 --- a/R/multinom_reg_glmnet.R +++ b/R/multinom_reg_glmnet.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/multinom_reg_keras.R b/R/multinom_reg_keras.R index 334da6b3e..3e3b94c0a 100644 --- a/R/multinom_reg_keras.R +++ b/R/multinom_reg_keras.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git 
a/R/multinom_reg_nnet.R b/R/multinom_reg_nnet.R index 1d9d92741..dafe1eb6d 100644 --- a/R/multinom_reg_nnet.R +++ b/R/multinom_reg_nnet.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/multinom_reg_spark.R b/R/multinom_reg_spark.R index c9c01530c..1048312a1 100644 --- a/R/multinom_reg_spark.R +++ b/R/multinom_reg_spark.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/naive_Bayes_klaR.R b/R/naive_Bayes_klaR.R index 42d403e9c..09dc519e8 100644 --- a/R/naive_Bayes_klaR.R +++ b/R/naive_Bayes_klaR.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/naive_Bayes_naivebayes.R b/R/naive_Bayes_naivebayes.R index b3c1b981c..c4c2aff6a 100644 --- a/R/naive_Bayes_naivebayes.R +++ b/R/naive_Bayes_naivebayes.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/nearest_neighbor_kknn.R b/R/nearest_neighbor_kknn.R index 5c25c45ad..55d97b6a9 100644 --- a/R/nearest_neighbor_kknn.R +++ b/R/nearest_neighbor_kknn.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/pls_mixOmics.R b/R/pls_mixOmics.R index 03efc62fb..7a99381fb 100644 --- a/R/pls_mixOmics.R +++ b/R/pls_mixOmics.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are 
processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_gee.R b/R/poisson_reg_gee.R index ed83cfe54..0d8e31a42 100644 --- a/R/poisson_reg_gee.R +++ b/R/poisson_reg_gee.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_glm.R b/R/poisson_reg_glm.R index 59fd66dc2..b2a33fa36 100644 --- a/R/poisson_reg_glm.R +++ b/R/poisson_reg_glm.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_glmer.R b/R/poisson_reg_glmer.R index 1b22ed1df..2f1f3d6df 100644 --- a/R/poisson_reg_glmer.R +++ b/R/poisson_reg_glmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_glmnet.R b/R/poisson_reg_glmnet.R index bf40cb76f..17471830c 100644 --- a/R/poisson_reg_glmnet.R +++ b/R/poisson_reg_glmnet.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_hurdle.R b/R/poisson_reg_hurdle.R index 1f1426cf4..c31705c0c 100644 --- a/R/poisson_reg_hurdle.R +++ b/R/poisson_reg_hurdle.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_stan.R b/R/poisson_reg_stan.R index 937d097ad..eb6ef90bc 100644 --- a/R/poisson_reg_stan.R +++ b/R/poisson_reg_stan.R @@ -9,4 +9,4 @@ #' @keywords internal NULL 
-# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_stan_glmer.R b/R/poisson_reg_stan_glmer.R index 98082762f..0edc2249c 100644 --- a/R/poisson_reg_stan_glmer.R +++ b/R/poisson_reg_stan_glmer.R @@ -9,4 +9,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/poisson_reg_zeroinfl.R b/R/poisson_reg_zeroinfl.R index 4b6016cc0..d39fb1936 100644 --- a/R/poisson_reg_zeroinfl.R +++ b/R/poisson_reg_zeroinfl.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/proportional_hazards_glmnet.R b/R/proportional_hazards_glmnet.R index b017038ec..47925372e 100644 --- a/R/proportional_hazards_glmnet.R +++ b/R/proportional_hazards_glmnet.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/proportional_hazards_survival.R b/R/proportional_hazards_survival.R index a3a7fe367..11c438336 100644 --- a/R/proportional_hazards_survival.R +++ b/R/proportional_hazards_survival.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/rand_forest_party.R b/R/rand_forest_party.R index 5fe37cfc6..175dc2a4f 100644 --- a/R/rand_forest_party.R +++ b/R/rand_forest_party.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are 
processed diff --git a/R/rand_forest_randomForest.R b/R/rand_forest_randomForest.R index 36fd3a9ca..0fb7a8f13 100644 --- a/R/rand_forest_randomForest.R +++ b/R/rand_forest_randomForest.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/rand_forest_ranger.R b/R/rand_forest_ranger.R index 80160198e..20c1494db 100644 --- a/R/rand_forest_ranger.R +++ b/R/rand_forest_ranger.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/rand_forest_spark.R b/R/rand_forest_spark.R index 07684c29e..bf1a94237 100644 --- a/R/rand_forest_spark.R +++ b/R/rand_forest_spark.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/rule_fit_xrf.R b/R/rule_fit_xrf.R index 5a45e36e2..71e3d4f01 100644 --- a/R/rule_fit_xrf.R +++ b/R/rule_fit_xrf.R @@ -10,4 +10,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/survival_reg_flexsurv.R b/R/survival_reg_flexsurv.R index b7f62aff0..c2a75e943 100644 --- a/R/survival_reg_flexsurv.R +++ b/R/survival_reg_flexsurv.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/survival_reg_survival.R b/R/survival_reg_survival.R index 5b4ed13f3..409810b31 100644 --- a/R/survival_reg_survival.R +++ b/R/survival_reg_survival.R @@ -8,4 +8,4 @@ #' @keywords internal NULL -# See 
man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/svm_linear_LiblineaR.R b/R/svm_linear_LiblineaR.R index 76ccb1071..83f835b48 100644 --- a/R/svm_linear_LiblineaR.R +++ b/R/svm_linear_LiblineaR.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/svm_linear_kernlab.R b/R/svm_linear_kernlab.R index 1300fc542..80ea0b8cb 100644 --- a/R/svm_linear_kernlab.R +++ b/R/svm_linear_kernlab.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/svm_poly_kernlab.R b/R/svm_poly_kernlab.R index 31a214821..a25dda028 100644 --- a/R/svm_poly_kernlab.R +++ b/R/svm_poly_kernlab.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/svm_rbf_kernlab.R b/R/svm_rbf_kernlab.R index 694181052..7d0a299d0 100644 --- a/R/svm_rbf_kernlab.R +++ b/R/svm_rbf_kernlab.R @@ -11,4 +11,4 @@ #' @keywords internal NULL -# See man/rmd/README.md for a description of how these files are processed +# See inst/README-DOCS.md for a description of how these files are processed From eb03ec824d40ffa514eb56954bf2fd94401b7ee3 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 15:00:09 -0500 Subject: [PATCH 37/65] add two functions back --- R/engine_docs.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/R/engine_docs.R b/R/engine_docs.R index 07d7812bf..66796efa3 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -287,3 +287,20 @@ generate_set_engine_bullets <- function() { sort_c <- function(x) 
{ withr::with_collate("C", sort(x)) } + +get_sorted_unique_engines <- function(x) { + engines <- x$engine + engines <- unique(engines) + engines <- sort_c(engines) + engines +} +combine_prefix_with_engines <- function(prefix, engines) { + if (length(engines) == 0L) { + engines <- "No engines currently available" + } else { + engines <- glue::glue_collapse(engines, sep = ", ") + } + + glue::glue("{prefix} {engines}") +} + From 60e438491230e01cd587bd575c1dd70d7f69c738 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 17:29:44 -0500 Subject: [PATCH 38/65] also merge model info by mode. --- R/engine_docs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index 66796efa3..d51388e51 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -63,7 +63,7 @@ update_model_info_file <- function(path = "inst/models.tsv") { tidyr::unnest(cols = "pkg") %>% dplyr::inner_join(tibble::tibble(pkg = extensions()), by = "pkg") - info <- dplyr::left_join(info, exts, by = c("model", "engine")) + info <- dplyr::left_join(info, exts, by = c("model", "engine", "mode")) csv <- utils::write.table(info, file = path, row.names = FALSE, sep = "\t") invisible(info) From f5fc2d5559029b1eecf85ce9ed3b8dc8964d0450 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 4 Jan 2022 17:29:59 -0500 Subject: [PATCH 39/65] mode-specific dependencies for rpart --- inst/models.tsv | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/inst/models.tsv b/inst/models.tsv index 3ac928450..00f551610 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -2,11 +2,8 @@ "bag_mars" "classification" "earth" "baguette" "bag_mars" "regression" "earth" "baguette" "bag_tree" "censored regression" "rpart" "censored" -"bag_tree" "censored regression" "rpart" "baguette" "bag_tree" "classification" "C5.0" "baguette" -"bag_tree" "classification" "rpart" "censored" "bag_tree" "classification" "rpart" "baguette" -"bag_tree" "regression" "rpart" "censored" 
"bag_tree" "regression" "rpart" "baguette" "bart" "classification" "dbarts" NA "bart" "regression" "dbarts" NA @@ -21,9 +18,9 @@ "decision_tree" "censored regression" "party" "censored" "decision_tree" "censored regression" "rpart" "censored" "decision_tree" "classification" "C5.0" NA -"decision_tree" "classification" "rpart" "censored" +"decision_tree" "classification" "rpart" NA "decision_tree" "classification" "spark" NA -"decision_tree" "regression" "rpart" "censored" +"decision_tree" "regression" "rpart" NA "decision_tree" "regression" "spark" NA "discrim_flexible" "classification" "earth" "discrim" "discrim_linear" "classification" "MASS" "discrim" From da5f9ca53537000907c373f5ffa1309c87c0a3c1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 5 Jan 2022 12:04:52 -0500 Subject: [PATCH 40/65] avoid failing for "Packages unavailable to check Rd xrefs" --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 31200d631..0d7d2589c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -81,6 +81,7 @@ Config/Needs/website: tidymodels/tidymodels, tidyverse/tidytemplate, xgboost +Config/rcmdcheck/ignore-inconsequential-notes: true Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) From 5cd120ce765ae12802eb75bbc6ae7bac12ee0e3d Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Tue, 11 Jan 2022 17:11:14 -0700 Subject: [PATCH 41/65] Edits to README-DOCS, mainly so I think I will be able to come back to this and understand it --- inst/README-DOCS.md | 57 +++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/inst/README-DOCS.md b/inst/README-DOCS.md index b77bcc14f..bc0079527 100644 --- a/inst/README-DOCS.md +++ b/inst/README-DOCS.md @@ -1,81 +1,78 @@ # About the parsnip documentation system -parsnip has uses a hierarchy to describe different models: +parsnip uses three concepts to describe models: - - The _model type_ defines the model equation - - Example: `"linear_reg"` is 
for models that predict a numeric outcome using a linear combination of predictors and coefficients. + - The _model type_ specifies its mathematical structure. + - Example: `linear_reg()` is for models that predict a numeric outcome using a linear combination of predictors and coefficients. - - The _model mode_ reflects the usage of a model, usually defined by the outcome type. + - The _model mode_ reflects the type of prediction outcome. - Values: `"classification"`, `"regression"`, and `"censored regression"`. - The _model engine_ is a designation of _how_ the model should be fit. + - This is often an R package or function name (e.g. `"ranger"` for the ranger package). - - This is often a package or function name (e.g. `"ranger"` for the ranger package). +There are parsnip extension packages that use the parsnip model functions to define _new engines_. For example, the poissonreg package has engines for the `poisson_reg()` function. -There are extension packages that use the parsnip model functions to define _new engines_. For example, the poissonreg package has engines for the `poission_reg()` function. -There are many combinations of type/engine/mode available in parsnip. We try to keep track of these values for packages that have their model definitions in parsnip and fully adhere to the tidymodels APIs. A tab-delimited file with these values is in the package (called `models.tsv`). +There are many combinations of type/engine/mode available in parsnip. We keep track of these values for packages that have their model definitions in parsnip and fully adhere to the tidymodels APIs. A tab-delimited file with these values is in the package (called `models.tsv`). ## Main function and engine documentation -Each modeling function defined in parsnip has a documentation file (with extension `Rd`). -Also, each combination of engine and model type has a corresponding Rd file (a.k.a the "engine-specific" documentation files).
The list of known engines is also shown in the Rd file for the main function. +Each modeling function defined in parsnip has a documentation file (with extension `.Rd`). +Also, each combination of engine and model type has a corresponding `.Rd` file (a.k.a the "engine-specific" documentation files). The list of known engines is also shown in the `.Rd` file for the main function. -## Creating the engine-specific Rd files -We'll use an example with `poisson_reg()` and the `"zeroinfl"` engine. +## Creating the engine-specific `.Rd` files -Each model/engine combination has its own Rd file with a naming convention reflecting the contents (`poisson_reg_zeroinfl.R`). +How do we generate these `.Rd` files? We'll use an example with `poisson_reg()` and the `"zeroinfl"` engine. -This file has a description of the type of model and the underlying function that is used for that engine: +Each model/engine combination has its own `.R` file with a naming convention reflecting the contents (`poisson_reg_zeroinfl.R`). This file has a description of the type of model and the underlying function that is used for that engine: > `[pscl::zeroinfl()]` uses maximum likelihood estimation to fit a model for count data that has separate model terms for predicting the counts and for predicting the probability of a zero count. -Next comes an indication that a specific _markdown_ file should be included: +Next comes a roxygen comment including a specific _markdown_ file (notice we use `@includeRmd` but we actually include markdown): > `@includeRmd man/rmd/poisson_reg_zeroinfl.md details` -as well as a directive for the Rd file name: +as well as a directive for the `.Rd` file name to be created: > `@name details_poisson_reg_zeroinfl` The engine markdown file (`poisson_reg_zeroinfl.md`) is made by the developer offline. -## Creating the engine-specific md files +## Creating the engine-specific `.md` files -These files are created by corresponding Rmd files contained in `parsnip/man/rmd/`. 
There are Rmd files for the engines defined in parsnip as well as the extension packages listed by `parsnip:::extensions()`. +How do we make these markdown files? These are created by corresponding `.Rmd` files contained in `parsnip/man/rmd/`. There are `.Rmd` files for the engines defined in parsnip as well as the extension packages listed by `parsnip:::extensions()`. -Each Rmd file imports `parsnip/man/rmd/aaa.Rmd`. This file defines a few functions and loads some specific packages. +Each `.Rmd` file uses `parsnip/man/rmd/aaa.Rmd` as a child document. This file defines helper functions for the engine-specific documentation and loads some specific packages. -The Rmd files use packages that are not formally parsnip dependencies (these are listed in `aaa.Rmd`). It also requires the parsnip extension packages defined in `parsnip:::extensions()`. +The `.Rmd` files use packages that are not formally parsnip dependencies (these are listed in `aaa.Rmd`). It also requires the parsnip extension packages defined in `parsnip:::extensions()`. -The Rmd files have a consistent structure and there are numerous examples of these files in the package. The main sections are: +The `.Rmd` files have a consistent structure and there are numerous examples of these files in the package. The main sections are: - The list of possible engines. - The list of tuning parameters, if any, and other arguments of interest. - Details about how parsnip translates the function call to the call for the underlying model function. - Other details (e.g. preprocessing requirements, etc.) -To convert the Rmd files to md, the function `knit_engine_docs()` is used to generate the md files. After this, using `devtools::document()` will create the engine specific `Rd` files. +To convert the `.Rmd` files to `.md`, use the function `knit_engine_docs()`. After this, use `devtools::document()` to create the engine specific `.Rd` files. 
To test the results, do a hard restart of the R session (i.e., do not use `load_all()`). -## The main function Rd files +## The main function `.Rd` files -These files determine the engine specific Rd files specific for the function and enumerates their values in a bulleted list. For example, `poisson_reg.R` has the line: +This type of file determines the engine-specific `.Rd` files for the model function and enumerates their values in a bulleted list. For example, `poisson_reg.R` has the line: -``` +```r #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} ``` -This finds the relevant engine Rd files and creates the corresponding Rd markup: +This finds the relevant engine `.Rd` files and creates the corresponding `.Rd` markup: ``` There are different ways to fit this model. The method of estimation is chosen by setting the model \emph{engine}. The engine-specific pages -for this model are listed below. +for this model are listed below. \itemize{ \item \code{\link[parsnip:details_poisson_reg_glm]{glm}¹²} @@ -93,13 +90,13 @@ for this model are listed below. There is a similar line at the bottom of the files that creates the _See Also_ list: -``` +```r #' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("poisson_reg")} ``` ## Generating the model flat file -As previously mentioned, the package contains a file `models.tsv`. The create this file: +As previously mentioned, the parsnip package contains a file `models.tsv`. To create this file: 1. Load the packages listed in `parsnip:::extensions()`. 2. Run `parsnip::update_model_info_file()`. 
From 1b1a5db0b16e9b0676b2b9f990648b8b0a58d006 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 11 Jan 2022 20:16:26 -0500 Subject: [PATCH 42/65] Apply suggestions from code review Co-authored-by: Julia Silge Co-authored-by: Hannah Frick --- R/C5_rules_C5.0.R | 2 +- R/bag_mars.R | 2 +- R/bag_tree.R | 2 +- R/bart.R | 2 +- R/boost_tree.R | 2 +- R/c5_rules.R | 2 +- R/cubist_rules.R | 2 +- R/decision_tree.R | 2 +- R/discrim_flexible_earth.R | 2 +- R/discrim_linear.R | 2 +- R/discrim_linear_sparsediscrim.R | 2 +- R/discrim_quad.R | 2 +- R/discrim_quad_sparsediscrim.R | 2 +- R/discrim_regularized.R | 2 +- R/engine_docs.R | 14 +++++++------- R/gen_additive_mod.R | 2 +- R/linear_reg.R | 2 +- R/linear_reg_gee.R | 2 +- R/linear_reg_gls.R | 2 +- R/linear_reg_lme.R | 2 +- R/linear_reg_lmer.R | 2 +- R/linear_reg_stan_glmer.R | 2 +- R/logistic_reg.R | 2 +- R/logistic_reg_gee.R | 2 +- R/logistic_reg_glmer.R | 2 +- R/logistic_reg_stan_glmer.R | 2 +- R/mars.R | 2 +- R/mlp.R | 2 +- R/mlp_brulee.R | 2 +- R/multinom_reg.R | 2 +- R/naive_Bayes.R | 2 +- R/nullmodel.R | 2 +- R/pls.R | 2 +- R/poisson_reg.R | 2 +- R/poisson_reg_gee.R | 2 +- R/poisson_reg_glmer.R | 2 +- R/poisson_reg_stan_glmer.R | 2 +- R/proportional_hazards.R | 6 +++--- R/rule_fit_xrf.R | 2 +- R/svm_poly.R | 8 ++++---- man-roxygen/spec-references.R | 2 +- man/rmd/C5_rules_C5.0.Rmd | 2 +- man/rmd/decision_tree_party.Rmd | 2 +- man/rmd/discrim_flexible_earth.Rmd | 2 +- man/rmd/discrim_linear_mda.Rmd | 2 +- man/rmd/discrim_linear_sparsediscrim.Rmd | 2 +- man/rmd/discrim_quad_sparsediscrim.Rmd | 2 +- man/rmd/discrim_regularized_klaR.Rmd | 2 +- man/rmd/example_mlm.Rmd | 4 ++-- man/rmd/linear_reg_gee.Rmd | 12 ++++++------ man/rmd/linear_reg_gls.Rmd | 8 ++++---- man/rmd/linear_reg_lme.Rmd | 4 ++-- man/rmd/linear_reg_lmer.Rmd | 4 ++-- man/rmd/linear_reg_stan_glmer.Rmd | 8 ++++---- man/rmd/logistic_reg_gee.Rmd | 8 ++++---- man/rmd/logistic_reg_glmer.Rmd | 4 ++-- man/rmd/logistic_reg_stan_glmer.Rmd | 6 +++--- 
man/rmd/naive_Bayes_klaR.Rmd | 2 +- man/rmd/naive_Bayes_naivebayes.Rmd | 4 ++-- man/rmd/pls_mixOmics.Rmd | 2 +- man/rmd/poisson_reg_gee.Rmd | 8 ++++---- man/rmd/poisson_reg_glmer.Rmd | 4 ++-- man/rmd/poisson_reg_glmnet.Rmd | 1 - man/rmd/poisson_reg_stan.Rmd | 2 +- man/rmd/poisson_reg_stan_glmer.Rmd | 6 +++--- man/rmd/proportional_hazards_glmnet.Rmd | 2 +- man/rmd/rule_fit_xrf.Rmd | 4 ++-- man/rmd/surv_reg_flexsurv.Rmd | 2 +- man/rmd/surv_reg_survival.Rmd | 2 +- man/rmd/survival_reg_flexsurv.Rmd | 4 ++-- 70 files changed, 109 insertions(+), 110 deletions(-) diff --git a/R/C5_rules_C5.0.R b/R/C5_rules_C5.0.R index ed745759f..012a4c9ba 100644 --- a/R/C5_rules_C5.0.R +++ b/R/C5_rules_C5.0.R @@ -1,6 +1,6 @@ #' C5.0 rule-based classification models #' -#' [C50::C5.0()] fits model that derives feature rules from a tree for +#' [C50::C5.0()] fits a model that derives feature rules from a tree for #' prediction. A single tree or boosted ensemble can be used. [rules::c5_fit()] #' is a wrapper around this function. #' diff --git a/R/bag_mars.R b/R/bag_mars.R index 970dac731..a98169467 100644 --- a/R/bag_mars.R +++ b/R/bag_mars.R @@ -5,7 +5,7 @@ #' `bag_mars()` defines an ensemble of generalized linear models that use #' artificial features for some predictors. These features resemble hinge #' functions and the result is a model that is a segmented regression in small -#' dimensions. The function can fit classification and regression models. +#' dimensions. This function can fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_mars")} #' diff --git a/R/bag_tree.R b/R/bag_tree.R index ed4c1d3a3..fdfb746ba 100644 --- a/R/bag_tree.R +++ b/R/bag_tree.R @@ -2,7 +2,7 @@ #' #' @description #' -#' `bag_tree()` defines an ensemble of decision trees. The function can fit +#' `bag_tree()` defines an ensemble of decision trees. This function can fit #' classification, regression, and censored regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_tree")} diff --git a/R/bart.R b/R/bart.R index 5e51bc12c..20beb02b1 100644 --- a/R/bart.R +++ b/R/bart.R @@ -3,7 +3,7 @@ #' @description #' #' `bart()` defines a tree ensemble model that uses Bayesian analysis to -#' assemble the ensemble. The function can fit classification and regression +#' assemble the ensemble. This function can fit classification and regression #' models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bart")} diff --git a/R/boost_tree.R b/R/boost_tree.R index bb8f3d3e1..97d21624c 100644 --- a/R/boost_tree.R +++ b/R/boost_tree.R @@ -6,7 +6,7 @@ #' #' `boost_tree()` defines a model that creates a series of decision trees #' forming an ensemble. Each tree depends on the results of previous trees. -#' All trees in the ensemble are combined to produce a final prediction. The +#' All trees in the ensemble are combined to produce a final prediction. This #' function can fit classification, regression, and censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")} diff --git a/R/c5_rules.R b/R/c5_rules.R index b32ac08df..2c3cef0db 100644 --- a/R/c5_rules.R +++ b/R/c5_rules.R @@ -5,7 +5,7 @@ #' #' @description #' `C5_rules()` defines a model that derives feature rules from a tree for -#' prediction. A single tree or boosted ensemble can be used. The function can +#' prediction. A single tree or boosted ensemble can be used. This function can #' fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("C5_rules")} diff --git a/R/cubist_rules.R b/R/cubist_rules.R index 88452dc98..5459c40c2 100644 --- a/R/cubist_rules.R +++ b/R/cubist_rules.R @@ -2,7 +2,7 @@ #' #' @description #' `cubist_rules()` defines a model that derives simple feature rules from a tree -#' ensemble and creates regression models within each rule. 
The function can fit +#' ensemble and creates regression models within each rule. This function can fit #' regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("cubist_rules")} diff --git a/R/decision_tree.R b/R/decision_tree.R index ed44cc277..d0e08117b 100644 --- a/R/decision_tree.R +++ b/R/decision_tree.R @@ -4,7 +4,7 @@ #' #' @description #' `decision_tree()` defines a model as a set of `if/then` statements that -#' creates a tree-based structure. The function can fit classification, +#' creates a tree-based structure. This function can fit classification, #' regression, and censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")} diff --git a/R/discrim_flexible_earth.R b/R/discrim_flexible_earth.R index 6c8d146ea..5fa1268b0 100644 --- a/R/discrim_flexible_earth.R +++ b/R/discrim_flexible_earth.R @@ -2,7 +2,7 @@ #' #' [mda::fda()] (in conjunction with [earth::earth()] can fit a nonlinear #' discriminant analysis model that uses nonlinear features created using -#' multivariate adaptive regression splines (MARS). The function can fit +#' multivariate adaptive regression splines (MARS). This function can fit #' classification models. #' #' @includeRmd man/rmd/discrim_flexible_earth.md details diff --git a/R/discrim_linear.R b/R/discrim_linear.R index 281104964..1cfc93b69 100644 --- a/R/discrim_linear.R +++ b/R/discrim_linear.R @@ -5,7 +5,7 @@ #' `discrim_linear()` defines a model that estimates a multivariate #' distribution for the predictors separately for the data in each class #' (usually Gaussian with a common covariance matrix). Bayes' theorem is used -#' to compute the probability of each class, given the predictor values. The +#' to compute the probability of each class, given the predictor values. This #' function can fit classification models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_linear")} diff --git a/R/discrim_linear_sparsediscrim.R b/R/discrim_linear_sparsediscrim.R index ed99b5f0b..2a74ba6dc 100644 --- a/R/discrim_linear_sparsediscrim.R +++ b/R/discrim_linear_sparsediscrim.R @@ -1,6 +1,6 @@ #' Linear discriminant analysis via regularization #' -#' Functions in the `sparsediscrim` package fit different types of linear +#' Functions in the \pkg{sparsediscrim} package fit different types of linear #' discriminant analysis model that regularize the estimates (like the mean or #' covariance). #' diff --git a/R/discrim_quad.R b/R/discrim_quad.R index ba999a4a4..2edbef2c2 100644 --- a/R/discrim_quad.R +++ b/R/discrim_quad.R @@ -5,7 +5,7 @@ #' `discrim_quad()` defines a model that estimates a multivariate #' distribution for the predictors separately for the data in each class #' (usually Gaussian with separate covariance matrices). Bayes' theorem is used -#' to compute the probability of each class, given the predictor values. The +#' to compute the probability of each class, given the predictor values. This #' function can fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_quad")} diff --git a/R/discrim_quad_sparsediscrim.R b/R/discrim_quad_sparsediscrim.R index 2582d292d..1e8dd2e9f 100644 --- a/R/discrim_quad_sparsediscrim.R +++ b/R/discrim_quad_sparsediscrim.R @@ -1,6 +1,6 @@ #' Quadratic discriminant analysis via regularization #' -#' Functions in the `sparsediscrim` package fit different types of quadratic +#' Functions in the \pkg{sparsediscrim} package fit different types of quadratic #' discriminant analysis model that regularize the estimates (like the mean or #' covariance). 
#' diff --git a/R/discrim_regularized.R b/R/discrim_regularized.R index 397b02372..92f5f9548 100644 --- a/R/discrim_regularized.R +++ b/R/discrim_regularized.R @@ -6,7 +6,7 @@ #' distribution for the predictors separately for the data in each class. The #' structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' #' theorem is used to compute the probability of each class, given the -#' predictor values. The function can fit classification models. +#' predictor values. This function can fit classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_regularized")} #' diff --git a/R/engine_docs.R b/R/engine_docs.R index d51388e51..ca5a7203b 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -35,18 +35,18 @@ extensions <- function() { #' @description #' This function writes a tab delimited file to the package to capture #' information about the known models. This information includes packages in -#' the tidymodels GitHub repository as well as packages that are know to work +#' the tidymodels GitHub repository as well as packages that are known to work #' well with tidymodels packages (e.g. \pkg{tune}, etc.). There are likely #' other model definitions in other extension packages that are not included #' here that do not follow the #' [model implementation guidelines](https://tidymodels.github.io/model-implementation-principles) -#' or do not work with packages other than \pkg{parsnip}. +#' or do not work with tidymodels packages other than \pkg{parsnip}. #' #' These data are used to document engines for each model function man page. #' @keywords internal #' @param path A character string for the location of the tab delimited file. #' @details -#' It is highly recommended that the know parsnip extension packages are loaded. +#' It is highly recommended that the known parsnip extension packages are loaded. #' The unexported \pkg{parsnip} function `extensions()` will list these. 
#' @export update_model_info_file <- function(path = "inst/models.tsv") { @@ -104,14 +104,14 @@ update_model_info_file <- function(path = "inst/models.tsv") { #' `parsnip::linear_reg()` help can show a link to a detailed help page in the #' other package. #' -#' To enable this, the process for a package developer is to: +#' The process for a package developer to create \pkg{parsnip} documentation is: #' #' 1. Create an engine-specific R file in the `R` directory with the name #' `{model}_{engine}.R` (e.g. `boost_tree_C5.0.R`). This has a small amount of #' documentation, as well as the directives "`@name details_{model}_{engine}`" #' and "`@includeRmd man/rmd/{model}_{engine}.md details`". #' -#' 2. Copy the file in \pkg{parsnip} that is in `man/rmd/setup.Rmd` and put +#' 2. Copy the file in \pkg{parsnip} that is in `man/rmd/aaa.Rmd` and put #' it in the same place in your package. #' #' 3. Write your own `man/rmd/{model}_{engine}.Rmd` file. This can include @@ -119,7 +119,7 @@ update_model_info_file <- function(path = "inst/models.tsv") { #' required when the documentation file is created locally (probably using #' [devtools::document()]). #' -#' 4. Run [devtools::document()] so that the Rmd content is included in the +#' 4. Run [devtools::document()] so that the `.md` content is included in the #' Rd file. #' #' The examples in \pkg{parsnip} can provide guidance for how to organize @@ -175,7 +175,7 @@ make_engine_list <- function(mod) { ) } else { main <- paste( - "There are different ways to fit this model. The method of estimation is ", + "There are different ways to fit this model, and the method of estimation is ", "chosen by setting the model \\emph{engine}. 
The engine-specific pages ", "for this model are listed below.\n\n" ) diff --git a/R/gen_additive_mod.R b/R/gen_additive_mod.R index 06003d04e..a2a6b46d3 100644 --- a/R/gen_additive_mod.R +++ b/R/gen_additive_mod.R @@ -2,7 +2,7 @@ #' #' @description #' `gen_additive_mod()` defines a model that can use smoothed functions of -#' numeric predictors in a generalized linear model. The function can fit +#' numeric predictors in a generalized linear model. This function can fit #' classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("gen_additive_mod")} diff --git a/R/linear_reg.R b/R/linear_reg.R index 453544c10..8782ea82e 100644 --- a/R/linear_reg.R +++ b/R/linear_reg.R @@ -3,7 +3,7 @@ #' @description #' #' `linear_reg()` defines a model that can predict numeric values from -#' predictors using a linear function. The function can fit regression models. +#' predictors using a linear function. This function can fit regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")} #' diff --git a/R/linear_reg_gee.R b/R/linear_reg_gee.R index ed67b5a60..7c9ca8f5f 100644 --- a/R/linear_reg_gee.R +++ b/R/linear_reg_gee.R @@ -1,7 +1,7 @@ #' Linear regression via generalized estimating equations (GEE) #' #' `gee::gee()` uses generalized least squares to fit different types of models -#' that have errors that are not independent. +#' with errors that are not independent. #' #' @includeRmd man/rmd/linear_reg_gee.md details #' diff --git a/R/linear_reg_gls.R b/R/linear_reg_gls.R index ae9f739ae..209906a7d 100644 --- a/R/linear_reg_gls.R +++ b/R/linear_reg_gls.R @@ -1,6 +1,6 @@ #' Linear regression via generalized least squares #' -#' The `gls` engine estimates linear regression for models where the rows of the +#' The `"gls"` engine estimates linear regression for models where the rows of the #' data are not indpendent. 
#' #' @includeRmd man/rmd/linear_reg_gls.md details diff --git a/R/linear_reg_lme.R b/R/linear_reg_lme.R index 0fe58478d..7418a86e9 100644 --- a/R/linear_reg_lme.R +++ b/R/linear_reg_lme.R @@ -1,6 +1,6 @@ #' Linear regression via mixed models #' -#' The `lme` engine estimates fixed and random effect regression parameters +#' The `"lme"` engine estimates fixed and random effect regression parameters #' using maximum likelihood (or restricted maximum likelihood) estimation. #' #' @includeRmd man/rmd/linear_reg_lme.md details diff --git a/R/linear_reg_lmer.R b/R/linear_reg_lmer.R index 745a0e6ad..24aec3ff6 100644 --- a/R/linear_reg_lmer.R +++ b/R/linear_reg_lmer.R @@ -1,6 +1,6 @@ #' Linear regression via mixed models #' -#' The `lmer` engine estimates fixed and random effect regression parameters +#' The `"lmer"` engine estimates fixed and random effect regression parameters #' using maximum likelihood (or restricted maximum likelihood) estimation. #' #' @includeRmd man/rmd/linear_reg_lmer.md details diff --git a/R/linear_reg_stan_glmer.R b/R/linear_reg_stan_glmer.R index f6c8d7c52..091c4468f 100644 --- a/R/linear_reg_stan_glmer.R +++ b/R/linear_reg_stan_glmer.R @@ -1,6 +1,6 @@ #' Linear regression via hierarchical Bayesian methods #' -#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' The `"stan_glmer"` engine estimates hierarchical regression parameters using #' Bayesian estimation. #' #' @includeRmd man/rmd/linear_reg_stan_glmer.md details diff --git a/R/logistic_reg.R b/R/logistic_reg.R index e16cba54f..524b50e76 100644 --- a/R/logistic_reg.R +++ b/R/logistic_reg.R @@ -3,7 +3,7 @@ #' @description #' [logistic_reg()] defines a generalized linear model for binary outcomes. A #' linear combination of the predictors is used to model the log odds of an -#' event. The function can fit classification models. +#' event. This function can fit classification models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")} #' diff --git a/R/logistic_reg_gee.R b/R/logistic_reg_gee.R index 3fd05cd50..5f56b0b01 100644 --- a/R/logistic_reg_gee.R +++ b/R/logistic_reg_gee.R @@ -1,7 +1,7 @@ #' Logistic regression via generalized estimating equations (GEE) #' #' `gee::gee()` uses generalized least squares to fit different types of models -#' that have errors that are not independent. +#' with errors that are not independent. #' #' @includeRmd man/rmd/logistic_reg_gee.md details #' diff --git a/R/logistic_reg_glmer.R b/R/logistic_reg_glmer.R index ade514a1b..73db9ac8e 100644 --- a/R/logistic_reg_glmer.R +++ b/R/logistic_reg_glmer.R @@ -1,6 +1,6 @@ #' Logistic regression via mixed models #' -#' The `glmer` engine estimates fixed and random effect regression parameters +#' The `"glmer"` engine estimates fixed and random effect regression parameters #' using maximum likelihood (or restricted maximum likelihood) estimation. #' #' @includeRmd man/rmd/logistic_reg_glmer.md details diff --git a/R/logistic_reg_stan_glmer.R b/R/logistic_reg_stan_glmer.R index 192a1eff1..f23eb8bb9 100644 --- a/R/logistic_reg_stan_glmer.R +++ b/R/logistic_reg_stan_glmer.R @@ -1,6 +1,6 @@ #' Logistic regression via hierarchical Bayesian methods #' -#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' The `"stan_glmer"` engine estimates hierarchical regression parameters using #' Bayesian estimation. #' #' @includeRmd man/rmd/logistic_reg_stan_glmer.md details diff --git a/R/mars.R b/R/mars.R index 70bb909f4..e9f3a4681 100644 --- a/R/mars.R +++ b/R/mars.R @@ -4,7 +4,7 @@ #' #' `mars()` defines a generalized linear model that uses artificial features for #' some predictors. These features resemble hinge functions and the result is -#' a model that is a segmented regression in small dimensions. The function can +#' a model that is a segmented regression in small dimensions. 
This function can #' fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")} diff --git a/R/mlp.R b/R/mlp.R index 13b6f5dec..5436d2568 100644 --- a/R/mlp.R +++ b/R/mlp.R @@ -2,7 +2,7 @@ #' #' @description #' `mlp()` defines a multilayer perceptron model (a.k.a. a single layer, -#' feed-forward neural network). The function can fit classification and +#' feed-forward neural network). This function can fit classification and #' regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")} diff --git a/R/mlp_brulee.R b/R/mlp_brulee.R index f2b522f5e..9238e61bd 100644 --- a/R/mlp_brulee.R +++ b/R/mlp_brulee.R @@ -1,6 +1,6 @@ #' Multilayer perceptron via brulee #' -#' [brulee::brulee_mlp()] fits a neural networks. +#' [brulee::brulee_mlp()] fits a neural network. #' #' @includeRmd man/rmd/mlp_brulee.md details #' diff --git a/R/multinom_reg.R b/R/multinom_reg.R index 65c644f15..ca5106ecb 100644 --- a/R/multinom_reg.R +++ b/R/multinom_reg.R @@ -3,7 +3,7 @@ #' @description #' #' `multinom_reg()` defines a model that uses linear predictors to predict -#' multiclass data using the multinomial distribution. The function can fit +#' multiclass data using the multinomial distribution. This function can fit #' classification models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")} diff --git a/R/naive_Bayes.R b/R/naive_Bayes.R index 347eb1a07..6a94ede89 100644 --- a/R/naive_Bayes.R +++ b/R/naive_Bayes.R @@ -3,7 +3,7 @@ #' @description #' #' `naive_Bayes()` defines a model that uses Bayes' theorem to compute the -#' probability of each class, given the predictor values. The function can fit +#' probability of each class, given the predictor values. This function can fit #' classification models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("naive_Bayes")} diff --git a/R/nullmodel.R b/R/nullmodel.R index 6b7c88beb..85de11a08 100644 --- a/R/nullmodel.R +++ b/R/nullmodel.R @@ -128,7 +128,7 @@ predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) { #' Null model #' #' `null_model()` defines a simple, non-informative model. It doesn't have any -#' main arguments. The function can fit classification and regression models. +#' main arguments. This function can fit classification and regression models. #' #' @inheritParams boost_tree #' @details The model can be created using the `fit()` function using the diff --git a/R/pls.R b/R/pls.R index e1bf1d089..2d2a2c6c6 100644 --- a/R/pls.R +++ b/R/pls.R @@ -3,7 +3,7 @@ #' @description #' `pls()` defines a partial least squares model that uses latent variables to #' model the data. It is similar to a supervised version of principal component. -#' The function can fit classification and regression models. +#' This function can fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("pls")} #' diff --git a/R/poisson_reg.R b/R/poisson_reg.R index 616b11b2b..7ecdd02c0 100644 --- a/R/poisson_reg.R +++ b/R/poisson_reg.R @@ -3,7 +3,7 @@ #' @description #' #' `poisson_reg()` defines a generalized linear model for count data that follow -#' a Poisson distribution. The function can fit regression models. +#' a Poisson distribution. This function can fit regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} #' diff --git a/R/poisson_reg_gee.R b/R/poisson_reg_gee.R index 0d8e31a42..0472abc39 100644 --- a/R/poisson_reg_gee.R +++ b/R/poisson_reg_gee.R @@ -1,7 +1,7 @@ #' Poisson regression via generalized estimating equations (GEE) #' #' `gee::gee()` uses generalized least squares to fit different types of models -#' that have errors that are not independent. +#' with errors that are not independent. 
#' #' @includeRmd man/rmd/poisson_reg_gee.md details #' diff --git a/R/poisson_reg_glmer.R b/R/poisson_reg_glmer.R index 2f1f3d6df..c12ad1f6e 100644 --- a/R/poisson_reg_glmer.R +++ b/R/poisson_reg_glmer.R @@ -1,6 +1,6 @@ #' Poisson regression via mixed models #' -#' The `glmer` engine estimates fixed and random effect regression parameters +#' The `"glmer"` engine estimates fixed and random effect regression parameters #' using maximum likelihood (or restricted maximum likelihood) estimation. #' #' @includeRmd man/rmd/poisson_reg_glmer.md details diff --git a/R/poisson_reg_stan_glmer.R b/R/poisson_reg_stan_glmer.R index 0edc2249c..4a7732e57 100644 --- a/R/poisson_reg_stan_glmer.R +++ b/R/poisson_reg_stan_glmer.R @@ -1,6 +1,6 @@ #' Poisson regression via hierarchical Bayesian methods #' -#' The `stan_glmer` engine estimates hierarchical regression parameters using +#' The `"stan_glmer"` engine estimates hierarchical regression parameters using #' Bayesian estimation. #' #' @includeRmd man/rmd/poisson_reg_stan_glmer.md details diff --git a/R/proportional_hazards.R b/R/proportional_hazards.R index 5d8e2b7a6..695f91717 100644 --- a/R/proportional_hazards.R +++ b/R/proportional_hazards.R @@ -1,9 +1,9 @@ #' Proportional hazards regression #' #' @description -#' `proportional_hazards()` defines a technique that models the hazard function -#' as a multiplicative function of covariates times a baseline hazard. The -#' function can fit censored regression models. +#' `proportional_hazards()` defines a model for the hazard function +#' as a multiplicative function of covariates times a baseline hazard. This +#' function can fit censored regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("proportional_hazards")} #' diff --git a/R/rule_fit_xrf.R b/R/rule_fit_xrf.R index 71e3d4f01..c72efb1fe 100644 --- a/R/rule_fit_xrf.R +++ b/R/rule_fit_xrf.R @@ -1,7 +1,7 @@ #' RuleFit models via xrf #' #' [xrf::xrf()] fits a model that derives simple feature rules from a tree -#' ensemble and uses them as features to a regularized model. [rules::xrf_fit()] +#' ensemble and uses the rules as features to a regularized model. [rules::xrf_fit()] #' is a wrapper around this function. #' #' @includeRmd man/rmd/rule_fit_xrf.md details diff --git a/R/svm_poly.R b/R/svm_poly.R index 2ba82ce36..bb5c0ce06 100644 --- a/R/svm_poly.R +++ b/R/svm_poly.R @@ -3,10 +3,10 @@ #' @description #' #' `svm_poly()` defines a support vector machine model. For classification, -#' the model tries to maximize the width of the margin between classes (using a -#' polynomial class boundary). For regression, the model optimizes a robust loss -#' function that is only affected by very large model residuals (via polynomial -#' functions of the predictors). The function can fit classification and +#' the model tries to maximize the width of the margin between classes using a +#' polynomial class boundary. For regression, the model optimizes a robust loss +#' function that is only affected by very large model residuals and uses polynomial +#' functions of the predictors. This function can fit classification and #' regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")} diff --git a/man-roxygen/spec-references.R b/man-roxygen/spec-references.R index e47ceda3e..a55519947 100644 --- a/man-roxygen/spec-references.R +++ b/man-roxygen/spec-references.R @@ -1 +1 @@ -#' @references \url{https://www.tidymodels.org}, [_Tidy Modeling with R_](https://www.tmwr.org/), [searchable list of parsnip models](https://www.tidymodels.org/find/parsnip/) +#' @references \url{https://www.tidymodels.org}, [_Tidy Modeling with R_](https://www.tmwr.org/), [searchable table of parsnip models](https://www.tidymodels.org/find/parsnip/) diff --git a/man/rmd/C5_rules_C5.0.Rmd b/man/rmd/C5_rules_C5.0.Rmd index 7f9d33d5f..2db867139 100644 --- a/man/rmd/C5_rules_C5.0.Rmd +++ b/man/rmd/C5_rules_C5.0.Rmd @@ -22,7 +22,7 @@ This model has `r nrow(param)` tuning parameters: param$item ``` -Note that C5.0 has a tool for _early stopping_ during boosting where less iterations of boosting are performed than the number requested. `C5_rules()` turns this feature off (although it can be re-enabled using [C50::C5.0Control()]). +Note that C5.0 has a tool for _early stopping_ during boosting where less iterations of boosting are performed than the number requested. `C5_rules()` turns this feature off (although it can be re-enabled using [C50::C5.0Control()]). ## Translation from parsnip to the underlying model call (classification) diff --git a/man/rmd/decision_tree_party.Rmd b/man/rmd/decision_tree_party.Rmd index 9beac0bc6..06fa585fe 100644 --- a/man/rmd/decision_tree_party.Rmd +++ b/man/rmd/decision_tree_party.Rmd @@ -25,7 +25,7 @@ param$item The `tree_depth` parameter defaults to `0` which means no restrictions are applied to tree depth. -An engine specific parameter for this model is: +An engine-specific parameter for this model is: * `mtry`: the number of predictors, selected at random, that are evaluated for splitting. The default is to use all predictors. 
diff --git a/man/rmd/discrim_flexible_earth.Rmd b/man/rmd/discrim_flexible_earth.Rmd index e09e2fdbe..76a631b65 100644 --- a/man/rmd/discrim_flexible_earth.Rmd +++ b/man/rmd/discrim_flexible_earth.Rmd @@ -13,7 +13,7 @@ defaults <- param <- discrim_flexible() %>% set_engine("earth") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/discrim_linear_mda.Rmd b/man/rmd/discrim_linear_mda.Rmd index e931a4617..9430e2fa1 100644 --- a/man/rmd/discrim_linear_mda.Rmd +++ b/man/rmd/discrim_linear_mda.Rmd @@ -14,7 +14,7 @@ defaults <- param <- discrim_linear() %>% set_engine("mda") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/discrim_linear_sparsediscrim.Rmd b/man/rmd/discrim_linear_sparsediscrim.Rmd index 5b6d3bd5d..cdd4c12c7 100644 --- a/man/rmd/discrim_linear_sparsediscrim.Rmd +++ b/man/rmd/discrim_linear_sparsediscrim.Rmd @@ -13,7 +13,7 @@ defaults <- param <- discrim_linear() %>% set_engine("sparsediscrim") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/discrim_quad_sparsediscrim.Rmd b/man/rmd/discrim_quad_sparsediscrim.Rmd index 2d8c6aaa0..17f1e4973 100644 --- a/man/rmd/discrim_quad_sparsediscrim.Rmd +++ b/man/rmd/discrim_quad_sparsediscrim.Rmd @@ -13,7 +13,7 @@ defaults <- param <- discrim_quad() %>% set_engine("sparsediscrim") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/discrim_regularized_klaR.Rmd b/man/rmd/discrim_regularized_klaR.Rmd index cc1f0e836..582491bc8 100644 --- a/man/rmd/discrim_regularized_klaR.Rmd +++ b/man/rmd/discrim_regularized_klaR.Rmd @@ -14,7 +14,7 @@ defaults <- param <- discrim_regularized() %>% set_engine("klaR") %>% -make_parameter_list(defaults) + 
make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/example_mlm.Rmd b/man/rmd/example_mlm.Rmd index a84188ba0..be6054cde 100644 --- a/man/rmd/example_mlm.Rmd +++ b/man/rmd/example_mlm.Rmd @@ -1,13 +1,13 @@ ```{r quiet-load, include = FALSE} library(tidymodels) library(multilevelmod) -library(poissonreg) # current required for poisson_reg() +library(poissonreg) # contains engines for poisson_reg() ``` ```{r, message = FALSE, warning = FALSE} library(tidymodels) library(multilevelmod) -library(poissonreg) # current required for poisson_reg() +library(poissonreg) # contains engines for poisson_reg() # The lme4 package is required for this model. diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd index 5a54c7e9f..90b721d75 100644 --- a/man/rmd/linear_reg_gee.Rmd +++ b/man/rmd/linear_reg_gee.Rmd @@ -5,7 +5,7 @@ ## Tuning Parameters -This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. ## Translation from parsnip to the original package @@ -20,7 +20,7 @@ linear_reg() %>% translate() ``` -`multilevelmod::gee_fit()` is a wrapper model around `gee()`. +`multilevelmod::gee_fit()` is a wrapper model around `gee::gee()`. ## Preprocessing requirements @@ -31,13 +31,13 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model cannot accept case weights. -Both `gee:gee(a)` and `gee:geepack()` specifies the id/cluster variable using an argument `id` that requires a vector. 
parsnip doesn't work that way so we enable this model to be fit using a artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: ```r gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") ``` -With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -47,7 +47,7 @@ linear_reg() %>% fit(breaks ~ tension + id_var(wool), data = warpbreaks) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: ```r library(tidymodels) @@ -65,7 +65,7 @@ gee_wflow <- fit(gee_wflow, data = warpbreaks) ``` -`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. +`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `"gee"` engine, it will never produce output, even if `silent = FALSE`. Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used.
diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index cb5334466..e58da313e 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ b/man/rmd/linear_reg_gls.Rmd @@ -29,7 +29,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. -With `parsnip`, we suggest using the _fixed effects_ formula method when fitting but the details of the correlation structure should be passed to `set_engine()` since it is an irregular (but required) argument: +With parsnip, we suggest using the _fixed effects_ formula method when fitting, but the details of the correlation structure should be passed to `set_engine()` since it is an irregular (but required) argument: ```{r} library(tidymodels) @@ -43,7 +43,7 @@ linear_reg() %>% fit(depr_score ~ week, data = riesby) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) @@ -65,8 +65,8 @@ fit(gls_wflow, data = riesby) Note that [nlme::lme()] and [nlme::gls()] can fit the same model but will count degrees of freedom differently. If there are `n` data points, `p` fixed effects parameters, and `q` random effect parameters, the residual degrees of freedom are: -* `lme`: n - p - q -* `gls`: n - p +* **lme**: n - p - q +* **gls**: n - p As a result, p-values will be different. 
For example, we can fit the same model using different estimation methods (assuming a positive covariance value): diff --git a/man/rmd/linear_reg_lme.Rmd b/man/rmd/linear_reg_lme.Rmd index 7f4eb5071..63df4ab93 100644 --- a/man/rmd/linear_reg_lme.Rmd +++ b/man/rmd/linear_reg_lme.Rmd @@ -31,7 +31,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. -With `parsnip`, we suggest using the _fixed effects_ formula method when fitting but the random effects formula should be passed to `set_engine()` since it is an irregular (but required) argument: +With parsnip, we suggest using the _fixed effects_ formula method when fitting, but the random effects formula should be passed to `set_engine()` since it is an irregular (but required) argument: ```r library(tidymodels) @@ -42,7 +42,7 @@ linear_reg() %>% fit(depr_score ~ week, data = riesby) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/linear_reg_lmer.Rmd b/man/rmd/linear_reg_lmer.Rmd index 742a2da8e..976956223 100644 --- a/man/rmd/linear_reg_lmer.Rmd +++ b/man/rmd/linear_reg_lmer.Rmd @@ -31,7 +31,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. 
-With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -42,7 +42,7 @@ linear_reg() %>% fit(depr_score ~ week + (1|subject), data = riesby) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/linear_reg_stan_glmer.Rmd b/man/rmd/linear_reg_stan_glmer.Rmd index 787618b2b..b2d76f8c7 100644 --- a/man/rmd/linear_reg_stan_glmer.Rmd +++ b/man/rmd/linear_reg_stan_glmer.Rmd @@ -11,7 +11,7 @@ This model has no tuning parameters. Some relevant arguments that can be passed to `set_engine()`: - * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. * `seed`: The seed for random number generation. * `cores`: Number of cores to use when executing the chains in parallel. @@ -44,7 +44,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. -With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -55,7 +55,7 @@ linear_reg() %>% fit(depr_score ~ week + (1|subject), data = riesby) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. 
In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) @@ -73,7 +73,7 @@ glmer_wflow <- fit(glmer_wflow, data = riesby) ``` -For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. ## References diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd index d2603ad7f..588700a76 100644 --- a/man/rmd/logistic_reg_gee.Rmd +++ b/man/rmd/logistic_reg_gee.Rmd @@ -5,7 +5,7 @@ ## Tuning Parameters -This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. 
## Translation from parsnip to the original package @@ -19,7 +19,7 @@ logistic_reg() %>% translate() ``` -`multilevelmod::gee_fit()` is a wrapper model around `gee()`. +`multilevelmod::gee_fit()` is a wrapper model around `gee::gee()`. ## Preprocessing requirements @@ -30,7 +30,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model cannot accept case weights. -Both `gee:gee(a)` and `gee:geepack()` specifies the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: ```r gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") @@ -47,7 +47,7 @@ logistic_reg() %>% fit(outcome ~ treatment * visit + id_var(patientID), data = toenail) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/logistic_reg_glmer.Rmd b/man/rmd/logistic_reg_glmer.Rmd index 3f86442e2..4951fcf2c 100644 --- a/man/rmd/logistic_reg_glmer.Rmd +++ b/man/rmd/logistic_reg_glmer.Rmd @@ -30,7 +30,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights.
-With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -41,7 +41,7 @@ logistic_reg() %>% fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/logistic_reg_stan_glmer.Rmd b/man/rmd/logistic_reg_stan_glmer.Rmd index 4617eabdd..0c516680d 100644 --- a/man/rmd/logistic_reg_stan_glmer.Rmd +++ b/man/rmd/logistic_reg_stan_glmer.Rmd @@ -43,7 +43,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. -With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -54,7 +54,7 @@ logistic_reg() %>% fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) @@ -72,7 +72,7 @@ glmer_wflow <- fit(glmer_wflow, data = toenail) ``` -For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. 
In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. ## References diff --git a/man/rmd/naive_Bayes_klaR.Rmd b/man/rmd/naive_Bayes_klaR.Rmd index 05eba966f..e1669e977 100644 --- a/man/rmd/naive_Bayes_klaR.Rmd +++ b/man/rmd/naive_Bayes_klaR.Rmd @@ -14,7 +14,7 @@ defaults <- param <- naive_Bayes() %>% set_engine("klaR") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: diff --git a/man/rmd/naive_Bayes_naivebayes.Rmd b/man/rmd/naive_Bayes_naivebayes.Rmd index 73291a43c..dc02f66e3 100644 --- a/man/rmd/naive_Bayes_naivebayes.Rmd +++ b/man/rmd/naive_Bayes_naivebayes.Rmd @@ -14,7 +14,7 @@ defaults <- param <- naive_Bayes() %>% set_engine("naivebayes") %>% -make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameter: @@ -39,7 +39,7 @@ naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% The columns for qualitative predictors should always be represented as factors (as opposed to dummy/indicator variables). When the predictors are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. -For count data, integers can be estimated using a Poisson distribution if the augment `usepoisson = TRUE` is passed as an engine argument. +For count data, integers can be estimated using a Poisson distribution if the argument `usepoisson = TRUE` is passed as an engine argument. 
```{r child = "template-zv.Rmd"} ``` diff --git a/man/rmd/pls_mixOmics.Rmd b/man/rmd/pls_mixOmics.Rmd index c9725279d..b4d31800c 100644 --- a/man/rmd/pls_mixOmics.Rmd +++ b/man/rmd/pls_mixOmics.Rmd @@ -37,7 +37,7 @@ pls(num_comp = integer(1), predictor_prop = double(1)) %>% translate() ``` -[plsmod::pls_fit()] is a function that +[plsmod::pls_fit()] is a function that: - Determines the number of predictors in the data. - Adjusts `num_comp` if the value is larger than the number of factors. diff --git a/man/rmd/poisson_reg_gee.Rmd b/man/rmd/poisson_reg_gee.Rmd index a8d8a3b2e..0c80fcd17 100644 --- a/man/rmd/poisson_reg_gee.Rmd +++ b/man/rmd/poisson_reg_gee.Rmd @@ -5,7 +5,7 @@ -This model has no formal tuning parameters. It might be beneficial to determine the appropriate correlation structure to use. However, this typically does not affect the predicted value of the model but does have an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. ## Translation from parsnip to the original package @@ -30,13 +30,13 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model cannot accept case weights. -Both `gee:gee(a)` and `gee:geepack()` specifies the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function called `id_var()` to be used in the formula. So, in the original package, the call would look like: +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector.
parsnip doesn't work that way so we enable this model to be fit using an artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: ```r gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") ``` -With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -46,7 +46,7 @@ poisson_reg() %>% fit(y ~ time + x + id_var(subject), data = longitudinal_counts) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/poisson_reg_glmer.Rmd b/man/rmd/poisson_reg_glmer.Rmd index b7c65ce3b..d330b2f33 100644 --- a/man/rmd/poisson_reg_glmer.Rmd +++ b/man/rmd/poisson_reg_glmer.Rmd @@ -30,7 +30,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. -With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -40,7 +40,7 @@ poisson_reg() %>% fit(y ~ time + x + (1 | subject), data = longitudinal_counts) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow.
In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) diff --git a/man/rmd/poisson_reg_glmnet.Rmd b/man/rmd/poisson_reg_glmnet.Rmd index ffe7bd20e..237dae9b1 100644 --- a/man/rmd/poisson_reg_glmnet.Rmd +++ b/man/rmd/poisson_reg_glmnet.Rmd @@ -45,6 +45,5 @@ poisson_reg(penalty = double(1), mixture = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` - By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data. diff --git a/man/rmd/poisson_reg_stan.Rmd b/man/rmd/poisson_reg_stan.Rmd index b1166d615..d82a3d9d9 100644 --- a/man/rmd/poisson_reg_stan.Rmd +++ b/man/rmd/poisson_reg_stan.Rmd @@ -41,7 +41,7 @@ Note that the `refresh` default prevents logging of the estimation process. Chan ## Other details -For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. ## Examples diff --git a/man/rmd/poisson_reg_stan_glmer.Rmd b/man/rmd/poisson_reg_stan_glmer.Rmd index 9002a2a58..f912e1d21 100644 --- a/man/rmd/poisson_reg_stan_glmer.Rmd +++ b/man/rmd/poisson_reg_stan_glmer.Rmd @@ -43,7 +43,7 @@ There are no specific preprocessing needs. However, it is helpful to keep the cl The model can accept case weights. 
-With `parsnip`, we suggest using the formula method when fitting: +With parsnip, we suggest using the formula method when fitting: ```r library(tidymodels) @@ -53,7 +53,7 @@ poisson_reg() %>% fit(y ~ time + x + (1 | subject), data = longitudinal_counts) ``` -When using the general tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: ```r library(tidymodels) @@ -71,7 +71,7 @@ glmer_wflow <- fit(glmer_wflow, data = longitudinal_counts) ``` -For prediction, the `stan_glmer` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. ## References diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd index bd0ae7126..2729003bb 100644 --- a/man/rmd/proportional_hazards_glmnet.Rmd +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -54,7 +54,7 @@ The model does not fit an intercept. [glmnet::glmnet()] does not use the formula interface but, for consistency, this package requires a model formula. -The model formula can include _special_ terms, such as [survival::strata()]. 
The allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is mot recommended here. +The model formula can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is not recommended here. For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd index 206744b2f..b59ae5141 100644 --- a/man/rmd/rule_fit_xrf.Rmd +++ b/man/rmd/rule_fit_xrf.Rmd @@ -73,13 +73,13 @@ rule_fit( Note that, per the documentation in `?xrf`, transformations of the response variable are not supported. To use these with `rule_fit()`, we recommend using a recipe instead of the formula method. -Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in `rules`. Some differences in default values are: +Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in **rules**. Some differences in default values are: - `trees`: `xrf`: 100, `rules`: 15 - `max_depth`: `xrf`: 3, `rules`: 6 -These differences will create a difference in the values of the `penalty` argument that `glmnet` uses. Also, \pkg{rules} can also set `penalty` whereas \pkg{xrf} uses an internal 5-fold cross-validation to determine it (by default). +These differences will create a disparity in the values of the `penalty` argument that **glmnet** uses. 
Also, **rules** can also set `penalty` whereas **xrf** uses an internal 5-fold cross-validation to determine it (by default). ## Preprocessing requirements diff --git a/man/rmd/surv_reg_flexsurv.Rmd b/man/rmd/surv_reg_flexsurv.Rmd index 967d62103..0fd521f4a 100644 --- a/man/rmd/surv_reg_flexsurv.Rmd +++ b/man/rmd/surv_reg_flexsurv.Rmd @@ -14,7 +14,7 @@ param <- surv_reg() %>% set_engine("flexsurv") %>% set_mode("regression") %>% - make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/surv_reg_survival.Rmd b/man/rmd/surv_reg_survival.Rmd index d9713650d..bb54fe980 100644 --- a/man/rmd/surv_reg_survival.Rmd +++ b/man/rmd/surv_reg_survival.Rmd @@ -14,7 +14,7 @@ param <- surv_reg() %>% set_engine("survival") %>% set_mode("regression") %>% - make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: diff --git a/man/rmd/survival_reg_flexsurv.Rmd b/man/rmd/survival_reg_flexsurv.Rmd index b0ed8bc3f..3e244ae26 100644 --- a/man/rmd/survival_reg_flexsurv.Rmd +++ b/man/rmd/survival_reg_flexsurv.Rmd @@ -14,7 +14,7 @@ param <- survival_reg() %>% set_engine("flexsurv") %>% set_mode("censored regression") %>% - make_parameter_list(defaults) + make_parameter_list(defaults) ``` This model has `r nrow(param)` tuning parameters: @@ -40,7 +40,7 @@ survival_reg(dist = character(1)) %>% The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. -For this engine, stratification cannot be specified via [`strata()`], please see the documentation of the [`flexsurv`] package for alternative specifications. +For this engine, stratification cannot be specified via [strata()]; please see the documentation of the [flexsurv] package for alternative specifications. 
## References From 55259c62444450fcff4e9a5f148678ee25265ed6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 12 Jan 2022 15:35:09 -0800 Subject: [PATCH 43/65] add install_engine_packages --- DESCRIPTION | 1 + R/install_packages.R | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 R/install_packages.R diff --git a/DESCRIPTION b/DESCRIPTION index 0d7d2589c..8179dd242 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,6 +53,7 @@ Suggests: nlme, randomForest, ranger (>= 0.12.0), + remotes, rmarkdown, rpart, sparklyr (>= 1.0.0), diff --git a/R/install_packages.R b/R/install_packages.R new file mode 100644 index 000000000..f1f7cf58c --- /dev/null +++ b/R/install_packages.R @@ -0,0 +1,34 @@ +install_engine_packages <- function(extension = TRUE, + ignore_pkgs = c("stats", "liquidSVM", + "parsnip")) { + bio_pkgs <- c() + + if (extension) { + extensions_packages <- extensions() + repositories <- glue::glue("tidymodels/{extensions_packages}") + + remotes::install_github(repositories) + + extensions_packages <- extensions() + purrr::walk(extensions_packages, library, character.only = TRUE) + bio_pkgs <- c(bio_pkgs, "mixOmics") + } + + engine_packages <- purrr::map_dfr( + ls(envir = get_model_env(), pattern = "_pkgs$"), + get_from_env + ) %>% + dplyr::pull(pkg) %>% + unlist() %>% + unique() %>% + setdiff(ignore_pkgs) %>% + setdiff(bio_pkgs) + + if (extension) { + engine_packages <- setdiff(engine_packages, extensions_packages) + } + + remotes::install_cran(engine_packages) + + remotes::install_bioc(bio_pkgs) +} From 034012d7e9989e8a4913f11553ca1360af21c796 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 13 Jan 2022 13:33:50 -0800 Subject: [PATCH 44/65] add list_md_problems() to find errors and warnings in engine specific docs --- R/engine_docs.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/engine_docs.R b/R/engine_docs.R index ca5a7203b..f51a8fbca 100644 --- a/R/engine_docs.R +++ 
b/R/engine_docs.R @@ -304,3 +304,23 @@ combine_prefix_with_engines <- function(prefix, engines) { glue::glue("{prefix} {engines}") } +# ------------------------------------------------------------------------------ + +#' Locate and show errors/warnings in engine-specific documentation +#' @return A tibble with column `file` for the file name, `line` indicating +#' the line where the error/warning occurred, and `problem` showing the +#' error/warning message. +#' @keywords internal +#' @export +list_md_problems <- function() { + md_files <- list.files("man/rmd", pattern = "\\.md", full.names = TRUE) + + get_errors <- function(file) { + lines <- readLines(file) + line <- grep("## (Error|Warning)", lines) + problem <- lines[line] + tibble(file, line, problem) + } + + purrr::map_dfr(md_files, get_errors) +} From 9ca28863c30e9682a8f31957c95281ea3037f735 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 13 Jan 2022 13:36:45 -0800 Subject: [PATCH 45/65] document list_md_problems() --- NAMESPACE | 1 + man/list_md_problems.Rd | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 man/list_md_problems.Rd diff --git a/NAMESPACE b/NAMESPACE index 75920e7fa..4ad6b288c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -204,6 +204,7 @@ export(is_varying) export(keras_mlp) export(knit_engine_docs) export(linear_reg) +export(list_md_problems) export(logistic_reg) export(make_call) export(make_classes) diff --git a/man/list_md_problems.Rd b/man/list_md_problems.Rd new file mode 100644 index 000000000..0d07ff357 --- /dev/null +++ b/man/list_md_problems.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/engine_docs.R +\name{list_md_problems} +\alias{list_md_problems} +\title{Locate and show errors/warnings in engine-specific documentation} +\usage{ +list_md_problems() +} +\value{ +A tibble with column \code{file} for the file name, \code{line} indicating +the line where the error/warning occurred, and \code{problem} 
showing the +error/warning message. +} +\description{ +Locate and show errors/warnings in engine-specific documentation +} +\keyword{internal} From 9616f677598fb004fe88c7860ac01c5f3ac080c8 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:32:08 -0500 Subject: [PATCH 46/65] add a few more packages --- R/install_packages.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/install_packages.R b/R/install_packages.R index f1f7cf58c..5c157b486 100644 --- a/R/install_packages.R +++ b/R/install_packages.R @@ -1,4 +1,4 @@ -install_engine_packages <- function(extension = TRUE, +install_engine_packages <- function(extension = TRUE, extras = TRUE, ignore_pkgs = c("stats", "liquidSVM", "parsnip")) { bio_pkgs <- c() @@ -28,6 +28,11 @@ install_engine_packages <- function(extension = TRUE, engine_packages <- setdiff(engine_packages, extensions_packages) } + if (extras) { + rmd_pkgs <- c("tidymodels", "broom.mixed", "glmnet", "Cubist", "xrf", "ape") + engine_packages <- setdiff(engine_packages, rmd_pkgs) + } + remotes::install_cran(engine_packages) remotes::install_bioc(bio_pkgs) From bd9d0e32fefca18b97d3f01c2283c089fd155977 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:32:29 -0500 Subject: [PATCH 47/65] add issue summary post-knit --- R/engine_docs.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index f51a8fbca..39c845a4b 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -1,6 +1,7 @@ #' Knit engine-specific documentation #' @param pattern A regular expression to specify which files to knit. The #' default knits all engine documentation files. +#' @param ... Options passed to [knitr::knit()]. #' @return A tibble with column `file` for the file name and `result` (a #' character vector that echos the output file name or, when there is #' a failure, the error message). 
@@ -13,13 +14,22 @@ knit_engine_docs <- function(pattern = NULL) { target_exists <- grepl(pattern, rmd_files) files <- rmd_files[target_exists] } else { - files <- rmd_files[!grepl("(template-)|(setup\\.)", rmd_files)] + files <- rmd_files[!grepl("(template-)|(setup\\.)|(aaa\\.)", rmd_files)] } outputs <- gsub("Rmd$", "md", files) res <- purrr::map2(files, outputs, ~ try(knitr::knit(.x, .y), silent = TRUE)) res <- purrr::map_chr(res, as.character) - tibble::tibble(file = basename(files), result = res) + + issues <- list_md_problems() + if (nrow(issues) > 0) { + cat("There are some issues with the help files:\n") + print(issues) + } else { + cat("No issues found in the help files.\n\n") + } + + invisible(tibble::tibble(file = basename(files), result = res)) } # ------------------------------------------------------------------------------ @@ -319,7 +329,7 @@ list_md_problems <- function() { lines <- readLines(file) line <- grep("## (Error|Warning)", lines) problem <- lines[line] - tibble(file, line, problem) + tibble(basename(file), line, problem) } purrr::map_dfr(md_files, get_errors) From a0f310ecc6912cba81a4e4382f1a3b99d9291aea Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:33:14 -0500 Subject: [PATCH 48/65] doc update --- man/C5_rules.Rd | 4 +- man/bag_mars.Rd | 4 +- man/bag_tree.Rd | 4 +- man/bart.Rd | 4 +- man/boost_tree.Rd | 4 +- man/cubist_rules.Rd | 4 +- man/decision_tree.Rd | 4 +- man/details_C5_rules_C5.0.Rd | 4 +- man/details_bag_mars_earth.Rd | 5 +- man/details_bag_tree_rpart.Rd | 12 ++--- man/details_bart_dbarts.Rd | 5 +- man/details_boost_tree_C5.0.Rd | 4 +- man/details_cubist_rules_Cubist.Rd | 2 +- man/details_decision_tree_party.Rd | 2 +- man/details_discrim_flexible_earth.Rd | 7 ++- man/details_discrim_linear_MASS.Rd | 5 +- man/details_discrim_linear_mda.Rd | 5 +- man/details_discrim_linear_sda.Rd | 5 +- man/details_discrim_linear_sparsediscrim.Rd | 7 ++- man/details_discrim_quad_MASS.Rd | 5 +- 
man/details_discrim_quad_sparsediscrim.Rd | 7 ++- man/details_discrim_regularized_klaR.Rd | 5 +- man/details_gen_additive_mod_mgcv.Rd | 5 +- man/details_linear_reg_brulee.Rd | 5 +- man/details_linear_reg_gee.Rd | 25 ++++----- man/details_linear_reg_glm.Rd | 5 +- man/details_linear_reg_glmnet.Rd | 5 +- man/details_linear_reg_gls.Rd | 16 +++--- man/details_linear_reg_keras.Rd | 5 +- man/details_linear_reg_lm.Rd | 5 +- man/details_linear_reg_lme.Rd | 12 ++--- man/details_linear_reg_lmer.Rd | 10 ++-- man/details_linear_reg_spark.Rd | 5 +- man/details_linear_reg_stan.Rd | 5 +- man/details_linear_reg_stan_glmer.Rd | 18 +++---- man/details_logistic_reg_LiblineaR.Rd | 5 +- man/details_logistic_reg_brulee.Rd | 5 +- man/details_logistic_reg_gee.Rd | 21 ++++---- man/details_logistic_reg_glm.Rd | 5 +- man/details_logistic_reg_glmer.Rd | 10 ++-- man/details_logistic_reg_glmnet.Rd | 5 +- man/details_logistic_reg_keras.Rd | 5 +- man/details_logistic_reg_spark.Rd | 5 +- man/details_logistic_reg_stan.Rd | 5 +- man/details_logistic_reg_stan_glmer.Rd | 18 +++---- man/details_mars_earth.Rd | 5 +- man/details_mlp_brulee.Rd | 7 ++- man/details_mlp_keras.Rd | 5 +- man/details_mlp_nnet.Rd | 5 +- man/details_multinom_reg_glmnet.Rd | 5 +- man/details_multinom_reg_keras.Rd | 5 +- man/details_multinom_reg_nnet.Rd | 5 +- man/details_multinom_reg_spark.Rd | 5 +- man/details_naive_Bayes_naivebayes.Rd | 2 +- man/details_nearest_neighbor_kknn.Rd | 5 +- man/details_pls_mixOmics.Rd | 14 +++--- man/details_poisson_reg_gee.Rd | 21 ++++---- man/details_poisson_reg_glm.Rd | 5 +- man/details_poisson_reg_glmer.Rd | 10 ++-- man/details_poisson_reg_glmnet.Rd | 11 ++-- man/details_poisson_reg_hurdle.Rd | 5 +- man/details_poisson_reg_stan.Rd | 7 ++- man/details_poisson_reg_stan_glmer.Rd | 18 +++---- man/details_poisson_reg_zeroinfl.Rd | 5 +- man/details_proportional_hazards_glmnet.Rd | 9 ++-- man/details_rule_fit_xrf.Rd | 17 +++---- man/details_survival_reg_flexsurv.Rd | 4 +- 
man/details_svm_linear_LiblineaR.Rd | 5 +- man/details_svm_linear_kernlab.Rd | 5 +- man/details_svm_poly_kernlab.Rd | 5 +- man/details_svm_rbf_kernlab.Rd | 5 +- man/discrim_flexible.Rd | 2 +- man/discrim_linear.Rd | 4 +- man/discrim_quad.Rd | 4 +- man/discrim_regularized.Rd | 4 +- man/doc-tools.Rd | 6 +-- man/gen_additive_mod.Rd | 4 +- man/knit_engine_docs.Rd | 2 + man/linear_reg.Rd | 4 +- man/logistic_reg.Rd | 4 +- man/mars.Rd | 4 +- man/mlp.Rd | 4 +- man/multinom_reg.Rd | 4 +- man/naive_Bayes.Rd | 4 +- man/nearest_neighbor.Rd | 2 +- man/null_model.Rd | 2 +- man/pls.Rd | 4 +- man/poisson_reg.Rd | 4 +- man/proportional_hazards.Rd | 8 +-- man/rand_forest.Rd | 2 +- man/rmd/aaa.Rmd | 3 +- man/rmd/linear_reg_gls.Rmd | 2 +- man/rmd/tidy-example.Rmd | 56 --------------------- man/rule_fit.Rd | 2 +- man/surv_reg.Rd | 2 +- man/survival_reg.Rd | 2 +- man/svm_linear.Rd | 2 +- man/svm_poly.Rd | 10 ++-- man/svm_rbf.Rd | 2 +- man/update_model_info_file.Rd | 6 +-- 100 files changed, 284 insertions(+), 383 deletions(-) delete mode 100644 man/rmd/tidy-example.Rmd diff --git a/man/C5_rules.Rd b/man/C5_rules.Rd index c067f38c1..eb29041ab 100644 --- a/man/C5_rules.Rd +++ b/man/C5_rules.Rd @@ -21,7 +21,7 @@ to use for fitting.} } \description{ \code{C5_rules()} defines a model that derives feature rules from a tree for -prediction. A single tree or boosted ensemble can be used. The function can +prediction. A single tree or boosted ensemble can be used. This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("C5_rules")} @@ -53,7 +53,7 @@ C5_rules() Quinlan R (1993). \emph{C4.5: Programs for Machine Learning}. Morgan Kaufmann Publishers. 
-\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \code{\link[C50:C5.0]{C50::C5.0()}}, \code{\link[C50:C5.0Control]{C50::C5.0Control()}}, diff --git a/man/bag_mars.Rd b/man/bag_mars.Rd index ea8efee77..8e40847f4 100644 --- a/man/bag_mars.Rd +++ b/man/bag_mars.Rd @@ -31,7 +31,7 @@ to use for fitting.} \code{bag_mars()} defines an ensemble of generalized linear models that use artificial features for some predictors. These features resemble hinge functions and the result is a model that is a segmented regression in small -dimensions. The function can fit classification and regression models. +dimensions. This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_mars")} @@ -46,7 +46,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bag_mars")} diff --git a/man/bag_tree.Rd b/man/bag_tree.Rd index 01c84bed3..eb98fa3d7 100644 --- a/man/bag_tree.Rd +++ b/man/bag_tree.Rd @@ -35,7 +35,7 @@ and one can be used to bias to the second level of the factor.} to use for fitting.} } \description{ -\code{bag_tree()} defines an ensemble of decision trees. 
The function can fit +\code{bag_tree()} defines an ensemble of decision trees. This function can fit classification, regression, and censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bag_tree")} @@ -51,7 +51,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bag_tree")} diff --git a/man/bart.Rd b/man/bart.Rd index 80da4841d..099a7c38d 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -43,7 +43,7 @@ on the logit scale). The default value is 2.} } \description{ \code{bart()} defines a tree ensemble model that uses Bayesian analysis to -assemble the ensemble. The function can fit classification and regression +assemble the ensemble. This function can fit classification and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("bart")} @@ -90,7 +90,7 @@ prior_test(coef = c(0.05, 0.5, .95), expo = c(1/2, 1, 2)) \%>\% facet_wrap(~ expo) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("bart")} diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index 3e1bd4c5a..99bc9d631 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -54,7 +54,7 @@ stopping (specific engines only).} \description{ \code{boost_tree()} defines a model that creates a series of decision trees forming an ensemble. Each tree depends on the results of previous trees. -All trees in the ensemble are combined to produce a final prediction. The +All trees in the ensemble are combined to produce a final prediction. This function can fit classification, regression, and censored regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")} @@ -75,7 +75,7 @@ show_engines("boost_tree") boost_tree(mode = "classification", trees = 20) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("boost_tree")}, diff --git a/man/cubist_rules.Rd b/man/cubist_rules.Rd index 7fe715af2..cb5dd1af8 100644 --- a/man/cubist_rules.Rd +++ b/man/cubist_rules.Rd @@ -29,7 +29,7 @@ to use for fitting.} } \description{ \code{cubist_rules()} defines a model that derives simple feature rules from a tree -ensemble and creates regression models within each rule. The function can fit +ensemble and creates regression models within each rule. This function can fit regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("cubist_rules")} @@ -76,7 +76,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 
diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index d156ac437..6fa48ff52 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -30,7 +30,7 @@ in a node that are required for the node to be split further.} } \description{ \code{decision_tree()} defines a model as a set of \verb{if/then} statements that -creates a tree-based structure. The function can fit classification, +creates a tree-based structure. This function can fit classification, regression, and censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")} @@ -51,7 +51,7 @@ show_engines("decision_tree") decision_tree(mode = "classification", tree_depth = 5) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("decision_tree")} diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index 8584e9455..134cae562 100644 --- a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -4,7 +4,7 @@ \alias{details_C5_rules_C5.0} \title{C5.0 rule-based classification models} \description{ -\code{\link[C50:C5.0]{C50::C5.0()}} fits model that derives feature rules from a tree for +\code{\link[C50:C5.0]{C50::C5.0()}} fits a model that derives feature rules from a tree for prediction. A single tree or boosted ensemble can be used. \code{\link[rules:rules-internal]{rules::c5_fit()}} is a wrapper around this function. } @@ -62,7 +62,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. 
-\item Quinlan R (1993).“Combining Instance-Based and Model-Based +\item Quinlan R (1993).”Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. \item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index 9d1b63095..708512286 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -81,9 +81,8 @@ bag_mars( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_bag_tree_rpart.Rd b/man/details_bag_tree_rpart.Rd index 7db27487e..efb873fbd 100644 --- a/man/details_bag_tree_rpart.Rd +++ b/man/details_bag_tree_rpart.Rd @@ -31,8 +31,8 @@ the second level of the factor. \subsection{Translation from parsnip to the original package (classification)}{ -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) +There is a parsnip extension package required to fit this model to this +mode: \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% @@ -56,8 +56,8 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 \subsection{Translation from parsnip to the original package (regression)}{ -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) +There is a parsnip extension package required to fit this model to this +mode: \strong{baguette}.\if{html}{\out{
}}\preformatted{library(baguette) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% @@ -81,8 +81,8 @@ bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1 \subsection{Translation from parsnip to the original package (censored regression)}{ -There are parsnip extension packages required to fit this model to this -mode: \strong{censored}, \strong{baguette}.\if{html}{\out{
}}\preformatted{library(censored) +There is a parsnip extension package required to fit this model to this +mode: \strong{censored}.\if{html}{\out{
}}\preformatted{library(censored) bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\% set_engine("rpart") \%>\% diff --git a/man/details_bart_dbarts.Rd b/man/details_bart_dbarts.Rd index 44fff3bc1..5fdcccc62 100644 --- a/man/details_bart_dbarts.Rd +++ b/man/details_bart_dbarts.Rd @@ -103,9 +103,8 @@ times number of observations. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. \code{\link[dbarts:bart]{dbarts::bart()}} will also convert the factors to indicators if the user does not create them first. diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd index 086c7a91c..1acce68f4 100644 --- a/man/details_boost_tree_C5.0.Rd +++ b/man/details_boost_tree_C5.0.Rd @@ -59,8 +59,8 @@ are not required for this model. By default, early stopping is used. To use the complete set of boosting iterations, pass \code{earlyStopping = FALSE} to -\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early -stopping will occur if \code{sample_size = 1}. +\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early stopping +will occur if \code{sample_size = 1}. } } diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd index b56984a82..d022f184d 100644 --- a/man/details_cubist_rules_Cubist.Rd +++ b/man/details_cubist_rules_Cubist.Rd @@ -61,7 +61,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. 
-\item Quinlan R (1993).“Combining Instance-Based and Model-Based +\item Quinlan R (1993).”Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. \item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. diff --git a/man/details_decision_tree_party.Rd b/man/details_decision_tree_party.Rd index 93a349fac..7f1ed6a49 100644 --- a/man/details_decision_tree_party.Rd +++ b/man/details_decision_tree_party.Rd @@ -20,7 +20,7 @@ This model has 2 tuning parameters: The \code{tree_depth} parameter defaults to \code{0} which means no restrictions are applied to tree depth. -An engine specific parameter for this model is: +An engine-specific parameter for this model is: \itemize{ \item \code{mtry}: the number of predictors, selected at random, that are evaluated for splitting. The default is to use all predictors. diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index b70091fd6..1fbe2806b 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -6,7 +6,7 @@ \description{ \code{\link[mda:fda]{mda::fda()}} (in conjunction with \code{\link[earth:earth]{earth::earth()}} can fit a nonlinear discriminant analysis model that uses nonlinear features created using -multivariate adaptive regression splines (MARS). The function can fit +multivariate adaptive regression splines (MARS). This function can fit classification models. } \details{ @@ -56,9 +56,8 @@ discrim_flexible( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
} \subsection{References}{ diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index 428e9d1d3..232a9f7f7 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -37,9 +37,8 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index b26442b43..bdbff23a2 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -43,9 +43,8 @@ discrim_linear(penalty = numeric(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index 7fbc118b3..95506f7e0 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -53,9 +53,8 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index 584b5f2ad..275dd538f 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -4,7 +4,7 @@ \alias{details_discrim_linear_sparsediscrim} \title{Linear discriminant analysis via regularization} \description{ -Functions in the \code{sparsediscrim} package fit different types of linear +Functions in the \pkg{sparsediscrim} package fit different types of linear discriminant analysis model that regularize the estimates (like the mean or covariance). } @@ -57,9 +57,8 @@ discrim_linear(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index 94d0f9117..ca1e8283d 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -37,9 +37,8 @@ discrim_quad() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index 9f4df2921..fc9bbef07 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -4,7 +4,7 @@ \alias{details_discrim_quad_sparsediscrim} \title{Quadratic discriminant analysis via regularization} \description{ -Functions in the \code{sparsediscrim} package fit different types of quadratic +Functions in the \pkg{sparsediscrim} package fit different types of quadratic discriminant analysis model that regularize the estimates (like the mean or covariance). } @@ -55,9 +55,8 @@ discrim_quad(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. 
For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index 0626055ea..d30120792 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -57,9 +57,8 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index 0f71a0f2c..c432bcd1a 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -87,9 +87,8 @@ the \code{adjust_deg_free} parameter. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd index 36bc2d425..4df9c8dfb 100644 --- a/man/details_linear_reg_brulee.Rd +++ b/man/details_linear_reg_brulee.Rd @@ -53,9 +53,8 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd index 85aa8fc6b..f74fbfd9f 100644 --- a/man/details_linear_reg_gee.Rd +++ b/man/details_linear_reg_gee.Rd @@ -5,16 +5,17 @@ \title{Linear regression via generalized estimating equations (GEE)} \description{ \code{gee::gee()} uses generalized least squares to fit different types of models -that have errors that are not independent. +with errors that are not independent. } \details{ For this engine, there is a single mode: regression \subsection{Tuning Parameters}{ -This model has no formal tuning parameters. It might be beneficial to -determine the appropriate correlation structure to use. However, this -typically does not affect the predicted value of the model but does have -an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to +determine the appropriate correlation structure to use, but this +typically does not affect the predicted value of the model. It \emph{does} +have an effect on the inferential results and parameter covariance +values. } \subsection{Translation from parsnip to the original package}{ @@ -35,7 +36,7 @@ linear_reg() \%>\% ## family = gaussian) } -\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee()}. +\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee::gee()}. } \subsection{Preprocessing requirements}{ @@ -50,22 +51,22 @@ next section. The model cannot accept case weights. 
-Both \code{gee:gee(a)} and \code{gee:geepack()} specifies the id/cluster variable +Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that -way so we enable this model to be fit using a artificial function called +way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) linear_reg() \%>\% set_engine("gee", corstr = "exchangeable") \%>\% fit(breaks ~ tension + id_var(wool), data = warpbreaks) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- @@ -82,7 +83,7 @@ fit(gee_wflow, data = warpbreaks) }\if{html}{\out{
}} \code{gee()} always prints out warnings and output even when \code{silent = TRUE}. -When using the \code{gee} engine, it will never produce output, even if +When using the \code{"gee"} engine, it will never produce output, even if \code{silent = FALSE}. Also, because of issues with the \code{gee()} function, a supplementary call diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index 0c78b37d1..89e6a18a2 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -49,9 +49,8 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index f1f499502..c1caebb96 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -46,9 +46,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index 8c2e0dfcc..c80a01770 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -4,7 +4,7 @@ \alias{details_linear_reg_gls} \title{Linear regression via generalized least squares} \description{ -The \code{gls} engine estimates linear regression for models where the rows of the +The \code{"gls"} engine estimates linear regression for models where the rows of the data are not indpendent. } \details{ @@ -44,9 +44,9 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the \emph{fixed effects} formula method when -fitting but the details of the correlation structure should be passed to -\code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the \emph{fixed effects} formula method when +fitting, but the details of the correlation structure should be passed +to \code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) # load nlme to be able to use the `cor*()` functions library(nlme) @@ -75,8 +75,8 @@ linear_reg() \%>\% ## Residual standard error: 6.868785 } -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) gls_spec <- @@ -99,8 +99,8 @@ can fit the same model but will count degrees of freedom differently. If there are \code{n} data points, \code{p} fixed effects parameters, and \code{q} random effect parameters, the residual degrees of freedom are: \itemize{ -\item \code{lme}: n - p - q -\item \code{gls}: n - p +\item \strong{lme}: n - p - q +\item \strong{gls}: n - p } As a result, p-values will be different. For example, we can fit the diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd index 250fa16c8..4f2630662 100644 --- a/man/details_linear_reg_keras.Rd +++ b/man/details_linear_reg_keras.Rd @@ -43,9 +43,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd index 43b156925..888386139 100644 --- a/man/details_linear_reg_lm.Rd +++ b/man/details_linear_reg_lm.Rd @@ -29,9 +29,8 @@ This engine has no tuning parameters. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
} \subsection{Examples}{ diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index a5ed1c5ee..900aef0c5 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -4,7 +4,7 @@ \alias{details_linear_reg_lme} \title{Linear regression via mixed models} \description{ -The \code{lme} engine estimates fixed and random effect regression parameters +The \code{"lme"} engine estimates fixed and random effect regression parameters using maximum likelihood (or restricted maximum likelihood) estimation. } \details{ @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -73,8 +73,8 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the \emph{fixed effects} formula method when -fitting but the random effects formula should be passed to +With parsnip, we suggest using the \emph{fixed effects} formula method when +fitting, but the random effects formula should be passed to \code{set_engine()} since it is an irregular (but required) argument:\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") @@ -83,8 +83,8 @@ linear_reg() \%>\% fit(depr_score ~ week, data = riesby) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) lme_spec <- diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index e2ea8de39..c923e0d5b 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -4,7 +4,7 @@ \alias{details_linear_reg_lmer} \title{Linear regression via mixed models} \description{ -The \code{lmer} engine estimates fixed and random effect regression parameters +The \code{"lmer"} engine estimates fixed and random effect regression parameters using maximum likelihood (or restricted maximum likelihood) estimation. } \details{ @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -73,7 +73,7 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") linear_reg() \%>\% @@ -81,8 +81,8 @@ linear_reg() \%>\% fit(depr_score ~ week + (1|subject), data = riesby) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) lmer_spec <- diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd index 84c6d09e9..6853f7041 100644 --- a/man/details_linear_reg_spark.Rd +++ b/man/details_linear_reg_spark.Rd @@ -45,9 +45,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd index c05a5e625..9e12761ec 100644 --- a/man/details_linear_reg_stan.Rd +++ b/man/details_linear_reg_stan.Rd @@ -57,9 +57,8 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
} \subsection{Other details}{ diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index 119c54b19..32b8b4536 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -4,7 +4,7 @@ \alias{details_linear_reg_stan_glmer} \title{Linear regression via hierarchical Bayesian methods} \description{ -The \code{stan_glmer} engine estimates hierarchical regression parameters using +The \code{"stan_glmer"} engine estimates hierarchical regression parameters using Bayesian estimation. } \details{ @@ -59,7 +59,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -94,7 +94,7 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) data("riesby") linear_reg() \%>\% @@ -102,8 +102,8 @@ linear_reg() \%>\% fit(depr_score ~ week + (1|subject), data = riesby) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- @@ -119,10 +119,10 @@ glmer_wflow <- fit(glmer_wflow, data = riesby) }\if{html}{\out{
}} -For prediction, the \code{stan_glmer} engine can compute posterior intervals -analogous to confidence and prediction intervals. In these instances, -the units are the original outcome and when \code{std_error = TRUE}, the -standard deviation of the posterior distribution (or posterior +For prediction, the \code{"stan_glmer"} engine can compute posterior +intervals analogous to confidence and prediction intervals. In these +instances, the units are the original outcome. When \code{std_error = TRUE}, +the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index a809583ab..57c7a64e1 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -50,9 +50,8 @@ parameter estimates. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd index e8a5e70c5..76680801d 100644 --- a/man/details_logistic_reg_brulee.Rd +++ b/man/details_logistic_reg_brulee.Rd @@ -58,9 +58,8 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd index b6cee8428..f42648f48 100644 --- a/man/details_logistic_reg_gee.Rd +++ b/man/details_logistic_reg_gee.Rd @@ -5,16 +5,17 @@ \title{Logistic regression via generalized estimating equations (GEE)} \description{ \code{gee::gee()} uses generalized least squares to fit different types of models -that have errors that are not independent. +with errors that are not independent. } \details{ For this engine, there is a single mode: classification \subsection{Tuning Parameters}{ -This model has no formal tuning parameters. It might be beneficial to -determine the appropriate correlation structure to use. However, this -typically does not affect the predicted value of the model but does have -an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to +determine the appropriate correlation structure to use, but this +typically does not affect the predicted value of the model. It \emph{does} +have an effect on the inferential results and parameter covariance +values. } \subsection{Translation from parsnip to the original package}{ @@ -34,7 +35,7 @@ logistic_reg() \%>\% ## family = binomial) } -\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee()}. +\code{multilevelmod::gee_fit()} is a wrapper model around \code{gee::gee()}. } \subsection{Preprocessing requirements}{ @@ -49,9 +50,9 @@ next section. The model cannot accept case weights. -Both \code{gee:gee(a)} and \code{gee:geepack()} specifies the id/cluster variable +Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. 
parsnip doesn’t work that -way so we enable this model to be fit using a artificial function called +way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} @@ -64,8 +65,8 @@ logistic_reg() \%>\% fit(outcome ~ treatment * visit + id_var(patientID), data = toenail) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index 404448d86..755d9cc60 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -49,9 +49,8 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index f0318a394..73ee741e2 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -4,7 +4,7 @@ \alias{details_logistic_reg_glmer} \title{Logistic regression via mixed models} \description{ -The \code{glmer} engine estimates fixed and random effect regression parameters +The \code{"glmer"} engine estimates fixed and random effect regression parameters using maximum likelihood (or restricted maximum likelihood) estimation. } \details{ @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -72,7 +72,7 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) data("toenail", package = "HSAUR3") logistic_reg() \%>\% @@ -80,8 +80,8 @@ logistic_reg() \%>\% fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index bfd7d30be..c4f0e9ab2 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -48,9 +48,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd index 204fd9852..dc4c2dfc2 100644 --- a/man/details_logistic_reg_keras.Rd +++ b/man/details_logistic_reg_keras.Rd @@ -45,9 +45,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd index ae0b7916a..899ba1370 100644 --- a/man/details_logistic_reg_spark.Rd +++ b/man/details_logistic_reg_spark.Rd @@ -47,9 +47,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd index efd012b1d..a6c87e9a3 100644 --- a/man/details_logistic_reg_stan.Rd +++ b/man/details_logistic_reg_stan.Rd @@ -58,9 +58,8 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index 348446ce7..fb0e716cf 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -4,7 +4,7 @@ \alias{details_logistic_reg_stan_glmer} \title{Logistic regression via hierarchical Bayesian methods} \description{ -The \code{stan_glmer} engine estimates hierarchical regression parameters using +The \code{"stan_glmer"} engine estimates hierarchical regression parameters using Bayesian estimation. } \details{ @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -93,7 +93,7 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) data("toenail", package = "HSAUR3") logistic_reg() \%>\% @@ -101,8 +101,8 @@ logistic_reg() \%>\% fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- @@ -118,10 +118,10 @@ glmer_wflow <- fit(glmer_wflow, data = toenail) }\if{html}{\out{
}} -For prediction, the \code{stan_glmer} engine can compute posterior intervals -analogous to confidence and prediction intervals. In these instances, -the units are the original outcome and when \code{std_error = TRUE}, the -standard deviation of the posterior distribution (or posterior +For prediction, the \code{"stan_glmer"} engine can compute posterior +intervals analogous to confidence and prediction intervals. In these +instances, the units are the original outcome. When \code{std_error = TRUE}, +the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd index 78a96e09f..d99c15cd2 100644 --- a/man/details_mars_earth.Rd +++ b/man/details_mars_earth.Rd @@ -76,9 +76,8 @@ in \code{\link[=discrim_flexible]{discrim_flexible()}}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index 4b26bc211..efff61b03 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -4,7 +4,7 @@ \alias{details_mlp_brulee} \title{Multilayer perceptron via brulee} \description{ -\code{\link[brulee:brulee_mlp]{brulee::brulee_mlp()}} fits a neural networks. +\code{\link[brulee:brulee_mlp]{brulee::brulee_mlp()}} fits a neural network. } \details{ For this engine, there are multiple modes: classification and regression @@ -102,9 +102,8 @@ layer. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd index 750f4eb57..681e723d9 100644 --- a/man/details_mlp_keras.Rd +++ b/man/details_mlp_keras.Rd @@ -81,9 +81,8 @@ This model has 5 tuning parameters: Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index 7985ff93f..e290d269c 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -78,9 +78,8 @@ layer. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index f35772f3c..e58d12343 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -47,9 +47,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd index 97b3a0980..705adaba9 100644 --- a/man/details_multinom_reg_keras.Rd +++ b/man/details_multinom_reg_keras.Rd @@ -44,9 +44,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index 721a9a747..868256f25 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -40,9 +40,8 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd index 61ebcc5ef..798462aa8 100644 --- a/man/details_multinom_reg_spark.Rd +++ b/man/details_multinom_reg_spark.Rd @@ -46,9 +46,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_naive_Bayes_naivebayes.Rd b/man/details_naive_Bayes_naivebayes.Rd index 66ec9e3cd..222dd3c33 100644 --- a/man/details_naive_Bayes_naivebayes.Rd +++ b/man/details_naive_Bayes_naivebayes.Rd @@ -48,7 +48,7 @@ are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. For count data, integers can be estimated using a Poisson distribution -if the augment \code{usepoisson = TRUE} is passed as an engine argument. +if the argument \code{usepoisson = TRUE} is passed as an engine argument. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd index b44049de3..0323a1b47 100644 --- a/man/details_nearest_neighbor_kknn.Rd +++ b/man/details_nearest_neighbor_kknn.Rd @@ -73,9 +73,8 @@ it is not consistent with the actual data dimensions. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index 85b4b1378..878971706 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -40,7 +40,7 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## ncomp = integer(1)) } -\code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} is a function that +\code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} is a function that: \itemize{ \item Determines the number of predictors in the data. \item Adjusts \code{num_comp} if the value is larger than the number of @@ -74,19 +74,17 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## ncomp = integer(1)) } -In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same -role as above but eventually targets -\code{\link[mixOmics:plsda]{mixOmics::plsda()}} or -\code{\link[mixOmics:splsda]{mixOmics::splsda()}} . +In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same role +as above but eventually targets \code{\link[mixOmics:plsda]{mixOmics::plsda()}} +or \code{\link[mixOmics:splsda]{mixOmics::splsda()}} . } \subsection{Preprocessing requirements}{ Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_poisson_reg_gee.Rd b/man/details_poisson_reg_gee.Rd index 4fb21d049..cdaf927ae 100644 --- a/man/details_poisson_reg_gee.Rd +++ b/man/details_poisson_reg_gee.Rd @@ -5,16 +5,17 @@ \title{Poisson regression via generalized estimating equations (GEE)} \description{ \code{gee::gee()} uses generalized least squares to fit different types of models -that have errors that are not independent. +with errors that are not independent. } \details{ For this engine, there is a single mode: regression \subsection{Tuning Parameters}{ -This model has no formal tuning parameters. It might be beneficial to -determine the appropriate correlation structure to use. However, this -typically does not affect the predicted value of the model but does have -an effect on the inferential results and parameter covariance values. +This model has no formal tuning parameters. It may be beneficial to +determine the appropriate correlation structure to use, but this +typically does not affect the predicted value of the model. It \emph{does} +have an effect on the inferential results and parameter covariance +values. } \subsection{Translation from parsnip to the original package}{ @@ -49,22 +50,22 @@ next section. The model cannot accept case weights. -Both \code{gee:gee(a)} and \code{gee:geepack()} specifies the id/cluster variable +Both \code{gee:gee()} and \code{gee:geepack()} specify the id/cluster variable using an argument \code{id} that requires a vector. parsnip doesn’t work that -way so we enable this model to be fit using a artificial function called +way so we enable this model to be fit using a artificial function \code{id_var()} to be used in the formula. So, in the original package, the call would look like:\if{html}{\out{
}}\preformatted{gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") }\if{html}{\out{
}} -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("gee", corstr = "exchangeable") \%>\% fit(y ~ time + x + id_var(subject), data = longitudinal_counts) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the GEE formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) gee_spec <- diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index c481c62df..e28cc33b4 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -35,9 +35,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } } \keyword{internal} diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index 11183c2df..17b1c84ee 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -4,7 +4,7 @@ \alias{details_poisson_reg_glmer} \title{Poisson regression via mixed models} \description{ -The \code{glmer} engine estimates fixed and random effect regression parameters +The \code{"glmer"} engine estimates fixed and random effect regression parameters using maximum likelihood (or restricted maximum likelihood) estimation. } \details{ @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -72,15 +72,15 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("glmer") \%>\% fit(y ~ time + x + (1 | subject), data = longitudinal_counts) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index 9278ca50a..5581c5be9 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -52,16 +52,13 @@ poisson_reg(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a -variance of one. - -By default, \code{glmnet::glmnet()} uses the argument \code{standardize = TRUE} to -center and scale the data. +variance of one. By default, \code{glmnet::glmnet()} uses the argument +\code{standardize = TRUE} to center and scale the data. } } \keyword{internal} diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 22841351f..f4503d7fb 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -36,9 +36,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index 836d05572..f24c77f56 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -62,16 +62,15 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Other details}{ For prediction, the \code{"stan"} engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, -the units are the original outcome and when \code{std_error = TRUE}, the +the units are the original outcome. When \code{std_error = TRUE}, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index dc3e575b0..e77757818 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -4,7 +4,7 @@ \alias{details_poisson_reg_stan_glmer} \title{Poisson regression via hierarchical Bayesian methods} \description{ -The \code{stan_glmer} engine estimates hierarchical regression parameters using +The \code{"stan_glmer"} engine estimates hierarchical regression parameters using Bayesian estimation. } \details{ @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that @@ -93,15 +93,15 @@ next section. The model can accept case weights. -With \code{parsnip}, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) +With parsnip, we suggest using the formula method when fitting:\if{html}{\out{
}}\preformatted{library(tidymodels) poisson_reg() \%>\% set_engine("stan_glmer") \%>\% fit(y ~ time + x + (1 | subject), data = longitudinal_counts) }\if{html}{\out{
}} -When using the general tidymodels infrastructure, it may be better to -use a workflow. In this case, you can add the appropriate columns using +When using tidymodels infrastructure, it may be better to use a +workflow. In this case, you can add the appropriate columns using \code{add_variables()} then supply the typical formula when adding the model:\if{html}{\out{
}}\preformatted{library(tidymodels) glmer_spec <- @@ -117,10 +117,10 @@ glmer_wflow <- fit(glmer_wflow, data = longitudinal_counts) }\if{html}{\out{
}} -For prediction, the \code{stan_glmer} engine can compute posterior intervals -analogous to confidence and prediction intervals. In these instances, -the units are the original outcome and when \code{std_error = TRUE}, the -standard deviation of the posterior distribution (or posterior +For prediction, the \code{"stan_glmer"} engine can compute posterior +intervals analogous to confidence and prediction intervals. In these +instances, the units are the original outcome. When \code{std_error = TRUE}, +the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. } diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index db965e59b..298ca88d2 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -37,9 +37,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index 9d1d191f0..6ce3417ab 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -51,9 +51,8 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a @@ -69,12 +68,12 @@ The model does not fit an intercept. interface but, for consistency, this package requires a model formula. The model formula can include \emph{special} terms, such as -\code{\link[survival:strata]{survival::strata()}}. The allows the baseline +\code{\link[survival:strata]{survival::strata()}}. This allows the baseline hazard to differ between groups contained in the function. The column used inside \code{strata()} is treated as qualitative no matter its type. This is different than the syntax offered by the \code{\link[glmnet:glmnet]{glmnet::glmnet()}} package (i.e., -\code{\link[glmnet:stratifySurv]{glmnet::stratifySurv()}}) which is mot +\code{\link[glmnet:stratifySurv]{glmnet::stratifySurv()}}) which is not recommended here. For example, in this model, the numeric column \code{rx} is used to estimate diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index f1ed436f4..3e0a23812 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -5,7 +5,7 @@ \title{RuleFit models via xrf} \description{ \code{\link[xrf:xrf]{xrf::xrf()}} fits a model that derives simple feature rules from a tree -ensemble and uses them as features to a regularized model. \code{\link[rules:rules-internal]{rules::xrf_fit()}} +ensemble and uses the rules as features to a regularized model. \code{\link[rules:rules-internal]{rules::xrf_fit()}} is a wrapper around this function. } \details{ @@ -115,26 +115,25 @@ response variable are not supported. To use these with \code{rule_fit()}, we recommend using a recipe instead of the formula method. Also, there are several configuration differences in how \code{xrf()} is fit -between that package and the wrapper used in \code{rules}. 
Some differences +between that package and the wrapper used in \strong{rules}. Some differences in default values are: \itemize{ \item \code{trees}: \code{xrf}: 100, \code{rules}: 15 \item \code{max_depth}: \code{xrf}: 3, \code{rules}: 6 } -These differences will create a difference in the values of the -\code{penalty} argument that \code{glmnet} uses. Also, can also set \code{penalty} -whereas uses an internal 5-fold cross-validation to determine it (by -default). +These differences will create a disparity in the values of the \code{penalty} +argument that \strong{glmnet} uses. Also, \strong{rules} can also set \code{penalty} +whereas \strong{xrf} uses an internal 5-fold cross-validation to determine it +(by default). } \subsection{Preprocessing requirements}{ Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_survival_reg_flexsurv.Rd b/man/details_survival_reg_flexsurv.Rd index 2937973dc..25f3a705e 100644 --- a/man/details_survival_reg_flexsurv.Rd +++ b/man/details_survival_reg_flexsurv.Rd @@ -45,8 +45,8 @@ model specification typically involved the use of \code{\link[survival:Surv]{survival::Surv()}}. For this engine, stratification cannot be specified via -\code{\link[=strata]{strata()}}, please see the documentation of the -\code{\link{flexsurv}} package for alternative specifications. +\code{\link[=strata]{strata()}}; please see the documentation of the +\link{flexsurv} package for alternative specifications. 
} \subsection{References}{ diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index 12123383f..e2cd2ab83 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -76,9 +76,8 @@ class predictions (e.g., accuracy). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd index 0ff7eeb03..dd6726a5e 100644 --- a/man/details_svm_linear_kernlab.Rd +++ b/man/details_svm_linear_kernlab.Rd @@ -73,9 +73,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd index 2ddd4605a..ed1d80f78 100644 --- a/man/details_svm_poly_kernlab.Rd +++ b/man/details_svm_poly_kernlab.Rd @@ -85,9 +85,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd index 1126a78c5..05f4f8e30 100644 --- a/man/details_svm_rbf_kernlab.Rd +++ b/man/details_svm_rbf_kernlab.Rd @@ -85,9 +85,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/discrim_flexible.Rd b/man/discrim_flexible.Rd index d980a16e0..2126b0f9f 100644 --- a/man/discrim_flexible.Rd +++ b/man/discrim_flexible.Rd @@ -45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_flexible")} diff --git a/man/discrim_linear.Rd b/man/discrim_linear.Rd index 0b4b09b1f..7a283d4df 100644 --- a/man/discrim_linear.Rd +++ b/man/discrim_linear.Rd @@ -29,7 +29,7 @@ to use for fitting.} \code{discrim_linear()} defines a model that estimates a multivariate distribution for the predictors separately for the data in each class (usually Gaussian with a common covariance matrix). Bayes' theorem is used -to compute the probability of each class, given the predictor values. The +to compute the probability of each class, given the predictor values. This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_linear")} @@ -45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_linear")} diff --git a/man/discrim_quad.Rd b/man/discrim_quad.Rd index 30354d97f..46dbab991 100644 --- a/man/discrim_quad.Rd +++ b/man/discrim_quad.Rd @@ -25,7 +25,7 @@ to use for fitting.} \code{discrim_quad()} defines a model that estimates a multivariate distribution for the predictors separately for the data in each class (usually Gaussian with separate covariance matrices). Bayes' theorem is used -to compute the probability of each class, given the predictor values. The +to compute the probability of each class, given the predictor values. This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_quad")} @@ -41,7 +41,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("discrim_quad")} diff --git a/man/discrim_regularized.Rd b/man/discrim_regularized.Rd index 20a260818..104d78e57 100644 --- a/man/discrim_regularized.Rd +++ b/man/discrim_regularized.Rd @@ -26,7 +26,7 @@ to use for fitting.} distribution for the predictors separately for the data in each class. 
The structure of the model can be LDA, QDA, or some amalgam of the two. Bayes' theorem is used to compute the probability of each class, given the -predictor values. The function can fit classification models. +predictor values. This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_regularized")} @@ -56,7 +56,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} Friedman, J (1989). Regularized Discriminant Analysis. \emph{Journal of the American Statistical Association}, 84, 165-175. diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index ce77744a7..af3360522 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -47,19 +47,19 @@ another package has a new engine for \code{linear_reg()}, the \code{parsnip::linear_reg()} help can show a link to a detailed help page in the other package. -To enable this, the process for a package developer is to: +The process for a package developer to create \pkg{parsnip} documentation is: \enumerate{ \item Create an engine-specific R file in the \code{R} directory with the name \verb{\{model\}_\{engine\}.R} (e.g. \code{boost_tree_C5.0.R}). This has a small amount of documentation, as well as the directives "\verb{@name details_\{model\}_\{engine\}}" and "\verb{@includeRmd man/rmd/\{model\}_\{engine\}.md details}". -\item Copy the file in \pkg{parsnip} that is in \code{man/rmd/setup.Rmd} and put +\item Copy the file in \pkg{parsnip} that is in \code{man/rmd/aaa.Rmd} and put it in the same place in your package. 
\item Write your own \verb{man/rmd/\{model\}_\{engine\}.Rmd} file. This can include packages that are not listed in the DESCRIPTION file. Those are only required when the documentation file is created locally (probably using \code{\link[devtools:document]{devtools::document()}}). -\item Run \code{\link[devtools:document]{devtools::document()}} so that the Rmd content is included in the +\item Run \code{\link[devtools:document]{devtools::document()}} so that the \code{.md} content is included in the Rd file. } diff --git a/man/gen_additive_mod.Rd b/man/gen_additive_mod.Rd index df5e779b7..04a55187b 100644 --- a/man/gen_additive_mod.Rd +++ b/man/gen_additive_mod.Rd @@ -28,7 +28,7 @@ to use for fitting.} } \description{ \code{gen_additive_mod()} defines a model that can use smoothed functions of -numeric predictors in a generalized linear model. The function can fit +numeric predictors in a generalized linear model. This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("gen_additive_mod")} @@ -50,7 +50,7 @@ gen_additive_mod() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("gen_additive_mod")} diff --git a/man/knit_engine_docs.Rd b/man/knit_engine_docs.Rd index 3315d0eb2..9aea69827 100644 --- a/man/knit_engine_docs.Rd +++ b/man/knit_engine_docs.Rd @@ -9,6 +9,8 @@ knit_engine_docs(pattern = NULL) \arguments{ \item{pattern}{A regular expression to specify which files to knit. 
The default knits all engine documentation files.} + +\item{...}{Options passed to \code{\link[knitr:knit]{knitr::knit()}}.} } \value{ A tibble with column \code{file} for the file name and \code{result} (a diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index 5c8b24a23..e4f5a756e 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -24,7 +24,7 @@ ridge regression is being used (specific engines only).} } \description{ \code{linear_reg()} defines a model that can predict numeric values from -predictors using a linear function. The function can fit regression models. +predictors using a linear function. This function can fit regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")} @@ -44,7 +44,7 @@ show_engines("linear_reg") linear_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("linear_reg")} diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index 728d11f30..582815cc2 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -34,7 +34,7 @@ For \code{LiblineaR} models, \code{mixture} must be exactly 0 or 1 only.} \description{ \code{\link[=logistic_reg]{logistic_reg()}} defines a generalized linear model for binary outcomes. A linear combination of the predictors is used to model the log odds of an -event. The function can fit classification models. +event. This function can fit classification models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")} @@ -54,7 +54,7 @@ show_engines("logistic_reg") logistic_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("logistic_reg")} diff --git a/man/mars.Rd b/man/mars.Rd index e8f3581e1..00099eb67 100644 --- a/man/mars.Rd +++ b/man/mars.Rd @@ -30,7 +30,7 @@ final model, including the intercept.} \description{ \code{mars()} defines a generalized linear model that uses artificial features for some predictors. These features resemble hinge functions and the result is -a model that is a segmented regression in small dimensions. The function can +a model that is a segmented regression in small dimensions. This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")} @@ -51,7 +51,7 @@ show_engines("mars") mars(mode = "regression", num_terms = 5) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mars")} diff --git a/man/mlp.Rd b/man/mlp.Rd index 07f445f43..7c19630ba 100644 --- a/man/mlp.Rd +++ b/man/mlp.Rd @@ -44,7 +44,7 @@ from iteration-to-iteration (specific engines only).} } \description{ \code{mlp()} defines a multilayer perceptron model (a.k.a. 
a single layer, -feed-forward neural network). The function can fit classification and +feed-forward neural network). This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")} @@ -65,7 +65,7 @@ show_engines("mlp") mlp(mode = "classification", penalty = 0.01) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mlp")} diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index 4c97de1a6..7a43e39ce 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -32,7 +32,7 @@ ridge regression is being used. (specific engines only).} } \description{ \code{multinom_reg()} defines a model that uses linear predictors to predict -multiclass data using the multinomial distribution. The function can fit +multiclass data using the multinomial distribution. This function can fit classification models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")} @@ -53,7 +53,7 @@ show_engines("multinom_reg") multinom_reg() } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("multinom_reg")} diff --git a/man/naive_Bayes.Rd b/man/naive_Bayes.Rd index 9620683bb..4791fd59e 100644 --- a/man/naive_Bayes.Rd +++ b/man/naive_Bayes.Rd @@ -29,7 +29,7 @@ to use for fitting.} } \description{ \code{naive_Bayes()} defines a model that uses Bayes' theorem to compute the -probability of each class, given the predictor values. The function can fit +probability of each class, given the predictor values. This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("naive_Bayes")} @@ -45,7 +45,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("naive_Bayes")} diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index 61778d3b5..7f88aa30c 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -55,7 +55,7 @@ nearest_neighbor(neighbors = 11) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("nearest_neighbor")} diff --git a/man/null_model.Rd b/man/null_model.Rd index 55d943ca9..976e51585 100644 --- a/man/null_model.Rd +++ b/man/null_model.Rd @@ -13,7 +13,7 @@ Possible values for this model are "unknown", "regression", or } \description{ \code{null_model()} defines a simple, non-informative model. It doesn't have any -main arguments. The function can fit classification and regression models. +main arguments. This function can fit classification and regression models. } \details{ The model can be created using the \code{fit()} function using the diff --git a/man/pls.Rd b/man/pls.Rd index dc3527ba5..a55aa3e77 100644 --- a/man/pls.Rd +++ b/man/pls.Rd @@ -28,7 +28,7 @@ to use for fitting.} \description{ \code{pls()} defines a partial least squares model that uses latent variables to model the data. It is similar to a supervised version of principal component. 
-The function can fit classification and regression models. +This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("pls")} @@ -43,7 +43,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("pls")} diff --git a/man/poisson_reg.Rd b/man/poisson_reg.Rd index d94cd8ad0..590ce4905 100644 --- a/man/poisson_reg.Rd +++ b/man/poisson_reg.Rd @@ -28,7 +28,7 @@ to use for fitting.} } \description{ \code{poisson_reg()} defines a generalized linear model for count data that follow -a Poisson distribution. The function can fit regression models. +a Poisson distribution. This function can fit regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("poisson_reg")} @@ -43,7 +43,7 @@ The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model with the data. 
} \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("poisson_reg")} diff --git a/man/proportional_hazards.Rd b/man/proportional_hazards.Rd index 71a344980..b746f5a1a 100644 --- a/man/proportional_hazards.Rd +++ b/man/proportional_hazards.Rd @@ -27,9 +27,9 @@ proportion of L1 regularization (i.e. lasso) in the model. When ridge regression is being used (specific engines only).} } \description{ -\code{proportional_hazards()} defines a technique that models the hazard function -as a multiplicative function of covariates times a baseline hazard. The -function can fit censored regression models. +\code{proportional_hazards()} defines a model for the hazard function +as a multiplicative function of covariates times a baseline hazard. This +function can fit censored regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("proportional_hazards")} @@ -55,7 +55,7 @@ show_engines("proportional_hazards") proportional_hazards(mode = "censored regression") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("proportional_hazards")} diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index dafb9cbe1..444c449ee 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -53,7 +53,7 @@ show_engines("rand_forest") rand_forest(mode = "classification", trees = 2000) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rand_forest")} diff --git a/man/rmd/aaa.Rmd b/man/rmd/aaa.Rmd index 5ba7fa8f1..d1af9ab90 100644 --- a/man/rmd/aaa.Rmd +++ b/man/rmd/aaa.Rmd @@ -9,7 +9,8 @@ check_pkg_for_docs <- function(x){ purrr::map(x, ~ require(.x, character.only = TRUE)) } -rmd_pkgs <- c("tune", "glue", "dplyr", "parsnip", "dials", "glmnet", "Cubist", "xrf") +rmd_pkgs <- c("tune", "glue", "dplyr", "parsnip", "dials", "glmnet", + "Cubist", "xrf", "ape") check_pkg_for_docs(rmd_pkgs) check_pkg_for_docs(parsnip:::extensions()) diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index e58da313e..c780d3c45 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ 
b/man/rmd/linear_reg_gls.Rmd @@ -84,7 +84,7 @@ lme_fit <- The estimated within-subject correlations are the same: -```{r} +```{r, warning = FALSE} library(ape) # lme, use ape package: diff --git a/man/rmd/tidy-example.Rmd b/man/rmd/tidy-example.Rmd deleted file mode 100644 index 3ebbb0538..000000000 --- a/man/rmd/tidy-example.Rmd +++ /dev/null @@ -1,56 +0,0 @@ -## An example - -```{r, warnings = FALSE} -library(dplyr) - -data(ames, package = "modeldata") - -ames <- - ames %>% - mutate(Sale_Price = log10(ames$Sale_Price), - Gr_Liv_Area = log10(ames$Gr_Liv_Area)) - -# ------------------------------------------------------------------------------ - -cb_fit <- - cubist_rules(committees = 10) %>% - set_engine("Cubist") %>% - fit(Sale_Price ~ Neighborhood + Longitude + Latitude + Gr_Liv_Area + Central_Air, - data = ames) - -cb_res <- tidy(cb_fit) -cb_res - -cb_res$estimate[[1]] -cb_res$statistic[[1]] - -# ------------------------------------------------------------------------------ - -library(recipes) - -xrf_reg_mod <- - rule_fit(trees = 10, penalty = .001) %>% - set_engine("xrf") %>% - set_mode("regression") - -# Make dummy variables since xgboost will not -ames_rec <- - recipe(Sale_Price ~ Neighborhood + Longitude + Latitude + - Gr_Liv_Area + Central_Air, - data = ames) %>% - step_dummy(Neighborhood, Central_Air) %>% - step_zv(all_predictors()) - -ames_processed <- prep(ames_rec) %>% bake(new_data = NULL) - -set.seed(1) -xrf_reg_fit <- - xrf_reg_mod %>% - fit(Sale_Price ~ ., data = ames_processed) - -xrf_rule_res <- tidy(xrf_reg_fit) -xrf_rule_res$rule[nrow(xrf_rule_res)] %>% rlang::parse_expr() - -xrf_col_res <- tidy(xrf_reg_fit, unit = "columns") -xrf_col_res -``` diff --git a/man/rule_fit.Rd b/man/rule_fit.Rd index 43958dde8..1107359fa 100644 --- a/man/rule_fit.Rd +++ b/man/rule_fit.Rd @@ -83,7 +83,7 @@ rule_fit() Friedman, J. H., and Popescu, B. E. (2008). "Predictive learning via rule ensembles." \emph{The Annals of Applied Statistics}, 2(3), 916-954. 
-\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \code{\link[xrf:xrf.formula]{xrf::xrf.formula()}}, \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rule_fit")} diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd index 1c2da015f..c835a05cf 100644 --- a/man/surv_reg.Rd +++ b/man/surv_reg.Rd @@ -46,7 +46,7 @@ show_engines("surv_reg") surv_reg(mode = "regression", dist = "weibull") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("surv_reg")} diff --git a/man/survival_reg.Rd b/man/survival_reg.Rd index f2d48bebd..a510b90f4 100644 --- a/man/survival_reg.Rd +++ b/man/survival_reg.Rd @@ -42,7 +42,7 @@ show_engines("survival_reg") survival_reg(mode = "censored regression", dist = "weibull") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("survival_reg")} diff --git a/man/svm_linear.Rd b/man/svm_linear.Rd index 5f9dcbf22..7bfa9f6d9 100644 --- a/man/svm_linear.Rd +++ 
b/man/svm_linear.Rd @@ -45,7 +45,7 @@ show_engines("svm_linear") svm_linear(mode = "classification") } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_linear")} diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd index 8ef4ad19c..086dfb5eb 100644 --- a/man/svm_poly.Rd +++ b/man/svm_poly.Rd @@ -33,10 +33,10 @@ loss function (regression only)} } \description{ \code{svm_poly()} defines a support vector machine model. For classification, -the model tries to maximize the width of the margin between classes (using a -polynomial class boundary). For regression, the model optimizes a robust loss -function that is only affected by very large model residuals (via polynomial -functions of the predictors). The function can fit classification and +the model tries to maximize the width of the margin between classes using a +polynomial class boundary. For regression, the model optimizes a robust loss +function that is only affected by very large model residuals and uses polynomial +functions of the predictors. This function can fit classification and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")} @@ -57,7 +57,7 @@ show_engines("svm_poly") svm_poly(mode = "classification", degree = 1.2) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_poly")} diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd index 1d7982fae..a1986fc9c 100644 --- a/man/svm_rbf.Rd +++ b/man/svm_rbf.Rd @@ -55,7 +55,7 @@ show_engines("svm_rbf") svm_rbf(mode = "classification", rbf_sigma = 0.2) } \references{ -\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable list of parsnip models} +\url{https://www.tidymodels.org}, \href{https://www.tmwr.org/}{\emph{Tidy Modeling with R}}, \href{https://www.tidymodels.org/find/parsnip/}{searchable table of parsnip models} } \seealso{ \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_rbf")} diff --git a/man/update_model_info_file.Rd b/man/update_model_info_file.Rd index c91d9953e..97d58c900 100644 --- a/man/update_model_info_file.Rd +++ b/man/update_model_info_file.Rd @@ -12,17 +12,17 @@ update_model_info_file(path = "inst/models.tsv") \description{ This function writes a tab delimited file to the package to capture information about the known models. This information includes packages in -the tidymodels GitHub repository as well as packages that are know to work +the tidymodels GitHub repository as well as packages that are known to work well with tidymodels packages (e.g. \pkg{tune}, etc.). 
There are likely other model definitions in other extension packages that are not included here that do not follow the \href{https://tidymodels.github.io/model-implementation-principles}{model implementation guidelines} -or do not work with packages other than \pkg{parsnip}. +or do not work with tidymodels packages other than \pkg{parsnip}. These data are used to document engines for each model function man page. } \details{ -It is highly recommended that the know parsnip extension packages are loaded. +It is highly recommended that the known parsnip extension packages are loaded. The unexported \pkg{parsnip} function \code{extensions()} will list these. } \keyword{internal} From acc2488296a37368b7d4e34ab985384d84dc5c6c Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:43:11 -0500 Subject: [PATCH 49/65] update model about model = TRUE for survival --- man/details_survival_reg_survival.Rd | 5 +++-- man/rmd/survival_reg_survival.Rmd | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/man/details_survival_reg_survival.Rd b/man/details_survival_reg_survival.Rd index 56584b040..26fe8558e 100644 --- a/man/details_survival_reg_survival.Rd +++ b/man/details_survival_reg_survival.Rd @@ -40,8 +40,9 @@ survival_reg(dist = character(1)) \%>\% \subsection{Other details}{ -Note that \code{model = TRUE} is needed to produce quantile predictions when -there is a stratification variable and can be overridden in other cases. +In the translated syntax above, note that \code{model = TRUE} is needed to +produce quantile predictions when there is a stratification variable and +can be overridden in other cases. 
The main interface for this model uses the formula method since the model specification typically involved the use of diff --git a/man/rmd/survival_reg_survival.Rmd b/man/rmd/survival_reg_survival.Rmd index e20681ef9..4dabb03b2 100644 --- a/man/rmd/survival_reg_survival.Rmd +++ b/man/rmd/survival_reg_survival.Rmd @@ -38,7 +38,7 @@ survival_reg(dist = character(1)) %>% ## Other details -Note that `model = TRUE` is needed to produce quantile predictions when there is a stratification variable and can be overridden in other cases. +In the translated syntax above, note that `model = TRUE` is needed to produce quantile predictions when there is a stratification variable and can be overridden in other cases. The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. From 9cad0449163fdb5c39520f9f66eebf5f5ba39eac Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:51:48 -0500 Subject: [PATCH 50/65] list to table --- man/details_rule_fit_xrf.Rd | 9 +++++---- man/rmd/rule_fit_xrf.Rmd | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index 3e0a23812..86e2ada73 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -116,12 +116,13 @@ recommend using a recipe instead of the formula method. Also, there are several configuration differences in how \code{xrf()} is fit between that package and the wrapper used in \strong{rules}. Some differences -in default values are: -\itemize{ -\item \code{trees}: \code{xrf}: 100, \code{rules}: 15 -\item \code{max_depth}: \code{xrf}: 3, \code{rules}: 6 +in default values are:\tabular{lll}{ + parameter \tab \strong{xrf} \tab \strong{rules} \cr + \code{trees} \tab 100 \tab 15 \cr + \code{max_depth} \tab 3 \tab 6 \cr } + These differences will create a disparity in the values of the \code{penalty} argument that \strong{glmnet} uses. 
Also, \strong{rules} can also set \code{penalty} whereas \strong{xrf} uses an internal 5-fold cross-validation to determine it diff --git a/man/rmd/rule_fit_xrf.Rmd b/man/rmd/rule_fit_xrf.Rmd index b59ae5141..3a7ce74bd 100644 --- a/man/rmd/rule_fit_xrf.Rmd +++ b/man/rmd/rule_fit_xrf.Rmd @@ -75,8 +75,10 @@ use these with `rule_fit()`, we recommend using a recipe instead of the formula Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in **rules**. Some differences in default values are: -- `trees`: `xrf`: 100, `rules`: 15 -- `max_depth`: `xrf`: 3, `rules`: 6 +| parameter | **xrf** | **rules** | +|------------|---------|-----------| +| `trees` | 100 | 15 | +|`max_depth` | 3 | 6 | These differences will create a disparity in the values of the `penalty` argument that **glmnet** uses. Also, **rules** can also set `penalty` whereas **xrf** uses an internal 5-fold cross-validation to determine it (by default). From 600f0034a9f8092962e68744740396bb18257ddb Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:52:02 -0500 Subject: [PATCH 51/65] notes about mode --- R/discrim_flexible.R | 2 +- R/nearest_neighbor.R | 3 ++- man/discrim_flexible.Rd | 2 +- man/nearest_neighbor.Rd | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/discrim_flexible.R b/R/discrim_flexible.R index 668f7d1cb..fa2795906 100644 --- a/R/discrim_flexible.R +++ b/R/discrim_flexible.R @@ -4,7 +4,7 @@ #' #' `discrim_flexible()` defines a model that fits a discriminant analysis model #' that can use nonlinear features created using multivariate adaptive -#' regression splines (MARS). +#' regression splines (MARS). This function can fit classification models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_flexible")} #' diff --git a/R/nearest_neighbor.R b/R/nearest_neighbor.R index 0e3ff2491..56b1f1c9c 100644 --- a/R/nearest_neighbor.R +++ b/R/nearest_neighbor.R @@ -3,7 +3,8 @@ #' @description #' #' `nearest_neighbor()` defines a model that uses the `K` most similar data -#' points from the training set to predict new samples. +#' points from the training set to predict new samples. This function can +#' fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")} #' diff --git a/man/discrim_flexible.Rd b/man/discrim_flexible.Rd index 2126b0f9f..9bcff24c0 100644 --- a/man/discrim_flexible.Rd +++ b/man/discrim_flexible.Rd @@ -30,7 +30,7 @@ to use for fitting.} \description{ \code{discrim_flexible()} defines a model that fits a discriminant analysis model that can use nonlinear features created using multivariate adaptive -regression splines (MARS). +regression splines (MARS). This function can fit classification models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("discrim_flexible")} diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index 7f88aa30c..2dda80414 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -34,7 +34,8 @@ calculating Minkowski distance.} } \description{ \code{nearest_neighbor()} defines a model that uses the \code{K} most similar data -points from the training set to predict new samples. +points from the training set to predict new samples. This function can +fit classification and regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")} From 84019d21104cfaf30b60bd02a60998cfa1837578 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 14:52:15 -0500 Subject: [PATCH 52/65] remove "dynamically" --- R/engine_docs.R | 2 +- man/doc-tools.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index 39c845a4b..87ed1b318 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -82,7 +82,7 @@ update_model_info_file <- function(path = "inst/models.tsv") { # ------------------------------------------------------------------------------ -#' Tools for dynamically documenting packages +#' Tools for documenting packages #' #' @description #' These are functions used to create dynamic documentation in Rd files diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd index af3360522..8708675ea 100644 --- a/man/doc-tools.Rd +++ b/man/doc-tools.Rd @@ -5,7 +5,7 @@ \alias{find_engine_files} \alias{make_engine_list} \alias{make_seealso_list} -\title{Tools for dynamically documenting packages} +\title{Tools for documenting packages} \usage{ find_engine_files(mod) From deb4485d0c9075a0105326006e3d463b9d94659d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 1 Feb 2022 15:56:32 -0500 Subject: [PATCH 53/65] more error trapping during knit --- R/engine_docs.R | 17 +++++++++++++++-- R/install_packages.R | 5 +++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index 87ed1b318..087b9d85f 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -19,14 +19,27 @@ knit_engine_docs <- function(pattern = NULL) { outputs <- gsub("Rmd$", "md", files) res <- purrr::map2(files, outputs, ~ try(knitr::knit(.x, .y), silent = TRUE)) + is_error <- purrr::map_lgl(res, ~ inherits(.x, "try-error")) + + if (any(is_error)) { + # In some cases where there are issues, the md file is empty. 
+ errors <- res[which(is_error)] + error_nms <- basename(files)[which(is_error)] + errors <- + purrr::map_chr(errors, ~ cli::ansi_strip(as.character(.x))) %>% + purrr::map2_chr(error_nms, ~ paste0(.y, ": ", .x)) %>% + purrr::map_chr(~ gsub("Error in .f(.x[[i]], ...) :", "", .x, fixed = TRUE)) + cat("There were failures duing knitting:\n\n") + cat(errors) + cat("\n\n") + } + res <- purrr::map_chr(res, as.character) issues <- list_md_problems() if (nrow(issues) > 0) { cat("There are some issues with the help files:\n") print(issues) - } else { - cat("No issues found in the help files.\n\n") } invisible(tibble::tibble(file = basename(files), result = res)) diff --git a/R/install_packages.R b/R/install_packages.R index 5c157b486..110f7d843 100644 --- a/R/install_packages.R +++ b/R/install_packages.R @@ -29,8 +29,9 @@ install_engine_packages <- function(extension = TRUE, extras = TRUE, } if (extras) { - rmd_pkgs <- c("tidymodels", "broom.mixed", "glmnet", "Cubist", "xrf", "ape") - engine_packages <- setdiff(engine_packages, rmd_pkgs) + rmd_pkgs <- c("tidymodels", "broom.mixed", "glmnet", "Cubist", "xrf", "ape", + "rmarkdown") + engine_packages <- unique(c(engine_packages, rmd_pkgs)) } remotes::install_cran(engine_packages) From 244a07c550b98487e2da0add79f061afb322de07 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Wed, 2 Feb 2022 14:05:32 -0500 Subject: [PATCH 54/65] changes for new brulee version --- R/linear_reg_data.R | 9 +++ R/logistic_reg_data.R | 10 +++ R/mlp_data.R | 9 +++ R/multinom_reg_brulee.R | 12 ++++ R/multinom_reg_data.R | 9 +++ man/details_C5_rules_C5.0.Rd | 2 +- man/details_bag_mars_earth.Rd | 5 +- man/details_bart_dbarts.Rd | 5 +- man/details_boost_tree_C5.0.Rd | 4 +- man/details_cubist_rules_Cubist.Rd | 2 +- man/details_discrim_flexible_earth.Rd | 5 +- man/details_discrim_linear_MASS.Rd | 5 +- man/details_discrim_linear_mda.Rd | 5 +- man/details_discrim_linear_sda.Rd | 5 +- man/details_discrim_linear_sparsediscrim.Rd | 5 +- 
man/details_discrim_quad_MASS.Rd | 5 +- man/details_discrim_quad_sparsediscrim.Rd | 5 +- man/details_discrim_regularized_klaR.Rd | 5 +- man/details_gen_additive_mod_mgcv.Rd | 5 +- man/details_linear_reg_brulee.Rd | 13 +++- man/details_linear_reg_glm.Rd | 5 +- man/details_linear_reg_glmnet.Rd | 5 +- man/details_linear_reg_keras.Rd | 5 +- man/details_linear_reg_lm.Rd | 5 +- man/details_linear_reg_lme.Rd | 2 +- man/details_linear_reg_lmer.Rd | 2 +- man/details_linear_reg_spark.Rd | 5 +- man/details_linear_reg_stan.Rd | 5 +- man/details_linear_reg_stan_glmer.Rd | 2 +- man/details_logistic_reg_LiblineaR.Rd | 5 +- man/details_logistic_reg_brulee.Rd | 16 +++-- man/details_logistic_reg_glm.Rd | 5 +- man/details_logistic_reg_glmer.Rd | 2 +- man/details_logistic_reg_glmnet.Rd | 5 +- man/details_logistic_reg_keras.Rd | 5 +- man/details_logistic_reg_spark.Rd | 5 +- man/details_logistic_reg_stan.Rd | 5 +- man/details_logistic_reg_stan_glmer.Rd | 2 +- man/details_mars_earth.Rd | 5 +- man/details_mlp_brulee.Rd | 13 +++- man/details_mlp_keras.Rd | 5 +- man/details_mlp_nnet.Rd | 5 +- man/details_multinom_reg_brulee.Rd | 77 +++++++++++++++++++++ man/details_multinom_reg_glmnet.Rd | 5 +- man/details_multinom_reg_keras.Rd | 5 +- man/details_multinom_reg_nnet.Rd | 5 +- man/details_multinom_reg_spark.Rd | 5 +- man/details_nearest_neighbor_kknn.Rd | 5 +- man/details_pls_mixOmics.Rd | 12 ++-- man/details_poisson_reg_glm.Rd | 5 +- man/details_poisson_reg_glmer.Rd | 2 +- man/details_poisson_reg_glmnet.Rd | 5 +- man/details_poisson_reg_hurdle.Rd | 5 +- man/details_poisson_reg_stan.Rd | 5 +- man/details_poisson_reg_stan_glmer.Rd | 2 +- man/details_poisson_reg_zeroinfl.Rd | 5 +- man/details_proportional_hazards_glmnet.Rd | 5 +- man/details_rule_fit_xrf.Rd | 5 +- man/details_svm_linear_LiblineaR.Rd | 5 +- man/details_svm_linear_kernlab.Rd | 5 +- man/details_svm_poly_kernlab.Rd | 5 +- man/details_svm_rbf_kernlab.Rd | 5 +- man/rmd/linear_reg_brulee.Rmd | 6 +- man/rmd/logistic_reg_brulee.Rmd | 8 
+-- man/rmd/mlp_brulee.Rmd | 6 +- man/rmd/multinom_reg_brulee.Rmd | 55 +++++++++++++++ 66 files changed, 365 insertions(+), 122 deletions(-) create mode 100644 R/multinom_reg_brulee.R create mode 100644 man/details_multinom_reg_brulee.Rd create mode 100644 man/rmd/multinom_reg_brulee.Rmd diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R index 295e0776a..5f95b9bd4 100644 --- a/R/linear_reg_data.R +++ b/R/linear_reg_data.R @@ -530,6 +530,15 @@ set_model_arg( has_submodel = FALSE ) +set_model_arg( + model = "linear_reg", + eng = "brulee", + parsnip = "mixture", + original = "mixture", + func = list(pkg = "dials", fun = "mixture"), + has_submodel = FALSE +) + set_model_arg( model = "linear_reg", eng = "brulee", diff --git a/R/logistic_reg_data.R b/R/logistic_reg_data.R index b30828254..482f085ff 100644 --- a/R/logistic_reg_data.R +++ b/R/logistic_reg_data.R @@ -666,6 +666,16 @@ set_model_arg( has_submodel = FALSE ) +set_model_arg( + model = "logistic_reg", + eng = "brulee", + parsnip = "mixture", + original = "mixture", + func = list(pkg = "dials", fun = "mixture"), + has_submodel = FALSE +) + + set_model_arg( model = "logistic_reg", eng = "brulee", diff --git a/R/mlp_data.R b/R/mlp_data.R index 62c2a0cd4..8731c8535 100644 --- a/R/mlp_data.R +++ b/R/mlp_data.R @@ -390,6 +390,15 @@ set_model_arg( has_submodel = FALSE ) +set_model_arg( + model = "mlp", + eng = "brulee", + parsnip = "mixture", + original = "mixture", + func = list(pkg = "dials", fun = "mixture"), + has_submodel = FALSE +) + set_model_arg( model = "mlp", eng = "brulee", diff --git a/R/multinom_reg_brulee.R b/R/multinom_reg_brulee.R new file mode 100644 index 000000000..d4a60273d --- /dev/null +++ b/R/multinom_reg_brulee.R @@ -0,0 +1,12 @@ +#' Multinomial regression via brulee +#' +#' [brulee::brulee_multinomial_reg()] fits a model that uses linear predictors +#' to predict multiclass data using the multinomial distribution. 
+#' +#' @includeRmd man/rmd/multinom_reg_brulee.md details +#' +#' @name details_multinom_reg_brulee +#' @keywords internal +NULL + +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/multinom_reg_data.R b/R/multinom_reg_data.R index fab239bc3..fb0bda966 100644 --- a/R/multinom_reg_data.R +++ b/R/multinom_reg_data.R @@ -372,6 +372,15 @@ set_model_arg( has_submodel = FALSE ) +set_model_arg( + model = "multinom_reg", + eng = "brulee", + parsnip = "mixture", + original = "mixture", + func = list(pkg = "dials", fun = "mixture"), + has_submodel = FALSE +) + set_model_arg( model = "multinom_reg", eng = "brulee", diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index 134cae562..a99b764a2 100644 --- a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -62,7 +62,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. -\item Quinlan R (1993).”Combining Instance-Based and Model-Based +\item Quinlan R (1993).“Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. \item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index 708512286..9d1b63095 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -81,8 +81,9 @@ bag_mars( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
} \subsection{References}{ diff --git a/man/details_bart_dbarts.Rd b/man/details_bart_dbarts.Rd index 5fdcccc62..44fff3bc1 100644 --- a/man/details_bart_dbarts.Rd +++ b/man/details_bart_dbarts.Rd @@ -103,8 +103,9 @@ times number of observations. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. \code{\link[dbarts:bart]{dbarts::bart()}} will also convert the factors to indicators if the user does not create them first. diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd index 1acce68f4..086c7a91c 100644 --- a/man/details_boost_tree_C5.0.Rd +++ b/man/details_boost_tree_C5.0.Rd @@ -59,8 +59,8 @@ are not required for this model. By default, early stopping is used. To use the complete set of boosting iterations, pass \code{earlyStopping = FALSE} to -\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early stopping -will occur if \code{sample_size = 1}. +\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early +stopping will occur if \code{sample_size = 1}. } } diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd index d022f184d..b56984a82 100644 --- a/man/details_cubist_rules_Cubist.Rd +++ b/man/details_cubist_rules_Cubist.Rd @@ -61,7 +61,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. -\item Quinlan R (1993).”Combining Instance-Based and Model-Based +\item Quinlan R (1993).“Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. 
\item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index 1fbe2806b..3b2fdff44 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -56,8 +56,9 @@ discrim_flexible( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index 232a9f7f7..428e9d1d3 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -37,8 +37,9 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index bdbff23a2..b26442b43 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -43,8 +43,9 @@ discrim_linear(penalty = numeric(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index 95506f7e0..7fbc118b3 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -53,8 +53,9 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index 275dd538f..a5672349e 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -57,8 +57,9 @@ discrim_linear(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index ca1e8283d..94d0f9117 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -37,8 +37,9 @@ discrim_quad() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index fc9bbef07..8dc39be25 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -55,8 +55,9 @@ discrim_quad(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. 
For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index d30120792..0626055ea 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -57,8 +57,9 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index c432bcd1a..0f71a0f2c 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -87,8 +87,9 @@ the \code{adjust_deg_free} parameter. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd index 4df9c8dfb..966cd776e 100644 --- a/man/details_linear_reg_brulee.Rd +++ b/man/details_linear_reg_brulee.Rd @@ -11,11 +11,17 @@ numeric outcomes. 
For this engine, there is a single mode: regression \subsection{Tuning Parameters}{ -This model has 1 tuning parameter: +This model has 2 tuning parameter: \itemize{ \item \code{penalty}: Amount of Regularization (type: double, default: 0.001) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) } +The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force +parameters to be strictly zero (as it does in packages such as glmnet). +The zeroing out of parameters is a specific feature the optimization +method used in those packages. + Other engine arguments of interest: \itemize{ \item \code{optimizer()}: The optimization method. See @@ -53,8 +59,9 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index 89e6a18a2..0c78b37d1 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -49,8 +49,9 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
} \subsection{Examples}{ diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index c1caebb96..f1f499502 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -46,8 +46,9 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd index 4f2630662..250fa16c8 100644 --- a/man/details_linear_reg_keras.Rd +++ b/man/details_linear_reg_keras.Rd @@ -43,8 +43,9 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd index 888386139..43b156925 100644 --- a/man/details_linear_reg_lm.Rd +++ b/man/details_linear_reg_lm.Rd @@ -29,8 +29,9 @@ This engine has no tuning parameters. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index 900aef0c5..f55d6501c 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index c923e0d5b..ee4b6b376 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd index 6853f7041..84c6d09e9 100644 --- a/man/details_linear_reg_spark.Rd +++ b/man/details_linear_reg_spark.Rd @@ -45,8 +45,9 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd index 9e12761ec..c05a5e625 100644 --- a/man/details_linear_reg_stan.Rd +++ b/man/details_linear_reg_stan.Rd @@ -57,8 +57,9 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index 32b8b4536..ac4364117 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -59,7 +59,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index 57c7a64e1..a809583ab 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -50,8 +50,9 @@ parameter estimates. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd index 76680801d..5b9843264 100644 --- a/man/details_logistic_reg_brulee.Rd +++ b/man/details_logistic_reg_brulee.Rd @@ -12,12 +12,16 @@ odds of an event. For this engine, there is a single mode: classification \subsection{Tuning Parameters}{ -This model has 1 tuning parameter: +This model has 2 tuning parameter: \itemize{ \item \code{penalty}: Amount of Regularization (type: double, default: 0.001) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) } -Both \code{penalty} and \code{dropout} should be not be used in the same model. +The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force +parameters to be strictly zero (as it does in packages such as glmnet). +The zeroing out of parameters is a specific feature the optimization +method used in those packages. Other engine arguments of interest: \itemize{ @@ -52,14 +56,12 @@ no improvement before stopping. (default: 5L). ## brulee::brulee_logistic_reg(x = missing_arg(), y = missing_arg(), ## penalty = double(1)) } -} - -\subsection{Preprocessing requirements}{ Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index 755d9cc60..404448d86 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -49,8 +49,9 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index 73ee741e2..474208c88 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). 
When the model has seen subject \code{i}, it can use that diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index c4f0e9ab2..bfd7d30be 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -48,8 +48,9 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd index dc4c2dfc2..204fd9852 100644 --- a/man/details_logistic_reg_keras.Rd +++ b/man/details_logistic_reg_keras.Rd @@ -45,8 +45,9 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd index 899ba1370..ae0b7916a 100644 --- a/man/details_logistic_reg_spark.Rd +++ b/man/details_logistic_reg_spark.Rd @@ -47,8 +47,9 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd index a6c87e9a3..efd012b1d 100644 --- a/man/details_logistic_reg_stan.Rd +++ b/man/details_logistic_reg_stan.Rd @@ -58,8 +58,9 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index fb0e716cf..2f4ed3ef1 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd index d99c15cd2..78a96e09f 100644 --- a/man/details_mars_earth.Rd +++ b/man/details_mars_earth.Rd @@ -76,8 +76,9 @@ in \code{\link[=discrim_flexible]{discrim_flexible()}}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
} \subsection{Examples}{ diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index efff61b03..072008c36 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -10,16 +10,22 @@ For this engine, there are multiple modes: classification and regression \subsection{Tuning Parameters}{ -This model has 6 tuning parameters: +This model has 7 tuning parameters: \itemize{ \item \code{hidden_units}: # Hidden Units (type: integer, default: 3L) \item \code{penalty}: Amount of Regularization (type: double, default: 0.0) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) \item \code{epochs}: # Epochs (type: integer, default: 0.01) \item \code{dropout}: Dropout Rate (type: double, default: 0.0) \item \code{learn_rate}: Learning Rate (type: double, default: 100L) \item \code{activation}: Activation Function (type: character, default: ‘relu’) } +The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force +parameters to be strictly zero (as it does in packages such as glmnet). +The zeroing out of parameters is a specific feature of the optimization +method used in those packages. + Both \code{penalty} and \code{dropout} should be not be used in the same model. Other engine arguments of interest: @@ -102,8 +108,9 @@ layer. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd index 681e723d9..750f4eb57 100644 --- a/man/details_mlp_keras.Rd +++ b/man/details_mlp_keras.Rd @@ -81,8 +81,9 @@ This model has 5 tuning parameters: Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index e290d269c..7985ff93f 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -78,8 +78,9 @@ layer. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_brulee.Rd b/man/details_multinom_reg_brulee.Rd new file mode 100644 index 000000000..6280c0c62 --- /dev/null +++ b/man/details_multinom_reg_brulee.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multinom_reg_brulee.R +\name{details_multinom_reg_brulee} +\alias{details_multinom_reg_brulee} +\title{Multinomial regression via brulee} +\description{ +\code{\link[brulee:brulee_multinomial_reg]{brulee::brulee_multinomial_reg()}} fits a model that uses linear predictors +to predict multiclass data using the multinomial distribution. +} +\details{ +For this engine, there is a single mode: classification +\subsection{Tuning Parameters}{ + +This model has 2 tuning parameters: +\itemize{ +\item \code{penalty}: Amount of Regularization (type: double, default: 0.001) +\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0) +} + +The use of the L1 penalty (a.k.a. the lasso penalty) does \emph{not} force +parameters to be strictly zero (as it does in packages such as glmnet). +The zeroing out of parameters is a specific feature of the optimization +method used in those packages. + +Other engine arguments of interest: +\itemize{ +\item \code{optimizer()}: The optimization method. See +\code{\link[brulee:brulee_linear_reg]{brulee::brulee_linear_reg()}}. +\item \code{epochs()}: An integer for the number of passes through the training +set. +\item \code{learn_rate()}: A number used to accelerate the gradient descent +process. +\item \code{momentum()}: A number used to use historical gradient information +during optimization (\code{optimizer = "SGD"} only). +\item \code{batch_size()}: An integer for the number of training set points in +each batch. +\item \code{stop_iter()}: A non-negative integer for how many iterations with +no improvement before stopping. (default: 5L). 
+\item \code{class_weights()}: Numeric class weights. See +\code{\link[brulee:brulee_multinom_reg]{brulee::brulee_multinom_reg()}}. +} +} + +\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{
}}\preformatted{multinom_reg(penalty = double(1)) \%>\% + set_engine("brulee") \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_multinomial_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +} + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. + +Predictors should have the same scale. One way to achieve this is to +center and scale each so that each predictor has mean zero and a +variance of one. +} + +\subsection{References}{ +\itemize{ +\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}. +Springer. +} +} +} +\keyword{internal} diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index e58d12343..f35772f3c 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -47,8 +47,9 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd index 705adaba9..97b3a0980 100644 --- a/man/details_multinom_reg_keras.Rd +++ b/man/details_multinom_reg_keras.Rd @@ -44,8 +44,9 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index 868256f25..721a9a747 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -40,8 +40,9 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd index 798462aa8..61ebcc5ef 100644 --- a/man/details_multinom_reg_spark.Rd +++ b/man/details_multinom_reg_spark.Rd @@ -46,8 +46,9 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd index 0323a1b47..b44049de3 100644 --- a/man/details_nearest_neighbor_kknn.Rd +++ b/man/details_nearest_neighbor_kknn.Rd @@ -73,8 +73,9 @@ it is not consistent with the actual data dimensions. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index 878971706..faf4bfdb5 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -74,17 +74,19 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## ncomp = integer(1)) } -In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same role -as above but eventually targets \code{\link[mixOmics:plsda]{mixOmics::plsda()}} -or \code{\link[mixOmics:splsda]{mixOmics::splsda()}} . +In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same +role as above but eventually targets +\code{\link[mixOmics:plsda]{mixOmics::plsda()}} or +\code{\link[mixOmics:splsda]{mixOmics::splsda()}} . } \subsection{Preprocessing requirements}{ Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. 
+formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index e28cc33b4..c481c62df 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -35,8 +35,9 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } } \keyword{internal} diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index 17b1c84ee..0878a08bb 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). 
When the model has seen subject \code{i}, it can use that diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index 5581c5be9..12d270dee 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -52,8 +52,9 @@ poisson_reg(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index f4503d7fb..22841351f 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -36,8 +36,9 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index f24c77f56..ac5cd8767 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -62,8 +62,9 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. 
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index e77757818..44802f0d6 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index 298ca88d2..db965e59b 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -37,8 +37,9 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. 
+formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index 6ce3417ab..cdb544bed 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -51,8 +51,9 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index 86e2ada73..d2b0d6f31 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -133,8 +133,9 @@ whereas \strong{xrf} uses an internal 5-fold cross-validation to determine it Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. 
} \subsection{References}{ diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index e2cd2ab83..12123383f 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -76,8 +76,9 @@ class predictions (e.g., accuracy). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd index dd6726a5e..0ff7eeb03 100644 --- a/man/details_svm_linear_kernlab.Rd +++ b/man/details_svm_linear_kernlab.Rd @@ -73,8 +73,9 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd index ed1d80f78..2ddd4605a 100644 --- a/man/details_svm_poly_kernlab.Rd +++ b/man/details_svm_poly_kernlab.Rd @@ -85,8 +85,9 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd index 05f4f8e30..1126a78c5 100644 --- a/man/details_svm_rbf_kernlab.Rd +++ b/man/details_svm_rbf_kernlab.Rd @@ -85,8 +85,9 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, -parsnip will convert factor columns to indicators. +formula method via +\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will +convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/rmd/linear_reg_brulee.Rmd b/man/rmd/linear_reg_brulee.Rmd index 9cbba1e40..94e4ea598 100644 --- a/man/rmd/linear_reg_brulee.Rmd +++ b/man/rmd/linear_reg_brulee.Rmd @@ -7,8 +7,8 @@ ```{r brulee-param-info, echo = FALSE} defaults <- - tibble::tibble(parsnip = c("penalty"), - default = c( "0.001")) + tibble::tibble(parsnip = c("penalty", "mixture"), + default = c( "0.001", "0.0")) param <- linear_reg() %>% @@ -22,6 +22,8 @@ This model has `r nrow(param)` tuning parameter: param$item ``` +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature the optimization method used in those packages. + Other engine arguments of interest: - `optimizer()`: The optimization method. 
See [brulee::brulee_linear_reg()]. diff --git a/man/rmd/logistic_reg_brulee.Rmd b/man/rmd/logistic_reg_brulee.Rmd index 9fc36259a..75cf83785 100644 --- a/man/rmd/logistic_reg_brulee.Rmd +++ b/man/rmd/logistic_reg_brulee.Rmd @@ -7,8 +7,8 @@ ```{r brulee-param-info, echo = FALSE} defaults <- - tibble::tibble(parsnip = c("penalty"), - default = c( "0.001")) + tibble::tibble(parsnip = c("penalty", "mixture"), + default = c( "0.001", "0.0")) param <- logistic_reg() %>% @@ -22,7 +22,7 @@ This model has `r nrow(param)` tuning parameter: param$item ``` -Both `penalty` and `dropout` should be not be used in the same model. +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. Other engine arguments of interest: @@ -44,8 +44,6 @@ logistic_reg(penalty = double(1)) %>% ``` -## Preprocessing requirements - ```{r child = "template-makes-dummies.Rmd"} ``` diff --git a/man/rmd/mlp_brulee.Rmd b/man/rmd/mlp_brulee.Rmd index 05a486ed2..8f0dddc47 100644 --- a/man/rmd/mlp_brulee.Rmd +++ b/man/rmd/mlp_brulee.Rmd @@ -7,8 +7,8 @@ ```{r brulee-param-info, echo = FALSE} defaults <- - tibble::tibble(parsnip = c("hidden_units", "penalty", "dropout", "epochs", "learn_rate", "activation"), - default = c("3L", "0.0", "0.0", "0.01", "100L", "'relu'")) + tibble::tibble(parsnip = c("hidden_units", "penalty", "dropout", "epochs", "learn_rate", "activation", "mixture"), + default = c("3L", "0.0", "0.0", "100L", "0.01", "'relu'", "0.0")) param <- mlp() %>% @@ -22,6 +22,8 @@ This model has `r nrow(param)` tuning parameters: param$item ``` +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. 
+ Both `penalty` and `dropout` should be not be used in the same model. Other engine arguments of interest: diff --git a/man/rmd/multinom_reg_brulee.Rmd b/man/rmd/multinom_reg_brulee.Rmd new file mode 100644 index 000000000..f6856b428 --- /dev/null +++ b/man/rmd/multinom_reg_brulee.Rmd @@ -0,0 +1,55 @@ +```{r, child = "aaa.Rmd", include = FALSE} +``` + +`r descr_models("multinom_reg", "brulee")` + +## Tuning Parameters + +```{r brulee-param-info, echo = FALSE} +defaults <- + tibble::tibble(parsnip = c("penalty", "mixture"), + default = c( "0.001", "0.0")) + +param <- + multinom_reg() %>% + set_engine("brulee") %>% + make_parameter_list(defaults) +``` + +This model has `r nrow(param)` tuning parameters: + +```{r brulee-param-list, echo = FALSE, results = "asis"} +param$item +``` + +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_linear_reg()]. + - `epochs()`: An integer for the number of passes through the training set. + - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to use historical gradient information during optimization (`optimizer = "SGD"` only). + - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). + - `class_weights()`: Numeric class weights. See [brulee::brulee_multinomial_reg()]. 
+ + +## Translation from parsnip to the original package (classification) + +```{r brulee-cls} +multinom_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + + +```{r child = "template-makes-dummies.Rmd"} +``` + +```{r child = "template-same-scale.Rmd"} +``` + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. From 10a48a2bdb2832fd656d1410bd62496875d4b8b9 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:35:07 -0700 Subject: [PATCH 55/65] Rework explanation of what is/isn't in our tab-delimited file --- R/engine_docs.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/engine_docs.R b/R/engine_docs.R index 087b9d85f..4174b1d9f 100644 --- a/R/engine_docs.R +++ b/R/engine_docs.R @@ -59,16 +59,18 @@ extensions <- function() { #' This function writes a tab delimited file to the package to capture #' information about the known models. This information includes packages in #' the tidymodels GitHub repository as well as packages that are known to work -#' well with tidymodels packages (e.g. \pkg{tune}, etc.). There are likely -#' other model definitions in other extension packages that are not included -#' here that do not follow the -#' [model implementation guidelines](https://tidymodels.github.io/model-implementation-principles) -#' or do not work with tidymodels packages other than \pkg{parsnip}. +#' well with tidymodels packages (e.g. not only \pkg{parsnip} but also +#' \pkg{tune}, etc.). There may be more model definitions in other extension +#' packages that are not included here. #' #' These data are used to document engines for each model function man page. #' @keywords internal #' @param path A character string for the location of the tab delimited file. #' @details +#' See our +#' [model implementation guidelines](https://tidymodels.github.io/model-implementation-principles) +#' on best practices for modeling and modeling packages. 
+#' #' It is highly recommended that the known parsnip extension packages are loaded. #' The unexported \pkg{parsnip} function `extensions()` will list these. #' @export From 487acdb04712630c97623c03bda000b7fa7200fb Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:40:51 -0700 Subject: [PATCH 56/65] Update R/rand_forest.R --- R/rand_forest.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rand_forest.R b/R/rand_forest.R index fcbf80ed0..eb8b41ff3 100644 --- a/R/rand_forest.R +++ b/R/rand_forest.R @@ -4,7 +4,7 @@ #' #' `rand_forest()` defines a model that creates a large number of decision #' trees, each independent of the others. The final prediction uses all -#' predictions from the individual trees and combines them. The function can fit +#' predictions from the individual trees and combines them. This function can fit #' classification, regression, and censored regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")} From 6a1a8d4aef0133a24ea47b4288e39ebc534f7246 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:41:57 -0700 Subject: [PATCH 57/65] Update R/rule_fit.R --- R/rule_fit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/rule_fit.R b/R/rule_fit.R index a90218e30..75bc80ada 100644 --- a/R/rule_fit.R +++ b/R/rule_fit.R @@ -2,7 +2,7 @@ #' #' @description #' `rule_fit()` defines a model that derives simple feature rules from a tree -#' ensemble and uses them as features in a regularized model. The function can +#' ensemble and uses them as features in a regularized model. This function can #' fit classification and regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rule_fit")} From 6fad7d66ae9ef926d3f06c4987ff29a53f622fa0 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:43:43 -0700 Subject: [PATCH 58/65] Update R/svm_linear.R --- R/svm_linear.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/svm_linear.R b/R/svm_linear.R index 6e3efea9f..590e7e6d2 100644 --- a/R/svm_linear.R +++ b/R/svm_linear.R @@ -6,7 +6,7 @@ #' the model tries to maximize the width of the margin between classes (using a #' linear class boundary). For regression, the model optimizes a robust loss #' function that is only affected by very large model residuals and uses a -#' linear fit. The function can fit classification and regression models. +#' linear fit. This function can fit classification and regression models. #' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")} #' From f7124bb4da3107a2c02d61526b8699f23ed6b046 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:44:19 -0700 Subject: [PATCH 59/65] Update R/survival_reg.R --- R/survival_reg.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/survival_reg.R b/R/survival_reg.R index 35772fa8c..ce8689db3 100644 --- a/R/survival_reg.R +++ b/R/survival_reg.R @@ -1,7 +1,7 @@ #' Parametric survival regression #' #' @description -#' `survival_reg()` defines a parametric survival model. The function can fit +#' `survival_reg()` defines a parametric survival model. This function can fit #' censored regression models. 
#' #' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("survival_reg")} From 3266b82d47b84be8857c7c6a235e7814ed69f5da Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 15:54:44 -0700 Subject: [PATCH 60/65] Remove details on glmnet not using the formula interface --- man/rmd/proportional_hazards_glmnet.Rmd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/man/rmd/proportional_hazards_glmnet.Rmd b/man/rmd/proportional_hazards_glmnet.Rmd index 2729003bb..8758018c5 100644 --- a/man/rmd/proportional_hazards_glmnet.Rmd +++ b/man/rmd/proportional_hazards_glmnet.Rmd @@ -52,9 +52,7 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center The model does not fit an intercept. -[glmnet::glmnet()] does not use the formula interface but, for consistency, this package requires a model formula. - -The model formula can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is not recommended here. +The model formula (which is required) can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is not recommended here. 
For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: From 45aa0d1ad9a649996385050c9be017ca849cce60 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 2 Feb 2022 16:23:35 -0700 Subject: [PATCH 61/65] Redocument --- man/details_proportional_hazards_glmnet.Rd | 7 ++----- man/update_model_info_file.Rd | 12 +++++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index cdb544bed..8ca345b89 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -65,11 +65,8 @@ the argument \code{standardize = TRUE} to center and scale the data. The model does not fit an intercept. -\code{\link[glmnet:glmnet]{glmnet::glmnet()}} does not use the formula -interface but, for consistency, this package requires a model formula. - -The model formula can include \emph{special} terms, such as -\code{\link[survival:strata]{survival::strata()}}. This allows the baseline +The model formula (which is required) can include \emph{special} terms, such +as \code{\link[survival:strata]{survival::strata()}}. This allows the baseline hazard to differ between groups contained in the function. The column used inside \code{strata()} is treated as qualitative no matter its type. This is different than the syntax offered by the diff --git a/man/update_model_info_file.Rd b/man/update_model_info_file.Rd index 97d58c900..e95c3fd53 100644 --- a/man/update_model_info_file.Rd +++ b/man/update_model_info_file.Rd @@ -13,15 +13,17 @@ update_model_info_file(path = "inst/models.tsv") This function writes a tab delimited file to the package to capture information about the known models. This information includes packages in the tidymodels GitHub repository as well as packages that are known to work -well with tidymodels packages (e.g. \pkg{tune}, etc.). 
There are likely -other model definitions in other extension packages that are not included -here that do not follow the -\href{https://tidymodels.github.io/model-implementation-principles}{model implementation guidelines} -or do not work with tidymodels packages other than \pkg{parsnip}. +well with tidymodels packages (e.g. not only \pkg{parsnip} but also +\pkg{tune}, etc.). There may be more model definitions in other extension +packages that are not included here. These data are used to document engines for each model function man page. } \details{ +See our +\href{https://tidymodels.github.io/model-implementation-principles}{model implementation guidelines} +on best practices for modeling and modeling packages. + It is highly recommended that the known parsnip extension packages are loaded. The unexported \pkg{parsnip} function \code{extensions()} will list these. } From f3392d04f518ab012b416a24da38233b34850cfe Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 7 Feb 2022 20:52:48 -0500 Subject: [PATCH 62/65] doc update --- man/details_C5_rules_C5.0.Rd | 2 +- man/details_bag_mars_earth.Rd | 5 +-- man/details_bart_dbarts.Rd | 5 +-- man/details_boost_tree_C5.0.Rd | 4 +- man/details_cubist_rules_Cubist.Rd | 5 ++- man/details_discrim_flexible_earth.Rd | 5 +-- man/details_discrim_linear_MASS.Rd | 5 +-- man/details_discrim_linear_mda.Rd | 5 +-- man/details_discrim_linear_sda.Rd | 5 +-- man/details_discrim_linear_sparsediscrim.Rd | 5 +-- man/details_discrim_quad_MASS.Rd | 5 +-- man/details_discrim_quad_sparsediscrim.Rd | 5 +-- man/details_discrim_regularized_klaR.Rd | 5 +-- man/details_gen_additive_mod_mgcv.Rd | 5 +-- man/details_linear_reg_brulee.Rd | 5 +-- man/details_linear_reg_gee.Rd | 6 +-- man/details_linear_reg_glm.Rd | 5 +-- man/details_linear_reg_glmnet.Rd | 5 +-- man/details_linear_reg_gls.Rd | 4 +- man/details_linear_reg_keras.Rd | 5 +-- man/details_linear_reg_lm.Rd | 5 +-- man/details_linear_reg_lme.Rd | 2 +- man/details_linear_reg_lmer.Rd | 2 +- 
man/details_linear_reg_spark.Rd | 5 +-- man/details_linear_reg_stan.Rd | 5 +-- man/details_linear_reg_stan_glmer.Rd | 2 +- man/details_logistic_reg_LiblineaR.Rd | 5 +-- man/details_logistic_reg_brulee.Rd | 5 +-- man/details_logistic_reg_gee.Rd | 6 +-- man/details_logistic_reg_glm.Rd | 5 +-- man/details_logistic_reg_glmer.Rd | 2 +- man/details_logistic_reg_glmnet.Rd | 5 +-- man/details_logistic_reg_keras.Rd | 5 +-- man/details_logistic_reg_spark.Rd | 5 +-- man/details_logistic_reg_stan.Rd | 5 +-- man/details_logistic_reg_stan_glmer.Rd | 2 +- man/details_mars_earth.Rd | 5 +-- man/details_mlp_brulee.Rd | 5 +-- man/details_mlp_keras.Rd | 5 +-- man/details_mlp_nnet.Rd | 5 +-- man/details_multinom_reg_brulee.Rd | 5 +-- man/details_multinom_reg_glmnet.Rd | 5 +-- man/details_multinom_reg_keras.Rd | 5 +-- man/details_multinom_reg_nnet.Rd | 5 +-- man/details_multinom_reg_spark.Rd | 5 +-- man/details_nearest_neighbor_kknn.Rd | 5 +-- man/details_pls_mixOmics.Rd | 12 +++--- man/details_poisson_reg_glm.Rd | 5 +-- man/details_poisson_reg_glmer.Rd | 2 +- man/details_poisson_reg_glmnet.Rd | 5 +-- man/details_poisson_reg_hurdle.Rd | 5 +-- man/details_poisson_reg_stan.Rd | 5 +-- man/details_poisson_reg_stan_glmer.Rd | 2 +- man/details_poisson_reg_zeroinfl.Rd | 5 +-- man/details_proportional_hazards_glmnet.Rd | 5 +-- man/details_rule_fit_xrf.Rd | 5 +-- man/details_svm_linear_LiblineaR.Rd | 5 +-- man/details_svm_linear_kernlab.Rd | 5 +-- man/details_svm_poly_kernlab.Rd | 5 +-- man/details_svm_rbf_kernlab.Rd | 5 +-- man/rand_forest.Rd | 2 +- man/rmd/example_mlm.Rmd | 41 --------------------- man/rmd/linear_reg_gee.Rmd | 2 +- man/rmd/linear_reg_gls.Rmd | 4 +- man/rmd/logistic_reg_gee.Rmd | 2 +- man/rule_fit.Rd | 2 +- man/survival_reg.Rd | 2 +- man/svm_linear.Rd | 2 +- 68 files changed, 128 insertions(+), 212 deletions(-) delete mode 100644 man/rmd/example_mlm.Rmd diff --git a/man/details_C5_rules_C5.0.Rd b/man/details_C5_rules_C5.0.Rd index a99b764a2..134cae562 100644 --- 
a/man/details_C5_rules_C5.0.Rd +++ b/man/details_C5_rules_C5.0.Rd @@ -62,7 +62,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. -\item Quinlan R (1993).“Combining Instance-Based and Model-Based +\item Quinlan R (1993).”Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. \item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. diff --git a/man/details_bag_mars_earth.Rd b/man/details_bag_mars_earth.Rd index 9d1b63095..708512286 100644 --- a/man/details_bag_mars_earth.Rd +++ b/man/details_bag_mars_earth.Rd @@ -81,9 +81,8 @@ bag_mars( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_bart_dbarts.Rd b/man/details_bart_dbarts.Rd index 44fff3bc1..5fdcccc62 100644 --- a/man/details_bart_dbarts.Rd +++ b/man/details_bart_dbarts.Rd @@ -103,9 +103,8 @@ times number of observations. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. \code{\link[dbarts:bart]{dbarts::bart()}} will also convert the factors to indicators if the user does not create them first. 
diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd index 086c7a91c..1acce68f4 100644 --- a/man/details_boost_tree_C5.0.Rd +++ b/man/details_boost_tree_C5.0.Rd @@ -59,8 +59,8 @@ are not required for this model. By default, early stopping is used. To use the complete set of boosting iterations, pass \code{earlyStopping = FALSE} to -\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early -stopping will occur if \code{sample_size = 1}. +\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early stopping +will occur if \code{sample_size = 1}. } } diff --git a/man/details_cubist_rules_Cubist.Rd b/man/details_cubist_rules_Cubist.Rd index b56984a82..ef48fe06a 100644 --- a/man/details_cubist_rules_Cubist.Rd +++ b/man/details_cubist_rules_Cubist.Rd @@ -44,7 +44,8 @@ cubist_rules( ## ## Model fit template: ## rules::cubist_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), -## committees = integer(1), neighbors = integer(1), max_rules = integer(1)) +## committees = integer(1), neighbors = integer(1), max_rules = integer(1), +## composite = TRUE) } } @@ -61,7 +62,7 @@ are not required for this model. \item Quinlan R (1992). “Learning with Continuous Classes.” Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. -\item Quinlan R (1993).“Combining Instance-Based and Model-Based +\item Quinlan R (1993).”Combining Instance-Based and Model-Based Learning.” Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. \item Kuhn M and Johnson K (2013). \emph{Applied Predictive Modeling}. 
diff --git a/man/details_discrim_flexible_earth.Rd b/man/details_discrim_flexible_earth.Rd index 3b2fdff44..1fbe2806b 100644 --- a/man/details_discrim_flexible_earth.Rd +++ b/man/details_discrim_flexible_earth.Rd @@ -56,9 +56,8 @@ discrim_flexible( Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_discrim_linear_MASS.Rd b/man/details_discrim_linear_MASS.Rd index 428e9d1d3..232a9f7f7 100644 --- a/man/details_discrim_linear_MASS.Rd +++ b/man/details_discrim_linear_MASS.Rd @@ -37,9 +37,8 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_mda.Rd b/man/details_discrim_linear_mda.Rd index b26442b43..bdbff23a2 100644 --- a/man/details_discrim_linear_mda.Rd +++ b/man/details_discrim_linear_mda.Rd @@ -43,9 +43,8 @@ discrim_linear(penalty = numeric(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sda.Rd b/man/details_discrim_linear_sda.Rd index 7fbc118b3..95506f7e0 100644 --- a/man/details_discrim_linear_sda.Rd +++ b/man/details_discrim_linear_sda.Rd @@ -53,9 +53,8 @@ discrim_linear() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_linear_sparsediscrim.Rd b/man/details_discrim_linear_sparsediscrim.Rd index a5672349e..275dd538f 100644 --- a/man/details_discrim_linear_sparsediscrim.Rd +++ b/man/details_discrim_linear_sparsediscrim.Rd @@ -57,9 +57,8 @@ discrim_linear(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_discrim_quad_MASS.Rd b/man/details_discrim_quad_MASS.Rd index 94d0f9117..ca1e8283d 100644 --- a/man/details_discrim_quad_MASS.Rd +++ b/man/details_discrim_quad_MASS.Rd @@ -37,9 +37,8 @@ discrim_quad() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_quad_sparsediscrim.Rd b/man/details_discrim_quad_sparsediscrim.Rd index 8dc39be25..fc9bbef07 100644 --- a/man/details_discrim_quad_sparsediscrim.Rd +++ b/man/details_discrim_quad_sparsediscrim.Rd @@ -55,9 +55,8 @@ discrim_quad(regularization_method = character(0)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. 
For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_discrim_regularized_klaR.Rd b/man/details_discrim_regularized_klaR.Rd index 0626055ea..d30120792 100644 --- a/man/details_discrim_regularized_klaR.Rd +++ b/man/details_discrim_regularized_klaR.Rd @@ -57,9 +57,8 @@ discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) \% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations within each outcome class. For this reason, \emph{zero-variance} predictors (i.e., with a single diff --git a/man/details_gen_additive_mod_mgcv.Rd b/man/details_gen_additive_mod_mgcv.Rd index 0f71a0f2c..c432bcd1a 100644 --- a/man/details_gen_additive_mod_mgcv.Rd +++ b/man/details_gen_additive_mod_mgcv.Rd @@ -87,9 +87,8 @@ the \code{adjust_deg_free} parameter. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{References}{ diff --git a/man/details_linear_reg_brulee.Rd b/man/details_linear_reg_brulee.Rd index 966cd776e..63715217d 100644 --- a/man/details_linear_reg_brulee.Rd +++ b/man/details_linear_reg_brulee.Rd @@ -59,9 +59,8 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_gee.Rd b/man/details_linear_reg_gee.Rd index f74fbfd9f..717dc5cac 100644 --- a/man/details_linear_reg_gee.Rd +++ b/man/details_linear_reg_gee.Rd @@ -82,9 +82,9 @@ gee_wflow <- fit(gee_wflow, data = warpbreaks) }\if{html}{\out{
}} -\code{gee()} always prints out warnings and output even when \code{silent = TRUE}. -When using the \code{"gee"} engine, it will never produce output, even if -\code{silent = FALSE}. +The \code{gee::gee()} function always prints out warnings and output even +when \code{silent = TRUE}. The parsnip “gee” engine, by contrast, silences +all console output coming from \code{gee::gee()}, even if \code{silent = FALSE}. Also, because of issues with the \code{gee()} function, a supplementary call to \code{glm()} is needed to get the rank and QR decomposition objects so diff --git a/man/details_linear_reg_glm.Rd b/man/details_linear_reg_glm.Rd index 0c78b37d1..89e6a18a2 100644 --- a/man/details_linear_reg_glm.Rd +++ b/man/details_linear_reg_glm.Rd @@ -49,9 +49,8 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index f1f499502..c1caebb96 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -46,9 +46,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_gls.Rd b/man/details_linear_reg_gls.Rd index c80a01770..ac4c97c6c 100644 --- a/man/details_linear_reg_gls.Rd +++ b/man/details_linear_reg_gls.Rd @@ -143,7 +143,9 @@ as are the fixed effects (and their standard errors):\if{html}{\out{
}}\preformatted{library(broom.mixed) # lme: -lme_fit \%>\% tidy() \%>\% filter(group == "fixed") \%>\% select(-group, -effect) +lme_fit \%>\% tidy() \%>\% + dplyr::filter(group == "fixed") \%>\% + dplyr::select(-group, -effect) }\if{html}{\out{
}}\preformatted{## # A tibble: 2 × 6 ## term estimate std.error df statistic p.value ## diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd index 250fa16c8..4f2630662 100644 --- a/man/details_linear_reg_keras.Rd +++ b/man/details_linear_reg_keras.Rd @@ -43,9 +43,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd index 43b156925..888386139 100644 --- a/man/details_linear_reg_lm.Rd +++ b/man/details_linear_reg_lm.Rd @@ -29,9 +29,8 @@ This engine has no tuning parameters. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_linear_reg_lme.Rd b/man/details_linear_reg_lme.Rd index f55d6501c..900aef0c5 100644 --- a/man/details_linear_reg_lme.Rd +++ b/man/details_linear_reg_lme.Rd @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_linear_reg_lmer.Rd b/man/details_linear_reg_lmer.Rd index ee4b6b376..c923e0d5b 100644 --- a/man/details_linear_reg_lmer.Rd +++ b/man/details_linear_reg_lmer.Rd @@ -38,7 +38,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). 
When the model has seen subject \code{i}, it can use that diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd index 84c6d09e9..6853f7041 100644 --- a/man/details_linear_reg_spark.Rd +++ b/man/details_linear_reg_spark.Rd @@ -45,9 +45,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd index c05a5e625..9e12761ec 100644 --- a/man/details_linear_reg_stan.Rd +++ b/man/details_linear_reg_stan.Rd @@ -57,9 +57,8 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_linear_reg_stan_glmer.Rd b/man/details_linear_reg_stan_glmer.Rd index ac4364117..32b8b4536 100644 --- a/man/details_linear_reg_stan_glmer.Rd +++ b/man/details_linear_reg_stan_glmer.Rd @@ -59,7 +59,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}} + \emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} +\emph{η}\if{html}{\out{}}\emph{i}\if{html}{\out{}} = (\emph{β}\if{html}{\out{}}0\if{html}{\out{}}+\emph{b}\if{html}{\out{}}0\emph{i}\if{html}{\out{}}) + \emph{β}\if{html}{\out{}}1\if{html}{\out{}}\emph{x}\if{html}{\out{}}\emph{i}1\if{html}{\out{}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index a809583ab..57c7a64e1 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -50,9 +50,8 @@ parameter estimates. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_brulee.Rd b/man/details_logistic_reg_brulee.Rd index 5b9843264..d908b5486 100644 --- a/man/details_logistic_reg_brulee.Rd +++ b/man/details_logistic_reg_brulee.Rd @@ -59,9 +59,8 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_gee.Rd b/man/details_logistic_reg_gee.Rd index f42648f48..fc06b3b53 100644 --- a/man/details_logistic_reg_gee.Rd +++ b/man/details_logistic_reg_gee.Rd @@ -82,9 +82,9 @@ gee_wflow <- fit(gee_wflow, data = toenail) }\if{html}{\out{
}} -\code{gee()} always prints out warnings and output even when \code{silent = TRUE}. -When using the \code{gee} engine, it will never produce output, even if -\code{silent = FALSE}. +The \code{gee::gee()} function always prints out warnings and output even +when \code{silent = TRUE}. The parsnip “gee” engine, by contrast, silences +all console output coming from \code{gee::gee()}, even if \code{silent = FALSE}. Also, because of issues with the \code{gee()} function, a supplementary call to \code{glm()} is needed to get the rank and QR decomposition objects so diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd index 404448d86..755d9cc60 100644 --- a/man/details_logistic_reg_glm.Rd +++ b/man/details_logistic_reg_glm.Rd @@ -49,9 +49,8 @@ To use a non-default \code{family} and/or \code{link}, pass in as an argument to Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_logistic_reg_glmer.Rd b/man/details_logistic_reg_glmer.Rd index 474208c88..73ee741e2 100644 --- a/man/details_logistic_reg_glmer.Rd +++ b/man/details_logistic_reg_glmer.Rd @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}} + \emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} +\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}}+\emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index bfd7d30be..c4f0e9ab2 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -48,9 +48,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd index 204fd9852..dc4c2dfc2 100644 --- a/man/details_logistic_reg_keras.Rd +++ b/man/details_logistic_reg_keras.Rd @@ -45,9 +45,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd index ae0b7916a..899ba1370 100644 --- a/man/details_logistic_reg_spark.Rd +++ b/man/details_logistic_reg_spark.Rd @@ -47,9 +47,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd index efd012b1d..a6c87e9a3 100644 --- a/man/details_logistic_reg_stan.Rd +++ b/man/details_logistic_reg_stan.Rd @@ -58,9 +58,8 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
} \subsection{Other details}{ diff --git a/man/details_logistic_reg_stan_glmer.Rd b/man/details_logistic_reg_stan_glmer.Rd index 2f4ed3ef1..fb0e716cf 100644 --- a/man/details_logistic_reg_stan_glmer.Rd +++ b/man/details_logistic_reg_stan_glmer.Rd @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}} + \emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} +\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}}+\emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd index 78a96e09f..d99c15cd2 100644 --- a/man/details_mars_earth.Rd +++ b/man/details_mars_earth.Rd @@ -76,9 +76,8 @@ in \code{\link[=discrim_flexible]{discrim_flexible()}}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Examples}{ diff --git a/man/details_mlp_brulee.Rd b/man/details_mlp_brulee.Rd index 072008c36..384fbce1b 100644 --- a/man/details_mlp_brulee.Rd +++ b/man/details_mlp_brulee.Rd @@ -108,9 +108,8 @@ layer. 
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd index 750f4eb57..681e723d9 100644 --- a/man/details_mlp_keras.Rd +++ b/man/details_mlp_keras.Rd @@ -81,9 +81,8 @@ This model has 5 tuning parameters: Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd index 7985ff93f..e290d269c 100644 --- a/man/details_mlp_nnet.Rd +++ b/man/details_mlp_nnet.Rd @@ -78,9 +78,8 @@ layer. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_brulee.Rd b/man/details_multinom_reg_brulee.Rd index 6280c0c62..6734f6f67 100644 --- a/man/details_multinom_reg_brulee.Rd +++ b/man/details_multinom_reg_brulee.Rd @@ -58,9 +58,8 @@ no improvement before stopping. (default: 5L). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index f35772f3c..e58d12343 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -47,9 +47,8 @@ see \link{glmnet-details}. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd index 97b3a0980..705adaba9 100644 --- a/man/details_multinom_reg_keras.Rd +++ b/man/details_multinom_reg_keras.Rd @@ -44,9 +44,8 @@ single hidden unit. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd index 721a9a747..868256f25 100644 --- a/man/details_multinom_reg_nnet.Rd +++ b/man/details_multinom_reg_nnet.Rd @@ -40,9 +40,8 @@ For \code{penalty}, the amount of regularization includes only the L2 penalty Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd index 61ebcc5ef..798462aa8 100644 --- a/man/details_multinom_reg_spark.Rd +++ b/man/details_multinom_reg_spark.Rd @@ -46,9 +46,8 @@ A value of \code{mixture = 1} corresponds to a pure lasso model, while Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. 
One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd index b44049de3..0323a1b47 100644 --- a/man/details_nearest_neighbor_kknn.Rd +++ b/man/details_nearest_neighbor_kknn.Rd @@ -73,9 +73,8 @@ it is not consistent with the actual data dimensions. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_pls_mixOmics.Rd b/man/details_pls_mixOmics.Rd index faf4bfdb5..878971706 100644 --- a/man/details_pls_mixOmics.Rd +++ b/man/details_pls_mixOmics.Rd @@ -74,19 +74,17 @@ pls(num_comp = integer(1), predictor_prop = double(1)) \%>\% ## ncomp = integer(1)) } -In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same -role as above but eventually targets -\code{\link[mixOmics:plsda]{mixOmics::plsda()}} or -\code{\link[mixOmics:splsda]{mixOmics::splsda()}} . +In this case, \code{\link[plsmod:pls_fit]{plsmod::pls_fit()}} has the same role +as above but eventually targets \code{\link[mixOmics:plsda]{mixOmics::plsda()}} +or \code{\link[mixOmics:splsda]{mixOmics::splsda()}} . } \subsection{Preprocessing requirements}{ Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Variance calculations are used in these computations so \emph{zero-variance} predictors (i.e., with a single unique value) should be eliminated diff --git a/man/details_poisson_reg_glm.Rd b/man/details_poisson_reg_glm.Rd index c481c62df..e28cc33b4 100644 --- a/man/details_poisson_reg_glm.Rd +++ b/man/details_poisson_reg_glm.Rd @@ -35,9 +35,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } } \keyword{internal} diff --git a/man/details_poisson_reg_glmer.Rd b/man/details_poisson_reg_glmer.Rd index 0878a08bb..17b1c84ee 100644 --- a/man/details_poisson_reg_glmer.Rd +++ b/man/details_poisson_reg_glmer.Rd @@ -37,7 +37,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}} + \emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} +\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}}+\emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). 
When the model has seen subject \code{i}, it can use that diff --git a/man/details_poisson_reg_glmnet.Rd b/man/details_poisson_reg_glmnet.Rd index 12d270dee..5581c5be9 100644 --- a/man/details_poisson_reg_glmnet.Rd +++ b/man/details_poisson_reg_glmnet.Rd @@ -52,9 +52,8 @@ poisson_reg(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_poisson_reg_hurdle.Rd b/man/details_poisson_reg_hurdle.Rd index 22841351f..f4503d7fb 100644 --- a/man/details_poisson_reg_hurdle.Rd +++ b/man/details_poisson_reg_hurdle.Rd @@ -36,9 +36,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_poisson_reg_stan.Rd b/man/details_poisson_reg_stan.Rd index ac5cd8767..f24c77f56 100644 --- a/man/details_poisson_reg_stan.Rd +++ b/man/details_poisson_reg_stan.Rd @@ -62,9 +62,8 @@ process. Change this value in \code{set_engine()} to show the MCMC logs. 
Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. } \subsection{Other details}{ diff --git a/man/details_poisson_reg_stan_glmer.Rd b/man/details_poisson_reg_stan_glmer.Rd index 44802f0d6..e77757818 100644 --- a/man/details_poisson_reg_stan_glmer.Rd +++ b/man/details_poisson_reg_stan_glmer.Rd @@ -58,7 +58,7 @@ This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor (\emph{η}) for a random intercept: -\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}} + \emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} +\emph{η}\if{html}{\out{<sub>}}\emph{i}\if{html}{\out{</sub>}} = (\emph{β}\if{html}{\out{<sub>}}0\if{html}{\out{</sub>}}+\emph{b}\if{html}{\out{<sub>}}0\emph{i}\if{html}{\out{</sub>}}) + \emph{β}\if{html}{\out{<sub>}}1\if{html}{\out{</sub>}}\emph{x}\if{html}{\out{<sub>}}\emph{i}1\if{html}{\out{</sub>}} where \emph{i} denotes the \code{i}th independent experimental unit (e.g. subject). When the model has seen subject \code{i}, it can use that diff --git a/man/details_poisson_reg_zeroinfl.Rd b/man/details_poisson_reg_zeroinfl.Rd index db965e59b..298ca88d2 100644 --- a/man/details_poisson_reg_zeroinfl.Rd +++ b/man/details_poisson_reg_zeroinfl.Rd @@ -37,9 +37,8 @@ poisson_reg() \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. 
+formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability diff --git a/man/details_proportional_hazards_glmnet.Rd b/man/details_proportional_hazards_glmnet.Rd index 8ca345b89..ae235edda 100644 --- a/man/details_proportional_hazards_glmnet.Rd +++ b/man/details_proportional_hazards_glmnet.Rd @@ -51,9 +51,8 @@ proportional_hazards(penalty = double(1), mixture = double(1)) \%>\% Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_rule_fit_xrf.Rd b/man/details_rule_fit_xrf.Rd index d2b0d6f31..86e2ada73 100644 --- a/man/details_rule_fit_xrf.Rd +++ b/man/details_rule_fit_xrf.Rd @@ -133,9 +133,8 @@ whereas \strong{xrf} uses an internal 5-fold cross-validation to determine it Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. 
} \subsection{References}{ diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index 12123383f..e2cd2ab83 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -76,9 +76,8 @@ class predictions (e.g., accuracy). Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd index 0ff7eeb03..dd6726a5e 100644 --- a/man/details_svm_linear_kernlab.Rd +++ b/man/details_svm_linear_kernlab.Rd @@ -73,9 +73,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd index 2ddd4605a..ed1d80f78 100644 --- a/man/details_svm_poly_kernlab.Rd +++ b/man/details_svm_poly_kernlab.Rd @@ -85,9 +85,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd index 1126a78c5..05f4f8e30 100644 --- a/man/details_svm_rbf_kernlab.Rd +++ b/man/details_svm_rbf_kernlab.Rd @@ -85,9 +85,8 @@ by R’s random number stream. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the -formula method via -\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will -convert factor columns to indicators. +formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, +parsnip will convert factor columns to indicators. Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index 444c449ee..3d8abd77a 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -32,7 +32,7 @@ in a node that are required for the node to be split further.} \description{ \code{rand_forest()} defines a model that creates a large number of decision trees, each independent of the others. The final prediction uses all -predictions from the individual trees and combines them. The function can fit +predictions from the individual trees and combines them. This function can fit classification, regression, and censored regression models. 
\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")} diff --git a/man/rmd/example_mlm.Rmd b/man/rmd/example_mlm.Rmd deleted file mode 100644 index be6054cde..000000000 --- a/man/rmd/example_mlm.Rmd +++ /dev/null @@ -1,41 +0,0 @@ -```{r quiet-load, include = FALSE} -library(tidymodels) -library(multilevelmod) -library(poissonreg) # contains engines for poisson_reg() -``` - -```{r, message = FALSE, warning = FALSE} -library(tidymodels) -library(multilevelmod) -library(poissonreg) # contains engines for poisson_reg() - -# The lme4 package is required for this model. - -tidymodels_prefer() - -# Split out two subjects to show how prediction works -data_train <- - longitudinal_counts %>% - filter(!(subject %in% c("1", "2"))) - -data_new <- - longitudinal_counts %>% - filter(subject %in% c("1", "2")) - -# Fit the model -count_mod <- - poisson_reg() %>% - set_engine("glmer") %>% - fit(y ~ time + x + (1 | subject), data = data_train) -``` - -```{r} -count_mod -``` - -When making predictions, the basic `predict()` method does the trick: - -```{r} -count_mod %>% predict(data_new) -``` - diff --git a/man/rmd/linear_reg_gee.Rmd b/man/rmd/linear_reg_gee.Rmd index 90b721d75..55c73a7cd 100644 --- a/man/rmd/linear_reg_gee.Rmd +++ b/man/rmd/linear_reg_gee.Rmd @@ -65,7 +65,7 @@ gee_wflow <- fit(gee_wflow, data = warpbreaks) ``` -`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `"gee"` engine, it will never produce output, even if `silent = FALSE`. +The `gee::gee()` function always prints out warnings and output even when `silent = TRUE`. The parsnip "gee" engine, by contrast, silences all console output coming from `gee::gee()`, even if `silent = FALSE`. Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. 
diff --git a/man/rmd/linear_reg_gls.Rmd b/man/rmd/linear_reg_gls.Rmd index c780d3c45..717108cc7 100644 --- a/man/rmd/linear_reg_gls.Rmd +++ b/man/rmd/linear_reg_gls.Rmd @@ -111,7 +111,9 @@ library(broom.mixed) library(broom.mixed) # lme: -lme_fit %>% tidy() %>% filter(group == "fixed") %>% select(-group, -effect) +lme_fit %>% tidy() %>% + dplyr::filter(group == "fixed") %>% + dplyr::select(-group, -effect) # gls: gls_fit %>% tidy() diff --git a/man/rmd/logistic_reg_gee.Rmd b/man/rmd/logistic_reg_gee.Rmd index 588700a76..ca02f34b4 100644 --- a/man/rmd/logistic_reg_gee.Rmd +++ b/man/rmd/logistic_reg_gee.Rmd @@ -65,7 +65,7 @@ gee_wflow <- fit(gee_wflow, data = toenail) ``` -`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. +The `gee::gee()` function always prints out warnings and output even when `silent = TRUE`. The parsnip "gee" engine, by contrast, silences all console output coming from `gee::gee()`, even if `silent = FALSE`. Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. diff --git a/man/rule_fit.Rd b/man/rule_fit.Rd index 1107359fa..ed3fd0130 100644 --- a/man/rule_fit.Rd +++ b/man/rule_fit.Rd @@ -52,7 +52,7 @@ to use for fitting.} } \description{ \code{rule_fit()} defines a model that derives simple feature rules from a tree -ensemble and uses them as features in a regularized model. The function can +ensemble and uses them as features in a regularized model. This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rule_fit")} diff --git a/man/survival_reg.Rd b/man/survival_reg.Rd index a510b90f4..212b6cd30 100644 --- a/man/survival_reg.Rd +++ b/man/survival_reg.Rd @@ -17,7 +17,7 @@ to use for fitting.} outcome. 
The default is "weibull".} } \description{ -\code{survival_reg()} defines a parametric survival model. The function can fit +\code{survival_reg()} defines a parametric survival model. This function can fit censored regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("survival_reg")} diff --git a/man/svm_linear.Rd b/man/svm_linear.Rd index 7bfa9f6d9..0d9e3944d 100644 --- a/man/svm_linear.Rd +++ b/man/svm_linear.Rd @@ -25,7 +25,7 @@ loss function (regression only)} the model tries to maximize the width of the margin between classes (using a linear class boundary). For regression, the model optimizes a robust loss function that is only affected by very large model residuals and uses a -linear fit. The function can fit classification and regression models. +linear fit. This function can fit classification and regression models. \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")} From 241656086d1735ade2183da52a9ec6d75f03c7af Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 7 Feb 2022 20:53:51 -0500 Subject: [PATCH 63/65] retain md files --- .gitignore | 1 - man/rmd/C5_rules_C5.0.md | 58 ++++++ man/rmd/bag_mars_earth.md | 96 ++++++++++ man/rmd/bag_tree_C5.0.md | 53 ++++++ man/rmd/bag_tree_rpart.md | 125 +++++++++++++ man/rmd/bart_dbarts.md | 113 ++++++++++++ man/rmd/boost_tree_C5.0.md | 64 +++++++ man/rmd/boost_tree_mboost.md | 59 +++++++ man/rmd/boost_tree_spark.md | 118 +++++++++++++ man/rmd/boost_tree_xgboost.md | 140 +++++++++++++++ man/rmd/cubist_rules_Cubist.md | 64 +++++++ man/rmd/decision_tree_C5.0.md | 51 ++++++ man/rmd/decision_tree_party.md | 62 +++++++ man/rmd/decision_tree_rpart.md | 118 +++++++++++++ man/rmd/decision_tree_spark.md | 85 +++++++++ man/rmd/discrim_flexible_earth.md | 62 +++++++ man/rmd/discrim_linear_MASS.md | 44 +++++ man/rmd/discrim_linear_mda.md | 54 ++++++ man/rmd/discrim_linear_sda.md | 54 ++++++ man/rmd/discrim_linear_sparsediscrim.md | 66 +++++++ man/rmd/discrim_quad_MASS.md | 44 +++++ 
man/rmd/discrim_quad_sparsediscrim.md | 62 +++++++ man/rmd/discrim_regularized_klaR.md | 66 +++++++ man/rmd/gen_additive_mod_mgcv.md | 107 ++++++++++++ man/rmd/glmnet-details.md | 213 +++++++++++++++++++++++ man/rmd/linear_reg_brulee.md | 62 +++++++ man/rmd/linear_reg_gee.md | 88 ++++++++++ man/rmd/linear_reg_glm.md | 62 +++++++ man/rmd/linear_reg_glmnet.md | 62 +++++++ man/rmd/linear_reg_gls.md | 207 ++++++++++++++++++++++ man/rmd/linear_reg_keras.md | 56 ++++++ man/rmd/linear_reg_lm.md | 39 +++++ man/rmd/linear_reg_lme.md | 103 +++++++++++ man/rmd/linear_reg_lmer.md | 103 +++++++++++ man/rmd/linear_reg_spark.md | 70 ++++++++ man/rmd/linear_reg_stan.md | 59 +++++++ man/rmd/linear_reg_stan_glmer.md | 112 ++++++++++++ man/rmd/logistic-reg.md | 196 +++++++++++++++++++++ man/rmd/logistic_reg_LiblineaR.md | 61 +++++++ man/rmd/logistic_reg_brulee.md | 61 +++++++ man/rmd/logistic_reg_gee.md | 88 ++++++++++ man/rmd/logistic_reg_glm.md | 62 +++++++ man/rmd/logistic_reg_glmer.md | 102 +++++++++++ man/rmd/logistic_reg_glmnet.md | 62 +++++++ man/rmd/logistic_reg_keras.md | 56 ++++++ man/rmd/logistic_reg_spark.md | 71 ++++++++ man/rmd/logistic_reg_stan.md | 59 +++++++ man/rmd/logistic_reg_stan_glmer.md | 111 ++++++++++++ man/rmd/mars_earth.md | 94 ++++++++++ man/rmd/mlp_brulee.md | 127 ++++++++++++++ man/rmd/mlp_keras.md | 108 ++++++++++++ man/rmd/mlp_nnet.md | 102 +++++++++++ man/rmd/multinom_reg_brulee.md | 61 +++++++ man/rmd/multinom_reg_glmnet.md | 62 +++++++ man/rmd/multinom_reg_keras.md | 56 ++++++ man/rmd/multinom_reg_nnet.md | 58 ++++++ man/rmd/multinom_reg_spark.md | 71 ++++++++ man/rmd/naive_Bayes_klaR.md | 57 ++++++ man/rmd/naive_Bayes_naivebayes.md | 57 ++++++ man/rmd/nearest-neighbor.md | 67 +++++++ man/rmd/nearest_neighbor_kknn.md | 95 ++++++++++ man/rmd/no-pooling.md | 20 +++ man/rmd/null-model.md | 41 +++++ man/rmd/one-hot.md | 78 +++++++++ man/rmd/pls_mixOmics.md | 99 +++++++++++ man/rmd/poisson_reg_gee.md | 87 +++++++++ man/rmd/poisson_reg_glm.md | 38 ++++ 
man/rmd/poisson_reg_glmer.md | 101 +++++++++++ man/rmd/poisson_reg_glmnet.md | 56 ++++++ man/rmd/poisson_reg_hurdle.md | 108 ++++++++++++ man/rmd/poisson_reg_stan.md | 63 +++++++ man/rmd/poisson_reg_stan_glmer.md | 110 ++++++++++++ man/rmd/poisson_reg_zeroinfl.md | 109 ++++++++++++ man/rmd/proportional_hazards_glmnet.md | 115 ++++++++++++ man/rmd/proportional_hazards_survival.md | 100 +++++++++++ man/rmd/rand_forest_party.md | 58 ++++++ man/rmd/rand_forest_randomForest.md | 96 ++++++++++ man/rmd/rand_forest_ranger.md | 105 +++++++++++ man/rmd/rand_forest_spark.md | 102 +++++++++++ man/rmd/rule_fit_xrf.md | 143 +++++++++++++++ man/rmd/surv_reg_flexsurv.md | 46 +++++ man/rmd/surv_reg_survival.md | 76 ++++++++ man/rmd/survival_reg_flexsurv.md | 49 ++++++ man/rmd/survival_reg_survival.md | 80 +++++++++ man/rmd/svm_linear_LiblineaR.md | 90 ++++++++++ man/rmd/svm_linear_kernlab.md | 92 ++++++++++ man/rmd/svm_poly_kernlab.md | 106 +++++++++++ man/rmd/svm_rbf_kernlab.md | 100 +++++++++++ 88 files changed, 7213 insertions(+), 1 deletion(-) create mode 100644 man/rmd/C5_rules_C5.0.md create mode 100644 man/rmd/bag_mars_earth.md create mode 100644 man/rmd/bag_tree_C5.0.md create mode 100644 man/rmd/bag_tree_rpart.md create mode 100644 man/rmd/bart_dbarts.md create mode 100644 man/rmd/boost_tree_C5.0.md create mode 100644 man/rmd/boost_tree_mboost.md create mode 100644 man/rmd/boost_tree_spark.md create mode 100644 man/rmd/boost_tree_xgboost.md create mode 100644 man/rmd/cubist_rules_Cubist.md create mode 100644 man/rmd/decision_tree_C5.0.md create mode 100644 man/rmd/decision_tree_party.md create mode 100644 man/rmd/decision_tree_rpart.md create mode 100644 man/rmd/decision_tree_spark.md create mode 100644 man/rmd/discrim_flexible_earth.md create mode 100644 man/rmd/discrim_linear_MASS.md create mode 100644 man/rmd/discrim_linear_mda.md create mode 100644 man/rmd/discrim_linear_sda.md create mode 100644 man/rmd/discrim_linear_sparsediscrim.md create mode 100644 
man/rmd/discrim_quad_MASS.md create mode 100644 man/rmd/discrim_quad_sparsediscrim.md create mode 100644 man/rmd/discrim_regularized_klaR.md create mode 100644 man/rmd/gen_additive_mod_mgcv.md create mode 100644 man/rmd/glmnet-details.md create mode 100644 man/rmd/linear_reg_brulee.md create mode 100644 man/rmd/linear_reg_gee.md create mode 100644 man/rmd/linear_reg_glm.md create mode 100644 man/rmd/linear_reg_glmnet.md create mode 100644 man/rmd/linear_reg_gls.md create mode 100644 man/rmd/linear_reg_keras.md create mode 100644 man/rmd/linear_reg_lm.md create mode 100644 man/rmd/linear_reg_lme.md create mode 100644 man/rmd/linear_reg_lmer.md create mode 100644 man/rmd/linear_reg_spark.md create mode 100644 man/rmd/linear_reg_stan.md create mode 100644 man/rmd/linear_reg_stan_glmer.md create mode 100644 man/rmd/logistic-reg.md create mode 100644 man/rmd/logistic_reg_LiblineaR.md create mode 100644 man/rmd/logistic_reg_brulee.md create mode 100644 man/rmd/logistic_reg_gee.md create mode 100644 man/rmd/logistic_reg_glm.md create mode 100644 man/rmd/logistic_reg_glmer.md create mode 100644 man/rmd/logistic_reg_glmnet.md create mode 100644 man/rmd/logistic_reg_keras.md create mode 100644 man/rmd/logistic_reg_spark.md create mode 100644 man/rmd/logistic_reg_stan.md create mode 100644 man/rmd/logistic_reg_stan_glmer.md create mode 100644 man/rmd/mars_earth.md create mode 100644 man/rmd/mlp_brulee.md create mode 100644 man/rmd/mlp_keras.md create mode 100644 man/rmd/mlp_nnet.md create mode 100644 man/rmd/multinom_reg_brulee.md create mode 100644 man/rmd/multinom_reg_glmnet.md create mode 100644 man/rmd/multinom_reg_keras.md create mode 100644 man/rmd/multinom_reg_nnet.md create mode 100644 man/rmd/multinom_reg_spark.md create mode 100644 man/rmd/naive_Bayes_klaR.md create mode 100644 man/rmd/naive_Bayes_naivebayes.md create mode 100644 man/rmd/nearest-neighbor.md create mode 100644 man/rmd/nearest_neighbor_kknn.md create mode 100644 man/rmd/no-pooling.md create mode 
100644 man/rmd/null-model.md create mode 100644 man/rmd/one-hot.md create mode 100644 man/rmd/pls_mixOmics.md create mode 100644 man/rmd/poisson_reg_gee.md create mode 100644 man/rmd/poisson_reg_glm.md create mode 100644 man/rmd/poisson_reg_glmer.md create mode 100644 man/rmd/poisson_reg_glmnet.md create mode 100644 man/rmd/poisson_reg_hurdle.md create mode 100644 man/rmd/poisson_reg_stan.md create mode 100644 man/rmd/poisson_reg_stan_glmer.md create mode 100644 man/rmd/poisson_reg_zeroinfl.md create mode 100644 man/rmd/proportional_hazards_glmnet.md create mode 100644 man/rmd/proportional_hazards_survival.md create mode 100644 man/rmd/rand_forest_party.md create mode 100644 man/rmd/rand_forest_randomForest.md create mode 100644 man/rmd/rand_forest_ranger.md create mode 100644 man/rmd/rand_forest_spark.md create mode 100644 man/rmd/rule_fit_xrf.md create mode 100644 man/rmd/surv_reg_flexsurv.md create mode 100644 man/rmd/surv_reg_survival.md create mode 100644 man/rmd/survival_reg_flexsurv.md create mode 100644 man/rmd/survival_reg_survival.md create mode 100644 man/rmd/svm_linear_LiblineaR.md create mode 100644 man/rmd/svm_linear_kernlab.md create mode 100644 man/rmd/svm_poly_kernlab.md create mode 100644 man/rmd/svm_rbf_kernlab.md diff --git a/.gitignore b/.gitignore index f45c8601f..9d682298c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,3 @@ derby.log logs/* revdep/* docs* -man/rmd/*.md diff --git a/man/rmd/C5_rules_C5.0.md b/man/rmd/C5_rules_C5.0.md new file mode 100644 index 000000000..631a299fa --- /dev/null +++ b/man/rmd/C5_rules_C5.0.md @@ -0,0 +1,58 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `trees`: # Trees (type: integer, default: 1L) + +Note that C5.0 has a tool for _early stopping_ during boosting where less iterations of boosting are performed than the number requested. 
`C5_rules()` turns this feature off (although it can be re-enabled using [C50::C5.0Control()]). + +## Translation from parsnip to the underlying model call (classification) + +There is a parsnip extension package required to fit this model to this mode: **rules**. + + +```r +library(rules) + +C5_rules( + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +``` +## C5.0 Model Specification (classification) +## +## Main Arguments: +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: C5.0 +## +## Model fit template: +## rules::c5_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## trials = integer(1), minCases = integer(1)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## References + + - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. + + - Quinlan R (1993)."Combining Instance-Based and Model-Based Learning." Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. + + - Kuhn M and Johnson K (2013). _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/bag_mars_earth.md b/man/rmd/bag_mars_earth.md new file mode 100644 index 000000000..466f3d43c --- /dev/null +++ b/man/rmd/bag_mars_earth.md @@ -0,0 +1,96 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `prod_degree`: Degree of Interaction (type: integer, default: 1L) + +- `prune_method`: Pruning Method (type: character, default: 'backward') + +- `num_terms`: # Model Terms (type: integer, default: see below) + +The default value of `num_terms` depends on the number of predictor columns. For a data frame `x`, the default is `min(200, max(20, 2 * ncol(x))) + 1` (see [earth::earth()] and the reference below). + +## Translation from parsnip to the original package (regression) + +There is a parsnip extension package required to fit this model to this mode: **baguette**. + + +```r +bag_mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% + set_engine("earth") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Bagged MARS Model Specification (regression) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Computational engine: earth +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), nprune = integer(1), degree = integer(1), +## pmethod = character(1), base_model = "MARS") +``` + +## Translation from parsnip to the original package (classification) + +There is a parsnip extension package required to fit this model to this mode: **baguette**. 
+ + +```r +library(baguette) + +bag_mars( + num_terms = integer(1), + prod_degree = integer(1), + prune_method = character(1) +) %>% + set_engine("earth") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Bagged MARS Model Specification (classification) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Computational engine: earth +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), nprune = integer(1), degree = integer(1), +## pmethod = character(1), base_model = "MARS") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## References + + - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Friedman, J. 1991. "Multivariate Adaptive Regression Splines." _The Annals of Statistics_, vol. 19, no. 1, pp. 1-67. + + - Milborrow, S. ["Notes on the earth package."](http://www.milbo.org/doc/earth-notes.pdf) + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/bag_tree_C5.0.md b/man/rmd/bag_tree_C5.0.md new file mode 100644 index 000000000..d5a1597b2 --- /dev/null +++ b/man/rmd/bag_tree_C5.0.md @@ -0,0 +1,53 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `min_n`: Minimal Node Size (type: integer, default: 2L) + +## Translation from parsnip to the original package (classification) + +There is a parsnip extension package required to fit this model to this mode: **baguette**. 
+ + +```r +library(baguette) + +bag_tree(min_n = integer()) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Bagged Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = 0 +## min_n = integer() +## +## Computational engine: C5.0 +## +## Model fit template: +## baguette::bagger(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## minCases = integer(), base_model = "C5.0") +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + + +## References + + - Breiman, L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. + diff --git a/man/rmd/bag_tree_rpart.md b/man/rmd/bag_tree_rpart.md new file mode 100644 index 000000000..33c684c3e --- /dev/null +++ b/man/rmd/bag_tree_rpart.md @@ -0,0 +1,125 @@ + + + +For this engine, there are multiple modes: classification, regression, and censored regression + +## Tuning Parameters + + + +This model has 4 tuning parameters: + +- `class_cost`: Class Cost (type: double, default: (see below)) + +- `tree_depth`: Tree Depth (type: integer, default: 30L) + +- `min_n`: Minimal Node Size (type: integer, default: 2L) + +- `cost_complexity`: Cost-Complexity Parameter (type: double, default: 0.01) + +For the `class_cost` parameter, the value can be a non-negative scalar for a class cost (where a cost of 1 means no extra cost). This is useful for when the first level of the outcome factor is the minority class. If this is not the case, values between zero and one can be used to bias to the second level of the factor. 
+ + +## Translation from parsnip to the original package (classification) + +There is a parsnip extension package required to fit this model to this mode: **baguette**. + + +```r +library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Bagged Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +``` + + +## Translation from parsnip to the original package (regression) + +There is a parsnip extension package required to fit this model to this mode: **baguette**. + + +```r +library(baguette) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Bagged Decision Tree Model Specification (regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## baguette::bagger(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), cp = double(1), maxdepth = integer(1), +## minsplit = integer(1), base_model = "CART") +``` + +## Translation from parsnip to the original package (censored regression) + +There is a parsnip extension package required to fit this model to this mode: **censored**. 
+ + +```r +library(censored) + +bag_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Bagged Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## ipred::bagging(formula = missing_arg(), data = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = integer(1)) +``` + + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## References + + - Breiman L. 1996. "Bagging predictors". Machine Learning. 24 (2): 123-140 + + - Hothorn T, Lausen B, Benner A, Radespiel-Troeger M. 2004. Bagging Survival Trees. _Statistics in Medicine_, 23(1), 77–91. + + - Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. diff --git a/man/rmd/bart_dbarts.md b/man/rmd/bart_dbarts.md new file mode 100644 index 000000000..ea8595476 --- /dev/null +++ b/man/rmd/bart_dbarts.md @@ -0,0 +1,113 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 4 tuning parameters: + +- `trees`: # Trees (type: integer, default: 200L) + +- `prior_terminal_node_coef`: Terminal Node Prior Coefficient (type: double, default: 0.95) + +- `prior_terminal_node_expo`: Terminal Node Prior Exponent (type: double, default: 2.00) + +- `prior_outcome_range`: Prior for Outcome Range (type: double, default: 2.00) + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + +* `keepevery`, `n.thin`: Every `keepevery` draw is kept to be returned to the user. 
Useful for "thinning" samples. + +* `ntree`, `n.trees`: The number of trees in the sum-of-trees formulation. + +* `ndpost`, `n.samples`: The number of posterior draws after burn in, `ndpost` / `keepevery` will actually be returned. + +* `nskip`, `n.burn`: Number of MCMC iterations to be treated as burn in. + +* `nchain`, `n.chains`: Integer specifying how many independent tree sets and fits should be calculated. + +* `nthread`, `n.threads`: Integer specifying how many threads to use. Depending on the CPU architecture, using more than the number of chains can degrade performance for small/medium data sets. As such some calculations may be executed single threaded regardless. + +* `combinechains`, `combineChains`: Logical; if `TRUE`, samples will be returned in arrays of dimensions equal to `nchain` times `ndpost` times number of observations. + +## Translation from parsnip to the original package (classification) + + +```r +bart( + trees = integer(1), + prior_terminal_node_coef = double(1), + prior_terminal_node_expo = double(1), + prior_outcome_range = double(1) +) %>% + set_engine("dbarts") %>% + set_mode("classification") %>% + translate() +``` + +``` +## BART Model Specification (classification) +## +## Main Arguments: +## trees = integer(1) +## prior_terminal_node_coef = double(1) +## prior_terminal_node_expo = double(1) +## prior_outcome_range = double(1) +## +## Computational engine: dbarts +## +## Model fit template: +## dbarts::bart(x = missing_arg(), y = missing_arg(), ntree = integer(1), +## base = double(1), power = double(1), k = double(1), verbose = FALSE, +## keeptrees = TRUE, keepcall = FALSE) +``` + + +## Translation from parsnip to the original package (regression) + + +```r +bart( + trees = integer(1), + prior_terminal_node_coef = double(1), + prior_terminal_node_expo = double(1), + prior_outcome_range = double(1) +) %>% + set_engine("dbarts") %>% + set_mode("regression") %>% + translate() +``` + +``` +## BART Model Specification (regression) +## 
+## Main Arguments: +## trees = integer(1) +## prior_terminal_node_coef = double(1) +## prior_terminal_node_expo = double(1) +## prior_outcome_range = double(1) +## +## Computational engine: dbarts +## +## Model fit template: +## dbarts::bart(x = missing_arg(), y = missing_arg(), ntree = integer(1), +## base = double(1), power = double(1), k = double(1), verbose = FALSE, +## keeptrees = TRUE, keepcall = FALSE) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +[dbarts::bart()] will also convert the factors to indicators if the user does not create them first. + + +## References + + - Chipman, George, McCulloch. "BART: Bayesian additive regression trees." _Ann. Appl. Stat._ 4 (1) 266 - 298, March 2010. diff --git a/man/rmd/boost_tree_C5.0.md b/man/rmd/boost_tree_C5.0.md new file mode 100644 index 000000000..720d0f2bd --- /dev/null +++ b/man/rmd/boost_tree_C5.0.md @@ -0,0 +1,64 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `trees`: # Trees (type: integer, default: 15L) + +- `min_n`: Minimal Node Size (type: integer, default: 2L) + +- `sample_size`: Proportion Observations Sampled (type: double, default: 1.0) + +The implementation of C5.0 limits the number of trees to be between 1 and 100. 
+ +## Translation from parsnip to the original package (classification) + + +```r +boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (classification) +## +## Main Arguments: +## trees = integer() +## min_n = integer() +## sample_size = numeric() +## +## Computational engine: C5.0 +## +## Model fit template: +## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## trials = integer(), minCases = integer(), sample = numeric()) +``` + +[C5.0_train()] is a wrapper around [C50::C5.0()] that makes it easier to run this model. + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Other details + +### Early stopping + +By default, early stopping is used. To use the complete set of boosting iterations, pass `earlyStopping = FALSE` to [set_engine()]. Also, it is unlikely that early stopping will occur if `sample_size = 1`. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#boost-tree-C5.0) for `boost_tree()` with the `"C5.0"` engine. + +## References + +- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. 
diff --git a/man/rmd/boost_tree_mboost.md b/man/rmd/boost_tree_mboost.md new file mode 100644 index 000000000..04f751d7f --- /dev/null +++ b/man/rmd/boost_tree_mboost.md @@ -0,0 +1,59 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + + + +This model has 5 tuning parameters: + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `trees`: # Trees (type: integer, default: 100L) + +- `tree_depth`: Tree Depth (type: integer, default: 2L) + +- `min_n`: Minimal Node Size (type: integer, default: 10L) + +- `loss_reduction`: Minimum Loss Reduction (type: double, default: 0) + +The `mtry` parameter is related to the number of predictors. The default is to use all predictors. + +## Translation from parsnip to the original package (censored regression) + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +boost_tree() %>% + set_engine("mboost") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (censored regression) +## +## Computational engine: mboost +## +## Model fit template: +## censored::blackboost_train(formula = missing_arg(), data = missing_arg(), +## family = mboost::CoxPH()) +``` + +[blackboost_train()] is a wrapper around [mboost::blackboost()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## References + + - Buehlmann P, Hothorn T. 2007. Boosting algorithms: regularization, prediction and model fitting. _Statistical Science_, 22(4), 477–505. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/boost_tree_spark.md b/man/rmd/boost_tree_spark.md new file mode 100644 index 000000000..89b63ae02 --- /dev/null +++ b/man/rmd/boost_tree_spark.md @@ -0,0 +1,118 @@ + + + +For this engine, there are multiple modes: classification and regression. However, multiclass classification is not supported yet. + +## Tuning Parameters + + + +This model has 7 tuning parameters: + +- `tree_depth`: Tree Depth (type: integer, default: 5L) + +- `trees`: # Trees (type: integer, default: 20L) + +- `learn_rate`: Learning Rate (type: double, default: 0.1) + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `min_n`: Minimal Node Size (type: integer, default: 1L) + +- `loss_reduction`: Minimum Loss Reduction (type: double, default: 0.0) + +- `sample_size`: # Observations Sampled (type: integer, default: 1.0) + +The `mtry` parameter is related to the number of predictors. The default depends on the model mode. For classification, the square root of the number of predictors is used and for regression, one third of the predictors are sampled. 
+ +## Translation from parsnip to the original package (regression) + + +```r +boost_tree( + mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), + learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric() +) %>% + set_engine("spark") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (regression) +## +## Main Arguments: +## mtry = integer() +## trees = integer() +## min_n = integer() +## tree_depth = integer() +## learn_rate = numeric() +## loss_reduction = numeric() +## sample_size = numeric() +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(), +## type = "regression", feature_subset_strategy = integer(), +## max_iter = integer(), min_instances_per_node = min_rows(integer(0), +## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(), +## subsampling_rate = numeric(), seed = sample.int(10^5, 1)) +``` + +## Translation from parsnip to the original package (classification) + + +```r +boost_tree( + mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), + learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric() +) %>% + set_engine("spark") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (classification) +## +## Main Arguments: +## mtry = integer() +## trees = integer() +## min_n = integer() +## tree_depth = integer() +## learn_rate = numeric() +## loss_reduction = numeric() +## sample_size = numeric() +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(), +## type = "classification", feature_subset_strategy = integer(), +## max_iter = integer(), min_instances_per_node = min_rows(integer(0), +## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(), +## 
subsampling_rate = numeric(), seed = sample.int(10^5, 1)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. + +* Only the formula interface via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + + - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+ diff --git a/man/rmd/boost_tree_xgboost.md b/man/rmd/boost_tree_xgboost.md new file mode 100644 index 000000000..e9ec02af3 --- /dev/null +++ b/man/rmd/boost_tree_xgboost.md @@ -0,0 +1,140 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 8 tuning parameters: + +- `tree_depth`: Tree Depth (type: integer, default: 6L) + +- `trees`: # Trees (type: integer, default: 15L) + +- `learn_rate`: Learning Rate (type: double, default: 0.3) + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `min_n`: Minimal Node Size (type: integer, default: 1L) + +- `loss_reduction`: Minimum Loss Reduction (type: double, default: 0.0) + +- `sample_size`: Proportion Observations Sampled (type: double, default: 1.0) + +- `stop_iter`: # Iterations Before Stopping (type: integer, default: Inf) + +The `mtry` parameter is related to the number of predictors. The default is to use all predictors. [xgboost::xgb.train()] encodes this as a real number between zero and one. parsnip translates the number of columns to this type of value. The user should give the argument to `boost_tree()` as an integer (not a real number). 
+ +## Translation from parsnip to the original package (regression) + + +```r +boost_tree( + mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), + learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(), + stop_iter = integer() +) %>% + set_engine("xgboost") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (regression) +## +## Main Arguments: +## mtry = integer() +## trees = integer() +## min_n = integer() +## tree_depth = integer() +## learn_rate = numeric() +## loss_reduction = numeric() +## sample_size = numeric() +## stop_iter = integer() +## +## Computational engine: xgboost +## +## Model fit template: +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), +## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), +## eta = numeric(), gamma = numeric(), subsample = numeric(), +## early_stop = integer(), nthread = 1, verbose = 0) +``` + +## Translation from parsnip to the original package (classification) + + +```r +boost_tree( + mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(), + learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(), + stop_iter = integer() +) %>% + set_engine("xgboost") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Boosted Tree Model Specification (classification) +## +## Main Arguments: +## mtry = integer() +## trees = integer() +## min_n = integer() +## tree_depth = integer() +## learn_rate = numeric() +## loss_reduction = numeric() +## sample_size = numeric() +## stop_iter = integer() +## +## Computational engine: xgboost +## +## Model fit template: +## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(), +## nrounds = integer(), min_child_weight = integer(), max_depth = integer(), +## eta = numeric(), gamma = numeric(), subsample = numeric(), +## early_stop = integer(), nthread = 1, 
verbose = 0) +``` + +[xgb_train()] is a wrapper around [xgboost::xgb.train()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + +xgboost does not have a means to translate factor predictors to grouped splits. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via [fit.model_spec()], parsnip will convert factor columns to indicators using a one-hot encoding. + +For classification, non-numeric outcomes (i.e., factors) are internally converted to numeric. For binary classification, the `event_level` argument of `set_engine()` can be set to either `"first"` or `"second"` to specify which level should be used as the event. This can be helpful when a watchlist is used to monitor performance from within the xgboost training process. + +## Other details + +### Sparse matrices + +xgboost requires the data to be in a sparse format. If your predictor data are already in this format, then use [fit_xy.model_spec()] to pass it to the model function. Otherwise, parsnip converts the data to this format. + +### Parallel processing + +By default, the model is trained without parallel processing. This can be changed by passing the `nthread` parameter to [set_engine()]. However, it is unwise to combine this with external parallel processing when using the \pkg{tune} package. + +### Early stopping + +The `stop_iter()` argument allows the model to prematurely stop training if the objective function does not improve within `early_stop` iterations. + +The best way to use this feature is in conjunction with an _internal validation set_. To do this, pass the `validation` parameter of [xgb_train()] via the parsnip [set_engine()] function. This is the proportion of the training set that should be reserved for measuring performance (and stopping early).
+ +If the model specification has `early_stop >= trees`, `early_stop` is converted to `trees - 1` and a warning is issued. + +### Objective function + +parsnip chooses the objective function based on the characteristics of the outcome. To use a different loss, pass the `objective` argument to [set_engine()]. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#boost-tree-xgboost) for `boost_tree()` with the `"xgboost"` engine. + +## References + + - [XGBoost: A Scalable Tree Boosting System](https://arxiv.org/abs/1603.02754) + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/cubist_rules_Cubist.md b/man/rmd/cubist_rules_Cubist.md new file mode 100644 index 000000000..1680737a4 --- /dev/null +++ b/man/rmd/cubist_rules_Cubist.md @@ -0,0 +1,64 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `committees`: # Committees (type: integer, default: 1L) + +- `neighbors`: # Nearest Neighbors (type: integer, default: 0L) + +- `max_rules`: Max. Rules (type: integer, default: NA_integer_) + + +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **rules**. 
+ + +```r +library(rules) + +cubist_rules( + committees = integer(1), + neighbors = integer(1), + max_rules = integer(1) +) %>% + set_engine("Cubist") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Cubist Model Specification (regression) +## +## Main Arguments: +## committees = integer(1) +## neighbors = integer(1) +## max_rules = integer(1) +## +## Computational engine: Cubist +## +## Model fit template: +## rules::cubist_fit(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## committees = integer(1), neighbors = integer(1), max_rules = integer(1), +## composite = TRUE) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## References + + - Quinlan R (1992). "Learning with Continuous Classes." Proceedings of the 5th Australian Joint Conference On Artificial Intelligence, pp. 343-348. + + - Quinlan R (1993)."Combining Instance-Based and Model-Based Learning." Proceedings of the Tenth International Conference on Machine Learning, pp. 236-243. + + - Kuhn M and Johnson K (2013). _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/decision_tree_C5.0.md b/man/rmd/decision_tree_C5.0.md new file mode 100644 index 000000000..4891679e9 --- /dev/null +++ b/man/rmd/decision_tree_C5.0.md @@ -0,0 +1,51 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameter: + +- `min_n`: Minimal Node Size (type: integer, default: 2L) + +## Translation from parsnip to the original package (classification) + + +```r +decision_tree(min_n = integer()) %>% + set_engine("C5.0") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Decision Tree Model Specification (classification) +## +## Main Arguments: +## min_n = integer() +## +## Computational engine: C5.0 +## +## Model fit template: +## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## minCases = integer(), trials = 1) +``` + +[C5.0_train()] is a wrapper around [C50::C5.0()] that makes it easier to run this model. + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-C5.0) for `decision_tree()` with the `"C5.0"` engine. + +## References + +- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. 
+ diff --git a/man/rmd/decision_tree_party.md b/man/rmd/decision_tree_party.md new file mode 100644 index 000000000..3d11deee1 --- /dev/null +++ b/man/rmd/decision_tree_party.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `tree_depth`: Tree Depth (type: integer, default: see below) + +- `min_n`: Minimal Node Size (type: integer, default: 20L) + +The `tree_depth` parameter defaults to `0` which means no restrictions are applied to tree depth. + +An engine-specific parameter for this model is: + + * `mtry`: the number of predictors, selected at random, that are evaluated for splitting. The default is to use all predictors. + +## Translation from parsnip to the original package (censored regression) + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% + set_engine("party") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: party +## +## Model fit template: +## censored::cond_inference_surv_ctree(formula = missing_arg(), +## data = missing_arg(), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) +``` + +[cond_inference_surv_ctree()] is a wrapper around [party::ctree()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## References + + - Hothorn T, Hornik K, Zeileis A. 2006. Unbiased Recursive Partitioning: A Conditional Inference Framework. 
_Journal of Computational and Graphical Statistics_, 15(3), 651–674. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/decision_tree_rpart.md b/man/rmd/decision_tree_rpart.md new file mode 100644 index 000000000..65aafff4b --- /dev/null +++ b/man/rmd/decision_tree_rpart.md @@ -0,0 +1,118 @@ + + + +For this engine, there are multiple modes: classification, regression, and censored regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `tree_depth`: Tree Depth (type: integer, default: 30L) + +- `min_n`: Minimal Node Size (type: integer, default: 2L) + +- `cost_complexity`: Cost-Complexity Parameter (type: double, default: 0.01) + +## Translation from parsnip to the original package (classification) + + +```r +decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Decision Tree Model Specification (classification) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) +``` + + +## Translation from parsnip to the original package (regression) + + +```r +decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>% + set_engine("rpart") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Decision Tree Model Specification (regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, 
+## data)) +``` + + +## Translation from parsnip to the original package (censored regression) + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +decision_tree( + tree_depth = integer(1), + min_n = integer(1), + cost_complexity = double(1) +) %>% + set_engine("rpart") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Decision Tree Model Specification (censored regression) +## +## Main Arguments: +## cost_complexity = double(1) +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: rpart +## +## Model fit template: +## pec::pecRpart(formula = missing_arg(), data = missing_arg(), +## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L, +## data)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#decision-tree-rpart) for `decision_tree()` with the `"rpart"` engine. + +## References + +- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. 
+ diff --git a/man/rmd/decision_tree_spark.md b/man/rmd/decision_tree_spark.md new file mode 100644 index 000000000..0bd0fcc7d --- /dev/null +++ b/man/rmd/decision_tree_spark.md @@ -0,0 +1,85 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `tree_depth`: Tree Depth (type: integer, default: 5L) + +- `min_n`: Minimal Node Size (type: integer, default: 1L) + +## Translation from parsnip to the original package (classification) + + +```r +decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% + set_engine("spark") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Decision Tree Model Specification (classification) +## +## Main Arguments: +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_decision_tree_classifier(x = missing_arg(), formula = missing_arg(), +## max_depth = integer(1), min_instances_per_node = min_rows(0L, +## x), seed = sample.int(10^5, 1)) +``` + + +## Translation from parsnip to the original package (regression) + + +```r +decision_tree(tree_depth = integer(1), min_n = integer(1)) %>% + set_engine("spark") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Decision Tree Model Specification (regression) +## +## Main Arguments: +## tree_depth = integer(1) +## min_n = integer(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_decision_tree_regressor(x = missing_arg(), formula = missing_arg(), +## max_depth = integer(1), min_instances_per_node = min_rows(0L, +## x), seed = sample.int(10^5, 1)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. 
+ +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. + +* Only the formula interface via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + +- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer. + diff --git a/man/rmd/discrim_flexible_earth.md b/man/rmd/discrim_flexible_earth.md new file mode 100644 index 000000000..2dc556ca2 --- /dev/null +++ b/man/rmd/discrim_flexible_earth.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `num_terms`: # Model Terms (type: integer, default: (see below)) + +- `prod_degree`: Degree of Interaction (type: integer, default: 1L) + +- `prune_method`: Pruning Method (type: character, default: 'backward') + +The default value of `num_terms` depends on the number of columns (`p`): `min(200, max(20, 2 * p)) + 1`. Note that `num_terms = 1` is an intercept-only model. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +discrim_flexible( + num_terms = integer(0), + prod_degree = integer(0), + prune_method = character(0) +) %>% + translate() +``` + +``` +## Flexible Discriminant Model Specification (classification) +## +## Main Arguments: +## num_terms = integer(0) +## prod_degree = integer(0) +## prune_method = character(0) +## +## Computational engine: earth +## +## Model fit template: +## mda::fda(formula = missing_arg(), data = missing_arg(), nprune = integer(0), +## degree = integer(0), pmethod = character(0), method = earth::earth) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +## References + + - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal + Scoring, _Journal of the American Statistical Association_, 89:428, 1255-1270 + + - Friedman (1991). Multivariate Adaptive Regression Splines. _The Annals of Statistics_, 19(1), 1-67. diff --git a/man/rmd/discrim_linear_MASS.md b/man/rmd/discrim_linear_MASS.md new file mode 100644 index 000000000..c1a5eb4d6 --- /dev/null +++ b/man/rmd/discrim_linear_MASS.md @@ -0,0 +1,44 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +discrim_linear() %>% + set_engine("MASS") %>% + translate() +``` + +``` +## Linear Discriminant Model Specification (classification) +## +## Computational engine: MASS +## +## Model fit template: +## MASS::lda(formula = missing_arg(), data = missing_arg()) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/discrim_linear_mda.md b/man/rmd/discrim_linear_mda.md new file mode 100644 index 000000000..e32de4d70 --- /dev/null +++ b/man/rmd/discrim_linear_mda.md @@ -0,0 +1,54 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + + +This model has 1 tuning parameter: + +- `penalty`: Amount of Regularization (type: double, default: 1.0) + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. + + +```r +library(discrim) + +discrim_linear(penalty = numeric(0)) %>% + set_engine("mda") %>% + translate() +``` + +``` +## Linear Discriminant Model Specification (classification) +## +## Main Arguments: +## penalty = numeric(0) +## +## Computational engine: mda +## +## Model fit template: +## mda::fda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), +## method = mda::gen.ridge, keep.fitted = FALSE) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + - Hastie, Tibshirani & Buja (1994) Flexible Discriminant Analysis by Optimal + Scoring, _Journal of the American Statistical Association_, 89:428, 1255-1270 diff --git a/man/rmd/discrim_linear_sda.md b/man/rmd/discrim_linear_sda.md new file mode 100644 index 000000000..2ee3511fa --- /dev/null +++ b/man/rmd/discrim_linear_sda.md @@ -0,0 +1,54 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This engine has no tuning parameter arguments in [discrim_linear()]. + +However, there are a few engine-specific parameters that can be set or optimized when calling [set_engine()]: + +* `lambda`: the shrinkage parameters for the correlation matrix. This maps to the \pkg{dials} parameter [dials::shrinkage_correlation()]. + +* `lambda.var`: the shrinkage parameters for the predictor variances. This maps to [dials::shrinkage_variance()]. + +* `lambda.freqs`: the shrinkage parameters for the class frequencies. This maps to [dials::shrinkage_frequencies()]. + +* `diagonal`: a logical to make the model covariance diagonal or not. This maps to [dials::diagonal_covariance()]. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +discrim_linear() %>% + set_engine("sda") %>% + translate() +``` + +``` +## Linear Discriminant Model Specification (classification) +## +## Computational engine: sda +## +## Model fit template: +## sda::sda(Xtrain = missing_arg(), L = missing_arg(), verbose = FALSE) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + - Ahdesmaki, A., and K. Strimmer. 2010. Feature selection in omics prediction problems using cat scores and false non-discovery rate control. Ann. Appl. Stat. 4: 503-519. [Preprint](http://arxiv.org/abs/0903.2003). diff --git a/man/rmd/discrim_linear_sparsediscrim.md b/man/rmd/discrim_linear_sparsediscrim.md new file mode 100644 index 000000000..fed84696d --- /dev/null +++ b/man/rmd/discrim_linear_sparsediscrim.md @@ -0,0 +1,66 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameter: + +- `regularization_method`: Regularization Method (type: character, default: 'diagonal') + +The possible values of this parameter, and the functions that they execute, are: + +* `"diagonal"`: [sparsediscrim::lda_diag()] +* `"min_distance"`: [sparsediscrim::lda_emp_bayes_eigen()] +* `"shrink_mean"`: [sparsediscrim::lda_shrink_mean()] +* `"shrink_cov"`: [sparsediscrim::lda_shrink_cov()] + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +discrim_linear(regularization_method = character(0)) %>% + set_engine("sparsediscrim") %>% + translate() +``` + +``` +## Linear Discriminant Model Specification (classification) +## +## Main Arguments: +## regularization_method = character(0) +## +## Computational engine: sparsediscrim +## +## Model fit template: +## discrim::fit_regularized_linear(x = missing_arg(), y = missing_arg(), +## method = character(0)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + + - `lda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. + + - `lda_shrink_mean()`: Tong, Chen, Zhao, Improved mean estimation and its application to diagonal discriminant analysis, _Bioinformatics_, Volume 28, Issue 4, 15 February 2012, Pages 531-537. + + - `lda_shrink_cov()`: Pang, Tong and Zhao (2009), Shrinkage-based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data. _Biometrics_, 65, 1021-1029. + + - `lda_emp_bayes_eigen()`: Srivistava and Kubokawa (2007), Comparison of Discrimination Methods for High Dimensional Data, _Journal of the Japan Statistical Society_, 37:1, 123-134. 
diff --git a/man/rmd/discrim_quad_MASS.md b/man/rmd/discrim_quad_MASS.md new file mode 100644 index 000000000..9bf02973d --- /dev/null +++ b/man/rmd/discrim_quad_MASS.md @@ -0,0 +1,44 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. + + +```r +library(discrim) + +discrim_quad() %>% + set_engine("MASS") %>% + translate() +``` + +``` +## Quadratic Discriminant Model Specification (classification) +## +## Computational engine: MASS +## +## Model fit template: +## MASS::qda(formula = missing_arg(), data = missing_arg()) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations within each outcome class. For this reason, _zero-variance_ predictors (i.e., with a single unique value) within each class should be eliminated before fitting the model. + + + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/discrim_quad_sparsediscrim.md b/man/rmd/discrim_quad_sparsediscrim.md new file mode 100644 index 000000000..efcb5f158 --- /dev/null +++ b/man/rmd/discrim_quad_sparsediscrim.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameter: + +- `regularization_method`: Regularization Method (type: character, default: 'diagonal') + +The possible values of this parameter, and the functions that they execute, are: + +* `"diagonal"`: [sparsediscrim::qda_diag()] +* `"shrink_mean"`: [sparsediscrim::qda_shrink_mean()] +* `"shrink_cov"`: [sparsediscrim::qda_shrink_cov()] + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. + + +```r +library(discrim) + +discrim_quad(regularization_method = character(0)) %>% + set_engine("sparsediscrim") %>% + translate() +``` + +``` +## Quadratic Discriminant Model Specification (classification) +## +## Main Arguments: +## regularization_method = character(0) +## +## Computational engine: sparsediscrim +## +## Model fit template: +## discrim::fit_regularized_quad(x = missing_arg(), y = missing_arg(), +## method = character(0)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations within each outcome class. For this reason, _zero-variance_ predictors (i.e., with a single unique value) within each class should be eliminated before fitting the model. 
+ + + +## References + + - `qda_diag()`: Dudoit, Fridlyand and Speed (2002) Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data, _Journal of the American Statistical Association_, 97:457, 77-87. + + - `qda_shrink_mean()`: Tong, Chen, Zhao, Improved mean estimation and its application to diagonal discriminant analysis, _Bioinformatics_, Volume 28, Issue 4, 15 February 2012, Pages 531-537. + + - `qda_shrink_cov()`: Pang, Tong and Zhao (2009), Shrinkage-based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data. _Biometrics_, 65, 1021-1029. diff --git a/man/rmd/discrim_regularized_klaR.md b/man/rmd/discrim_regularized_klaR.md new file mode 100644 index 000000000..63238fd8b --- /dev/null +++ b/man/rmd/discrim_regularized_klaR.md @@ -0,0 +1,66 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + + +This model has 2 tuning parameters: + +- `frac_common_cov`: Fraction of the Common Covariance Matrix (type: double, default: (see below)) + +- `frac_identity`: Fraction of the Identity Matrix (type: double, default: (see below)) + +Some special cases for the RDA model: + +* `frac_identity = 0` and `frac_common_cov = 1` is a linear discriminant analysis (LDA) model. + +* `frac_identity = 0` and `frac_common_cov = 0` is a quadratic discriminant analysis (QDA) model. + + + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +discrim_regularized(frac_identity = numeric(0), frac_common_cov = numeric(0)) %>% + set_engine("klaR") %>% + translate() +``` + +``` +## Regularized Discriminant Model Specification (classification) +## +## Main Arguments: +## frac_common_cov = numeric(0) +## frac_identity = numeric(0) +## +## Computational engine: klaR +## +## Model fit template: +## klaR::rda(formula = missing_arg(), data = missing_arg(), lambda = numeric(0), +## gamma = numeric(0)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations within each outcome class. For this reason, _zero-variance_ predictors (i.e., with a single unique value) within each class should be eliminated before fitting the model. + + + +## References + + - Friedman, J (1989). Regularized Discriminant Analysis. _Journal of the American Statistical Association_, 84, 165-175. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/gen_additive_mod_mgcv.md b/man/rmd/gen_additive_mod_mgcv.md new file mode 100644 index 000000000..727f21039 --- /dev/null +++ b/man/rmd/gen_additive_mod_mgcv.md @@ -0,0 +1,107 @@ + + + +For this engine, there are multiple modes: regression and classification + +## Tuning Parameters + + + + +This model has 2 tuning parameters: + +- `select_features`: Select Features? 
(type: logical, default: FALSE) + +- `adjust_deg_free`: Smoothness Adjustment (type: double, default: 1.0) + + +## Translation from parsnip to the original package (regression) + + +```r +gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>% + set_engine("mgcv") %>% + set_mode("regression") %>% + translate() +``` + +``` +## GAM Specification (regression) +## +## Main Arguments: +## select_features = logical(1) +## adjust_deg_free = numeric(1) +## +## Computational engine: mgcv +## +## Model fit template: +## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), +## gamma = numeric(1)) +``` + +## Translation from parsnip to the original package (classification) + + +```r +gen_additive_mod(adjust_deg_free = numeric(1), select_features = logical(1)) %>% + set_engine("mgcv") %>% + set_mode("classification") %>% + translate() +``` + +``` +## GAM Specification (classification) +## +## Main Arguments: +## select_features = logical(1) +## adjust_deg_free = numeric(1) +## +## Computational engine: mgcv +## +## Model fit template: +## mgcv::gam(formula = missing_arg(), data = missing_arg(), select = logical(1), +## gamma = numeric(1), family = stats::binomial(link = "logit")) +``` + +## Model fitting + +This model should be used with a model formula so that smooth terms can be specified. For example: + + + +```r +library(mgcv) +gen_additive_mod() %>% + set_engine("mgcv") %>% + set_mode("regression") %>% + fit(mpg ~ wt + gear + cyl + s(disp, k = 10), data = mtcars) +``` + +``` +## parsnip model object +## +## +## Family: gaussian +## Link function: identity +## +## Formula: +## mpg ~ wt + gear + cyl + s(disp, k = 10) +## +## Estimated degrees of freedom: +## 7.52 total = 11.52 +## +## GCV score: 4.225228 +``` + +The smoothness of the terms will need to be manually specified (e.g., using `s(x, df = 10)`) in the formula. Tuning can be accomplished using the `adjust_deg_free` parameter. 
+ +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## References + + - Ross, W. 2021. [_Generalized Additive Models in R: A Free, Interactive Course using mgcv_](https://noamross.github.io/gams-in-r-course/) + + - Wood, S. 2017. _Generalized Additive Models: An Introduction with R_. Chapman and Hall/CRC. diff --git a/man/rmd/glmnet-details.md b/man/rmd/glmnet-details.md new file mode 100644 index 000000000..3c6750536 --- /dev/null +++ b/man/rmd/glmnet-details.md @@ -0,0 +1,213 @@ + + + +# tidymodels and glmnet + +The implementation of the glmnet package has some nice features. For example, one of the main tuning parameters, the regularization penalty, does not need to be specified when fitting the model. The package fits a compendium of values, called the regularization path. These values depend on the data set and the value of `alpha`, the mixture parameter between a pure ridge model (`alpha = 0`) and a pure lasso model (`alpha = 1`). When predicting, any penalty values can be simultaneously predicted, even those that are not exactly on the regularization path. For those, the model approximates between the closest path values to produce a prediction. There is an argument called `lambda` to the `glmnet()` function that is used to specify the path. + +In the discussion below, `linear_reg()` is used. The information is true for all parsnip models that have a `"glmnet"` engine. + +## Fitting and predicting using parsnip + +Recall that tidymodels uses standardized parameter names across models chosen to be low on jargon. The argument `penalty` is the equivalent of what glmnet calls the `lambda` value and `mixture` is the same as their `alpha` value. 
+ +In tidymodels, our `predict()` methods are defined to make one prediction at a time. For this model, that means predictions are for a single penalty value. For this reason, models that have glmnet engines require the user to always specify a single penalty value when the model is defined. For example, for linear regression: + +```r +linear_reg(penalty = 1) %>% set_engine("glmnet") +``` + +When the `predict()` method is called, it automatically uses the penalty that was given when the model was defined. For example: + + + +```r +library(tidymodels) + +fit <- + linear_reg(penalty = 1) %>% + set_engine("glmnet") %>% + fit(mpg ~ ., data = mtcars) + +# predict at penalty = 1 +predict(fit, mtcars[1:3,]) +``` + +``` +## # A tibble: 3 × 1 +## .pred +## +## 1 22.2 +## 2 21.5 +## 3 24.9 +``` + +However, any penalty values can be predicted simultaneously using the `multi_predict()` method: + + +```r +# predict at c(0.00, 0.01) +multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) +``` + +``` +## # A tibble: 3 × 1 +## .pred +## +## 1 +## 2 +## 3 +``` + +```r +# unnested: +multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) %>% + add_rowindex() %>% + unnest(cols = ".pred") +``` + +``` +## # A tibble: 6 × 3 +## penalty .pred .row +## +## 1 0 22.6 1 +## 2 0.01 22.5 1 +## 3 0 22.1 2 +## 4 0.01 22.1 2 +## 5 0 26.3 3 +## 6 0.01 26.3 3 +``` + +### Where did `lambda` go? + +It may appear odd that the `lambda` value does not get used in the fit: + + +```r +linear_reg(penalty = 1) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = 1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## family = "gaussian") +``` + +Internally, the value of `penalty = 1` is saved in the parsnip object and no value is set for `lambda`. This enables the full path to be fit by `glmnet()`. 
See the section below about setting the path. + +## How do I set the regularization path? + +Regardless of what value you use for `penalty`, the full coefficient path is used when [glmnet::glmnet()] is called. + +What if you want to manually set this path? Normally, you would pass a vector to `lambda` in [glmnet::glmnet()]. + +parsnip models that use a `glmnet` engine can use a special optional argument called `path_values`. This is _not_ an argument to [glmnet::glmnet()]; it is used by parsnip to independently set the path. + +For example, we have found that if you want a fully ridge regression model (i.e., `mixture = 0`), you can get the _wrong coefficients_ if the path does not contain zero (see [issue #431](https://github.com/tidymodels/parsnip/issues/431#issuecomment-782883848)). + +If we want to use our own path, the argument is passed as an engine-specific option: + + +```r +coef_path_values <- c(0, 10^seq(-5, 1, length.out = 7)) + +fit_ridge <- + linear_reg(penalty = 1, mixture = 0) %>% + set_engine("glmnet", path_values = coef_path_values) %>% + fit(mpg ~ ., data = mtcars) + +all.equal(sort(fit_ridge$fit$lambda), coef_path_values) +``` + +``` +## [1] TRUE +``` + +```r +# predict at penalty = 1 +predict(fit_ridge, mtcars[1:3,]) +``` + +``` +## # A tibble: 3 × 1 +## .pred +## +## 1 22.1 +## 2 21.8 +## 3 26.6 +``` + +## Tidying the model object + +[broom::tidy()] is a function that gives a summary of the object as a tibble. + +**tl;dr** `tidy()` on a `glmnet` model produced by parsnip gives the coefficients for the value given by `penalty`. + +When parsnip makes a model, it gives it an extra class. 
Using the `tidy()` method on the object produces coefficients for the penalty that was originally requested: + + +```r +tidy(fit) +``` + +``` +## # A tibble: 11 × 3 +## term estimate penalty +## +## 1 (Intercept) 35.3 1 +## 2 cyl -0.872 1 +## 3 disp 0 1 +## 4 hp -0.0101 1 +## 5 drat 0 1 +## 6 wt -2.59 1 +## 7 qsec 0 1 +## 8 vs 0 1 +## 9 am 0 1 +## 10 gear 0 1 +## 11 carb 0 1 +``` + +Note that there is a `tidy()` method for `glmnet` objects in the `broom` package. If this is used directly on the underlying `glmnet` object, it returns _all of the coefficients on the path_: + + +```r +# Use the basic tidy() method for glmnet +all_tidy_coefs <- broom:::tidy.glmnet(fit$fit) +all_tidy_coefs +``` + +``` +## # A tibble: 640 × 5 +## term step estimate lambda dev.ratio +## +## 1 (Intercept) 1 20.1 5.15 0 +## 2 (Intercept) 2 21.6 4.69 0.129 +## 3 (Intercept) 3 23.2 4.27 0.248 +## 4 (Intercept) 4 24.7 3.89 0.347 +## 5 (Intercept) 5 26.0 3.55 0.429 +## 6 (Intercept) 6 27.2 3.23 0.497 +## 7 (Intercept) 7 28.4 2.95 0.554 +## 8 (Intercept) 8 29.4 2.68 0.601 +## 9 (Intercept) 9 30.3 2.45 0.640 +## 10 (Intercept) 10 31.1 2.23 0.673 +## # … with 630 more rows +``` + +```r +length(unique(all_tidy_coefs$lambda)) +``` + +``` +## [1] 79 +``` + +This can be nice for plots but it might not contain the penalty value that you are interested in. diff --git a/man/rmd/linear_reg_brulee.md b/man/rmd/linear_reg_brulee.md new file mode 100644 index 000000000..92e3fea84 --- /dev/null +++ b/man/rmd/linear_reg_brulee.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.001) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet).
The zeroing out of parameters is a specific feature of the optimization method used in those packages. + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_linear_reg()]. + - `epochs()`: An integer for the number of passes through the training set. + - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to incorporate historical gradient information during optimization (`optimizer = "SGD"` only). + - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping (default: 5L). + + +## Translation from parsnip to the original package (regression) + + +```r +linear_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_linear_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +``` + + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/linear_reg_gee.md b/man/rmd/linear_reg_gee.md new file mode 100644 index 000000000..e4012adb1 --- /dev/null +++ b/man/rmd/linear_reg_gee.md @@ -0,0 +1,88 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no formal tuning parameters.
It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +linear_reg() %>% + set_engine("gee") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = gaussian) +``` + +`multilevelmod::gee_fit()` is a wrapper model around `gee::gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model cannot accept case weights. + +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function `id_var()` to be used in the formula. So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +linear_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(breaks ~ tension + id_var(wool), data = warpbreaks) +``` + +When using tidymodels infrastructure, it may be better to use a workflow.
In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + linear_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = breaks, predictors = c(tension, wool)) %>% + add_model(gee_spec, formula = breaks ~ tension + id_var(wool)) + +fit(gee_wflow, data = warpbreaks) +``` + +The `gee::gee()` function always prints out warnings and output even when `silent = TRUE`. The parsnip "gee" engine, by contrast, silences all console output coming from `gee::gee()`, even if `silent = FALSE`. + +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. + diff --git a/man/rmd/linear_reg_glm.md b/man/rmd/linear_reg_glm.md new file mode 100644 index 000000000..908014743 --- /dev/null +++ b/man/rmd/linear_reg_glm.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters but you can set the `family` parameter (and/or `link`) as an engine argument (see below). 
+ +## Translation from parsnip to the original package + + +```r +linear_reg() %>% + set_engine("glm") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::gaussian) +``` + +To use a non-default `family` and/or `link`, pass in as an argument to `set_engine()`: + + +```r +linear_reg() %>% + set_engine("glm", family = stats::poisson(link = "sqrt")) %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Engine-Specific Arguments: +## family = stats::poisson(link = "sqrt") +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::poisson(link = "sqrt")) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glm) for `linear_reg()` with the `"glm"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/linear_reg_glmnet.md b/man/rmd/linear_reg_glmnet.md new file mode 100644 index 000000000..8835ebde4 --- /dev/null +++ b/man/rmd/linear_reg_glmnet.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 1.0) + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details]. + +## Translation from parsnip to the original package + + +```r +linear_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## alpha = double(1), family = "gaussian") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-glmnet) for `linear_reg()` with the `"glmnet"` engine. + +## References + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/linear_reg_gls.md b/man/rmd/linear_reg_gls.md new file mode 100644 index 000000000..5731f09fe --- /dev/null +++ b/man/rmd/linear_reg_gls.md @@ -0,0 +1,207 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +linear_reg() %>% + set_engine("gls") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: gls +## +## Model fit template: +## nlme::gls(formula = missing_arg(), data = missing_arg()) +``` + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. 
+ +With parsnip, we suggest using the _fixed effects_ formula method when fitting, but the details of the correlation structure should be passed to `set_engine()` since it is an irregular (but required) argument: + + +```r +library(tidymodels) +# load nlme to be able to use the `cor*()` functions +library(nlme) + +data("riesby") + +linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) %>% + fit(depr_score ~ week, data = riesby) +``` + +``` +## parsnip model object +## +## Generalized least squares fit by REML +## Model: depr_score ~ week +## Data: data +## Log-restricted-likelihood: -765.0148 +## +## Coefficients: +## (Intercept) week +## -4.953439 -2.119678 +## +## Correlation Structure: Compound symmetry +## Formula: ~1 | subject +## Parameter estimate(s): +## Rho +## 0.6820145 +## Degrees of freedom: 250 total; 248 residual +## Residual standard error: 6.868785 +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +gls_spec <- + linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) + +gls_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(gls_spec, formula = depr_score ~ week) + +fit(gls_wflow, data = riesby) +``` + +# Degrees of freedom + +Note that [nlme::lme()] and [nlme::gls()] can fit the same model but will count degrees of freedom differently. If there are `n` data points, `p` fixed effects parameters, and `q` random effect parameters, the residual degrees of freedom are: + +* **lme**: n - p - q +* **gls**: n - p + +As a result, p-values will be different. 
For example, we can fit the same model using different estimation methods (assuming a positive covariance value): + + +```r +gls_fit <- + linear_reg() %>% + set_engine("gls", correlation = corCompSymm(form = ~ 1 | subject)) %>% + fit(depr_score ~ week, data = riesby) + +lme_fit <- + linear_reg() %>% + set_engine("lme", random = ~ 1 | subject) %>% + fit(depr_score ~ week, data = riesby) +``` + +The estimated within-subject correlations are the same: + + +```r +library(ape) + +# lme, use ape package: +lme_within_sub <- varcomp(lme_fit$fit)/sum(varcomp(lme_fit$fit)) +lme_within_sub["subject"] +``` + +``` +## subject +## 0.6820145 +``` + +```r +# gls: +summary(gls_fit$fit$modelStruct) +``` + +``` +## Correlation Structure: Compound symmetry +## Formula: ~1 | subject +## Parameter estimate(s): +## Rho +## 0.6820145 +``` + +as are the fixed effects (and their standard errors): + + +```r +nlme::fixef(lme_fit$fit) +``` + +``` +## (Intercept) week +## -4.953439 -2.119678 +``` + +```r +coef(gls_fit$fit) +``` + +``` +## (Intercept) week +## -4.953439 -2.119678 +``` + +However, the p-values for the fixed effects are different: + + + +```r +library(broom.mixed) + +# lme: +lme_fit %>% tidy() %>% + dplyr::filter(group == "fixed") %>% + dplyr::select(-group, -effect) +``` + +``` +## # A tibble: 2 × 6 +## term estimate std.error df statistic p.value +## +## 1 (Intercept) -4.95 0.808 183 -6.13 5.37e- 9 +## 2 week -2.12 0.224 183 -9.47 1.41e-17 +``` + +```r +# gls: +gls_fit %>% tidy() +``` + +``` +## # A tibble: 2 × 5 +## term estimate std.error statistic p.value +## +## 1 (Intercept) -4.95 0.808 -6.13 3.50e- 9 +## 2 week -2.12 0.224 -9.47 2.26e-18 +``` + + + +## References + +- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. 
Springer, New York, NY + diff --git a/man/rmd/linear_reg_keras.md b/man/rmd/linear_reg_keras.md new file mode 100644 index 000000000..756913fe3 --- /dev/null +++ b/man/rmd/linear_reg_keras.md @@ -0,0 +1,56 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has one tuning parameter: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +For `penalty`, the amount of regularization is _only_ L2 penalty (i.e., ridge or weight decay). + +## Translation from parsnip to the original package + + +```r +linear_reg(penalty = double(1)) %>% + set_engine("keras") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), +## hidden_units = 1, act = "linear") +``` + +[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a single hidden unit. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-keras) for `linear_reg()` with the `"keras"` engine. + +## References + + - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86. 
+ diff --git a/man/rmd/linear_reg_lm.md b/man/rmd/linear_reg_lm.md new file mode 100644 index 000000000..67716b628 --- /dev/null +++ b/man/rmd/linear_reg_lm.md @@ -0,0 +1,39 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + + +```r +linear_reg() %>% + set_engine("lm") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: lm +## +## Model fit template: +## stats::lm(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-lm) for `linear_reg()` with the `"lm"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/linear_reg_lme.md b/man/rmd/linear_reg_lme.md new file mode 100644 index 000000000..ba323d310 --- /dev/null +++ b/man/rmd/linear_reg_lme.md @@ -0,0 +1,103 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. 
+ + +```r +library(multilevelmod) + +linear_reg() %>% + set_engine("lme") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: lme +## +## Model fit template: +## nlme::lme(fixed = missing_arg(), data = missing_arg()) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. 
+ +With parsnip, we suggest using the _fixed effects_ formula method when fitting, but the random effects formula should be passed to `set_engine()` since it is an irregular (but required) argument: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("lme", random = ~ 1|subject) %>% + fit(depr_score ~ week, data = riesby) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +lme_spec <- + linear_reg() %>% + set_engine("lme", random = ~ 1|subject) + +lme_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(lme_spec, formula = depr_score ~ week) + +fit(lme_wflow, data = riesby) +``` + +## References + +- J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + +- West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + +- Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + +- Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + +- DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. 
+ diff --git a/man/rmd/linear_reg_lmer.md b/man/rmd/linear_reg_lmer.md new file mode 100644 index 000000000..396770813 --- /dev/null +++ b/man/rmd/linear_reg_lmer.md @@ -0,0 +1,103 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +linear_reg() %>% + set_engine("lmer") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: lmer +## +## Model fit template: +## lme4::lmer(formula = missing_arg(), data = missing_arg()) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). 
+ + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("lmer") %>% + fit(depr_score ~ week + (1|subject), data = riesby) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +lmer_spec <- + linear_reg() %>% + set_engine("lmer") + +lmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(lmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(lmer_wflow, data = riesby) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. 
+ diff --git a/man/rmd/linear_reg_spark.md b/man/rmd/linear_reg_spark.md new file mode 100644 index 000000000..75ace131f --- /dev/null +++ b/man/rmd/linear_reg_spark.md @@ -0,0 +1,70 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay). + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +## Translation from parsnip to the original package + + +```r +linear_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("spark") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = double(1) +## mixture = double(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(), +## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data. + +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. 
+ +* Only the formula interface to via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + + - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/linear_reg_stan.md b/man/rmd/linear_reg_stan.md new file mode 100644 index 000000000..40b6e8a38 --- /dev/null +++ b/man/rmd/linear_reg_stan.md @@ -0,0 +1,59 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. The `"stan"` engine does not fit any hierarchical terms. See the `"stan_glmer"` engine from the multilevelmod package for that type of model. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). 
+ +See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options. + +## Translation from parsnip to the original package + + +```r +linear_reg() %>% + set_engine("stan") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: stan +## +## Model fit template: +## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::gaussian, refresh = 0) +``` + +Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Other details + +For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `linear_reg()` with the `"stan"` engine. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/linear_reg_stan_glmer.md b/man/rmd/linear_reg_stan_glmer.md new file mode 100644 index 000000000..f6cbb7898 --- /dev/null +++ b/man/rmd/linear_reg_stan_glmer.md @@ -0,0 +1,112 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. 
+ +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +linear_reg() %>% + set_engine("stan_glmer") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Regression Model Specification (regression) +## +## Computational engine: stan_glmer +## +## Model fit template: +## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::gaussian, refresh = 0) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subject's results. + +What happens when data are being predicted for a subject that was not used in the model fit?
In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0 + \hat{\beta}_1x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("riesby") + +linear_reg() %>% + set_engine("stan_glmer") %>% + fit(depr_score ~ week + (1|subject), data = riesby) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + linear_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = depr_score, predictors = c(week, subject)) %>% + add_model(glmer_spec, formula = depr_score ~ week + (1|subject)) + +fit(glmer_wflow, data = riesby) +``` + +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R.
2020 _Statistical Rethinking_. CRC Press. + + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. diff --git a/man/rmd/logistic-reg.md b/man/rmd/logistic-reg.md new file mode 100644 index 000000000..2bfd3343c --- /dev/null +++ b/man/rmd/logistic-reg.md @@ -0,0 +1,196 @@ +# Engine Details + + + + +Engines may have pre-set default arguments when executing the model fit call. +For this type of model, the template of the fit calls are below. + +## glm + + +```r +logistic_reg() %>% + set_engine("glm") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::binomial) +``` + +## glmnet + + +```r +logistic_reg(penalty = 0.1) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0.1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## family = "binomial") +``` + +The glmnet engine requires a single value for the `penalty` argument (a number +or `tune()`), but the full regularization path is always fit +regardless of the value given to `penalty`. To pass in a custom sequence of +values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. +This will assign the value of the glmnet `lambda` parameter without disturbing +the value given of `logistic_reg(penalty)`. 
For example: + + +```r +logistic_reg(penalty = .1) %>% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0.1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "binomial") +``` + +When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly +suggest_ that you pass in a vector for `path_values` that includes zero. See +[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. + +When using `predict()`, the single `penalty` value used for prediction is the +one specified in `logistic_reg()`. + +To predict on multiple penalties, use the `multi_predict()` function. +This function returns a tibble with a list column called `.pred` containing +all of the penalty results. + + +## LiblineaR + + +```r +logistic_reg() %>% + set_engine("LiblineaR") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: LiblineaR +## +## Model fit template: +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), +## verbose = FALSE) +``` + +For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1 +(for lasso) but not other intermediate values. In the `LiblineaR` documentation, +these correspond to types 0 (L2-regularized) and 6 (L1-regularized). + +Be aware that the `LiblineaR` engine regularizes the intercept. Other +regularized regression models do not, which will result in different parameter estimates. 
+ +## stan + + +```r +logistic_reg() %>% + set_engine("stan") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: stan +## +## Model fit template: +## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::binomial, refresh = 0) +``` + +Note that the `refresh` default prevents logging of the estimation process. +Change this value in `set_engine()` to show the logs. + +For prediction, the `stan` engine can compute posterior intervals analogous to +confidence and prediction intervals. In these instances, the units are the +original outcome and when `std_error = TRUE`, the standard deviation of the +posterior distribution (or posterior predictive distribution as appropriate) is +returned. + +## spark + + +```r +logistic_reg() %>% + set_engine("spark") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), +## weight_col = missing_arg(), family = "binomial") +``` + +## keras + + +```r +logistic_reg() %>% + set_engine("keras") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1, +## act = "linear") +``` + + +## Parameter translations + +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
+ + +|**parsnip** |**glmnet** |**LiblineaR** |**spark** |**keras** |**brulee** | +|:-----------|:----------|:-------------|:---------------------|:-----------|:----------| +|penalty |lambda |cost |reg_param (0) |penalty (0) |penalty | +|mixture |alpha (1) |type (0) |elastic_net_param (0) |NA |mixture | +|epochs |NA |NA |NA |NA |epochs | +|learn_rate |NA |NA |NA |NA |learn_rate | +|momentum |NA |NA |NA |NA |momentum | +|stop_iter |NA |NA |NA |NA |stop_iter | diff --git a/man/rmd/logistic_reg_LiblineaR.md b/man/rmd/logistic_reg_LiblineaR.md new file mode 100644 index 000000000..194ab9b47 --- /dev/null +++ b/man/rmd/logistic_reg_LiblineaR.md @@ -0,0 +1,61 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0) + +For `LiblineaR` models, the value for `mixture` can either be 0 (for ridge) or 1 (for lasso) but not other intermediate values. In the [LiblineaR::LiblineaR()] documentation, these correspond to types 0 (L2-regularized) and 6 (L1-regularized). + +Be aware that the `LiblineaR` engine regularizes the intercept. Other regularized regression models do not, which will result in different parameter estimates. 
+ +## Translation from parsnip to the original package + + +```r +logistic_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("LiblineaR") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## mixture = double(1) +## +## Computational engine: LiblineaR +## +## Model fit template: +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), +## cost = Inf, type = double(1), verbose = FALSE) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-LiblineaR) for `logistic_reg()` with the `"LiblineaR"` engine. + +## References + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/logistic_reg_brulee.md b/man/rmd/logistic_reg_brulee.md new file mode 100644 index 000000000..50c9879ab --- /dev/null +++ b/man/rmd/logistic_reg_brulee.md @@ -0,0 +1,61 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameter: + +- `penalty`: Amount of Regularization (type: double, default: 0.001) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +The use of the L1 penalty (a.k.a. 
the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_logistic_reg()]. + - `epochs()`: An integer for the number of passes through the training set. + - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to incorporate historical gradient information during optimization (`optimizer = "SGD"` only). + - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping (default: 5L). + - `class_weights()`: Numeric class weights. See [brulee::brulee_logistic_reg()]. + + +## Translation from parsnip to the original package (classification) + + +```r +logistic_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_logistic_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +``` + + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/logistic_reg_gee.md b/man/rmd/logistic_reg_gee.md new file mode 100644 index 000000000..cb4a83f59 --- /dev/null +++ b/man/rmd/logistic_reg_gee.md @@ -0,0 +1,88 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This model has no formal tuning parameters. It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +logistic_reg() %>% + set_engine("gee") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = binomial) +``` + +`multilevelmod::gee_fit()` is a wrapper around `gee::gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model cannot accept case weights. + +Both `gee::gee()` and `geepack::geeglm()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using an artificial function `id_var()` to be used in the formula.
So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With `parsnip`, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(outcome ~ treatment * visit + id_var(patientID), data = toenail) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + logistic_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(gee_spec, formula = outcome ~ treatment * visit + id_var(patientID)) + +fit(gee_wflow, data = toenail) +``` + +The `gee::gee()` function always prints out warnings and output even when `silent = TRUE`. The parsnip "gee" engine, by contrast, silences all console output coming from `gee::gee()`, even if `silent = FALSE`. + +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. 
+ diff --git a/man/rmd/logistic_reg_glm.md b/man/rmd/logistic_reg_glm.md new file mode 100644 index 000000000..eb03e06c7 --- /dev/null +++ b/man/rmd/logistic_reg_glm.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This engine has no tuning parameters but you can set the `family` parameter (and/or `link`) as an engine argument (see below). + +## Translation from parsnip to the original package + + +```r +logistic_reg() %>% + set_engine("glm") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::binomial) +``` + +To use a non-default `family` and/or `link`, pass in as an argument to `set_engine()`: + + +```r +logistic_reg() %>% + set_engine("glm", family = stats::binomial(link = "probit")) %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Engine-Specific Arguments: +## family = stats::binomial(link = "probit") +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::binomial(link = "probit")) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glm) for `logistic_reg()` with the `"glm"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/logistic_reg_glmer.md b/man/rmd/logistic_reg_glmer.md new file mode 100644 index 000000000..c71ee2988 --- /dev/null +++ b/man/rmd/logistic_reg_glmer.md @@ -0,0 +1,102 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +logistic_reg() %>% + set_engine("glmer") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: glmer +## +## Model fit template: +## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = binomial) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). 
+ + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("glmer") %>% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + logistic_reg() %>% + set_engine("glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. 
+ diff --git a/man/rmd/logistic_reg_glmnet.md b/man/rmd/logistic_reg_glmnet.md new file mode 100644 index 000000000..f52582fb8 --- /dev/null +++ b/man/rmd/logistic_reg_glmnet.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 1.0) + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details]. + +## Translation from parsnip to the original package + + +```r +logistic_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## alpha = double(1), family = "binomial") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-glmnet) for `logistic_reg()` with the `"glmnet"` engine. + +## References + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/logistic_reg_keras.md b/man/rmd/logistic_reg_keras.md new file mode 100644 index 000000000..db24ece0a --- /dev/null +++ b/man/rmd/logistic_reg_keras.md @@ -0,0 +1,56 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has one tuning parameter: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +For `penalty`, the amount of regularization is _only_ L2 penalty (i.e., ridge or weight decay). + +## Translation from parsnip to the original package + + +```r +logistic_reg(penalty = double(1)) %>% + set_engine("keras") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), +## hidden_units = 1, act = "linear") +``` + +[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a single hidden unit. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-keras) for `logistic_reg()` with the `"keras"` engine. + +## References + + - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86. + diff --git a/man/rmd/logistic_reg_spark.md b/man/rmd/logistic_reg_spark.md new file mode 100644 index 000000000..f9c9f252d --- /dev/null +++ b/man/rmd/logistic_reg_spark.md @@ -0,0 +1,71 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay). + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +## Translation from parsnip to the original package + + +```r +logistic_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("spark") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## mixture = double(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), +## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## family = "binomial") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. 
One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data. + +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. + +* Only the formula interface to via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + + - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/logistic_reg_stan.md b/man/rmd/logistic_reg_stan.md new file mode 100644 index 000000000..b244d592e --- /dev/null +++ b/man/rmd/logistic_reg_stan.md @@ -0,0 +1,59 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This engine has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. 
+ * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. This `"stan"` engine does not fit any hierarchical terms. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options. + +## Translation from parsnip to the original package + + +```r +logistic_reg() %>% + set_engine("stan") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: stan +## +## Model fit template: +## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::binomial, refresh = 0) +``` + +Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Other details + +For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-stan) for `logistic_reg()` with the `"stan"` engine. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. 
diff --git a/man/rmd/logistic_reg_stan_glmer.md b/man/rmd/logistic_reg_stan_glmer.md new file mode 100644 index 000000000..32f139df5 --- /dev/null +++ b/man/rmd/logistic_reg_stan_glmer.md @@ -0,0 +1,111 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + +This model has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +logistic_reg() %>% + set_engine("stan_glmer") %>% + translate() +``` + +``` +## Logistic Regression Model Specification (classification) +## +## Computational engine: stan_glmer +## +## Model fit template: +## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::binomial, refresh = 0) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). 
When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subject's results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0 + \hat{\beta}_1x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) +data("toenail", package = "HSAUR3") + +logistic_reg() %>% + set_engine("stan_glmer") %>% + fit(outcome ~ treatment * visit + (1 | patientID), data = toenail) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. 
In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + logistic_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = outcome, predictors = c(treatment, visit, patientID)) %>% + add_model(glmer_spec, formula = outcome ~ treatment * visit + (1 | patientID)) + +fit(glmer_wflow, data = toenail) +``` + +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. + + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. diff --git a/man/rmd/mars_earth.md b/man/rmd/mars_earth.md new file mode 100644 index 000000000..c3a5f5f3e --- /dev/null +++ b/man/rmd/mars_earth.md @@ -0,0 +1,94 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `num_terms`: # Model Terms (type: integer, default: see below) + +- `prod_degree`: Degree of Interaction (type: integer, default: 1L) + +- `prune_method`: Pruning Method (type: character, default: 'backward') + +The default value of `num_terms` depends on the number of predictor columns. For a data frame `x`, the default is `min(200, max(20, 2 * ncol(x))) + 1` (see [earth::earth()] and the reference below). 
+ +## Translation from parsnip to the original package (regression) + + +```r +mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% + set_engine("earth") %>% + set_mode("regression") %>% + translate() +``` + +``` +## MARS Model Specification (regression) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Computational engine: earth +## +## Model fit template: +## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## nprune = integer(1), degree = integer(1), pmethod = character(1), +## keepxy = TRUE) +``` + +## Translation from parsnip to the original package (classification) + + +```r +mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>% + set_engine("earth") %>% + set_mode("classification") %>% + translate() +``` + +``` +## MARS Model Specification (classification) +## +## Main Arguments: +## num_terms = integer(1) +## prod_degree = integer(1) +## prune_method = character(1) +## +## Engine-Specific Arguments: +## glm = list(family = stats::binomial) +## +## Computational engine: earth +## +## Model fit template: +## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## nprune = integer(1), degree = integer(1), pmethod = character(1), +## glm = list(family = stats::binomial), keepxy = TRUE) +``` + +An alternate method for using MARs for categorical outcomes can be found in [discrim_flexible()]. + + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mars-earth) for `mars()` with the `"earth"` engine. + +## References + + - Friedman, J. 1991. "Multivariate Adaptive Regression Splines." _The Annals of Statistics_, vol. 19, no. 1, pp. 1-67. + + - Milborrow, S. ["Notes on the earth package."](http://www.milbo.org/doc/earth-notes.pdf) + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/mlp_brulee.md b/man/rmd/mlp_brulee.md new file mode 100644 index 000000000..520f4c473 --- /dev/null +++ b/man/rmd/mlp_brulee.md @@ -0,0 +1,127 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 7 tuning parameters: + +- `hidden_units`: # Hidden Units (type: integer, default: 3L) + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +- `epochs`: # Epochs (type: integer, default: 100L) + +- `dropout`: Dropout Rate (type: double, default: 0.0) + +- `learn_rate`: Learning Rate (type: double, default: 0.01) + +- `activation`: Activation Function (type: character, default: 'relu') + +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. + +Both `penalty` and `dropout` should not be used in the same model. + +Other engine arguments of interest: + + - `momentum()`: A number used to use historical gradient information during optimization. + - `batch_size()`: An integer for the number of training set points in each batch. + - `class_weights()`: Numeric class weights. See [brulee::brulee_mlp()]. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. 
(default: 5L). + + +## Translation from parsnip to the original package (regression) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) %>% + set_engine("brulee") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (regression) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## learn_rate = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1), learn_rate = double(1)) +``` + +Note that parsnip automatically sets linear activation in the last layer. + +## Translation from parsnip to the original package (classification) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + learn_rate = double(1), + activation = character(1) +) %>% + set_engine("brulee") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (classification) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## learn_rate = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1), learn_rate = double(1)) +``` + + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + + + diff --git a/man/rmd/mlp_keras.md b/man/rmd/mlp_keras.md new file mode 100644 index 000000000..4f42ade7b --- /dev/null +++ b/man/rmd/mlp_keras.md @@ -0,0 +1,108 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 5 tuning parameters: + +- `hidden_units`: # Hidden Units (type: integer, default: 5L) + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `dropout`: Dropout Rate (type: double, default: 0.0) + +- `epochs`: # Epochs (type: integer, default: 20L) + +- `activation`: Activation Function (type: character, default: 'softmax') + +## Translation from parsnip to the original package (regression) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + activation = character(1) +) %>% + set_engine("keras") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (regression) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1)) +``` + +## Translation from parsnip to the original package (classification) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + dropout = double(1), + epochs = integer(1), + activation = 
character(1) +) %>% + set_engine("keras") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (classification) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## dropout = double(1) +## epochs = integer(1) +## activation = character(1) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1), +## penalty = double(1), dropout = double(1), epochs = integer(1), +## activation = character(1)) +``` + + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-keras) for `mlp()` with the `"keras"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + + diff --git a/man/rmd/mlp_nnet.md b/man/rmd/mlp_nnet.md new file mode 100644 index 000000000..6776d9347 --- /dev/null +++ b/man/rmd/mlp_nnet.md @@ -0,0 +1,102 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `hidden_units`: # Hidden Units (type: integer, default: none) + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `epochs`: # Epochs (type: integer, default: 100L) + +Note that, in [nnet::nnet()], the maximum number of parameters is an argument with a fairly low value of `maxit = 1000`. 
For some models, you may need to pass this value in via [set_engine()] so that the model does not fail. + + +## Translation from parsnip to the original package (regression) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + epochs = integer(1) +) %>% + set_engine("nnet") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (regression) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## epochs = integer(1) +## +## Computational engine: nnet +## +## Model fit template: +## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## size = integer(1), decay = double(1), maxit = integer(1), +## trace = FALSE, linout = TRUE) +``` + +Note that parsnip automatically sets linear activation in the last layer. + +## Translation from parsnip to the original package (classification) + + +```r +mlp( + hidden_units = integer(1), + penalty = double(1), + epochs = integer(1) +) %>% + set_engine("nnet") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Single Layer Neural Network Specification (classification) +## +## Main Arguments: +## hidden_units = integer(1) +## penalty = double(1) +## epochs = integer(1) +## +## Computational engine: nnet +## +## Model fit template: +## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## size = integer(1), decay = double(1), maxit = integer(1), +## trace = FALSE, linout = FALSE) +``` + + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#mlp-nnet) for `mlp()` with the `"nnet"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + + + diff --git a/man/rmd/multinom_reg_brulee.md b/man/rmd/multinom_reg_brulee.md new file mode 100644 index 000000000..9f662d75e --- /dev/null +++ b/man/rmd/multinom_reg_brulee.md @@ -0,0 +1,61 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.001) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +The use of the L1 penalty (a.k.a. the lasso penalty) does _not_ force parameters to be strictly zero (as it does in packages such as glmnet). The zeroing out of parameters is a specific feature of the optimization method used in those packages. + +Other engine arguments of interest: + + - `optimizer()`: The optimization method. See [brulee::brulee_linear_reg()]. + - `epochs()`: An integer for the number of passes through the training set. + - `learn_rate()`: A number used to accelerate the gradient descent process. + - `momentum()`: A number used to use historical gradient information during optimization (`optimizer = "SGD"` only). + - `batch_size()`: An integer for the number of training set points in each batch. + - `stop_iter()`: A non-negative integer for how many iterations with no improvement before stopping. (default: 5L). + - `class_weights()`: Numeric class weights. See [brulee::brulee_multinomial_reg()]. 
+ + +## Translation from parsnip to the original package (classification) + + +```r +multinom_reg(penalty = double(1)) %>% + set_engine("brulee") %>% + translate() +``` + +``` +## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: brulee +## +## Model fit template: +## brulee::brulee_multinomial_reg(x = missing_arg(), y = missing_arg(), +## penalty = double(1)) +``` + + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/multinom_reg_glmnet.md b/man/rmd/multinom_reg_glmnet.md new file mode 100644 index 000000000..0d82f43b1 --- /dev/null +++ b/man/rmd/multinom_reg_glmnet.md @@ -0,0 +1,62 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 1.0) + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details]. 
+ +## Translation from parsnip to the original package + + +```r +multinom_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## alpha = double(1), family = "multinomial") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-glmnet) for `multinom_reg()` with the `"glmnet"` engine. + +## References + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/multinom_reg_keras.md b/man/rmd/multinom_reg_keras.md new file mode 100644 index 000000000..928f1836a --- /dev/null +++ b/man/rmd/multinom_reg_keras.md @@ -0,0 +1,56 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has one tuning parameter: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +For `penalty`, the amount of regularization is _only_ L2 penalty (i.e., ridge or weight decay). 
+ +## Translation from parsnip to the original package + + +```r +multinom_reg(penalty = double(1)) %>% + set_engine("keras") %>% + translate() +``` + +``` +## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: keras +## +## Model fit template: +## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1), +## hidden_units = 1, act = "linear") +``` + +[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a single hidden unit. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-keras) for `multinom_reg()` with the `"keras"` engine. + +## References + + - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86. + diff --git a/man/rmd/multinom_reg_nnet.md b/man/rmd/multinom_reg_nnet.md new file mode 100644 index 000000000..bbc882edf --- /dev/null +++ b/man/rmd/multinom_reg_nnet.md @@ -0,0 +1,58 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +For `penalty`, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay). 
+ +## Translation from parsnip to the original package + + +```r +multinom_reg(penalty = double(1)) %>% + set_engine("nnet") %>% + translate() +``` + +``` +## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## +## Computational engine: nnet +## +## Model fit template: +## nnet::multinom(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), decay = double(1), trace = FALSE) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#multinom-reg-nnet) for `multinom_reg()` with the `"nnet"` engine. + +## References + + - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering nnet with R_. O'Reilly Media + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
+ diff --git a/man/rmd/multinom_reg_spark.md b/man/rmd/multinom_reg_spark.md new file mode 100644 index 000000000..14a1cf2dd --- /dev/null +++ b/man/rmd/multinom_reg_spark.md @@ -0,0 +1,71 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: 0.0) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 0.0) + +For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay). + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +## Translation from parsnip to the original package + + +```r +multinom_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("spark") %>% + translate() +``` + +``` +## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = double(1) +## mixture = double(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(), +## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1), +## family = "multinomial") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data. + +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. 
+ +* Only the formula interface to via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + + - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media + + - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/naive_Bayes_klaR.md b/man/rmd/naive_Bayes_klaR.md new file mode 100644 index 000000000..2c70b328c --- /dev/null +++ b/man/rmd/naive_Bayes_klaR.md @@ -0,0 +1,57 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + + +This model has 2 tuning parameter: + +- `smoothness`: Kernel Smoothness (type: double, default: 1.0) + +- `Laplace`: Laplace Correction (type: double, default: 0.0) + +Note that `usekernel` is always set to `TRUE` for the `klaR` engine. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% + set_engine("klaR") %>% + translate() +``` + +``` +## Naive Bayes Model Specification (classification) +## +## Main Arguments: +## smoothness = numeric(0) +## Laplace = numeric(0) +## +## Computational engine: klaR +## +## Model fit template: +## discrim::klar_bayes_wrapper(x = missing_arg(), y = missing_arg(), +## adjust = numeric(0), fL = numeric(0), usekernel = TRUE) +``` + +## Preprocessing requirements + +The columns for qualitative predictors should always be represented as factors (as opposed to dummy/indicator variables). When the predictors are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/naive_Bayes_naivebayes.md b/man/rmd/naive_Bayes_naivebayes.md new file mode 100644 index 000000000..d01c32b24 --- /dev/null +++ b/man/rmd/naive_Bayes_naivebayes.md @@ -0,0 +1,57 @@ + + + +For this engine, there is a single mode: classification + +## Tuning Parameters + + + + +This model has 2 tuning parameter: + +- `smoothness`: Kernel Smoothness (type: double, default: 1.0) + +- `Laplace`: Laplace Correction (type: double, default: 0.0) + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **discrim**. 
+ + +```r +library(discrim) + +naive_Bayes(smoothness = numeric(0), Laplace = numeric(0)) %>% + set_engine("naivebayes") %>% + translate() +``` + +``` +## Naive Bayes Model Specification (classification) +## +## Main Arguments: +## smoothness = numeric(0) +## Laplace = numeric(0) +## +## Computational engine: naivebayes +## +## Model fit template: +## naivebayes::naive_bayes(x = missing_arg(), y = missing_arg(), +## adjust = numeric(0), laplace = numeric(0), usekernel = TRUE) +``` + +## Preprocessing requirements + +The columns for qualitative predictors should always be represented as factors (as opposed to dummy/indicator variables). When the predictors are factors, the underlying code treats them as multinomial data and appropriately computes their conditional distributions. + +For count data, integers can be estimated using a Poisson distribution if the argument `usepoisson = TRUE` is passed as an engine argument. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/nearest-neighbor.md b/man/rmd/nearest-neighbor.md new file mode 100644 index 000000000..c89b534ac --- /dev/null +++ b/man/rmd/nearest-neighbor.md @@ -0,0 +1,67 @@ +# Engine Details + + + + +Engines may have pre-set default arguments when executing the model fit call. 
For this type of model, the template of the fit calls are below: + +## kknn + + +```r +nearest_neighbor() %>% + set_engine("kknn") %>% + set_mode("regression") %>% + translate() +``` + +``` +## K-Nearest Neighbor Model Specification (regression) +## +## Computational engine: kknn +## +## Model fit template: +## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), +## ks = min_rows(5, data, 5)) +``` + + +```r +nearest_neighbor() %>% + set_engine("kknn") %>% + set_mode("classification") %>% + translate() +``` + +``` +## K-Nearest Neighbor Model Specification (classification) +## +## Computational engine: kknn +## +## Model fit template: +## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), +## ks = min_rows(5, data, 5)) +``` + +For `kknn`, the underlying modeling function used is a restricted version of +`train.kknn()` and not `kknn()`. It is set up in this way so that `parsnip` can +utilize the underlying `predict.train.kknn` method to predict on new data. This +also means that a single value of that function's `kernel` argument (a.k.a +`weight_func` here) can be supplied + +For this engine, tuning over `neighbors` is very efficient since the same model +object can be used to make predictions over multiple values of `neighbors`. + +## Parameter translations + +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
+ + +|**parsnip** |**kknn** | +|:-----------|:----------------| +|neighbors |ks | +|weight_func |kernel (optimal) | +|dist_power |distance (2) | + diff --git a/man/rmd/nearest_neighbor_kknn.md b/man/rmd/nearest_neighbor_kknn.md new file mode 100644 index 000000000..d1f594265 --- /dev/null +++ b/man/rmd/nearest_neighbor_kknn.md @@ -0,0 +1,95 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `neighbors`: # Nearest Neighbors (type: integer, default: 5L) + +- `weight_func`: Distance Weighting Function (type: character, default: 'optimal') + +- `dist_power`: Minkowski Distance Order (type: double, default: 2.0) + +## Translation from parsnip to the original package (regression) + + +```r +nearest_neighbor( + neighbors = integer(1), + weight_func = character(1), + dist_power = double(1) +) %>% + set_engine("kknn") %>% + set_mode("regression") %>% + translate() +``` + +``` +## K-Nearest Neighbor Model Specification (regression) +## +## Main Arguments: +## neighbors = integer(1) +## weight_func = character(1) +## dist_power = double(1) +## +## Computational engine: kknn +## +## Model fit template: +## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), +## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1)) +``` + +`min_rows()` will adjust the number of neighbors if the chosen value if it is not consistent with the actual data dimensions. 
+ +## Translation from parsnip to the original package (classification) + + +```r +nearest_neighbor( + neighbors = integer(1), + weight_func = character(1), + dist_power = double(1) +) %>% + set_engine("kknn") %>% + set_mode("classification") %>% + translate() +``` + +``` +## K-Nearest Neighbor Model Specification (classification) +## +## Main Arguments: +## neighbors = integer(1) +## weight_func = character(1) +## dist_power = double(1) +## +## Computational engine: kknn +## +## Model fit template: +## kknn::train.kknn(formula = missing_arg(), data = missing_arg(), +## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#nearest-neighbor-kknn) for `nearest_neighbor()` with the `"kknn"` engine. + +## References + + - Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/no-pooling.md b/man/rmd/no-pooling.md new file mode 100644 index 000000000..52568f095 --- /dev/null +++ b/man/rmd/no-pooling.md @@ -0,0 +1,20 @@ +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). 
For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + diff --git a/man/rmd/null-model.md b/man/rmd/null-model.md new file mode 100644 index 000000000..2c7333f75 --- /dev/null +++ b/man/rmd/null-model.md @@ -0,0 +1,41 @@ +# Engine Details + +Engines may have pre-set default arguments when executing the model fit call. 
+For this type of model, the template of the fit calls are below: + +## parsnip + + +```r +null_model() %>% + set_engine("parsnip") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Model Specification (regression) +## +## Computational engine: parsnip +## +## Model fit template: +## nullmodel(x = missing_arg(), y = missing_arg()) +``` + + +```r +null_model() %>% + set_engine("parsnip") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Model Specification (classification) +## +## Computational engine: parsnip +## +## Model fit template: +## nullmodel(x = missing_arg(), y = missing_arg()) +``` + diff --git a/man/rmd/one-hot.md b/man/rmd/one-hot.md new file mode 100644 index 000000000..cb221c248 --- /dev/null +++ b/man/rmd/one-hot.md @@ -0,0 +1,78 @@ + + +By default, `model.matrix()` generates binary indicator variables for factor predictors. When the formula does not remove an intercept, an incomplete set of indicators are created; no indicator is made for the first level of the factor. + +For example, `species` and `island` both have three levels but `model.matrix()` creates two indicator variables for each: + + +```r +library(dplyr) +library(modeldata) +data(penguins) + +levels(penguins$species) +``` + +``` +## [1] "Adelie" "Chinstrap" "Gentoo" +``` + +```r +levels(penguins$island) +``` + +``` +## [1] "Biscoe" "Dream" "Torgersen" +``` + +```r +model.matrix(~ species + island, data = penguins) %>% + colnames() +``` + +``` +## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" +## [5] "islandTorgersen" +``` + +For a formula with no intercept, the first factor is expanded to indicators for _all_ factor levels but all other factors are expanded to all but one (as above): + + +```r +model.matrix(~ 0 + species + island, data = penguins) %>% + colnames() +``` + +``` +## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" +## [5] "islandTorgersen" +``` + +For inference, this hybrid encoding can be problematic. 
+ +To generate all indicators, use this contrast: + + +```r +# Switch out the contrast method +old_contr <- options("contrasts")$contrasts +new_contr <- old_contr +new_contr["unordered"] <- "contr_one_hot" +options(contrasts = new_contr) + +model.matrix(~ species + island, data = penguins) %>% + colnames() +``` + +``` +## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" +## [5] "islandBiscoe" "islandDream" "islandTorgersen" +``` + +```r +options(contrasts = old_contr) +``` + +Removing the intercept here does not affect the factor encodings. + + diff --git a/man/rmd/pls_mixOmics.md b/man/rmd/pls_mixOmics.md new file mode 100644 index 000000000..9f678cfca --- /dev/null +++ b/man/rmd/pls_mixOmics.md @@ -0,0 +1,99 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `predictor_prop`: Proportion of Predictors (type: double, default: see below) + +- `num_comp`: # Components (type: integer, default: 2L) + + +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **plsmod**. + + +```r +library(plsmod) + +pls(num_comp = integer(1), predictor_prop = double(1)) %>% + set_engine("mixOmics") %>% + set_mode("regression") %>% + translate() +``` + +``` +## PLS Model Specification (regression) +## +## Main Arguments: +## predictor_prop = double(1) +## num_comp = integer(1) +## +## Computational engine: mixOmics +## +## Model fit template: +## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), +## ncomp = integer(1)) +``` + +[plsmod::pls_fit()] is a function that: + + - Determines the number of predictors in the data. + - Adjusts `num_comp` if the value is larger than the number of factors. + - Determines whether sparsity is required based on the value of `predictor_prop`. 
+ - Sets the `keepX` argument of [mixOmics::spls()] for sparse models. + +## Translation from parsnip to the underlying model call (classification) + +There is a parsnip extension package required to fit this model to this mode: **plsmod**. + + +```r +library(plsmod) + +pls(num_comp = integer(1), predictor_prop = double(1)) %>% + set_engine("mixOmics") %>% + set_mode("classification") %>% + translate() +``` + +``` +## PLS Model Specification (classification) +## +## Main Arguments: +## predictor_prop = double(1) +## num_comp = integer(1) +## +## Computational engine: mixOmics +## +## Model fit template: +## plsmod::pls_fit(x = missing_arg(), y = missing_arg(), predictor_prop = double(1), +## ncomp = integer(1)) +``` + +In this case, [plsmod::pls_fit()] has the same role as above but eventually targets [mixOmics::plsda()] or [mixOmics::splsda()] . + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Variance calculations are used in these computations so _zero-variance_ predictors (i.e., with a single unique value) should be eliminated before fitting the model. + + + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## References + + - Rohart F and Gautier B and Singh A and Le Cao K-A (2017). "mixOmics: An R package for 'omics feature selection and multiple data integration." PLoS computational biology, 13(11), e1005752. 
+ diff --git a/man/rmd/poisson_reg_gee.md b/man/rmd/poisson_reg_gee.md new file mode 100644 index 000000000..ca4f79028 --- /dev/null +++ b/man/rmd/poisson_reg_gee.md @@ -0,0 +1,87 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no formal tuning parameters. It may be beneficial to determine the appropriate correlation structure to use, but this typically does not affect the predicted value of the model. It _does_ have an effect on the inferential results and parameter covariance values. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +poisson_reg(engine = "gee") %>% + set_engine("gee") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: gee +## +## Model fit template: +## multilevelmod::gee_fit(formula = missing_arg(), data = missing_arg(), +## family = stats::poisson) +``` + +`multilevelmod::gee_fit()` is a wrapper model around `gee()`. + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model cannot accept case weights. + +Both `gee:gee()` and `gee:geepack()` specify the id/cluster variable using an argument `id` that requires a vector. parsnip doesn't work that way so we enable this model to be fit using a artificial function `id_var()` to be used in the formula. 
So, in the original package, the call would look like: + +```r +gee(breaks ~ tension, id = wool, data = warpbreaks, corstr = "exchangeable") +``` + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("gee", corstr = "exchangeable") %>% + fit(y ~ time + x + id_var(subject), data = longitudinal_counts) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the GEE formula when adding the model: + +```r +library(tidymodels) + +gee_spec <- + poisson_reg() %>% + set_engine("gee", corstr = "exchangeable") + +gee_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(gee_spec, formula = y ~ time + x + id_var(subject)) + +fit(gee_wflow, data = longitudinal_counts) +``` + +`gee()` always prints out warnings and output even when `silent = TRUE`. When using the `gee` engine, it will never produce output, even if `silent = FALSE`. + +Also, because of issues with the `gee()` function, a supplementary call to `glm()` is needed to get the rank and QR decomposition objects so that `predict()` can be used. + +## References + + - Liang, K.Y. and Zeger, S.L. (1986) Longitudinal data analysis using generalized linear models. _Biometrika_, 73 13–22. + + - Zeger, S.L. and Liang, K.Y. (1986) Longitudinal data analysis for discrete and continuous outcomes. _Biometrics_, 42 121–130. + diff --git a/man/rmd/poisson_reg_glm.md b/man/rmd/poisson_reg_glm.md new file mode 100644 index 000000000..2c96860c1 --- /dev/null +++ b/man/rmd/poisson_reg_glm.md @@ -0,0 +1,38 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. 
+ +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **poissonreg**. + + +```r +library(poissonreg) + +poisson_reg() %>% + set_engine("glm") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: glm +## +## Model fit template: +## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## family = stats::poisson) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + diff --git a/man/rmd/poisson_reg_glmer.md b/man/rmd/poisson_reg_glmer.md new file mode 100644 index 000000000..7f87c57b0 --- /dev/null +++ b/man/rmd/poisson_reg_glmer.md @@ -0,0 +1,101 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. + + +```r +library(multilevelmod) + +poisson_reg(engine = "glmer") %>% + set_engine("glmer") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: glmer +## +## Model fit template: +## lme4::glmer(formula = missing_arg(), data = missing_arg(), family = stats::poisson) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). 
When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subjects results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0+ \hat{\beta}x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights. + +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("glmer") %>% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + poisson_reg() %>% + set_engine("glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +``` + +## References + + - J Pinheiro, and D Bates. 2000. _Mixed-effects models in S and S-PLUS_. 
Springer, New York, NY + + - West, K, Band Welch, and A Galecki. 2014. _Linear Mixed Models: A Practical Guide Using Statistical Software_. CRC Press. + + - Thorson, J, Minto, C. 2015, Mixed effects: a unifying framework for statistical modelling in fisheries biology. _ICES Journal of Marine Science_, Volume 72, Issue 5, Pages 1245–1256. + + - Harrison, XA, Donaldson, L, Correa-Cano, ME, Evans, J, Fisher, DN, Goodwin, CED, Robinson, BS, Hodgson, DJ, Inger, R. 2018. _A brief introduction to mixed effects modelling and multi-model inference in ecology_. PeerJ 6:e4794. + + - DeBruine LM, Barr DJ. Understanding Mixed-Effects Models Through Data Simulation. 2021. _Advances in Methods and Practices in Psychological Science_. + diff --git a/man/rmd/poisson_reg_glmnet.md b/man/rmd/poisson_reg_glmnet.md new file mode 100644 index 000000000..ec0f4daf0 --- /dev/null +++ b/man/rmd/poisson_reg_glmnet.md @@ -0,0 +1,56 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 1.0) + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details]. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **poissonreg**. 
+ + +```r +library(poissonreg) + +poisson_reg(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## alpha = double(1), family = "poisson") +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, `glmnet::glmnet()` uses the argument `standardize = TRUE` to center and scale the data. + diff --git a/man/rmd/poisson_reg_hurdle.md b/man/rmd/poisson_reg_hurdle.md new file mode 100644 index 000000000..f16bb7f91 --- /dev/null +++ b/man/rmd/poisson_reg_hurdle.md @@ -0,0 +1,108 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **poissonreg**. 
+ + +```r +library(poissonreg) + +poisson_reg() %>% + set_engine("hurdle") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: hurdle +## +## Model fit template: +## pscl::hurdle(formula = missing_arg(), data = missing_arg(), weights = missing_arg()) +``` + +## Preprocessing and special formulas for zero-inflated Poisson models + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability of zero counts. These sets of terms are separated by a bar. For example, `y ~ x | z`. This type of formula is not used by the base R infrastructure (e.g. `model.matrix()`) + +When fitting a parsnip model with this engine directly, the formula method is required and the formula is just passed through. 
For example: + + + + +```r +library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() %>% + set_engine("hurdle") %>% + fit(art ~ fem + mar | ment, data = bioChemists) +``` + +``` +## parsnip model object +## +## +## Call: +## pscl::hurdle(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (truncated poisson with log link): +## (Intercept) femWomen marMarried +## 0.847598 -0.237351 0.008846 +## +## Zero hurdle model coefficients (binomial with logit link): +## (Intercept) ment +## 0.24871 0.08092 +``` + +However, when using a workflow, the best approach is to avoid using [workflows::add_formula()] and use [workflows::add_variables()] in conjunction with a model formula: + + +```r +data("bioChemists", package = "pscl") +spec <- + poisson_reg() %>% + set_engine("hurdle") + +workflow() %>% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) %>% + add_model(spec, formula = art ~ fem + mar | ment) %>% + fit(data = bioChemists) +``` + +``` +## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +## Preprocessor: Variables +## Model: poisson_reg() +## +## ── Preprocessor ──────────────────────────────────────────────────────────────── +## Outcomes: c(art) +## Predictors: c(fem, mar, ment) +## +## ── Model ─────────────────────────────────────────────────────────────────────── +## +## Call: +## pscl::hurdle(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (truncated poisson with log link): +## (Intercept) femWomen marMarried +## 0.847598 -0.237351 0.008846 +## +## Zero hurdle model coefficients (binomial with logit link): +## (Intercept) ment +## 0.24871 0.08092 +``` + +The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. 
diff --git a/man/rmd/poisson_reg_stan.md b/man/rmd/poisson_reg_stan.md new file mode 100644 index 000000000..941a0bbef --- /dev/null +++ b/man/rmd/poisson_reg_stan.md @@ -0,0 +1,63 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. The `"stan"` engine does not fit any hierarchical terms. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **poissonreg**. + + +```r +library(poissonreg) + +poisson_reg() %>% + set_engine("stan") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: stan +## +## Model fit template: +## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::poisson) +``` + +Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## Other details + +For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-stan) for `poisson_reg()` with the `"stan"` engine. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. diff --git a/man/rmd/poisson_reg_stan_glmer.md b/man/rmd/poisson_reg_stan_glmer.md new file mode 100644 index 000000000..14f544f90 --- /dev/null +++ b/man/rmd/poisson_reg_stan_glmer.md @@ -0,0 +1,110 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Important engine-specific options + +Some relevant arguments that can be passed to `set_engine()`: + + * `chains`: A positive integer specifying the number of Markov chains. The default is 4. + * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000. + * `seed`: The seed for random number generation. + * `cores`: Number of cores to use when executing the chains in parallel. + * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. + * `prior_intercept`: The prior distribution for the intercept (after centering all predictors). + +See `?rstanarm::stan_glmer` and `?rstan::sampling` for more information. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **multilevelmod**. 
+ + +```r +library(multilevelmod) + +poisson_reg(engine = "stan_glmer") %>% + set_engine("stan_glmer") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: stan_glmer +## +## Model fit template: +## rstanarm::stan_glmer(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), family = stats::poisson, refresh = 0) +``` + + +## Predicting new samples + +This model can use subject-specific coefficient estimates to make predictions (i.e. partial pooling). For example, this equation shows the linear predictor ($\eta$) for a random intercept: + +$$ +\eta_{i} = (\beta_0 + b_{0i}) + \beta_1x_{i1} +$$ + +where $i$ denotes the `i`th independent experimental unit (e.g. subject). When the model has seen subject `i`, it can use that subject's data to adjust the _population_ intercept to be more specific to that subject's results. + +What happens when data are being predicted for a subject that was not used in the model fit? In that case, this package uses _only_ the population parameter estimates for prediction: + +$$ +\hat{\eta}_{i'} = \hat{\beta}_0 + \hat{\beta}_1x_{i'1} +$$ + +Depending on what covariates are in the model, this might have the effect of making the same prediction for all new samples. The population parameters are the "best estimate" for a subject that was not included in the model fit. + +The tidymodels framework deliberately constrains predictions for new data to not use the training set or other data (to prevent information leakage). + + +## Preprocessing requirements + +There are no specific preprocessing needs. However, it is helpful to keep the clustering/subject identifier column as factor or character (instead of making them into dummy variables). See the examples in the next section. + +## Other details + +The model can accept case weights.
+ +With parsnip, we suggest using the formula method when fitting: + +```r +library(tidymodels) + +poisson_reg() %>% + set_engine("stan_glmer") %>% + fit(y ~ time + x + (1 | subject), data = longitudinal_counts) +``` + +When using tidymodels infrastructure, it may be better to use a workflow. In this case, you can add the appropriate columns using `add_variables()` then supply the typical formula when adding the model: + +```r +library(tidymodels) + +glmer_spec <- + poisson_reg() %>% + set_engine("stan_glmer") + +glmer_wflow <- + workflow() %>% + # The data are included as-is using: + add_variables(outcomes = y, predictors = c(time, x, subject)) %>% + add_model(glmer_spec, formula = y ~ time + x + (1 | subject)) + +fit(glmer_wflow, data = longitudinal_counts) +``` + +For prediction, the `"stan_glmer"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome. When `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned. + +## References + + - McElreath, R. 2020 _Statistical Rethinking_. CRC Press. + + - Sorensen, T, Vasishth, S. 2016. Bayesian linear mixed models using Stan: A tutorial for psychologists, linguists, and cognitive scientists, arXiv:1506.06201. diff --git a/man/rmd/poisson_reg_zeroinfl.md b/man/rmd/poisson_reg_zeroinfl.md new file mode 100644 index 000000000..f285745da --- /dev/null +++ b/man/rmd/poisson_reg_zeroinfl.md @@ -0,0 +1,109 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **poissonreg**. 
+ + +```r +library(poissonreg) + +poisson_reg() %>% + set_engine("zeroinfl") %>% + translate() +``` + +``` +## Poisson Regression Model Specification (regression) +## +## Computational engine: zeroinfl +## +## Model fit template: +## pscl::zeroinfl(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg()) +``` + +## Preprocessing and special formulas for zero-inflated Poisson models + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +For this particular model, a special formula is used to specify which columns affect the counts and which affect the model for the probability of zero counts. These sets of terms are separated by a bar. For example, `y ~ x | z`. This type of formula is not used by the base R infrastructure (e.g. `model.matrix()`) + +When fitting a parsnip model with this engine directly, the formula method is required and the formula is just passed through. 
For example: + + + + +```r +library(tidymodels) +tidymodels_prefer() + +data("bioChemists", package = "pscl") +poisson_reg() %>% + set_engine("zeroinfl") %>% + fit(art ~ fem + mar | ment, data = bioChemists) +``` + +``` +## parsnip model object +## +## +## Call: +## pscl::zeroinfl(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (poisson with log link): +## (Intercept) femWomen marMarried +## 0.82840 -0.21365 0.02576 +## +## Zero-inflation model coefficients (binomial with logit link): +## (Intercept) ment +## -0.363 -0.166 +``` + +However, when using a workflow, the best approach is to avoid using [workflows::add_formula()] and use [workflows::add_variables()] in conjunction with a model formula: + + +```r +data("bioChemists", package = "pscl") +spec <- + poisson_reg() %>% + set_engine("zeroinfl") + +workflow() %>% + add_variables(outcomes = c(art), predictors = c(fem, mar, ment)) %>% + add_model(spec, formula = art ~ fem + mar | ment) %>% + fit(data = bioChemists) +``` + +``` +## ══ Workflow [trained] ══════════════════════════════════════════════════════════ +## Preprocessor: Variables +## Model: poisson_reg() +## +## ── Preprocessor ──────────────────────────────────────────────────────────────── +## Outcomes: c(art) +## Predictors: c(fem, mar, ment) +## +## ── Model ─────────────────────────────────────────────────────────────────────── +## +## Call: +## pscl::zeroinfl(formula = art ~ fem + mar | ment, data = data) +## +## Count model coefficients (poisson with log link): +## (Intercept) femWomen marMarried +## 0.82840 -0.21365 0.02576 +## +## Zero-inflation model coefficients (binomial with logit link): +## (Intercept) ment +## -0.363 -0.166 +``` + +The reason for this is that [workflows::add_formula()] will try to create the model matrix and either fail or create dummy variables prematurely. 
diff --git a/man/rmd/proportional_hazards_glmnet.md b/man/rmd/proportional_hazards_glmnet.md new file mode 100644 index 000000000..e1a221b41 --- /dev/null +++ b/man/rmd/proportional_hazards_glmnet.md @@ -0,0 +1,115 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `penalty`: Amount of Regularization (type: double, default: see below) + +- `mixture`: Proportion of Lasso Penalty (type: double, default: 1.0) + +A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression. + +The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [parsnip::glmnet-details]. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +proportional_hazards(penalty = double(1), mixture = double(1)) %>% + set_engine("glmnet") %>% + translate() +``` + +``` +## Proportional Hazards Model Specification (censored regression) +## +## Main Arguments: +## penalty = 0 +## mixture = double(1) +## +## Computational engine: glmnet +## +## Model fit template: +## censored::glmnet_fit_wrapper(formula = missing_arg(), data = missing_arg(), +## family = missing_arg(), alpha = double(1)) +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. +By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data. 
+ + +## Other details + +The model does not fit an intercept. + +The model formula (which is required) can include _special_ terms, such as [survival::strata()]. This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. This is different than the syntax offered by the [glmnet::glmnet()] package (i.e., [glmnet::stratifySurv()]) which is not recommended here. + +For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: + + + + +```r +library(survival) +library(censored) +library(dplyr) +library(tidyr) + +mod <- + proportional_hazards(penalty = 0.01) %>% + set_engine("glmnet", nlambda = 5) %>% + fit(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data = ovarian) + +pred_data <- data.frame(age = c(50, 50), ecog.ps = c(1, 1), rx = c(1, 2)) + +# Different survival probabilities for different values of 'rx' +predict(mod, pred_data, type = "survival", time = 500) %>% + bind_cols(pred_data) %>% + unnest(.pred) +``` + +``` +## # A tibble: 2 × 5 +## .time .pred_survival age ecog.ps rx +## +## 1 500 0.666 50 1 1 +## 2 500 0.769 50 1 2 +``` + +Note that columns used in the `strata()` function _will_ also be estimated in the regular portion of the model (i.e., within the linear predictor). + +# Linear predictor values + +Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing performance metrics. 
To standardize across model types, the default for proportional hazards models is to have _increasing values with time_. As a result, the sign of the linear predictor will be the opposite of the value produced by the `predict()` method in the \pkg{survival} package. + +This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. + +# References + + - Simon N, Friedman J, Hastie T, Tibshirani R. 2011. "Regularization Paths for Cox’s Proportional Hazards Model via Coordinate Descent." _Journal of Statistical Software_, Articles 39 (5): 1–13. \doi{10.18637/jss.v039.i05}. + + - Hastie T, Tibshirani R, Wainwright M. 2015. _Statistical Learning with Sparsity_. CRC Press. + + - Kuhn M, Johnson K. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/proportional_hazards_survival.md b/man/rmd/proportional_hazards_survival.md new file mode 100644 index 000000000..f74ad4283 --- /dev/null +++ b/man/rmd/proportional_hazards_survival.md @@ -0,0 +1,100 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + +This model has no tuning parameters. + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +proportional_hazards() %>% + set_engine("survival") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Proportional Hazards Model Specification (censored regression) +## +## Computational engine: survival +## +## Model fit template: +## survival::coxph(formula = missing_arg(), data = missing_arg(), +## x = TRUE, model = TRUE) +``` + +## Other details + +The model does not fit an intercept. + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +The model formula can include _special_ terms, such as [survival::strata()]. 
This allows the baseline hazard to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. + +For example, in this model, the numeric column `rx` is used to estimate two different baseline hazards for each value of the column: + + +```r +library(survival) + +proportional_hazards() %>% + fit(Surv(futime, fustat) ~ age + strata(rx), data = ovarian) %>% + extract_fit_engine() %>% + # Two different hazards for each value of 'rx' + basehaz() +``` + +``` +## hazard time strata +## 1 0.02250134 59 rx=1 +## 2 0.05088586 115 rx=1 +## 3 0.09467873 156 rx=1 +## 4 0.14809975 268 rx=1 +## 5 0.30670509 329 rx=1 +## 6 0.46962698 431 rx=1 +## 7 0.46962698 448 rx=1 +## 8 0.46962698 477 rx=1 +## 9 1.07680229 638 rx=1 +## 10 1.07680229 803 rx=1 +## 11 1.07680229 855 rx=1 +## 12 1.07680229 1040 rx=1 +## 13 1.07680229 1106 rx=1 +## 14 0.05843331 353 rx=2 +## 15 0.12750063 365 rx=2 +## 16 0.12750063 377 rx=2 +## 17 0.12750063 421 rx=2 +## 18 0.23449656 464 rx=2 +## 19 0.35593895 475 rx=2 +## 20 0.50804209 563 rx=2 +## 21 0.50804209 744 rx=2 +## 22 0.50804209 769 rx=2 +## 23 0.50804209 770 rx=2 +## 24 0.50804209 1129 rx=2 +## 25 0.50804209 1206 rx=2 +## 26 0.50804209 1227 rx=2 +``` + +Note that columns used in the `strata()` function will not be estimated in the regular portion of the model (i.e., within the linear predictor). + + +# Linear predictor values + +Since risk regression and parametric survival models are modeling different characteristics (e.g. relative hazard versus event time), their linear predictors will be going in opposite directions. + +For example, for parametric models, the linear predictor _increases with time_. For proportional hazards models the linear predictor _decreases with time_ (since hazard is increasing). As such, the linear predictors for these two quantities will have opposite signs. + +tidymodels does not treat different models differently when computing performance metrics.
To standardize across model types, the default for proportional hazards models is to have _increasing values with time_. As a result, the sign of the linear predictor will be the opposite of the value produced by the `predict()` method in the \pkg{survival} package. + +This behavior can be changed by using the `increasing` argument when calling `predict()` on a \pkg{parsnip} model object. + +## References + +- Andersen P, Gill R. 1982. Cox's regression model for counting processes, a large sample study. _Annals of Statistics_ 10, 1100-1120. diff --git a/man/rmd/rand_forest_party.md b/man/rmd/rand_forest_party.md new file mode 100644 index 000000000..4e1582836 --- /dev/null +++ b/man/rmd/rand_forest_party.md @@ -0,0 +1,58 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `trees`: # Trees (type: integer, default: 500L) + +- `min_n`: Minimal Node Size (type: integer, default: 20L) + +- `mtry`: # Randomly Selected Predictors (type: integer, default: 5L) + +## Translation from parsnip to the original package (censored regression) + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +rand_forest() %>% + set_engine("party") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Random Forest Model Specification (censored regression) +## +## Computational engine: party +## +## Model fit template: +## censored::cond_inference_surv_cforest(formula = missing_arg(), +## data = missing_arg()) +``` + +[cond_inference_surv_cforest()] is a wrapper around [party::cforest()] (and other functions) that makes it easier to run this model. + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. 
Dummy variables are not required for this model. + +## Other details + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + + +## References + + - Hothorn T, Buhlmann P, Dudoit S, Molinaro A, Van der Laan MJ. 2006. Survival Ensembles. _Biostatistics_, 7(3), 355–373. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. diff --git a/man/rmd/rand_forest_randomForest.md b/man/rmd/rand_forest_randomForest.md new file mode 100644 index 000000000..b0360238c --- /dev/null +++ b/man/rmd/rand_forest_randomForest.md @@ -0,0 +1,96 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `trees`: # Trees (type: integer, default: 500L) + +- `min_n`: Minimal Node Size (type: integer, default: see below) + +`mtry` depends on the number of columns and the model mode. The default in [randomForest::randomForest()] is `floor(sqrt(ncol(x)))` for classification and `floor(ncol(x)/3)` for regression. + +`min_n` depends on the mode. For regression, a value of 5 is the default. For classification, a value of 10 is used. 
+ +## Translation from parsnip to the original package (regression) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("randomForest") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Random Forest Model Specification (regression) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: randomForest +## +## Model fit template: +## randomForest::randomForest(x = missing_arg(), y = missing_arg(), +## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1), +## x)) +``` + +`min_rows()` and `min_cols()` will adjust the chosen parameter values if they are not consistent with the actual data dimensions. + +## Translation from parsnip to the original package (classification) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("randomForest") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Random Forest Model Specification (classification) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: randomForest +## +## Model fit template: +## randomForest::randomForest(x = missing_arg(), y = missing_arg(), +## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1), +## x)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-randomForest) for `rand_forest()` with the `"randomForest"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013.
_Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/rand_forest_ranger.md b/man/rmd/rand_forest_ranger.md new file mode 100644 index 000000000..16b3cdba9 --- /dev/null +++ b/man/rmd/rand_forest_ranger.md @@ -0,0 +1,105 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `trees`: # Trees (type: integer, default: 500L) + +- `min_n`: Minimal Node Size (type: integer, default: see below) + +`mtry` depends on the number of columns. The default in [ranger::ranger()] is `floor(sqrt(ncol(x)))`. + +`min_n` depends on the mode. For regression, a value of 5 is the default. For classification, a value of 10 is used. + +## Translation from parsnip to the original package (regression) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("ranger") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Random Forest Model Specification (regression) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: ranger +## +## Model fit template: +## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## mtry = min_cols(~integer(1), x), num.trees = integer(1), +## min.node.size = min_rows(~integer(1), x), num.threads = 1, +## verbose = FALSE, seed = sample.int(10^5, 1)) +``` + +`min_rows()` and `min_cols()` will adjust the chosen parameter values if they are not consistent with the actual data dimensions.
+ +## Translation from parsnip to the original package (classification) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("ranger") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Random Forest Model Specification (classification) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: ranger +## +## Model fit template: +## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(), +## mtry = min_cols(~integer(1), x), num.trees = integer(1), +## min.node.size = min_rows(~integer(1), x), num.threads = 1, +## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) +``` + +Note that a `ranger` probability forest is always fit (unless the `probability` argument is changed by the user via [set_engine()]). + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Other notes + +By default, parallel processing is turned off. When tuning, it is more efficient to parallelize over the resamples and tuning parameters. To parallelize the construction of the trees within the `ranger` model, change the `num.threads` argument via [set_engine()]. + +For `ranger` confidence intervals, the intervals are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and will be coerced to be in this range. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#rand-forest-ranger) for `rand_forest()` with the `"ranger"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
diff --git a/man/rmd/rand_forest_spark.md b/man/rmd/rand_forest_spark.md new file mode 100644 index 000000000..9253ebbab --- /dev/null +++ b/man/rmd/rand_forest_spark.md @@ -0,0 +1,102 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `mtry`: # Randomly Selected Predictors (type: integer, default: see below) + +- `trees`: # Trees (type: integer, default: 20L) + +- `min_n`: Minimal Node Size (type: integer, default: 1L) + +`mtry` depends on the number of columns and the model mode. The default in [sparklyr::ml_random_forest()] is `floor(sqrt(ncol(x)))` for classification and `floor(ncol(x)/3)` for regression. + +## Translation from parsnip to the original package (regression) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("spark") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Random Forest Model Specification (regression) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(), +## type = "regression", feature_subset_strategy = integer(1), +## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1), +## x), seed = sample.int(10^5, 1)) +``` + +`min_rows()` and `min_cols()` will adjust the chosen parameter values if they are not consistent with the actual data dimensions.
+ +## Translation from parsnip to the original package (classification) + + +```r +rand_forest( + mtry = integer(1), + trees = integer(1), + min_n = integer(1) +) %>% + set_engine("spark") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Random Forest Model Specification (classification) +## +## Main Arguments: +## mtry = integer(1) +## trees = integer(1) +## min_n = integer(1) +## +## Computational engine: spark +## +## Model fit template: +## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(), +## type = "classification", feature_subset_strategy = integer(1), +## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1), +## x), seed = sample.int(10^5, 1)) +``` + +## Preprocessing requirements + + +This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model. + +## Other details + + +For models created using the `"spark"` engine, there are several things to consider. + +* Only the formula interface to via `fit()` is available; using `fit_xy()` will generate an error. +* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots. +* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns. +* To retain the model object for a new R session (via `save()`), the `model$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
+ diff --git a/man/rmd/rule_fit_xrf.md b/man/rmd/rule_fit_xrf.md new file mode 100644 index 000000000..04582d83f --- /dev/null +++ b/man/rmd/rule_fit_xrf.md @@ -0,0 +1,143 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 8 tuning parameters: + +- `mtry`: Proportion Randomly Selected Predictors (type: double, default: 1.0) + +- `trees`: # Trees (type: integer, default: 15L) + +- `min_n`: Minimal Node Size (type: integer, default: 1L) + +- `tree_depth`: Tree Depth (type: integer, default: 6L) + +- `learn_rate`: Learning Rate (type: double, default: 0.3) + +- `loss_reduction`: Minimum Loss Reduction (type: double, default: 0.0) + +- `sample_size`: Proportion Observations Sampled (type: double, default: 1.0) + +- `penalty`: Amount of Regularization (type: double, default: 0.1) + + +## Translation from parsnip to the underlying model call (regression) + +There is a parsnip extension package required to fit this model to this mode: **rules**. 
+ + +```r +library(rules) + +rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) %>% + set_engine("xrf") %>% + set_mode("regression") %>% + translate() +``` + +``` +## RuleFit Model Specification (regression) +## +## Main Arguments: +## mtry = numeric(1) +## trees = integer(1) +## min_n = integer(1) +## tree_depth = integer(1) +## learn_rate = numeric(1) +## loss_reduction = numeric(1) +## sample_size = numeric(1) +## penalty = numeric(1) +## +## Computational engine: xrf +## +## Model fit template: +## rules::xrf_fit(object = missing_arg(), data = missing_arg(), +## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), +## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), +## subsample = numeric(1), lambda = numeric(1)) +``` + +## Translation from parsnip to the underlying model call (classification) + +There is a parsnip extension package required to fit this model to this mode: **rules**. 
+ + + +```r +library(rules) + +rule_fit( + mtry = numeric(1), + trees = integer(1), + min_n = integer(1), + tree_depth = integer(1), + learn_rate = numeric(1), + loss_reduction = numeric(1), + sample_size = numeric(1), + penalty = numeric(1) +) %>% + set_engine("xrf") %>% + set_mode("classification") %>% + translate() +``` + +``` +## RuleFit Model Specification (classification) +## +## Main Arguments: +## mtry = numeric(1) +## trees = integer(1) +## min_n = integer(1) +## tree_depth = integer(1) +## learn_rate = numeric(1) +## loss_reduction = numeric(1) +## sample_size = numeric(1) +## penalty = numeric(1) +## +## Computational engine: xrf +## +## Model fit template: +## rules::xrf_fit(object = missing_arg(), data = missing_arg(), +## colsample_bytree = numeric(1), nrounds = integer(1), min_child_weight = integer(1), +## max_depth = integer(1), eta = numeric(1), gamma = numeric(1), +## subsample = numeric(1), lambda = numeric(1)) +``` + +## Differences from the xrf package + +Note that, per the documentation in `?xrf`, transformations of the response variable are not supported. To +use these with `rule_fit()`, we recommend using a recipe instead of the formula method. + +Also, there are several configuration differences in how `xrf()` is fit between that package and the wrapper used in **rules**. Some differences in default values are: + +| parameter | **xrf** | **rules** | +|------------|---------|-----------| +| `trees` | 100 | 15 | +|`max_depth` | 3 | 6 | + + +These differences will create a disparity in the values of the `penalty` argument that **glmnet** uses. Also, **rules** can also set `penalty` whereas **xrf** uses an internal 5-fold cross-validation to determine it (by default). + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. 
When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + +## References + + - Friedman and Popescu. "Predictive learning via rule ensembles." Ann. Appl. Stat. 2 (3) 916- 954, September 2008 + diff --git a/man/rmd/surv_reg_flexsurv.md b/man/rmd/surv_reg_flexsurv.md new file mode 100644 index 000000000..e98a23aa6 --- /dev/null +++ b/man/rmd/surv_reg_flexsurv.md @@ -0,0 +1,46 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `dist`: Distribution (type: character, default: 'weibull') + + +## Translation from parsnip to the original package + + +```r +surv_reg(dist = character(1)) %>% + set_engine("flexsurv") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Parametric Survival Regression Model Specification (regression) +## +## Main Arguments: +## dist = character(1) +## +## Computational engine: flexsurv +## +## Model fit template: +## flexsurv::flexsurvreg(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), dist = character(1)) +``` + +## Other details + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +For this engine, stratification cannot be specified via [`strata()`], please see the documentation of the [`flexsurv::flexsurv-package`] package for alternative specifications. + +## References + +- Jackson, C. 2016. `flexsurv`: A Platform for Parametric Survival Modeling in R. _Journal of Statistical Software_, 70(8), 1 - 33. 
diff --git a/man/rmd/surv_reg_survival.md b/man/rmd/surv_reg_survival.md new file mode 100644 index 000000000..8189f95dd --- /dev/null +++ b/man/rmd/surv_reg_survival.md @@ -0,0 +1,76 @@ + + + +For this engine, there is a single mode: regression + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `dist`: Distribution (type: character, default: 'weibull') + +## Translation from parsnip to the original package + + +```r +surv_reg(dist = character(1)) %>% + set_engine("survival") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Parametric Survival Regression Model Specification (regression) +## +## Main Arguments: +## dist = character(1) +## +## Computational engine: survival +## +## Model fit template: +## survival::survreg(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), dist = character(1), model = TRUE) +``` + +## Other details + +Note that `model = TRUE` is needed to produce quantile predictions when there is a stratification variable and can be overridden in other cases. + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +The model formula can include _special_ terms, such as [survival::strata()]. The allows the model scale parameter to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. 
For example, in this model, the numeric column `rx` is used to estimate two different scale parameters, one for each value of the column:
+ +For this engine, stratification cannot be specified via [strata()]; please see the documentation of the [flexsurv] package for alternative specifications. + +## References + +- Jackson, C. 2016. `flexsurv`: A Platform for Parametric Survival Modeling in R. _Journal of Statistical Software_, 70(8), 1 - 33. diff --git a/man/rmd/survival_reg_survival.md b/man/rmd/survival_reg_survival.md new file mode 100644 index 000000000..9a8132c10 --- /dev/null +++ b/man/rmd/survival_reg_survival.md @@ -0,0 +1,80 @@ + + + +For this engine, there is a single mode: censored regression + +## Tuning Parameters + + + +This model has 1 tuning parameters: + +- `dist`: Distribution (type: character, default: 'weibull') + +## Translation from parsnip to the original package + +There is a parsnip extension package required to fit this model to this mode: **censored**. + + +```r +library(censored) + +survival_reg(dist = character(1)) %>% + set_engine("survival") %>% + set_mode("censored regression") %>% + translate() +``` + +``` +## Parametric Survival Regression Model Specification (censored regression) +## +## Main Arguments: +## dist = character(1) +## +## Computational engine: survival +## +## Model fit template: +## survival::survreg(formula = missing_arg(), data = missing_arg(), +## weights = missing_arg(), dist = character(1), model = TRUE) +``` + +## Other details + +In the translated syntax above, note that `model = TRUE` is needed to produce quantile predictions when there is a stratification variable and can be overridden in other cases. + +The main interface for this model uses the formula method since the model specification typically involved the use of [survival::Surv()]. + +The model formula can include _special_ terms, such as [survival::strata()]. The allows the model scale parameter to differ between groups contained in the function. The column used inside `strata()` is treated as qualitative no matter its type. 
For example, in this model, the numeric column `rx` is used to estimate two different scale parameters, one for each value of the column:
+ +## Translation from parsnip to the original package (regression) + + +```r +svm_linear( + cost = double(1), + margin = double(1) +) %>% + set_engine("LiblineaR") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Support Vector Machine Specification (regression) +## +## Main Arguments: +## cost = double(1) +## margin = double(1) +## +## Computational engine: LiblineaR +## +## Model fit template: +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), +## C = double(1), svr_eps = double(1), type = 11) +``` + +## Translation from parsnip to the original package (classification) + + +```r +svm_linear( + cost = double(1) +) %>% + set_engine("LiblineaR") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Linear Support Vector Machine Specification (classification) +## +## Main Arguments: +## cost = double(1) +## +## Computational engine: LiblineaR +## +## Model fit template: +## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(), +## C = double(1), type = 1) +``` + +The `margin` parameter does not apply to classification models. + +Note that the `LiblineaR` engine does not produce class probabilities. When optimizing the model using the tune package, the default metrics require class probabilities. To use the `tune_*()` functions, a metric set must be passed as an argument that only contains metrics for hard class predictions (e.g., accuracy). + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. 
+ +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine. + +## References + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + diff --git a/man/rmd/svm_linear_kernlab.md b/man/rmd/svm_linear_kernlab.md new file mode 100644 index 000000000..16f474b0b --- /dev/null +++ b/man/rmd/svm_linear_kernlab.md @@ -0,0 +1,92 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 2 tuning parameters: + +- `cost`: Cost (type: double, default: 1.0) + +- `margin`: Insensitivity Margin (type: double, default: 0.1) + +## Translation from parsnip to the original package (regression) + + +```r +svm_linear( + cost = double(1), + margin = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Linear Support Vector Machine Specification (regression) +## +## Main Arguments: +## cost = double(1) +## margin = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## epsilon = double(1), kernel = "vanilladot") +``` + +## Translation from parsnip to the original package (classification) + + +```r +svm_linear( + cost = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Linear Support Vector Machine Specification (classification) +## +## Main Arguments: +## cost = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## kernel = "vanilladot", prob.model = TRUE) +``` + +The `margin` parameter does not apply to classification models. + +Note that the `"kernlab"` engine does not naturally estimate class probabilities. 
To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-kernlab) for `svm_linear()` with the `"kernlab"` engine. + +## References + + - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf) + + - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
+ diff --git a/man/rmd/svm_poly_kernlab.md b/man/rmd/svm_poly_kernlab.md new file mode 100644 index 000000000..31e87e4be --- /dev/null +++ b/man/rmd/svm_poly_kernlab.md @@ -0,0 +1,106 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 4 tuning parameters: + +- `cost`: Cost (type: double, default: 1.0) + +- `degree`: Degree of Interaction (type: integer, default: 1L1) + +- `scale_factor`: Scale Factor (type: double, default: 1.0) + +- `margin`: Insensitivity Margin (type: double, default: 0.1) + +## Translation from parsnip to the original package (regression) + + +```r +svm_poly( + cost = double(1), + degree = integer(1), + scale_factor = double(1), + margin = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Polynomial Support Vector Machine Specification (regression) +## +## Main Arguments: +## cost = double(1) +## degree = integer(1) +## scale_factor = double(1) +## margin = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## epsilon = double(1), kernel = "polydot", kpar = list(degree = ~integer(1), +## scale = ~double(1))) +``` + +## Translation from parsnip to the original package (classification) + + +```r +svm_poly( + cost = double(1), + degree = integer(1), + scale_factor = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Polynomial Support Vector Machine Specification (classification) +## +## Main Arguments: +## cost = double(1) +## degree = integer(1) +## scale_factor = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## kernel = "polydot", prob.model = TRUE, kpar = list(degree = ~integer(1), +## scale = ~double(1))) +``` + +The `margin` parameter does not apply to 
classification models. + +Note that the `"kernlab"` engine does not naturally estimate class probabilities. To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-poly-kernlab) for `svm_poly()` with the `"kernlab"` engine. + +## References + + - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf) + + - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. 
+ diff --git a/man/rmd/svm_rbf_kernlab.md b/man/rmd/svm_rbf_kernlab.md new file mode 100644 index 000000000..4610a0e8d --- /dev/null +++ b/man/rmd/svm_rbf_kernlab.md @@ -0,0 +1,100 @@ + + + +For this engine, there are multiple modes: classification and regression + +## Tuning Parameters + + + +This model has 3 tuning parameters: + +- `cost`: Cost (type: double, default: 1.0) + +- `rbf_sigma`: Radial Basis Function sigma (type: double, default: see below) + +- `margin`: Insensitivity Margin (type: double, default: 0.1) + +There is no default for the radial basis function kernel parameter. kernlab estimates it from the data using a heuristic method. See [kernlab::sigest()]. This method uses random numbers so, without setting the seed before fitting, the model will not be reproducible. + +## Translation from parsnip to the original package (regression) + + +```r +svm_rbf( + cost = double(1), + rbf_sigma = double(1), + margin = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("regression") %>% + translate() +``` + +``` +## Radial Basis Function Support Vector Machine Specification (regression) +## +## Main Arguments: +## cost = double(1) +## rbf_sigma = double(1) +## margin = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## epsilon = double(1), kernel = "rbfdot", kpar = list(sigma = ~double(1))) +``` + +## Translation from parsnip to the original package (classification) + + +```r +svm_rbf( + cost = double(1), + rbf_sigma = double(1) +) %>% + set_engine("kernlab") %>% + set_mode("classification") %>% + translate() +``` + +``` +## Radial Basis Function Support Vector Machine Specification (classification) +## +## Main Arguments: +## cost = double(1) +## rbf_sigma = double(1) +## +## Computational engine: kernlab +## +## Model fit template: +## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1), +## kernel = "rbfdot", prob.model = TRUE, kpar = 
list(sigma = ~double(1))) +``` + +The `margin` parameter does not apply to classification models. + +Note that the `"kernlab"` engine does not naturally estimate class probabilities. To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream. + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators. + + +Predictors should have the same scale. One way to achieve this is to center and +scale each so that each predictor has mean zero and a variance of one. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-rbf-kernlab) for `svm_rbf()` with the `"kernlab"` engine. + +## References + + - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf) + + - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_. + + - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer. + From 523599caeaa6b4656f585e2db713abee53cf8722 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 7 Feb 2022 21:09:52 -0500 Subject: [PATCH 64/65] rm randomForest. 
See tidymodels/extratests/41 --- DESCRIPTION | 1 - .../testthat/test_rand_forest_randomForest.R | 248 ------------------ 2 files changed, 249 deletions(-) delete mode 100644 tests/testthat/test_rand_forest_randomForest.R diff --git a/DESCRIPTION b/DESCRIPTION index d6e2d1279..1f93660b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,6 @@ Suggests: mgcv, modeldata, nlme, - randomForest, ranger (>= 0.12.0), remotes, rmarkdown, diff --git a/tests/testthat/test_rand_forest_randomForest.R b/tests/testthat/test_rand_forest_randomForest.R deleted file mode 100644 index 50738e84f..000000000 --- a/tests/testthat/test_rand_forest_randomForest.R +++ /dev/null @@ -1,248 +0,0 @@ -library(testthat) -library(parsnip) -library(tibble) - -# ------------------------------------------------------------------------------ - -context("random forest execution with randomForest") -source(test_path("helper-objects.R")) - -# ------------------------------------------------------------------------------ - -lending_club <- head(lending_club, 200) -num_pred <- c("funded_amnt", "annual_inc", "num_il_tl") - -lc_basic <- rand_forest(mode = "classification") %>% - set_engine("randomForest") -bad_rf_cls <- rand_forest(mode = "classification") %>% - set_engine("randomForest", sampsize = -10) - -# ------------------------------------------------------------------------------ - -test_that('randomForest classification execution', { - - skip_if_not_installed("randomForest") - - # check: passes interactively but not on R CMD check - # expect_error( - # fit( - # lc_basic, - # Class ~ funded_amnt + term, - # data = lending_club, - # control = ctrl - # ), - # regexp = NA - # ) - - expect_error( - fit_xy( - lc_basic, - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ), - regexp = NA - ) - - expect_error( - fit( - bad_rf_cls, - funded_amnt ~ term, - data = lending_club, - control = ctrl - ) - ) - - # check: passes interactively but not on R CMD check - # 
randomForest_form_catch <- fit( - # bad_rf_cls, - # funded_amnt ~ term, - # data = lending_club, - # control = caught_ctrl - # ) - # expect_true(inherits(randomForest_form_catch$fit, "try-error")) - - expect_error( - fit_xy( - bad_rf_cls, - x = lending_club[, num_pred], - y = lending_club$total_bal_il, - control = caught_ctrl - ) - ) - -}) - - -test_that('randomForest classification prediction', { - - skip_if_not_installed("randomForest") - - xy_fit <- fit_xy( - lc_basic, - x = lending_club[, num_pred], - y = lending_club$Class, - control = ctrl - ) - - xy_pred <- predict(xy_fit$fit, newdata = lending_club[1:6, num_pred]) - xy_pred <- unname(xy_pred) - expect_equal(xy_pred, predict(xy_fit, new_data = lending_club[1:6, num_pred])$.pred_class) - - form_fit <- fit( - lc_basic, - Class ~ funded_amnt + int_rate, - data = lending_club, - control = ctrl - ) - - form_pred <- predict(form_fit$fit, newdata = lending_club[1:6, c("funded_amnt", "int_rate")]) - form_pred <- unname(form_pred) - expect_equal( - form_pred, - predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])$.pred_class - ) -}) - -test_that('randomForest classification probabilities', { - - skip_if_not_installed("randomForest") - - xy_fit <- fit_xy( - lc_basic, - x = lending_club[, num_pred], - y = lending_club$Class, - control = ctrl - ) - - xy_pred <- predict(xy_fit$fit, newdata = lending_club[1:6, num_pred], type = "prob") - xy_pred <- as_tibble(as.data.frame(xy_pred)) - names(xy_pred) <- paste0(".pred_", names(xy_pred)) - expect_equal(xy_pred, predict(xy_fit, new_data = lending_club[1:6, num_pred], type = "prob")) - - one_row <- predict(xy_fit, new_data = lending_club[1, num_pred], type = "prob") - expect_equivalent(xy_pred[1,], one_row) - - form_fit <- fit( - lc_basic, - Class ~ funded_amnt + int_rate, - data = lending_club, - control = ctrl - ) - - form_pred <- predict(form_fit$fit, newdata = lending_club[1:6, c("funded_amnt", "int_rate")], type = "prob") - form_pred <- 
as_tibble(as.data.frame(form_pred)) - names(form_pred) <- paste0(".pred_", names(form_pred)) - expect_equal( - form_pred, - predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")], type = "prob") - ) -}) - - -# ------------------------------------------------------------------------------ - -car_form <- as.formula(mpg ~ .) -num_pred <- names(mtcars)[3:6] - -car_basic <- rand_forest(mode = "regression") %>% set_engine("randomForest") - -bad_ranger_reg <- rand_forest(mode = "regression") %>% - set_engine("randomForest", min.node.size = -10) -bad_rf_reg <- rand_forest(mode = "regression") %>% - set_engine("randomForest", sampsize = -10) - -# ------------------------------------------------------------------------------ - -test_that('randomForest regression execution', { - - skip_if_not_installed("randomForest") - - expect_error( - fit( - car_basic, - car_form, - data = mtcars, - control = ctrl - ), - regexp = NA - ) - - expect_error( - fit_xy( - car_basic, - x = mtcars, - y = mtcars$mpg, - control = ctrl - ), - regexp = NA - ) - - randomForest_form_catch <- fit( - bad_rf_reg, - car_form, - data = mtcars, - control = caught_ctrl - ) - expect_true(inherits(randomForest_form_catch$fit, "try-error")) - - randomForest_xy_catch <- fit_xy( - bad_rf_reg, - x = mtcars, - y = mtcars$mpg, - control = caught_ctrl - ) - expect_true(inherits(randomForest_xy_catch$fit, "try-error")) - -}) - -test_that('randomForest regression prediction', { - - skip_if_not_installed("randomForest") - - xy_fit <- fit_xy( - car_basic, - x = mtcars, - y = mtcars$mpg, - control = ctrl - ) - - xy_pred <- predict(xy_fit$fit, newdata = tail(mtcars)) - xy_pred <- unname(xy_pred) - - expect_equal(xy_pred, predict(xy_fit, new_data = tail(mtcars))$.pred) - -}) - -## ----------------------------------------------------------------------------- - -test_that('argument checks for data dimensions', { - - skip_if_not_installed("randomForest") - - data(penguins, package = "modeldata") - penguins 
<- na.omit(penguins) - - spec <- - rand_forest(mtry = 1000, min_n = 1000, trees = 5) %>% - set_engine("randomForest") %>% - set_mode("regression") - - expect_warning( - f_fit <- spec %>% fit(body_mass_g ~ ., data = penguins), - "(1000 samples)|(1000 columns)" - ) - - expect_warning( - xy_fit <- spec %>% fit_xy(x = penguins[, -6], y = penguins$body_mass_g), - "(1000 samples)|(1000 columns)" - ) - - expect_equal(f_fit$fit$mtry, 6) - expect_equal(f_fit$fit$call$nodesize, rlang::expr(min_rows(~1000, x))) - expect_equal(xy_fit$fit$mtry, 6) - expect_equal(xy_fit$fit$call$nodesize, rlang::expr(min_rows(~1000, x))) - -}) - - From e54a4397081c4cb9eaac3856580292286f5671bb Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 8 Feb 2022 07:36:34 -0500 Subject: [PATCH 65/65] temporarily disable windows testing (tensorflow install) --- .github/workflows/R-CMD-check.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 52d7d472d..b69f1fd92 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -23,7 +23,8 @@ jobs: matrix: config: - {os: macOS-latest, r: 'release'} - - {os: windows-latest, r: 'release'} + # disable until tensorflow install is worked out + # - {os: windows-latest, r: 'release'} # Use older ubuntu to maximise backward compatibility - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-18.04, r: 'release'}