diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 17efced74..b2a69e549 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,7 +1,10 @@ on: push: + branches: + - master pull_request: - types: [opened, synchronize, reopened] + branches: + - master name: R-CMD-check @@ -18,7 +21,7 @@ jobs: - {os: macOS-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: windows-latest, r: '3.6'} - - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - {os: ubuntu-16.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - {os: ubuntu-16.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} @@ -34,6 +37,7 @@ jobs: - uses: r-lib/actions/setup-r@master with: r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} - uses: r-lib/actions/setup-pandoc@master @@ -79,7 +83,7 @@ jobs: run: | reticulate::conda_create('r-reticulate', packages = c('python==3.6.9')) tensorflow::install_tensorflow(version='1.14.0') - shell: Rscript {0} + shell: Rscript {0} - name: Session info run: | diff --git a/NAMESPACE b/NAMESPACE index d238df397..87169f197 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -168,6 +168,7 @@ export(set_model_mode) export(set_new_model) export(set_pred) export(show_call) +export(show_engines) export(show_fit) export(show_model_info) export(surv_reg) diff --git a/NEWS.md b/NEWS.md index 8a5fc3ef0..adb9b6db8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # parsnip (development version) +* `show_engines()` will provide 
information on the current set for a model. + # parsnip 0.1.3 * A `glance()` method for `model_fit` objects was added (#325) diff --git a/R/boost_tree.R b/R/boost_tree.R index d3f1cc31d..a005dbb64 100644 --- a/R/boost_tree.R +++ b/R/boost_tree.R @@ -62,6 +62,8 @@ #' \item \pkg{Spark}: `"spark"` #' } #' +#' For this model, other packages may add additional engines. Use +#' [show_engines()] to see the current set of engines. #' #' @includeRmd man/rmd/boost-tree.Rmd details #' @@ -81,6 +83,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()], [set_engine()] #' @examples +#' show_engines("boost_tree") +#' #' boost_tree(mode = "classification", trees = 20) #' # Parameters can be represented by a placeholder: #' boost_tree(mode = "regression", mtry = varying()) diff --git a/R/decision_tree.R b/R/decision_tree.R index 5c518771f..015501971 100644 --- a/R/decision_tree.R +++ b/R/decision_tree.R @@ -62,6 +62,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("decision_tree") +#' #' decision_tree(mode = "classification", tree_depth = 5) #' # Parameters can be represented by a placeholder: #' decision_tree(mode = "regression", cost_complexity = varying()) diff --git a/R/engines.R b/R/engines.R index f1ad8a3ec..20593dc9e 100644 --- a/R/engines.R +++ b/R/engines.R @@ -106,3 +106,24 @@ set_engine <- function(object, engine, ...) { engine = object$engine ) } + +#' Display available engines for a model +#' +#' The possible engines for a model may depend on what packages are loaded. There +#' are some `parsnip`-adjacent packages that only add engines to existing models. +#' @param x The name of a `parsnip` model (e.g., "linear_reg", "mars", etc.) +#' @return A tibble. 
+#' @examples show_engines("linear_reg")
+#' @export
+show_engines <- function(x) {
+  if (!is.character(x) || length(x) > 1) {
+    rlang::abort("`show_engines()` takes a single character string as input.")
+  }
+  res <- try(get_from_env(x), silent = TRUE)
+  if (inherits(res, "try-error")) {
+    rlang::abort(
+      paste0("No results found for model function '", x, "'.")
+    )
+  }
+  res
+}
diff --git a/R/linear_reg.R b/R/linear_reg.R
index 619a3c761..0839f4132 100644
--- a/R/linear_reg.R
+++ b/R/linear_reg.R
@@ -42,6 +42,9 @@
 #' \item \pkg{keras}: `"keras"`
 #' }
 #'
+#' For this model, other packages may add additional engines. Use
+#' [show_engines()] to see the current set of engines.
+#'
 #' @includeRmd man/rmd/linear-reg.Rmd details
 #'
 #' @note For models created using the spark engine, there are
@@ -59,6 +62,8 @@
 #'
 #' @seealso [fit()], [set_engine()]
 #' @examples
+#' show_engines("linear_reg")
+#'
 #' linear_reg()
 #' # Parameters can be represented by a placeholder:
 #' linear_reg(penalty = varying())
diff --git a/R/logistic_reg.R b/R/logistic_reg.R
index 049a19685..c8bf93c6e 100644
--- a/R/logistic_reg.R
+++ b/R/logistic_reg.R
@@ -40,6 +40,9 @@
 #' \item \pkg{keras}: `"keras"`
 #' }
 #'
+#' For this model, other packages may add additional engines. Use
+#' [show_engines()] to see the current set of engines. 
+#' #' @includeRmd man/rmd/logistic-reg.Rmd details #' #' @note For models created using the spark engine, there are @@ -57,6 +60,8 @@ #' #' @seealso [fit()] #' @examples +#' show_engines("logistic_reg") +#' #' logistic_reg() #' # Parameters can be represented by a placeholder: #' logistic_reg(penalty = varying()) diff --git a/R/mars.R b/R/mars.R index 1e68efb95..276ff82f7 100644 --- a/R/mars.R +++ b/R/mars.R @@ -41,6 +41,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("mars") +#' #' mars(mode = "regression", num_terms = 5) #' @export mars <- diff --git a/R/mlp.R b/R/mlp.R index 422e1fcd3..bee75a322 100644 --- a/R/mlp.R +++ b/R/mlp.R @@ -56,6 +56,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("mlp") +#' #' mlp(mode = "classification", penalty = 0.01) #' # Parameters can be represented by a placeholder: #' mlp(mode = "regression", hidden_units = varying()) diff --git a/R/multinom_reg.R b/R/multinom_reg.R index 48310ca48..1bcd4525b 100644 --- a/R/multinom_reg.R +++ b/R/multinom_reg.R @@ -56,6 +56,8 @@ #' #' @seealso [fit()] #' @examples +#' show_engines("multinom_reg") +#' #' multinom_reg() #' # Parameters can be represented by a placeholder: #' multinom_reg(penalty = varying()) diff --git a/R/nearest_neighbor.R b/R/nearest_neighbor.R index 6988697a6..37b953596 100644 --- a/R/nearest_neighbor.R +++ b/R/nearest_neighbor.R @@ -51,6 +51,8 @@ #' @seealso [fit()] #' #' @examples +#' show_engines("nearest_neighbor") +#' #' nearest_neighbor(neighbors = 11) #' #' @export diff --git a/R/rand_forest.R b/R/rand_forest.R index 98177eae5..1f018ec19 100644 --- a/R/rand_forest.R +++ b/R/rand_forest.R @@ -56,6 +56,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("rand_forest") +#' #' rand_forest(mode = "classification", trees = 2000) #' # Parameters can be represented by a placeholder: #' rand_forest(mode = "regression", mtry = varying()) diff --git a/R/surv_reg.R 
b/R/surv_reg.R index cced97abf..d775ff399 100644 --- a/R/surv_reg.R +++ b/R/surv_reg.R @@ -45,6 +45,8 @@ #' @references Jackson, C. (2016). `flexsurv`: A Platform for Parametric Survival #' Modeling in R. _Journal of Statistical Software_, 70(8), 1 - 33. #' @examples +#' show_engines("surv_reg") +#' #' surv_reg() #' # Parameters can be represented by a placeholder: #' surv_reg(dist = varying()) diff --git a/R/svm_poly.R b/R/svm_poly.R index 5edc8c631..087af9b79 100644 --- a/R/svm_poly.R +++ b/R/svm_poly.R @@ -41,6 +41,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("svm_poly") +#' #' svm_poly(mode = "classification", degree = 1.2) #' # Parameters can be represented by a placeholder: #' svm_poly(mode = "regression", cost = varying()) diff --git a/R/svm_rbf.R b/R/svm_rbf.R index 52503f72a..e64abee70 100644 --- a/R/svm_rbf.R +++ b/R/svm_rbf.R @@ -42,6 +42,8 @@ #' @importFrom purrr map_lgl #' @seealso [fit()] #' @examples +#' show_engines("svm_rbf") +#' #' svm_rbf(mode = "classification", rbf_sigma = 0.2) #' # Parameters can be represented by a placeholder: #' svm_rbf(mode = "regression", cost = varying()) diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index af7fbf2ff..d3904ca93 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -117,6 +117,9 @@ following \emph{engines}: \item \pkg{R}: \code{"xgboost"} (the default), \code{"C5.0"} \item \pkg{Spark}: \code{"spark"} } + +For this model, other packages may add additional engines. Use +\code{\link[=show_engines]{show_engines()}} to see the current set of engines. } \note{ For models created using the spark engine, there are @@ -178,6 +181,10 @@ mod_param <- update(sample_size = sample_prop(c(0.4, 0.9))) }\if{html}{\out{}} +For this engine, tuning over \code{trees} is very efficient since the same +model object can be used to make predictions over multiple values of +\code{trees}. 
+ Finally, note that \code{xgboost} models require that non-numeric predictors (e.g., factors) must be converted to dummy variables or some other numeric representation. By default, when using \code{fit()} with \code{xgboost}, a @@ -201,6 +208,10 @@ Note that \code{\link[C50:C5.0]{C50::C5.0()}} does not require factor predictors to be converted to indicator variables. \code{fit()} does not affect the encoding of the predictor values (i.e. factors stay factors) for this model. + +For this engine, tuning over \code{trees} is very efficient since the same +model object can be used to make predictions over multiple values of +\code{trees}. } \subsection{spark}{\if{html}{\out{
}}\preformatted{boost_tree() \%>\% @@ -256,6 +267,8 @@ regression. } \examples{ +show_engines("boost_tree") + boost_tree(mode = "classification", trees = 20) # Parameters can be represented by a placeholder: boost_tree(mode = "regression", mtry = varying()) diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index f18144657..88c495752 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -189,6 +189,8 @@ parameter.\tabular{llll}{ } \examples{ +show_engines("decision_tree") + decision_tree(mode = "classification", tree_depth = 5) # Parameters can be represented by a placeholder: decision_tree(mode = "regression", cost_complexity = varying()) diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index a11abb402..5eb262d0f 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -74,6 +74,9 @@ following \emph{engines}: \item \pkg{Spark}: \code{"spark"} \item \pkg{keras}: \code{"keras"} } + +For this model, other packages may add additional engines. Use +\code{\link[=show_engines]{show_engines()}} to see the current set of engines. } \note{ For models created using the spark engine, there are @@ -195,6 +198,8 @@ parameter.\tabular{llll}{ } \examples{ +show_engines("linear_reg") + linear_reg() # Parameters can be represented by a placeholder: linear_reg(penalty = varying()) diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index fcee139bd..4bba6a7ac 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -72,6 +72,9 @@ following \emph{engines}: \item \pkg{Spark}: \code{"spark"} \item \pkg{keras}: \code{"keras"} } + +For this model, other packages may add additional engines. Use +\code{\link[=show_engines]{show_engines()}} to see the current set of engines. 
} \note{ For models created using the spark engine, there are @@ -194,6 +197,8 @@ parameter.\tabular{llll}{ } \examples{ +show_engines("logistic_reg") + logistic_reg() # Parameters can be represented by a placeholder: logistic_reg(penalty = varying()) diff --git a/man/mars.Rd b/man/mars.Rd index e8cb16234..b8534ce28 100644 --- a/man/mars.Rd +++ b/man/mars.Rd @@ -110,6 +110,10 @@ attached. Also, \code{fit()} passes the data directly to \code{earth::earth()} so that its formula method can create dummy variables as-needed. + +For this engine, tuning over \code{num_terms} is very efficient since the +same model object can be used to make predictions over multiple values +of \code{num_terms}. } \subsection{Parameter translations}{ @@ -128,6 +132,8 @@ parameter.\tabular{ll}{ } \examples{ +show_engines("mars") + mars(mode = "regression", num_terms = 5) model <- mars(num_terms = 10, prune_method = "none") model diff --git a/man/mlp.Rd b/man/mlp.Rd index ed6b1087a..6b5e00a01 100644 --- a/man/mlp.Rd +++ b/man/mlp.Rd @@ -176,6 +176,8 @@ parameter.\tabular{lll}{ } \examples{ +show_engines("mlp") + mlp(mode = "classification", penalty = 0.01) # Parameters can be represented by a placeholder: mlp(mode = "regression", hidden_units = varying()) diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index 895a9d51a..954dd3089 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -170,6 +170,8 @@ parameter.\tabular{lllll}{ } \examples{ +show_engines("multinom_reg") + multinom_reg() # Parameters can be represented by a placeholder: multinom_reg(penalty = varying()) diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index d4dd84df1..86513d55a 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -88,6 +88,10 @@ version of \code{train.kknn()} and not \code{kknn()}. It is set up in this way s that \code{parsnip} can utilize the underlying \code{predict.train.kknn} method to predict on new data. 
This also means that a single value of that function’s \code{kernel} argument (a.k.a \code{weight_func} here) can be supplied + +For this engine, tuning over \code{neighbors} is very efficient since the +same model object can be used to make predictions over multiple values +of \code{neighbors}. } \subsection{Parameter translations}{ @@ -106,6 +110,8 @@ parameter.\tabular{ll}{ } \examples{ +show_engines("nearest_neighbor") + nearest_neighbor(neighbors = 11) } diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index 03380a213..a42e01664 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -206,6 +206,8 @@ classification and 5 for regression. } \examples{ +show_engines("rand_forest") + rand_forest(mode = "classification", trees = 2000) # Parameters can be represented by a placeholder: rand_forest(mode = "regression", mtry = varying()) diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd index dae9e5156..b99b8ada5 100644 --- a/man/rmd/boost-tree.Rmd +++ b/man/rmd/boost-tree.Rmd @@ -38,6 +38,9 @@ mod_param <- update(sample_size = sample_prop(c(0.4, 0.9))) ``` +For this engine, tuning over `trees` is very efficient since the same model +object can be used to make predictions over multiple values of `trees`. + Finally, note that `xgboost` models require that non-numeric predictors (e.g., factors) must be converted to dummy variables or some other numeric representation. By default, when using `fit()` with `xgboost`, a one-hot encoding is used to convert factor predictors to indicator variables. @@ -52,6 +55,9 @@ boost_tree() %>% Note that [C50::C5.0()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model. +For this engine, tuning over `trees` is very efficient since the same model +object can be used to make predictions over multiple values of `trees`. 
+ ## spark ```{r spark-reg} diff --git a/man/rmd/mars.Rmd b/man/rmd/mars.Rmd index d0976f697..88ab12440 100644 --- a/man/rmd/mars.Rmd +++ b/man/rmd/mars.Rmd @@ -26,6 +26,8 @@ Note that, when the model is fit, the `earth` package only has its namespace lo Also, `fit()` passes the data directly to `earth::earth()` so that its formula method can create dummy variables as-needed. +For this engine, tuning over `num_terms` is very efficient since the same model +object can be used to make predictions over multiple values of `num_terms`. ## Parameter translations diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd index 002b9c8d6..14d0dd352 100644 --- a/man/rmd/nearest-neighbor.Rmd +++ b/man/rmd/nearest-neighbor.Rmd @@ -27,6 +27,9 @@ utilize the underlying `predict.train.kknn` method to predict on new data. This also means that a single value of that function's `kernel` argument (a.k.a `weight_func` here) can be supplied +For this engine, tuning over `neighbors` is very efficient since the same model +object can be used to make predictions over multiple values of `neighbors`. + ## Parameter translations The standardized parameter names in parsnip can be mapped to their original diff --git a/man/show_engines.Rd b/man/show_engines.Rd new file mode 100644 index 000000000..69d0a52db --- /dev/null +++ b/man/show_engines.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/engines.R +\name{show_engines} +\alias{show_engines} +\title{Display available engines for a model} +\usage{ +show_engines(x) +} +\arguments{ +\item{x}{The name of a \code{parsnip} model (e.g., "linear_reg", "mars", etc.)} +} +\value{ +A tibble. +show_engines("linear_reg") +} +\description{ +The possible engines for a model may depend on what packages are loaded. There +are some \code{parsnip}-adjacent packages that only add engines to existing models. 
+} diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd index f65dcd705..a087ef1c2 100644 --- a/man/surv_reg.Rd +++ b/man/surv_reg.Rd @@ -117,6 +117,8 @@ parameter.\tabular{lll}{ } \examples{ +show_engines("surv_reg") + surv_reg() # Parameters can be represented by a placeholder: surv_reg(dist = varying()) diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd index 147ebdf99..489fb477b 100644 --- a/man/svm_poly.Rd +++ b/man/svm_poly.Rd @@ -125,6 +125,8 @@ parameter.\tabular{ll}{ } \examples{ +show_engines("svm_poly") + svm_poly(mode = "classification", degree = 1.2) # Parameters can be represented by a placeholder: svm_poly(mode = "regression", cost = varying()) diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd index 7b93e64ba..53827252d 100644 --- a/man/svm_rbf.Rd +++ b/man/svm_rbf.Rd @@ -156,6 +156,8 @@ parameter.\tabular{lll}{ } \examples{ +show_engines("svm_rbf") + svm_rbf(mode = "classification", rbf_sigma = 0.2) # Parameters can be represented by a placeholder: svm_rbf(mode = "regression", cost = varying()) diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index ca304d0ea..312e73b78 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -371,3 +371,10 @@ test_that('default engine', { expect_true(inherits(fit$fit, "lm")) }) +test_that('show engine', { + res <- show_engines("linear_reg") + expt <- get_from_env("linear_reg") + expect_equal(res, expt) + expect_error(show_engines("linear_re"), "No results found for model function") +}) +