From 452be816b0659a9054fdb1021e5f41571a1661e7 Mon Sep 17 00:00:00 2001 From: topepo Date: Mon, 8 Oct 2018 17:57:38 -0400 Subject: [PATCH 1/2] added pipe-able functions for setting elements --- NAMESPACE | 2 ++ R/arguments.R | 53 ++++++++++++++++++++++++++++ man/set_args.Rd | 37 +++++++++++++++++++ tests/testthat/test_args_and_modes.R | 32 +++++++++++++++++ 4 files changed, 124 insertions(+) create mode 100644 man/set_args.Rd create mode 100644 tests/testthat/test_args_and_modes.R diff --git a/NAMESPACE b/NAMESPACE index f8b57d3f8..2f68b9a64 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,8 @@ export(predict_predint.model_fit) export(predict_raw) export(predict_raw.model_fit) export(rand_forest) +export(set_args) +export(set_mode) export(show_call) export(surv_reg) export(translate) diff --git a/R/arguments.R b/R/arguments.R index 3e5b99b59..6b14401bb 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -64,3 +64,56 @@ check_others <- function(args, obj, core_args) { } args } + +#' Change elements of a model specification +#' +#' `set_args` can be used to modify the arguments of a model specification while +#' `set_mode` is used to change the model's mode. +#' +#' @param object A model specification. +#' @param ... One or more named model arguments. +#' @param mode A character string for the model type (e.g. "classification" or +#' "regression") +#' @return An updated model object. +#' @details `set_args` will replace existing values of the arguments. +#' +#' @examples +#' rand_forest() +#' +#' rand_forest() %>% +#' set_args(mtry = 3, importance = TRUE) %>% +#' set_mode("regression") +#' +#' @export +set_args <- function(object, ...) { + the_dots <- list(...) + if (length(the_dots) == 0) + stop("Please pass at least one named argument.", call. 
= FALSE) + main_args <- names(object$args) + new_args <- names(the_dots) + for (i in new_args) { + if (any(main_args == i)) { + object$args[[i]] <- the_dots[[i]] + } else { + object$others[[i]] <- the_dots[[i]] + } + } + object +} + +#' @rdname set_args +#' @export +set_mode <- function(object, mode) { + if (is.null(mode)) + return(object) + mode <- mode[1] + if (!(any(all_modes == mode))) { + stop("`mode` should be one of ", + paste0("'", all_modes, "'", collapse = ", "), + call. = FALSE) + } + object$mode <- mode + object +} + + diff --git a/man/set_args.Rd b/man/set_args.Rd new file mode 100644 index 000000000..0ea2d3656 --- /dev/null +++ b/man/set_args.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arguments.R +\name{set_args} +\alias{set_args} +\alias{set_mode} +\title{Change elements of a model specification} +\usage{ +set_args(object, ...) + +set_mode(object, mode) +} +\arguments{ +\item{object}{A model specification.} + +\item{...}{One or more named model arguments.} + +\item{mode}{A character string for the model type (e.g. "classification" or +"regression")} +} +\value{ +An updated model object. +} +\description{ +\code{set_args} can be used to modify the arguments of a model specification while +\code{set_mode} is used to change the model's mode. +} +\details{ +\code{set_args} will replace existing values of the arguments. 
+} +\examples{ +rand_forest() + +rand_forest() \%>\% + set_args(mtry = 3, importance = TRUE) \%>\% + set_mode("regression") + +} diff --git a/tests/testthat/test_args_and_modes.R b/tests/testthat/test_args_and_modes.R new file mode 100644 index 000000000..317663501 --- /dev/null +++ b/tests/testthat/test_args_and_modes.R @@ -0,0 +1,32 @@ +library(testthat) +library(parsnip) +library(dplyr) + +context("changing arguments and engine") + +test_that('pipe arguments', { + mod_1 <- rand_forest() %>% + set_args(mtry = 1, something = "blah") + expect_equal(mod_1$args$mtry, 1) + expect_equal(mod_1$others$something, "blah") + + mod_2 <- rand_forest(mtry = 2, others = list(var = "x")) %>% + set_args(mtry = 1, something = "blah") + expect_equal(mod_2$args$mtry, 1) + expect_equal(mod_2$others$something, "blah") + expect_equal(mod_2$others$var, "x") + + expect_error(rand_forest() %>% set_args()) + +}) + + +test_that('pipe engine', { + mod_1 <- rand_forest() %>% + set_mode("regression") + expect_equal(mod_1$mode, "regression") + + expect_error(rand_forest() %>% set_mode()) + expect_error(rand_forest() %>% set_mode(2)) + expect_error(rand_forest() %>% set_mode("haberdashery")) +}) \ No newline at end of file From 85bb790603d120f12c2fc831581c4412c1d0d51b Mon Sep 17 00:00:00 2001 From: topepo Date: Mon, 8 Oct 2018 19:57:54 -0400 Subject: [PATCH 2/2] closes #77 --- _pkgdown.yml | 2 + docs/articles/articles/Classification.html | 92 +++++----- docs/articles/articles/Regression.html | 31 ++-- docs/articles/articles/Scratch.html | 4 +- docs/articles/parsnip_Intro.html | 4 +- docs/reference/boost_tree.html | 4 +- docs/reference/fit.html | 10 +- docs/reference/fit_control.html | 2 +- docs/reference/index.html | 14 +- docs/reference/linear_reg.html | 4 +- docs/reference/logistic_reg.html | 4 +- docs/reference/mars.html | 4 +- docs/reference/mlp.html | 6 +- docs/reference/multinom_reg.html | 4 +- docs/reference/nearest_neighbor.html | 4 +- docs/reference/predict.model_fit.html | 2 +- 
docs/reference/rand_forest.html | 4 +- docs/reference/reexports.html | 2 +- docs/reference/set_args.html | 194 +++++++++++++++++++++ docs/reference/surv_reg.html | 6 +- 20 files changed, 298 insertions(+), 99 deletions(-) create mode 100644 docs/reference/set_args.html diff --git a/_pkgdown.yml b/_pkgdown.yml index 9b89b74dc..aaae7fa7a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,6 +32,8 @@ reference: - model_fit - model_spec - predict.model_fit + - set_args + - set_mode - translate - varying - varying_args diff --git a/docs/articles/articles/Classification.html b/docs/articles/articles/Classification.html index 9c0f132ee..f1f1b8a2f 100644 --- a/docs/articles/articles/Classification.html +++ b/docs/articles/articles/Classification.html @@ -88,9 +88,9 @@

Classification Example

To demonstrate parsnip for classification models, the credit data will be used.

+#> ✖ recipes::step() masks stats::step() + +data(credit_data) + +set.seed(7075) +data_split <- initial_split(credit_data, strata = "Status", p = 0.75) + +credit_train <- training(data_split) +credit_test <- testing(data_split)

A single hidden layer neural network will be used to predict a person’s credit status. To do so, the columns of the predictor matrix should be numeric and on a common scale. recipes will be used to do so.

keras will be used to fit a model with 5 hidden units and a 10% dropout rate to regularize the model. At each training iteration (aka epoch) a random 20% of the data will be used to measure the cross-entropy of the model.

+ +set.seed(57974) +nnet_fit <- mlp( + epochs = 100, hidden_units = 5, dropout = 0.1, + others = list(verbose = 0, validation_split = .20) +) %>% + parsnip::fit.model_spec(Status ~ ., data = juice(credit_rec), engine = "keras") + +nnet_fit +#> parsnip model object +#> +#> Model +#> ___________________________________________________________________________ +#> Layer (type) Output Shape Param # +#> =========================================================================== +#> dense_1 (Dense) (None, 5) 115 +#> ___________________________________________________________________________ +#> dense_2 (Dense) (None, 5) 30 +#> ___________________________________________________________________________ +#> dropout_1 (Dropout) (None, 5) 0 +#> ___________________________________________________________________________ +#> dense_3 (Dense) (None, 2) 12 +#> =========================================================================== +#> Total params: 157 +#> Trainable params: 157 +#> Non-trainable params: 0 +#> ___________________________________________________________________________

In parsnip, the predict function is only appropriate for numeric outcomes while predict_class and predict_classprob can be used for categorical outcomes.

+#> bad 171 81 +#> good 142 719 +#> ✖ recipes::step() masks stats::step() + +set.seed(4595) +data_split <- initial_split(ames, strata = "Sale_Price", p = 0.75) + +ames_train <- training(data_split) +ames_test <- testing(data_split)

Random Forests

@@ -127,7 +126,7 @@

preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold")
 
 rf_xy_fit <- rf_defaults %>%
-  fit_xy(
+  fit_xy(
     x = ames_train[, preds],
     y = log10(ames_train$Sale_Price),
     engine = "ranger"
@@ -189,7 +188,7 @@ 

Now, for illustration, let’s use the formula method using some new parameter values:

Suppose that there was some feature in the randomForest package that we’d like to evaluate. To do so, the only part of the syntax that needs to change is the engine argument:

rand_forest(mode = "regression", mtry = 3, trees = 1000) %>%
-  fit(
+  fit(
     log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,
     data = ames_train,
     engine = "randomForest"
@@ -242,7 +241,7 @@ 

Since ranger won’t create indicator values, n_cols would be appropriate for using mtry for a bagging model.

For example, let’s use an expression with the n_cols descriptor to fit a bagging model:

rand_forest(mode = "regression", mtry = expr(n_cols), trees = 1000) %>%
-  fit(
+  fit(
     log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,
     data = ames_train,
     engine = "ranger"
@@ -284,7 +283,7 @@ 

# Now let's fit the model using the processed version of the data glmn_fit <- linear_reg(penalty = 0.001, mixture = 0.5) %>% - fit( + fit( Sale_Price ~ ., data = juice(norm_recipe), engine = "glmnet" diff --git a/docs/articles/articles/Scratch.html b/docs/articles/articles/Scratch.html index 1a3923af4..f2ecb54b0 100644 --- a/docs/articles/articles/Scratch.html +++ b/docs/articles/articles/Scratch.html @@ -297,7 +297,7 @@

mda_spec <- mixture_da(subclasses = 2) mda_fit <- mda_spec %>% - fit(Species ~ ., data = iris_train, engine = "mda") + fit(Species ~ ., data = iris_train, engine = "mda") mda_fit #> parsnip model object #> @@ -371,7 +371,7 @@

There are some models (e.g. glmnet, plsr, Cubist) that can make predictions for different models from the same fitted model object. To facilitate that here, the current convention for these cases is to return a tibble with the prediction in a column called values and extra columns for any parameters that define the different sub-models.

For example, if I fit a linear regression model via glmnet and get four values of the regularization parameter (lambda):

linear_reg(others = list(nlambda = 4)) %>%
-  fit(mpg ~ ., data = mtcars, engine = "glmnet") %>%
+  fit(mpg ~ ., data = mtcars, engine = "glmnet") %>%
   predict(new_data = mtcars[1:3, -1])
 #> # A tibble: 12 x 2
 #>    .pred_values .pred_lambda
diff --git a/docs/articles/parsnip_Intro.html b/docs/articles/parsnip_Intro.html
index 39dfb6b1a..bad1de08c 100644
--- a/docs/articles/parsnip_Intro.html
+++ b/docs/articles/parsnip_Intro.html
@@ -237,7 +237,7 @@ 

Fitting the Model

These models can be fit using the fit function. Only the model object is returned.

-
fit(rf_mod, mpg ~ ., data = mtcars, engine = "ranger")
+
fit(rf_mod, mpg ~ ., data = mtcars, engine = "ranger")
## parsnip model object
 ## 
 ## Ranger result
@@ -255,7 +255,7 @@ 

## Splitrule: variance ## OOB prediction error (MSE): 5.71 ## R squared (OOB): 0.843

-
fit(rf_mod, mpg ~ ., data = mtcars, engine = "randomForest")
+
fit(rf_mod, mpg ~ ., data = mtcars, engine = "randomForest")
## parsnip model object
 ## 
 ## Call:
diff --git a/docs/reference/boost_tree.html b/docs/reference/boost_tree.html
index 39457d436..baf060991 100644
--- a/docs/reference/boost_tree.html
+++ b/docs/reference/boost_tree.html
@@ -211,7 +211,7 @@ 

Details

The data given to the function are not saved and are only used to determine the mode of the model. For boost_tree, the possible modes are "regression" and "classification".

-

The model can be created using the fit() function using the +

The model can be created using the fit() function using the following engines:

  • R: "xgboost", "C5.0"

  • Spark: "spark"

  • @@ -265,7 +265,7 @@

    Note

    See also

    - +

    Examples

    diff --git a/docs/reference/fit.html b/docs/reference/fit.html index cac75e19b..6d9009d87 100644 --- a/docs/reference/fit.html +++ b/docs/reference/fit.html @@ -107,13 +107,11 @@

    Fit a Model Specification to a Dataset

# S3 method for model_spec
-fit(object, formula = NULL, data = NULL,
+fit(object, formula = NULL, data = NULL,
   engine = object$engine, control = fit_control(), ...)
 
-fit_xy(object, ...)
-
 # S3 method for model_spec
-fit_xy(object, x = NULL, y = NULL,
+fit_xy(object, x = NULL, y = NULL,
   engine = object$engine, control = fit_control(), ...)

Arguments

@@ -216,13 +214,13 @@

Examp using_formula <- lm_mod %>% - fit(Class ~ funded_amnt + int_rate, + fit(Class ~ funded_amnt + int_rate, data = lending_club, engine = "glm") using_xy <- lm_mod %>% - fit_xy(x = lending_club[, c("funded_amnt", "int_rate")], + fit_xy(x = lending_club[, c("funded_amnt", "int_rate")], y = lending_club$Class, engine = "glm") diff --git a/docs/reference/fit_control.html b/docs/reference/fit_control.html index 63439737f..d62da0239 100644 --- a/docs/reference/fit_control.html +++ b/docs/reference/fit_control.html @@ -100,7 +100,7 @@

Control the fit function

-

Options can be passed to the fit() function that control the output and +

Options can be passed to the fit() function that control the output and computations

diff --git a/docs/reference/index.html b/docs/reference/index.html index 5eb48a464..e21bb0810 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -182,11 +182,17 @@

fit(<model_spec>) fit_xy()

+

fit(<model_spec>) fit_xy(<model_spec>)

Fit a Model Specification to a Dataset

+ +

reexports

+ +

Objects exported from other packages

+ +

fit_control()

@@ -211,6 +217,12 @@

set_args() set_mode()

+ +

Change elements of a model specification

+ +

translate()

diff --git a/docs/reference/linear_reg.html b/docs/reference/linear_reg.html index b3483a5c4..3ce1b64fb 100644 --- a/docs/reference/linear_reg.html +++ b/docs/reference/linear_reg.html @@ -177,7 +177,7 @@

Details

The data given to the function are not saved and are only used to determine the mode of the model. For linear_reg, the mode will always be "regression".

-

The model can be created using the fit() function using the +

The model can be created using the fit() function using the following engines:

  • R: "lm" or "glmnet"

  • Stan: "stan"

  • @@ -239,7 +239,7 @@

    Note

    See also

    - +

    Examples

    diff --git a/docs/reference/logistic_reg.html b/docs/reference/logistic_reg.html index f7a9a9004..c29d25122 100644 --- a/docs/reference/logistic_reg.html +++ b/docs/reference/logistic_reg.html @@ -175,7 +175,7 @@

    Value

    Details

    For logistic_reg, the mode will always be "classification".

    -

    The model can be created using the fit() function using the +

    The model can be created using the fit() function using the following engines:

    • R: "glm" or "glmnet"

    • Stan: "stan"

    • @@ -239,7 +239,7 @@

      Note

      See also

      - +

      Examples

      diff --git a/docs/reference/mars.html b/docs/reference/mars.html index e75fc5746..4dab481f6 100644 --- a/docs/reference/mars.html +++ b/docs/reference/mars.html @@ -182,7 +182,7 @@

      Details

      Main parameter arguments (and those in others) can avoid evaluation until the underlying function is executed by wrapping the argument in rlang::expr().

      -

      The model can be created using the fit() function using the +

      The model can be created using the fit() function using the following engines:

      • R: "earth"

      @@ -206,7 +206,7 @@

      Details

      See also

      - +

      Examples

      diff --git a/docs/reference/mlp.html b/docs/reference/mlp.html index 06e2c9438..33dd68334 100644 --- a/docs/reference/mlp.html +++ b/docs/reference/mlp.html @@ -168,7 +168,7 @@

      Arg others

      A named list of arguments to be used by the underlying models (e.g., nnet::nnet, -keras::fit, keras::compile, etc.). .

      +keras::fit, keras::compile, etc.). .

      ... @@ -202,7 +202,7 @@

      Details nnet::nnet will be set to TRUE when a regression model is created. If parameters need to be modified, update can be used in lieu of recreating the object from scratch.

      -

      The model can be created using the fit() function using the +

      The model can be created using the fit() function using the following engines:

      • R: "nnet"

      • keras: "keras"

      • @@ -237,7 +237,7 @@

        Details

        See also

        - +

        Examples

        diff --git a/docs/reference/multinom_reg.html b/docs/reference/multinom_reg.html index 77c5e03ae..df9b907ba 100644 --- a/docs/reference/multinom_reg.html +++ b/docs/reference/multinom_reg.html @@ -174,7 +174,7 @@

        Value

        Details

        For multinom_reg, the mode will always be "classification".

        -

        The model can be created using the fit() function using the +

        The model can be created using the fit() function using the following engines:

        • R: "glmnet"

        • Stan: "stan"

        • @@ -220,7 +220,7 @@

          Note

          See also

          - +

          Examples

          diff --git a/docs/reference/nearest_neighbor.html b/docs/reference/nearest_neighbor.html index 3a9dba116..2448260ec 100644 --- a/docs/reference/nearest_neighbor.html +++ b/docs/reference/nearest_neighbor.html @@ -165,7 +165,7 @@

          Arg

          Details

          -

          The model can be created using the fit() function using the +

          The model can be created using the fit() function using the following engines:

          • R: "kknn"

          @@ -189,7 +189,7 @@

          Note

          See also

          -

          varying(), fit()

          +

          Examples

          diff --git a/docs/reference/predict.model_fit.html b/docs/reference/predict.model_fit.html index a9a6a4367..23e72f2af 100644 --- a/docs/reference/predict.model_fit.html +++ b/docs/reference/predict.model_fit.html @@ -188,7 +188,7 @@

          Examp lm_model <- linear_reg() %>% - fit(mpg ~ ., data = mtcars %>% slice(11:32), engine = "lm") + fit(mpg ~ ., data = mtcars %>% slice(11:32), engine = "lm") pred_cars <- mtcars %>% diff --git a/docs/reference/rand_forest.html b/docs/reference/rand_forest.html index 4badc480f..70c1465db 100644 --- a/docs/reference/rand_forest.html +++ b/docs/reference/rand_forest.html @@ -177,7 +177,7 @@

          Value

          Details

          -

          The model can be created using the fit() function using the +

          The model can be created using the fit() function using the following engines:

          • R: "ranger" or "randomForest"

          • Spark: "spark"

          • @@ -241,7 +241,7 @@

            Note

            See also

            - +

            Examples

            diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index 76dba8833..382337451 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -103,7 +103,7 @@

            Objects exported from other packages

            These objects are imported from other packages. Follow the links below to see their documentation.

            -
            generics

            fit

            +
            generics

            fit, fit_xy

            magrittr

            %>%

            diff --git a/docs/reference/set_args.html b/docs/reference/set_args.html new file mode 100644 index 000000000..392fac8bb --- /dev/null +++ b/docs/reference/set_args.html @@ -0,0 +1,194 @@ + + + + + + + + +Change elements of a model specification — set_args • parsnip + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + +
            + +
            +
            + + +
            + +

            set_args can be used to modify the arguments of a model specification while +set_mode is used to change the model's mode.

            + +
            + +
            set_args(object, ...)
            +
            +set_mode(object, mode)
            + +

            Arguments

            + + + + + + + + + + + + + + +
            object

            A model specification.

            ...

            One or more named model arguments.

            mode

            A character string for the model type (e.g. "classification" or +"regression")

            + +

            Value

            + +

            An updated model object.

            + +

            Details

            + +

            set_args will replace existing values of the arguments.

            + + +

            Examples

            +
            #> Random Forest Model Specification (unknown) +#>
            +rand_forest() %>% + set_args(mtry = 3, importance = TRUE) %>% + set_mode("regression")
            #> Random Forest Model Specification (regression) +#> +#> Main Arguments: +#> mtry = 3 +#> +#> Engine-Specific Arguments: +#> importance = TRUE +#>
            +
            +
            + +
            + +
            +
            +

            parsnip is a part of the tidyverse, an ecosystem of packages designed with common APIs and a shared philosophy. Learn more at tidyverse.org.

            +
            + +
            +

            Developed by Max Kuhn.

            +

            Site built by pkgdown.

            +
            + + + +
            +
            + + + + + + diff --git a/docs/reference/surv_reg.html b/docs/reference/surv_reg.html index f6c479bc6..c375b04d9 100644 --- a/docs/reference/surv_reg.html +++ b/docs/reference/surv_reg.html @@ -167,12 +167,12 @@

Details to determine the mode of the model. For surv_reg, the mode will always be "regression".

            Since survival models typically involve censoring (and require the use of -survival::Surv() objects), the fit() function will require that the +survival::Surv() objects), the fit() function will require that the survival model be specified via the formula interface.

            Also, for the flexsurv::flexsurvfit engine, the typical strata function cannot be used. To achieve the same effect, the extra parameter roles can be used (as described above).

            -

            The model can be created using the fit() function using the +

            The model can be created using the fit() function using the following engines:

            • R: "flexsurv"

            @@ -184,7 +184,7 @@

            R

            See also

            - +

            Examples