tidymodels · topepo · Oct 21, 2018 · Oct 10, 2018 · Oct 10, 2018 · Oct 10, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -8,11 +8,22 @@ sudo: true
 warnings_are_errors: false
 
 r:
+- 3.1
+- 3.2
+- oldrel
 - release
 - devel
 
 env:
+  global:
   - KERAS_BACKEND="tensorflow"
+  - MAKEFLAGS="-j 2"
+
+# allow failures on R 3.1 and 3.2 until we troubleshoot these issues
+matrix:
+  allow_failures:
+    - r: 3.1
+    - r: 3.2
 
 r_binary_packages: 
  - rstan

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: parsnip
-Version: 0.0.0.9003
+Version: 0.0.0.9004
 Title: A Common API to Modeling and analysis Functions
 Description: A common interface is provided to allow users to specify a model without having to remember the different argument names across different functions or computational engines (e.g. R, spark, stan, etc). 
 Authors@R: c(
@@ -25,7 +25,8 @@ Imports:
     glue,
     magrittr,
     stats,
-    tidyr
+    tidyr,
+    globals
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.0.9000
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -9,13 +9,23 @@ S3method(multi_predict,"_lognet")
 S3method(multi_predict,"_multnet")
 S3method(multi_predict,"_xgb.Booster")
 S3method(multi_predict,default)
+S3method(predict,"_elnet")
+S3method(predict,"_lognet")
 S3method(predict,"_multnet")
 S3method(predict,model_fit)
+S3method(predict_class,"_lognet")
 S3method(predict_class,model_fit)
+S3method(predict_classprob,"_lognet")
+S3method(predict_classprob,"_multnet")
 S3method(predict_classprob,model_fit)
 S3method(predict_confint,model_fit)
+S3method(predict_num,"_elnet")
 S3method(predict_num,model_fit)
 S3method(predict_predint,model_fit)
+S3method(predict_quantile,model_fit)
+S3method(predict_raw,"_elnet")
+S3method(predict_raw,"_lognet")
+S3method(predict_raw,"_multnet")
 S3method(predict_raw,model_fit)
 S3method(print,boost_tree)
 S3method(print,linear_reg)
@@ -49,13 +59,23 @@ S3method(varying_args,model_spec)
 S3method(varying_args,recipe)
 S3method(varying_args,step)
 export("%>%")
+export(.cols)
+export(.dat)
+export(.facts)
+export(.lvls)
+export(.obs)
+export(.preds)
+export(.x)
+export(.y)
+export(C5.0_train)
 export(boost_tree)
 export(check_empty_ellipse)
 export(fit)
 export(fit.model_spec)
 export(fit_control)
 export(fit_xy)
 export(fit_xy.model_spec)
+export(keras_mlp)
 export(linear_reg)
 export(logistic_reg)
 export(make_classes)
@@ -76,6 +96,8 @@ export(predict_num)
 export(predict_num.model_fit)
 export(predict_predint)
 export(predict_predint.model_fit)
+export(predict_quantile)
+export(predict_quantile.model_fit)
 export(predict_raw)
 export(predict_raw.model_fit)
 export(rand_forest)
@@ -89,14 +111,17 @@ export(varying_args)
 export(varying_args.model_spec)
 export(varying_args.recipe)
 export(varying_args.step)
+export(xgb_train)
 import(rlang)
 importFrom(dplyr,arrange)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,bind_cols)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,collect)
 importFrom(dplyr,full_join)
 importFrom(dplyr,funs)
 importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
 importFrom(dplyr,rename_at)
@@ -120,6 +145,7 @@ importFrom(purrr,map_dbl)
 importFrom(purrr,map_df)
 importFrom(purrr,map_dfr)
 importFrom(purrr,map_lgl)
+importFrom(rlang,eval_tidy)
 importFrom(rlang,sym)
 importFrom(rlang,syms)
 importFrom(stats,.checkMFClasses)
@@ -138,6 +164,7 @@ importFrom(stats,predict)
 importFrom(stats,qnorm)
 importFrom(stats,qt)
 importFrom(stats,quantile)
+importFrom(stats,setNames)
 importFrom(stats,terms)
 importFrom(stats,update)
 importFrom(tibble,as_tibble)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,13 @@
+# parsnip 0.0.0.9004
+
+* Arguments to modeling functions are now captured as quosures. 
+* `others` has been replaced by `...`
+* Data descriptor names have been changed and are now functions. The descriptor definitions for "cols" and "preds" have been switched. 
+
 # parsnip 0.0.0.9003
 
 * `regularization` was changed to `penalty` in a few models to be consistent with [this change](tidymodels/model-implementation-principles@08d3afd). 
-* if a mode is not chosen in the model specification, it is assigned at the time of fit. [51](https://github.com/topepo/parsnip/issues/51)
+* If a mode is not chosen in the model specification, it is assigned at the time of fit. [51](https://github.com/topepo/parsnip/issues/51)
 * The underlying modeling packages now are loaded by namespace. There will be some exceptions noted in the documentation for each model. For example, in some `predict` methods, the `earth` package will need to be attached to be fully operational.
 
 # parsnip 0.0.0.9002

diff --git a/R/aaa_spark_helpers.R b/R/aaa_spark_helpers.R
@@ -3,12 +3,10 @@
 #' @importFrom dplyr starts_with rename rename_at vars funs
 format_spark_probs <- function(results, object) {
   results <- dplyr::select(results, starts_with("probability_"))
-  results <- dplyr::rename_at(
-    results,
-    vars(starts_with("probability_")),
-    funs(gsub("probability", "pred", .))
-  )
-  results
+  p <- ncol(results)  # number of probability columns kept above
+  lvl <- paste0("probability_", 0:(p - 1))  # current names: probability_0, ...
+  names(lvl) <- paste0("pred_", object$fit$.index_labels)  # new names by label
+  results %>% rename(!!!syms(lvl))  # spliced as rename(<new name> = <old name>)
 }
 
 format_spark_class <- function(results, object) {

diff --git a/R/arguments.R b/R/arguments.R
@@ -86,7 +86,7 @@ check_others <- function(args, obj, core_args) {
 #'
 #' @export
 set_args <- function(object, ...) {
-  the_dots <- list(...)
+  the_dots <- enquos(...)
   if (length(the_dots) == 0)
     stop("Please pass at least one named argument.", call. = FALSE)
   main_args <- names(object$args)
@@ -116,4 +116,20 @@ set_mode <- function(object, mode) {
   object
 }
 
+# ------------------------------------------------------------------------------
 
+#' @importFrom rlang eval_tidy
+#' @importFrom purrr map
+maybe_eval <- function(x) {
+  # Try to evaluate `x`; on any error (e.g. descriptors in `x`) keep `x` as is.
+  y <- try(rlang::eval_tidy(x), silent = TRUE)
+  if (inherits(y, "try-error"))
+    y <- x  # evaluation failed: return the unevaluated input
+  y
+}
+
+eval_args <- function(spec, ...) {  # `...` is accepted but not used here
+  spec$args   <- purrr::map(spec$args,   maybe_eval)  # each entry of `args`
+  spec$others <- purrr::map(spec$others, maybe_eval)  # each entry of `others`
+  spec  # same object, with every entry that could be evaluated replaced
+}
diff --git a/R/boost_tree.R b/R/boost_tree.R
@@ -22,16 +22,14 @@
 #' }
 #' These arguments are converted to their specific names at the
 #'  time that the model is fit. Other options and argument can be
-#'  set using the `others` argument. If left to their defaults
+#'  set using the `...` slot. If left to their defaults
 #'  here (`NULL`), the values are taken from the underlying model
 #'  functions.  If parameters need to be modified, `update` can be used
 #'  in lieu of recreating the object from scratch.
 #'
 #' @param mode A single character string for the type of model.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
-#' @param others A named list of arguments to be used by the
-#'  underlying models (e.g., `xgboost::xgb.train`, etc.). .
 #' @param mtry An number for the number (or proportion) of predictors that will
 #'  be randomly sampled at each split when creating the tree models (`xgboost`
 #'  only).
@@ -48,8 +46,11 @@
 #' @param sample_size An number for the number (or proportion) of data that is
 #'  exposed to the fitting routine. For `xgboost`, the sampling is done at at
 #'  each iteration while `C5.0` samples once during traning.
-#' @param ... Used for method consistency. Any arguments passed to
-#'  the ellipses will result in an error. Use `others` instead.
+#' @param ... Other arguments to pass to the specific engine's
+#'  model fit function (see the Engine Details section below). This
+#'  should not include arguments defined by the main parameters to
+#'  this function. For the `update` function, the ellipses can
+#'  contain the primary arguments or any others.
 #' @details
 #' The data given to the function are not saved and are only used
 #'  to determine the _mode_ of the model. For `boost_tree`, the
@@ -62,12 +63,15 @@
 #' \item \pkg{Spark}: `"spark"`
 #' }
 #'
-#' Main parameter arguments (and those in `others`) can avoid
+#' Main parameter arguments (and those in `...`) can avoid
 #'  evaluation until the underlying function is executed by wrapping the
 #'  argument in [rlang::expr()] (e.g. `mtry = expr(floor(sqrt(p)))`).
 #'
+#'
+#' @section Engine Details:
+#'
 #' Engines may have pre-set default arguments when executing the
-#'  model fit call. These can be changed by using the `others`
+#'  model fit call. These can be changed by using the `...`
 #'  argument to pass in the preferred values. For this type of
 #'  model, the template of the fit calls are:
 #'
@@ -114,35 +118,30 @@
 
 boost_tree <-
   function(mode = "unknown",
-           ...,
            mtry = NULL, trees = NULL, min_n = NULL,
            tree_depth = NULL, learn_rate = NULL,
            loss_reduction = NULL,
            sample_size = NULL,
-           others = list()) {
-    check_empty_ellipse(...)
+           ...) {
+
+    others <- enquos(...)
+
+    args <- list(
+      mtry = enquo(mtry),
+      trees = enquo(trees),
+      min_n = enquo(min_n),
+      tree_depth = enquo(tree_depth),
+      learn_rate = enquo(learn_rate),
+      loss_reduction = enquo(loss_reduction),
+      sample_size = enquo(sample_size)
+    )
 
     if (!(mode %in% boost_tree_modes))
       stop("`mode` should be one of: ",
            paste0("'", boost_tree_modes, "'", collapse = ", "),
            call. = FALSE)
 
-    if (is.numeric(trees) && trees < 0)
-      stop("`trees` should be >= 1", call. = FALSE)
-    if (is.numeric(sample_size) && (sample_size < 0 | sample_size > 1))
-      stop("`sample_size` should be within [0,1]", call. = FALSE)
-    if (is.numeric(tree_depth) && tree_depth < 0)
-      stop("`tree_depth` should be >= 1", call. = FALSE)
-    if (is.numeric(min_n) && min_n < 0)
-      stop("`min_n` should be >= 1", call. = FALSE)
-
-    args <- list(
-      mtry = mtry, trees = trees, min_n = min_n, tree_depth = tree_depth,
-      learn_rate = learn_rate, loss_reduction = loss_reduction,
-      sample_size = sample_size
-    )
-
-    no_value <- !vapply(others, is.null, logical(1))
+    no_value <- !vapply(others, null_value, logical(1))
     others <- others[no_value]
 
     out <- list(args = args, others = others,
@@ -184,16 +183,20 @@ update.boost_tree <-
            mtry = NULL, trees = NULL, min_n = NULL,
            tree_depth = NULL, learn_rate = NULL,
            loss_reduction = NULL, sample_size = NULL,
-           others = list(),
            fresh = FALSE,
            ...) {
-    check_empty_ellipse(...)
+
+    others <- enquos(...)
 
     args <- list(
-      mtry = mtry, trees = trees, min_n = min_n, tree_depth = tree_depth,
-      learn_rate = learn_rate, loss_reduction = loss_reduction,
-      sample_size = sample_size
-      )
+      mtry = enquo(mtry),
+      trees = enquo(trees),
+      min_n = enquo(min_n),
+      tree_depth = enquo(tree_depth),
+      learn_rate = enquo(learn_rate),
+      loss_reduction = enquo(loss_reduction),
+      sample_size = enquo(sample_size)
+    )
 
     # TODO make these blocks into a function and document well
     if (fresh) {
@@ -235,9 +238,45 @@ translate.boost_tree <- function(x, engine, ...) {
   x
 }
 
+# ------------------------------------------------------------------------------
+
+check_args.boost_tree <- function(object) {
+  # Evaluate each stored argument so the checks below see concrete values.
+  args <- lapply(object$args, rlang::eval_tidy)
+  # NOTE(review): messages say ">= 1" but these checks only reject values < 0.
+  if (is.numeric(args$trees) && args$trees < 0)
+    stop("`trees` should be >= 1", call. = FALSE)
+  if (is.numeric(args$sample_size) && (args$sample_size < 0 | args$sample_size > 1))
+    stop("`sample_size` should be within [0,1]", call. = FALSE)
+  if (is.numeric(args$tree_depth) && args$tree_depth < 0)
+    stop("`tree_depth` should be >= 1", call. = FALSE)
+  if (is.numeric(args$min_n) && args$min_n < 0)
+    stop("`min_n` should be >= 1", call. = FALSE)
+  # Non-numeric values skip every check; the input is handed back invisibly.
+  invisible(object)
+}
 
 # xgboost helpers --------------------------------------------------------------
 
+#' Boosted trees via xgboost
+#'
+#' `xgb_train` is a wrapper for `xgboost` tree-based models
+#'  where all of the model arguments are in the main function.
+#'
+#' @param x A data frame or matrix of predictors
+#' @param y A vector (factor or numeric) or matrix (numeric) of outcome data.
+#' @param max_depth An integer for the maximum depth of the tree.
+#' @param nrounds An integer for the number of boosting iterations.
+#' @param eta A numeric value between zero and one to control the learning rate.
+#' @param colsample_bytree Subsampling proportion of columns.
+#' @param min_child_weight A numeric value for the minimum sum of instance
+#'  weights needed in a child to continue to split.
+#' @param gamma A number for the minimum loss reduction required to make a
+#'  further partition on a leaf node of the tree.
+#' @param subsample Subsampling proportion of rows.
+#' @param ... Other options to pass to `xgb.train`.
+#' @return A fitted `xgboost` object.
+#' @export
 xgb_train <- function(
   x, y,
   max_depth = 6, nrounds = 15, eta  = 0.3, colsample_bytree = 1,
@@ -380,6 +419,31 @@ xgb_by_tree <- function(tree, object, new_data, type, ...) {
 
 # C5.0 helpers -----------------------------------------------------------------
 
+#' Boosted trees via C5.0
+#'
+#' `C5.0_train` is a wrapper for [C50::C5.0()] tree-based models
+#'  where all of the model arguments are in the main function.
+#'
+#' @param x A data frame or matrix of predictors.
+#' @param y A factor vector with 2 or more levels.
+#' @param trials An integer specifying the number of boosting
+#'  iterations. A value of one indicates that a single model is
+#'  used.
+#' @param weights An optional numeric vector of case weights. Note
+#'  that the data used for the case weights will not be used as a
+#'  splitting variable in the model (see
+#'  \url{http://www.rulequest.com/see5-win.html#CASEWEIGHT} for
+#'  Quinlan's notes on case weights).
+#' @param minCases An integer for the smallest number of samples
+#'  that must be put in at least two of the splits.
+#' @param sample A value between (0, .999) that specifies the
+#'  random proportion of the data that should be used to train the model.
+#'  By default, all the samples are used for model training. Samples
+#'  not used for training are used to evaluate the accuracy of the
+#'  model in the printed output.
+#' @param ... Other arguments to pass.
+#' @return A fitted C5.0 model.
+#' @export
 C5.0_train <-
   function(x, y, weights = NULL, trials = 15, minCases = 2, sample = 0, ...) {
     other_args <- list(...)