From 37d3299ed4c3763fa09c0541c8707448ca144e45 Mon Sep 17 00:00:00 2001
From: DavisVaughan <davis@rstudio.com>
Date: Fri, 25 Jan 2019 16:02:57 -0500
Subject: [PATCH] varying_args() improvements: - Update documentation for
 varying_args() - For recipes, using the unique step id as the id column - For
 model_specs, using the top level class as the id column - Better return value
 for recipes with no steps (tibble with correct column names)

---
 R/varying.R                   | 138 ++++++++++++++++---------------
 man/varying_args.Rd           |  47 +++++------
 tests/testthat/test_varying.R | 151 +++++++++++++++++++---------------
 3 files changed, 181 insertions(+), 155 deletions(-)

diff --git a/R/varying.R b/R/varying.R
index 7fb37feee..ab95a4267 100644
--- a/R/varying.R
+++ b/R/varying.R
@@ -2,75 +2,81 @@
 #'
 #' [varying()] is used when a parameter will be specified at a later date.
 #' @export
-varying <- function()
+varying <- function() {
   quote(varying())
+}
 
 #' Determine varying arguments
 #'
-#' `varying_args` takes a model specification and lists all of the arguments
-#'  along with whether they are fully specified or not.
-#' @param x An object
-#' @param id A string describing the object `x`.
+#' `varying_args()` takes a model specification or a recipe and returns a tibble
+#' of information on all possible varying arguments and whether or not they
+#' are actually varying.
+#'
+#' The `id` column is determined differently depending on whether a `model_spec`
+#' or a `recipe` is used. For a `model_spec`, the first class is used. For
+#' a `recipe`, the unique step `id` is used.
+#'
+#' @param x A `model_spec` or a `recipe`.
+#'
 #' @param ... Not currently used.
-#' @return A tibble with columns for the parameter name (`name`), whether is
-#'  contains _any_ varying value (`varying`), the `id` for the object, and the
-#'  class that was used to call the method (`type`).
+#'
+#' @return A tibble with columns for the parameter name (`name`), whether it
+#' contains _any_ varying value (`varying`), the `id` for the object, and the
+#' class that was used to call the method (`type`).
+#'
 #' @examples
-#' library(dplyr)
-#' library(rlang)
 #'
-#' rand_forest() %>% varying_args(id = "plain")
+#' # List all possible varying args for the random forest spec
+#' rand_forest() %>% varying_args()
 #'
-#' rand_forest(mtry = varying()) %>% varying_args(id = "one arg")
+#' # mtry is now recognized as varying
+#' rand_forest(mtry = varying()) %>% varying_args()
 #'
+#' # Even engine specific arguments can vary
 #' rand_forest() %>%
 #'   set_engine("ranger", sample.fraction = varying()) %>%
-#'   varying_args(id = "only eng_args")
+#'   varying_args()
 #'
 #' rand_forest() %>%
 #'   set_engine(
-#'     "ranger",
-#'     strata = expr(Class),
-#'      sampsize = c(varying(), varying())
+#'     "randomForest",
+#'     strata = Class,
+#'     sampsize = varying()
 #'   ) %>%
-#'   varying_args(id = "add an expr")
-#'
-#'  rand_forest() %>%
-#'    set_engine("ranger", classwt = c(class1 = 1, class2 = varying())) %>%
-#'    varying_args(id = "list of values")
+#'   varying_args()
 #'
 #' @export
-varying_args <- function (x, id, ...)
+varying_args <- function (x, ...) {
   UseMethod("varying_args")
+}
 
 #' @importFrom purrr map map_lgl
 #' @export
 #' @export varying_args.model_spec
 #' @rdname varying_args
-varying_args.model_spec <- function(x, id = NULL, ...) {
-  cl <- match.call()
+varying_args.model_spec <- function(x, ...) {
 
-  if (!is.null(id) && !is.character(id))
-    stop ("`id` should be a single character string.", call. = FALSE)
-  id <- id[1]
+  # use the model_spec top level class as the id
+  id <- class(x)[1]
+
+  if (length(x$args) == 0L & length(x$eng_args) == 0L) {
+    return(varying_tbl())
+  }
 
-  if (is.null(id))
-    id <- deparse(cl$x)
+  # Locate varying args in spec args and engine specific args
   varying_args <- map_lgl(x$args, find_varying)
   varying_eng_args <- map_lgl(x$eng_args, find_varying)
+
   res <- c(varying_args, varying_eng_args)
-  tibble(
+
+  varying_tbl(
     name = names(res),
     varying = unname(res),
     id = id,
-    type = caller_method(cl)
+    type = "model_spec"
   )
-}
 
-# NOTE Look at the `sampsize` and `classwt` examples above. Using varying() in
-#  a vector will convert it to list. When the model-specific `translate` is
-#  run, we should catch those and convert them back to vectors if the varying
-#  parameter has been replaced with a real value.
+}
 
 # Need to figure out a way to meld the results of varying_args with
 #  parameter objects from `dials` or from novel parameters in the user's
@@ -81,36 +87,29 @@ varying_args.model_spec <- function(x, id = NULL, ...) {
 # Maybe use this data as substrate to make a new object type (param_set?) that
 #  would have its own methods for grids and random sampling.
 
-# lots of code duplication below and probably poor planning; just a prototype.
-# once the generics package is done, these will go into recipes
-
 #' @importFrom purrr map2_dfr map_chr
 #' @export
 #' @export varying_args.recipe
 #' @rdname varying_args
-varying_args.recipe <- function(x, id = NULL, ...) {
-  step_type <- map_chr(x$steps, function(x) class(x)[1])
-  step_type <- make.names(step_type, unique = TRUE) # change with new tibble version
-  res <- map2_dfr(x$steps, step_type, varying_args)
-  res
+varying_args.recipe <- function(x, ...) {
+
+  steps <- x$steps
+
+  if (length(steps) == 0L) {
+    return(varying_tbl())
+  }
+
+  map_dfr(x$steps, varying_args)
 }
 
 #' @importFrom purrr map map_lgl
 #' @export
 #' @export varying_args.step
 #' @rdname varying_args
-varying_args.step <- function(x, id = NULL, ...) {
-  cl <- match.call()
+varying_args.step <- function(x, ...) {
 
-  if (!is.null(id) && !is.character(id)) {
-    stop ("`id` should be a single character string.", call. = FALSE)
-  }
-
-  id <- id[1]
-
-  if (is.null(id)) {
-    id <- deparse(cl$x)
-  }
+  # Unique step id
+  id <- x$id
 
   # Grab the step class before the subset, as that removes the class
   step_type <- class(x)[1]
@@ -127,12 +126,29 @@ varying_args.step <- function(x, id = NULL, ...) {
   # remove the non-varying arguments as they are not important
   res <- res[!(names(x) %in% non_varying_step_arguments)]
 
-  tibble(
+  varying_tbl(
     name = names(res),
     varying = unname(res),
     id = id,
-    type = caller_method(cl)
+    type = "step"
+  )
+
+}
+
+# useful for standardization and for creating a 0 row varying tbl
+# (i.e. for when there are no steps in a recipe)
+varying_tbl <- function(name = character(),
+                        varying = logical(),
+                        id = character(),
+                        type = character()) {
+
+  tibble(
+    name = name,
+    varying = varying,
+    id = id,
+    type = type
   )
+
 }
 
 validate_only_allowed_step_args <- function(x, step_type) {
@@ -214,11 +230,3 @@ find_varying <- function(x) {
 
   return(any_varying_elems)
 }
-
-caller_method <- function(cl) {
-  x <- cl[[1]]
-  x <- deparse(x)
-  x <- gsub("varying_args.", "", x, fixed = TRUE)
-  x
-}
-
diff --git a/man/varying_args.Rd b/man/varying_args.Rd
index ff3c73160..fe765c1e3 100644
--- a/man/varying_args.Rd
+++ b/man/varying_args.Rd
@@ -7,52 +7,53 @@
 \alias{varying_args.step}
 \title{Determine varying arguments}
 \usage{
-varying_args(x, id, ...)
+varying_args(x, ...)
 
-\method{varying_args}{model_spec}(x, id = NULL, ...)
+\method{varying_args}{model_spec}(x, ...)
 
-\method{varying_args}{recipe}(x, id = NULL, ...)
+\method{varying_args}{recipe}(x, ...)
 
-\method{varying_args}{step}(x, id = NULL, ...)
+\method{varying_args}{step}(x, ...)
 }
 \arguments{
-\item{x}{An object}
-
-\item{id}{A string describing the object \code{x}.}
+\item{x}{A \code{model_spec} or a \code{recipe}.}
 
 \item{...}{Not currently used.}
 }
 \value{
-A tibble with columns for the parameter name (\code{name}), whether is
+A tibble with columns for the parameter name (\code{name}), whether it
 contains \emph{any} varying value (\code{varying}), the \code{id} for the object, and the
 class that was used to call the method (\code{type}).
 }
 \description{
-\code{varying_args} takes a model specification and lists all of the arguments
-along with whether they are fully specified or not.
+\code{varying_args()} takes a model specification or a recipe and returns a tibble
+of information on all possible varying arguments and whether or not they
+are actually varying.
+}
+\details{
+The \code{id} column is determined differently depending on whether a \code{model_spec}
+or a \code{recipe} is used. For a \code{model_spec}, the first class is used. For
+a \code{recipe}, the unique step \code{id} is used.
 }
 \examples{
-library(dplyr)
-library(rlang)
 
-rand_forest() \%>\% varying_args(id = "plain")
+# List all possible varying args for the random forest spec
+rand_forest() \%>\% varying_args()
 
-rand_forest(mtry = varying()) \%>\% varying_args(id = "one arg")
+# mtry is now recognized as varying
+rand_forest(mtry = varying()) \%>\% varying_args()
 
+# Even engine specific arguments can vary
 rand_forest() \%>\%
   set_engine("ranger", sample.fraction = varying()) \%>\%
-  varying_args(id = "only eng_args")
+  varying_args()
 
 rand_forest() \%>\%
   set_engine(
-    "ranger",
-    strata = expr(Class),
-     sampsize = c(varying(), varying())
+    "randomForest",
+    strata = Class,
+    sampsize = varying()
   ) \%>\%
-  varying_args(id = "add an expr")
-
- rand_forest() \%>\%
-   set_engine("ranger", classwt = c(class1 = 1, class2 = varying())) \%>\%
-   varying_args(id = "list of values")
+  varying_args()
 
 }
diff --git a/tests/testthat/test_varying.R b/tests/testthat/test_varying.R
index 8c17c8847..2c1779910 100644
--- a/tests/testthat/test_varying.R
+++ b/tests/testthat/test_varying.R
@@ -5,118 +5,135 @@ library(dplyr)
 
 context("varying parameters")
 
-load("recipes_examples.RData")
+load(test_path("recipes_examples.RData"))
 
 test_that('main parsnip arguments', {
 
-  mod_1 <-
-    rand_forest() %>%
-    varying_args(id = "")
-  exp_1 <-
-    tibble(
-      name = c("mtry", "trees", "min_n"),
-      varying = rep(FALSE, 3),
-      id = rep("", 3),
-      type = rep("model_spec", 3)
-    )
+  mod_1 <- rand_forest() %>%
+    varying_args()
+
+  exp_1 <- tibble(
+    name = c("mtry", "trees", "min_n"),
+    varying = rep(FALSE, 3),
+    id = rep("rand_forest", 3),
+    type = rep("model_spec", 3)
+  )
+
   expect_equal(mod_1, exp_1)
 
-  mod_2 <-
-    rand_forest(mtry = varying()) %>%
-    varying_args(id = "")
+  mod_2 <- rand_forest(mtry = varying()) %>%
+    varying_args()
+
   exp_2 <- exp_1
   exp_2$varying[1] <- TRUE
   expect_equal(mod_2, exp_2)
 
-  mod_3 <-
-    rand_forest(mtry = varying(), trees  = varying()) %>%
-    varying_args(id = "wat")
+  mod_3 <- rand_forest(mtry = varying(), trees  = varying()) %>%
+    varying_args()
+
   exp_3 <- exp_2
   exp_3$varying[1:2] <- TRUE
-  exp_3$id <- "wat"
   expect_equal(mod_3, exp_3)
 })
 
 
 test_that('other parsnip arguments', {
 
-  other_1 <-
-    rand_forest() %>%
+  other_1 <- rand_forest() %>%
     set_engine("ranger", sample.fraction = varying()) %>%
-    varying_args(id = "only engine args")
-  exp_1 <-
-    tibble(
-      name = c("mtry", "trees", "min_n", "sample.fraction"),
-      varying = c(rep(FALSE, 3), TRUE),
-      id = rep("only engine args", 4),
-      type = rep("model_spec", 4)
-    )
+    varying_args()
+
+  exp_1 <- tibble(
+    name = c("mtry", "trees", "min_n", "sample.fraction"),
+    varying = c(rep(FALSE, 3), TRUE),
+    id = rep("rand_forest", 4),
+    type = rep("model_spec", 4)
+  )
+
   expect_equal(other_1, exp_1)
 
-  other_2 <-
-    rand_forest(min_n = varying())  %>%
+  other_2 <- rand_forest(min_n = varying())  %>%
     set_engine("ranger", sample.fraction = varying()) %>%
-    varying_args(id = "only engine args")
-  exp_2 <-
-    tibble(
-      name = c("mtry", "trees", "min_n", "sample.fraction"),
-      varying = c(rep(FALSE, 2), rep(TRUE, 2)),
-      id = rep("only engine args", 4),
-      type = rep("model_spec", 4)
-    )
+    varying_args()
+
+  exp_2 <- tibble(
+    name = c("mtry", "trees", "min_n", "sample.fraction"),
+    varying = c(rep(FALSE, 2), rep(TRUE, 2)),
+    id = rep("rand_forest", 4),
+    type = rep("model_spec", 4)
+  )
+
   expect_equal(other_2, exp_2)
 
-  other_3 <-
-    rand_forest()  %>%
+  # We can detect these as varying, but they won't actually
+  # be used in this way
+  other_3 <- rand_forest()  %>%
     set_engine("ranger", strata = Class, sampsize = c(varying(), varying())) %>%
-    varying_args(id = "add an expr")
-  exp_3 <-
-    tibble(
+    varying_args()
+
+  exp_3 <- tibble(
       name = c("mtry", "trees", "min_n", "strata", "sampsize"),
       varying = c(rep(FALSE, 4), TRUE),
-      id = rep("add an expr", 5),
+      id = rep("rand_forest", 5),
       type = rep("model_spec", 5)
     )
+
   expect_equal(other_3, exp_3)
 
-  other_4 <-
-    rand_forest()  %>%
+  other_4 <- rand_forest()  %>%
     set_engine("ranger", strata = Class, sampsize = c(12, varying())) %>%
-    varying_args(id = "num and varying in vec")
-  exp_4 <-
-    tibble(
-      name = c("mtry", "trees", "min_n", "strata", "sampsize"),
-      varying = c(rep(FALSE, 4), TRUE),
-      id = rep("num and varying in vec", 5),
-      type = rep("model_spec", 5)
-    )
+    varying_args()
+
+  exp_4 <- tibble(
+    name = c("mtry", "trees", "min_n", "strata", "sampsize"),
+    varying = c(rep(FALSE, 4), TRUE),
+    id = rep("rand_forest", 5),
+    type = rep("model_spec", 5)
+  )
+
   expect_equal(other_4, exp_4)
 })
 
 
 test_that('recipe parameters', {
 
-  rec_res_1 <- varying_args(rec_1)
-  exp_1 <-
-    tibble(
-      name = c("K", "num", "threshold", "options"),
-      varying = c(TRUE, TRUE, FALSE, FALSE),
-      id = c("step_knnimpute", rep("step_pca", 3)),
-      type = rep("step", 4)
-    )
+  # un-randomify the id names
+  rec_1_id <- rec_1
+  rec_1_id$steps[[1]]$id <- "center_1"
+  rec_1_id$steps[[2]]$id <- "knnimpute_1"
+  rec_1_id$steps[[3]]$id <- "pca_1"
+
+  rec_res_1 <- varying_args(rec_1_id)
+
+  exp_1 <- tibble(
+    name = c("K", "num", "threshold", "options"),
+    varying = c(TRUE, TRUE, FALSE, FALSE),
+    id = c("knnimpute_1", rep("pca_1", 3)),
+    type = rep("step", 4)
+  )
+
   expect_equal(rec_res_1, exp_1)
 
-  rec_res_2 <- varying_args(rec_2)
-  exp_2 <- exp_1
-  expect_equal(rec_res_2, exp_2)
+  # un-randomify the id names
+  rec_3_id <- rec_3
+  rec_3_id$steps[[1]]$id <- "center_1"
+  rec_3_id$steps[[2]]$id <- "knnimpute_1"
+  rec_3_id$steps[[3]]$id <- "pca_1"
 
-  rec_res_3 <- varying_args(rec_3)
+  rec_res_3 <- varying_args(rec_3_id)
   exp_3 <- exp_1
   exp_3$varying <- FALSE
   expect_equal(rec_res_3, exp_3)
 
   rec_res_4 <- varying_args(rec_4)
-  exp_4 <- tibble()
+
+  exp_4 <- tibble(
+    name = character(),
+    varying = logical(),
+    id = character(),
+    type = character()
+  )
+
   expect_equal(rec_res_4, exp_4)
 })