From 37d3299ed4c3763fa09c0541c8707448ca144e45 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Fri, 25 Jan 2019 16:02:57 -0500 Subject: [PATCH] varying_args() improvements: - Update documentation for varying_args() - For recipes, using the unique step id as the id column - For model_specs, using the top level class as the id column - Better return value for recipes with no steps (tibble with correct column names) --- R/varying.R | 138 ++++++++++++++++--------------- man/varying_args.Rd | 47 +++++------ tests/testthat/test_varying.R | 151 +++++++++++++++++++--------------- 3 files changed, 181 insertions(+), 155 deletions(-) diff --git a/R/varying.R b/R/varying.R index 7fb37feee..ab95a4267 100644 --- a/R/varying.R +++ b/R/varying.R @@ -2,75 +2,81 @@ #' #' [varying()] is used when a parameter will be specified at a later date. #' @export -varying <- function() +varying <- function() { quote(varying()) +} #' Determine varying arguments #' -#' `varying_args` takes a model specification and lists all of the arguments -#' along with whether they are fully specified or not. -#' @param x An object -#' @param id A string describing the object `x`. +#' `varying_args()` takes a model specification or a recipe and returns a tibble +#' of information on all possible varying arguments and whether or not they +#' are actually varying. +#' +#' The `id` column is determined differently depending on whether a `model_spec` +#' or a `recipe` is used. For a `model_spec`, the first class is used. For +#' a `recipe`, the unique step `id` is used. +#' +#' @param x A `model_spec` or a `recipe`. +#' #' @param ... Not currently used. -#' @return A tibble with columns for the parameter name (`name`), whether is -#' contains _any_ varying value (`varying`), the `id` for the object, and the -#' class that was used to call the method (`type`). +#' +#' @return A tibble with columns for the parameter name (`name`), whether it +#' contains _any_ varying value (`varying`), the `id` for the object, and the +#' class that was used to call the method (`type`). +#' #' @examples -#' library(dplyr) -#' library(rlang) #' -#' rand_forest() %>% varying_args(id = "plain") +#' # List all possible varying args for the random forest spec +#' rand_forest() %>% varying_args() #' -#' rand_forest(mtry = varying()) %>% varying_args(id = "one arg") +#' # mtry is now recognized as varying +#' rand_forest(mtry = varying()) %>% varying_args() #' +#' # Even engine specific arguments can vary #' rand_forest() %>% #' set_engine("ranger", sample.fraction = varying()) %>% -#' varying_args(id = "only eng_args") +#' varying_args() #' #' rand_forest() %>% #' set_engine( -#' "ranger", -#' strata = expr(Class), -#' sampsize = c(varying(), varying()) +#' "randomForest", +#' strata = Class, +#' sampsize = varying() #' ) %>% -#' varying_args(id = "add an expr") -#' -#' rand_forest() %>% -#' set_engine("ranger", classwt = c(class1 = 1, class2 = varying())) %>% -#' varying_args(id = "list of values") +#' varying_args() #' #' @export -varying_args <- function (x, id, ...) +varying_args <- function (x, ...) { UseMethod("varying_args") +} #' @importFrom purrr map map_lgl #' @export #' @export varying_args.model_spec #' @rdname varying_args -varying_args.model_spec <- function(x, id = NULL, ...) { - cl <- match.call() +varying_args.model_spec <- function(x, ...) { - if (!is.null(id) && !is.character(id)) - stop ("`id` should be a single character string.", call. = FALSE) - id <- id[1] + # use the model_spec top level class as the id + id <- class(x)[1] + + if (length(x$args) == 0L & length(x$eng_args) == 0L) { + return(varying_tbl()) + } - if (is.null(id)) - id <- deparse(cl$x) + # Locate varying args in spec args and engine specific args varying_args <- map_lgl(x$args, find_varying) varying_eng_args <- map_lgl(x$eng_args, find_varying) + res <- c(varying_args, varying_eng_args) - tibble( + + varying_tbl( name = names(res), varying = unname(res), id = id, - type = caller_method(cl) + type = "model_spec" ) -} -# NOTE Look at the `sampsize` and `classwt` examples above. Using varying() in -# a vector will convert it to list. When the model-specific `translate` is -# run, we should catch those and convert them back to vectors if the varying -# parameter has been replaced with a real value. +} # Need to figure out a way to meld the results of varying_args with # parameter objects from `dials` or from novel parameters in the user's @@ -81,36 +87,29 @@ varying_args.model_spec <- function(x, id = NULL, ...) { # Maybe use this data as substrate to make a new object type (param_set?) that # would have its own methods for grids and random sampling. -# lots of code duplication below and probably poor planning; just a prototype. -# once the generics package is done, these will go into recipes - #' @importFrom purrr map2_dfr map_chr #' @export #' @export varying_args.recipe #' @rdname varying_args -varying_args.recipe <- function(x, id = NULL, ...) { - step_type <- map_chr(x$steps, function(x) class(x)[1]) - step_type <- make.names(step_type, unique = TRUE) # change with new tibble version - res <- map2_dfr(x$steps, step_type, varying_args) - res +varying_args.recipe <- function(x, ...) { + + steps <- x$steps + + if (length(steps) == 0L) { + return(varying_tbl()) + } + + map_dfr(x$steps, varying_args) } #' @importFrom purrr map map_lgl #' @export #' @export varying_args.step #' @rdname varying_args -varying_args.step <- function(x, id = NULL, ...) { - cl <- match.call() +varying_args.step <- function(x, ...) { - if (!is.null(id) && !is.character(id)) { - stop ("`id` should be a single character string.", call. = FALSE) - } - - id <- id[1] - - if (is.null(id)) { - id <- deparse(cl$x) - } + # Unique step id + id <- x$id # Grab the step class before the subset, as that removes the class step_type <- class(x)[1] @@ -127,12 +126,29 @@ varying_args.step <- function(x, id = NULL, ...) { # remove the non-varying arguments as they are not important res <- res[!(names(x) %in% non_varying_step_arguments)] - tibble( + varying_tbl( name = names(res), varying = unname(res), id = id, - type = caller_method(cl) + type = "step" + ) + +} + +# useful for standardization and for creating a 0 row varying tbl +# (i.e. for when there are no steps in a recipe) +varying_tbl <- function(name = character(), + varying = logical(), + id = character(), + type = character()) { + + tibble( + name = name, + varying = varying, + id = id, + type = type ) + } validate_only_allowed_step_args <- function(x, step_type) { @@ -214,11 +230,3 @@ find_varying <- function(x) { return(any_varying_elems) } - -caller_method <- function(cl) { - x <- cl[[1]] - x <- deparse(x) - x <- gsub("varying_args.", "", x, fixed = TRUE) - x -} - diff --git a/man/varying_args.Rd b/man/varying_args.Rd index ff3c73160..fe765c1e3 100644 --- a/man/varying_args.Rd +++ b/man/varying_args.Rd @@ -7,52 +7,53 @@ \alias{varying_args.step} \title{Determine varying arguments} \usage{ -varying_args(x, id, ...) +varying_args(x, ...) -\method{varying_args}{model_spec}(x, id = NULL, ...) +\method{varying_args}{model_spec}(x, ...) -\method{varying_args}{recipe}(x, id = NULL, ...) +\method{varying_args}{recipe}(x, ...) -\method{varying_args}{step}(x, id = NULL, ...) +\method{varying_args}{step}(x, ...) } \arguments{ -\item{x}{An object} - -\item{id}{A string describing the object \code{x}.} +\item{x}{A \code{model_spec} or a \code{recipe}.} \item{...}{Not currently used.} } \value{ -A tibble with columns for the parameter name (\code{name}), whether is +A tibble with columns for the parameter name (\code{name}), whether it contains \emph{any} varying value (\code{varying}), the \code{id} for the object, and the class that was used to call the method (\code{type}). } \description{ -\code{varying_args} takes a model specification and lists all of the arguments -along with whether they are fully specified or not. +\code{varying_args()} takes a model specification or a recipe and returns a tibble +of information on all possible varying arguments and whether or not they +are actually varying. +} +\details{ +The \code{id} column is determined differently depending on whether a \code{model_spec} +or a \code{recipe} is used. For a \code{model_spec}, the first class is used. For +a \code{recipe}, the unique step \code{id} is used. } \examples{ -library(dplyr) -library(rlang) -rand_forest() \%>\% varying_args(id = "plain") +# List all possible varying args for the random forest spec +rand_forest() \%>\% varying_args() -rand_forest(mtry = varying()) \%>\% varying_args(id = "one arg") +# mtry is now recognized as varying +rand_forest(mtry = varying()) \%>\% varying_args() +# Even engine specific arguments can vary rand_forest() \%>\% set_engine("ranger", sample.fraction = varying()) \%>\% - varying_args(id = "only eng_args") + varying_args() rand_forest() \%>\% set_engine( - "ranger", - strata = expr(Class), - sampsize = c(varying(), varying()) + "randomForest", + strata = Class, + sampsize = varying() ) \%>\% - varying_args(id = "add an expr") - - rand_forest() \%>\% - set_engine("ranger", classwt = c(class1 = 1, class2 = varying())) \%>\% - varying_args(id = "list of values") + varying_args() } diff --git a/tests/testthat/test_varying.R b/tests/testthat/test_varying.R index 8c17c8847..2c1779910 100644 --- a/tests/testthat/test_varying.R +++ b/tests/testthat/test_varying.R @@ -5,118 +5,135 @@ library(dplyr) context("varying parameters") -load("recipes_examples.RData") +load(test_path("recipes_examples.RData")) test_that('main parsnip arguments', { - mod_1 <- - rand_forest() %>% - varying_args(id = "") - exp_1 <- - tibble( - name = c("mtry", "trees", "min_n"), - varying = rep(FALSE, 3), - id = rep("", 3), - type = rep("model_spec", 3) - ) + mod_1 <- rand_forest() %>% + varying_args() + + exp_1 <- tibble( + name = c("mtry", "trees", "min_n"), + varying = rep(FALSE, 3), + id = rep("rand_forest", 3), + type = rep("model_spec", 3) + ) + expect_equal(mod_1, exp_1) - mod_2 <- - rand_forest(mtry = varying()) %>% - varying_args(id = "") + mod_2 <- rand_forest(mtry = varying()) %>% + varying_args() + exp_2 <- exp_1 exp_2$varying[1] <- TRUE expect_equal(mod_2, exp_2) - mod_3 <- - rand_forest(mtry = varying(), trees = varying()) %>% - varying_args(id = "wat") + mod_3 <- rand_forest(mtry = varying(), trees = varying()) %>% + varying_args() + exp_3 <- exp_2 exp_3$varying[1:2] <- TRUE - exp_3$id <- "wat" expect_equal(mod_3, exp_3) }) test_that('other parsnip arguments', { - other_1 <- - rand_forest() %>% + other_1 <- rand_forest() %>% set_engine("ranger", sample.fraction = varying()) %>% - varying_args(id = "only engine args") - exp_1 <- - tibble( - name = c("mtry", "trees", "min_n", "sample.fraction"), - varying = c(rep(FALSE, 3), TRUE), - id = rep("only engine args", 4), - type = rep("model_spec", 4) - ) + varying_args() + + exp_1 <- tibble( + name = c("mtry", "trees", "min_n", "sample.fraction"), + varying = c(rep(FALSE, 3), TRUE), + id = rep("rand_forest", 4), + type = rep("model_spec", 4) + ) + expect_equal(other_1, exp_1) - other_2 <- - rand_forest(min_n = varying()) %>% + other_2 <- rand_forest(min_n = varying()) %>% set_engine("ranger", sample.fraction = varying()) %>% - varying_args(id = "only engine args") - exp_2 <- - tibble( - name = c("mtry", "trees", "min_n", "sample.fraction"), - varying = c(rep(FALSE, 2), rep(TRUE, 2)), - id = rep("only engine args", 4), - type = rep("model_spec", 4) - ) + varying_args() + + exp_2 <- tibble( + name = c("mtry", "trees", "min_n", "sample.fraction"), + varying = c(rep(FALSE, 2), rep(TRUE, 2)), + id = rep("rand_forest", 4), + type = rep("model_spec", 4) + ) + expect_equal(other_2, exp_2) - other_3 <- - rand_forest() %>% + # We can detect these as varying, but they won't actually + # be used in this way + other_3 <- rand_forest() %>% set_engine("ranger", strata = Class, sampsize = c(varying(), varying())) %>% - varying_args(id = "add an expr") - exp_3 <- - tibble( + varying_args() + + exp_3 <- tibble( name = c("mtry", "trees", "min_n", "strata", "sampsize"), varying = c(rep(FALSE, 4), TRUE), - id = rep("add an expr", 5), + id = rep("rand_forest", 5), type = rep("model_spec", 5) ) + expect_equal(other_3, exp_3) - other_4 <- - rand_forest() %>% + other_4 <- rand_forest() %>% set_engine("ranger", strata = Class, sampsize = c(12, varying())) %>% - varying_args(id = "num and varying in vec") - exp_4 <- - tibble( - name = c("mtry", "trees", "min_n", "strata", "sampsize"), - varying = c(rep(FALSE, 4), TRUE), - id = rep("num and varying in vec", 5), - type = rep("model_spec", 5) - ) + varying_args() + + exp_4 <- tibble( + name = c("mtry", "trees", "min_n", "strata", "sampsize"), + varying = c(rep(FALSE, 4), TRUE), + id = rep("rand_forest", 5), + type = rep("model_spec", 5) + ) + expect_equal(other_4, exp_4) }) test_that('recipe parameters', { - rec_res_1 <- varying_args(rec_1) - exp_1 <- - tibble( - name = c("K", "num", "threshold", "options"), - varying = c(TRUE, TRUE, FALSE, FALSE), - id = c("step_knnimpute", rep("step_pca", 3)), - type = rep("step", 4) - ) + # un-randomify the id names + rec_1_id <- rec_1 + rec_1_id$steps[[1]]$id <- "center_1" + rec_1_id$steps[[2]]$id <- "knnimpute_1" + rec_1_id$steps[[3]]$id <- "pca_1" + + rec_res_1 <- varying_args(rec_1_id) + + exp_1 <- tibble( + name = c("K", "num", "threshold", "options"), + varying = c(TRUE, TRUE, FALSE, FALSE), + id = c("knnimpute_1", rep("pca_1", 3)), + type = rep("step", 4) + ) + expect_equal(rec_res_1, exp_1) - rec_res_2 <- varying_args(rec_2) - exp_2 <- exp_1 - expect_equal(rec_res_2, exp_2) + # un-randomify the id names + rec_3_id <- rec_3 + rec_3_id$steps[[1]]$id <- "center_1" + rec_3_id$steps[[2]]$id <- "knnimpute_1" + rec_3_id$steps[[3]]$id <- "pca_1" - rec_res_3 <- varying_args(rec_3) + rec_res_3 <- varying_args(rec_3_id) exp_3 <- exp_1 exp_3$varying <- FALSE expect_equal(rec_res_3, exp_3) rec_res_4 <- varying_args(rec_4) - exp_4 <- tibble() + + exp_4 <- tibble( + name = character(), + varying = logical(), + id = character(), + type = character() + ) + expect_equal(rec_res_4, exp_4) })