From 15e3508b2661f6b35d81f0ad0c3fa1d06269e367 Mon Sep 17 00:00:00 2001 From: Ryan Peterson Date: Wed, 17 Jul 2024 16:44:36 -0600 Subject: [PATCH] Update poly default --- NEWS.md | 4 ++++ R/sparseR.R | 28 +++++++++++++++----------- man/sparseR.Rd | 28 ++++++++++++++++---------- tests/testthat/test_plotting_sparseR.R | 6 +++--- tests/testthat/test_sparseR_methods.R | 4 ++-- 5 files changed, 42 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index ad3eb53..e23db96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # 0.3.0.9000 +- Updated default `poly` to 2 so that limited non-linearities get discovered by default, + as advertised (thank you to the anonymous reviewer who noticed this) +- Fixed formatting issue in documentation for `sparseR` + # 0.3.0 - Fixed bug in the `effect_plot` function that was causing issues with diff --git a/R/sparseR.R b/R/sparseR.R index a70f8a6..9c0f2bd 100644 --- a/R/sparseR.R +++ b/R/sparseR.R @@ -9,8 +9,9 @@ #' @param ncvgamma The tuning parameter for ncvreg (for MCP or SCAD) #' @param lambda.min The minimum value to be used for lambda (as ratio of max, #' see ?ncvreg) -#' @param k The maximum order of interactions to consider -#' @param poly The maximum order of polynomials to consider +#' @param k The maximum order of interactions to consider (default: 1; all +#' pairwise) +#' @param poly The maximum order of polynomials to consider (default: 2) #' @param gamma The degree of extremity of sparsity rankings (see details) #' @param cumulative_k Should penalties be increased cumulatively as order #' interaction increases? @@ -51,14 +52,17 @@ #' factors with low prevalence be combined?) - none (should no preprocessing be #' done? can also specify a null object) #' -#' The options that can be passed to `extra_opts` are: - centers (named numeric -#' vector which denotes where each covariate should be centered) - center_fn -#' (alternatively, a function can be specified to calculate center such as `min` -#' or `median`) - freq_cut, unique_cut (see ?step_nzv - these get used by the -#' filtering steps) - neighbors (the number of neighbors for knnImpute) - -#' one_hot (see ?step_dummy), this defaults to cell-means coding which can be -#' done in regularized regression (change at your own risk) - raw (should -#' polynomials not be orthogonal? defaults to true because variables are +#' The options that can be passed to `extra_opts` are: +#' - centers (named numeric vector which denotes where each covariate should be +#' centered) +#' - center_fn (alternatively, a function can be specified to calculate center +#' such as `min` +#' or `median`) +#' - freq_cut, unique_cut (see ?step_nzv; these get used by the filtering steps) +#' - neighbors (the number of neighbors for knnImpute) +#' - one_hot (see ?step_dummy), this defaults to cell-means coding which can be +#' done in regularized regression (change at your own risk) +#' - raw (should polynomials not be orthogonal? defaults to true because variables are #' centered and scaled already by this point by default) #' #' \code{ia_formula} will by default interact all variables with each other up @@ -96,7 +100,7 @@ sparseR <- function(formula, data, family = c("gaussian", "binomial", "poisson", "coxph"), penalty = c("lasso", "MCP", "SCAD"), alpha = 1, ncvgamma = 3, lambda.min = .005, - k = 1, poly = 1, gamma = .5, cumulative_k = FALSE, + k = 1, poly = 2, gamma = .5, cumulative_k = FALSE, cumulative_poly = TRUE, pool = FALSE, ia_formula = NULL, pre_process = TRUE, model_matrix = NULL, y = NULL, @@ -217,7 +221,7 @@ sparseR <- function(formula, data, family = c("gaussian", "binomial", "poisson", ungroup() %>% mutate(Vartype = as.character(.data$Vartype)) - info <- list(k = k, poly = k, cumulative_k = cumulative_k, + info <- list(k = k, poly = poly, cumulative_k = cumulative_k, cumulative_poly = cumulative_poly, pool = pool, pre_process = pre_process, model_matrix = model_matrix, y = y, poly_prefix = poly_prefix, int_sep = int_sep) diff --git a/man/sparseR.Rd b/man/sparseR.Rd index 114bd05..779bfc0 100644 --- a/man/sparseR.Rd +++ b/man/sparseR.Rd @@ -13,7 +13,7 @@ sparseR( ncvgamma = 3, lambda.min = 0.005, k = 1, - poly = 1, + poly = 2, gamma = 0.5, cumulative_k = FALSE, cumulative_poly = TRUE, @@ -47,9 +47,10 @@ penalty)} \item{lambda.min}{The minimum value to be used for lambda (as ratio of max, see ?ncvreg)} -\item{k}{The maximum order of interactions to consider} +\item{k}{The maximum order of interactions to consider (default: 1; all +pairwise)} -\item{poly}{The maximum order of polynomials to consider} +\item{poly}{The maximum order of polynomials to consider (default: 2)} \item{gamma}{The degree of extremity of sparsity rankings (see details)} @@ -120,15 +121,20 @@ missing data be imputed?) - scale (should data be standardized)? - center factors with low prevalence be combined?) - none (should no preprocessing be done? can also specify a null object) -The options that can be passed to \code{extra_opts} are: - centers (named numeric -vector which denotes where each covariate should be centered) - center_fn -(alternatively, a function can be specified to calculate center such as \code{min} -or \code{median}) - freq_cut, unique_cut (see ?step_nzv - these get used by the -filtering steps) - neighbors (the number of neighbors for knnImpute) - -one_hot (see ?step_dummy), this defaults to cell-means coding which can be -done in regularized regression (change at your own risk) - raw (should -polynomials not be orthogonal? defaults to true because variables are +The options that can be passed to \code{extra_opts} are: +\itemize{ +\item centers (named numeric vector which denotes where each covariate should be +centered) +\item center_fn (alternatively, a function can be specified to calculate center +such as \code{min} +or \code{median}) +\item freq_cut, unique_cut (see ?step_nzv; these get used by the filtering steps) +\item neighbors (the number of neighbors for knnImpute) +\item one_hot (see ?step_dummy), this defaults to cell-means coding which can be +done in regularized regression (change at your own risk) +\item raw (should polynomials not be orthogonal? defaults to true because variables are centered and scaled already by this point by default) +} \code{ia_formula} will by default interact all variables with each other up to order k. If specified, ia_formula will be passed as the \code{terms} argument diff --git a/tests/testthat/test_plotting_sparseR.R b/tests/testthat/test_plotting_sparseR.R index 4eb6e46..9ef370e 100644 --- a/tests/testthat/test_plotting_sparseR.R +++ b/tests/testthat/test_plotting_sparseR.R @@ -17,9 +17,9 @@ test_that("Plotting sparseR runs without error", { test_that("Effect plots for sparseR run without error", { expect_warning(effect_plot(obj1, coef_name = "Species")) - expect_warning(effect_plot(obj1, "Petal.Width", by = "Species")) + expect_silent(effect_plot(obj1, coef_name = "Petal.Width", by = "Species")) expect_warning(effect_plot(obj1, coef_name = "Petal.Width")) - expect_warning(effect_plot(obj1, "Petal.Length", by = "Sepal.Length")) + expect_silent(effect_plot(obj1, coef_name = "Petal.Length", by = "Sepal.Length")) expect_silent(effect_plot(obj2, coef_name = "Petal.Width")) expect_silent(effect_plot(obj2, coef_name = "Species")) @@ -27,7 +27,7 @@ test_that("Effect plots for sparseR run without error", { expect_warning(effect_plot(obj3, coef_name = "Petal.Width")) expect_warning(effect_plot(obj3, coef_name = "Species")) - expect_warning(effect_plot(obj3, "Petal.Width", by = "Species")) + expect_silent(effect_plot(obj3, "Petal.Width", by = "Species")) expect_error(effect_plot(obj2, "Species", by = "Petal.Width")) }) diff --git a/tests/testthat/test_sparseR_methods.R b/tests/testthat/test_sparseR_methods.R index 300c967..d8370db 100644 --- a/tests/testthat/test_sparseR_methods.R +++ b/tests/testthat/test_sparseR_methods.R @@ -78,10 +78,10 @@ test_that("coef method works", { obj2 <- sparseR(Sepal.Width ~ ., data = iris) b <- coef(obj2, at = "cvmin") sum(b != 0) - }, 18) + }, 16) expect_equal({ sum(coef(obj2, at = "cv1se") != 0) - }, 11) + }, 6) })