From 15e3508b2661f6b35d81f0ad0c3fa1d06269e367 Mon Sep 17 00:00:00 2001
From: Ryan Peterson <rp1992@gmail.com>
Date: Wed, 17 Jul 2024 16:44:36 -0600
Subject: [PATCH] Update poly default

---
 NEWS.md                                |  4 ++++
 R/sparseR.R                            | 28 +++++++++++++++-----------
 man/sparseR.Rd                         | 28 ++++++++++++++++----------
 tests/testthat/test_plotting_sparseR.R |  6 +++---
 tests/testthat/test_sparseR_methods.R  |  4 ++--
 5 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index ad3eb53..e23db96 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # 0.3.0.9000
 
+- Updated default `poly` to 2 so that limited non-linearities are discovered by
+  default, as advertised (thank you to the anonymous reviewer who noticed this)
+- Fixed a formatting issue in the documentation for `sparseR`; the stored model info now records `poly` (it previously recorded `k` in its place)
+
 # 0.3.0
 
 - Fixed bug in the `effect_plot` function that was causing issues with 
diff --git a/R/sparseR.R b/R/sparseR.R
index a70f8a6..9c0f2bd 100644
--- a/R/sparseR.R
+++ b/R/sparseR.R
@@ -9,8 +9,9 @@
 #' @param ncvgamma The tuning parameter for ncvreg (for MCP or SCAD)
 #' @param lambda.min The minimum value to be used for lambda (as ratio of max,
 #'   see ?ncvreg)
-#' @param k The maximum order of interactions to consider
-#' @param poly The maximum order of polynomials to consider
+#' @param k The maximum order of interactions to consider (default: 1; all
+#'   pairwise)
+#' @param poly The maximum order of polynomials to consider (default: 2)
 #' @param gamma The degree of extremity of sparsity rankings (see details)
 #' @param cumulative_k Should penalties be increased cumulatively as order
 #'   interaction increases?
@@ -51,14 +52,17 @@
 #' factors with low prevalence be combined?) - none (should no preprocessing be
 #' done? can also specify a null object)
 #'
-#' The options that can be passed to `extra_opts` are: - centers (named numeric
-#' vector which denotes where each covariate should be centered) - center_fn
-#' (alternatively, a function can be specified to calculate center such as `min`
-#' or `median`) - freq_cut, unique_cut (see ?step_nzv - these get used by the
-#' filtering steps) - neighbors (the number of neighbors for knnImpute) -
-#' one_hot (see ?step_dummy), this defaults to cell-means coding which can be
-#' done in regularized regression (change at your own risk) - raw (should
-#' polynomials not be orthogonal? defaults to true because variables are
+#' The options that can be passed to `extra_opts` are:
+#' - centers (named numeric vector which denotes where each covariate should be
+#'   centered)
+#' - center_fn (alternatively, a function can be specified to calculate center
+#'   such as `min`
+#'   or `median`)
+#' - freq_cut, unique_cut (see ?step_nzv; these get used by the filtering steps)
+#' - neighbors (the number of neighbors for knnImpute)
+#' - one_hot (see ?step_dummy), this defaults to cell-means coding which can be
+#'   done in regularized regression (change at your own risk)
+#' - raw (should polynomials not be orthogonal? defaults to true because variables are
 #' centered and scaled already by this point by default)
 #'
 #' \code{ia_formula} will by default interact all variables with each other up
@@ -96,7 +100,7 @@
 sparseR <- function(formula, data, family = c("gaussian", "binomial", "poisson", "coxph"),
                     penalty = c("lasso", "MCP", "SCAD"), alpha = 1, ncvgamma = 3,
                     lambda.min = .005,
-                    k = 1, poly = 1, gamma = .5, cumulative_k = FALSE,
+                    k = 1, poly = 2, gamma = .5, cumulative_k = FALSE,
                     cumulative_poly = TRUE, pool = FALSE,
                     ia_formula = NULL,
                     pre_process = TRUE, model_matrix = NULL, y = NULL,
@@ -217,7 +221,7 @@ sparseR <- function(formula, data, family = c("gaussian", "binomial", "poisson",
     ungroup() %>%
     mutate(Vartype = as.character(.data$Vartype))
 
-  info <- list(k = k, poly = k, cumulative_k = cumulative_k,
+  info <- list(k = k, poly = poly, cumulative_k = cumulative_k,
                cumulative_poly = cumulative_poly, pool = pool,
                pre_process = pre_process, model_matrix = model_matrix, y = y,
                poly_prefix = poly_prefix, int_sep = int_sep)
diff --git a/man/sparseR.Rd b/man/sparseR.Rd
index 114bd05..779bfc0 100644
--- a/man/sparseR.Rd
+++ b/man/sparseR.Rd
@@ -13,7 +13,7 @@ sparseR(
   ncvgamma = 3,
   lambda.min = 0.005,
   k = 1,
-  poly = 1,
+  poly = 2,
   gamma = 0.5,
   cumulative_k = FALSE,
   cumulative_poly = TRUE,
@@ -47,9 +47,10 @@ penalty)}
 \item{lambda.min}{The minimum value to be used for lambda (as ratio of max,
 see ?ncvreg)}
 
-\item{k}{The maximum order of interactions to consider}
+\item{k}{The maximum order of interactions to consider (default: 1; all
+pairwise)}
 
-\item{poly}{The maximum order of polynomials to consider}
+\item{poly}{The maximum order of polynomials to consider (default: 2)}
 
 \item{gamma}{The degree of extremity of sparsity rankings (see details)}
 
@@ -120,15 +121,20 @@ missing data be imputed?) - scale (should data be standardized)? - center
 factors with low prevalence be combined?) - none (should no preprocessing be
 done? can also specify a null object)
 
-The options that can be passed to \code{extra_opts} are: - centers (named numeric
-vector which denotes where each covariate should be centered) - center_fn
-(alternatively, a function can be specified to calculate center such as \code{min}
-or \code{median}) - freq_cut, unique_cut (see ?step_nzv - these get used by the
-filtering steps) - neighbors (the number of neighbors for knnImpute) -
-one_hot (see ?step_dummy), this defaults to cell-means coding which can be
-done in regularized regression (change at your own risk) - raw (should
-polynomials not be orthogonal? defaults to true because variables are
+The options that can be passed to \code{extra_opts} are:
+\itemize{
+\item centers (named numeric vector which denotes where each covariate should be
+centered)
+\item center_fn (alternatively, a function can be specified to calculate center
+such as \code{min}
+or \code{median})
+\item freq_cut, unique_cut (see ?step_nzv; these get used by the filtering steps)
+\item neighbors (the number of neighbors for knnImpute)
+\item one_hot (see ?step_dummy), this defaults to cell-means coding which can be
+done in regularized regression (change at your own risk)
+\item raw (should polynomials not be orthogonal? defaults to true because variables are
 centered and scaled already by this point by default)
+}
 
 \code{ia_formula} will by default interact all variables with each other up
 to order k. If specified, ia_formula will be passed as the \code{terms} argument
diff --git a/tests/testthat/test_plotting_sparseR.R b/tests/testthat/test_plotting_sparseR.R
index 4eb6e46..9ef370e 100644
--- a/tests/testthat/test_plotting_sparseR.R
+++ b/tests/testthat/test_plotting_sparseR.R
@@ -17,9 +17,9 @@ test_that("Plotting sparseR runs without error", {
 
 test_that("Effect plots for sparseR run without error", {
   expect_warning(effect_plot(obj1, coef_name = "Species"))
-  expect_warning(effect_plot(obj1, "Petal.Width", by = "Species"))
+  expect_silent(effect_plot(obj1, coef_name = "Petal.Width", by = "Species"))
   expect_warning(effect_plot(obj1, coef_name = "Petal.Width"))
-  expect_warning(effect_plot(obj1, "Petal.Length", by = "Sepal.Length"))
+  expect_silent(effect_plot(obj1, coef_name = "Petal.Length", by = "Sepal.Length"))
 
   expect_silent(effect_plot(obj2, coef_name = "Petal.Width"))
   expect_silent(effect_plot(obj2, coef_name = "Species"))
@@ -27,7 +27,7 @@ test_that("Effect plots for sparseR run without error", {
 
   expect_warning(effect_plot(obj3, coef_name = "Petal.Width"))
   expect_warning(effect_plot(obj3, coef_name = "Species"))
-  expect_warning(effect_plot(obj3, "Petal.Width", by = "Species"))
+  expect_silent(effect_plot(obj3, "Petal.Width", by = "Species"))
 
   expect_error(effect_plot(obj2, "Species", by = "Petal.Width"))
 })
diff --git a/tests/testthat/test_sparseR_methods.R b/tests/testthat/test_sparseR_methods.R
index 300c967..d8370db 100644
--- a/tests/testthat/test_sparseR_methods.R
+++ b/tests/testthat/test_sparseR_methods.R
@@ -78,10 +78,10 @@ test_that("coef method works", {
     obj2 <- sparseR(Sepal.Width ~ ., data = iris)
     b <- coef(obj2, at = "cvmin")
     sum(b != 0)
-  }, 18)
+  }, 16)
 
   expect_equal({
     sum(coef(obj2, at = "cv1se") != 0)
-  }, 11)
+  }, 6)
 })