diff --git a/.Rbuildignore b/.Rbuildignore index f9a0b56b4..5a9f7e74d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,3 +25,5 @@ tests/testthat/NMF* ^vignettes/articles$ ^[\.]?air\.toml$ ^[.]?air[.]toml$ +^vignettes/\.quarto$ +^vignettes/.*_files$ diff --git a/.gitignore b/.gitignore index f6000324f..746e9d122 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ revdep/library.noindex revdep/data.sqlite .httr-oauth revdep/cloud.noindex/* +**/.quarto/ diff --git a/DESCRIPTION b/DESCRIPTION index 7448ba904..92e4ec7f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,10 +51,10 @@ Suggests: ggplot2, igraph, kernlab, - knitr, methods, modeldata (>= 0.1.1), parsnip (>= 1.2.0), + quarto, RANN, RcppRoll, rmarkdown, @@ -65,8 +65,7 @@ Suggests: testthat (>= 3.0.0), workflows, xml2 -VignetteBuilder: - knitr +VignetteBuilder: quarto RdMacros: lifecycle Config/Needs/website: tidyverse/tidytemplate, rmarkdown diff --git a/man/step_kpca_poly.Rd b/man/step_kpca_poly.Rd index e3be17ae5..01244ac81 100644 --- a/man/step_kpca_poly.Rd +++ b/man/step_kpca_poly.Rd @@ -48,7 +48,7 @@ preprocessing step has be trained by \code{\link[=prep]{prep()}}.} is a placeholder and will be populated once \code{\link[=prep]{prep()}} is used.} \item{degree, scale_factor, offset}{Numeric values for the polynomial kernel -function. See the documentation at \code{\link[kernlab:dots]{kernlab::polydot()}}.} +function. See the documentation at \code{\link[kernlab:polydot]{kernlab::polydot()}}.} \item{prefix}{A character string for the prefix of the resulting new variables. See notes below.} diff --git a/man/step_kpca_rbf.Rd b/man/step_kpca_rbf.Rd index 124ad851d..2f1e6c43e 100644 --- a/man/step_kpca_rbf.Rd +++ b/man/step_kpca_rbf.Rd @@ -46,7 +46,7 @@ preprocessing step has be trained by \code{\link[=prep]{prep()}}.} is a placeholder and will be populated once \code{\link[=prep]{prep()}} is used.} \item{sigma}{A numeric value for the radial basis function parameter. 
See the -documentation at \code{\link[kernlab:dots]{kernlab::rbfdot()}}.} +documentation at \code{\link[kernlab:rbfdot]{kernlab::rbfdot()}}.} \item{prefix}{A character string for the prefix of the resulting new variables. See notes below.} diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 000000000..ba7abf328 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,3 @@ +*.html +*.R +*_files diff --git a/vignettes/Dummies.Rmd b/vignettes/Dummies.qmd similarity index 94% rename from vignettes/Dummies.Rmd rename to vignettes/Dummies.qmd index 729dea1f4..21490571a 100644 --- a/vignettes/Dummies.Rmd +++ b/vignettes/Dummies.qmd @@ -5,9 +5,14 @@ description: | This vignette describes different methods for encoding categorical predictors, with special attention to interaction terms and contrasts. vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Handling categorical predictors} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- ```{r} @@ -18,7 +23,7 @@ knitr::opts_chunk$set( digits = 3, collapse = TRUE, comment = "#>" - ) +) options(digits = 3) library(recipes) ``` @@ -36,7 +41,7 @@ library(recipes) # make a copy for use below iris <- iris |> mutate(original = Species) -iris_rec <- recipe( ~ ., data = iris) +iris_rec <- recipe(~., data = iris) summary(iris_rec) ``` @@ -46,8 +51,8 @@ The default approach is to create dummy variables using the "reference cell" par ```{r} #| label: iris-ref-cell -ref_cell <- - iris_rec |> +ref_cell <- + iris_rec |> step_dummy(Species) |> prep(training = iris) summary(ref_cell) @@ -69,8 +74,8 @@ To get this encoding you can use the `contrasts` argument like so:, the global o ```{r} #| label: iris-helmert # now make dummy variables with new parameterization -helmert <- - iris_rec |> +helmert <- + iris_rec |> step_dummy(Species, contrasts = "contr.helmert") |> prep(training = iris) summary(helmert) @@ 
-90,9 +95,9 @@ Creating interactions with recipes requires the use of a model formula, such as ```{r} #| label: iris-2int -iris_int <- +iris_int <- iris_rec |> - step_interact( ~ Sepal.Width:Sepal.Length) |> + step_interact(~ Sepal.Width:Sepal.Length) |> prep(training = iris) summary(iris_int) ``` @@ -105,10 +110,10 @@ For example, if you were to use the standard formula interface, the creation of ```{r} #| label: mm-int -model.matrix(~ Species*Sepal.Length, data = iris) |> - as.data.frame() |> +model.matrix(~ Species * Sepal.Length, data = iris) |> + as.data.frame() |> # show a few specific rows - slice(c(1, 51, 101)) |> + slice(c(1, 51, 101)) |> as.data.frame() ``` @@ -119,8 +124,10 @@ With recipes, you create them sequentially. This raises an issue: do I have to t #| eval: false # Must I do this? iris_rec |> - step_interact( ~ Species_versicolor:Sepal.Length + - Species_virginica:Sepal.Length) + step_interact( + ~ Species_versicolor:Sepal.Length + + Species_virginica:Sepal.Length + ) ``` Not only is this a pain, but it may not be obvious what dummy variables are available (especially when [`step_other`](https://recipes.tidymodels.org/reference/step_other.html) is used). @@ -129,10 +136,10 @@ The solution is to use a selector: ```{r} #| label: iris-sel -iris_int <- - iris_rec |> +iris_int <- + iris_rec |> step_dummy(Species) |> - step_interact( ~ starts_with("Species"):Sepal.Length) |> + step_interact(~ starts_with("Species"):Sepal.Length) |> prep(training = iris) summary(iris_int) ``` @@ -168,9 +175,9 @@ Would it work if I didn't convert species to a factor and used the interactions ```{r} #| label: iris-dont -iris_int <- - iris_rec |> - step_interact( ~ Species:Sepal.Length) |> +iris_int <- + iris_rec |> + step_interact(~ Species:Sepal.Length) |> prep(training = iris) summary(iris_int) ``` @@ -188,7 +195,7 @@ There are models (e.g. 
`glmnet` and others) that can avoid this issue so you mig ```{r} #| label: one-hot -iris_rec |> +iris_rec |> step_dummy(Species, one_hot = TRUE) |> prep(training = iris) |> bake(original, new_data = NULL, starts_with("Species")) |> @@ -203,8 +210,8 @@ This will give you the full set of indicators and, when you use the typical cont ```{r} #| label: one-hot-two -hot_reference <- - iris_rec |> +hot_reference <- + iris_rec |> step_dummy(Species, one_hot = TRUE) |> prep(training = iris) |> bake(original, new_data = NULL, starts_with("Species")) |> @@ -212,8 +219,8 @@ hot_reference <- hot_reference -hot_helmert <- - iris_rec |> +hot_helmert <- + iris_rec |> step_dummy(Species, one_hot = TRUE, contrasts = "contr.helmert") |> prep(training = iris) |> bake(original, new_data = NULL, starts_with("Species")) |> diff --git a/vignettes/Ordering.Rmd b/vignettes/Ordering.qmd similarity index 95% rename from vignettes/Ordering.Rmd rename to vignettes/Ordering.qmd index 8e2248264..f5d40016e 100644 --- a/vignettes/Ordering.Rmd +++ b/vignettes/Ordering.qmd @@ -5,9 +5,14 @@ description: | The order in which recipe steps are specified matters, and this vignette gives some general suggestions that you should consider. vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Ordering of steps} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- In the recipes package, there are no constraints on the order in which steps are added to the recipe; you as a user are free to apply steps in the order appropriate to your data preprocessing needs. However, the **order of steps matters** and there are some general suggestions that you should consider. 
diff --git a/vignettes/Roles.Rmd b/vignettes/Roles.qmd similarity index 97% rename from vignettes/Roles.Rmd rename to vignettes/Roles.qmd index 74891852f..6c2dd03b9 100644 --- a/vignettes/Roles.Rmd +++ b/vignettes/Roles.qmd @@ -4,9 +4,14 @@ output: rmarkdown::html_vignette description: | In recipes, roles provide a way to select variables for different steps. vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Roles in recipes} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- ```{r} diff --git a/vignettes/Selecting_Variables.Rmd b/vignettes/Selecting_Variables.qmd similarity index 96% rename from vignettes/Selecting_Variables.Rmd rename to vignettes/Selecting_Variables.qmd index fc030d7ba..aef1d8c46 100644 --- a/vignettes/Selecting_Variables.Rmd +++ b/vignettes/Selecting_Variables.qmd @@ -5,9 +5,14 @@ description: | You can select which variables or features should be used in recipes. This vignette goes over the basics of using selection functions. vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Selecting variables} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- ```{r} diff --git a/vignettes/Skipping.Rmd b/vignettes/Skipping.qmd similarity index 96% rename from vignettes/Skipping.Rmd rename to vignettes/Skipping.qmd index dea16a0b1..5d9b9cbce 100644 --- a/vignettes/Skipping.Rmd +++ b/vignettes/Skipping.qmd @@ -6,9 +6,14 @@ description: | However, in some situations we only want to only apply a step to the training data and we want to skip that step on testing data. 
vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{On skipping steps} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- ```{r} @@ -19,7 +24,7 @@ knitr::opts_chunk$set( digits = 3, collapse = TRUE, comment = "#>" - ) +) options(digits = 3) library(recipes) ``` @@ -81,7 +86,7 @@ car_recipe <- recipe(mpg ~ ., data = mtcars) |> prep(training = mtcars) # These *should* produce the same results (as they do for `hp`) -bake(car_recipe, new_data = NULL) |> head() |> select(disp, hp) +bake(car_recipe, new_data = NULL) |> head() |> select(disp, hp) bake(car_recipe, new_data = mtcars) |> head() |> select(disp, hp) ``` diff --git a/vignettes/recipes.Rmd b/vignettes/recipes.qmd similarity index 98% rename from vignettes/recipes.Rmd rename to vignettes/recipes.qmd index c7b0cda7e..6a4d41e5a 100644 --- a/vignettes/recipes.Rmd +++ b/vignettes/recipes.qmd @@ -5,9 +5,14 @@ description: | Start here if this is your first time using recipes! You will learn about basic usage, steps, selectors, and checks. vignette: > - %\VignetteEngine{knitr::rmarkdown} %\VignetteIndexEntry{Introduction to recipes} %\VignetteEncoding{UTF-8} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' --- ```{r}