From 453946b46c09f137e4e8acf82ae1e37bb6449397 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Fri, 30 Apr 2021 14:10:15 -0600
Subject: [PATCH 1/9] Add model analysis template

---
 .../model-analysis/skeleton/skeleton.Rmd      | 145 ++++++++++++++++++
 .../templates/model-analysis/template.yaml    |   4 +
 2 files changed, 149 insertions(+)
 create mode 100644 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
 create mode 100644 inst/rmarkdown/templates/model-analysis/template.yaml

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
new file mode 100644
index 0000000..2938d07
--- /dev/null
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -0,0 +1,145 @@
+---
+title: "Train and evaluate models with tidymodels"
+date: "`r Sys.Date()`"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, fig.width = 8, fig.height = 5)
+```
+
+
+*This template offers an opinionated guide on how to structure a modeling analysis. Your individual modeling analysis may require you to add to, subtract from, or otherwise change this structure, but consider this a general framework to start from. If you want to learn more about using tidymodels, check out our [Getting Started](https://www.tidymodels.org/start/) guide.*
+
+In this example analysis, let's fit a model to predict [the sex of penguins](https://allisonhorst.github.io/palmerpenguins/) from species and measurement information.
+
+```{r}
+library(tidymodels)
+# Name the package: R >= 4.5 also ships `datasets::penguins`, whose columns are named differently
+data(penguins, package = "modeldata")
+glimpse(penguins)
+# Drop every row that contains a missing value in any column
+penguins <- na.omit(penguins)
+```
+
+
+## Explore data
+
+Exploratory data analysis (EDA) is an [important part of the modeling process](https://www.tmwr.org/software-modeling.html#model-phases).
+
+```{r}
+ggplot(penguins) +
+  aes(bill_depth_mm, bill_length_mm, color = sex, size = body_mass_g) +
+  geom_point(alpha = 0.5) +  # semi-transparent so overlapping points stay visible
+  facet_wrap(~species) +  # one panel per species
+  theme_bw()
+```
+
+
+## Build models
+
+Let's consider how to spend our data budget:
+
+- create training and testing sets
+- create resampling folds from the *training* set
+
+```{r}
+set.seed(123)  # fix the RNG so the train/test split is reproducible
+penguin_split <- initial_split(penguins, strata = sex)  # stratify on the outcome, `sex`
+penguin_train <- training(penguin_split)
+penguin_test <- testing(penguin_split)
+
+set.seed(234)  # separate seed for the resampling folds
+penguin_folds <- vfold_cv(penguin_train, strata = sex)  # folds are built from the training set only
+penguin_folds
+```
+
+Let's create a **model specification** for each model we want to try:
+
+```{r}
+glm_spec <-
+  logistic_reg() %>%
+  set_engine("glm")  # logistic regression fit with the "glm" engine
+
+ranger_spec <-
+  rand_forest(trees = 1e3) %>%  # 1e3 = 1000 trees
+  set_engine("ranger") %>%
+  set_mode("classification")  # the outcome (`sex`) is a class, so use classification mode
+```
+
+To set up your modeling code, consider using the [parsnip addin](https://parsnip.tidymodels.org/reference/parsnip_addin.html) or the [usemodels](https://usemodels.tidymodels.org/) package.
+
+Now let's build a **model workflow** combining each model specification with a data preprocessor:
+
+```{r}
+glm_wf <-
+  workflow() %>%
+  add_formula(sex ~ .) %>%
+  add_model(glm_spec)
+
+ranger_wf <-
+  workflow() %>%
+  add_formula(sex ~ .) %>%
+  add_model(ranger_spec)
+```
+
+If your feature engineering needs are more complex than provided by a formula like `sex ~ .`, use a [recipe](https://www.tidymodels.org/start/recipes/). [Read more about feature engineering with recipes](https://www.tmwr.org/recipes.html) to learn how they work.
+
+
+## Evaluate models
+
+These models have no tuning parameters so we can evaluate them as they are. [Learn about tuning hyperparameters here.](https://www.tidymodels.org/start/tuning/)
+
+```{r}
+glm_rs <- fit_resamples(
+  glm_wf,
+  resamples = penguin_folds,
+  control = control_resamples(save_pred = TRUE)
+)
+
+ranger_rs <- fit_resamples(
+  ranger_wf,
+  resamples = penguin_folds,
+  control = control_resamples(save_pred = TRUE)
+)
+```
+
+To fit and evaluate _many_ modeling approaches together, consider using [workflowsets](https://workflowsets.tidymodels.org/). How did these two models compare?
+
+```{r}
+collect_metrics(glm_rs)
+collect_metrics(ranger_rs)
+```
+
+We can visualize these results using an ROC curve (or a confusion matrix via `conf_mat()`):
+
+```{r}
+# Overlay one ROC curve per model, using the predictions saved during resampling
+bind_rows(
+  glm = collect_predictions(glm_rs),
+  ranger = collect_predictions(ranger_rs),
+  .id = "mod"
+) %>%
+  group_by(mod) %>%
+  roc_curve(sex, .pred_female) %>%
+  autoplot()
+```
+
+These models perform very similarly, so perhaps we would choose the simpler, linear model. The function `last_fit()` *fits* one final time on the training data and *evaluates* on the testing data. This is the first time we have used the testing data.
+
+```{r}
+final_fitted <- glm_wf %>% last_fit(penguin_split)
+final_fitted %>% collect_metrics()  ## metrics evaluated on the *testing* data
+```
+
+This object contains a fitted workflow that we can use for prediction.
+
+```{r}
+final_wf <- final_fitted$.workflow[[1]]
+predict(final_wf, penguin_test[55,])
+```
+
+You can save this object to use later with new data, for example with `readr::write_rds()`.
+
+
+
diff --git a/inst/rmarkdown/templates/model-analysis/template.yaml b/inst/rmarkdown/templates/model-analysis/template.yaml
new file mode 100644
index 0000000..1789994
--- /dev/null
+++ b/inst/rmarkdown/templates/model-analysis/template.yaml
@@ -0,0 +1,4 @@
+name: Model Analysis
+description: >
+   Train and evaluate with tidymodels
+create_dir: FALSE

From 501e75f836fa68a4a8b763f8e8b974e2b9f056c7 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Wed, 5 May 2021 10:12:36 -0600
Subject: [PATCH 2/9] Update
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd

Co-authored-by: Max Kuhn <mxkuhn@gmail.com>
---
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index 2938d07..e5cece8 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -38,7 +38,7 @@ penguins %>%
 
 ## Build models
 
-Let's consider how to spend our data budget:
+Let's consider how to [spend our data budget](https://www.tmwr.org/splitting.html):
 
 - create training and testing sets
 - create resampling folds from the *training* set
@@ -142,4 +142,3 @@ predict(final_wf, penguin_test[55,])
 You can save this object to use later with new data, for example with `readr::write_rds()`.
 
 
-

From 11d49e6bfb952b1e210892eb9f338b67e3b4c9c6 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Wed, 5 May 2021 10:12:48 -0600
Subject: [PATCH 3/9] Update
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd

Co-authored-by: Max Kuhn <mxkuhn@gmail.com>
---
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index e5cece8..aa46ddd 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -54,7 +54,7 @@ penguin_folds <- vfold_cv(penguin_train, strata = sex)
 penguin_folds
 ```
 
-Let's create a **model specification** for each model we want to try:
+Let's create a [**model specification**](https://www.tmwr.org/models.html) for each model we want to try:
 
 ```{r}
 glm_spec <-
@@ -141,4 +141,3 @@ predict(final_wf, penguin_test[55,])
 
 You can save this object to use later with new data, for example with `readr::write_rds()`.
 
-

From ff538b37c4a74b466958ff0ae8d50b5ffd443c39 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Wed, 5 May 2021 10:12:55 -0600
Subject: [PATCH 4/9] Update
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd

Co-authored-by: Max Kuhn <mxkuhn@gmail.com>
---
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index aa46ddd..ef7055f 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -69,7 +69,7 @@ ranger_spec <-
 
 To set up your modeling code, consider using the [parsnip addin](https://parsnip.tidymodels.org/reference/parsnip_addin.html) or the [usemodels](https://usemodels.tidymodels.org/) package.
 
-Now let's build a **model workflow** combining each model specification with a data preprocessor:
+Now let's build a [**model workflow**](https://www.tmwr.org/workflows.html) combining each model specification with a data preprocessor:
 
 ```{r}
 glm_wf <-
@@ -140,4 +140,3 @@ predict(final_wf, penguin_test[55,])
 ```
 
 You can save this object to use later with new data, for example with `readr::write_rds()`.
-

From d91099ad4bbc94645a3b3c72cd5ca9112514af54 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Wed, 5 May 2021 10:14:11 -0600
Subject: [PATCH 5/9] Remove reference to workflowsets

---
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index ef7055f..3648104 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -104,7 +104,7 @@ ranger_rs <- fit_resamples(
 )
 ```
 
-To fit and evaluate _many_ modeling approaches together, consider using [workflowsets](https://workflowsets.tidymodels.org/). How did these two models compare?
+How did these two models compare?
 
 ```{r}
 collect_metrics(glm_rs)

From 66795539dac47a4e644f5273117c9e487bb74ad2 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Tue, 6 Jul 2021 15:03:03 -0600
Subject: [PATCH 6/9] Use new `extract_workflow()` function in template

---
 inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index 3648104..a188a6c 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -135,8 +135,8 @@ collect_metrics(final_fitted)  ## metrics evaluated on the *testing* data
 This object contains a fitted workflow that we can use for prediction.
 
 ```{r}
-final_wf <- final_fitted$.workflow[[1]]
+final_wf <- extract_workflow(final_fitted)
 predict(final_wf, penguin_test[55,])
 ```
 
-You can save this object to use later with new data, for example with `readr::write_rds()`.
+You can save this fitted `final_wf` object to use later with new data, for example with `readr::write_rds()`.

From 9da23ee4ab29ab98a41886d80e962e1669954025 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Tue, 6 Jul 2021 15:03:57 -0600
Subject: [PATCH 7/9] Need devel workflows for new extract_ function

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 17a3606..6563abf 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,7 +32,7 @@ Imports:
     tibble (>= 3.1.0),
     tidyr (>= 1.1.3),
     tune (>= 0.1.3),
-    workflows (>= 0.2.2),
+    workflows (>= 0.2.2.9000),
     workflowsets (>= 0.0.2),
     yardstick (>= 0.0.8)
 Suggests: 
@@ -45,3 +45,5 @@ Suggests:
 Encoding: UTF-8
 RoxygenNote: 7.1.1.9001
 VignetteBuilder: knitr
+Remotes:  
+    tidymodels/workflows

From 4692ef6d61e5f19625add2bd4b0a6916c5c33a2c Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Tue, 6 Jul 2021 15:08:25 -0600
Subject: [PATCH 8/9] Use devel hardhat

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6563abf..00b3152 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,6 +20,7 @@ Imports:
     conflicted (>= 1.0.4),
     dials (>= 0.0.9),
     dplyr (>= 1.0.5),
+    hardhat (>= 0.1.5.9000),
     ggplot2 (>= 3.3.3),
     infer (>= 0.5.4),
     modeldata (>= 0.1.0),
@@ -46,4 +47,5 @@ Encoding: UTF-8
 RoxygenNote: 7.1.1.9001
 VignetteBuilder: knitr
 Remotes:  
-    tidymodels/workflows
+    tidymodels/workflows,
+    tidymodels/hardhat

From ec0e7aedbd8bf27a531a641f0a5d014606548007 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Fri, 10 Sep 2021 10:27:49 -0600
Subject: [PATCH 9/9] Use new workflows interface, update pkg versions

---
 DESCRIPTION                                   |  7 ++-----
 .../model-analysis/skeleton/skeleton.Rmd      | 19 ++++++++-----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 00b3152..9db77ed 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,7 +20,7 @@ Imports:
     conflicted (>= 1.0.4),
     dials (>= 0.0.9),
     dplyr (>= 1.0.5),
-    hardhat (>= 0.1.5.9000),
+    hardhat (>= 0.1.6),
     ggplot2 (>= 3.3.3),
     infer (>= 0.5.4),
     modeldata (>= 0.1.0),
@@ -33,7 +33,7 @@ Imports:
     tibble (>= 3.1.0),
     tidyr (>= 1.1.3),
     tune (>= 0.1.3),
-    workflows (>= 0.2.2.9000),
+    workflows (>= 0.2.3),
     workflowsets (>= 0.0.2),
     yardstick (>= 0.0.8)
 Suggests: 
@@ -46,6 +46,3 @@ Suggests:
 Encoding: UTF-8
 RoxygenNote: 7.1.1.9001
 VignetteBuilder: knitr
-Remotes:  
-    tidymodels/workflows,
-    tidymodels/hardhat
diff --git a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
index a188a6c..c4ddd5c 100644
--- a/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/model-analysis/skeleton/skeleton.Rmd
@@ -72,15 +72,10 @@ To set up your modeling code, consider using the [parsnip addin](https://parsnip
 Now let's build a [**model workflow**](https://www.tmwr.org/workflows.html) combining each model specification with a data preprocessor:
 
 ```{r}
-glm_wf <-
-  workflow() %>%
-  add_formula(sex ~ .) %>%
-  add_model(glm_spec)
-
-ranger_wf <-
-  workflow() %>%
-  add_formula(sex ~ .) %>%
-  add_model(ranger_spec)
+# One formula shared by both workflows: model `sex` using every other column
+penguin_formula <- sex ~ .
+glm_wf <- workflow(preprocessor = penguin_formula, spec = glm_spec)
+ranger_wf <- workflow(preprocessor = penguin_formula, spec = ranger_spec)
 ```
 
 If your feature engineering needs are more complex than provided by a formula like `sex ~ .`, use a [recipe](https://www.tidymodels.org/start/recipes/). [Read more about feature engineering with recipes](https://www.tmwr.org/recipes.html) to learn how they work.
@@ -91,16 +86,18 @@ If your feature engineering needs are more complex than provided by a formula li
 These models have no tuning parameters so we can evaluate them as they are. [Learn about tuning hyperparameters here.](https://www.tidymodels.org/start/tuning/)
 
 ```{r}
+contrl_preds <- control_resamples(save_pred = TRUE)  # shared by both fit_resamples() calls; keeps the resample predictions
+
 glm_rs <- fit_resamples(
   glm_wf,
   resamples = penguin_folds,
-  control = control_resamples(save_pred = TRUE)
+  control = contrl_preds
 )
 
 ranger_rs <- fit_resamples(
   ranger_wf,
   resamples = penguin_folds,
-  control = control_resamples(save_pred = TRUE)
+  control = contrl_preds
 )
 ```