From 6599edd36ab83d5d6a1d288d93edcd9667f50a9e Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 18 Aug 2022 11:14:25 +0100 Subject: [PATCH 1/4] update classwork --- classwork/05-classwork.qmd | 67 +++++++++++++++++++------ classwork/06-classwork.qmd | 100 ++++++++++++++----------------------- 2 files changed, 89 insertions(+), 78 deletions(-) diff --git a/classwork/05-classwork.qmd b/classwork/05-classwork.qmd index ac49435c..106128e6 100644 --- a/classwork/05-classwork.qmd +++ b/classwork/05-classwork.qmd @@ -52,7 +52,7 @@ nhl_train <- analysis(nhl_val$splits[[1]]) set.seed(100) nhl_train %>% sample_n(200) %>% - plot_nhl_shots(emphasis = position) + plot_nhl_shots(emphasis = shooter_type) # Your code here! @@ -191,14 +191,37 @@ autoplot(roc_curve_points) Compute and plot an ROC curve for your current model. +What data is being used for this ROC curve plot? + ```{r} # Your code here! ``` -## Your turn +## Collapsing factor levels -What data is being used for this ROC curve plot? +```{r} +nhl_other_rec <- + recipe(on_goal ~ ., data = nhl_train) %>% + # Any player with <= 0.1% of shots is set to "other" + step_other(shooter, threshold = 0.001) %>% + step_dummy(all_nominal_predictors()) %>% + step_zv(all_predictors()) +``` + +## Does othering help? 
+ +```{r} +nhl_other_wflow <- + nhl_glm_wflow %>% + update_recipe(nhl_other_rec) + +nhl_other_res <- + nhl_other_wflow %>% + fit_resamples(nhl_val, control = ctrl) + +collect_metrics(nhl_other_res) +``` ## Player effects @@ -207,7 +230,7 @@ library(embed) nhl_effect_rec <- recipe(on_goal ~ ., data = nhl_train) %>% - step_lencode_mixed(player, outcome = vars(on_goal)) %>% + step_lencode_mixed(shooter, goaltender, outcome = vars(on_goal)) %>% step_dummy(all_nominal_predictors()) %>% step_zv(all_predictors()) ``` @@ -221,7 +244,7 @@ nhl_effect_wflow <- nhl_effect_res <- nhl_effect_wflow %>% - fit_resamples(nhl_val) + fit_resamples(nhl_val, control = ctrl) collect_metrics(nhl_effect_res) ``` @@ -231,36 +254,40 @@ collect_metrics(nhl_effect_res) ```{r} # angle nhl_angle_rec <- - nhl_indicators %>% + nhl_effect_rec %>% step_mutate( - angle = abs(atan2(abs(coord_y), (89 - abs(coord_x))) * (180 / pi)) + angle = abs( atan2(abs(coord_y), (89 - coord_x) ) * (180 / pi) ) ) -# distance -nhl_distance_rec <- +# defensive zone +nhl_zone_rec <- nhl_angle_rec %>% step_mutate( - distance = sqrt((89 - abs(coord_x))^2 + abs(coord_y)^2), - distance = log(distance) + defensive_zone = ifelse(coord_x <= -25.5, 1, 0) ) # behind goal line nhl_behind_rec <- - nhl_distance_rec %>% + nhl_zone_rec %>% step_mutate( - behind_goal_line = ifelse(abs(coord_x) >= 89, 1, 0) + behind_goal_line = ifelse(coord_x >= 89, 1, 0) ) ``` ## Fit different recipes ```{r} +no_coord_rec <- + nhl_indicators %>% + step_rm(starts_with("coord")) + set.seed(9) nhl_glm_set_res <- workflow_set( - list(`1_dummy` = nhl_indicators, `2_angle` = nhl_angle_rec, - `3_dist` = nhl_distance_rec, `4_bgl` = nhl_behind_rec), + list(`1_no_coord` = no_coord_rec, `2_other` = nhl_other_rec, + `3_effects` = nhl_effect_rec, `4_angle` = nhl_angle_rec, + `5_zone` = nhl_zone_rec, `6_bgl` = nhl_behind_rec), list(logistic = logistic_reg()) ) %>% workflow_map(fn = "fit_resamples", resamples = nhl_val, verbose = TRUE, control = ctrl) @@ -294,3 
+321,13 @@ collect_metrics(nhl_glm_set_res) %>% geom_point(size = 3) + labs(y = NULL, x = "ROC AUC (validation set)") ``` + +## Debugging a recipe + +```{r} +nhl_angle_fit <- prep(nhl_angle_rec) + +tidy(nhl_angle_fit, number = 1) %>% slice(1:4) + +bake(nhl_angle_fit, nhl_train %>% slice(1:3), starts_with("coord"), angle, shooter) +``` diff --git a/classwork/06-classwork.qmd b/classwork/06-classwork.qmd index 23b60fac..cfe598e1 100644 --- a/classwork/06-classwork.qmd +++ b/classwork/06-classwork.qmd @@ -1,5 +1,5 @@ --- -title: "3 - Tuning Hyperparameters - Classwork" +title: "6 - Tuning Hyperparameters - Classwork" subtitle: "Machine learning with tidymodels" editor_options: chunk_output_type: console @@ -30,25 +30,24 @@ nhl_val <- validation_split(nhl_train_and_val, prop = 0.80) nhl_train <- analysis(nhl_val$splits[[1]]) -nhl_distance_rec <- +nhl_position_rec <- recipe(on_goal ~ ., data = nhl_train) %>% - step_lencode_mixed(player, outcome = vars(on_goal)) %>% + step_lencode_mixed(shooter, goaltender, outcome = vars(on_goal)) %>% step_other(all_nominal_predictors()) %>% # TODO: keep this? 
step_dummy(all_nominal_predictors()) %>% step_zv(all_predictors()) %>% step_mutate( - angle = abs(atan2(abs(coord_y), (89 - abs(coord_x))) * (180 / pi)), - distance = sqrt((89 - abs(coord_x))^2 + abs(coord_y)^2), - distance = log(distance) + angle = abs( atan2(abs(coord_y), (89 - coord_x) ) * (180 / pi)), + behind_goal_line = ifelse(coord_x >= 89, 1, 0) ) -nhl_distance_wflow <- +nhl_position_wflow <- workflow() %>% - add_recipe(nhl_distance_rec) %>% + add_recipe(nhl_position_rec) %>% add_model(logistic_reg()) -nhl_distance_res <- - nhl_distance_wflow %>% +nhl_position_res <- + nhl_position_wflow %>% fit_resamples(nhl_val) ``` @@ -57,18 +56,16 @@ nhl_distance_res <- ```{r} glm_rec <- recipe(on_goal ~ ., data = nhl_train) %>% - step_lencode_mixed(player, outcome = vars(on_goal)) %>% + step_lencode_mixed(shooter, goaltender, outcome = vars(on_goal)) %>% step_dummy(all_nominal_predictors()) %>% step_mutate( - angle = abs(atan2(abs(coord_y), (89 - abs(coord_x))) * (180 / pi)), - distance = sqrt((89 - abs(coord_x))^2 + abs(coord_y)^2), - distance = log(distance), - behind_goal_line = ifelse(abs(coord_x) >= 89, 1, 0) + angle = abs( atan2(abs(coord_y), (89 - coord_x) ) * (180 / pi) ), + defensive_zone = ifelse(coord_x <= -25.5, 1, 0), + behind_goal_line = ifelse(coord_x >= 89, 1, 0) ) %>% - step_rm(coord_x, coord_y) %>% step_zv(all_predictors()) %>% step_ns(angle, deg_free = tune("angle")) %>% - step_ns(distance, deg_free = tune("distance")) %>% + step_ns(coord_x, deg_free = tune("coord_x")) %>% step_normalize(all_numeric_predictors()) glm_spline_wflow <- @@ -80,7 +77,7 @@ glm_spline_wflow <- ## Create a grid ```{r} -set.seed(2) +set.seed(12) grid <- glm_spline_wflow %>% extract_parameter_set_dials() %>% @@ -101,16 +98,16 @@ Try creating a regular grid. 
## Update parameter ranges ```{r} -set.seed(2) +set.seed(12) grid <- glm_spline_wflow %>% extract_parameter_set_dials() %>% - update(angle = spline_degree(c(2L, 20L)), - distance = spline_degree(c(2L, 20L))) %>% + update(angle = spline_degree(c(2L, 50L)), + coord_x = spline_degree(c(2L, 50L))) %>% grid_latin_hypercube(size = 25) grid %>% - ggplot(aes(angle, distance)) + + ggplot(aes(angle, coord_x)) + geom_point(size = 4) ``` @@ -123,6 +120,7 @@ ctrl <- control_grid(save_pred = TRUE, parallel_over = "everything") glm_spline_res <- glm_spline_wflow %>% tune_grid(resamples = nhl_val, grid = grid, control = ctrl) + glm_spline_res ``` @@ -157,33 +155,20 @@ show_best(glm_spline_res, metric = "roc_auc") select_best(glm_spline_res, metric = "roc_auc") ``` -## Your turn - -Try an alternative selection strategy. - -Read the docs for `select_by_pct_loss()`. - -Try choosing a model that has a simpler (less "wiggly") relationship for `distance`. - -```{r} -# Your code here! - -``` - ## Boosted trees ```{r} xgb_spec <- boost_tree( - trees = 500, min_n = tune(), stop_iter = tune(), tree_depth = tune(), + trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), loss_reduction = tune() ) %>% set_mode("classification") %>% - set_engine("xgboost", validation = 1/10) # <- for better early stopping + set_engine("xgboost") xgb_rec <- recipe(on_goal ~ ., data = nhl_train) %>% - step_lencode_mixed(player, outcome = vars(on_goal)) %>% + step_lencode_mixed(shooter, goaltender, outcome = vars(on_goal)) %>% step_dummy(all_nominal_predictors()) %>% step_zv(all_predictors()) @@ -218,7 +203,7 @@ set.seed(9) xgb_res <- xgb_wflow %>% - tune_grid(resamples = nhl_val, grid = 15, control = ctrl) # automatic grid now! + tune_grid(resamples = nhl_val, grid = 30, control = ctrl) # automatic grid now! 
``` ## Your turn @@ -246,12 +231,10 @@ autoplot(xgb_res) coord_rec <- xgb_rec %>% step_mutate( - angle = abs(atan2(abs(coord_y), (89 - abs(coord_x))) * (180 / pi)), - distance = sqrt((89 - abs(coord_x))^2 + abs(coord_y)^2), - distance = log(distance), - behind_goal_line = ifelse(abs(coord_x) >= 89, 1, 0) - ) %>% - step_rm(coord_x, coord_y) + angle = abs( atan2(abs(coord_y), (89 - coord_x) ) * (180 / pi) ), + defensive_zone = ifelse(coord_x <= -25.5, 1, 0), + behind_goal_line = ifelse(coord_x >= 89, 1, 0) + ) xgb_coord_wflow <- workflow() %>% @@ -261,14 +244,16 @@ xgb_coord_wflow <- set.seed(9) xgb_coord_res <- xgb_coord_wflow %>% - tune_grid(resamples = nhl_val, grid = 20, control = ctrl) + tune_grid(resamples = nhl_val, grid = 30, control = ctrl) ``` ## Did the machine figure it out? ```{r} -show_best(xgb_res, metric = "roc_auc") -show_best(xgb_coord_res, metric = "roc_auc") +# no extra features +show_best(xgb_res, metric = "roc_auc", n = 3) +# with additional coordinate features +show_best(xgb_coord_res, metric = "roc_auc", n = 3) ``` ## Compare models @@ -282,22 +267,11 @@ glm_spline_res %>% ```{r} # Best boosting results -xgb_coord_res %>% +xgb_res %>% show_best(metric = "roc_auc", n = 1) %>% select(.metric, .estimator, mean, n, std_err, .config) ``` -## Your turn - -Can you get better ROC results with xgboost? - -Try increasing `learn_rate` beyond the original range. - -```{r} -# Your code here! - -``` - ## Updating the workflow ```{r} @@ -366,7 +340,7 @@ glm_explainer <- explain_tidymodels( final_glm_spline_wflow, data = dplyr::select(nhl_train, -on_goal), # DALEX required an integer for factors: - y = as.integer(nhl_train$on_goal), + y = as.integer(nhl_train$on_goal) - 1, verbose = FALSE ) ``` @@ -381,13 +355,13 @@ pdp_coord_x <- model_profile( glm_explainer, variables = "coord_x", N = 500, - groups = "position" + groups = "strength" ) ``` ## Your turn -Try grouping by another variable, like `game_type` or `dow`. 
+Try grouping by another variable, like `extra_attacker` or `game_seconds`. ```{r} # Your code here! From ee346f491f25313115a568a67e089f977fea5ae5 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 18 Aug 2022 11:15:11 +0100 Subject: [PATCH 2/4] remove empty leading lines --- slides/06-tuning-hyperparameters.qmd | 3 --- slides/07-transit-case-study.qmd | 1 - 2 files changed, 4 deletions(-) diff --git a/slides/06-tuning-hyperparameters.qmd b/slides/06-tuning-hyperparameters.qmd index 64e22d73..212d1d77 100644 --- a/slides/06-tuning-hyperparameters.qmd +++ b/slides/06-tuning-hyperparameters.qmd @@ -360,7 +360,6 @@ grid %>% ## Spline grid search `r hexes(c("dials", "workflows", "tune"))` ```{r tuning} - set.seed(9) ctrl <- control_grid(save_pred = TRUE, parallel_over = "everything") @@ -629,7 +628,6 @@ Faceted on the expensiveness of preprocessing used. This will take some time to run ⏳ ```{r xgboost-tune} - set.seed(9) xgb_res <- @@ -740,7 +738,6 @@ glm_spline_wflow ## The final fit to the NHL data `r hexes(c("workflows", "tune"))` {.annotation} ```{r final-last-fit} - test_res <- glm_spline_wflow %>% last_fit(split = nhl_split) diff --git a/slides/07-transit-case-study.qmd b/slides/07-transit-case-study.qmd index db48e380..53aea5ae 100644 --- a/slides/07-transit-case-study.qmd +++ b/slides/07-transit-case-study.qmd @@ -406,7 +406,6 @@ cb_spec <- cubist_rules(committees = 25, neighbors = tune()) mars_spec <- mars(prod_degree = tune()) %>% set_mode("regression") lm_spec <- linear_reg() - chi_set <- workflow_set( list(pca = chi_pca_rec, basic = chi_rec), From 130ce99a9825fd223d1f21ac5ee862ca992528fa Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 18 Aug 2022 11:15:22 +0100 Subject: [PATCH 3/4] fix numbering --- slides/08-wrapping-up.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slides/08-wrapping-up.qmd b/slides/08-wrapping-up.qmd index 15edef7d..2b0d23ee 100644 --- a/slides/08-wrapping-up.qmd +++ b/slides/08-wrapping-up.qmd @@ -1,5 
+1,5 @@ --- -title: "7 - Wrapping up" +title: "8 - Wrapping up" subtitle: "Machine learning with tidymodels" format: revealjs: From 77a1938e8d9a4f2c781f85985a200b0ab46f0ab1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Fri, 19 Aug 2022 09:51:50 -0400 Subject: [PATCH 4/4] re-render --- docs/search.json | 16 ++--- docs/sitemap.xml | 38 +++++------ docs/slides/01-introduction.html | 6 +- docs/slides/06-tuning-hyperparameters.html | 75 +++++++++++----------- docs/slides/07-transit-case-study.html | 19 +++--- docs/slides/08-wrapping-up.html | 8 +-- 6 files changed, 79 insertions(+), 83 deletions(-) diff --git a/docs/search.json b/docs/search.json index fda5b81d..0a2fc4ee 100644 --- a/docs/search.json +++ b/docs/search.json @@ -130,7 +130,7 @@ "href": "slides/01-introduction.html#what-is-tidymodels", "title": "1 - Introduction", "section": "What is tidymodels? ", - "text": "What is tidymodels? \n\nlibrary(tidymodels)\n#> ── Attaching packages ──────────────────────────── tidymodels 1.0.0 ──\n#> ✔ broom 1.0.0 ✔ rsample 1.1.0\n#> ✔ dials 1.0.0 ✔ tibble 3.1.8\n#> ✔ dplyr 1.0.9 ✔ tidyr 1.2.0\n#> ✔ infer 1.0.2 ✔ tune 1.0.0\n#> ✔ modeldata 1.0.0 ✔ workflows 1.0.0\n#> ✔ parsnip 1.0.0 ✔ workflowsets 1.0.0\n#> ✔ purrr 0.3.4 ✔ yardstick 1.0.0\n#> ✔ recipes 1.0.1\n#> ── Conflicts ─────────────────────────────── tidymodels_conflicts() ──\n#> ✖ purrr::discard() masks scales::discard()\n#> ✖ dplyr::filter() masks stats::filter()\n#> ✖ dplyr::lag() masks stats::lag()\n#> ✖ recipes::step() masks stats::step()\n#> • Use suppressPackageStartupMessages() to eliminate package startup messages" + "text": "What is tidymodels? 
\n\nlibrary(tidymodels)\n#> ── Attaching packages ──────────────────────────── tidymodels 1.0.0 ──\n#> ✔ broom 1.0.0 ✔ rsample 1.1.0\n#> ✔ dials 1.0.0 ✔ tibble 3.1.8\n#> ✔ dplyr 1.0.9 ✔ tidyr 1.2.0\n#> ✔ infer 1.0.2 ✔ tune 1.0.0\n#> ✔ modeldata 1.0.0 ✔ workflows 1.0.0\n#> ✔ parsnip 1.0.1 ✔ workflowsets 1.0.0\n#> ✔ purrr 0.3.4 ✔ yardstick 1.0.0\n#> ✔ recipes 1.0.1\n#> ── Conflicts ─────────────────────────────── tidymodels_conflicts() ──\n#> ✖ purrr::discard() masks scales::discard()\n#> ✖ dplyr::filter() masks stats::filter()\n#> ✖ dplyr::lag() masks stats::lag()\n#> ✖ recipes::step() masks stats::step()\n#> • Use tidymodels_prefer() to resolve common conflicts." }, { "objectID": "slides/01-introduction.html#the-whole-game", @@ -270,7 +270,7 @@ "href": "slides/01-introduction.html#our-versions", "title": "1 - Introduction", "section": "Our versions", - "text": "Our versions\n\n\n\nbroom (1.0.0, CRAN), DALEX (2.4.2, local), DALEXtra (2.2.0, CRAN), dials (1.0.0, CRAN), doParallel (1.0.17, CRAN), dplyr (1.0.9, CRAN), embed (1.0.0, CRAN), ggplot2 (3.3.6, CRAN), modeldata (1.0.0, CRAN), ongoal (0.0.3, Github (topepo/ongoal@68e6466bb), parsnip (1.0.0, CRAN), purrr (0.3.4, CRAN), ranger (0.14.1, CRAN), recipes (1.0.1, CRAN), rpart (4.1.16, CRAN), rpart.plot (3.1.0, CRAN), rsample (1.1.0, CRAN), scales (1.2.0, CRAN), stacks (1.0.0, CRAN), tibble (3.1.8, CRAN), tidymodels (1.0.0, CRAN), tidyr (1.2.0, CRAN), tune (1.0.0, CRAN), vetiver (0.1.5, CRAN), workflows (1.0.0, CRAN), workflowsets (1.0.0, CRAN), xgboost (1.6.0.1, CRAN), and yardstick (1.0.0, CRAN)\nQuarto: 1.0.38\n\n\nhttps://workshops.tidymodels.org" + "text": "Our versions\n\n\n\nbroom (1.0.0, CRAN), DALEX (2.4.2, local), DALEXtra (2.2.1, CRAN), dials (1.0.0, CRAN), doParallel (1.0.17, CRAN), dplyr (1.0.9, CRAN), embed (1.0.0, CRAN), ggplot2 (3.3.6, CRAN), modeldata (1.0.0, CRAN), ongoal (0.0.3, Github (topepo/ongoal@68e6466bb), parsnip (1.0.1, CRAN), purrr (0.3.4, CRAN), ranger (0.14.1, CRAN), recipes (1.0.1, 
CRAN), rpart (4.1.16, CRAN), rpart.plot (3.1.1, CRAN), rsample (1.1.0, CRAN), scales (1.2.0, CRAN), stacks (1.0.0, CRAN), tibble (3.1.8, CRAN), tidymodels (1.0.0, CRAN), tidyr (1.2.0, CRAN), tune (1.0.0, CRAN), vetiver (0.1.7, CRAN), workflows (1.0.0, CRAN), workflowsets (1.0.0, CRAN), xgboost (1.6.0.1, CRAN), and yardstick (1.0.0, CRAN)\nQuarto: 1.0.38\n\n\nhttps://workshops.tidymodels.org" }, { "objectID": "slides/02-data-budget.html#data-on-tree-frog-hatching", @@ -1481,7 +1481,7 @@ "href": "slides/06-tuning-hyperparameters.html#spline-grid-search", "title": "6 - Tuning Hyperparameters", "section": "Spline grid search ", - "text": "Spline grid search \n\n\nset.seed(9)\nctrl <- control_grid(save_pred = TRUE, parallel_over = \"everything\")\n\nglm_spline_res <-\n glm_spline_wflow %>%\n tune_grid(resamples = nhl_val, grid = grid, control = ctrl)\n\nglm_spline_res\n#> # Tuning results\n#> # Validation Set Split (0.8/0.2) \n#> # A tibble: 1 × 5\n#> splits id .metrics .notes .predictions \n#> \n#> 1 validation \n#> \n#> There were issues with some computations:\n#> \n#> - Warning(s) x3: prediction from a rank-deficient fit may be misleading\n#> \n#> Run `show_notes(.Last.tune.result)` for more information.\n\n\n\ntune_grid() is representative of tuning function syntax\nsimilar to fit_resamples()" + "text": "Spline grid search \n\nset.seed(9)\nctrl <- control_grid(save_pred = TRUE, parallel_over = \"everything\")\n\nglm_spline_res <-\n glm_spline_wflow %>%\n tune_grid(resamples = nhl_val, grid = grid, control = ctrl)\n\nglm_spline_res\n#> # Tuning results\n#> # Validation Set Split (0.8/0.2) \n#> # A tibble: 1 × 5\n#> splits id .metrics .notes .predictions \n#> \n#> 1 validation \n#> \n#> There were issues with some computations:\n#> \n#> - Warning(s) x3: prediction from a rank-deficient fit may be misleading\n#> \n#> Run `show_notes(.Last.tune.result)` for more information.\n\n\n\ntune_grid() is representative of tuning function syntax\nsimilar to fit_resamples()" }, 
{ "objectID": "slides/06-tuning-hyperparameters.html#your-turn-1", @@ -1593,7 +1593,7 @@ "href": "slides/06-tuning-hyperparameters.html#tuning", "title": "6 - Tuning Hyperparameters", "section": "Tuning ", - "text": "Tuning \nThis will take some time to run ⏳\n\n\nset.seed(9)\n\nxgb_res <-\n xgb_wflow %>%\n tune_grid(resamples = nhl_val, grid = 30, control = ctrl) # automatic grid now!" + "text": "Tuning \nThis will take some time to run ⏳\n\nset.seed(9)\n\nxgb_res <-\n xgb_wflow %>%\n tune_grid(resamples = nhl_val, grid = 30, control = ctrl) # automatic grid now!" }, { "objectID": "slides/06-tuning-hyperparameters.html#your-turn-3", @@ -1649,7 +1649,7 @@ "href": "slides/06-tuning-hyperparameters.html#the-final-fit-to-the-nhl-data", "title": "6 - Tuning Hyperparameters", "section": "The final fit to the NHL data ", - "text": "The final fit to the NHL data \n\n\ntest_res <- \n glm_spline_wflow %>% \n last_fit(split = nhl_split)\n\ntest_res\n#> # Resampling results\n#> # Manual resampling \n#> # A tibble: 1 × 6\n#> splits id .metrics .notes .predictions .workflow \n#> \n#> 1 train/test split \n\n\nRemember that last_fit() fits one time with the combined training and validation set, then evaluates one time with the testing set." + "text": "The final fit to the NHL data \n\ntest_res <- \n glm_spline_wflow %>% \n last_fit(split = nhl_split)\n\ntest_res\n#> # Resampling results\n#> # Manual resampling \n#> # A tibble: 1 × 6\n#> splits id .metrics .notes .predictions .workflow \n#> \n#> 1 train/test split \n\n\nRemember that last_fit() fits one time with the combined training and validation set, then evaluates one time with the testing set." }, { "objectID": "slides/06-tuning-hyperparameters.html#your-turn-4", @@ -1866,7 +1866,7 @@ "href": "slides/07-transit-case-study.html#make-some-models", "title": "7 - Case Study on Transportation", "section": "Make some models ", - "text": "Make some models \nLet’s try three models. 
The first one requires the rules package (loaded earlier).\n\ncb_spec <- cubist_rules(committees = 25, neighbors = tune())\nmars_spec <- mars(prod_degree = tune()) %>% set_mode(\"regression\")\nlm_spec <- linear_reg()\n\n\nchi_set <- \n workflow_set(\n list(pca = chi_pca_rec, basic = chi_rec), \n list(cubist = cb_spec, mars = mars_spec, lm = lm_spec)\n ) %>% \n # Evaluate models using mean absolute errors\n option_add(metrics = metric_set(mae))\n\n\nBriefly talk about Cubist being a (sort of) boosted rule-based model and MARS being a nonlinear regression model. Both incorporate feature selection nicely." + "text": "Make some models \nLet’s try three models. The first one requires the rules package (loaded earlier).\n\ncb_spec <- cubist_rules(committees = 25, neighbors = tune())\nmars_spec <- mars(prod_degree = tune()) %>% set_mode(\"regression\")\nlm_spec <- linear_reg()\n\nchi_set <- \n workflow_set(\n list(pca = chi_pca_rec, basic = chi_rec), \n list(cubist = cb_spec, mars = mars_spec, lm = lm_spec)\n ) %>% \n # Evaluate models using mean absolute errors\n option_add(metrics = metric_set(mae))\n\n\nBriefly talk about Cubist being a (sort of) boosted rule-based model and MARS being a nonlinear regression model. Both incorporate feature selection nicely." 
}, { "objectID": "slides/07-transit-case-study.html#process-them-on-the-resamples", @@ -2011,14 +2011,14 @@ { "objectID": "slides/08-wrapping-up.html#your-turn", "href": "slides/08-wrapping-up.html#your-turn", - "title": "7 - Wrapping up", + "title": "8 - Wrapping up", "section": "Your turn", "text": "Your turn\n\nWhat is one thing you learned that surprised you?\nWhat is one thing you learned that you plan to use?\n\n\n\n05:00" }, { "objectID": "slides/08-wrapping-up.html#resources-to-keep-learning", "href": "slides/08-wrapping-up.html#resources-to-keep-learning", - "title": "7 - Wrapping up", + "title": "8 - Wrapping up", "section": "Resources to keep learning", "text": "Resources to keep learning\n\n\nhttps://www.tidymodels.org/\n\n\n\n\nhttps://www.tmwr.org/\n\n\n\n\nhttp://www.feat.engineering/\n\n\n\n\nhttps://smltar.com/\n\n\n\nFollow us on Twitter and at the tidyverse blog for updates!\n\n\nhttps://workshops.tidymodels.org" }, diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 2ca351ef..f78b39f1 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -2,78 +2,78 @@ https://workshops.tidymodels.org/index.html - 2022-08-17T16:35:24.760Z + 2022-08-19T13:47:57.435Z https://workshops.tidymodels.org/slides/01-introduction.html - 2022-08-17T16:35:30.969Z + 2022-08-19T13:48:03.747Z https://workshops.tidymodels.org/slides/02-data-budget.html - 2022-08-17T16:35:35.561Z + 2022-08-19T13:48:08.361Z https://workshops.tidymodels.org/slides/03-what-makes-a-model.html - 2022-08-17T16:35:41.014Z + 2022-08-19T13:48:13.877Z https://workshops.tidymodels.org/slides/04-evaluating-models.html - 2022-08-17T16:35:59.848Z + 2022-08-19T13:48:33.277Z https://workshops.tidymodels.org/slides/05-feature-engineering.html - 2022-08-17T16:36:35.326Z + 2022-08-19T13:49:10.432Z https://workshops.tidymodels.org/slides/06-tuning-hyperparameters.html - 2022-08-17T16:37:54.625Z + 2022-08-19T13:50:32.314Z https://workshops.tidymodels.org/slides/07-transit-case-study.html - 
2022-08-17T16:38:24.274Z + 2022-08-19T13:51:03.636Z https://workshops.tidymodels.org/slides/08-wrapping-up.html - 2022-08-17T16:38:25.732Z + 2022-08-19T13:51:05.130Z https://workshops.tidymodels.org/slides/annotations.html - 2022-08-17T16:38:29.751Z + 2022-08-19T13:51:09.325Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/01-introduction.html - 2022-08-17T16:38:30.329Z + 2022-08-19T13:51:09.912Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/02-data-budget.html - 2022-08-17T16:38:30.851Z + 2022-08-19T13:51:10.455Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/03-what-makes-a-model.html - 2022-08-17T16:38:31.490Z + 2022-08-19T13:51:11.100Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/04-evaluating-models.html - 2022-08-17T16:38:32.271Z + 2022-08-19T13:51:11.866Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/05-feature-engineering.html - 2022-08-17T16:38:32.981Z + 2022-08-19T13:51:12.598Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/06-tuning-hyperparameters.html - 2022-08-17T16:38:33.705Z + 2022-08-19T13:51:13.378Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/07-wrapping-up.html - 2022-08-17T16:38:33.967Z + 2022-08-19T13:51:13.632Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/annotations.html - 2022-08-17T16:38:34.588Z + 2022-08-19T13:51:14.261Z https://workshops.tidymodels.org/archive/2022-07-RStudio-conf/index.html - 2022-08-17T16:38:35.041Z + 2022-08-19T13:51:14.719Z diff --git a/docs/slides/01-introduction.html b/docs/slides/01-introduction.html index a5313187..00e7ec4e 100644 --- a/docs/slides/01-introduction.html +++ b/docs/slides/01-introduction.html @@ -522,7 +522,7 @@

What is tidymodels? #> ✔ dplyr 1.0.9 ✔ tidyr 1.2.0 #> ✔ infer 1.0.2 ✔ tune 1.0.0 #> ✔ modeldata 1.0.0 ✔ workflows 1.0.0 -#> ✔ parsnip 1.0.0 ✔ workflowsets 1.0.0 +#> ✔ parsnip 1.0.1 ✔ workflowsets 1.0.0 #> ✔ purrr 0.3.4 ✔ yardstick 1.0.0 #> ✔ recipes 1.0.1 #> ── Conflicts ─────────────────────────────── tidymodels_conflicts() ── @@ -530,7 +530,7 @@

What is tidymodels? #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ✖ recipes::step() masks stats::step() -#> • Use suppressPackageStartupMessages() to eliminate package startup messages +#> • Use tidymodels_prefer() to resolve common conflicts.
@@ -647,7 +647,7 @@

Our versions

-

broom (1.0.0, CRAN), DALEX (2.4.2, local), DALEXtra (2.2.0, CRAN), dials (1.0.0, CRAN), doParallel (1.0.17, CRAN), dplyr (1.0.9, CRAN), embed (1.0.0, CRAN), ggplot2 (3.3.6, CRAN), modeldata (1.0.0, CRAN), ongoal (0.0.3, Github (topepo/ongoal@68e6466bb), parsnip (1.0.0, CRAN), purrr (0.3.4, CRAN), ranger (0.14.1, CRAN), recipes (1.0.1, CRAN), rpart (4.1.16, CRAN), rpart.plot (3.1.0, CRAN), rsample (1.1.0, CRAN), scales (1.2.0, CRAN), stacks (1.0.0, CRAN), tibble (3.1.8, CRAN), tidymodels (1.0.0, CRAN), tidyr (1.2.0, CRAN), tune (1.0.0, CRAN), vetiver (0.1.5, CRAN), workflows (1.0.0, CRAN), workflowsets (1.0.0, CRAN), xgboost (1.6.0.1, CRAN), and yardstick (1.0.0, CRAN)

+

broom (1.0.0, CRAN), DALEX (2.4.2, local), DALEXtra (2.2.1, CRAN), dials (1.0.0, CRAN), doParallel (1.0.17, CRAN), dplyr (1.0.9, CRAN), embed (1.0.0, CRAN), ggplot2 (3.3.6, CRAN), modeldata (1.0.0, CRAN), ongoal (0.0.3, Github (topepo/ongoal@68e6466bb), parsnip (1.0.1, CRAN), purrr (0.3.4, CRAN), ranger (0.14.1, CRAN), recipes (1.0.1, CRAN), rpart (4.1.16, CRAN), rpart.plot (3.1.1, CRAN), rsample (1.1.0, CRAN), scales (1.2.0, CRAN), stacks (1.0.0, CRAN), tibble (3.1.8, CRAN), tidymodels (1.0.0, CRAN), tidyr (1.2.0, CRAN), tune (1.0.0, CRAN), vetiver (0.1.7, CRAN), workflows (1.0.0, CRAN), workflowsets (1.0.0, CRAN), xgboost (1.6.0.1, CRAN), and yardstick (1.0.0, CRAN)

Quarto: 1.0.38

+chi_set <- + workflow_set( + list(pca = chi_pca_rec, basic = chi_rec), + list(cubist = cb_spec, mars = mars_spec, lm = lm_spec) + ) %>% + # Evaluate models using mean absolute errors + option_add(metrics = metric_set(mae))