tidymodels · topepo · Jun 27, 2022 · Jun 15, 2022 · Jun 16, 2022 · Jun 16, 2022
diff --git a/NAMESPACE b/NAMESPACE
@@ -137,6 +137,7 @@ export(C5.0_train)
 export(C5_rules)
 export(add_rowindex)
 export(augment)
+export(auto_ml)
 export(autoplot)
 export(bag_mars)
 export(bag_tree)

diff --git a/R/auto_ml.R b/R/auto_ml.R
@@ -0,0 +1,37 @@
+#' Automatic Machine Learning
+#'
+#' @description
+#'
+#' `auto_ml()` defines an automated searching and tuning process where
+#' many models of different families are trained and ranked by their
+#' performance on the training data.
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("auto_ml")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @param mode A single character string for the prediction outcome mode.
+#'  Possible values for this model are "unknown", "regression", or
+#'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting.
+#'
+#' @template spec-details
+#'
+#' @template spec-references
+#'
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("auto_ml")}
+#' @export
+auto_ml <- function(mode = "unknown", engine = "h2o") {
+  # No main arguments exist, so `args` is empty; `eng_args`/`method` are NULL.
+  spec <- list(args = list(), eng_args = NULL, mode = mode,
+               method = NULL, engine = engine)
+  class(spec) <- make_classes("auto_ml")
+  spec
+}
+
+# ------------------------------------------------------------------------------
+set_new_model("auto_ml")  # create the registry entry for this model type
+set_model_mode("auto_ml", "regression")  # declare the supported modes
+set_model_mode("auto_ml", "classification")
diff --git a/R/auto_ml_h2o.R b/R/auto_ml_h2o.R
@@ -0,0 +1,12 @@
+#' Automatic machine learning via h2o
+#'
+#' [h2o::h2o.automl] defines an automated model training process and returns a
+#' leaderboard of the best-performing models.
+#'
+#' @includeRmd man/rmd/auto_ml_h2o.md details
+#'
+#' @name details_auto_ml_h2o
+#' @keywords internal
+NULL
+
+# See inst/README-DOCS.md for a description of how these files are processed
diff --git a/R/print.R b/R/print.R
@@ -32,6 +32,7 @@ get_model_desc <- function(cls) {
 
 model_descs <- tibble::tribble(
   ~cls,                   ~desc,
+  "auto_ml",              "Automatic Machine Learning",
   "bag_mars",             "Bagged MARS",
   "bag_tree",             "Bagged Decision Tree",
   "bart",                 "BART",

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -33,6 +33,7 @@ figures:
 reference:
   - title: Models
     contents:
+      - auto_ml
       - bag_mars
       - bag_tree
       - bart

diff --git a/inst/models.tsv b/inst/models.tsv
@@ -1,4 +1,6 @@
 "model"	"mode"	"engine"	"pkg"
+"auto_ml"	"classification"	"h2o"	"agua"
+"auto_ml"	"regression"	"h2o"	"agua"
 "bag_mars"	"classification"	"earth"	"baguette"
 "bag_mars"	"regression"	"earth"	"baguette"
 "bag_tree"	"censored regression"	"rpart"	"censored"

diff --git a/man/auto_ml.Rd b/man/auto_ml.Rd
diff --git a/man/details_auto_ml_h2o.Rd b/man/details_auto_ml_h2o.Rd
diff --git a/man/rmd/auto_ml_h2o.Rmd b/man/rmd/auto_ml_h2o.Rmd
@@ -0,0 +1,47 @@
+```{r, child = "aaa.Rmd", include = FALSE}
+```
+
+`r descr_models("auto_ml", "h2o")`
+
+## Tuning Parameters
+
+This model has no tuning parameters.
+
+Engine arguments of interest 
+
+- `max_runtime_secs` and `max_models`: control the maximum running time and the maximum number of models to build in the automatic process. 
+
+- `exclude_algos` and `include_algos`: a character vector indicating the excluded or included algorithms during model building. To see a full list of supported models, see the details section in [h2o::h2o.automl()].
+
+- `validation`: A number between 0 and 1 specifying the _proportion_ of training data reserved as validation set. This is used by h2o for performance assessment and potential early stopping. 
+
+## Translation from parsnip to the original package (regression)
+
+[agua::h2o_train_auto()] is a wrapper around [h2o::h2o.automl()]. 
+
+```{r h2o-reg}
+auto_ml() %>%  
+  set_engine("h2o") %>% 
+  set_mode("regression") %>% 
+  translate()
+```
+
+
+## Translation from parsnip to the original package (classification)
+
+```{r h2o-cls}
+auto_ml() %>%  
+  set_engine("h2o") %>% 
+  set_mode("classification") %>% 
+  translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## Initializing h2o 
+
+```{r child = "template-h2o-init.Rmd"}
+```
diff --git a/man/rmd/auto_ml_h2o.md b/man/rmd/auto_ml_h2o.md
@@ -0,0 +1,73 @@
+
+
+
+For this engine, there are multiple modes: classification and regression
+
+## Tuning Parameters
+
+This model has no tuning parameters.
+
+Engine arguments of interest 
+
+- `max_runtime_secs` and `max_models`: control the maximum running time and the maximum number of models to build in the automatic process. 
+
+- `exclude_algos` and `include_algos`: a character vector indicating the excluded or included algorithms during model building. To see a full list of supported models, see the details section in [h2o::h2o.automl()].
+
+- `validation`: A number between 0 and 1 specifying the _proportion_ of training data reserved as validation set. This is used by h2o for performance assessment and potential early stopping. 
+
+## Translation from parsnip to the original package (regression)
+
+[agua::h2o_train_auto()] is a wrapper around [h2o::h2o.automl()]. 
+
+
+```r
+auto_ml() %>%  
+  set_engine("h2o") %>% 
+  set_mode("regression") %>% 
+  translate()
+```
+
+```
+## Automatic Machine Learning Model Specification (regression)
+## 
+## Computational engine: h2o 
+## 
+## Model fit template:
+## agua::h2o_train_auto(x = missing_arg(), y = missing_arg(), weights = missing_arg(), 
+##     validation_frame = missing_arg(), verbosity = NULL)
+```
+
+
+## Translation from parsnip to the original package (classification)
+
+
+```r
+auto_ml() %>%  
+  set_engine("h2o") %>% 
+  set_mode("classification") %>% 
+  translate()
+```
+
+```
+## Automatic Machine Learning Model Specification (classification)
+## 
+## Computational engine: h2o 
+## 
+## Model fit template:
+## agua::h2o_train_auto(x = missing_arg(), y = missing_arg(), weights = missing_arg(), 
+##     validation_frame = missing_arg(), verbosity = NULL)
+```
+
+## Preprocessing requirements
+
+
+Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators.
+
+## Initializing h2o 
+
+
+To use the h2o engine with tidymodels, please run `h2o::h2o.init()` first. By default, this connects R to the local h2o server. This needs to be done in every new R session. You can also connect to a remote h2o server with an IP address; for more details, see [h2o::h2o.init()]. 
+
+You can control the number of threads in the thread pool used by h2o with the `nthreads` argument. By default, it uses all CPUs on the host. This is different from the usual parallel processing mechanism in tidymodels for tuning: while tidymodels parallelizes over resamples, h2o parallelizes over hyperparameter combinations for a given resample. 
+
+h2o will automatically shut down the local h2o instance started by R when R is terminated. To manually stop the h2o server, run `h2o::h2o.shutdown()`.