From bebc5d56328bdd78e469928fa6f493832e551949 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Fri, 19 Jun 2020 15:30:06 -0700 Subject: [PATCH 1/7] initial go at iris -> modeldata::hpc_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switching out iris for modeldata::hpc_data—much of the switches so far were global find-and-replace, with some manual bug fixes. Still quite a few to go. --- R/descriptors.R | 28 ++--- R/model_object_docs.R | 20 ++-- R/nullmodel.R | 1 - man/descriptors.Rd | 28 ++--- man/model_spec.Rd | 20 ++-- man/tidy.nullmodel.Rd | 1 - tests/testthat/helper-objects.R | 1 + tests/testthat/test_adds.R | 10 +- tests/testthat/test_boost_tree.R | 6 +- tests/testthat/test_boost_tree_spark.R | 22 ++-- tests/testthat/test_boost_tree_xgboost.R | 48 ++++----- tests/testthat/test_convert_data.R | 23 +++-- tests/testthat/test_decision_tree.R | 6 +- tests/testthat/test_descriptors.R | 91 +++++++++-------- tests/testthat/test_failed_models.R | 15 +-- tests/testthat/test_fit_interfaces.R | 19 ++-- tests/testthat/test_linear_reg.R | 108 ++++++++++---------- tests/testthat/test_linear_reg_glmnet.R | 85 +++++++-------- tests/testthat/test_linear_reg_keras.R | 37 +++---- tests/testthat/test_linear_reg_spark.R | 16 +-- tests/testthat/test_linear_reg_stan.R | 49 ++++----- tests/testthat/test_logistic_reg.R | 3 +- tests/testthat/test_logistic_reg_glmnet.R | 1 + tests/testthat/test_logistic_reg_spark.R | 1 + tests/testthat/test_logistic_reg_stan.R | 1 + tests/testthat/test_mars.R | 61 +++++------ tests/testthat/test_misc.R | 5 +- tests/testthat/test_mlp_keras.R | 71 ++++++------- tests/testthat/test_mlp_nnet.R | 47 ++++----- tests/testthat/test_multinom_reg.R | 4 +- tests/testthat/test_multinom_reg_glmnet.R | 56 +++++----- tests/testthat/test_multinom_reg_keras.R | 22 ++-- tests/testthat/test_multinom_reg_nnet.R | 14 +-- tests/testthat/test_multinom_reg_spark.R | 20 ++-- tests/testthat/test_nearest_neighbor_kknn.R | 77 +++++++------- tests/testthat/test_nullmodel.R | 55 +++++----- tests/testthat/test_predict_formats.R | 17 +-- tests/testthat/test_rand_forest_ranger.R | 38 +++---- tests/testthat/test_rand_forest_spark.R | 18 ++-- tests/testthat/test_svm_liquidsvm.R | 35 ++++--- tests/testthat/test_svm_poly.R | 50 ++++----- tests/testthat/test_svm_rbf.R | 50 ++++----- 42 files changed, 665 insertions(+), 615 deletions(-) diff --git a/R/descriptors.R b/R/descriptors.R index 310409735..dc64f0737 100644 --- a/R/descriptors.R +++ b/R/descriptors.R @@ -26,28 +26,28 @@ #' column, `..y`. 
#' } #' -#' For example, if you use the model formula `Sepal.Width ~ .` with the `iris` +#' For example, if you use the model formula `mpg ~ .` with the `mtcars` #' data, the values would be #' \preformatted{ -#' .preds() = 4 (the 4 columns in `iris`) -#' .cols() = 5 (3 numeric columns + 2 from Species dummy variables) -#' .obs() = 150 +#' .preds() = 10 (the 10 columns in `mtcars`) +#' .cols() = 10 (10 numeric columns + 0 from dummy variables) +#' .obs() = 32 #' .lvls() = NA (no factor outcome) -#' .facts() = 1 (the Species predictor) -#' .y() = (Sepal.Width as a vector) -#' .x() = (The other 4 columns as a data frame) +#' .facts() = 0 (no factor outcome) +#' .y() = (mpg as a vector) +#' .x() = (The other 10 columns as a data frame) #' .dat() = (The full data set) #' } #' -#' If the formula `Species ~ .` where used: +#' If the formula `as.character(cyl) ~ .` where used: #' \preformatted{ -#' .preds() = 4 (the 4 numeric columns in `iris`) -#' .cols() = 4 (same) -#' .obs() = 150 -#' .lvls() = c(setosa = 50, versicolor = 50, virginica = 50) +#' .preds() = 10 (the 10 numeric columns in `mtcars`) +#' .cols() = 10 (same) +#' .obs() = 32 +#' .lvls() = c("4" = 11, "6" = 7, "8" = 14) #' .facts() = 0 -#' .y() = (Species as a vector) -#' .x() = (The other 4 columns as a data frame) +#' .y() = (as.character(cyl) as a vector) +#' .x() = (The other 10 columns as a data frame) #' .dat() = (The full data set) #' } #' diff --git a/R/model_object_docs.R b/R/model_object_docs.R index af46bc0e8..f7006a922 100644 --- a/R/model_object_docs.R +++ b/R/model_object_docs.R @@ -53,18 +53,18 @@ #' `parsnip` model functions do not do this. For example, using #' #'\preformatted{ -#' rand_forest(mtry = ncol(iris) - 1) +#' rand_forest(mtry = ncol(mtcars) - 1) #' } #' -#' **does not** execute `ncol(iris) - 1` when creating the specification. +#' **does not** execute `ncol(mtcars) - 1` when creating the specification. #' This can be seen in the output: #' #'\preformatted{ -#' > rand_forest(mtry = ncol(iris) - 1) +#' > rand_forest(mtry = ncol(mtcars) - 1) #' Random Forest Model Specification (unknown) #' #' Main Arguments: -#' mtry = ncol(iris) - 1 +#' mtry = ncol(mtcars) - 1 #'} #' #' The model functions save the argument _expressions_ and their @@ -102,14 +102,14 @@ #' object is small. For example, using #' #'\preformatted{ -#' rand_forest(mtry = ncol(!!iris) - 1) +#' rand_forest(mtry = ncol(!!mtcars) - 1) #' } #' #' would work (and be reproducible between sessions) but embeds -#' the entire iris data set into the `mtry` expression: +#' the entire mtcars data set into the `mtry` expression: #' #'\preformatted{ -#' > rand_forest(mtry = ncol(!!iris) - 1) +#' > rand_forest(mtry = ncol(!!mtcars) - 1) #' Random Forest Model Specification (unknown) #' #' Main Arguments: @@ -120,14 +120,14 @@ #' it, this wouldn't be too bad: #' #'\preformatted{ -#' > mtry_val <- ncol(iris) - 1 +#' > mtry_val <- ncol(mtcars) - 1 #' > mtry_val -#' [1] 4 +#' [1] 10 #' > rand_forest(mtry = !!mtry_val) #' Random Forest Model Specification (unknown) #' #' Main Arguments: -#' mtry = 4 +#' mtry = 10 #'} #' #' More information on quosures and quasiquotation can be found at diff --git a/R/nullmodel.R b/R/nullmodel.R index 336e2356b..fbb557cd8 100644 --- a/R/nullmodel.R +++ b/R/nullmodel.R @@ -182,7 +182,6 @@ null_model <- #' @return A tibble with column `value`. 
#' @export #' @examples -#' nullmodel(iris[,-5], iris$Species) %>% tidy() #' #' nullmodel(mtcars[,-1], mtcars$mpg) %>% tidy() diff --git a/man/descriptors.Rd b/man/descriptors.Rd index 22f44e5c2..b53ec6459 100644 --- a/man/descriptors.Rd +++ b/man/descriptors.Rd @@ -55,28 +55,28 @@ outcomes. If \code{fit_xy()} was used, the outcomes are attached as the column, \code{..y}. } -For example, if you use the model formula \code{Sepal.Width ~ .} with the \code{iris} +For example, if you use the model formula \code{mpg ~ .} with the \code{mtcars} data, the values would be \preformatted{ - .preds() = 4 (the 4 columns in `iris`) - .cols() = 5 (3 numeric columns + 2 from Species dummy variables) - .obs() = 150 + .preds() = 10 (the 10 columns in `mtcars`) + .cols() = 10 (10 numeric columns + 0 from dummy variables) + .obs() = 32 .lvls() = NA (no factor outcome) - .facts() = 1 (the Species predictor) - .y() = (Sepal.Width as a vector) - .x() = (The other 4 columns as a data frame) + .facts() = 0 (no factor outcome) + .y() = (mpg as a vector) + .x() = (The other 10 columns as a data frame) .dat() = (The full data set) } -If the formula \code{Species ~ .} where used: +If the formula \code{as.character(cyl) ~ .} where used: \preformatted{ - .preds() = 4 (the 4 numeric columns in `iris`) - .cols() = 4 (same) - .obs() = 150 - .lvls() = c(setosa = 50, versicolor = 50, virginica = 50) + .preds() = 10 (the 10 numeric columns in `mtcars`) + .cols() = 10 (same) + .obs() = 32 + .lvls() = c("4" = 11, "6" = 7, "8" = 14) .facts() = 0 - .y() = (Species as a vector) - .x() = (The other 4 columns as a data frame) + .y() = (as.character(cyl) as a vector) + .x() = (The other 10 columns as a data frame) .dat() = (The full data set) } diff --git a/man/model_spec.Rd b/man/model_spec.Rd index 54e19ac94..f61c9aba0 100644 --- a/man/model_spec.Rd +++ b/man/model_spec.Rd @@ -57,18 +57,18 @@ arguments. For example, when calling \code{mean(dat_vec)}, the object \code{parsnip} model functions do not do this. For example, using \preformatted{ - rand_forest(mtry = ncol(iris) - 1) + rand_forest(mtry = ncol(mtcars) - 1) } -\strong{does not} execute \code{ncol(iris) - 1} when creating the specification. +\strong{does not} execute \code{ncol(mtcars) - 1} when creating the specification. This can be seen in the output: \preformatted{ - > rand_forest(mtry = ncol(iris) - 1) + > rand_forest(mtry = ncol(mtcars) - 1) Random Forest Model Specification (unknown) Main Arguments: - mtry = ncol(iris) - 1 + mtry = ncol(mtcars) - 1 } The model functions save the argument \emph{expressions} and their @@ -106,14 +106,14 @@ model specification and might be the best idea when the data object is small. 
For example, using \preformatted{ - rand_forest(mtry = ncol(!!iris) - 1) + rand_forest(mtry = ncol(!!mtcars) - 1) } would work (and be reproducible between sessions) but embeds -the entire iris data set into the \code{mtry} expression: +the entire mtcars data set into the \code{mtry} expression: \preformatted{ - > rand_forest(mtry = ncol(!!iris) - 1) + > rand_forest(mtry = ncol(!!mtcars) - 1) Random Forest Model Specification (unknown) Main Arguments: @@ -124,14 +124,14 @@ However, if there were an object with the number of columns in it, this wouldn't be too bad: \preformatted{ - > mtry_val <- ncol(iris) - 1 + > mtry_val <- ncol(mtcars) - 1 > mtry_val - [1] 4 + [1] 10 > rand_forest(mtry = !!mtry_val) Random Forest Model Specification (unknown) Main Arguments: - mtry = 4 + mtry = 10 } More information on quosures and quasiquotation can be found at diff --git a/man/tidy.nullmodel.Rd b/man/tidy.nullmodel.Rd index 6ff19e9da..c48a073e9 100644 --- a/man/tidy.nullmodel.Rd +++ b/man/tidy.nullmodel.Rd @@ -18,7 +18,6 @@ A tibble with column \code{value}. Return the results of \code{nullmodel} as a tibble } \examples{ -nullmodel(iris[,-5], iris$Species) \%>\% tidy() nullmodel(mtcars[,-1], mtcars$mpg) \%>\% tidy() } diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index 5c9a41c07..70eb0fb2e 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -2,6 +2,7 @@ library(modeldata) data("wa_churn") data("lending_club") +data("hpc_data") # ------------------------------------------------------------------------------ diff --git a/tests/testthat/test_adds.R b/tests/testthat/test_adds.R index e693cc973..086defaba 100644 --- a/tests/testthat/test_adds.R +++ b/tests/testthat/test_adds.R @@ -4,14 +4,16 @@ library(dplyr) context("adding functions") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ test_that('adding row indicies', { - iris_2 <- iris %>% add_rowindex() - expect_true(nrow(iris_2) == 150) - expect_true(sum(names(iris_2) == ".row") == 1) - expect_true(is.integer(iris_2$.row)) + hpc_2 <- hpc %>% add_rowindex() + expect_true(nrow(hpc_2) == 150) + expect_true(sum(names(hpc_2) == ".row") == 1) + expect_true(is.integer(hpc_2$.row)) mtcar_2 <- dplyr::as_tibble(mtcars) %>% dplyr::slice(0) %>% add_rowindex() expect_true(nrow(mtcar_2) == 0) diff --git a/tests/testthat/test_boost_tree.R b/tests/testthat/test_boost_tree.R index f31adca8c..724de2831 100644 --- a/tests/testthat/test_boost_tree.R +++ b/tests/testthat/test_boost_tree.R @@ -6,6 +6,8 @@ library(rlang) context("boosted trees") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -134,11 +136,11 @@ test_that('bad input', { expect_error(boost_tree(mode = "bogus")) expect_error({ bt <- boost_tree(trees = -1) %>% set_engine("xgboost") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error({ bt <- boost_tree(min_n = -10) %>% set_engine("xgboost") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_message(translate(boost_tree(mode = "classification"), engine = NULL)) expect_error(translate(boost_tree(formula = y ~ x))) diff --git a/tests/testthat/test_boost_tree_spark.R b/tests/testthat/test_boost_tree_spark.R index 45b050031..647cfca98 100644 --- a/tests/testthat/test_boost_tree_spark.R +++ 
b/tests/testthat/test_boost_tree_spark.R @@ -6,21 +6,21 @@ library(dplyr) context("boosted tree execution with spark") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ test_that('spark execution', { skip_if_not_installed("sparklyr") - library(sparklyr) sc <- try(spark_connect(master = "local"), silent = TRUE) skip_if(inherits(sc, "try-error")) - iris_bt_tr <- copy_to(sc, iris[-(1:4), ], "iris_bt_tr", overwrite = TRUE) - iris_bt_te <- copy_to(sc, iris[ 1:4 , -1], "iris_bt_te", overwrite = TRUE) + hpc_bt_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_bt_tr", overwrite = TRUE) + hpc_bt_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_bt_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -30,8 +30,8 @@ test_that('spark execution', { boost_tree(trees = 5, mode = "regression") %>% set_engine("spark", seed = 12), control = ctrl, - Sepal_Length ~ ., - data = iris_bt_tr + class ~ ., + data = hpc_bt_tr ), regexp = NA ) @@ -43,29 +43,29 @@ test_that('spark execution', { boost_tree(trees = 5, mode = "regression") %>% set_engine("spark", seed = 12), control = ctrl, - Sepal_Length ~ ., - data = iris_bt_tr + compounds ~ ., + data = hpc_bt_tr ), regexp = NA ) expect_error( - spark_reg_pred <- predict(spark_reg_fit, iris_bt_te), + spark_reg_pred <- predict(spark_reg_fit, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te), + spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_dup <- predict(spark_reg_fit_dup, iris_bt_te), + spark_reg_dup <- predict(spark_reg_fit_dup, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te), + spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, hpc_bt_te), regexp = NA ) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index 8c70532b5..d181843ed 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -5,11 +5,11 @@ library(parsnip) context("boosted tree execution with xgboost") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] +num_pred <- names(hpc)[1:4] -num_pred <- names(iris)[1:4] - -iris_xgboost <- +hpc_xgboost <- boost_tree(trees = 2, mode = "classification") %>% set_engine("xgboost") @@ -21,18 +21,18 @@ test_that('xgboost execution, classification', { expect_error( res <- parsnip::fit( - iris_xgboost, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_xgboost, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA ) expect_error( res <- parsnip::fit_xy( - iris_xgboost, - x = iris[, num_pred], - y = iris$Species, + hpc_xgboost, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -43,9 +43,9 @@ test_that('xgboost execution, classification', { expect_error( res <- parsnip::fit( - iris_xgboost, - Species ~ novar, - data = iris, + hpc_xgboost, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -58,28 +58,28 @@ test_that('xgboost classification prediction', { library(xgboost) xy_fit <- fit_xy( - iris_xgboost, - x = iris[, num_pred], - y = iris$Species, + hpc_xgboost, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = 
"class") + xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE) - xy_pred <- factor(levels(iris$Species)[apply(xy_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + xy_pred <- factor(levels(hpc$class)[apply(xy_pred, 1, which.max)], levels = levels(hpc$class)) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( - iris_xgboost, - Species ~ ., - data = iris, + hpc_xgboost, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class") + form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE) - form_pred <- factor(levels(iris$Species)[apply(form_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + form_pred <- factor(levels(hpc$class)[apply(form_pred, 1, which.max)], levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) }) diff --git a/tests/testthat/test_convert_data.R b/tests/testthat/test_convert_data.R index c58364be2..48c0cf8fa 100644 --- a/tests/testthat/test_convert_data.R +++ b/tests/testthat/test_convert_data.R @@ -2,6 +2,9 @@ library(testthat) context("data conversion") library(parsnip) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # to go from lm_object$x results to our format format_x_for_test <- function(x, df = TRUE) { @@ -260,12 +263,12 @@ test_that("numeric x and numeric multivariate y", { test_that("numeric x and factor y", { expected <- expect_warning( - glm(Species ~ ., data = iris, x = TRUE, y = TRUE, family = binomial() + glm(class ~ ., data = hpc, x = TRUE, y = TRUE, family = binomial() ) ) - observed <- parsnip:::convert_form_to_xy_fit(Species ~ ., data = iris) + observed <- parsnip:::convert_form_to_xy_fit(class ~ ., data = hpc) expect_equal(format_x_for_test(expected$x), observed$x) - expect_equivalent(iris$Species, observed$y) + expect_equivalent(hpc$class, observed$y) expect_equal(expected$terms, observed$terms) expect_equal(expected$xlevels, observed$xlevels) expect_null(observed$weights) @@ -273,7 +276,7 @@ test_that("numeric x and factor y", { expect_equal( head(format_x_for_test(expected$x)), - parsnip:::convert_form_to_xy_new(observed, new_data = head(iris))$x + parsnip:::convert_form_to_xy_new(observed, new_data = head(hpc))$x ) }) @@ -445,22 +448,22 @@ test_that("1 col matrix x, 1 col matrix y", { test_that("matrix x, factor y", { - observed <- parsnip:::convert_xy_to_form_fit(as.matrix(iris[, -5]), iris$Species) - expected <- iris + observed <- parsnip:::convert_xy_to_form_fit(as.matrix(hpc[, -5]), hpc$class) + expected <- hpc names(expected)[5] <- "..y" expect_equal(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) - expect_equal(names(iris)[-5], observed$x_var) + expect_equal(names(hpc)[-5], observed$x_var) expect_null(observed$weights) }) test_that("data frame x, factor y", { - observed <- parsnip:::convert_xy_to_form_fit(iris[, -5], iris$Species) - expected <- iris + observed <- parsnip:::convert_xy_to_form_fit(hpc[, -5], hpc$class) + expected 
<- hpc names(expected)[5] <- "..y" expect_equivalent(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) - expect_equal(names(iris)[-5], observed$x_var) + expect_equal(names(hpc)[-5], observed$x_var) expect_null(observed$weights) }) diff --git a/tests/testthat/test_decision_tree.R b/tests/testthat/test_decision_tree.R index 87cbe1864..18720e7af 100644 --- a/tests/testthat/test_decision_tree.R +++ b/tests/testthat/test_decision_tree.R @@ -6,6 +6,8 @@ library(rlang) context("decision trees") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -124,11 +126,11 @@ test_that('bad input', { expect_error(decision_tree(mode = "bogus")) expect_error({ bt <- decision_tree(cost_complexity = -1) %>% set_engine("rpart") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error({ bt <- decision_tree(min_n = 0) %>% set_engine("rpart") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error(translate(decision_tree(), engine = NULL)) expect_error(translate(decision_tree(formula = y ~ x))) diff --git a/tests/testthat/test_descriptors.R b/tests/testthat/test_descriptors.R index fc74fb606..34390a989 100644 --- a/tests/testthat/test_descriptors.R +++ b/tests/testthat/test_descriptors.R @@ -1,6 +1,9 @@ library(testthat) library(parsnip) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("descriptor variables") @@ -26,7 +29,7 @@ eval_descrs <- function(descrs, not = NULL) { lapply(descrs, do.call, list()) } -species_tab <- table(iris$Species, dnn = NULL) +class_tab <- table(hpc_data$class, dnn = NULL) # ------------------------------------------------------------------------------ @@ -83,39 +86,39 @@ context("Testing formula -> xy conversion") test_that("numeric y and dummy vars", { expect_equal( - template(5, 4, 150, NA, 1, iris, iris[-2], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ ., data = iris)) + template(5, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ ., data = hpc)) ) expect_equal( - template(2, 1, 150, NA, 1, iris, iris["Species"], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ Species, data = iris)) + template(2, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ class, data = hpc)) ) }) test_that("numeric y and x", { expect_equal( - template(1, 1, 150, NA, 0, iris, iris["Sepal.Length"], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ Sepal.Length, data = iris)) + template(1, 1, 150, NA, 0, hpc, hpc["input_fields"], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ input_fields, data = hpc)) ) expect_equal( { - log_sep <- iris["Sepal.Length"] - log_sep[["Sepal.Length"]] <- log(log_sep[["Sepal.Length"]]) - names(log_sep) <- "log(Sepal.Length)" - template(1, 1, 150, NA, 0, iris, log_sep, iris[,"Sepal.Width"]) + log_sep <- hpc["input_fields"] + log_sep[["input_fields"]] <- log(log_sep[["input_fields"]]) + names(log_sep) <- "log(input_fields)" + template(1, 1, 150, NA, 0, hpc, log_sep, hpc[,"compounds"]) }, - eval_descrs(get_descr_form(Sepal.Width ~ log(Sepal.Length), data = iris)) + eval_descrs(get_descr_form(compounds ~ log(input_fields), data = hpc)) ) }) test_that("factor y", { expect_equal( - template(4, 4, 150, species_tab, 0, iris, iris[-5], 
iris[,"Species"]), - eval_descrs(get_descr_form(Species ~ ., data = iris)) + template(4, 4, 150, class_tab, 0, hpc, hpc[-5], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ ., data = hpc)) ) expect_equal( - template(1, 1, 150, species_tab, 0, iris, iris["Sepal.Length"], iris[,"Species"]), - eval_descrs(get_descr_form(Species ~ Sepal.Length, data = iris)) + template(1, 1, 150, class_tab, 0, hpc, hpc["input_fields"], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ input_fields, data = hpc)) ) }) @@ -128,26 +131,26 @@ test_that("factors all the way down", { }) test_that("weird cases", { - # So model.frame ignores - signs in a model formula so Species is not removed + # So model.frame ignores - signs in a model formula so class is not removed # prior to model.matrix; otherwise this should have n_cols = 3 expect_equal( - template(3, 4, 150, NA, 1, iris, iris[-2], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ . - Species, data = iris)) + template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ . - class, data = hpc)) ) # Oy ve! Before going to model.matrix, model.frame produces a data frame # with one column and that column is a matrix (with the results from - # `poly(Sepal.Length, 3)` - x <- model.frame(~poly(Sepal.Length, 3), iris) + # `poly(input_fields, 3)` + x <- model.frame(~poly(input_fields, 3), hpc) attributes(x) <- attributes(as.data.frame(x))[c("names", "class", "row.names")] expect_equal( - template(3, 1, 150, NA, 0, iris, x, iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ poly(Sepal.Length, 3), data = iris)) + template(3, 1, 150, NA, 0, hpc, x, hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ poly(input_fields, 3), data = hpc)) ) expect_equal( - template(0, 0, 150, NA, 0, iris, iris[,numeric()], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ 1, data = iris)) + template(0, 0, 150, NA, 0, hpc, hpc[,numeric()], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ 1, data = hpc)) ) }) @@ -156,24 +159,24 @@ test_that("weird cases", { context("Testing xy -> formula conversion") test_that("numeric y and dummy vars", { - iris2 <- dplyr::rename(iris, ..y = Species) - rownames(iris2) <- rownames(iris2) # convert to char + hpc2 <- dplyr::rename(hpc, ..y = class) + rownames(hpc2) <- rownames(hpc2) # convert to char expect_equal( - template(4, 4, 150, species_tab, 0, iris2, iris[, 1:4], iris$Species), - eval_descrs(get_descr_xy(x = iris[, 1:4], y = iris$Species)) + template(4, 4, 150, class_tab, 0, hpc2, hpc[, 1:4], hpc$class), + eval_descrs(get_descr_xy(x = hpc[, 1:4], y = hpc$class)) ) - iris2 <- iris[,c(4,5,1,2)] - rownames(iris2) <- rownames(iris2) + hpc2 <- hpc[,c(4,5,1,2)] + rownames(hpc2) <- rownames(hpc2) expect_equal( - template(2, 2, 150, NA, 1, iris2, iris[,4:5], iris[,1:2]), - eval_descrs(get_descr_xy(x = iris[, 4:5], y = iris[, 1:2])) + template(2, 2, 150, NA, 1, hpc2, hpc[,4:5], hpc[,1:2]), + eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1:2])) ) - iris3 <- iris2[,c("Petal.Width", "Species", "Sepal.Length")] + hpc3 <- hpc2[,c("iterations", "class", "input_fields")] expect_equal( - template(2, 2, 150, NA, 1, iris3, iris[, 4:5], iris[, 1, drop = FALSE]), - eval_descrs(get_descr_xy(x = iris[, 4:5], y = iris[, 1, drop = FALSE])) + template(2, 2, 150, NA, 1, hpc3, hpc[, 4:5], hpc[, 1, drop = FALSE]), + eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1, drop = FALSE])) ) }) @@ -193,7 +196,7 @@ test_that("spark descriptor", { skip_if(inherits(sc, 
"try-error")) npk_descr <- copy_to(sc, npk[, 1:4], "npk_descr", overwrite = TRUE) - iris_descr <- copy_to(sc, iris, "iris_descr", overwrite = TRUE) + hpc_descr <- copy_to(sc, hpc, "hpc_descr", overwrite = TRUE) # spark does not allow .x, .y, .dat template2 <- purrr::partial(template, x = NULL, y = NULL, dat = NULL) @@ -201,23 +204,23 @@ test_that("spark descriptor", { expect_equal( template2(5, 4, 150, NA, 1), - eval_descrs2(get_descr_form(Sepal_Width ~ ., data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ ., data = hpc_descr)) ) expect_equal( template2(2, 1, 150, NA, 1), - eval_descrs2(get_descr_form(Sepal_Width ~ Species, data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ class, data = hpc_descr)) ) expect_equal( template2(1, 1, 150, NA, 0), - eval_descrs2(get_descr_form(Sepal_Width ~ Sepal_Length, data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ Sepal_Length, data = hpc_descr)) ) expect_equivalent( - template2(4, 4, 150, species_tab, 0), - eval_descrs2(get_descr_form(Species ~ ., data = iris_descr)) + template2(4, 4, 150, class_tab, 0), + eval_descrs2(get_descr_form(class ~ ., data = hpc_descr)) ) expect_equal( - template2(1, 1, 150, species_tab, 0), - eval_descrs2(get_descr_form(Species ~ Sepal_Length, data = iris_descr)) + template2(1, 1, 150, class_tab, 0), + eval_descrs2(get_descr_form(class ~ Sepal_Length, data = hpc_descr)) ) expect_equivalent( template2(7, 3, 24, rev(table(npk$K, dnn = NULL)), 3), diff --git a/tests/testthat/test_failed_models.R b/tests/testthat/test_failed_models.R index dae099ff2..4ab1510c0 100644 --- a/tests/testthat/test_failed_models.R +++ b/tests/testthat/test_failed_models.R @@ -3,14 +3,17 @@ library(parsnip) library(dplyr) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("prediciton with failed models") # ------------------------------------------------------------------------------ -iris_bad <- - iris %>% +hpc_bad <- + hpc %>% mutate(big_num = Inf) lending_club <- @@ -30,15 +33,15 @@ test_that('numeric model', { lm_mod <- linear_reg() %>% set_engine("lm") %>% - fit(Sepal.Length ~ ., data = iris_bad, control = ctrl) + fit(compounds ~ ., data = hpc_bad, control = ctrl) - expect_warning(num_res <- predict(lm_mod, iris_bad[1:11, -1])) + expect_warning(num_res <- predict(lm_mod, hpc_bad[1:11, -1])) expect_equal(num_res, NULL) - expect_warning(ci_res <- predict(lm_mod, iris_bad[1:11, -1], type = "conf_int")) + expect_warning(ci_res <- predict(lm_mod, hpc_bad[1:11, -1], type = "conf_int")) expect_equal(ci_res, NULL) - expect_warning(pi_res <- predict(lm_mod, iris_bad[1:11, -1], type = "pred_int")) + expect_warning(pi_res <- predict(lm_mod, hpc_bad[1:11, -1], type = "pred_int")) expect_equal(pi_res, NULL) }) diff --git a/tests/testthat/test_fit_interfaces.R b/tests/testthat/test_fit_interfaces.R index e4fe0f495..c3b59954d 100644 --- a/tests/testthat/test_fit_interfaces.R +++ b/tests/testthat/test_fit_interfaces.R @@ -3,6 +3,9 @@ context("fit interfaces") library(parsnip) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + f <- y ~ x smod <- surv_reg() @@ -20,21 +23,21 @@ tester_xy <- test_that('good args', { - expect_equal( tester(NULL, formula = f, data = iris, model = rmod), "formula") - expect_equal(tester_xy(NULL, x = iris, y = iris, model = rmod), "data.frame") - expect_equal( tester(NULL, f, data = iris, model = rmod), "formula") + expect_equal( 
tester(NULL, formula = f, data = hpc, model = rmod), "formula") + expect_equal(tester_xy(NULL, x = hpc, y = hpc, model = rmod), "data.frame") + expect_equal( tester(NULL, f, data = hpc, model = rmod), "formula") expect_equal( tester(NULL, f, data = sprk, model = rmod), "formula") }) #test_that('unnamed args', { -# expect_error(tester(NULL, iris, y = iris, model = rmod)) -# expect_error(tester(NULL, data = iris, model = rmod)) +# expect_error(tester(NULL, hpc, y = hpc, model = rmod)) +# expect_error(tester(NULL, data = hpc, model = rmod)) #}) # test_that('wrong args', { - expect_error(tester_xy(NULL, x = sprk, y = iris, model = rmod)) - expect_error(tester_xy(NULL, x = iris, y = iris$Sepal.Length, model = smod)) - expect_error(tester(NULL, f, data = as.matrix(iris[, 1:4]))) + expect_error(tester_xy(NULL, x = sprk, y = hpc, model = rmod)) + expect_error(tester_xy(NULL, x = hpc, y = hpc$compounds, model = smod)) + expect_error(tester(NULL, f, data = as.matrix(hpc[, 1:4]))) }) test_that('single column df for issue #129', { diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index 470266a4b..05cee1854 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -8,7 +8,7 @@ library(tibble) context("linear regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -203,15 +203,15 @@ test_that('bad input', { expect_error(translate(linear_reg(), engine = "wat?")) expect_error(translate(linear_reg(), engine = NULL)) expect_error(translate(linear_reg(formula = y ~ x))) - expect_error(translate(linear_reg(x = iris[,1:3], y = iris$Species) %>% set_engine("glmnet"))) + expect_error(translate(linear_reg(x = hpc[,1:3], y = hpc$class) %>% set_engine("glmnet"))) expect_error(translate(linear_reg(formula = y ~ x) %>% set_engine("lm"))) }) # ------------------------------------------------------------------------------ -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg() %>% set_engine("lm") +num_pred <- names(hpc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg() %>% set_engine("lm") # ------------------------------------------------------------------------------ @@ -219,9 +219,9 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ), regexp = NA @@ -230,9 +230,9 @@ test_that('lm execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ), regexp = NA @@ -240,17 +240,17 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ) lm_form_catch <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = caught_ctrl ) expect_true(inherits(lm_form_catch$fit, "try-error")) @@ -259,9 +259,9 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, iterations) ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -269,9 +269,9 @@ test_that('lm execution', { expect_error( res <- fit_xy( - 
iris_basic, - x = iris[, 1:2], - y = iris[3:4], + hpc_basic, + x = hpc[, 1:2], + y = hpc[3:4], control = ctrl ), regexp = NA @@ -279,59 +279,59 @@ test_that('lm execution', { }) test_that('lm prediction', { - uni_lm <- lm(Sepal.Length ~ Sepal.Width + Petal.Width + Petal.Length, data = iris) - uni_pred <- unname(predict(uni_lm, newdata = iris[1:5, ])) - inl_lm <- lm(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) - inl_pred <- unname(predict(inl_lm, newdata = iris[1:5, ])) - mv_lm <- lm(cbind(Sepal.Width, Petal.Width) ~ ., data = iris) - mv_pred <- as_tibble(predict(mv_lm, newdata = iris[1:5, ])) - names(mv_pred) <- c(".pred_Sepal.Width", ".pred_Petal.Width") + uni_lm <- lm(input_fields ~ compounds + iterations + num_pending, data = hpc) + uni_pred <- unname(predict(uni_lm, newdata = hpc[1:5, ])) + inl_lm <- lm(input_fields ~ log(compounds) + class, data = hpc) + inl_pred <- unname(predict(inl_lm, newdata = hpc[1:5, ])) + mv_lm <- lm(cbind(compounds, iterations) ~ ., data = hpc) + mv_pred <- as_tibble(predict(mv_lm, newdata = hpc[1:5, ])) + names(mv_pred) <- c(".pred_compounds", ".pred_iterations") res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, iterations) ~ ., + data = hpc, control = ctrl ) - expect_equal(mv_pred, predict(res_mv, iris[1:5,])) + expect_equal(mv_pred, predict(res_mv, hpc[1:5,])) }) test_that('lm intervals', { - stats_lm <- lm(Sepal.Length ~ Sepal.Width + Petal.Width + Petal.Length, - data = iris) - confidence_lm <- predict(stats_lm, newdata = iris[1:5, ], + stats_lm <- lm(input_fields ~ compounds + iterations + num_pending, + data = hpc) + confidence_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "confidence") - prediction_lm <- predict(stats_lm, newdata = iris[1:5, ], + prediction_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "prediction") res_xy <- fit_xy( linear_reg() %>% set_engine("lm"), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) confidence_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "conf_int", level = 0.93) @@ -340,7 +340,7 @@ test_that('lm intervals', { prediction_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "pred_int", level = 0.93) @@ -351,12 +351,12 @@ test_that('lm intervals', { test_that('newdata error trapping', { res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) - expect_error(predict(res_xy, newdata = iris[1:3, num_pred]), "Did you mean") + expect_error(predict(res_xy, newdata = hpc[1:3, num_pred]), "Did you mean") }) test_that('default engine', { diff --git a/tests/testthat/test_linear_reg_glmnet.R b/tests/testthat/test_linear_reg_glmnet.R index b4a4f7789..4b64b7530 100644 --- 
a/tests/testthat/test_linear_reg_glmnet.R +++ b/tests/testthat/test_linear_reg_glmnet.R @@ -7,11 +7,12 @@ library(tidyr) context("linear regression execution with glmnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg(penalty = .1, mixture = .3) %>% +num_pred <- c("compounds", "iterations", "num_pending") +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg(penalty = .1, mixture = .3) %>% set_engine("glmnet", nlambda = 15) no_lambda <- linear_reg(mixture = .3) %>% set_engine("glmnet") @@ -25,10 +26,10 @@ test_that('glmnet execution', { expect_error( res <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ), regexp = NA ) @@ -38,17 +39,17 @@ test_that('glmnet execution', { expect_error( fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ) glmnet_xy_catch <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = factor(iris$Sepal.Length), + hpc_basic, + x = hpc[, num_pred], + y = factor(hpc$input_fields), control = caught_ctrl ) expect_true(inherits(glmnet_xy_catch$fit, "try-error")) @@ -61,28 +62,28 @@ test_that('glmnet prediction, single lambda', { skip_if(run_glmnet) res_xy <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) uni_pred <- c(5.05125589060219, 4.86977761622526, 4.90912345599309, 4.93931874108359, 5.08755154547758) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred, tolerance = 0.0001) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.0001) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) form_pred <- c(5.23960117346944, 5.08769210344022, 5.15129212608077, 5.12000510716518, 5.26736239856889) - expect_equal(form_pred, predict(res_form, iris[1:5,])$.pred, tolerance = 0.0001) + expect_equal(form_pred, predict(res_form, hpc[1:5,])$.pred, tolerance = 0.0001) }) @@ -93,19 +94,19 @@ test_that('glmnet prediction, multiple lambda', { lams <- c(.01, 0.1) - iris_mult <- linear_reg(penalty = lams, mixture = .3) %>% + hpc_mult <- linear_reg(penalty = lams, mixture = .3) %>% set_engine("glmnet") res_xy <- fit_xy( - iris_mult, + hpc_mult, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) # mult_pred <- # predict(res_xy$fit, - # newx = as.matrix(iris[1:5, num_pred]), + # newx = as.matrix(hpc[1:5, num_pred]), # s = lams) # mult_pred <- stack(as.data.frame(mult_pred)) # mult_pred$penalty <- rep(lams, each = 5) @@ -131,20 +132,20 @@ test_that('glmnet prediction, multiple lambda', { expect_equal( as.data.frame(mult_pred), - multi_predict(res_xy, new_data = iris[1:5, num_pred], lambda = lams) %>% + multi_predict(res_xy, new_data = hpc[1:5, num_pred], lambda = lams) %>% unnest(cols = c(.pred)) %>% as.data.frame(), tolerance = 0.0001 ) res_form <- fit( - iris_mult, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_mult, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - # form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) + # form_mat <- model.matrix(input_fields ~ log(compounds) + 
class, data = hpc) # form_mat <- form_mat[1:5, -1] # # form_pred <- @@ -176,7 +177,7 @@ test_that('glmnet prediction, multiple lambda', { expect_equal( as.data.frame(form_pred), - multi_predict(res_form, new_data = iris[1:5, ], lambda = lams) %>% + multi_predict(res_form, new_data = hpc[1:5, ], lambda = lams) %>% unnest(cols = c(.pred)) %>% as.data.frame(), tolerance = 0.0001 @@ -188,17 +189,17 @@ test_that('glmnet prediction, all lambda', { skip_if_not_installed("glmnet") skip_if(run_glmnet) - iris_all <- linear_reg(mixture = .3) %>% + hpc_all <- linear_reg(mixture = .3) %>% set_engine("glmnet") res_xy <- fit_xy( - iris_all, + hpc_all, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) - all_pred <- predict(res_xy$fit, newx = as.matrix(iris[1:5, num_pred])) + all_pred <- predict(res_xy$fit, newx = as.matrix(hpc[1:5, num_pred])) all_pred <- stack(as.data.frame(all_pred)) all_pred$penalty <- rep(res_xy$fit$lambda, each = 5) all_pred$rows <- rep(1:5, 2) @@ -207,16 +208,16 @@ test_that('glmnet prediction, all lambda', { names(all_pred) <- c("penalty", ".pred") all_pred <- tibble::as_tibble(all_pred) - expect_equal(all_pred, multi_predict(res_xy, new_data = iris[1:5,num_pred ]) %>% unnest(cols = c(.pred))) + expect_equal(all_pred, multi_predict(res_xy, new_data = hpc[1:5,num_pred ]) %>% unnest(cols = c(.pred))) res_form <- fit( - iris_all, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_all, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) + form_mat <- model.matrix(input_fields ~ log(compounds) + class, data = hpc) form_mat <- form_mat[1:5, -1] form_pred <- predict(res_form$fit, newx = form_mat) @@ -228,7 +229,7 @@ test_that('glmnet prediction, all lambda', { names(form_pred) <- c("penalty", ".pred") form_pred <- tibble::as_tibble(form_pred) - expect_equal(form_pred, multi_predict(res_form, iris[1:5, c("Sepal.Width", "Species")]) %>% unnest(cols = c(.pred))) + expect_equal(form_pred, multi_predict(res_form, hpc[1:5, c("compounds", "class")]) %>% unnest(cols = c(.pred))) }) diff --git a/tests/testthat/test_linear_reg_keras.R b/tests/testthat/test_linear_reg_keras.R index 0d16bd0cb..0e8bc20c9 100644 --- a/tests/testthat/test_linear_reg_keras.R +++ b/tests/testthat/test_linear_reg_keras.R @@ -8,6 +8,7 @@ library(tibble) context("keras linear regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -33,8 +34,8 @@ test_that('model fitting', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -45,8 +46,8 @@ test_that('model fitting', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -56,8 +57,8 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -68,8 +69,8 @@ test_that('model fitting', { fit_xy( ridge_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -77,8 +78,8 @@ test_that('model fitting', { expect_error( fit( ridge_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), 
regexp = NA @@ -98,18 +99,18 @@ test_that('regression prediction', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ) keras_pred <- - predict(lm_fit$fit, as.matrix(iris[1:3,2:4])) + predict(lm_fit$fit, as.matrix(hpc[1:3,2:4])) colnames(keras_pred) <- ".pred" keras_pred <- keras_pred %>% as_tibble() - parsnip_pred <- predict(lm_fit, iris[1:3,2:4]) + parsnip_pred <- predict(lm_fit, hpc[1:3,2:4]) expect_equal(as.data.frame(keras_pred), as.data.frame(parsnip_pred)) set.seed(257) @@ -117,15 +118,15 @@ test_that('regression prediction', { fit_xy( ridge_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ) - keras_pred <- predict(rr_fit$fit, as.matrix(iris[1:3,2:4])) + keras_pred <- predict(rr_fit$fit, as.matrix(hpc[1:3,2:4])) colnames(keras_pred) <- ".pred" keras_pred <- as_tibble(keras_pred) - parsnip_pred <- predict(rr_fit, iris[1:3,2:4]) + parsnip_pred <- predict(rr_fit, hpc[1:3,2:4]) expect_equal(as.data.frame(keras_pred), as.data.frame(parsnip_pred)) }) diff --git a/tests/testthat/test_linear_reg_spark.R b/tests/testthat/test_linear_reg_spark.R index fc1c02b71..d85e46f1f 100644 --- a/tests/testthat/test_linear_reg_spark.R +++ b/tests/testthat/test_linear_reg_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("linear regression execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,8 +20,8 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_linreg_tr <- copy_to(sc, iris[-(1:4), ], "iris_linreg_tr", overwrite = TRUE) - iris_linreg_te <- copy_to(sc, iris[ 1:4 , -1], "iris_linreg_te", overwrite = TRUE) + hpc_linreg_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_linreg_tr", overwrite = TRUE) + hpc_linreg_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_linreg_te", overwrite = TRUE) expect_error( spark_fit <- @@ -29,7 +29,7 @@ test_that('spark execution', { linear_reg() %>% set_engine("spark"), control = ctrl, Sepal_Length ~ ., - data = iris_linreg_tr + data = hpc_linreg_tr ), regexp = NA ) @@ -38,17 +38,17 @@ test_that('spark execution', { expect_equal(multi_predict_args(spark_fit), NA_character_) expect_error( - spark_pred <- predict(spark_fit, iris_linreg_te), + spark_pred <- predict(spark_fit, hpc_linreg_te), regexp = NA ) expect_error( - spark_pred_num <- predict(spark_fit, iris_linreg_te), + spark_pred_num <- predict(spark_fit, hpc_linreg_te), regexp = NA ) - lm_fit <- lm(Sepal.Length ~ ., data = iris[-(1:4), ]) - lm_pred <- unname(predict(lm_fit, iris[ 1:4 , -1])) + lm_fit <- lm(compounds ~ ., data = hpc[-(1:4), ]) + lm_pred <- unname(predict(lm_fit, hpc[ 1:4 , -1])) expect_equal(as.data.frame(spark_pred)$pred, lm_pred) expect_equal(as.data.frame(spark_pred_num)$pred, lm_pred) diff --git a/tests/testthat/test_linear_reg_stan.R b/tests/testthat/test_linear_reg_stan.R index 33a88ded8..b77a5241c 100644 --- a/tests/testthat/test_linear_reg_stan.R +++ b/tests/testthat/test_linear_reg_stan.R @@ -2,13 +2,16 @@ library(testthat) library(parsnip) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("linear regression execution with stan") -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg() %>% +num_pred <- c("compounds", "iterations", "num_pending") 
+hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg() %>% set_engine("stan", seed = 10, chains = 1) ctrl <- control_parsnip(verbosity = 0L, catch = FALSE) @@ -23,18 +26,18 @@ test_that('stan_glm execution', { expect_error( res <- fit( - iris_basic, - Sepal.Width ~ log(Sepal.Length) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ), regexp = NA ) expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ), regexp = NA @@ -45,9 +48,9 @@ test_that('stan_glm execution', { expect_error( res <- fit( - iris_basic, - Species ~ term, - data = iris, + hpc_basic, + class ~ term, + data = hpc, control = ctrl ) ) @@ -67,20 +70,20 @@ test_that('stan prediction', { res_xy <- fit_xy( linear_reg() %>% set_engine("stan", seed = 10, chains = 1), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = quiet_ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred, tolerance = 0.001) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.001) res_form <- fit( - iris_basic, - Sepal.Width ~ log(Sepal.Length) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = quiet_ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred, tolerance = 0.001) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred, tolerance = 0.001) }) @@ -91,20 +94,20 @@ test_that('stan intervals', { res_xy <- fit_xy( linear_reg() %>% set_engine("stan", seed = 1333, chains = 10, iter = 1000), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = quiet_ctrl ) confidence_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "conf_int", level = 0.93) prediction_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "pred_int", level = 0.93) diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 610b7cbd1..c23ca110e 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -8,6 +8,7 @@ library(tibble) context("logistic regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -212,7 +213,7 @@ test_that('updating', { test_that('bad input', { expect_error(logistic_reg(mode = "regression")) expect_error(translate(logistic_reg(formula = y ~ x))) - expect_error(translate(logistic_reg(x = iris[,1:3], y = iris$Species) %>% set_engine(engine = "glmnet"))) + expect_error(translate(logistic_reg(x = hpc[,1:3], y = hpc$class) %>% set_engine(engine = "glmnet"))) expect_error(translate(logistic_reg(formula = y ~ x) %>% set_engine(engine = "glm"))) }) diff --git a/tests/testthat/test_logistic_reg_glmnet.R b/tests/testthat/test_logistic_reg_glmnet.R index 0797e65c6..cd5d0b358 100644 --- a/tests/testthat/test_logistic_reg_glmnet.R +++ b/tests/testthat/test_logistic_reg_glmnet.R @@ -8,6 +8,7 @@ library(tidyr) context("logistic regression execution with glmnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] lending_club <- head(lending_club, 200) lc_form <- as.formula(Class ~ log(funded_amnt) + int_rate) diff --git a/tests/testthat/test_logistic_reg_spark.R 
b/tests/testthat/test_logistic_reg_spark.R index 429fa6822..d7cba9482 100644 --- a/tests/testthat/test_logistic_reg_spark.R +++ b/tests/testthat/test_logistic_reg_spark.R @@ -6,6 +6,7 @@ library(dplyr) context("logistic regression execution with spark") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ diff --git a/tests/testthat/test_logistic_reg_stan.R b/tests/testthat/test_logistic_reg_stan.R index 7b803853d..5f599300c 100644 --- a/tests/testthat/test_logistic_reg_stan.R +++ b/tests/testthat/test_logistic_reg_stan.R @@ -7,6 +7,7 @@ library(tibble) context("execution tests for stan logistic regression") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] lending_club <- head(lending_club, 200) diff --git a/tests/testthat/test_mars.R b/tests/testthat/test_mars.R index 2d0d31ab9..a6361bbaf 100644 --- a/tests/testthat/test_mars.R +++ b/tests/testthat/test_mars.R @@ -7,6 +7,7 @@ library(rlang) context("mars tests") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -114,9 +115,9 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- mars(mode = "regression") %>% set_engine("earth") +num_pred <- colnames(hpc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- mars(mode = "regression") %>% set_engine("earth") # ------------------------------------------------------------------------------ @@ -125,9 +126,9 @@ test_that('mars execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ), regexp = NA @@ -136,9 +137,9 @@ test_that('mars execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending, control = ctrl ), regexp = NA @@ -150,9 +151,9 @@ test_that('mars execution', { expect_message( expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ), @@ -163,9 +164,9 @@ test_that('mars execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -173,9 +174,9 @@ test_that('mars execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, 1:2], - y = iris[3:4], + hpc_basic, + x = hpc[, 1:2], + y = hpc[3:4], control = ctrl ), regexp = NA @@ -200,31 +201,31 @@ test_that('mars prediction', { )), class = "data.frame", row.names = c(NA, -5L)) res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending, control = ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, 
predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc, control = ctrl ) expect_equal( setNames(mv_pred, paste0(".pred_", names(mv_pred))) %>% as.data.frame(), - predict(res_mv, iris[1:5,]) %>% as.data.frame() + predict(res_mv, hpc[1:5,]) %>% as.data.frame() ) }) diff --git a/tests/testthat/test_misc.R b/tests/testthat/test_misc.R index a20a5b0fc..ab4216c2a 100644 --- a/tests/testthat/test_misc.R +++ b/tests/testthat/test_misc.R @@ -3,6 +3,9 @@ context("checking for multi_predict") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + test_that('parsnip objects', { lm_idea <- linear_reg() %>% set_engine("lm") @@ -76,7 +79,7 @@ test_that('S3 method dispatch/registration', { null_model() %>% set_engine("parsnip") %>% set_mode("classification") %>% - fit(Species ~ ., data = iris) %>% + fit(class ~ ., data = hpc) %>% tidy(), regex = NA ) diff --git a/tests/testthat/test_mlp_keras.R b/tests/testthat/test_mlp_keras.R index 810b459e1..629c602ff 100644 --- a/tests/testthat/test_mlp_keras.R +++ b/tests/testthat/test_mlp_keras.R @@ -6,11 +6,12 @@ library(tibble) context("simple neural network execution with keras") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- names(iris)[1:4] +num_pred <- names(hpc)[1:4] -iris_keras <- +hpc_keras <- mlp(mode = "classification", hidden_units = 2, epochs = 10) %>% set_engine("keras", verbose = 0) @@ -24,9 +25,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit( - iris_keras, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_keras, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA @@ -40,9 +41,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -52,9 +53,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit( - iris_keras, - Species ~ novar, - data = iris, + hpc_keras, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -67,28 +68,28 @@ test_that('keras classification prediction', { library(keras) xy_fit <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- keras::predict_classes(xy_fit$fit, x = as.matrix(iris[1:8, num_pred])) - xy_pred <- factor(levels(iris$Species)[xy_pred + 1], levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")[[".pred_class"]]) + xy_pred <- keras::predict_classes(xy_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + xy_pred <- factor(levels(hpc$class)[xy_pred + 1], levels = levels(hpc$class)) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")[[".pred_class"]]) keras::backend()$clear_session() form_fit <- parsnip::fit( - iris_keras, - Species ~ ., - data = iris, + hpc_keras, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- keras::predict_classes(form_fit$fit, x = as.matrix(iris[1:8, num_pred])) - form_pred <- factor(levels(iris$Species)[form_pred + 1], levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")[[".pred_class"]]) + form_pred <- keras::predict_classes(form_fit$fit, x = as.matrix(hpc[1:8, 
num_pred])) + form_pred <- factor(levels(hpc$class)[form_pred + 1], levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")[[".pred_class"]]) keras::backend()$clear_session() }) @@ -99,30 +100,30 @@ test_that('keras classification probabilities', { skip_if_not_installed("keras") xy_fit <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- keras::predict_proba(xy_fit$fit, x = as.matrix(iris[1:8, num_pred])) - colnames(xy_pred) <- paste0(".pred_", levels(iris$Species)) + xy_pred <- keras::predict_proba(xy_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + colnames(xy_pred) <- paste0(".pred_", levels(hpc$class)) xy_pred <- as_tibble(xy_pred) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "prob")) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "prob")) keras::backend()$clear_session() form_fit <- parsnip::fit( - iris_keras, - Species ~ ., - data = iris, + hpc_keras, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- keras::predict_proba(form_fit$fit, x = as.matrix(iris[1:8, num_pred])) - colnames(form_pred) <- paste0(".pred_", levels(iris$Species)) + form_pred <- keras::predict_proba(form_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + colnames(form_pred) <- paste0(".pred_", levels(hpc$class)) form_pred <- as_tibble(form_pred) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "prob")) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "prob")) keras::backend()$clear_session() }) diff --git a/tests/testthat/test_mlp_nnet.R b/tests/testthat/test_mlp_nnet.R index 127ee824e..37bcb9e6f 100644 --- a/tests/testthat/test_mlp_nnet.R +++ b/tests/testthat/test_mlp_nnet.R @@ -5,10 +5,11 @@ library(parsnip) context("simple neural network execution with nnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- names(iris)[1:4] +num_pred <- names(hpc)[1:4] -iris_nnet <- +hpc_nnet <- mlp(mode = "classification", hidden_units = 5) %>% set_engine("nnet") @@ -20,9 +21,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit( - iris_nnet, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_nnet, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA @@ -31,9 +32,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit_xy( - iris_nnet, - x = iris[, num_pred], - y = iris$Species, + hpc_nnet, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -41,9 +42,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit( - iris_nnet, - Species ~ novar, - data = iris, + hpc_nnet, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -55,26 +56,26 @@ test_that('nnet classification prediction', { skip_if_not_installed("nnet") xy_fit <- fit_xy( - iris_nnet, - x = iris[, num_pred], - y = iris$Species, + hpc_nnet, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- predict(xy_fit$fit, newdata = iris[1:8, num_pred], type = "class") - xy_pred <- factor(xy_pred, levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + xy_pred <- predict(xy_fit$fit, newdata = hpc[1:8, num_pred], type = "class") + xy_pred <- factor(xy_pred, levels = levels(hpc$class)) + expect_equal(xy_pred, 
predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( - iris_nnet, - Species ~ ., - data = iris, + hpc_nnet, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- predict(form_fit$fit, newdata = iris[1:8, num_pred], type = "class") - form_pred <- factor(form_pred, levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred])$.pred_class) + form_pred <- predict(form_fit$fit, newdata = hpc[1:8, num_pred], type = "class") + form_pred <- factor(form_pred, levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred])$.pred_class) }) diff --git a/tests/testthat/test_multinom_reg.R b/tests/testthat/test_multinom_reg.R index 46b2082bb..fbec18322 100644 --- a/tests/testthat/test_multinom_reg.R +++ b/tests/testthat/test_multinom_reg.R @@ -6,6 +6,8 @@ library(rlang) context("multinom regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -115,5 +117,5 @@ test_that('bad input', { expect_error(multinom_reg(mode = "regression")) expect_error(translate(multinom_reg() %>% set_engine("wat?"))) expect_error(translate(multinom_reg() %>% set_engine())) - expect_warning(translate(multinom_reg() %>% set_engine("glmnet", x = iris[,1:3], y = iris$Species))) + expect_warning(translate(multinom_reg() %>% set_engine("glmnet", x = hpc[,1:3], y = hpc$class))) }) diff --git a/tests/testthat/test_multinom_reg_glmnet.R b/tests/testthat/test_multinom_reg_glmnet.R index 22aa3368d..212b1ea29 100644 --- a/tests/testthat/test_multinom_reg_glmnet.R +++ b/tests/testthat/test_multinom_reg_glmnet.R @@ -7,7 +7,7 @@ library(tibble) context("multinom regression execution with glmnet") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] rows <- c(1, 51, 101) @@ -22,8 +22,8 @@ test_that('glmnet execution', { res <- fit_xy( multinom_reg() %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ), regexp = NA ) @@ -34,8 +34,8 @@ test_that('glmnet execution', { expect_error( glmnet_xy_catch <- fit_xy( multinom_reg() %>% set_engine("glmnet"), - x = iris[, 2:5], - y = iris$Sepal.Length, + x = hpc[, 2:5], + y = hpc$compounds, control = caught_ctrl ) ) @@ -50,27 +50,27 @@ test_that('glmnet prediction, one lambda', { xy_fit <- fit_xy( multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ) uni_pred <- predict(xy_fit$fit, - newx = as.matrix(iris[rows, 1:4]), + newx = as.matrix(hpc[rows, 1:4]), s = xy_fit$spec$args$penalty, type = "class") - uni_pred <- factor(uni_pred[,1], levels = levels(iris$Species)) + uni_pred <- factor(uni_pred[,1], levels = levels(hpc$class)) uni_pred <- unname(uni_pred) - expect_equal(uni_pred, predict(xy_fit, iris[rows, 1:4], type = "class")$.pred_class) + expect_equal(uni_pred, predict(xy_fit, hpc[rows, 1:4], type = "class")$.pred_class) res_form <- fit( multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), - Species ~ log(Sepal.Width) + Petal.Width, - data = iris, + class ~ log(compounds) + input_fields, + data = hpc, control = ctrl ) - form_mat <- model.matrix(Species ~ log(Sepal.Width) + Petal.Width, data = iris) + form_mat <- model.matrix(class ~ log(compounds) + input_fields, data = hpc) form_mat <- form_mat[rows, -1] form_pred <- @@ -78,9 +78,9 @@ test_that('glmnet prediction, one 
lambda', { newx = form_mat, s = res_form$spec$args$penalty, type = "class") - form_pred <- factor(form_pred[,1], levels = levels(iris$Species)) - expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, iris[rows, c("Sepal.Width", "Petal.Width")])) - expect_equal(form_pred, predict(res_form, iris[rows, c("Sepal.Width", "Petal.Width")], type = "class")$.pred_class) + form_pred <- factor(form_pred[,1], levels = levels(hpc$class)) + expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, hpc[rows, c("compounds", "input_fields")])) + expect_equal(form_pred, predict(res_form, hpc[rows, c("compounds", "input_fields")], type = "class")$.pred_class) }) @@ -95,16 +95,16 @@ test_that('glmnet probabilities, mulitiple lambda', { xy_fit <- fit_xy( multinom_reg(penalty = lams) %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ) - expect_error(predict(xy_fit, iris[rows, 1:4], type = "class")) - expect_error(predict(xy_fit, iris[rows, 1:4], type = "prob")) + expect_error(predict(xy_fit, hpc[rows, 1:4], type = "class")) + expect_error(predict(xy_fit, hpc[rows, 1:4], type = "prob")) mult_pred <- predict(xy_fit$fit, - newx = as.matrix(iris[rows, 1:4]), + newx = as.matrix(hpc[rows, 1:4]), s = lams, type = "response") mult_pred <- apply(mult_pred, 3, as_tibble) mult_pred <- dplyr:::bind_rows(mult_pred) @@ -119,7 +119,7 @@ test_that('glmnet probabilities, mulitiple lambda', { expect_equal( mult_pred$.pred, - multi_predict(xy_fit, iris[rows, 1:4], penalty = lams, type = "prob")$.pred + multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams, type = "prob")$.pred ) mult_class <- factor(names(mult_probs)[apply(mult_probs, 1, which.max)], @@ -136,17 +136,17 @@ test_that('glmnet probabilities, mulitiple lambda', { expect_equal( mult_class$.pred, - multi_predict(xy_fit, iris[rows, 1:4], penalty = lams)$.pred + multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams)$.pred ) expect_error( - multi_predict(xy_fit, newdata = iris[rows, 1:4], penalty = lams), + multi_predict(xy_fit, newdata = hpc[rows, 1:4], penalty = lams), "Did you mean" ) # Can predict probs with default penalty. 
See #108 expect_error( - multi_predict(xy_fit, new_data = iris[rows, 1:4], type = "prob"), + multi_predict(xy_fit, new_data = hpc[rows, 1:4], type = "prob"), NA ) @@ -156,10 +156,10 @@ test_that("class predictions are factors with all levels", { skip_if_not_installed("glmnet") skip_if(run_glmnet) - basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(Species ~ ., data = iris) - nd <- iris[iris$Species == "setosa", ] + basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(class ~ ., data = hpc) + nd <- hpc[hpc$class == "setosa", ] yhat <- predict(basic, new_data = nd, penalty = .1) yhat_multi <- multi_predict(basic, new_data = nd, penalty = .1)$.pred expect_is(yhat_multi[[1]]$.pred_class, "factor") - expect_equal(levels(yhat_multi[[1]]$.pred_class), levels(iris$Species)) + expect_equal(levels(yhat_multi[[1]]$.pred_class), levels(hpc$class)) }) diff --git a/tests/testthat/test_multinom_reg_keras.R b/tests/testthat/test_multinom_reg_keras.R index 495fcea01..322a8afe9 100644 --- a/tests/testthat/test_multinom_reg_keras.R +++ b/tests/testthat/test_multinom_reg_keras.R @@ -8,11 +8,13 @@ library(dplyr) context("keras logistic regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ set.seed(352) -dat <- iris[order(runif(nrow(iris))),] +dat <- hpc[order(runif(150)),] tr_dat <- dat[1:140, ] te_dat <- dat[141:150, ] @@ -42,7 +44,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -54,7 +56,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -64,7 +66,7 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -77,7 +79,7 @@ test_that('model fitting', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -85,7 +87,7 @@ test_that('model fitting', { expect_error( fit( reg_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -107,7 +109,7 @@ test_that('classification prediction', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_raw <- @@ -125,7 +127,7 @@ test_that('classification prediction', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_raw <- @@ -151,7 +153,7 @@ test_that('classification probabilities', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_pred <- @@ -168,7 +170,7 @@ test_that('classification probabilities', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_pred <- diff --git a/tests/testthat/test_multinom_reg_nnet.R b/tests/testthat/test_multinom_reg_nnet.R index 5893c2316..7336be592 100644 --- a/tests/testthat/test_multinom_reg_nnet.R +++ b/tests/testthat/test_multinom_reg_nnet.R @@ -8,11 +8,13 @@ library(dplyr) context("nnet multinomial regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ set.seed(352) -dat <- iris[order(runif(nrow(iris))),] +dat <- hpc[order(runif(150)),] tr_dat <- dat[1:140, ] te_dat <- dat[141:150, ] @@ -37,7 +39,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = 
tr_dat$class ), regexp = NA ) @@ -50,7 +52,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -60,7 +62,7 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -79,7 +81,7 @@ test_that('classification prediction', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) nnet_pred <- @@ -100,7 +102,7 @@ test_that('classification probabilities', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) nnet_pred <- diff --git a/tests/testthat/test_multinom_reg_spark.R b/tests/testthat/test_multinom_reg_spark.R index 9550d98d0..55bebd919 100644 --- a/tests/testthat/test_multinom_reg_spark.R +++ b/tests/testthat/test_multinom_reg_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("multinomial regression execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,9 +20,9 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_rows <- c(1, 51, 101) - iris_tr <- copy_to(sc, iris[-iris_rows, ], "iris_tr", overwrite = TRUE) - iris_te <- copy_to(sc, iris[ iris_rows, -5], "iris_te", overwrite = TRUE) + hpc_rows <- c(1, 51, 101) + hpc_tr <- copy_to(sc, hpc[-hpc_rows, ], "hpc_tr", overwrite = TRUE) + hpc_te <- copy_to(sc, hpc[ hpc_rows, -5], "hpc_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -31,19 +31,19 @@ test_that('spark execution', { fit( multinom_reg() %>% set_engine("spark"), control = ctrl, - Species ~ ., - data = iris_tr + class ~ ., + data = hpc_tr ), regexp = NA ) expect_error( - spark_class_pred <- predict(spark_class_fit, iris_te), + spark_class_pred <- predict(spark_class_fit, hpc_te), regexp = NA ) expect_error( - spark_class_pred_class <- predict(spark_class_fit, iris_te), + spark_class_pred_class <- predict(spark_class_fit, hpc_te), regexp = NA ) @@ -55,12 +55,12 @@ test_that('spark execution', { ) expect_error( - spark_class_prob <- predict(spark_class_fit, iris_te, type = "prob"), + spark_class_prob <- predict(spark_class_fit, hpc_te, type = "prob"), regexp = NA ) expect_error( - spark_class_prob_classprob <- predict(spark_class_fit, iris_te, type = "prob"), + spark_class_prob_classprob <- predict(spark_class_fit, hpc_te, type = "prob"), regexp = NA ) diff --git a/tests/testthat/test_nearest_neighbor_kknn.R b/tests/testthat/test_nearest_neighbor_kknn.R index ed17b1206..41aab23c5 100644 --- a/tests/testthat/test_nearest_neighbor_kknn.R +++ b/tests/testthat/test_nearest_neighbor_kknn.R @@ -6,11 +6,12 @@ library(rlang) context("nearest neighbor execution with kknn") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- nearest_neighbor(mode = "classification", +num_pred <- c("compounds", "iterations", "num_pending") +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- nearest_neighbor(mode = "classification", neighbors = 8, weight_func = "triangular") %>% set_engine("kknn") @@ -26,10 +27,10 @@ test_that('kknn execution', { # expect no error expect_error( fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ), regexp = "outcome should be a 
factor" ) @@ -38,10 +39,10 @@ test_that('kknn execution', { # expect no error expect_error( res <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, c("Sepal.Length", "Petal.Width")], - y = iris$Species + x = hpc[, c("input_fields", "iterations")], + y = hpc$class ), regexp = NA ) @@ -51,9 +52,9 @@ test_that('kknn execution', { expect_error( fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) @@ -68,51 +69,51 @@ test_that('kknn prediction', { # continuous res_xy <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Species + x = hpc[, num_pred], + y = hpc$class ) uni_pred <- predict( res_xy$fit, - newdata = iris[1:5, num_pred] + newdata = hpc[1:5, num_pred] ) - expect_equal(tibble(.pred_class = uni_pred), predict(res_xy, iris[1:5, num_pred])) + expect_equal(tibble(.pred_class = uni_pred), predict(res_xy, hpc[1:5, num_pred])) # nominal res_xy_nom <- fit_xy( - iris_basic %>% set_mode("classification"), + hpc_basic %>% set_mode("classification"), control = ctrl, - x = iris[, c("Sepal.Length", "Petal.Width")], - y = iris$Species + x = hpc[, c("input_fields", "iterations")], + y = hpc$class ) uni_pred_nom <- predict( res_xy_nom$fit, - newdata = iris[1:5, c("Sepal.Length", "Petal.Width")] + newdata = hpc[1:5, c("input_fields", "iterations")] ) expect_equal( uni_pred_nom, - predict(res_xy_nom, iris[1:5, c("Sepal.Length", "Petal.Width")], type = "class")$.pred_class + predict(res_xy_nom, hpc[1:5, c("input_fields", "iterations")], type = "class")$.pred_class ) # continuous - formula interface res_form <- fit( - iris_basic %>% set_mode("regression"), - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic %>% set_mode("regression"), + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) form_pred <- predict( res_form$fit, - newdata = iris[1:5,] + newdata = hpc[1:5,] ) - expect_equal(form_pred, predict(res_form, iris[1:5, c("Sepal.Width", "Species")])$.pred) + expect_equal(form_pred, predict(res_form, hpc[1:5, c("compounds", "class")])$.pred) }) @@ -121,23 +122,23 @@ test_that('kknn multi-predict', { skip_if_not_installed("kknn") library(kknn) - iris_te <- c(1:2, 50:51, 100:101) + hpc_te <- c(1:2, 50:51, 100:101) k_vals <- 1:10 res_xy <- fit_xy( nearest_neighbor(mode = "classification", neighbors = 3) %>% set_engine("kknn"), control = ctrl, - x = iris[-iris_te, num_pred], - y = iris$Species[-iris_te] + x = hpc[-hpc_te, num_pred], + y = hpc$class[-hpc_te] ) - pred_multi <- multi_predict(res_xy, iris[iris_te, num_pred], neighbors = k_vals) + pred_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals) expect_equal(pred_multi %>% unnest(cols = c(.pred)) %>% nrow(), - length(iris_te) * length(k_vals)) - expect_equal(pred_multi %>% nrow(), length(iris_te)) + length(hpc_te) * length(k_vals)) + expect_equal(pred_multi %>% nrow(), length(hpc_te)) - pred_uni <- predict(res_xy, iris[iris_te, num_pred]) + pred_uni <- predict(res_xy, hpc[hpc_te, num_pred]) pred_uni_obs <- pred_multi %>% mutate(.rows = row_number()) %>% @@ -148,13 +149,13 @@ test_that('kknn multi-predict', { expect_equal(pred_uni, pred_uni_obs) - prob_multi <- multi_predict(res_xy, iris[iris_te, num_pred], + prob_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals, type = "prob") expect_equal(prob_multi %>% unnest(cols = c(.pred)) %>% nrow(), - length(iris_te) * length(k_vals)) - expect_equal(prob_multi %>% nrow(), length(iris_te)) + length(hpc_te) * length(k_vals)) + 
expect_equal(prob_multi %>% nrow(), length(hpc_te)) - prob_uni <- predict(res_xy, iris[iris_te, num_pred], type = "prob") + prob_uni <- predict(res_xy, hpc[hpc_te, num_pred], type = "prob") prob_uni_obs <- prob_multi %>% mutate(.rows = row_number()) %>% diff --git a/tests/testthat/test_nullmodel.R b/tests/testthat/test_nullmodel.R index b3eb4ee51..db499d54c 100644 --- a/tests/testthat/test_nullmodel.R +++ b/tests/testthat/test_nullmodel.R @@ -5,7 +5,8 @@ library(tibble) context("test-nullmodel") source("helpers.R") - +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] test_that('primary arguments', { basic <- null_model(mode = "regression") @@ -36,16 +37,16 @@ test_that('bad input', { expect_error(translate(null_model(formula = y ~ x))) expect_warning( translate( - null_model(mode = "regression") %>% set_engine("parsnip", x = iris[,1:3], y = iris$Species) + null_model(mode = "regression") %>% set_engine("parsnip", x = hpc[,1:3], y = hpc$class) ) ) }) # ------------------------------------------------------------------------------ -num_pred <-c("Sepal.Length", "Sepal.Width", "Petal.Width") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- null_model(mode = "regression") %>% set_engine("parsnip") +num_pred <- names(phc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- null_model(mode = "regression") %>% set_engine("parsnip") # ------------------------------------------------------------------------------ @@ -53,26 +54,26 @@ test_that('nullmodel execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc ), regexp = NA ) expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Petal.Length + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending ), regexp = NA ) expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris + hpc_basic, + hpc_bad_form, + data = hpc ) ) @@ -80,9 +81,9 @@ test_that('nullmodel execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc ), regexp = NA ) @@ -97,23 +98,23 @@ test_that('nullmodel prediction', { carb = rep(2.8125, 5)) res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Petal.Length + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending ) - expect_equal(uni_pred, predict(res_xy, new_data = iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, new_data = hpc[1:5, num_pred])) res_form <- fit( - iris_basic, - Petal.Length ~ log(Sepal.Width) + Species, - data = iris + hpc_basic, + num_pending ~ log(compounds) + class, + data = hpc_basic ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) # Multivariate y res <- fit( - iris_basic, + hpc_basic, cbind(gear, carb) ~ ., data = mtcars ) @@ -131,7 +132,7 @@ test_that('classification', { expect_error( null_model <- null_model(mode = "classification") %>% set_engine("parsnip") %>% - fit(Species ~ ., data = iris), + fit(class ~ ., data = hpc), regexp = NA ) expect_true(!is.null(null_model$fit)) diff --git a/tests/testthat/test_predict_formats.R b/tests/testthat/test_predict_formats.R index 442650486..9b9b5430a 100644 --- a/tests/testthat/test_predict_formats.R +++ b/tests/testthat/test_predict_formats.R @@ -3,6 +3,9 @@ library(parsnip) library(tibble) library(dplyr) +source(test_path("helper-objects.R")) +hpc <- 
hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("check predict output structures") @@ -10,7 +13,7 @@ context("check predict output structures") lm_fit <- linear_reg(mode = "regression") %>% set_engine("lm") %>% - fit(Sepal.Length ~ ., data = iris) + fit(class ~ ., data = hpc) class_dat <- airquality[complete.cases(airquality),] class_dat$Ozone <- factor(ifelse(class_dat$Ozone >= 31, "high", "low")) @@ -31,9 +34,9 @@ lr_fit_2 <- # ------------------------------------------------------------------------------ test_that('regression predictions', { - expect_true(is_tibble(predict(lm_fit, new_data = iris[1:5,-1]))) - expect_true(is.vector(parsnip:::predict_numeric.model_fit(lm_fit, new_data = iris[1:5,-1]))) - expect_equal(names(predict(lm_fit, new_data = iris[1:5,-1])), ".pred") + expect_true(is_tibble(predict(lm_fit, new_data = hpc[1:5,-1]))) + expect_true(is.vector(parsnip:::predict_numeric.model_fit(lm_fit, new_data = hpc[1:5,-1]))) + expect_equal(names(predict(lm_fit, new_data = hpc[1:5,-1])), ".pred") }) test_that('classification predictions', { @@ -67,18 +70,18 @@ test_that('non-factor classification', { expect_error( logistic_reg() %>% set_engine("glm") %>% - fit(Species ~ ., data = iris %>% mutate(Species = Species == "setosa")) + fit(class ~ ., data = hpc %>% mutate(class = class == "VF")) ) expect_error( logistic_reg() %>% set_engine("glm") %>% - fit(Species ~ ., data = iris %>% mutate(Species = ifelse(Species == "setosa", 1, 0))) + fit(class ~ ., data = hpc %>% mutate(class = ifelse(class == "VF", 1, 0))) ) expect_error( multinom_reg() %>% set_engine("glmnet") %>% - fit(Species ~ ., data = iris %>% mutate(Species = as.character(Species))) + fit(class ~ ., data = hpc %>% mutate(class = as.character(class))) ) }) diff --git a/tests/testthat/test_rand_forest_ranger.R b/tests/testthat/test_rand_forest_ranger.R index 46a2a6eb8..6d96e2490 100644 --- a/tests/testthat/test_rand_forest_ranger.R +++ b/tests/testthat/test_rand_forest_ranger.R @@ -7,7 +7,7 @@ library(rlang) context("random forest execution with ranger") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -315,8 +315,8 @@ test_that('additional descriptor tests', { descr_other_xy <- fit_xy( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(descr_other_xy$fit$mtry, 2) @@ -325,7 +325,7 @@ test_that('additional descriptor tests', { descr_other_f <- fit( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - Species ~ ., data = iris, + class ~ ., data = hpc, control = ctrl ) expect_equal(descr_other_f$fit$mtry, 2) @@ -334,8 +334,8 @@ test_that('additional descriptor tests', { descr_other_xy <- fit_xy( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(descr_other_xy$fit$mtry, 2) @@ -344,7 +344,7 @@ test_that('additional descriptor tests', { descr_other_f <- fit( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - Species ~ ., data = iris, + class ~ ., data = hpc, control = ctrl ) 
expect_equal(descr_other_f$fit$mtry, 2) @@ -359,21 +359,21 @@ test_that('ranger classification prediction', { xy_class_fit <- rand_forest() %>% set_mode("classification") %>% set_engine("ranger") %>% fit_xy( - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_false(has_multi_predict(xy_class_fit)) expect_equal(multi_predict_args(xy_class_fit), NA_character_) - xy_class_pred <- predict(xy_class_fit$fit, data = iris[c(1, 51, 101), 1:4])$prediction + xy_class_pred <- predict(xy_class_fit$fit, data = hpc[c(1, 51, 101), 1:4])$prediction xy_class_pred <- colnames(xy_class_pred)[apply(xy_class_pred, 1, which.max)] - xy_class_pred <- factor(xy_class_pred, levels = levels(iris$Species)) + xy_class_pred <- factor(xy_class_pred, levels = levels(hpc$class)) expect_equal( xy_class_pred, - predict(xy_class_fit, new_data = iris[c(1, 51, 101), 1:4])$.pred_class + predict(xy_class_fit, new_data = hpc[c(1, 51, 101), 1:4])$.pred_class ) xy_prob_fit <- @@ -381,26 +381,26 @@ test_that('ranger classification prediction', { set_mode("classification") %>% set_engine("ranger") %>% fit_xy( - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) - xy_prob_pred <- predict(xy_prob_fit$fit, data = iris[c(1, 51, 101), 1:4])$prediction + xy_prob_pred <- predict(xy_prob_fit$fit, data = hpc[c(1, 51, 101), 1:4])$prediction xy_prob_pred <- colnames(xy_prob_pred)[apply(xy_prob_pred, 1, which.max)] - xy_prob_pred <- factor(xy_prob_pred, levels = levels(iris$Species)) + xy_prob_pred <- factor(xy_prob_pred, levels = levels(hpc$class)) expect_equal( xy_class_pred, - predict(xy_prob_fit, new_data = iris[c(1, 51, 101), 1:4])$.pred_class + predict(xy_prob_fit, new_data = hpc[c(1, 51, 101), 1:4])$.pred_class ) - xy_prob_prob <- predict(xy_prob_fit$fit, data = iris[c(1, 51, 101), 1:4], type = "response") + xy_prob_prob <- predict(xy_prob_fit$fit, data = hpc[c(1, 51, 101), 1:4], type = "response") xy_prob_prob <- as_tibble(xy_prob_prob$prediction) names(xy_prob_prob) <- paste0(".pred_", names(xy_prob_prob)) expect_equal( xy_prob_prob, - predict(xy_prob_fit, new_data = iris[c(1, 51, 101), 1:4], type = "prob") + predict(xy_prob_fit, new_data = hpc[c(1, 51, 101), 1:4], type = "prob") ) }) diff --git a/tests/testthat/test_rand_forest_spark.R b/tests/testthat/test_rand_forest_spark.R index ad62f39e9..bc22fafde 100644 --- a/tests/testthat/test_rand_forest_spark.R +++ b/tests/testthat/test_rand_forest_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("random forest execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,8 +20,8 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_rf_tr <- copy_to(sc, iris[-(1:4), ], "iris_rf_tr", overwrite = TRUE) - iris_rf_te <- copy_to(sc, iris[ 1:4 , -1], "iris_rf_te", overwrite = TRUE) + hpc_rf_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_rf_tr", overwrite = TRUE) + hpc_rf_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_rf_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -32,7 +32,7 @@ test_that('spark execution', { set_engine("spark", seed = 12), control = ctrl, Sepal_Length ~ ., - data = iris_rf_tr + data = hpc_rf_tr ), regexp = NA ) @@ -45,28 +45,28 @@ test_that('spark execution', { set_engine("spark", seed = 12), control = ctrl, Sepal_Length ~ ., - data = iris_rf_tr + data = hpc_rf_tr ), regexp = NA ) expect_error( - spark_reg_pred <- 
predict(spark_reg_fit, iris_rf_te), + spark_reg_pred <- predict(spark_reg_fit, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_pred_num <- predict(spark_reg_fit, iris_rf_te), + spark_reg_pred_num <- predict(spark_reg_fit, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_dup <- predict(spark_reg_fit_dup, iris_rf_te), + spark_reg_dup <- predict(spark_reg_fit_dup, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_num_dup <- predict(spark_reg_fit_dup, iris_rf_te), + spark_reg_num_dup <- predict(spark_reg_fit_dup, hpc_rf_te), regexp = NA ) diff --git a/tests/testthat/test_svm_liquidsvm.R b/tests/testthat/test_svm_liquidsvm.R index 240351551..98306ff15 100644 --- a/tests/testthat/test_svm_liquidsvm.R +++ b/tests/testthat/test_svm_liquidsvm.R @@ -3,6 +3,9 @@ library(parsnip) library(rlang) library(tibble) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ test_that('primary arguments', { @@ -103,8 +106,8 @@ test_that('svm rbf regression', { fit_xy( reg_mod, control = ctrl, - x = iris[, 2:4], - y = iris$Sepal.Length + x = hpc[, 2:4], + y = hpc$compounds ), regexp = NA ) @@ -114,8 +117,8 @@ test_that('svm rbf regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -133,8 +136,8 @@ test_that('svm rbf regression prediction', { reg_form <- fit( object = reg_mod, - formula = Sepal.Length ~ ., - data = iris[, -5], + formula = compounds ~ ., + data = hpc[, -5], control = ctrl ) ) @@ -143,8 +146,8 @@ test_that('svm rbf regression prediction', { reg_xy_form <- fit_xy( object = reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, control = ctrl ) ) @@ -153,8 +156,8 @@ test_that('svm rbf regression prediction', { expect_warning( liquidSVM_form <- liquidSVM::svm( - x = Sepal.Length ~ ., - y = iris[, -5], + x = compounds ~ ., + y = hpc[, -5], gammas = .1, lambdas = 0.25, folds = 1, @@ -165,8 +168,8 @@ test_that('svm rbf regression prediction', { expect_warning( liquidSVM_xy_form <- liquidSVM::svm( - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, gammas = .1, lambdas = 0.25, folds = 1, @@ -179,8 +182,8 @@ test_that('svm rbf regression prediction', { liquidSVM::getSolution(liquidSVM_xy_form)[c("coeff", "sv")]) # check predictions for liquidSVM formula and liquidSVM xy interfaces - liquidSVM_form_preds <- predict(liquidSVM_form, iris[1:3, 2:4]) - liquidSVM_form_xy_preds <- predict(liquidSVM_xy_form, iris[1:3, 2:4]) + liquidSVM_form_preds <- predict(liquidSVM_form, hpc[1:3, 2:4]) + liquidSVM_form_xy_preds <- predict(liquidSVM_xy_form, hpc[1:3, 2:4]) expect_equal(liquidSVM_form_preds, liquidSVM_form_xy_preds) # check predictions for parsnip formula and liquidSVM formula interfaces @@ -189,7 +192,7 @@ test_that('svm rbf regression prediction', { list(.pred = liquidSVM_form_preds), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, iris[1:3, 2:4]) + parsnip_pred <- predict(reg_form, hpc[1:3, 2:4]) expect_equal(as.data.frame(liquidSVM_pred), as.data.frame(parsnip_pred)) # check that coeffs are equal for formula methods called via parsnip and liquidSVM @@ -201,7 +204,7 @@ test_that('svm rbf regression prediction', { liquidSVM::getSolution(reg_xy_form$fit)[c("coeff", "sv")]) # check predictions are equal for parsnip xy and liquidSVM xy methods - parsnip_xy_pred <- predict(reg_xy_form, 
iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(liquidSVM_pred), as.data.frame(parsnip_xy_pred)) }) diff --git a/tests/testthat/test_svm_poly.R b/tests/testthat/test_svm_poly.R index cf1184aef..2e451b232 100644 --- a/tests/testthat/test_svm_poly.R +++ b/tests/testthat/test_svm_poly.R @@ -7,6 +7,8 @@ library(tibble) context("RBF SVM") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -131,8 +133,8 @@ test_that('svm poly regression', { res <- fit_xy( reg_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -144,8 +146,8 @@ test_that('svm poly regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -161,13 +163,13 @@ test_that('svm poly regression prediction', { reg_form <- fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ) # kern_pred <- - # predict(reg_form$fit, iris[1:3, -c(1, 5)]) %>% + # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% # as_tibble() %>% # setNames(".pred") kern_pred <- @@ -178,20 +180,20 @@ test_that('svm poly regression prediction', { class = c("tbl_df", "tbl", "data.frame") ) - parsnip_pred <- predict(reg_form, iris[1:3, -c(1, 5)]) + parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) reg_xy_form <- fit_xy( reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) }) @@ -205,8 +207,8 @@ test_that('svm poly classification', { fit_xy( cls_mod, control = ctrl, - x = iris[, -5], - y = iris$Species + x = hpc[, -5], + y = hpc$class ), regexp = NA ) @@ -214,8 +216,8 @@ test_that('svm poly classification', { expect_error( fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -234,13 +236,13 @@ test_that('svm poly classification probabilities', { cls_form <- fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ) # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, iris[ind, -5])) + # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) kern_class <- structure( @@ -249,22 +251,22 @@ test_that('svm poly classification probabilities', { structure(1:3, .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, iris[ind, -5]) + parsnip_class <- predict(cls_form, hpc[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, iris[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% as_tibble() %>% setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) @@ -277,9 +279,9 @@ 
test_that('svm poly classification probabilities', { # row.names = c(NA,-3L), # class = c("tbl_df", "tbl", "data.frame")) - parsnip_probs <- predict(cls_form, iris[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, iris[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) diff --git a/tests/testthat/test_svm_rbf.R b/tests/testthat/test_svm_rbf.R index b3e7c7d04..a14964191 100644 --- a/tests/testthat/test_svm_rbf.R +++ b/tests/testthat/test_svm_rbf.R @@ -6,6 +6,8 @@ library(rlang) context("poly SVM") source(test_path("helpers.R")) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -110,8 +112,8 @@ test_that('svm poly regression', { res <- fit_xy( reg_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$input_fields ), regexp = NA ) @@ -122,8 +124,8 @@ test_that('svm poly regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + input_fields ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -139,13 +141,13 @@ test_that('svm rbf regression prediction', { reg_form <- fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + input_fields ~ ., + data = hpc[, -5], control = ctrl ) # kern_pred <- - # predict(reg_form$fit, iris[1:3, -c(1, 5)]) %>% + # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% # as_tibble() %>% # setNames(".pred") kern_pred <- @@ -153,20 +155,20 @@ test_that('svm rbf regression prediction', { list(.pred = c(5.02786147259765, 4.81715220026091, 4.86817852816449)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, iris[1:3, -c(1, 5)]) + parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) reg_xy_form <- fit_xy( reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$input_fields, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) }) @@ -180,8 +182,8 @@ test_that('svm rbf classification', { fit_xy( cls_mod, control = ctrl, - x = iris[, -5], - y = iris$Species + x = hpc[, -5], + y = hpc$class ), regexp = NA ) @@ -189,8 +191,8 @@ test_that('svm rbf classification', { expect_error( fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -209,13 +211,13 @@ test_that('svm rbf classification probabilities', { cls_form <- fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ) # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, iris[ind, -5])) + # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) kern_class <- structure(list( @@ -224,22 +226,22 @@ test_that('svm rbf classification probabilities', { .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, iris[ind, -5]) + parsnip_class <- predict(cls_form, hpc[ind, -5]) expect_equal(kern_class, parsnip_class) 
set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, iris[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% as_tibble() %>% setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) @@ -251,10 +253,10 @@ test_that('svm rbf classification probabilities', { # .pred_virginica = c(0.00640936947697121, 0.625112509213187, 0.976312878783783)), # row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_probs <- predict(cls_form, iris[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, iris[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) From a41e1c0ec1906e67a67c30679ede445369da8f88 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Sun, 21 Jun 2020 20:37:15 -0700 Subject: [PATCH 2/7] fix descriptor tests There are 4 factor levels in hpc_data$class, but 3 in iris$Species, which tripped up a few tests. --- tests/testthat/test_descriptors.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_descriptors.R b/tests/testthat/test_descriptors.R index 34390a989..8f60b1468 100644 --- a/tests/testthat/test_descriptors.R +++ b/tests/testthat/test_descriptors.R @@ -2,7 +2,7 @@ library(testthat) library(parsnip) source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] +hpc <- hpc_data[1:150, c(2:5, 8)] %>% as.data.frame() # ------------------------------------------------------------------------------ @@ -29,7 +29,7 @@ eval_descrs <- function(descrs, not = NULL) { lapply(descrs, do.call, list()) } -class_tab <- table(hpc_data$class, dnn = NULL) +class_tab <- table(hpc$class, dnn = NULL) # ------------------------------------------------------------------------------ @@ -86,12 +86,12 @@ context("Testing formula -> xy conversion") test_that("numeric y and dummy vars", { expect_equal( - template(5, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), - eval_descrs(get_descr_form(compounds ~ ., data = hpc)) + template(6, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ ., data = hpc)) ) expect_equal( - template(2, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"compounds"]), - eval_descrs(get_descr_form(compounds ~ class, data = hpc)) + template(3, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ class, data = hpc)) ) }) @@ -117,8 +117,8 @@ test_that("factor y", { eval_descrs(get_descr_form(class ~ ., data = hpc)) ) expect_equal( - template(1, 1, 150, class_tab, 0, hpc, hpc["input_fields"], hpc[,"class"]), - eval_descrs(get_descr_form(class ~ input_fields, data = hpc)) + template(1, 1, 150, class_tab, 0, hpc, hpc["compounds"], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ compounds, data = hpc)) ) }) @@ -134,8 +134,8 @@ test_that("weird cases", { # So model.frame ignores - signs in a model formula so class is not removed # prior to model.matrix; otherwise this should have n_cols = 3 expect_equal( - template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), - 
eval_descrs(get_descr_form(compounds ~ . - class, data = hpc)) + template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ . - class, data = hpc)) ) # Oy ve! Before going to model.matrix, model.frame produces a data frame @@ -173,7 +173,7 @@ test_that("numeric y and dummy vars", { eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1:2])) ) - hpc3 <- hpc2[,c("iterations", "class", "input_fields")] + hpc3 <- hpc2[,c("num_pending", "class", "compounds")] expect_equal( template(2, 2, 150, NA, 1, hpc3, hpc[, 4:5], hpc[, 1, drop = FALSE]), eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1, drop = FALSE])) From 5ae33c5e649a8264dfd8c21295241732c86431e2 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Sun, 21 Jun 2020 21:01:09 -0700 Subject: [PATCH 3/7] fix linear_reg tests Addresses some failures re: some subsetting by name and some by index --- tests/testthat/test_linear_reg.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index 05cee1854..d2574988d 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -209,7 +209,7 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- names(hpc)[1:3] +num_pred <- c("input_fields", "num_pending", "iterations") hpc_bad_form <- as.formula(class ~ term) hpc_basic <- linear_reg() %>% set_engine("lm") @@ -279,19 +279,18 @@ test_that('lm execution', { }) test_that('lm prediction', { - uni_lm <- lm(input_fields ~ compounds + iterations + num_pending, data = hpc) + uni_lm <- lm(compounds ~ input_fields + num_pending + iterations, data = hpc) uni_pred <- unname(predict(uni_lm, newdata = hpc[1:5, ])) - inl_lm <- lm(input_fields ~ log(compounds) + class, data = hpc) + inl_lm <- lm(compounds ~ log(input_fields) + class, data = hpc) inl_pred <- unname(predict(inl_lm, newdata = hpc[1:5, ])) - mv_lm <- lm(cbind(compounds, iterations) ~ ., data = hpc) + mv_lm <- lm(cbind(input_fields, num_pending) ~ ., data = hpc) mv_pred <- as_tibble(predict(mv_lm, newdata = hpc[1:5, ])) - names(mv_pred) <- c(".pred_compounds", ".pred_iterations") - + names(mv_pred) <- c(".pred_input_fields", ".pred_num_pending") res_xy <- fit_xy( hpc_basic, x = hpc[, num_pred], - y = hpc$input_fields, + y = hpc$compounds, control = ctrl ) @@ -299,23 +298,25 @@ test_that('lm prediction', { res_form <- fit( hpc_basic, - input_fields ~ log(compounds) + class, + compounds ~ log(input_fields) + class, data = hpc, control = ctrl ) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( hpc_basic, - cbind(compounds, iterations) ~ ., + cbind(input_fields, num_pending) ~ ., data = hpc, control = ctrl ) + expect_equal(mv_pred, predict(res_mv, hpc[1:5,])) }) test_that('lm intervals', { - stats_lm <- lm(input_fields ~ compounds + iterations + num_pending, + stats_lm <- lm(compounds ~ input_fields + iterations + num_pending, data = hpc) confidence_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "confidence") @@ -325,7 +326,7 @@ test_that('lm intervals', { res_xy <- fit_xy( linear_reg() %>% set_engine("lm"), x = hpc[, num_pred], - y = hpc$input_fields, + y = hpc$compounds, control = ctrl ) From a466fc4e909ef8bcdf0a53661a90dd39c7b3d356 Mon Sep 17 00:00:00 2001 From: "Simon P. 
Couch" Date: Mon, 22 Jun 2020 08:05:04 -0700 Subject: [PATCH 4/7] fix svm tests The fourth factor level in hpc_data was preserved in some tests. Also, some manually entered predictions needed to be rewritten. --- tests/testthat/test_svm_poly.R | 50 +++++++++++------------ tests/testthat/test_svm_rbf.R | 72 +++++++++++++++++----------------- 2 files changed, 60 insertions(+), 62 deletions(-) diff --git a/tests/testthat/test_svm_poly.R b/tests/testthat/test_svm_poly.R index 2e451b232..f22caac9d 100644 --- a/tests/testthat/test_svm_poly.R +++ b/tests/testthat/test_svm_poly.R @@ -6,7 +6,7 @@ library(tibble) # ------------------------------------------------------------------------------ context("RBF SVM") -source("helpers.R") +source(test_path("helpers.R")) source(test_path("helper-objects.R")) hpc <- hpc_data[1:150, c(2:5, 8)] @@ -175,13 +175,15 @@ test_that('svm poly regression prediction', { kern_pred <- structure( list( - .pred = c(5.02154233477783, 4.71496213707127, 4.78370369917621)), + .pred = c(164.4739, 139.8284, 133.8760)), row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame") ) parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_pred), + tolerance = .0001) reg_xy_form <- @@ -194,7 +196,9 @@ test_that('svm poly regression prediction', { expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_xy_pred), + tolerance = .0001) }) # ------------------------------------------------------------------------------ @@ -230,58 +234,50 @@ test_that('svm poly classification probabilities', { skip_if_not_installed("kernlab") - ind <- c(1, 51, 101) + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(1, 2, 143) set.seed(34562) cls_form <- fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ) - # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) + .pred_factor <- factor(c("F", "VF", "L"), levels = c("VF", "F", "L")) kern_class <- structure( list( - .pred_class = - structure(1:3, .Label = c("setosa", "versicolor", "virginica"), class = "factor")), + .pred_class = .pred_factor), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, hpc[ind, -5]) + parsnip_class <- predict(cls_form, hpc_no_m[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = hpc[, 1:4], - y = hpc$class, + x = hpc_no_m[, 1:4], + y = hpc_no_m$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc_no_m[ind, -5], type = "probabilities") %>% as_tibble() %>% - setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) - - # kern_probs <- - # structure( - # list( - # .pred_setosa = c(0.982990083267231, 0.0167077303224448, 0.00930879923686657), - # .pred_versicolor = c(0.00417116710624842, 0.946131931665357, 0.0015524073332013), - # .pred_virginica = c(0.0128387496265202, 0.0371603380121978, 0.989138793429932)), - # row.names = c(NA,-3L), - # class = c("tbl_df", "tbl", "data.frame")) - - parsnip_probs <- predict(cls_form, hpc[ind, -5], 
type = "prob") + setNames(c('.pred_VF', '.pred_F', '.pred_L')) + + parsnip_probs <- predict(cls_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) diff --git a/tests/testthat/test_svm_rbf.R b/tests/testthat/test_svm_rbf.R index a14964191..663c5d0e2 100644 --- a/tests/testthat/test_svm_rbf.R +++ b/tests/testthat/test_svm_rbf.R @@ -138,6 +138,11 @@ test_that('svm rbf regression prediction', { skip_if_not_installed("kernlab") + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(2, 1, 143) + reg_form <- fit( reg_mod, @@ -146,30 +151,30 @@ test_that('svm rbf regression prediction', { control = ctrl ) - # kern_pred <- - # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% - # as_tibble() %>% - # setNames(".pred") kern_pred <- structure( - list(.pred = c(5.02786147259765, 4.81715220026091, 4.86817852816449)), + list(.pred = c(131.7743, 372.0932, 902.0633)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) + parsnip_pred <- predict(reg_form, hpc[ind, -c(2, 5)]) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_pred), + tolerance = .0001) reg_xy_form <- fit_xy( reg_mod, - x = hpc[, 2:4], + x = hpc[, c(1, 3, 4)], y = hpc$input_fields, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) + parsnip_xy_pred <- predict(reg_xy_form, hpc[ind, -c(2, 5)]) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_xy_pred), + tolerance = .0001) }) # ------------------------------------------------------------------------------ @@ -178,12 +183,17 @@ test_that('svm rbf classification', { skip_if_not_installed("kernlab") + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(2, 1, 143) + expect_error( fit_xy( cls_mod, control = ctrl, - x = hpc[, -5], - y = hpc$class + x = hpc_no_m[, -5], + y = hpc_no_m$class ), regexp = NA ) @@ -192,7 +202,7 @@ test_that('svm rbf classification', { fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ), regexp = NA @@ -205,58 +215,50 @@ test_that('svm rbf classification probabilities', { skip_if_not_installed("kernlab") - ind <- c(1, 51, 101) + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(4, 55, 143) set.seed(34562) cls_form <- fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ) - # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) - kern_class <- structure(list( .pred_class = structure( - c(1L, 3L, 3L), - .Label = c("setosa", "versicolor", "virginica"), class = "factor")), + c(1L, 1L, 3L), + .Label = c("VF", "F", "L"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, hpc[ind, -5]) + parsnip_class <- predict(cls_form, hpc_no_m[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = hpc[, 1:4], - y = hpc$class, + x = hpc_no_m[, 1:4], + y = hpc_no_m$class, control = ctrl ) 
expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc_no_m[ind, -5], type = "probabilities") %>% as_tibble() %>% - setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) - - # kern_probs <- - # structure( - # list( - # .pred_setosa = c(0.985403715135807, 0.0158818274678279, 0.00633995479908973), - # .pred_versicolor = c(0.00818691538722139, 0.359005663318986, 0.0173471664171275), - # .pred_virginica = c(0.00640936947697121, 0.625112509213187, 0.976312878783783)), - # row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame")) + setNames(c('.pred_VF', '.pred_F', '.pred_L')) - parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) From 35ec46e8bd9a5e0580a9f75b1885ec6275fd53f2 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Mon, 22 Jun 2020 08:54:29 -0700 Subject: [PATCH 5/7] fix remaining test failures Some more manual data entry and # classes in outcome variable issues. --- tests/testthat/test_boost_tree_xgboost.R | 4 ++-- tests/testthat/test_mars.R | 22 +++++++++++++--------- tests/testthat/test_predict_formats.R | 2 +- tests/testthat/test_rand_forest_ranger.R | 10 +++++----- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index d181843ed..d2a4f4919 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -65,7 +65,7 @@ test_that('xgboost classification prediction', { ) xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") - xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE) + xy_pred <- matrix(xy_pred, ncol = 4, byrow = TRUE) xy_pred <- factor(levels(hpc$class)[apply(xy_pred, 1, which.max)], levels = levels(hpc$class)) expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) @@ -77,7 +77,7 @@ test_that('xgboost classification prediction', { ) form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") - form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE) + form_pred <- matrix(form_pred, ncol = 4, byrow = TRUE) form_pred <- factor(levels(hpc$class)[apply(form_pred, 1, which.max)], levels = levels(hpc$class)) expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) }) diff --git a/tests/testthat/test_mars.R b/tests/testthat/test_mars.R index a6361bbaf..a95f95d64 100644 --- a/tests/testthat/test_mars.R +++ b/tests/testthat/test_mars.R @@ -188,17 +188,21 @@ test_that('mars execution', { test_that('mars prediction', { skip_if_not_installed("earth") - uni_pred <- c(5.02371514510488, 4.70502120747471, 4.78973285129011, 4.81152592623742, - 5.08745393263092) - inl_pred <- c(5.07584328502019, 4.64927636051174, 4.82786784324037, 4.74001260567429, - 5.15379794835255) + uni_pred <- c(30.1466666666667, 30.1466666666667, 30.1466666666667, + 30.1466666666667, 30.1466666666667) + inl_pred <- c(538.268789262046, 
+                141.024903718634, 141.024903718634)
   mv_pred <-
     structure(
-      list(Sepal.Width =
-             c(3.4874092243636, 3.34173526636919, 3.17647644756747, 3.14280919018489, 3.41457224536639),
-           Petal.Width =
-             c(0.237414046784062, 0.221455118452782, 0.18348960240454, 0.219523313672823, 0.229434582618422
-             )), class = "data.frame", row.names = c(NA, -5L))
+      list(compounds =
+             c(371.334864384913, 129.475162245595, 256.094366313268,
+               129.475162245595, 129.475162245595),
+           input_fields =
+             c(430.476046435458, 158.833790342308, 218.07635084308,
+               158.833790342308, 158.833790342308)
+      ),
+      class = "data.frame", row.names = c(NA, -5L)
+    )
 
   res_xy <- fit_xy(
     hpc_basic,
diff --git a/tests/testthat/test_predict_formats.R b/tests/testthat/test_predict_formats.R
index 9b9b5430a..6fb7a709b 100644
--- a/tests/testthat/test_predict_formats.R
+++ b/tests/testthat/test_predict_formats.R
@@ -13,7 +13,7 @@ context("check predict output structures")
 lm_fit <-
   linear_reg(mode = "regression") %>%
   set_engine("lm") %>%
-  fit(class ~ ., data = hpc)
+  fit(compounds ~ ., data = hpc)
 
 class_dat <- airquality[complete.cases(airquality),]
 class_dat$Ozone <- factor(ifelse(class_dat$Ozone >= 31, "high", "low"))
diff --git a/tests/testthat/test_rand_forest_ranger.R b/tests/testthat/test_rand_forest_ranger.R
index 6d96e2490..0762d56d5 100644
--- a/tests/testthat/test_rand_forest_ranger.R
+++ b/tests/testthat/test_rand_forest_ranger.R
@@ -310,11 +310,11 @@ test_that('additional descriptor tests', {
 
   ##
 
-  exp_wts <- quo(c(min(.lvls()), 20, 10))
+  exp_wts <- quo(c(min(.lvls()), 20, 10, 1))
 
   descr_other_xy <- fit_xy(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     x = hpc[, 1:4],
     y = hpc$class,
     control = ctrl
@@ -324,7 +324,7 @@
   descr_other_f <- fit(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     class ~ ., data = hpc,
     control = ctrl
   )
@@ -333,7 +333,7 @@
   descr_other_xy <- fit_xy(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     x = hpc[, 1:4],
     y = hpc$class,
     control = ctrl
@@ -343,7 +343,7 @@
   descr_other_f <- fit(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     class ~ ., data = hpc,
     control = ctrl
   )

From 6dc72df25d6820a4922368b164523b39eb3ee866 Mon Sep 17 00:00:00 2001
From: "Simon P. Couch"
Couch" Date: Mon, 22 Jun 2020 09:11:27 -0700 Subject: [PATCH 6/7] a couple more test fixes --- tests/testthat/test_convert_data.R | 2 +- tests/testthat/test_nullmodel.R | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test_convert_data.R b/tests/testthat/test_convert_data.R index 48c0cf8fa..6a711d3b2 100644 --- a/tests/testthat/test_convert_data.R +++ b/tests/testthat/test_convert_data.R @@ -449,7 +449,7 @@ test_that("1 col matrix x, 1 col matrix y", { test_that("matrix x, factor y", { observed <- parsnip:::convert_xy_to_form_fit(as.matrix(hpc[, -5]), hpc$class) - expected <- hpc + expected <- as.data.frame(hpc) names(expected)[5] <- "..y" expect_equal(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) diff --git a/tests/testthat/test_nullmodel.R b/tests/testthat/test_nullmodel.R index db499d54c..aaf77532a 100644 --- a/tests/testthat/test_nullmodel.R +++ b/tests/testthat/test_nullmodel.R @@ -4,9 +4,9 @@ library(rlang) library(tibble) context("test-nullmodel") -source("helpers.R") +source(test_path("helpers.R")) source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] +hpc <- hpc_data[1:150, c(2:5, 8)] %>% as.data.frame() test_that('primary arguments', { basic <- null_model(mode = "regression") @@ -44,7 +44,7 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- names(phc)[1:3] +num_pred <- names(hpc)[1:3] hpc_bad_form <- as.formula(class ~ term) hpc_basic <- null_model(mode = "regression") %>% set_engine("parsnip") @@ -92,8 +92,8 @@ test_that('nullmodel execution', { test_that('nullmodel prediction', { - uni_pred <- tibble(.pred = rep(3.758, 5)) - inl_pred <- rep(3.758, 5) + uni_pred <- tibble(.pred = rep(30.1, 5)) + inl_pred <- rep(30.1, 5) mw_pred <- tibble(gear = rep(3.6875, 5), carb = rep(2.8125, 5)) @@ -103,14 +103,18 @@ test_that('nullmodel prediction', { y = hpc$num_pending ) - expect_equal(uni_pred, predict(res_xy, new_data = hpc[1:5, num_pred])) + expect_equal(uni_pred, + predict(res_xy, new_data = hpc[1:5, num_pred]), + tolerance = .01) res_form <- fit( hpc_basic, num_pending ~ log(compounds) + class, - data = hpc_basic + data = hpc ) - expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) + expect_equal(inl_pred, + predict(res_form, hpc[1:5, ])$.pred, + tolerance = .01) # Multivariate y res <- fit( From 1393f96124c374f2803b630cb1bb37e44c29a22a Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Mon, 22 Jun 2020 09:12:04 -0700 Subject: [PATCH 7/7] improve descriptors examples Initially switched from iris to mtcars, but the Orange data is a bit more interesting / illustrative. --- R/descriptors.R | 30 +++++++++++++++--------------- man/descriptors.Rd | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/R/descriptors.R b/R/descriptors.R index dc64f0737..b6b0da844 100644 --- a/R/descriptors.R +++ b/R/descriptors.R @@ -26,28 +26,28 @@ #' column, `..y`. 
 #' }
 #'
-#' For example, if you use the model formula `mpg ~ .` with the `mtcars`
-#' data, the values would be
+#' For example, if you use the model formula `circumference ~ .` with the
+#' built-in `Orange` data, the values would be
 #' \preformatted{
-#'  .preds() = 10 (the 10 columns in `mtcars`)
-#'  .cols()  = 10 (10 numeric columns + 0 from dummy variables)
-#'  .obs()   = 32
+#'  .preds() = 2 (the 2 remaining columns in `Orange`)
+#'  .cols()  = 5 (1 numeric column + 4 from Tree dummy variables)
+#'  .obs()   = 35
 #'  .lvls()  = NA (no factor outcome)
-#'  .facts() = 0 (no factor outcome)
-#'  .y()     = (mpg as a vector)
-#'  .x()     = (The other 10 columns as a data frame)
+#'  .facts() = 1 (the Tree predictor)
+#'  .y()     = (circumference as a vector)
+#'  .x()     = (The other 2 columns as a data frame)
 #'  .dat()   = (The full data set)
 #' }
 #'
-#' If the formula `as.character(cyl) ~ .` where used:
+#' If the formula `Tree ~ .` were used:
 #' \preformatted{
-#'  .preds() = 10 (the 10 numeric columns in `mtcars`)
-#'  .cols()  = 10 (same)
-#'  .obs()   = 32
-#'  .lvls()  = c("4" = 11, "6" = 7, "8" = 14)
+#'  .preds() = 2 (the 2 numeric columns in `Orange`)
+#'  .cols()  = 2 (same)
+#'  .obs()   = 35
+#'  .lvls()  = c("1" = 7, "2" = 7, "3" = 7, "4" = 7, "5" = 7)
 #'  .facts() = 0
-#'  .y()     = (as.character(cyl) as a vector)
-#'  .x()     = (The other 10 columns as a data frame)
+#'  .y()     = (Tree as a vector)
+#'  .x()     = (The other 2 columns as a data frame)
 #'  .dat()   = (The full data set)
 #' }
 #'
diff --git a/man/descriptors.Rd b/man/descriptors.Rd
index b53ec6459..4ad7ccf18 100644
--- a/man/descriptors.Rd
+++ b/man/descriptors.Rd
@@ -55,28 +55,28 @@ outcomes. If \code{fit_xy()} was used, the outcomes are attached as the
 column, \code{..y}.
 }
 
-For example, if you use the model formula \code{mpg ~ .} with the \code{mtcars}
-data, the values would be
+For example, if you use the model formula \code{circumference ~ .} with the
+built-in \code{Orange} data, the values would be
 \preformatted{
- .preds() = 10 (the 10 columns in `mtcars`)
- .cols()  = 10 (10 numeric columns + 0 from dummy variables)
- .obs()   = 32
+ .preds() = 2 (the 2 remaining columns in `Orange`)
+ .cols()  = 5 (1 numeric column + 4 from Tree dummy variables)
+ .obs()   = 35
  .lvls()  = NA (no factor outcome)
- .facts() = 0 (no factor outcome)
- .y()     = (mpg as a vector)
- .x()     = (The other 10 columns as a data frame)
+ .facts() = 1 (the Tree predictor)
+ .y()     = (circumference as a vector)
+ .x()     = (The other 2 columns as a data frame)
  .dat()   = (The full data set)
 }
 
-If the formula \code{as.character(cyl) ~ .} where used:
+If the formula \code{Tree ~ .} were used:
 \preformatted{
- .preds() = 10 (the 10 numeric columns in `mtcars`)
- .cols()  = 10 (same)
- .obs()   = 32
- .lvls()  = c("4" = 11, "6" = 7, "8" = 14)
+ .preds() = 2 (the 2 numeric columns in `Orange`)
+ .cols()  = 2 (same)
+ .obs()   = 35
+ .lvls()  = c("1" = 7, "2" = 7, "3" = 7, "4" = 7, "5" = 7)
  .facts() = 0
- .y()     = (as.character(cyl) as a vector)
- .x()     = (The other 10 columns as a data frame)
+ .y()     = (Tree as a vector)
+ .x()     = (The other 2 columns as a data frame)
  .dat()   = (The full data set)
 }
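
A quick sanity check of the counts quoted in the new `Orange` examples, as a sketch
rather than part of the patch: the descriptor values can be approximated with base R,
assuming that `.cols()` counts model-matrix columns without the intercept. The real
descriptors are only evaluated while parsnip is fitting a model, so base R stand-ins
are used here.

    # Orange ships with R: 35 rows; columns Tree (a 5-level factor), age, circumference

    # circumference ~ .
    nrow(Orange)                                       # 35 -> .obs()
    ncol(Orange) - 1                                   # 2  -> .preds()
    ncol(model.matrix(circumference ~ ., Orange)) - 1  # 5  -> .cols() (age + 4 Tree contrasts)
    sum(vapply(Orange[setdiff(names(Orange), "circumference")],
               is.factor, logical(1)))                 # 1  -> .facts()

    # Tree ~ .
    table(Orange$Tree)                                 # 7 observations per level -> .lvls()

Inside a model specification the descriptors themselves would be used instead, for
example `mtry = .cols() - 1` in an engine argument, and they are resolved at fit time.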