From bebc5d56328bdd78e469928fa6f493832e551949 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Fri, 19 Jun 2020 15:30:06 -0700 Subject: [PATCH 1/7] initial go at iris -> modeldata::hpc_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switching out iris for modeldata::hpc_data—much of the switches so far were global find-and-replace, with some manual bug fixes. Still quite a few to go. --- R/descriptors.R | 28 ++--- R/model_object_docs.R | 20 ++-- R/nullmodel.R | 1 - man/descriptors.Rd | 28 ++--- man/model_spec.Rd | 20 ++-- man/tidy.nullmodel.Rd | 1 - tests/testthat/helper-objects.R | 1 + tests/testthat/test_adds.R | 10 +- tests/testthat/test_boost_tree.R | 6 +- tests/testthat/test_boost_tree_spark.R | 22 ++-- tests/testthat/test_boost_tree_xgboost.R | 48 ++++----- tests/testthat/test_convert_data.R | 23 +++-- tests/testthat/test_decision_tree.R | 6 +- tests/testthat/test_descriptors.R | 91 +++++++++-------- tests/testthat/test_failed_models.R | 15 +-- tests/testthat/test_fit_interfaces.R | 19 ++-- tests/testthat/test_linear_reg.R | 108 ++++++++++---------- tests/testthat/test_linear_reg_glmnet.R | 85 +++++++-------- tests/testthat/test_linear_reg_keras.R | 37 +++---- tests/testthat/test_linear_reg_spark.R | 16 +-- tests/testthat/test_linear_reg_stan.R | 49 ++++----- tests/testthat/test_logistic_reg.R | 3 +- tests/testthat/test_logistic_reg_glmnet.R | 1 + tests/testthat/test_logistic_reg_spark.R | 1 + tests/testthat/test_logistic_reg_stan.R | 1 + tests/testthat/test_mars.R | 61 +++++------ tests/testthat/test_misc.R | 5 +- tests/testthat/test_mlp_keras.R | 71 ++++++------- tests/testthat/test_mlp_nnet.R | 47 ++++----- tests/testthat/test_multinom_reg.R | 4 +- tests/testthat/test_multinom_reg_glmnet.R | 56 +++++----- tests/testthat/test_multinom_reg_keras.R | 22 ++-- tests/testthat/test_multinom_reg_nnet.R | 14 +-- tests/testthat/test_multinom_reg_spark.R | 20 ++-- tests/testthat/test_nearest_neighbor_kknn.R | 77 +++++++------- tests/testthat/test_nullmodel.R | 55 +++++----- tests/testthat/test_predict_formats.R | 17 +-- tests/testthat/test_rand_forest_ranger.R | 38 +++---- tests/testthat/test_rand_forest_spark.R | 18 ++-- tests/testthat/test_svm_liquidsvm.R | 35 ++++--- tests/testthat/test_svm_poly.R | 50 ++++----- tests/testthat/test_svm_rbf.R | 50 ++++----- 42 files changed, 665 insertions(+), 615 deletions(-) diff --git a/R/descriptors.R b/R/descriptors.R index 310409735..dc64f0737 100644 --- a/R/descriptors.R +++ b/R/descriptors.R @@ -26,28 +26,28 @@ #' column, `..y`. 
#' } #' -#' For example, if you use the model formula `Sepal.Width ~ .` with the `iris` +#' For example, if you use the model formula `mpg ~ .` with the `mtcars` #' data, the values would be #' \preformatted{ -#' .preds() = 4 (the 4 columns in `iris`) -#' .cols() = 5 (3 numeric columns + 2 from Species dummy variables) -#' .obs() = 150 +#' .preds() = 10 (the 10 columns in `mtcars`) +#' .cols() = 10 (10 numeric columns + 0 from dummy variables) +#' .obs() = 32 #' .lvls() = NA (no factor outcome) -#' .facts() = 1 (the Species predictor) -#' .y() = (Sepal.Width as a vector) -#' .x() = (The other 4 columns as a data frame) +#' .facts() = 0 (no factor outcome) +#' .y() = (mpg as a vector) +#' .x() = (The other 10 columns as a data frame) #' .dat() = (The full data set) #' } #' -#' If the formula `Species ~ .` where used: +#' If the formula `as.character(cyl) ~ .` where used: #' \preformatted{ -#' .preds() = 4 (the 4 numeric columns in `iris`) -#' .cols() = 4 (same) -#' .obs() = 150 -#' .lvls() = c(setosa = 50, versicolor = 50, virginica = 50) +#' .preds() = 10 (the 10 numeric columns in `mtcars`) +#' .cols() = 10 (same) +#' .obs() = 32 +#' .lvls() = c("4" = 11, "6" = 7, "8" = 14) #' .facts() = 0 -#' .y() = (Species as a vector) -#' .x() = (The other 4 columns as a data frame) +#' .y() = (as.character(cyl) as a vector) +#' .x() = (The other 10 columns as a data frame) #' .dat() = (The full data set) #' } #' diff --git a/R/model_object_docs.R b/R/model_object_docs.R index af46bc0e8..f7006a922 100644 --- a/R/model_object_docs.R +++ b/R/model_object_docs.R @@ -53,18 +53,18 @@ #' `parsnip` model functions do not do this. For example, using #' #'\preformatted{ -#' rand_forest(mtry = ncol(iris) - 1) +#' rand_forest(mtry = ncol(mtcars) - 1) #' } #' -#' **does not** execute `ncol(iris) - 1` when creating the specification. +#' **does not** execute `ncol(mtcars) - 1` when creating the specification. #' This can be seen in the output: #' #'\preformatted{ -#' > rand_forest(mtry = ncol(iris) - 1) +#' > rand_forest(mtry = ncol(mtcars) - 1) #' Random Forest Model Specification (unknown) #' #' Main Arguments: -#' mtry = ncol(iris) - 1 +#' mtry = ncol(mtcars) - 1 #'} #' #' The model functions save the argument _expressions_ and their @@ -102,14 +102,14 @@ #' object is small. For example, using #' #'\preformatted{ -#' rand_forest(mtry = ncol(!!iris) - 1) +#' rand_forest(mtry = ncol(!!mtcars) - 1) #' } #' #' would work (and be reproducible between sessions) but embeds -#' the entire iris data set into the `mtry` expression: +#' the entire mtcars data set into the `mtry` expression: #' #'\preformatted{ -#' > rand_forest(mtry = ncol(!!iris) - 1) +#' > rand_forest(mtry = ncol(!!mtcars) - 1) #' Random Forest Model Specification (unknown) #' #' Main Arguments: @@ -120,14 +120,14 @@ #' it, this wouldn't be too bad: #' #'\preformatted{ -#' > mtry_val <- ncol(iris) - 1 +#' > mtry_val <- ncol(mtcars) - 1 #' > mtry_val -#' [1] 4 +#' [1] 10 #' > rand_forest(mtry = !!mtry_val) #' Random Forest Model Specification (unknown) #' #' Main Arguments: -#' mtry = 4 +#' mtry = 10 #'} #' #' More information on quosures and quasiquotation can be found at diff --git a/R/nullmodel.R b/R/nullmodel.R index 336e2356b..fbb557cd8 100644 --- a/R/nullmodel.R +++ b/R/nullmodel.R @@ -182,7 +182,6 @@ null_model <- #' @return A tibble with column `value`. 
#' @export #' @examples -#' nullmodel(iris[,-5], iris$Species) %>% tidy() #' #' nullmodel(mtcars[,-1], mtcars$mpg) %>% tidy() diff --git a/man/descriptors.Rd b/man/descriptors.Rd index 22f44e5c2..b53ec6459 100644 --- a/man/descriptors.Rd +++ b/man/descriptors.Rd @@ -55,28 +55,28 @@ outcomes. If \code{fit_xy()} was used, the outcomes are attached as the column, \code{..y}. } -For example, if you use the model formula \code{Sepal.Width ~ .} with the \code{iris} +For example, if you use the model formula \code{mpg ~ .} with the \code{mtcars} data, the values would be \preformatted{ - .preds() = 4 (the 4 columns in `iris`) - .cols() = 5 (3 numeric columns + 2 from Species dummy variables) - .obs() = 150 + .preds() = 10 (the 10 columns in `mtcars`) + .cols() = 10 (10 numeric columns + 0 from dummy variables) + .obs() = 32 .lvls() = NA (no factor outcome) - .facts() = 1 (the Species predictor) - .y() = (Sepal.Width as a vector) - .x() = (The other 4 columns as a data frame) + .facts() = 0 (no factor outcome) + .y() = (mpg as a vector) + .x() = (The other 10 columns as a data frame) .dat() = (The full data set) } -If the formula \code{Species ~ .} where used: +If the formula \code{as.character(cyl) ~ .} where used: \preformatted{ - .preds() = 4 (the 4 numeric columns in `iris`) - .cols() = 4 (same) - .obs() = 150 - .lvls() = c(setosa = 50, versicolor = 50, virginica = 50) + .preds() = 10 (the 10 numeric columns in `mtcars`) + .cols() = 10 (same) + .obs() = 32 + .lvls() = c("4" = 11, "6" = 7, "8" = 14) .facts() = 0 - .y() = (Species as a vector) - .x() = (The other 4 columns as a data frame) + .y() = (as.character(cyl) as a vector) + .x() = (The other 10 columns as a data frame) .dat() = (The full data set) } diff --git a/man/model_spec.Rd b/man/model_spec.Rd index 54e19ac94..f61c9aba0 100644 --- a/man/model_spec.Rd +++ b/man/model_spec.Rd @@ -57,18 +57,18 @@ arguments. For example, when calling \code{mean(dat_vec)}, the object \code{parsnip} model functions do not do this. For example, using \preformatted{ - rand_forest(mtry = ncol(iris) - 1) + rand_forest(mtry = ncol(mtcars) - 1) } -\strong{does not} execute \code{ncol(iris) - 1} when creating the specification. +\strong{does not} execute \code{ncol(mtcars) - 1} when creating the specification. This can be seen in the output: \preformatted{ - > rand_forest(mtry = ncol(iris) - 1) + > rand_forest(mtry = ncol(mtcars) - 1) Random Forest Model Specification (unknown) Main Arguments: - mtry = ncol(iris) - 1 + mtry = ncol(mtcars) - 1 } The model functions save the argument \emph{expressions} and their @@ -106,14 +106,14 @@ model specification and might be the best idea when the data object is small. 
For example, using \preformatted{ - rand_forest(mtry = ncol(!!iris) - 1) + rand_forest(mtry = ncol(!!mtcars) - 1) } would work (and be reproducible between sessions) but embeds -the entire iris data set into the \code{mtry} expression: +the entire mtcars data set into the \code{mtry} expression: \preformatted{ - > rand_forest(mtry = ncol(!!iris) - 1) + > rand_forest(mtry = ncol(!!mtcars) - 1) Random Forest Model Specification (unknown) Main Arguments: @@ -124,14 +124,14 @@ However, if there were an object with the number of columns in it, this wouldn't be too bad: \preformatted{ - > mtry_val <- ncol(iris) - 1 + > mtry_val <- ncol(mtcars) - 1 > mtry_val - [1] 4 + [1] 10 > rand_forest(mtry = !!mtry_val) Random Forest Model Specification (unknown) Main Arguments: - mtry = 4 + mtry = 10 } More information on quosures and quasiquotation can be found at diff --git a/man/tidy.nullmodel.Rd b/man/tidy.nullmodel.Rd index 6ff19e9da..c48a073e9 100644 --- a/man/tidy.nullmodel.Rd +++ b/man/tidy.nullmodel.Rd @@ -18,7 +18,6 @@ A tibble with column \code{value}. Return the results of \code{nullmodel} as a tibble } \examples{ -nullmodel(iris[,-5], iris$Species) \%>\% tidy() nullmodel(mtcars[,-1], mtcars$mpg) \%>\% tidy() } diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index 5c9a41c07..70eb0fb2e 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -2,6 +2,7 @@ library(modeldata) data("wa_churn") data("lending_club") +data("hpc_data") # ------------------------------------------------------------------------------ diff --git a/tests/testthat/test_adds.R b/tests/testthat/test_adds.R index e693cc973..086defaba 100644 --- a/tests/testthat/test_adds.R +++ b/tests/testthat/test_adds.R @@ -4,14 +4,16 @@ library(dplyr) context("adding functions") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ test_that('adding row indicies', { - iris_2 <- iris %>% add_rowindex() - expect_true(nrow(iris_2) == 150) - expect_true(sum(names(iris_2) == ".row") == 1) - expect_true(is.integer(iris_2$.row)) + hpc_2 <- hpc %>% add_rowindex() + expect_true(nrow(hpc_2) == 150) + expect_true(sum(names(hpc_2) == ".row") == 1) + expect_true(is.integer(hpc_2$.row)) mtcar_2 <- dplyr::as_tibble(mtcars) %>% dplyr::slice(0) %>% add_rowindex() expect_true(nrow(mtcar_2) == 0) diff --git a/tests/testthat/test_boost_tree.R b/tests/testthat/test_boost_tree.R index f31adca8c..724de2831 100644 --- a/tests/testthat/test_boost_tree.R +++ b/tests/testthat/test_boost_tree.R @@ -6,6 +6,8 @@ library(rlang) context("boosted trees") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -134,11 +136,11 @@ test_that('bad input', { expect_error(boost_tree(mode = "bogus")) expect_error({ bt <- boost_tree(trees = -1) %>% set_engine("xgboost") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error({ bt <- boost_tree(min_n = -10) %>% set_engine("xgboost") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_message(translate(boost_tree(mode = "classification"), engine = NULL)) expect_error(translate(boost_tree(formula = y ~ x))) diff --git a/tests/testthat/test_boost_tree_spark.R b/tests/testthat/test_boost_tree_spark.R index 45b050031..647cfca98 100644 --- a/tests/testthat/test_boost_tree_spark.R +++ 
b/tests/testthat/test_boost_tree_spark.R @@ -6,21 +6,21 @@ library(dplyr) context("boosted tree execution with spark") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ test_that('spark execution', { skip_if_not_installed("sparklyr") - library(sparklyr) sc <- try(spark_connect(master = "local"), silent = TRUE) skip_if(inherits(sc, "try-error")) - iris_bt_tr <- copy_to(sc, iris[-(1:4), ], "iris_bt_tr", overwrite = TRUE) - iris_bt_te <- copy_to(sc, iris[ 1:4 , -1], "iris_bt_te", overwrite = TRUE) + hpc_bt_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_bt_tr", overwrite = TRUE) + hpc_bt_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_bt_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -30,8 +30,8 @@ test_that('spark execution', { boost_tree(trees = 5, mode = "regression") %>% set_engine("spark", seed = 12), control = ctrl, - Sepal_Length ~ ., - data = iris_bt_tr + class ~ ., + data = hpc_bt_tr ), regexp = NA ) @@ -43,29 +43,29 @@ test_that('spark execution', { boost_tree(trees = 5, mode = "regression") %>% set_engine("spark", seed = 12), control = ctrl, - Sepal_Length ~ ., - data = iris_bt_tr + compounds ~ ., + data = hpc_bt_tr ), regexp = NA ) expect_error( - spark_reg_pred <- predict(spark_reg_fit, iris_bt_te), + spark_reg_pred <- predict(spark_reg_fit, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te), + spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_dup <- predict(spark_reg_fit_dup, iris_bt_te), + spark_reg_dup <- predict(spark_reg_fit_dup, hpc_bt_te), regexp = NA ) expect_error( - spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te), + spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, hpc_bt_te), regexp = NA ) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index 8c70532b5..d181843ed 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -5,11 +5,11 @@ library(parsnip) context("boosted tree execution with xgboost") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] +num_pred <- names(hpc)[1:4] -num_pred <- names(iris)[1:4] - -iris_xgboost <- +hpc_xgboost <- boost_tree(trees = 2, mode = "classification") %>% set_engine("xgboost") @@ -21,18 +21,18 @@ test_that('xgboost execution, classification', { expect_error( res <- parsnip::fit( - iris_xgboost, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_xgboost, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA ) expect_error( res <- parsnip::fit_xy( - iris_xgboost, - x = iris[, num_pred], - y = iris$Species, + hpc_xgboost, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -43,9 +43,9 @@ test_that('xgboost execution, classification', { expect_error( res <- parsnip::fit( - iris_xgboost, - Species ~ novar, - data = iris, + hpc_xgboost, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -58,28 +58,28 @@ test_that('xgboost classification prediction', { library(xgboost) xy_fit <- fit_xy( - iris_xgboost, - x = iris[, num_pred], - y = iris$Species, + hpc_xgboost, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = 
"class") + xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE) - xy_pred <- factor(levels(iris$Species)[apply(xy_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + xy_pred <- factor(levels(hpc$class)[apply(xy_pred, 1, which.max)], levels = levels(hpc$class)) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( - iris_xgboost, - Species ~ ., - data = iris, + hpc_xgboost, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class") + form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE) - form_pred <- factor(levels(iris$Species)[apply(form_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + form_pred <- factor(levels(hpc$class)[apply(form_pred, 1, which.max)], levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) }) diff --git a/tests/testthat/test_convert_data.R b/tests/testthat/test_convert_data.R index c58364be2..48c0cf8fa 100644 --- a/tests/testthat/test_convert_data.R +++ b/tests/testthat/test_convert_data.R @@ -2,6 +2,9 @@ library(testthat) context("data conversion") library(parsnip) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # to go from lm_object$x results to our format format_x_for_test <- function(x, df = TRUE) { @@ -260,12 +263,12 @@ test_that("numeric x and numeric multivariate y", { test_that("numeric x and factor y", { expected <- expect_warning( - glm(Species ~ ., data = iris, x = TRUE, y = TRUE, family = binomial() + glm(class ~ ., data = hpc, x = TRUE, y = TRUE, family = binomial() ) ) - observed <- parsnip:::convert_form_to_xy_fit(Species ~ ., data = iris) + observed <- parsnip:::convert_form_to_xy_fit(class ~ ., data = hpc) expect_equal(format_x_for_test(expected$x), observed$x) - expect_equivalent(iris$Species, observed$y) + expect_equivalent(hpc$class, observed$y) expect_equal(expected$terms, observed$terms) expect_equal(expected$xlevels, observed$xlevels) expect_null(observed$weights) @@ -273,7 +276,7 @@ test_that("numeric x and factor y", { expect_equal( head(format_x_for_test(expected$x)), - parsnip:::convert_form_to_xy_new(observed, new_data = head(iris))$x + parsnip:::convert_form_to_xy_new(observed, new_data = head(hpc))$x ) }) @@ -445,22 +448,22 @@ test_that("1 col matrix x, 1 col matrix y", { test_that("matrix x, factor y", { - observed <- parsnip:::convert_xy_to_form_fit(as.matrix(iris[, -5]), iris$Species) - expected <- iris + observed <- parsnip:::convert_xy_to_form_fit(as.matrix(hpc[, -5]), hpc$class) + expected <- hpc names(expected)[5] <- "..y" expect_equal(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) - expect_equal(names(iris)[-5], observed$x_var) + expect_equal(names(hpc)[-5], observed$x_var) expect_null(observed$weights) }) test_that("data frame x, factor y", { - observed <- parsnip:::convert_xy_to_form_fit(iris[, -5], iris$Species) - expected <- iris + observed <- parsnip:::convert_xy_to_form_fit(hpc[, -5], hpc$class) + expected 
<- hpc names(expected)[5] <- "..y" expect_equivalent(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) - expect_equal(names(iris)[-5], observed$x_var) + expect_equal(names(hpc)[-5], observed$x_var) expect_null(observed$weights) }) diff --git a/tests/testthat/test_decision_tree.R b/tests/testthat/test_decision_tree.R index 87cbe1864..18720e7af 100644 --- a/tests/testthat/test_decision_tree.R +++ b/tests/testthat/test_decision_tree.R @@ -6,6 +6,8 @@ library(rlang) context("decision trees") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -124,11 +126,11 @@ test_that('bad input', { expect_error(decision_tree(mode = "bogus")) expect_error({ bt <- decision_tree(cost_complexity = -1) %>% set_engine("rpart") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error({ bt <- decision_tree(min_n = 0) %>% set_engine("rpart") - fit(bt, Species ~ ., iris) + fit(bt, class ~ ., hpc) }) expect_error(translate(decision_tree(), engine = NULL)) expect_error(translate(decision_tree(formula = y ~ x))) diff --git a/tests/testthat/test_descriptors.R b/tests/testthat/test_descriptors.R index fc74fb606..34390a989 100644 --- a/tests/testthat/test_descriptors.R +++ b/tests/testthat/test_descriptors.R @@ -1,6 +1,9 @@ library(testthat) library(parsnip) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("descriptor variables") @@ -26,7 +29,7 @@ eval_descrs <- function(descrs, not = NULL) { lapply(descrs, do.call, list()) } -species_tab <- table(iris$Species, dnn = NULL) +class_tab <- table(hpc_data$class, dnn = NULL) # ------------------------------------------------------------------------------ @@ -83,39 +86,39 @@ context("Testing formula -> xy conversion") test_that("numeric y and dummy vars", { expect_equal( - template(5, 4, 150, NA, 1, iris, iris[-2], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ ., data = iris)) + template(5, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ ., data = hpc)) ) expect_equal( - template(2, 1, 150, NA, 1, iris, iris["Species"], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ Species, data = iris)) + template(2, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ class, data = hpc)) ) }) test_that("numeric y and x", { expect_equal( - template(1, 1, 150, NA, 0, iris, iris["Sepal.Length"], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ Sepal.Length, data = iris)) + template(1, 1, 150, NA, 0, hpc, hpc["input_fields"], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ input_fields, data = hpc)) ) expect_equal( { - log_sep <- iris["Sepal.Length"] - log_sep[["Sepal.Length"]] <- log(log_sep[["Sepal.Length"]]) - names(log_sep) <- "log(Sepal.Length)" - template(1, 1, 150, NA, 0, iris, log_sep, iris[,"Sepal.Width"]) + log_sep <- hpc["input_fields"] + log_sep[["input_fields"]] <- log(log_sep[["input_fields"]]) + names(log_sep) <- "log(input_fields)" + template(1, 1, 150, NA, 0, hpc, log_sep, hpc[,"compounds"]) }, - eval_descrs(get_descr_form(Sepal.Width ~ log(Sepal.Length), data = iris)) + eval_descrs(get_descr_form(compounds ~ log(input_fields), data = hpc)) ) }) test_that("factor y", { expect_equal( - template(4, 4, 150, species_tab, 0, iris, iris[-5], 
iris[,"Species"]), - eval_descrs(get_descr_form(Species ~ ., data = iris)) + template(4, 4, 150, class_tab, 0, hpc, hpc[-5], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ ., data = hpc)) ) expect_equal( - template(1, 1, 150, species_tab, 0, iris, iris["Sepal.Length"], iris[,"Species"]), - eval_descrs(get_descr_form(Species ~ Sepal.Length, data = iris)) + template(1, 1, 150, class_tab, 0, hpc, hpc["input_fields"], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ input_fields, data = hpc)) ) }) @@ -128,26 +131,26 @@ test_that("factors all the way down", { }) test_that("weird cases", { - # So model.frame ignores - signs in a model formula so Species is not removed + # So model.frame ignores - signs in a model formula so class is not removed # prior to model.matrix; otherwise this should have n_cols = 3 expect_equal( - template(3, 4, 150, NA, 1, iris, iris[-2], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ . - Species, data = iris)) + template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ . - class, data = hpc)) ) # Oy ve! Before going to model.matrix, model.frame produces a data frame # with one column and that column is a matrix (with the results from - # `poly(Sepal.Length, 3)` - x <- model.frame(~poly(Sepal.Length, 3), iris) + # `poly(input_fields, 3)` + x <- model.frame(~poly(input_fields, 3), hpc) attributes(x) <- attributes(as.data.frame(x))[c("names", "class", "row.names")] expect_equal( - template(3, 1, 150, NA, 0, iris, x, iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ poly(Sepal.Length, 3), data = iris)) + template(3, 1, 150, NA, 0, hpc, x, hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ poly(input_fields, 3), data = hpc)) ) expect_equal( - template(0, 0, 150, NA, 0, iris, iris[,numeric()], iris[,"Sepal.Width"]), - eval_descrs(get_descr_form(Sepal.Width ~ 1, data = iris)) + template(0, 0, 150, NA, 0, hpc, hpc[,numeric()], hpc[,"compounds"]), + eval_descrs(get_descr_form(compounds ~ 1, data = hpc)) ) }) @@ -156,24 +159,24 @@ test_that("weird cases", { context("Testing xy -> formula conversion") test_that("numeric y and dummy vars", { - iris2 <- dplyr::rename(iris, ..y = Species) - rownames(iris2) <- rownames(iris2) # convert to char + hpc2 <- dplyr::rename(hpc, ..y = class) + rownames(hpc2) <- rownames(hpc2) # convert to char expect_equal( - template(4, 4, 150, species_tab, 0, iris2, iris[, 1:4], iris$Species), - eval_descrs(get_descr_xy(x = iris[, 1:4], y = iris$Species)) + template(4, 4, 150, class_tab, 0, hpc2, hpc[, 1:4], hpc$class), + eval_descrs(get_descr_xy(x = hpc[, 1:4], y = hpc$class)) ) - iris2 <- iris[,c(4,5,1,2)] - rownames(iris2) <- rownames(iris2) + hpc2 <- hpc[,c(4,5,1,2)] + rownames(hpc2) <- rownames(hpc2) expect_equal( - template(2, 2, 150, NA, 1, iris2, iris[,4:5], iris[,1:2]), - eval_descrs(get_descr_xy(x = iris[, 4:5], y = iris[, 1:2])) + template(2, 2, 150, NA, 1, hpc2, hpc[,4:5], hpc[,1:2]), + eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1:2])) ) - iris3 <- iris2[,c("Petal.Width", "Species", "Sepal.Length")] + hpc3 <- hpc2[,c("iterations", "class", "input_fields")] expect_equal( - template(2, 2, 150, NA, 1, iris3, iris[, 4:5], iris[, 1, drop = FALSE]), - eval_descrs(get_descr_xy(x = iris[, 4:5], y = iris[, 1, drop = FALSE])) + template(2, 2, 150, NA, 1, hpc3, hpc[, 4:5], hpc[, 1, drop = FALSE]), + eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1, drop = FALSE])) ) }) @@ -193,7 +196,7 @@ test_that("spark descriptor", { skip_if(inherits(sc, 
"try-error")) npk_descr <- copy_to(sc, npk[, 1:4], "npk_descr", overwrite = TRUE) - iris_descr <- copy_to(sc, iris, "iris_descr", overwrite = TRUE) + hpc_descr <- copy_to(sc, hpc, "hpc_descr", overwrite = TRUE) # spark does not allow .x, .y, .dat template2 <- purrr::partial(template, x = NULL, y = NULL, dat = NULL) @@ -201,23 +204,23 @@ test_that("spark descriptor", { expect_equal( template2(5, 4, 150, NA, 1), - eval_descrs2(get_descr_form(Sepal_Width ~ ., data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ ., data = hpc_descr)) ) expect_equal( template2(2, 1, 150, NA, 1), - eval_descrs2(get_descr_form(Sepal_Width ~ Species, data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ class, data = hpc_descr)) ) expect_equal( template2(1, 1, 150, NA, 0), - eval_descrs2(get_descr_form(Sepal_Width ~ Sepal_Length, data = iris_descr)) + eval_descrs2(get_descr_form(Sepal_Width ~ Sepal_Length, data = hpc_descr)) ) expect_equivalent( - template2(4, 4, 150, species_tab, 0), - eval_descrs2(get_descr_form(Species ~ ., data = iris_descr)) + template2(4, 4, 150, class_tab, 0), + eval_descrs2(get_descr_form(class ~ ., data = hpc_descr)) ) expect_equal( - template2(1, 1, 150, species_tab, 0), - eval_descrs2(get_descr_form(Species ~ Sepal_Length, data = iris_descr)) + template2(1, 1, 150, class_tab, 0), + eval_descrs2(get_descr_form(class ~ Sepal_Length, data = hpc_descr)) ) expect_equivalent( template2(7, 3, 24, rev(table(npk$K, dnn = NULL)), 3), diff --git a/tests/testthat/test_failed_models.R b/tests/testthat/test_failed_models.R index dae099ff2..4ab1510c0 100644 --- a/tests/testthat/test_failed_models.R +++ b/tests/testthat/test_failed_models.R @@ -3,14 +3,17 @@ library(parsnip) library(dplyr) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("prediciton with failed models") # ------------------------------------------------------------------------------ -iris_bad <- - iris %>% +hpc_bad <- + hpc %>% mutate(big_num = Inf) lending_club <- @@ -30,15 +33,15 @@ test_that('numeric model', { lm_mod <- linear_reg() %>% set_engine("lm") %>% - fit(Sepal.Length ~ ., data = iris_bad, control = ctrl) + fit(compounds ~ ., data = hpc_bad, control = ctrl) - expect_warning(num_res <- predict(lm_mod, iris_bad[1:11, -1])) + expect_warning(num_res <- predict(lm_mod, hpc_bad[1:11, -1])) expect_equal(num_res, NULL) - expect_warning(ci_res <- predict(lm_mod, iris_bad[1:11, -1], type = "conf_int")) + expect_warning(ci_res <- predict(lm_mod, hpc_bad[1:11, -1], type = "conf_int")) expect_equal(ci_res, NULL) - expect_warning(pi_res <- predict(lm_mod, iris_bad[1:11, -1], type = "pred_int")) + expect_warning(pi_res <- predict(lm_mod, hpc_bad[1:11, -1], type = "pred_int")) expect_equal(pi_res, NULL) }) diff --git a/tests/testthat/test_fit_interfaces.R b/tests/testthat/test_fit_interfaces.R index e4fe0f495..c3b59954d 100644 --- a/tests/testthat/test_fit_interfaces.R +++ b/tests/testthat/test_fit_interfaces.R @@ -3,6 +3,9 @@ context("fit interfaces") library(parsnip) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + f <- y ~ x smod <- surv_reg() @@ -20,21 +23,21 @@ tester_xy <- test_that('good args', { - expect_equal( tester(NULL, formula = f, data = iris, model = rmod), "formula") - expect_equal(tester_xy(NULL, x = iris, y = iris, model = rmod), "data.frame") - expect_equal( tester(NULL, f, data = iris, model = rmod), "formula") + expect_equal( 
tester(NULL, formula = f, data = hpc, model = rmod), "formula") + expect_equal(tester_xy(NULL, x = hpc, y = hpc, model = rmod), "data.frame") + expect_equal( tester(NULL, f, data = hpc, model = rmod), "formula") expect_equal( tester(NULL, f, data = sprk, model = rmod), "formula") }) #test_that('unnamed args', { -# expect_error(tester(NULL, iris, y = iris, model = rmod)) -# expect_error(tester(NULL, data = iris, model = rmod)) +# expect_error(tester(NULL, hpc, y = hpc, model = rmod)) +# expect_error(tester(NULL, data = hpc, model = rmod)) #}) # test_that('wrong args', { - expect_error(tester_xy(NULL, x = sprk, y = iris, model = rmod)) - expect_error(tester_xy(NULL, x = iris, y = iris$Sepal.Length, model = smod)) - expect_error(tester(NULL, f, data = as.matrix(iris[, 1:4]))) + expect_error(tester_xy(NULL, x = sprk, y = hpc, model = rmod)) + expect_error(tester_xy(NULL, x = hpc, y = hpc$compounds, model = smod)) + expect_error(tester(NULL, f, data = as.matrix(hpc[, 1:4]))) }) test_that('single column df for issue #129', { diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index 470266a4b..05cee1854 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -8,7 +8,7 @@ library(tibble) context("linear regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -203,15 +203,15 @@ test_that('bad input', { expect_error(translate(linear_reg(), engine = "wat?")) expect_error(translate(linear_reg(), engine = NULL)) expect_error(translate(linear_reg(formula = y ~ x))) - expect_error(translate(linear_reg(x = iris[,1:3], y = iris$Species) %>% set_engine("glmnet"))) + expect_error(translate(linear_reg(x = hpc[,1:3], y = hpc$class) %>% set_engine("glmnet"))) expect_error(translate(linear_reg(formula = y ~ x) %>% set_engine("lm"))) }) # ------------------------------------------------------------------------------ -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg() %>% set_engine("lm") +num_pred <- names(hpc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg() %>% set_engine("lm") # ------------------------------------------------------------------------------ @@ -219,9 +219,9 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ), regexp = NA @@ -230,9 +230,9 @@ test_that('lm execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ), regexp = NA @@ -240,17 +240,17 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ) lm_form_catch <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = caught_ctrl ) expect_true(inherits(lm_form_catch$fit, "try-error")) @@ -259,9 +259,9 @@ test_that('lm execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, iterations) ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -269,9 +269,9 @@ test_that('lm execution', { expect_error( res <- fit_xy( - 
iris_basic, - x = iris[, 1:2], - y = iris[3:4], + hpc_basic, + x = hpc[, 1:2], + y = hpc[3:4], control = ctrl ), regexp = NA @@ -279,59 +279,59 @@ test_that('lm execution', { }) test_that('lm prediction', { - uni_lm <- lm(Sepal.Length ~ Sepal.Width + Petal.Width + Petal.Length, data = iris) - uni_pred <- unname(predict(uni_lm, newdata = iris[1:5, ])) - inl_lm <- lm(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) - inl_pred <- unname(predict(inl_lm, newdata = iris[1:5, ])) - mv_lm <- lm(cbind(Sepal.Width, Petal.Width) ~ ., data = iris) - mv_pred <- as_tibble(predict(mv_lm, newdata = iris[1:5, ])) - names(mv_pred) <- c(".pred_Sepal.Width", ".pred_Petal.Width") + uni_lm <- lm(input_fields ~ compounds + iterations + num_pending, data = hpc) + uni_pred <- unname(predict(uni_lm, newdata = hpc[1:5, ])) + inl_lm <- lm(input_fields ~ log(compounds) + class, data = hpc) + inl_pred <- unname(predict(inl_lm, newdata = hpc[1:5, ])) + mv_lm <- lm(cbind(compounds, iterations) ~ ., data = hpc) + mv_pred <- as_tibble(predict(mv_lm, newdata = hpc[1:5, ])) + names(mv_pred) <- c(".pred_compounds", ".pred_iterations") res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, iterations) ~ ., + data = hpc, control = ctrl ) - expect_equal(mv_pred, predict(res_mv, iris[1:5,])) + expect_equal(mv_pred, predict(res_mv, hpc[1:5,])) }) test_that('lm intervals', { - stats_lm <- lm(Sepal.Length ~ Sepal.Width + Petal.Width + Petal.Length, - data = iris) - confidence_lm <- predict(stats_lm, newdata = iris[1:5, ], + stats_lm <- lm(input_fields ~ compounds + iterations + num_pending, + data = hpc) + confidence_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "confidence") - prediction_lm <- predict(stats_lm, newdata = iris[1:5, ], + prediction_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "prediction") res_xy <- fit_xy( linear_reg() %>% set_engine("lm"), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) confidence_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "conf_int", level = 0.93) @@ -340,7 +340,7 @@ test_that('lm intervals', { prediction_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "pred_int", level = 0.93) @@ -351,12 +351,12 @@ test_that('lm intervals', { test_that('newdata error trapping', { res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ) - expect_error(predict(res_xy, newdata = iris[1:3, num_pred]), "Did you mean") + expect_error(predict(res_xy, newdata = hpc[1:3, num_pred]), "Did you mean") }) test_that('default engine', { diff --git a/tests/testthat/test_linear_reg_glmnet.R b/tests/testthat/test_linear_reg_glmnet.R index b4a4f7789..4b64b7530 100644 --- 
a/tests/testthat/test_linear_reg_glmnet.R +++ b/tests/testthat/test_linear_reg_glmnet.R @@ -7,11 +7,12 @@ library(tidyr) context("linear regression execution with glmnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg(penalty = .1, mixture = .3) %>% +num_pred <- c("compounds", "iterations", "num_pending") +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg(penalty = .1, mixture = .3) %>% set_engine("glmnet", nlambda = 15) no_lambda <- linear_reg(mixture = .3) %>% set_engine("glmnet") @@ -25,10 +26,10 @@ test_that('glmnet execution', { expect_error( res <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ), regexp = NA ) @@ -38,17 +39,17 @@ test_that('glmnet execution', { expect_error( fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ) glmnet_xy_catch <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = factor(iris$Sepal.Length), + hpc_basic, + x = hpc[, num_pred], + y = factor(hpc$input_fields), control = caught_ctrl ) expect_true(inherits(glmnet_xy_catch$fit, "try-error")) @@ -61,28 +62,28 @@ test_that('glmnet prediction, single lambda', { skip_if(run_glmnet) res_xy <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) uni_pred <- c(5.05125589060219, 4.86977761622526, 4.90912345599309, 4.93931874108359, 5.08755154547758) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred, tolerance = 0.0001) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.0001) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) form_pred <- c(5.23960117346944, 5.08769210344022, 5.15129212608077, 5.12000510716518, 5.26736239856889) - expect_equal(form_pred, predict(res_form, iris[1:5,])$.pred, tolerance = 0.0001) + expect_equal(form_pred, predict(res_form, hpc[1:5,])$.pred, tolerance = 0.0001) }) @@ -93,19 +94,19 @@ test_that('glmnet prediction, multiple lambda', { lams <- c(.01, 0.1) - iris_mult <- linear_reg(penalty = lams, mixture = .3) %>% + hpc_mult <- linear_reg(penalty = lams, mixture = .3) %>% set_engine("glmnet") res_xy <- fit_xy( - iris_mult, + hpc_mult, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) # mult_pred <- # predict(res_xy$fit, - # newx = as.matrix(iris[1:5, num_pred]), + # newx = as.matrix(hpc[1:5, num_pred]), # s = lams) # mult_pred <- stack(as.data.frame(mult_pred)) # mult_pred$penalty <- rep(lams, each = 5) @@ -131,20 +132,20 @@ test_that('glmnet prediction, multiple lambda', { expect_equal( as.data.frame(mult_pred), - multi_predict(res_xy, new_data = iris[1:5, num_pred], lambda = lams) %>% + multi_predict(res_xy, new_data = hpc[1:5, num_pred], lambda = lams) %>% unnest(cols = c(.pred)) %>% as.data.frame(), tolerance = 0.0001 ) res_form <- fit( - iris_mult, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_mult, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - # form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) + # form_mat <- model.matrix(input_fields ~ log(compounds) + 
class, data = hpc) # form_mat <- form_mat[1:5, -1] # # form_pred <- @@ -176,7 +177,7 @@ test_that('glmnet prediction, multiple lambda', { expect_equal( as.data.frame(form_pred), - multi_predict(res_form, new_data = iris[1:5, ], lambda = lams) %>% + multi_predict(res_form, new_data = hpc[1:5, ], lambda = lams) %>% unnest(cols = c(.pred)) %>% as.data.frame(), tolerance = 0.0001 @@ -188,17 +189,17 @@ test_that('glmnet prediction, all lambda', { skip_if_not_installed("glmnet") skip_if(run_glmnet) - iris_all <- linear_reg(mixture = .3) %>% + hpc_all <- linear_reg(mixture = .3) %>% set_engine("glmnet") res_xy <- fit_xy( - iris_all, + hpc_all, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ) - all_pred <- predict(res_xy$fit, newx = as.matrix(iris[1:5, num_pred])) + all_pred <- predict(res_xy$fit, newx = as.matrix(hpc[1:5, num_pred])) all_pred <- stack(as.data.frame(all_pred)) all_pred$penalty <- rep(res_xy$fit$lambda, each = 5) all_pred$rows <- rep(1:5, 2) @@ -207,16 +208,16 @@ test_that('glmnet prediction, all lambda', { names(all_pred) <- c("penalty", ".pred") all_pred <- tibble::as_tibble(all_pred) - expect_equal(all_pred, multi_predict(res_xy, new_data = iris[1:5,num_pred ]) %>% unnest(cols = c(.pred))) + expect_equal(all_pred, multi_predict(res_xy, new_data = hpc[1:5,num_pred ]) %>% unnest(cols = c(.pred))) res_form <- fit( - iris_all, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_all, + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) - form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) + form_mat <- model.matrix(input_fields ~ log(compounds) + class, data = hpc) form_mat <- form_mat[1:5, -1] form_pred <- predict(res_form$fit, newx = form_mat) @@ -228,7 +229,7 @@ test_that('glmnet prediction, all lambda', { names(form_pred) <- c("penalty", ".pred") form_pred <- tibble::as_tibble(form_pred) - expect_equal(form_pred, multi_predict(res_form, iris[1:5, c("Sepal.Width", "Species")]) %>% unnest(cols = c(.pred))) + expect_equal(form_pred, multi_predict(res_form, hpc[1:5, c("compounds", "class")]) %>% unnest(cols = c(.pred))) }) diff --git a/tests/testthat/test_linear_reg_keras.R b/tests/testthat/test_linear_reg_keras.R index 0d16bd0cb..0e8bc20c9 100644 --- a/tests/testthat/test_linear_reg_keras.R +++ b/tests/testthat/test_linear_reg_keras.R @@ -8,6 +8,7 @@ library(tibble) context("keras linear regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -33,8 +34,8 @@ test_that('model fitting', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -45,8 +46,8 @@ test_that('model fitting', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -56,8 +57,8 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -68,8 +69,8 @@ test_that('model fitting', { fit_xy( ridge_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -77,8 +78,8 @@ test_that('model fitting', { expect_error( fit( ridge_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), 
regexp = NA @@ -98,18 +99,18 @@ test_that('regression prediction', { fit_xy( basic_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ) keras_pred <- - predict(lm_fit$fit, as.matrix(iris[1:3,2:4])) + predict(lm_fit$fit, as.matrix(hpc[1:3,2:4])) colnames(keras_pred) <- ".pred" keras_pred <- keras_pred %>% as_tibble() - parsnip_pred <- predict(lm_fit, iris[1:3,2:4]) + parsnip_pred <- predict(lm_fit, hpc[1:3,2:4]) expect_equal(as.data.frame(keras_pred), as.data.frame(parsnip_pred)) set.seed(257) @@ -117,15 +118,15 @@ test_that('regression prediction', { fit_xy( ridge_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ) - keras_pred <- predict(rr_fit$fit, as.matrix(iris[1:3,2:4])) + keras_pred <- predict(rr_fit$fit, as.matrix(hpc[1:3,2:4])) colnames(keras_pred) <- ".pred" keras_pred <- as_tibble(keras_pred) - parsnip_pred <- predict(rr_fit, iris[1:3,2:4]) + parsnip_pred <- predict(rr_fit, hpc[1:3,2:4]) expect_equal(as.data.frame(keras_pred), as.data.frame(parsnip_pred)) }) diff --git a/tests/testthat/test_linear_reg_spark.R b/tests/testthat/test_linear_reg_spark.R index fc1c02b71..d85e46f1f 100644 --- a/tests/testthat/test_linear_reg_spark.R +++ b/tests/testthat/test_linear_reg_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("linear regression execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,8 +20,8 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_linreg_tr <- copy_to(sc, iris[-(1:4), ], "iris_linreg_tr", overwrite = TRUE) - iris_linreg_te <- copy_to(sc, iris[ 1:4 , -1], "iris_linreg_te", overwrite = TRUE) + hpc_linreg_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_linreg_tr", overwrite = TRUE) + hpc_linreg_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_linreg_te", overwrite = TRUE) expect_error( spark_fit <- @@ -29,7 +29,7 @@ test_that('spark execution', { linear_reg() %>% set_engine("spark"), control = ctrl, Sepal_Length ~ ., - data = iris_linreg_tr + data = hpc_linreg_tr ), regexp = NA ) @@ -38,17 +38,17 @@ test_that('spark execution', { expect_equal(multi_predict_args(spark_fit), NA_character_) expect_error( - spark_pred <- predict(spark_fit, iris_linreg_te), + spark_pred <- predict(spark_fit, hpc_linreg_te), regexp = NA ) expect_error( - spark_pred_num <- predict(spark_fit, iris_linreg_te), + spark_pred_num <- predict(spark_fit, hpc_linreg_te), regexp = NA ) - lm_fit <- lm(Sepal.Length ~ ., data = iris[-(1:4), ]) - lm_pred <- unname(predict(lm_fit, iris[ 1:4 , -1])) + lm_fit <- lm(compounds ~ ., data = hpc[-(1:4), ]) + lm_pred <- unname(predict(lm_fit, hpc[ 1:4 , -1])) expect_equal(as.data.frame(spark_pred)$pred, lm_pred) expect_equal(as.data.frame(spark_pred_num)$pred, lm_pred) diff --git a/tests/testthat/test_linear_reg_stan.R b/tests/testthat/test_linear_reg_stan.R index 33a88ded8..b77a5241c 100644 --- a/tests/testthat/test_linear_reg_stan.R +++ b/tests/testthat/test_linear_reg_stan.R @@ -2,13 +2,16 @@ library(testthat) library(parsnip) library(rlang) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("linear regression execution with stan") -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- linear_reg() %>% +num_pred <- c("compounds", "iterations", "num_pending") 
+hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- linear_reg() %>% set_engine("stan", seed = 10, chains = 1) ctrl <- control_parsnip(verbosity = 0L, catch = FALSE) @@ -23,18 +26,18 @@ test_that('stan_glm execution', { expect_error( res <- fit( - iris_basic, - Sepal.Width ~ log(Sepal.Length) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ), regexp = NA ) expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$input_fields, control = ctrl ), regexp = NA @@ -45,9 +48,9 @@ test_that('stan_glm execution', { expect_error( res <- fit( - iris_basic, - Species ~ term, - data = iris, + hpc_basic, + class ~ term, + data = hpc, control = ctrl ) ) @@ -67,20 +70,20 @@ test_that('stan prediction', { res_xy <- fit_xy( linear_reg() %>% set_engine("stan", seed = 10, chains = 1), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = quiet_ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred, tolerance = 0.001) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.001) res_form <- fit( - iris_basic, - Sepal.Width ~ log(Sepal.Length) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = quiet_ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred, tolerance = 0.001) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred, tolerance = 0.001) }) @@ -91,20 +94,20 @@ test_that('stan intervals', { res_xy <- fit_xy( linear_reg() %>% set_engine("stan", seed = 1333, chains = 10, iter = 1000), - x = iris[, num_pred], - y = iris$Sepal.Length, + x = hpc[, num_pred], + y = hpc$input_fields, control = quiet_ctrl ) confidence_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "conf_int", level = 0.93) prediction_parsnip <- predict(res_xy, - new_data = iris[1:5,], + new_data = hpc[1:5,], type = "pred_int", level = 0.93) diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 610b7cbd1..c23ca110e 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -8,6 +8,7 @@ library(tibble) context("logistic regression") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -212,7 +213,7 @@ test_that('updating', { test_that('bad input', { expect_error(logistic_reg(mode = "regression")) expect_error(translate(logistic_reg(formula = y ~ x))) - expect_error(translate(logistic_reg(x = iris[,1:3], y = iris$Species) %>% set_engine(engine = "glmnet"))) + expect_error(translate(logistic_reg(x = hpc[,1:3], y = hpc$class) %>% set_engine(engine = "glmnet"))) expect_error(translate(logistic_reg(formula = y ~ x) %>% set_engine(engine = "glm"))) }) diff --git a/tests/testthat/test_logistic_reg_glmnet.R b/tests/testthat/test_logistic_reg_glmnet.R index 0797e65c6..cd5d0b358 100644 --- a/tests/testthat/test_logistic_reg_glmnet.R +++ b/tests/testthat/test_logistic_reg_glmnet.R @@ -8,6 +8,7 @@ library(tidyr) context("logistic regression execution with glmnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] lending_club <- head(lending_club, 200) lc_form <- as.formula(Class ~ log(funded_amnt) + int_rate) diff --git a/tests/testthat/test_logistic_reg_spark.R 
b/tests/testthat/test_logistic_reg_spark.R index 429fa6822..d7cba9482 100644 --- a/tests/testthat/test_logistic_reg_spark.R +++ b/tests/testthat/test_logistic_reg_spark.R @@ -6,6 +6,7 @@ library(dplyr) context("logistic regression execution with spark") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ diff --git a/tests/testthat/test_logistic_reg_stan.R b/tests/testthat/test_logistic_reg_stan.R index 7b803853d..5f599300c 100644 --- a/tests/testthat/test_logistic_reg_stan.R +++ b/tests/testthat/test_logistic_reg_stan.R @@ -7,6 +7,7 @@ library(tibble) context("execution tests for stan logistic regression") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] lending_club <- head(lending_club, 200) diff --git a/tests/testthat/test_mars.R b/tests/testthat/test_mars.R index 2d0d31ab9..a6361bbaf 100644 --- a/tests/testthat/test_mars.R +++ b/tests/testthat/test_mars.R @@ -7,6 +7,7 @@ library(rlang) context("mars tests") source(test_path("helpers.R")) source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -114,9 +115,9 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- mars(mode = "regression") %>% set_engine("earth") +num_pred <- colnames(hpc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- mars(mode = "regression") %>% set_engine("earth") # ------------------------------------------------------------------------------ @@ -125,9 +126,9 @@ test_that('mars execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ), regexp = NA @@ -136,9 +137,9 @@ test_that('mars execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending, control = ctrl ), regexp = NA @@ -150,9 +151,9 @@ test_that('mars execution', { expect_message( expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) ), @@ -163,9 +164,9 @@ test_that('mars execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -173,9 +174,9 @@ test_that('mars execution', { expect_error( res <- fit_xy( - iris_basic, - x = iris[, 1:2], - y = iris[3:4], + hpc_basic, + x = hpc[, 1:2], + y = hpc[3:4], control = ctrl ), regexp = NA @@ -200,31 +201,31 @@ test_that('mars prediction', { )), class = "data.frame", row.names = c(NA, -5L)) res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Sepal.Length, + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending, control = ctrl ) - expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) + expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred) res_form <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc, control = ctrl ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, 
predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris, + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc, control = ctrl ) expect_equal( setNames(mv_pred, paste0(".pred_", names(mv_pred))) %>% as.data.frame(), - predict(res_mv, iris[1:5,]) %>% as.data.frame() + predict(res_mv, hpc[1:5,]) %>% as.data.frame() ) }) diff --git a/tests/testthat/test_misc.R b/tests/testthat/test_misc.R index a20a5b0fc..ab4216c2a 100644 --- a/tests/testthat/test_misc.R +++ b/tests/testthat/test_misc.R @@ -3,6 +3,9 @@ context("checking for multi_predict") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + test_that('parsnip objects', { lm_idea <- linear_reg() %>% set_engine("lm") @@ -76,7 +79,7 @@ test_that('S3 method dispatch/registration', { null_model() %>% set_engine("parsnip") %>% set_mode("classification") %>% - fit(Species ~ ., data = iris) %>% + fit(class ~ ., data = hpc) %>% tidy(), regex = NA ) diff --git a/tests/testthat/test_mlp_keras.R b/tests/testthat/test_mlp_keras.R index 810b459e1..629c602ff 100644 --- a/tests/testthat/test_mlp_keras.R +++ b/tests/testthat/test_mlp_keras.R @@ -6,11 +6,12 @@ library(tibble) context("simple neural network execution with keras") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- names(iris)[1:4] +num_pred <- names(hpc)[1:4] -iris_keras <- +hpc_keras <- mlp(mode = "classification", hidden_units = 2, epochs = 10) %>% set_engine("keras", verbose = 0) @@ -24,9 +25,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit( - iris_keras, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_keras, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA @@ -40,9 +41,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -52,9 +53,9 @@ test_that('keras execution, classification', { expect_error( res <- parsnip::fit( - iris_keras, - Species ~ novar, - data = iris, + hpc_keras, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -67,28 +68,28 @@ test_that('keras classification prediction', { library(keras) xy_fit <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- keras::predict_classes(xy_fit$fit, x = as.matrix(iris[1:8, num_pred])) - xy_pred <- factor(levels(iris$Species)[xy_pred + 1], levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")[[".pred_class"]]) + xy_pred <- keras::predict_classes(xy_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + xy_pred <- factor(levels(hpc$class)[xy_pred + 1], levels = levels(hpc$class)) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")[[".pred_class"]]) keras::backend()$clear_session() form_fit <- parsnip::fit( - iris_keras, - Species ~ ., - data = iris, + hpc_keras, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- keras::predict_classes(form_fit$fit, x = as.matrix(iris[1:8, num_pred])) - form_pred <- factor(levels(iris$Species)[form_pred + 1], levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")[[".pred_class"]]) + form_pred <- keras::predict_classes(form_fit$fit, x = as.matrix(hpc[1:8, 
num_pred])) + form_pred <- factor(levels(hpc$class)[form_pred + 1], levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")[[".pred_class"]]) keras::backend()$clear_session() }) @@ -99,30 +100,30 @@ test_that('keras classification probabilities', { skip_if_not_installed("keras") xy_fit <- parsnip::fit_xy( - iris_keras, - x = iris[, num_pred], - y = iris$Species, + hpc_keras, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- keras::predict_proba(xy_fit$fit, x = as.matrix(iris[1:8, num_pred])) - colnames(xy_pred) <- paste0(".pred_", levels(iris$Species)) + xy_pred <- keras::predict_proba(xy_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + colnames(xy_pred) <- paste0(".pred_", levels(hpc$class)) xy_pred <- as_tibble(xy_pred) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "prob")) + expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "prob")) keras::backend()$clear_session() form_fit <- parsnip::fit( - iris_keras, - Species ~ ., - data = iris, + hpc_keras, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- keras::predict_proba(form_fit$fit, x = as.matrix(iris[1:8, num_pred])) - colnames(form_pred) <- paste0(".pred_", levels(iris$Species)) + form_pred <- keras::predict_proba(form_fit$fit, x = as.matrix(hpc[1:8, num_pred])) + colnames(form_pred) <- paste0(".pred_", levels(hpc$class)) form_pred <- as_tibble(form_pred) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "prob")) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "prob")) keras::backend()$clear_session() }) diff --git a/tests/testthat/test_mlp_nnet.R b/tests/testthat/test_mlp_nnet.R index 127ee824e..37bcb9e6f 100644 --- a/tests/testthat/test_mlp_nnet.R +++ b/tests/testthat/test_mlp_nnet.R @@ -5,10 +5,11 @@ library(parsnip) context("simple neural network execution with nnet") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- names(iris)[1:4] +num_pred <- names(hpc)[1:4] -iris_nnet <- +hpc_nnet <- mlp(mode = "classification", hidden_units = 5) %>% set_engine("nnet") @@ -20,9 +21,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit( - iris_nnet, - Species ~ Sepal.Width + Sepal.Length, - data = iris, + hpc_nnet, + class ~ compounds + input_fields, + data = hpc, control = ctrl ), regexp = NA @@ -31,9 +32,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit_xy( - iris_nnet, - x = iris[, num_pred], - y = iris$Species, + hpc_nnet, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ), regexp = NA @@ -41,9 +42,9 @@ test_that('nnet execution, classification', { expect_error( res <- parsnip::fit( - iris_nnet, - Species ~ novar, - data = iris, + hpc_nnet, + class ~ novar, + data = hpc, control = ctrl ) ) @@ -55,26 +56,26 @@ test_that('nnet classification prediction', { skip_if_not_installed("nnet") xy_fit <- fit_xy( - iris_nnet, - x = iris[, num_pred], - y = iris$Species, + hpc_nnet, + x = hpc[, num_pred], + y = hpc$class, control = ctrl ) - xy_pred <- predict(xy_fit$fit, newdata = iris[1:8, num_pred], type = "class") - xy_pred <- factor(xy_pred, levels = levels(iris$Species)) - expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) + xy_pred <- predict(xy_fit$fit, newdata = hpc[1:8, num_pred], type = "class") + xy_pred <- factor(xy_pred, levels = levels(hpc$class)) + expect_equal(xy_pred, 
predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( - iris_nnet, - Species ~ ., - data = iris, + hpc_nnet, + class ~ ., + data = hpc, control = ctrl ) - form_pred <- predict(form_fit$fit, newdata = iris[1:8, num_pred], type = "class") - form_pred <- factor(form_pred, levels = levels(iris$Species)) - expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred])$.pred_class) + form_pred <- predict(form_fit$fit, newdata = hpc[1:8, num_pred], type = "class") + form_pred <- factor(form_pred, levels = levels(hpc$class)) + expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred])$.pred_class) }) diff --git a/tests/testthat/test_multinom_reg.R b/tests/testthat/test_multinom_reg.R index 46b2082bb..fbec18322 100644 --- a/tests/testthat/test_multinom_reg.R +++ b/tests/testthat/test_multinom_reg.R @@ -6,6 +6,8 @@ library(rlang) context("multinom regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -115,5 +117,5 @@ test_that('bad input', { expect_error(multinom_reg(mode = "regression")) expect_error(translate(multinom_reg() %>% set_engine("wat?"))) expect_error(translate(multinom_reg() %>% set_engine())) - expect_warning(translate(multinom_reg() %>% set_engine("glmnet", x = iris[,1:3], y = iris$Species))) + expect_warning(translate(multinom_reg() %>% set_engine("glmnet", x = hpc[,1:3], y = hpc$class))) }) diff --git a/tests/testthat/test_multinom_reg_glmnet.R b/tests/testthat/test_multinom_reg_glmnet.R index 22aa3368d..212b1ea29 100644 --- a/tests/testthat/test_multinom_reg_glmnet.R +++ b/tests/testthat/test_multinom_reg_glmnet.R @@ -7,7 +7,7 @@ library(tibble) context("multinom regression execution with glmnet") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] rows <- c(1, 51, 101) @@ -22,8 +22,8 @@ test_that('glmnet execution', { res <- fit_xy( multinom_reg() %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ), regexp = NA ) @@ -34,8 +34,8 @@ test_that('glmnet execution', { expect_error( glmnet_xy_catch <- fit_xy( multinom_reg() %>% set_engine("glmnet"), - x = iris[, 2:5], - y = iris$Sepal.Length, + x = hpc[, 2:5], + y = hpc$compounds, control = caught_ctrl ) ) @@ -50,27 +50,27 @@ test_that('glmnet prediction, one lambda', { xy_fit <- fit_xy( multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ) uni_pred <- predict(xy_fit$fit, - newx = as.matrix(iris[rows, 1:4]), + newx = as.matrix(hpc[rows, 1:4]), s = xy_fit$spec$args$penalty, type = "class") - uni_pred <- factor(uni_pred[,1], levels = levels(iris$Species)) + uni_pred <- factor(uni_pred[,1], levels = levels(hpc$class)) uni_pred <- unname(uni_pred) - expect_equal(uni_pred, predict(xy_fit, iris[rows, 1:4], type = "class")$.pred_class) + expect_equal(uni_pred, predict(xy_fit, hpc[rows, 1:4], type = "class")$.pred_class) res_form <- fit( multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), - Species ~ log(Sepal.Width) + Petal.Width, - data = iris, + class ~ log(compounds) + input_fields, + data = hpc, control = ctrl ) - form_mat <- model.matrix(Species ~ log(Sepal.Width) + Petal.Width, data = iris) + form_mat <- model.matrix(class ~ log(compounds) + input_fields, data = hpc) form_mat <- form_mat[rows, -1] form_pred <- @@ -78,9 +78,9 @@ test_that('glmnet prediction, one 
lambda', { newx = form_mat, s = res_form$spec$args$penalty, type = "class") - form_pred <- factor(form_pred[,1], levels = levels(iris$Species)) - expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, iris[rows, c("Sepal.Width", "Petal.Width")])) - expect_equal(form_pred, predict(res_form, iris[rows, c("Sepal.Width", "Petal.Width")], type = "class")$.pred_class) + form_pred <- factor(form_pred[,1], levels = levels(hpc$class)) + expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, hpc[rows, c("compounds", "input_fields")])) + expect_equal(form_pred, predict(res_form, hpc[rows, c("compounds", "input_fields")], type = "class")$.pred_class) }) @@ -95,16 +95,16 @@ test_that('glmnet probabilities, mulitiple lambda', { xy_fit <- fit_xy( multinom_reg(penalty = lams) %>% set_engine("glmnet"), control = ctrl, - x = iris[, 1:4], - y = iris$Species + x = hpc[, 1:4], + y = hpc$class ) - expect_error(predict(xy_fit, iris[rows, 1:4], type = "class")) - expect_error(predict(xy_fit, iris[rows, 1:4], type = "prob")) + expect_error(predict(xy_fit, hpc[rows, 1:4], type = "class")) + expect_error(predict(xy_fit, hpc[rows, 1:4], type = "prob")) mult_pred <- predict(xy_fit$fit, - newx = as.matrix(iris[rows, 1:4]), + newx = as.matrix(hpc[rows, 1:4]), s = lams, type = "response") mult_pred <- apply(mult_pred, 3, as_tibble) mult_pred <- dplyr:::bind_rows(mult_pred) @@ -119,7 +119,7 @@ test_that('glmnet probabilities, mulitiple lambda', { expect_equal( mult_pred$.pred, - multi_predict(xy_fit, iris[rows, 1:4], penalty = lams, type = "prob")$.pred + multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams, type = "prob")$.pred ) mult_class <- factor(names(mult_probs)[apply(mult_probs, 1, which.max)], @@ -136,17 +136,17 @@ test_that('glmnet probabilities, mulitiple lambda', { expect_equal( mult_class$.pred, - multi_predict(xy_fit, iris[rows, 1:4], penalty = lams)$.pred + multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams)$.pred ) expect_error( - multi_predict(xy_fit, newdata = iris[rows, 1:4], penalty = lams), + multi_predict(xy_fit, newdata = hpc[rows, 1:4], penalty = lams), "Did you mean" ) # Can predict probs with default penalty. 
See #108 expect_error( - multi_predict(xy_fit, new_data = iris[rows, 1:4], type = "prob"), + multi_predict(xy_fit, new_data = hpc[rows, 1:4], type = "prob"), NA ) @@ -156,10 +156,10 @@ test_that("class predictions are factors with all levels", { skip_if_not_installed("glmnet") skip_if(run_glmnet) - basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(Species ~ ., data = iris) - nd <- iris[iris$Species == "setosa", ] + basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(class ~ ., data = hpc) + nd <- hpc[hpc$class == "setosa", ] yhat <- predict(basic, new_data = nd, penalty = .1) yhat_multi <- multi_predict(basic, new_data = nd, penalty = .1)$.pred expect_is(yhat_multi[[1]]$.pred_class, "factor") - expect_equal(levels(yhat_multi[[1]]$.pred_class), levels(iris$Species)) + expect_equal(levels(yhat_multi[[1]]$.pred_class), levels(hpc$class)) }) diff --git a/tests/testthat/test_multinom_reg_keras.R b/tests/testthat/test_multinom_reg_keras.R index 495fcea01..322a8afe9 100644 --- a/tests/testthat/test_multinom_reg_keras.R +++ b/tests/testthat/test_multinom_reg_keras.R @@ -8,11 +8,13 @@ library(dplyr) context("keras logistic regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ set.seed(352) -dat <- iris[order(runif(nrow(iris))),] +dat <- hpc[order(runif(150)),] tr_dat <- dat[1:140, ] te_dat <- dat[141:150, ] @@ -42,7 +44,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -54,7 +56,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -64,7 +66,7 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -77,7 +79,7 @@ test_that('model fitting', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -85,7 +87,7 @@ test_that('model fitting', { expect_error( fit( reg_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -107,7 +109,7 @@ test_that('classification prediction', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_raw <- @@ -125,7 +127,7 @@ test_that('classification prediction', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_raw <- @@ -151,7 +153,7 @@ test_that('classification probabilities', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_pred <- @@ -168,7 +170,7 @@ test_that('classification probabilities', { reg_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) keras_pred <- diff --git a/tests/testthat/test_multinom_reg_nnet.R b/tests/testthat/test_multinom_reg_nnet.R index 5893c2316..7336be592 100644 --- a/tests/testthat/test_multinom_reg_nnet.R +++ b/tests/testthat/test_multinom_reg_nnet.R @@ -8,11 +8,13 @@ library(dplyr) context("nnet multinomial regression") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ set.seed(352) -dat <- iris[order(runif(nrow(iris))),] +dat <- hpc[order(runif(150)),] tr_dat <- dat[1:140, ] te_dat <- dat[141:150, ] @@ -37,7 +39,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = 
tr_dat$class ), regexp = NA ) @@ -50,7 +52,7 @@ test_that('model fitting', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ), regexp = NA ) @@ -60,7 +62,7 @@ test_that('model fitting', { expect_error( fit( basic_mod, - Species ~ ., + class ~ ., data = tr_dat, control = ctrl ), @@ -79,7 +81,7 @@ test_that('classification prediction', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) nnet_pred <- @@ -100,7 +102,7 @@ test_that('classification probabilities', { basic_mod, control = ctrl, x = tr_dat[, -5], - y = tr_dat$Species + y = tr_dat$class ) nnet_pred <- diff --git a/tests/testthat/test_multinom_reg_spark.R b/tests/testthat/test_multinom_reg_spark.R index 9550d98d0..55bebd919 100644 --- a/tests/testthat/test_multinom_reg_spark.R +++ b/tests/testthat/test_multinom_reg_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("multinomial regression execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,9 +20,9 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_rows <- c(1, 51, 101) - iris_tr <- copy_to(sc, iris[-iris_rows, ], "iris_tr", overwrite = TRUE) - iris_te <- copy_to(sc, iris[ iris_rows, -5], "iris_te", overwrite = TRUE) + hpc_rows <- c(1, 51, 101) + hpc_tr <- copy_to(sc, hpc[-hpc_rows, ], "hpc_tr", overwrite = TRUE) + hpc_te <- copy_to(sc, hpc[ hpc_rows, -5], "hpc_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -31,19 +31,19 @@ test_that('spark execution', { fit( multinom_reg() %>% set_engine("spark"), control = ctrl, - Species ~ ., - data = iris_tr + class ~ ., + data = hpc_tr ), regexp = NA ) expect_error( - spark_class_pred <- predict(spark_class_fit, iris_te), + spark_class_pred <- predict(spark_class_fit, hpc_te), regexp = NA ) expect_error( - spark_class_pred_class <- predict(spark_class_fit, iris_te), + spark_class_pred_class <- predict(spark_class_fit, hpc_te), regexp = NA ) @@ -55,12 +55,12 @@ test_that('spark execution', { ) expect_error( - spark_class_prob <- predict(spark_class_fit, iris_te, type = "prob"), + spark_class_prob <- predict(spark_class_fit, hpc_te, type = "prob"), regexp = NA ) expect_error( - spark_class_prob_classprob <- predict(spark_class_fit, iris_te, type = "prob"), + spark_class_prob_classprob <- predict(spark_class_fit, hpc_te, type = "prob"), regexp = NA ) diff --git a/tests/testthat/test_nearest_neighbor_kknn.R b/tests/testthat/test_nearest_neighbor_kknn.R index ed17b1206..41aab23c5 100644 --- a/tests/testthat/test_nearest_neighbor_kknn.R +++ b/tests/testthat/test_nearest_neighbor_kknn.R @@ -6,11 +6,12 @@ library(rlang) context("nearest neighbor execution with kknn") source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] -num_pred <- c("Sepal.Width", "Petal.Width", "Petal.Length") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- nearest_neighbor(mode = "classification", +num_pred <- c("compounds", "iterations", "num_pending") +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- nearest_neighbor(mode = "classification", neighbors = 8, weight_func = "triangular") %>% set_engine("kknn") @@ -26,10 +27,10 @@ test_that('kknn execution', { # expect no error expect_error( fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Sepal.Length + x = hpc[, num_pred], + y = hpc$input_fields ), regexp = "outcome should be a 
factor" ) @@ -38,10 +39,10 @@ test_that('kknn execution', { # expect no error expect_error( res <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, c("Sepal.Length", "Petal.Width")], - y = iris$Species + x = hpc[, c("input_fields", "iterations")], + y = hpc$class ), regexp = NA ) @@ -51,9 +52,9 @@ test_that('kknn execution', { expect_error( fit( - iris_basic, - iris_bad_form, - data = iris, + hpc_basic, + hpc_bad_form, + data = hpc, control = ctrl ) @@ -68,51 +69,51 @@ test_that('kknn prediction', { # continuous res_xy <- fit_xy( - iris_basic, + hpc_basic, control = ctrl, - x = iris[, num_pred], - y = iris$Species + x = hpc[, num_pred], + y = hpc$class ) uni_pred <- predict( res_xy$fit, - newdata = iris[1:5, num_pred] + newdata = hpc[1:5, num_pred] ) - expect_equal(tibble(.pred_class = uni_pred), predict(res_xy, iris[1:5, num_pred])) + expect_equal(tibble(.pred_class = uni_pred), predict(res_xy, hpc[1:5, num_pred])) # nominal res_xy_nom <- fit_xy( - iris_basic %>% set_mode("classification"), + hpc_basic %>% set_mode("classification"), control = ctrl, - x = iris[, c("Sepal.Length", "Petal.Width")], - y = iris$Species + x = hpc[, c("input_fields", "iterations")], + y = hpc$class ) uni_pred_nom <- predict( res_xy_nom$fit, - newdata = iris[1:5, c("Sepal.Length", "Petal.Width")] + newdata = hpc[1:5, c("input_fields", "iterations")] ) expect_equal( uni_pred_nom, - predict(res_xy_nom, iris[1:5, c("Sepal.Length", "Petal.Width")], type = "class")$.pred_class + predict(res_xy_nom, hpc[1:5, c("input_fields", "iterations")], type = "class")$.pred_class ) # continuous - formula interface res_form <- fit( - iris_basic %>% set_mode("regression"), - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris, + hpc_basic %>% set_mode("regression"), + input_fields ~ log(compounds) + class, + data = hpc, control = ctrl ) form_pred <- predict( res_form$fit, - newdata = iris[1:5,] + newdata = hpc[1:5,] ) - expect_equal(form_pred, predict(res_form, iris[1:5, c("Sepal.Width", "Species")])$.pred) + expect_equal(form_pred, predict(res_form, hpc[1:5, c("compounds", "class")])$.pred) }) @@ -121,23 +122,23 @@ test_that('kknn multi-predict', { skip_if_not_installed("kknn") library(kknn) - iris_te <- c(1:2, 50:51, 100:101) + hpc_te <- c(1:2, 50:51, 100:101) k_vals <- 1:10 res_xy <- fit_xy( nearest_neighbor(mode = "classification", neighbors = 3) %>% set_engine("kknn"), control = ctrl, - x = iris[-iris_te, num_pred], - y = iris$Species[-iris_te] + x = hpc[-hpc_te, num_pred], + y = hpc$class[-hpc_te] ) - pred_multi <- multi_predict(res_xy, iris[iris_te, num_pred], neighbors = k_vals) + pred_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals) expect_equal(pred_multi %>% unnest(cols = c(.pred)) %>% nrow(), - length(iris_te) * length(k_vals)) - expect_equal(pred_multi %>% nrow(), length(iris_te)) + length(hpc_te) * length(k_vals)) + expect_equal(pred_multi %>% nrow(), length(hpc_te)) - pred_uni <- predict(res_xy, iris[iris_te, num_pred]) + pred_uni <- predict(res_xy, hpc[hpc_te, num_pred]) pred_uni_obs <- pred_multi %>% mutate(.rows = row_number()) %>% @@ -148,13 +149,13 @@ test_that('kknn multi-predict', { expect_equal(pred_uni, pred_uni_obs) - prob_multi <- multi_predict(res_xy, iris[iris_te, num_pred], + prob_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals, type = "prob") expect_equal(prob_multi %>% unnest(cols = c(.pred)) %>% nrow(), - length(iris_te) * length(k_vals)) - expect_equal(prob_multi %>% nrow(), length(iris_te)) + length(hpc_te) * length(k_vals)) + 
expect_equal(prob_multi %>% nrow(), length(hpc_te)) - prob_uni <- predict(res_xy, iris[iris_te, num_pred], type = "prob") + prob_uni <- predict(res_xy, hpc[hpc_te, num_pred], type = "prob") prob_uni_obs <- prob_multi %>% mutate(.rows = row_number()) %>% diff --git a/tests/testthat/test_nullmodel.R b/tests/testthat/test_nullmodel.R index b3eb4ee51..db499d54c 100644 --- a/tests/testthat/test_nullmodel.R +++ b/tests/testthat/test_nullmodel.R @@ -5,7 +5,8 @@ library(tibble) context("test-nullmodel") source("helpers.R") - +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] test_that('primary arguments', { basic <- null_model(mode = "regression") @@ -36,16 +37,16 @@ test_that('bad input', { expect_error(translate(null_model(formula = y ~ x))) expect_warning( translate( - null_model(mode = "regression") %>% set_engine("parsnip", x = iris[,1:3], y = iris$Species) + null_model(mode = "regression") %>% set_engine("parsnip", x = hpc[,1:3], y = hpc$class) ) ) }) # ------------------------------------------------------------------------------ -num_pred <-c("Sepal.Length", "Sepal.Width", "Petal.Width") -iris_bad_form <- as.formula(Species ~ term) -iris_basic <- null_model(mode = "regression") %>% set_engine("parsnip") +num_pred <- names(phc)[1:3] +hpc_bad_form <- as.formula(class ~ term) +hpc_basic <- null_model(mode = "regression") %>% set_engine("parsnip") # ------------------------------------------------------------------------------ @@ -53,26 +54,26 @@ test_that('nullmodel execution', { expect_error( res <- fit( - iris_basic, - Sepal.Length ~ log(Sepal.Width) + Species, - data = iris + hpc_basic, + compounds ~ log(input_fields) + class, + data = hpc ), regexp = NA ) expect_error( res <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Petal.Length + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending ), regexp = NA ) expect_error( res <- fit( - iris_basic, - iris_bad_form, - data = iris + hpc_basic, + hpc_bad_form, + data = hpc ) ) @@ -80,9 +81,9 @@ test_that('nullmodel execution', { expect_error( res <- fit( - iris_basic, - cbind(Sepal.Width, Petal.Width) ~ ., - data = iris + hpc_basic, + cbind(compounds, input_fields) ~ ., + data = hpc ), regexp = NA ) @@ -97,23 +98,23 @@ test_that('nullmodel prediction', { carb = rep(2.8125, 5)) res_xy <- fit_xy( - iris_basic, - x = iris[, num_pred], - y = iris$Petal.Length + hpc_basic, + x = hpc[, num_pred], + y = hpc$num_pending ) - expect_equal(uni_pred, predict(res_xy, new_data = iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, new_data = hpc[1:5, num_pred])) res_form <- fit( - iris_basic, - Petal.Length ~ log(Sepal.Width) + Species, - data = iris + hpc_basic, + num_pending ~ log(compounds) + class, + data = hpc_basic ) - expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) # Multivariate y res <- fit( - iris_basic, + hpc_basic, cbind(gear, carb) ~ ., data = mtcars ) @@ -131,7 +132,7 @@ test_that('classification', { expect_error( null_model <- null_model(mode = "classification") %>% set_engine("parsnip") %>% - fit(Species ~ ., data = iris), + fit(class ~ ., data = hpc), regexp = NA ) expect_true(!is.null(null_model$fit)) diff --git a/tests/testthat/test_predict_formats.R b/tests/testthat/test_predict_formats.R index 442650486..9b9b5430a 100644 --- a/tests/testthat/test_predict_formats.R +++ b/tests/testthat/test_predict_formats.R @@ -3,6 +3,9 @@ library(parsnip) library(tibble) library(dplyr) +source(test_path("helper-objects.R")) +hpc <- 
hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ context("check predict output structures") @@ -10,7 +13,7 @@ context("check predict output structures") lm_fit <- linear_reg(mode = "regression") %>% set_engine("lm") %>% - fit(Sepal.Length ~ ., data = iris) + fit(class ~ ., data = hpc) class_dat <- airquality[complete.cases(airquality),] class_dat$Ozone <- factor(ifelse(class_dat$Ozone >= 31, "high", "low")) @@ -31,9 +34,9 @@ lr_fit_2 <- # ------------------------------------------------------------------------------ test_that('regression predictions', { - expect_true(is_tibble(predict(lm_fit, new_data = iris[1:5,-1]))) - expect_true(is.vector(parsnip:::predict_numeric.model_fit(lm_fit, new_data = iris[1:5,-1]))) - expect_equal(names(predict(lm_fit, new_data = iris[1:5,-1])), ".pred") + expect_true(is_tibble(predict(lm_fit, new_data = hpc[1:5,-1]))) + expect_true(is.vector(parsnip:::predict_numeric.model_fit(lm_fit, new_data = hpc[1:5,-1]))) + expect_equal(names(predict(lm_fit, new_data = hpc[1:5,-1])), ".pred") }) test_that('classification predictions', { @@ -67,18 +70,18 @@ test_that('non-factor classification', { expect_error( logistic_reg() %>% set_engine("glm") %>% - fit(Species ~ ., data = iris %>% mutate(Species = Species == "setosa")) + fit(class ~ ., data = hpc %>% mutate(class = class == "VF")) ) expect_error( logistic_reg() %>% set_engine("glm") %>% - fit(Species ~ ., data = iris %>% mutate(Species = ifelse(Species == "setosa", 1, 0))) + fit(class ~ ., data = hpc %>% mutate(class = ifelse(class == "VF", 1, 0))) ) expect_error( multinom_reg() %>% set_engine("glmnet") %>% - fit(Species ~ ., data = iris %>% mutate(Species = as.character(Species))) + fit(class ~ ., data = hpc %>% mutate(class = as.character(class))) ) }) diff --git a/tests/testthat/test_rand_forest_ranger.R b/tests/testthat/test_rand_forest_ranger.R index 46a2a6eb8..6d96e2490 100644 --- a/tests/testthat/test_rand_forest_ranger.R +++ b/tests/testthat/test_rand_forest_ranger.R @@ -7,7 +7,7 @@ library(rlang) context("random forest execution with ranger") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -315,8 +315,8 @@ test_that('additional descriptor tests', { descr_other_xy <- fit_xy( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(descr_other_xy$fit$mtry, 2) @@ -325,7 +325,7 @@ test_that('additional descriptor tests', { descr_other_f <- fit( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - Species ~ ., data = iris, + class ~ ., data = hpc, control = ctrl ) expect_equal(descr_other_f$fit$mtry, 2) @@ -334,8 +334,8 @@ test_that('additional descriptor tests', { descr_other_xy <- fit_xy( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(descr_other_xy$fit$mtry, 2) @@ -344,7 +344,7 @@ test_that('additional descriptor tests', { descr_other_f <- fit( rand_forest(mode = "classification", mtry = 2) %>% set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)), - Species ~ ., data = iris, + class ~ ., data = hpc, control = ctrl ) 
expect_equal(descr_other_f$fit$mtry, 2) @@ -359,21 +359,21 @@ test_that('ranger classification prediction', { xy_class_fit <- rand_forest() %>% set_mode("classification") %>% set_engine("ranger") %>% fit_xy( - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_false(has_multi_predict(xy_class_fit)) expect_equal(multi_predict_args(xy_class_fit), NA_character_) - xy_class_pred <- predict(xy_class_fit$fit, data = iris[c(1, 51, 101), 1:4])$prediction + xy_class_pred <- predict(xy_class_fit$fit, data = hpc[c(1, 51, 101), 1:4])$prediction xy_class_pred <- colnames(xy_class_pred)[apply(xy_class_pred, 1, which.max)] - xy_class_pred <- factor(xy_class_pred, levels = levels(iris$Species)) + xy_class_pred <- factor(xy_class_pred, levels = levels(hpc$class)) expect_equal( xy_class_pred, - predict(xy_class_fit, new_data = iris[c(1, 51, 101), 1:4])$.pred_class + predict(xy_class_fit, new_data = hpc[c(1, 51, 101), 1:4])$.pred_class ) xy_prob_fit <- @@ -381,26 +381,26 @@ test_that('ranger classification prediction', { set_mode("classification") %>% set_engine("ranger") %>% fit_xy( - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) - xy_prob_pred <- predict(xy_prob_fit$fit, data = iris[c(1, 51, 101), 1:4])$prediction + xy_prob_pred <- predict(xy_prob_fit$fit, data = hpc[c(1, 51, 101), 1:4])$prediction xy_prob_pred <- colnames(xy_prob_pred)[apply(xy_prob_pred, 1, which.max)] - xy_prob_pred <- factor(xy_prob_pred, levels = levels(iris$Species)) + xy_prob_pred <- factor(xy_prob_pred, levels = levels(hpc$class)) expect_equal( xy_class_pred, - predict(xy_prob_fit, new_data = iris[c(1, 51, 101), 1:4])$.pred_class + predict(xy_prob_fit, new_data = hpc[c(1, 51, 101), 1:4])$.pred_class ) - xy_prob_prob <- predict(xy_prob_fit$fit, data = iris[c(1, 51, 101), 1:4], type = "response") + xy_prob_prob <- predict(xy_prob_fit$fit, data = hpc[c(1, 51, 101), 1:4], type = "response") xy_prob_prob <- as_tibble(xy_prob_prob$prediction) names(xy_prob_prob) <- paste0(".pred_", names(xy_prob_prob)) expect_equal( xy_prob_prob, - predict(xy_prob_fit, new_data = iris[c(1, 51, 101), 1:4], type = "prob") + predict(xy_prob_fit, new_data = hpc[c(1, 51, 101), 1:4], type = "prob") ) }) diff --git a/tests/testthat/test_rand_forest_spark.R b/tests/testthat/test_rand_forest_spark.R index ad62f39e9..bc22fafde 100644 --- a/tests/testthat/test_rand_forest_spark.R +++ b/tests/testthat/test_rand_forest_spark.R @@ -6,7 +6,7 @@ library(dplyr) context("random forest execution with spark") source(test_path("helper-objects.R")) - +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -20,8 +20,8 @@ test_that('spark execution', { skip_if(inherits(sc, "try-error")) - iris_rf_tr <- copy_to(sc, iris[-(1:4), ], "iris_rf_tr", overwrite = TRUE) - iris_rf_te <- copy_to(sc, iris[ 1:4 , -1], "iris_rf_te", overwrite = TRUE) + hpc_rf_tr <- copy_to(sc, hpc[-(1:4), ], "hpc_rf_tr", overwrite = TRUE) + hpc_rf_te <- copy_to(sc, hpc[ 1:4 , -1], "hpc_rf_te", overwrite = TRUE) # ---------------------------------------------------------------------------- @@ -32,7 +32,7 @@ test_that('spark execution', { set_engine("spark", seed = 12), control = ctrl, Sepal_Length ~ ., - data = iris_rf_tr + data = hpc_rf_tr ), regexp = NA ) @@ -45,28 +45,28 @@ test_that('spark execution', { set_engine("spark", seed = 12), control = ctrl, Sepal_Length ~ ., - data = iris_rf_tr + data = hpc_rf_tr ), regexp = NA ) expect_error( - spark_reg_pred <- 
predict(spark_reg_fit, iris_rf_te), + spark_reg_pred <- predict(spark_reg_fit, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_pred_num <- predict(spark_reg_fit, iris_rf_te), + spark_reg_pred_num <- predict(spark_reg_fit, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_dup <- predict(spark_reg_fit_dup, iris_rf_te), + spark_reg_dup <- predict(spark_reg_fit_dup, hpc_rf_te), regexp = NA ) expect_error( - spark_reg_num_dup <- predict(spark_reg_fit_dup, iris_rf_te), + spark_reg_num_dup <- predict(spark_reg_fit_dup, hpc_rf_te), regexp = NA ) diff --git a/tests/testthat/test_svm_liquidsvm.R b/tests/testthat/test_svm_liquidsvm.R index 240351551..98306ff15 100644 --- a/tests/testthat/test_svm_liquidsvm.R +++ b/tests/testthat/test_svm_liquidsvm.R @@ -3,6 +3,9 @@ library(parsnip) library(rlang) library(tibble) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] + # ------------------------------------------------------------------------------ test_that('primary arguments', { @@ -103,8 +106,8 @@ test_that('svm rbf regression', { fit_xy( reg_mod, control = ctrl, - x = iris[, 2:4], - y = iris$Sepal.Length + x = hpc[, 2:4], + y = hpc$compounds ), regexp = NA ) @@ -114,8 +117,8 @@ test_that('svm rbf regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -133,8 +136,8 @@ test_that('svm rbf regression prediction', { reg_form <- fit( object = reg_mod, - formula = Sepal.Length ~ ., - data = iris[, -5], + formula = compounds ~ ., + data = hpc[, -5], control = ctrl ) ) @@ -143,8 +146,8 @@ test_that('svm rbf regression prediction', { reg_xy_form <- fit_xy( object = reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, control = ctrl ) ) @@ -153,8 +156,8 @@ test_that('svm rbf regression prediction', { expect_warning( liquidSVM_form <- liquidSVM::svm( - x = Sepal.Length ~ ., - y = iris[, -5], + x = compounds ~ ., + y = hpc[, -5], gammas = .1, lambdas = 0.25, folds = 1, @@ -165,8 +168,8 @@ test_that('svm rbf regression prediction', { expect_warning( liquidSVM_xy_form <- liquidSVM::svm( - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, gammas = .1, lambdas = 0.25, folds = 1, @@ -179,8 +182,8 @@ test_that('svm rbf regression prediction', { liquidSVM::getSolution(liquidSVM_xy_form)[c("coeff", "sv")]) # check predictions for liquidSVM formula and liquidSVM xy interfaces - liquidSVM_form_preds <- predict(liquidSVM_form, iris[1:3, 2:4]) - liquidSVM_form_xy_preds <- predict(liquidSVM_xy_form, iris[1:3, 2:4]) + liquidSVM_form_preds <- predict(liquidSVM_form, hpc[1:3, 2:4]) + liquidSVM_form_xy_preds <- predict(liquidSVM_xy_form, hpc[1:3, 2:4]) expect_equal(liquidSVM_form_preds, liquidSVM_form_xy_preds) # check predictions for parsnip formula and liquidSVM formula interfaces @@ -189,7 +192,7 @@ test_that('svm rbf regression prediction', { list(.pred = liquidSVM_form_preds), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, iris[1:3, 2:4]) + parsnip_pred <- predict(reg_form, hpc[1:3, 2:4]) expect_equal(as.data.frame(liquidSVM_pred), as.data.frame(parsnip_pred)) # check that coeffs are equal for formula methods called via parsnip and liquidSVM @@ -201,7 +204,7 @@ test_that('svm rbf regression prediction', { liquidSVM::getSolution(reg_xy_form$fit)[c("coeff", "sv")]) # check predictions are equal for parsnip xy and liquidSVM xy methods - parsnip_xy_pred <- predict(reg_xy_form, 
iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(liquidSVM_pred), as.data.frame(parsnip_xy_pred)) }) diff --git a/tests/testthat/test_svm_poly.R b/tests/testthat/test_svm_poly.R index cf1184aef..2e451b232 100644 --- a/tests/testthat/test_svm_poly.R +++ b/tests/testthat/test_svm_poly.R @@ -7,6 +7,8 @@ library(tibble) context("RBF SVM") source("helpers.R") +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -131,8 +133,8 @@ test_that('svm poly regression', { res <- fit_xy( reg_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$compounds ), regexp = NA ) @@ -144,8 +146,8 @@ test_that('svm poly regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -161,13 +163,13 @@ test_that('svm poly regression prediction', { reg_form <- fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + compounds ~ ., + data = hpc[, -5], control = ctrl ) # kern_pred <- - # predict(reg_form$fit, iris[1:3, -c(1, 5)]) %>% + # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% # as_tibble() %>% # setNames(".pred") kern_pred <- @@ -178,20 +180,20 @@ test_that('svm poly regression prediction', { class = c("tbl_df", "tbl", "data.frame") ) - parsnip_pred <- predict(reg_form, iris[1:3, -c(1, 5)]) + parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) reg_xy_form <- fit_xy( reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$compounds, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) }) @@ -205,8 +207,8 @@ test_that('svm poly classification', { fit_xy( cls_mod, control = ctrl, - x = iris[, -5], - y = iris$Species + x = hpc[, -5], + y = hpc$class ), regexp = NA ) @@ -214,8 +216,8 @@ test_that('svm poly classification', { expect_error( fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -234,13 +236,13 @@ test_that('svm poly classification probabilities', { cls_form <- fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ) # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, iris[ind, -5])) + # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) kern_class <- structure( @@ -249,22 +251,22 @@ test_that('svm poly classification probabilities', { structure(1:3, .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, iris[ind, -5]) + parsnip_class <- predict(cls_form, hpc[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, iris[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% as_tibble() %>% setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) @@ -277,9 +279,9 @@ 
test_that('svm poly classification probabilities', { # row.names = c(NA,-3L), # class = c("tbl_df", "tbl", "data.frame")) - parsnip_probs <- predict(cls_form, iris[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, iris[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) diff --git a/tests/testthat/test_svm_rbf.R b/tests/testthat/test_svm_rbf.R index b3e7c7d04..a14964191 100644 --- a/tests/testthat/test_svm_rbf.R +++ b/tests/testthat/test_svm_rbf.R @@ -6,6 +6,8 @@ library(rlang) context("poly SVM") source(test_path("helpers.R")) +source(test_path("helper-objects.R")) +hpc <- hpc_data[1:150, c(2:5, 8)] # ------------------------------------------------------------------------------ @@ -110,8 +112,8 @@ test_that('svm poly regression', { res <- fit_xy( reg_mod, control = ctrl, - x = iris[,2:4], - y = iris$Sepal.Length + x = hpc[,2:4], + y = hpc$input_fields ), regexp = NA ) @@ -122,8 +124,8 @@ test_that('svm poly regression', { expect_error( fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + input_fields ~ ., + data = hpc[, -5], control = ctrl ), regexp = NA @@ -139,13 +141,13 @@ test_that('svm rbf regression prediction', { reg_form <- fit( reg_mod, - Sepal.Length ~ ., - data = iris[, -5], + input_fields ~ ., + data = hpc[, -5], control = ctrl ) # kern_pred <- - # predict(reg_form$fit, iris[1:3, -c(1, 5)]) %>% + # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% # as_tibble() %>% # setNames(".pred") kern_pred <- @@ -153,20 +155,20 @@ test_that('svm rbf regression prediction', { list(.pred = c(5.02786147259765, 4.81715220026091, 4.86817852816449)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, iris[1:3, -c(1, 5)]) + parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) reg_xy_form <- fit_xy( reg_mod, - x = iris[, 2:4], - y = iris$Sepal.Length, + x = hpc[, 2:4], + y = hpc$input_fields, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, iris[1:3, -c(1, 5)]) + parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) }) @@ -180,8 +182,8 @@ test_that('svm rbf classification', { fit_xy( cls_mod, control = ctrl, - x = iris[, -5], - y = iris$Species + x = hpc[, -5], + y = hpc$class ), regexp = NA ) @@ -189,8 +191,8 @@ test_that('svm rbf classification', { expect_error( fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ), regexp = NA @@ -209,13 +211,13 @@ test_that('svm rbf classification probabilities', { cls_form <- fit( cls_mod, - Species ~ ., - data = iris, + class ~ ., + data = hpc, control = ctrl ) # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, iris[ind, -5])) + # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) kern_class <- structure(list( @@ -224,22 +226,22 @@ test_that('svm rbf classification probabilities', { .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, iris[ind, -5]) + parsnip_class <- predict(cls_form, hpc[ind, -5]) expect_equal(kern_class, parsnip_class) 
set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = iris[, 1:4], - y = iris$Species, + x = hpc[, 1:4], + y = hpc$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, iris[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% as_tibble() %>% setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) @@ -251,10 +253,10 @@ test_that('svm rbf classification probabilities', { # .pred_virginica = c(0.00640936947697121, 0.625112509213187, 0.976312878783783)), # row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_probs <- predict(cls_form, iris[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, iris[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) From a41e1c0ec1906e67a67c30679ede445369da8f88 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Sun, 21 Jun 2020 20:37:15 -0700 Subject: [PATCH 2/7] fix descriptor tests There are 4 factor levels in hpc_data$class, but 3 in iris$Species, which tripped up a few tests. --- tests/testthat/test_descriptors.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_descriptors.R b/tests/testthat/test_descriptors.R index 34390a989..8f60b1468 100644 --- a/tests/testthat/test_descriptors.R +++ b/tests/testthat/test_descriptors.R @@ -2,7 +2,7 @@ library(testthat) library(parsnip) source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] +hpc <- hpc_data[1:150, c(2:5, 8)] %>% as.data.frame() # ------------------------------------------------------------------------------ @@ -29,7 +29,7 @@ eval_descrs <- function(descrs, not = NULL) { lapply(descrs, do.call, list()) } -class_tab <- table(hpc_data$class, dnn = NULL) +class_tab <- table(hpc$class, dnn = NULL) # ------------------------------------------------------------------------------ @@ -86,12 +86,12 @@ context("Testing formula -> xy conversion") test_that("numeric y and dummy vars", { expect_equal( - template(5, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), - eval_descrs(get_descr_form(compounds ~ ., data = hpc)) + template(6, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ ., data = hpc)) ) expect_equal( - template(2, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"compounds"]), - eval_descrs(get_descr_form(compounds ~ class, data = hpc)) + template(3, 1, 150, NA, 1, hpc, hpc["class"], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ class, data = hpc)) ) }) @@ -117,8 +117,8 @@ test_that("factor y", { eval_descrs(get_descr_form(class ~ ., data = hpc)) ) expect_equal( - template(1, 1, 150, class_tab, 0, hpc, hpc["input_fields"], hpc[,"class"]), - eval_descrs(get_descr_form(class ~ input_fields, data = hpc)) + template(1, 1, 150, class_tab, 0, hpc, hpc["compounds"], hpc[,"class"]), + eval_descrs(get_descr_form(class ~ compounds, data = hpc)) ) }) @@ -134,8 +134,8 @@ test_that("weird cases", { # So model.frame ignores - signs in a model formula so class is not removed # prior to model.matrix; otherwise this should have n_cols = 3 expect_equal( - template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"compounds"]), - 
eval_descrs(get_descr_form(compounds ~ . - class, data = hpc)) + template(3, 4, 150, NA, 1, hpc, hpc[-2], hpc[,"input_fields"]), + eval_descrs(get_descr_form(input_fields ~ . - class, data = hpc)) ) # Oy ve! Before going to model.matrix, model.frame produces a data frame @@ -173,7 +173,7 @@ test_that("numeric y and dummy vars", { eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1:2])) ) - hpc3 <- hpc2[,c("iterations", "class", "input_fields")] + hpc3 <- hpc2[,c("num_pending", "class", "compounds")] expect_equal( template(2, 2, 150, NA, 1, hpc3, hpc[, 4:5], hpc[, 1, drop = FALSE]), eval_descrs(get_descr_xy(x = hpc[, 4:5], y = hpc[, 1, drop = FALSE])) From 5ae33c5e649a8264dfd8c21295241732c86431e2 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Sun, 21 Jun 2020 21:01:09 -0700 Subject: [PATCH 3/7] fix linear_reg tests Addresses some failures re: some subsetting by name and some by index --- tests/testthat/test_linear_reg.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index 05cee1854..d2574988d 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -209,7 +209,7 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- names(hpc)[1:3] +num_pred <- c("input_fields", "num_pending", "iterations") hpc_bad_form <- as.formula(class ~ term) hpc_basic <- linear_reg() %>% set_engine("lm") @@ -279,19 +279,18 @@ test_that('lm execution', { }) test_that('lm prediction', { - uni_lm <- lm(input_fields ~ compounds + iterations + num_pending, data = hpc) + uni_lm <- lm(compounds ~ input_fields + num_pending + iterations, data = hpc) uni_pred <- unname(predict(uni_lm, newdata = hpc[1:5, ])) - inl_lm <- lm(input_fields ~ log(compounds) + class, data = hpc) + inl_lm <- lm(compounds ~ log(input_fields) + class, data = hpc) inl_pred <- unname(predict(inl_lm, newdata = hpc[1:5, ])) - mv_lm <- lm(cbind(compounds, iterations) ~ ., data = hpc) + mv_lm <- lm(cbind(input_fields, num_pending) ~ ., data = hpc) mv_pred <- as_tibble(predict(mv_lm, newdata = hpc[1:5, ])) - names(mv_pred) <- c(".pred_compounds", ".pred_iterations") - + names(mv_pred) <- c(".pred_input_fields", ".pred_num_pending") res_xy <- fit_xy( hpc_basic, x = hpc[, num_pred], - y = hpc$input_fields, + y = hpc$compounds, control = ctrl ) @@ -299,23 +298,25 @@ test_that('lm prediction', { res_form <- fit( hpc_basic, - input_fields ~ log(compounds) + class, + compounds ~ log(input_fields) + class, data = hpc, control = ctrl ) + expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) res_mv <- fit( hpc_basic, - cbind(compounds, iterations) ~ ., + cbind(input_fields, num_pending) ~ ., data = hpc, control = ctrl ) + expect_equal(mv_pred, predict(res_mv, hpc[1:5,])) }) test_that('lm intervals', { - stats_lm <- lm(input_fields ~ compounds + iterations + num_pending, + stats_lm <- lm(compounds ~ input_fields + iterations + num_pending, data = hpc) confidence_lm <- predict(stats_lm, newdata = hpc[1:5, ], level = 0.93, interval = "confidence") @@ -325,7 +326,7 @@ test_that('lm intervals', { res_xy <- fit_xy( linear_reg() %>% set_engine("lm"), x = hpc[, num_pred], - y = hpc$input_fields, + y = hpc$compounds, control = ctrl ) From a466fc4e909ef8bcdf0a53661a90dd39c7b3d356 Mon Sep 17 00:00:00 2001 From: "Simon P. 
Couch" Date: Mon, 22 Jun 2020 08:05:04 -0700 Subject: [PATCH 4/7] fix svm tests The fourth factor level in hpc_data was preserved in some tests. Also, some manually entered predictions needed to be rewritten. --- tests/testthat/test_svm_poly.R | 50 +++++++++++------------ tests/testthat/test_svm_rbf.R | 72 +++++++++++++++++----------------- 2 files changed, 60 insertions(+), 62 deletions(-) diff --git a/tests/testthat/test_svm_poly.R b/tests/testthat/test_svm_poly.R index 2e451b232..f22caac9d 100644 --- a/tests/testthat/test_svm_poly.R +++ b/tests/testthat/test_svm_poly.R @@ -6,7 +6,7 @@ library(tibble) # ------------------------------------------------------------------------------ context("RBF SVM") -source("helpers.R") +source(test_path("helpers.R")) source(test_path("helper-objects.R")) hpc <- hpc_data[1:150, c(2:5, 8)] @@ -175,13 +175,15 @@ test_that('svm poly regression prediction', { kern_pred <- structure( list( - .pred = c(5.02154233477783, 4.71496213707127, 4.78370369917621)), + .pred = c(164.4739, 139.8284, 133.8760)), row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame") ) parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_pred), + tolerance = .0001) reg_xy_form <- @@ -194,7 +196,9 @@ test_that('svm poly regression prediction', { expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_xy_pred), + tolerance = .0001) }) # ------------------------------------------------------------------------------ @@ -230,58 +234,50 @@ test_that('svm poly classification probabilities', { skip_if_not_installed("kernlab") - ind <- c(1, 51, 101) + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(1, 2, 143) set.seed(34562) cls_form <- fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ) - # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) + .pred_factor <- factor(c("F", "VF", "L"), levels = c("VF", "F", "L")) kern_class <- structure( list( - .pred_class = - structure(1:3, .Label = c("setosa", "versicolor", "virginica"), class = "factor")), + .pred_class = .pred_factor), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, hpc[ind, -5]) + parsnip_class <- predict(cls_form, hpc_no_m[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = hpc[, 1:4], - y = hpc$class, + x = hpc_no_m[, 1:4], + y = hpc_no_m$class, control = ctrl ) expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc_no_m[ind, -5], type = "probabilities") %>% as_tibble() %>% - setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) - - # kern_probs <- - # structure( - # list( - # .pred_setosa = c(0.982990083267231, 0.0167077303224448, 0.00930879923686657), - # .pred_versicolor = c(0.00417116710624842, 0.946131931665357, 0.0015524073332013), - # .pred_virginica = c(0.0128387496265202, 0.0371603380121978, 0.989138793429932)), - # row.names = c(NA,-3L), - # class = c("tbl_df", "tbl", "data.frame")) - - parsnip_probs <- predict(cls_form, hpc[ind, -5], 
type = "prob") + setNames(c('.pred_VF', '.pred_F', '.pred_L')) + + parsnip_probs <- predict(cls_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) diff --git a/tests/testthat/test_svm_rbf.R b/tests/testthat/test_svm_rbf.R index a14964191..663c5d0e2 100644 --- a/tests/testthat/test_svm_rbf.R +++ b/tests/testthat/test_svm_rbf.R @@ -138,6 +138,11 @@ test_that('svm rbf regression prediction', { skip_if_not_installed("kernlab") + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(2, 1, 143) + reg_form <- fit( reg_mod, @@ -146,30 +151,30 @@ test_that('svm rbf regression prediction', { control = ctrl ) - # kern_pred <- - # predict(reg_form$fit, hpc[1:3, -c(1, 5)]) %>% - # as_tibble() %>% - # setNames(".pred") kern_pred <- structure( - list(.pred = c(5.02786147259765, 4.81715220026091, 4.86817852816449)), + list(.pred = c(131.7743, 372.0932, 902.0633)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_pred <- predict(reg_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_pred)) + parsnip_pred <- predict(reg_form, hpc[ind, -c(2, 5)]) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_pred), + tolerance = .0001) reg_xy_form <- fit_xy( reg_mod, - x = hpc[, 2:4], + x = hpc[, c(1, 3, 4)], y = hpc$input_fields, control = ctrl ) expect_equal(reg_form$fit@alphaindex, reg_xy_form$fit@alphaindex) - parsnip_xy_pred <- predict(reg_xy_form, hpc[1:3, -c(1, 5)]) - expect_equal(as.data.frame(kern_pred), as.data.frame(parsnip_xy_pred)) + parsnip_xy_pred <- predict(reg_xy_form, hpc[ind, -c(2, 5)]) + expect_equal(as.data.frame(kern_pred), + as.data.frame(parsnip_xy_pred), + tolerance = .0001) }) # ------------------------------------------------------------------------------ @@ -178,12 +183,17 @@ test_that('svm rbf classification', { skip_if_not_installed("kernlab") + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(2, 1, 143) + expect_error( fit_xy( cls_mod, control = ctrl, - x = hpc[, -5], - y = hpc$class + x = hpc_no_m[, -5], + y = hpc_no_m$class ), regexp = NA ) @@ -192,7 +202,7 @@ test_that('svm rbf classification', { fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ), regexp = NA @@ -205,58 +215,50 @@ test_that('svm rbf classification probabilities', { skip_if_not_installed("kernlab") - ind <- c(1, 51, 101) + hpc_no_m <- hpc[-c(84, 85, 86, 87, 88, 109, 128),] %>% + droplevels() + + ind <- c(4, 55, 143) set.seed(34562) cls_form <- fit( cls_mod, class ~ ., - data = hpc, + data = hpc_no_m, control = ctrl ) - # kern_class <- - # tibble(.pred_class = predict(cls_form$fit, hpc[ind, -5])) - kern_class <- structure(list( .pred_class = structure( - c(1L, 3L, 3L), - .Label = c("setosa", "versicolor", "virginica"), class = "factor")), + c(1L, 1L, 3L), + .Label = c("VF", "F", "L"), class = "factor")), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame")) - parsnip_class <- predict(cls_form, hpc[ind, -5]) + parsnip_class <- predict(cls_form, hpc_no_m[ind, -5]) expect_equal(kern_class, parsnip_class) set.seed(34562) cls_xy_form <- fit_xy( cls_mod, - x = hpc[, 1:4], - y = hpc$class, + x = hpc_no_m[, 1:4], + y = hpc_no_m$class, control = ctrl ) 
expect_equal(cls_form$fit@alphaindex, cls_xy_form$fit@alphaindex) library(kernlab) kern_probs <- - kernlab::predict(cls_form$fit, hpc[ind, -5], type = "probabilities") %>% + kernlab::predict(cls_form$fit, hpc_no_m[ind, -5], type = "probabilities") %>% as_tibble() %>% - setNames(c('.pred_setosa', '.pred_versicolor', '.pred_virginica')) - - # kern_probs <- - # structure( - # list( - # .pred_setosa = c(0.985403715135807, 0.0158818274678279, 0.00633995479908973), - # .pred_versicolor = c(0.00818691538722139, 0.359005663318986, 0.0173471664171275), - # .pred_virginica = c(0.00640936947697121, 0.625112509213187, 0.976312878783783)), - # row.names = c(NA,-3L), class = c("tbl_df", "tbl", "data.frame")) + setNames(c('.pred_VF', '.pred_F', '.pred_L')) - parsnip_probs <- predict(cls_form, hpc[ind, -5], type = "prob") + parsnip_probs <- predict(cls_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_probs)) - parsnip_xy_probs <- predict(cls_xy_form, hpc[ind, -5], type = "prob") + parsnip_xy_probs <- predict(cls_xy_form, hpc_no_m[ind, -5], type = "prob") expect_equal(as.data.frame(kern_probs), as.data.frame(parsnip_xy_probs)) }) From 35ec46e8bd9a5e0580a9f75b1885ec6275fd53f2 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Mon, 22 Jun 2020 08:54:29 -0700 Subject: [PATCH 5/7] fix remaining test failures Some more manual data entry and # classes in outcome variable issues. --- tests/testthat/test_boost_tree_xgboost.R | 4 ++-- tests/testthat/test_mars.R | 22 +++++++++++++--------- tests/testthat/test_predict_formats.R | 2 +- tests/testthat/test_rand_forest_ranger.R | 10 +++++----- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index d181843ed..d2a4f4919 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -65,7 +65,7 @@ test_that('xgboost classification prediction', { ) xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") - xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE) + xy_pred <- matrix(xy_pred, ncol = 4, byrow = TRUE) xy_pred <- factor(levels(hpc$class)[apply(xy_pred, 1, which.max)], levels = levels(hpc$class)) expect_equal(xy_pred, predict(xy_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) @@ -77,7 +77,7 @@ test_that('xgboost classification prediction', { ) form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(hpc[1:8, num_pred])), type = "class") - form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE) + form_pred <- matrix(form_pred, ncol = 4, byrow = TRUE) form_pred <- factor(levels(hpc$class)[apply(form_pred, 1, which.max)], levels = levels(hpc$class)) expect_equal(form_pred, predict(form_fit, new_data = hpc[1:8, num_pred], type = "class")$.pred_class) }) diff --git a/tests/testthat/test_mars.R b/tests/testthat/test_mars.R index a6361bbaf..a95f95d64 100644 --- a/tests/testthat/test_mars.R +++ b/tests/testthat/test_mars.R @@ -188,17 +188,21 @@ test_that('mars execution', { test_that('mars prediction', { skip_if_not_installed("earth") - uni_pred <- c(5.02371514510488, 4.70502120747471, 4.78973285129011, 4.81152592623742, - 5.08745393263092) - inl_pred <- c(5.07584328502019, 4.64927636051174, 4.82786784324037, 4.74001260567429, - 5.15379794835255) + uni_pred <- c(30.1466666666667, 30.1466666666667, 30.1466666666667, + 30.1466666666667, 30.1466666666667) + inl_pred <- c(538.268789262046, 
+                141.024903718634, 141.024903718634)
   mv_pred <-
     structure(
-      list(Sepal.Width =
-             c(3.4874092243636, 3.34173526636919, 3.17647644756747, 3.14280919018489, 3.41457224536639),
-           Petal.Width =
-             c(0.237414046784062, 0.221455118452782, 0.18348960240454, 0.219523313672823, 0.229434582618422
-             )), class = "data.frame", row.names = c(NA, -5L))
+      list(compounds =
+             c(371.334864384913, 129.475162245595, 256.094366313268,
+               129.475162245595, 129.475162245595),
+           input_fields =
+             c(430.476046435458, 158.833790342308, 218.07635084308,
+               158.833790342308, 158.833790342308)
+      ),
+      class = "data.frame", row.names = c(NA, -5L)
+    )
 
   res_xy <- fit_xy(
     hpc_basic,
diff --git a/tests/testthat/test_predict_formats.R b/tests/testthat/test_predict_formats.R
index 9b9b5430a..6fb7a709b 100644
--- a/tests/testthat/test_predict_formats.R
+++ b/tests/testthat/test_predict_formats.R
@@ -13,7 +13,7 @@ context("check predict output structures")
 lm_fit <-
   linear_reg(mode = "regression") %>%
   set_engine("lm") %>%
-  fit(class ~ ., data = hpc)
+  fit(compounds ~ ., data = hpc)
 
 class_dat <- airquality[complete.cases(airquality),]
 class_dat$Ozone <- factor(ifelse(class_dat$Ozone >= 31, "high", "low"))
diff --git a/tests/testthat/test_rand_forest_ranger.R b/tests/testthat/test_rand_forest_ranger.R
index 6d96e2490..0762d56d5 100644
--- a/tests/testthat/test_rand_forest_ranger.R
+++ b/tests/testthat/test_rand_forest_ranger.R
@@ -310,11 +310,11 @@ test_that('additional descriptor tests', {
 
   ##
 
-  exp_wts <- quo(c(min(.lvls()), 20, 10))
+  exp_wts <- quo(c(min(.lvls()), 20, 10, 1))
 
   descr_other_xy <- fit_xy(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     x = hpc[, 1:4],
     y = hpc$class,
     control = ctrl
@@ -324,7 +324,7 @@
   descr_other_f <- fit(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     class ~ ., data = hpc,
     control = ctrl
   )
@@ -333,7 +333,7 @@
   descr_other_xy <- fit_xy(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     x = hpc[, 1:4],
     y = hpc$class,
     control = ctrl
@@ -343,7 +343,7 @@
   descr_other_f <- fit(
     rand_forest(mode = "classification", mtry = 2) %>%
-      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10)),
+      set_engine("ranger", class.weights = c(min(.lvls()), 20, 10, 1)),
     class ~ ., data = hpc,
     control = ctrl
   )

From 6dc72df25d6820a4922368b164523b39eb3ee866 Mon Sep 17 00:00:00 2001
From: "Simon P. Couch"
Couch" Date: Mon, 22 Jun 2020 09:11:27 -0700 Subject: [PATCH 6/7] a couple more test fixes --- tests/testthat/test_convert_data.R | 2 +- tests/testthat/test_nullmodel.R | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test_convert_data.R b/tests/testthat/test_convert_data.R index 48c0cf8fa..6a711d3b2 100644 --- a/tests/testthat/test_convert_data.R +++ b/tests/testthat/test_convert_data.R @@ -449,7 +449,7 @@ test_that("1 col matrix x, 1 col matrix y", { test_that("matrix x, factor y", { observed <- parsnip:::convert_xy_to_form_fit(as.matrix(hpc[, -5]), hpc$class) - expected <- hpc + expected <- as.data.frame(hpc) names(expected)[5] <- "..y" expect_equal(expected, observed$data) expect_equal(formula("..y ~ ."), observed$formula) diff --git a/tests/testthat/test_nullmodel.R b/tests/testthat/test_nullmodel.R index db499d54c..aaf77532a 100644 --- a/tests/testthat/test_nullmodel.R +++ b/tests/testthat/test_nullmodel.R @@ -4,9 +4,9 @@ library(rlang) library(tibble) context("test-nullmodel") -source("helpers.R") +source(test_path("helpers.R")) source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] +hpc <- hpc_data[1:150, c(2:5, 8)] %>% as.data.frame() test_that('primary arguments', { basic <- null_model(mode = "regression") @@ -44,7 +44,7 @@ test_that('bad input', { # ------------------------------------------------------------------------------ -num_pred <- names(phc)[1:3] +num_pred <- names(hpc)[1:3] hpc_bad_form <- as.formula(class ~ term) hpc_basic <- null_model(mode = "regression") %>% set_engine("parsnip") @@ -92,8 +92,8 @@ test_that('nullmodel execution', { test_that('nullmodel prediction', { - uni_pred <- tibble(.pred = rep(3.758, 5)) - inl_pred <- rep(3.758, 5) + uni_pred <- tibble(.pred = rep(30.1, 5)) + inl_pred <- rep(30.1, 5) mw_pred <- tibble(gear = rep(3.6875, 5), carb = rep(2.8125, 5)) @@ -103,14 +103,18 @@ test_that('nullmodel prediction', { y = hpc$num_pending ) - expect_equal(uni_pred, predict(res_xy, new_data = hpc[1:5, num_pred])) + expect_equal(uni_pred, + predict(res_xy, new_data = hpc[1:5, num_pred]), + tolerance = .01) res_form <- fit( hpc_basic, num_pending ~ log(compounds) + class, - data = hpc_basic + data = hpc ) - expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred) + expect_equal(inl_pred, + predict(res_form, hpc[1:5, ])$.pred, + tolerance = .01) # Multivariate y res <- fit( From 1393f96124c374f2803b630cb1bb37e44c29a22a Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Mon, 22 Jun 2020 09:12:04 -0700 Subject: [PATCH 7/7] improve descriptors examples Initially switched from iris to mtcars, but the Orange data is a bit more interesting / illustrative. --- R/descriptors.R | 30 +++++++++++++++--------------- man/descriptors.Rd | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/R/descriptors.R b/R/descriptors.R index dc64f0737..b6b0da844 100644 --- a/R/descriptors.R +++ b/R/descriptors.R @@ -26,28 +26,28 @@ #' column, `..y`. 
 #' }
 #'
-#' For example, if you use the model formula `mpg ~ .` with the `mtcars`
-#' data, the values would be
+#' For example, if you use the model formula `circumference ~ .` with the
+#' built-in `Orange` data, the values would be
 #' \preformatted{
-#'  .preds() = 10 (the 10 columns in `mtcars`)
-#'  .cols()  = 10 (10 numeric columns + 0 from dummy variables)
-#'  .obs()   = 32
+#'  .preds() = 2 (the 2 remaining columns in `Orange`)
+#'  .cols()  = 5 (1 numeric column + 4 from Tree dummy variables)
+#'  .obs()   = 35
 #'  .lvls()  = NA (no factor outcome)
-#'  .facts() = 0 (no factor outcome)
-#'  .y()     = (mpg as a vector)
-#'  .x()     = (The other 10 columns as a data frame)
+#'  .facts() = 1 (the Tree predictor)
+#'  .y()     = (circumference as a vector)
+#'  .x()     = (The other 2 columns as a data frame)
 #'  .dat()   = (The full data set)
 #' }
 #'
-#' If the formula `as.character(cyl) ~ .` where used:
+#' If the formula `Tree ~ .` were used:
 #' \preformatted{
-#'  .preds() = 10 (the 10 numeric columns in `mtcars`)
-#'  .cols()  = 10 (same)
-#'  .obs()   = 32
-#'  .lvls()  = c("4" = 11, "6" = 7, "8" = 14)
+#'  .preds() = 2 (the 2 numeric columns in `Orange`)
+#'  .cols()  = 2 (same)
+#'  .obs()   = 35
+#'  .lvls()  = c("1" = 7, "2" = 7, "3" = 7, "4" = 7, "5" = 7)
 #'  .facts() = 0
-#'  .y()     = (as.character(cyl) as a vector)
-#'  .x()     = (The other 10 columns as a data frame)
+#'  .y()     = (Tree as a vector)
+#'  .x()     = (The other 2 columns as a data frame)
 #'  .dat()   = (The full data set)
 #' }
 #'
diff --git a/man/descriptors.Rd b/man/descriptors.Rd
index b53ec6459..4ad7ccf18 100644
--- a/man/descriptors.Rd
+++ b/man/descriptors.Rd
@@ -55,28 +55,28 @@ outcomes. If \code{fit_xy()} was used, the outcomes are attached as the
 column, \code{..y}.
 }
 
-For example, if you use the model formula \code{mpg ~ .} with the \code{mtcars}
-data, the values would be
+For example, if you use the model formula \code{circumference ~ .} with the
+built-in \code{Orange} data, the values would be
 \preformatted{
- .preds() = 10 (the 10 columns in `mtcars`)
- .cols()  = 10 (10 numeric columns + 0 from dummy variables)
- .obs()   = 32
+ .preds() = 2 (the 2 remaining columns in `Orange`)
+ .cols()  = 5 (1 numeric column + 4 from Tree dummy variables)
+ .obs()   = 35
  .lvls()  = NA (no factor outcome)
- .facts() = 0 (no factor outcome)
- .y()     = (mpg as a vector)
- .x()     = (The other 10 columns as a data frame)
+ .facts() = 1 (the Tree predictor)
+ .y()     = (circumference as a vector)
+ .x()     = (The other 2 columns as a data frame)
  .dat()   = (The full data set)
 }
 
-If the formula \code{as.character(cyl) ~ .} where used:
+If the formula \code{Tree ~ .} were used:
 \preformatted{
- .preds() = 10 (the 10 numeric columns in `mtcars`)
- .cols()  = 10 (same)
- .obs()   = 32
- .lvls()  = c("4" = 11, "6" = 7, "8" = 14)
+ .preds() = 2 (the 2 numeric columns in `Orange`)
+ .cols()  = 2 (same)
+ .obs()   = 35
+ .lvls()  = c("1" = 7, "2" = 7, "3" = 7, "4" = 7, "5" = 7)
  .facts() = 0
- .y()     = (as.character(cyl) as a vector)
- .x()     = (The other 10 columns as a data frame)
+ .y()     = (Tree as a vector)
+ .x()     = (The other 2 columns as a data frame)
  .dat()   = (The full data set)
 }
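
A quick sanity check of the counts quoted in the new `Orange` examples, as a sketch
rather than part of the patch: the descriptor values can be approximated with base R,
assuming that `.cols()` counts model-matrix columns without the intercept. The real
descriptors are only evaluated while parsnip is fitting a model, so base R stand-ins
are used here.

    # Orange ships with R: 35 rows; columns Tree (a 5-level factor), age, circumference

    # circumference ~ .
    nrow(Orange)                                       # 35 -> .obs()
    ncol(Orange) - 1                                   # 2  -> .preds()
    ncol(model.matrix(circumference ~ ., Orange)) - 1  # 5  -> .cols() (age + 4 Tree contrasts)
    sum(vapply(Orange[setdiff(names(Orange), "circumference")],
               is.factor, logical(1)))                 # 1  -> .facts()

    # Tree ~ .
    table(Orange$Tree)                                 # 7 observations per level -> .lvls()

Inside a model specification the descriptors themselves would be used instead, for
example `mtry = .cols() - 1` in an engine argument, and they are resolved at fit time.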