diff --git a/.Rbuildignore b/.Rbuildignore index 387dedcff..0e14913de 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,4 +9,5 @@ ^R/README\.md$ derby.log ^logs$ -^tests/testthat/logs$ \ No newline at end of file +^tests/testthat/logs$ +^revdep$ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 0ba99ba55..8de5c838c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,11 +17,11 @@ VignetteBuilder: knitr Depends: R (>= 2.10) Imports: - dplyr, + dplyr (>= 0.8.0.1), rlang (>= 0.3.1), purrr, utils, - tibble, + tibble (>= 2.1.1), generics, glue, magrittr, @@ -39,7 +39,7 @@ Suggests: xgboost, covr, C50, - sparklyr, + sparklyr (>= 1.0.0), earth, glmnet, kernlab, diff --git a/NEWS.md b/NEWS.md index 2220c1c5f..55a6c45de 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,7 @@ that are actually varying). * The prediction modules (e.g. `predict_class`, `predict_numeric`, etc) were de-exported. These were internal functions that were not to be used by the users and the users were using them. + * An event time data set (`check_times`) was included that is the time (in seconds) to run `R CMD check` using the "r-devel-windows-ix86+x86_64` flavor. Packages that errored are censored. ## Bug Fixes diff --git a/R/aaa_spark_helpers.R b/R/aaa_spark_helpers.R index fe3d5b455..4d233a760 100644 --- a/R/aaa_spark_helpers.R +++ b/R/aaa_spark_helpers.R @@ -4,9 +4,9 @@ format_spark_probs <- function(results, object) { results <- dplyr::select(results, starts_with("probability_")) p <- ncol(results) - lvl <- paste0("probability_", 0:(p - 1)) - names(lvl) <- paste0("pred_", object$fit$.index_labels) - results %>% rename(!!!syms(lvl)) + lvl <- colnames(results) + names(lvl) <- paste0("pred_", object$fit$index_labels) + results %>% dplyr::rename(!!!syms(lvl)) } format_spark_class <- function(results, object) { diff --git a/R/data.R b/R/data.R index 173a82125..25ee1cb60 100644 --- a/R/data.R +++ b/R/data.R @@ -44,3 +44,62 @@ NULL #' data(wa_churn) #' str(wa_churn) NULL + +#' Execution Time Data +#' +#' These data were collected from the CRAN web page for 13,626 R +#' packages. The time to complete the standard package checking +#' routine was collected In some cases, the package checking +#' process is stopped due to errors and these data are treated as +#' censored. It is less than 1 percent. +#' +#' As predictors, the associated package source code were +#' downloaded and parsed to create predictors, including +#' +#' * `authors`: The number of authors in the author field. +#' * `imports`: The number of imported packages. +#' * `suggests`: The number of packages suggested. +#' * `depends`: The number of hard dependencies. +#' * `Roxygen`: a binary indicator for whether Roxygen was used +#' for documentation. +#' * `gh`: a binary indicator for whether the URL field contained +#' a GitHub link. +#' * `rforge`: a binary indicator for whether the URL field +#' contained a link to R-forge. +#' * `descr`: The number of characters (or, in some cases, bytes) +#' in the description field. +#' * `r_count`: The number of R files in the R directory. +#' * `r_size`: The total disk size of the R files. +#' * `ns_import`: Estimated number of imported functions or methods. +#' * `ns_export`: Estimated number of exported functions or methods. +#' * `s3_methods`: Estimated number of S3 methods. +#' * `s4_methods`: Estimated number of S4 methods. +#' * `doc_count`: How many Rmd or Rnw files in the vignettes +#' directory. +#' * `doc_size`: The disk size of the Rmd or Rnw files. +#' * `src_count`: The number of files in the `src` directory. +#' * `src_size`: The size on disk of files in the `src` directory. +#' * `data_count` The number of files in the `data` directory. +#' * `data_size`: The size on disk of files in the `data` directory. +#' * `testthat_count`: The number of files in the `testthat` +#' directory. +#' * `testthat_size`: The size on disk of files in the `testthat` +#' directory. +#' * `check_time`: The time (in seconds) to run `R CMD check` +#' using the "r-devel-windows-ix86+x86_64` flavor. +#' * `status`: An indicator for whether the tests completed. +#' +#' Data were collected on 2019-01-20. +#' @name check_times +#' @aliases check_times +#' @docType data +#' @return \item{check_times}{a data frame} +#' +#' @source CRAN +#' +#' @keywords datasets +#' @examples +#' data(check_times) +#' str(check_times) +NULL + diff --git a/R/predict.R b/R/predict.R index d0dc1f735..7cea3e0b7 100644 --- a/R/predict.R +++ b/R/predict.R @@ -245,7 +245,6 @@ prepare_data <- function(object, new_data) { #' @return A tibble with the same number of rows as the data being predicted. #' Mostly likely, there is a list-column named `.pred` that is a tibble with #' multiple rows per sub-model. -#' @keywords internal #' @export multi_predict <- function(object, ...) { if (inherits(object$fit, "try-error")) { @@ -255,7 +254,6 @@ multi_predict <- function(object, ...) { UseMethod("multi_predict") } -#' @keywords internal #' @export #' @rdname multi_predict multi_predict.default <- function(object, ...) diff --git a/data/check_times.rda b/data/check_times.rda new file mode 100644 index 000000000..3c482883a Binary files /dev/null and b/data/check_times.rda differ diff --git a/data/datalist b/data/datalist index 943d73588..ac53d359b 100644 --- a/data/datalist +++ b/data/datalist @@ -1,2 +1,3 @@ lending_club: lending_club wa_churn: wa_churn +check_times: check_times diff --git a/man/check_times.Rd b/man/check_times.Rd new file mode 100644 index 000000000..e8a306e1c --- /dev/null +++ b/man/check_times.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{check_times} +\alias{check_times} +\title{Execution Time Data} +\source{ +CRAN +} +\value{ +\item{check_times}{a data frame} +} +\description{ +These data were collected from the CRAN web page for 13,626 R +packages. The time to complete the standard package checking +routine was collected In some cases, the package checking +process is stopped due to errors and these data are treated as +censored. It is less than 1 percent. +} +\details{ +As predictors, the associated package source code were +downloaded and parsed to create predictors, including +\itemize{ +\item \code{authors}: The number of authors in the author field. +\item \code{imports}: The number of imported packages. +\item \code{suggests}: The number of packages suggested. +\item \code{depends}: The number of hard dependencies. +\item \code{Roxygen}: a binary indicator for whether Roxygen was used +for documentation. +\item \code{gh}: a binary indicator for whether the URL field contained +a GitHub link. +\item \code{rforge}: a binary indicator for whether the URL field +contained a link to R-forge. +\item \code{descr}: The number of characters (or, in some cases, bytes) +in the description field. +\item \code{r_count}: The number of R files in the R directory. +\item \code{r_size}: The total disk size of the R files. +\item \code{ns_import}: Estimated number of imported functions or methods. +\item \code{ns_export}: Estimated number of exported functions or methods. +\item \code{s3_methods}: Estimated number of S3 methods. +\item \code{s4_methods}: Estimated number of S4 methods. +\item \code{doc_count}: How many Rmd or Rnw files in the vignettes +directory. +\item \code{doc_size}: The disk size of the Rmd or Rnw files. +\item \code{src_count}: The number of files in the \code{src} directory. +\item \code{src_size}: The size on disk of files in the \code{src} directory. +\item \code{data_count} The number of files in the \code{data} directory. +\item \code{data_size}: The size on disk of files in the \code{data} directory. +\item \code{testthat_count}: The number of files in the \code{testthat} +directory. +\item \code{testthat_size}: The size on disk of files in the \code{testthat} +directory. +\item \code{check_time}: The time (in seconds) to run \code{R CMD check} +using the "r-devel-windows-ix86+x86_64` flavor. +\item \code{status}: An indicator for whether the tests completed. +} + +Data were collected on 2019-01-20. +} +\examples{ +data(check_times) +str(check_times) +} +\keyword{datasets} diff --git a/man/multi_predict.Rd b/man/multi_predict.Rd index 965ec1fc3..c12d9ee7a 100644 --- a/man/multi_predict.Rd +++ b/man/multi_predict.Rd @@ -23,4 +23,3 @@ multiple rows per sub-model. \description{ For some models, predictions can be made on sub-models in the model object. } -\keyword{internal} diff --git a/tests/testthat/test_boost_tree_C50.R b/tests/testthat/test_boost_tree_C50.R index d20578d45..c3a616394 100644 --- a/tests/testthat/test_boost_tree_C50.R +++ b/tests/testthat/test_boost_tree_C50.R @@ -88,7 +88,7 @@ test_that('C5.0 prediction', { ) xy_pred <- predict(classes_xy$fit, newdata = lending_club[1:7, num_pred]) - expect_equal(xy_pred, parsnip:::predict_class(classes_xy, lending_club[1:7, num_pred])) + expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred])$.pred_class) }) @@ -105,9 +105,10 @@ test_that('C5.0 probabilities', { xy_pred <- predict(classes_xy$fit, newdata = as.data.frame(lending_club[1:7, num_pred]), type = "prob") xy_pred <- as_tibble(xy_pred) - expect_equal(xy_pred, parsnip:::predict_classprob(classes_xy, lending_club[1:7, num_pred])) + names(xy_pred) <- c(".pred_bad", ".pred_good") + expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred], type = "prob")) - one_row <- parsnip:::predict_classprob(classes_xy, lending_club[1, num_pred]) + one_row <- predict(classes_xy, lending_club[1, num_pred], type = "prob") expect_equal(xy_pred[1,], one_row) }) diff --git a/tests/testthat/test_boost_tree_spark.R b/tests/testthat/test_boost_tree_spark.R index 49e9892ec..0121bf4bb 100644 --- a/tests/testthat/test_boost_tree_spark.R +++ b/tests/testthat/test_boost_tree_spark.R @@ -58,7 +58,7 @@ test_that('spark execution', { ) expect_error( - spark_reg_pred_num <- parsnip:::predict_numeric(spark_reg_fit, iris_bt_te), + spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te), regexp = NA ) @@ -68,7 +68,7 @@ test_that('spark execution', { ) expect_error( - spark_reg_num_dup <- parsnip:::predict_numeric(spark_reg_fit_dup, iris_bt_te), + spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te), regexp = NA ) @@ -124,7 +124,7 @@ test_that('spark execution', { ) expect_error( - spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, churn_bt_te), + spark_class_pred_class <- parsnip:::predict_class.model_fit(spark_class_fit, churn_bt_te), regexp = NA ) @@ -134,7 +134,7 @@ test_that('spark execution', { ) expect_error( - spark_class_dup_class <- parsnip:::predict_class(spark_class_fit_dup, churn_bt_te), + spark_class_dup_class <- parsnip:::predict_class.model_fit(spark_class_fit_dup, churn_bt_te), regexp = NA ) @@ -156,7 +156,7 @@ test_that('spark execution', { ) expect_error( - spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, churn_bt_te), + spark_class_prob_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit, churn_bt_te), regexp = NA ) @@ -166,7 +166,7 @@ test_that('spark execution', { ) expect_error( - spark_class_dup_classprob <- parsnip:::predict_classprob(spark_class_fit_dup, churn_bt_te), + spark_class_dup_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit_dup, churn_bt_te), regexp = NA ) diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R index e740cfa12..8475707d4 100644 --- a/tests/testthat/test_boost_tree_xgboost.R +++ b/tests/testthat/test_boost_tree_xgboost.R @@ -66,7 +66,7 @@ test_that('xgboost classification prediction', { xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class") xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE) xy_pred <- factor(levels(iris$Species)[apply(xy_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = iris[1:8, num_pred])) + expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( iris_xgboost, @@ -78,7 +78,7 @@ test_that('xgboost classification prediction', { form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class") form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE) form_pred <- factor(levels(iris$Species)[apply(form_pred, 1, which.max)], levels = levels(iris$Species)) - expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = iris[1:8, num_pred])) + expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) }) @@ -141,7 +141,7 @@ test_that('xgboost regression prediction', { ) xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1]))) - expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = mtcars[1:8, -1])) + expect_equal(xy_pred, predict(xy_fit, new_data = mtcars[1:8, -1])$.pred) form_fit <- fit( car_basic, @@ -151,7 +151,7 @@ test_that('xgboost regression prediction', { ) form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1]))) - expect_equal(form_pred, parsnip:::predict_numeric(form_fit, new_data = mtcars[1:8, -1])) + expect_equal(form_pred, predict(form_fit, new_data = mtcars[1:8, -1])$.pred) }) @@ -188,9 +188,9 @@ test_that('submodel prediction', { mp_res <- multi_predict(class_fit, new_data = wa_churn[1:4, vars], trees = 5, type = "prob") mp_res <- do.call("rbind", mp_res$.pred) expect_equal(mp_res[[".pred_No"]], pred_class) - + expect_error( - multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"), + multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"), "Did you mean" ) }) diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index ba5f804b0..1243d8a5d 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -1,6 +1,7 @@ library(testthat) library(parsnip) library(rlang) +library(tibble) # ------------------------------------------------------------------------------ @@ -260,7 +261,9 @@ test_that('lm prediction', { inl_lm <- lm(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) inl_pred <- unname(predict(inl_lm, newdata = iris[1:5, ])) mv_lm <- lm(cbind(Sepal.Width, Petal.Width) ~ ., data = iris) - mv_pred <- as.data.frame(predict(mv_lm, newdata = iris[1:5, ])) + mv_pred <- as_tibble(predict(mv_lm, newdata = iris[1:5, ])) + names(mv_pred) <- c(".pred_Sepal.Width", ".pred_Petal.Width") + res_xy <- fit_xy( iris_basic, @@ -269,7 +272,7 @@ test_that('lm prediction', { control = ctrl ) - expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) res_form <- fit( iris_basic, @@ -277,7 +280,7 @@ test_that('lm prediction', { data = iris, control = ctrl ) - expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ])) + expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) res_mv <- fit( iris_basic, @@ -285,7 +288,7 @@ test_that('lm prediction', { data = iris, control = ctrl ) - expect_equal(mv_pred, parsnip:::predict_numeric(res_mv, iris[1:5,])) + expect_equal(mv_pred, predict(res_mv, iris[1:5,])) }) test_that('lm intervals', { diff --git a/tests/testthat/test_linear_reg_glmnet.R b/tests/testthat/test_linear_reg_glmnet.R index f03fcb38a..92506ff7f 100644 --- a/tests/testthat/test_linear_reg_glmnet.R +++ b/tests/testthat/test_linear_reg_glmnet.R @@ -1,6 +1,7 @@ library(testthat) library(parsnip) library(rlang) +library(tidyr) # ------------------------------------------------------------------------------ @@ -63,13 +64,10 @@ test_that('glmnet prediction, single lambda', { y = iris$Sepal.Length ) - uni_pred <- - predict(res_xy$fit, - newx = as.matrix(iris[1:5, num_pred]), - s = iris_basic$spec$args$penalty) - uni_pred <- unname(uni_pred[,1]) + uni_pred <- c(5.05124049139868, 4.87103404621362, 4.91028250633598, 4.9399094532023, + 5.08728178043569) - expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) res_form <- fit( iris_basic, @@ -78,16 +76,10 @@ test_that('glmnet prediction, single lambda', { control = ctrl ) - form_pred <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) - form_pred <- form_pred[1:5, -1] + form_pred <- c(5.24228948237804, 5.09448280355765, 5.15636527125752, 5.12592317615935, + 5.26930099973607) - form_pred <- - predict(res_form$fit, - newx = form_pred, - s = res_form$spec$spec$args$penalty) - form_pred <- unname(form_pred[,1]) - - expect_equal(form_pred, parsnip:::predict_numeric(res_form, iris[1:5, c("Sepal.Width", "Species")])) + expect_equal(form_pred, predict(res_form, iris[1:5,])$.pred) }) @@ -107,15 +99,38 @@ test_that('glmnet prediction, multiple lambda', { y = iris$Sepal.Length ) + # mult_pred <- + # predict(res_xy$fit, + # newx = as.matrix(iris[1:5, num_pred]), + # s = lams) + # mult_pred <- stack(as.data.frame(mult_pred)) + # mult_pred$penalty <- rep(lams, each = 5) + # mult_pred$rows <- rep(1:5, 2) + # mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] + # mult_pred <- mult_pred[, c("penalty", "values")] + # names(mult_pred) <- c("penalty", ".pred") + # mult_pred <- tibble::as_tibble(mult_pred) mult_pred <- - predict(res_xy$fit, - newx = as.matrix(iris[1:5, num_pred]), - s = lams) - mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred$lambda <- rep(lams, each = 5) - mult_pred <- mult_pred[,-2] + tibble::tribble( + ~penalty, ~.pred, + 0.01, 5.01352459498158, + 0.1, 5.05124049139868, + 0.01, 4.71767499960808, + 0.1, 4.87103404621362, + 0.01, 4.7791916685127, + 0.1, 4.91028250633598, + 0.01, 4.83366808792755, + 0.1, 4.9399094532023, + 0.01, 5.07269451405628, + 0.1, 5.08728178043569 + ) - expect_equal(mult_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) + expect_equal( + as.data.frame(mult_pred), + multi_predict(res_xy, new_data = iris[1:5, num_pred], lambda = lams) %>% + unnest() %>% + as.data.frame() + ) res_form <- fit( iris_mult, @@ -124,18 +139,42 @@ test_that('glmnet prediction, multiple lambda', { control = ctrl ) - form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) - form_mat <- form_mat[1:5, -1] + # form_mat <- model.matrix(Sepal.Length ~ log(Sepal.Width) + Species, data = iris) + # form_mat <- form_mat[1:5, -1] + # + # form_pred <- + # predict(res_form$fit, + # newx = form_mat, + # s = lams) + # form_pred <- stack(as.data.frame(form_pred)) + # form_pred$penalty <- rep(lams, each = 5) + # form_pred$rows <- rep(1:5, 2) + # form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + # form_pred <- form_pred[, c("penalty", "values")] + # names(form_pred) <- c("penalty", ".pred") + # form_pred <- tibble::as_tibble(form_pred) form_pred <- - predict(res_form$fit, - newx = form_mat, - s = lams) - form_pred <- stack(as.data.frame(form_pred)) - form_pred$lambda <- rep(lams, each = 5) - form_pred <- form_pred[,-2] + tibble::tribble( + ~penalty, ~.pred, + 0.01, 5.09237402805557, + 0.1, 5.24228948237804, + 0.01, 4.75071416991856, + 0.1, 5.09448280355765, + 0.01, 4.89375747015535, + 0.1, 5.15636527125752, + 0.01, 4.82338959520112, + 0.1, 5.12592317615935, + 0.01, 5.15481201301174, + 0.1, 5.26930099973607 + ) - expect_equal(form_pred, parsnip:::predict_numeric(res_form, iris[1:5, c("Sepal.Width", "Species")])) + expect_equal( + as.data.frame(form_pred), + multi_predict(res_form, new_data = iris[1:5, ], lambda = lams) %>% + unnest() %>% + as.data.frame() + ) }) test_that('glmnet prediction, all lambda', { @@ -154,16 +193,14 @@ test_that('glmnet prediction, all lambda', { all_pred <- predict(res_xy$fit, newx = as.matrix(iris[1:5, num_pred])) all_pred <- stack(as.data.frame(all_pred)) - all_pred$lambda <- rep(res_xy$fit$lambda, each = 5) - all_pred <- all_pred[,-2] + all_pred$penalty <- rep(res_xy$fit$lambda, each = 5) + all_pred$rows <- rep(1:5, 2) + all_pred <- all_pred[order(all_pred$rows, all_pred$penalty), ] + all_pred <- all_pred[, c("penalty", "values")] + names(all_pred) <- c("penalty", ".pred") + all_pred <- tibble::as_tibble(all_pred) - expect_equal(all_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) - - # test that the lambda seq is in the right order (since no docs on this) - tmp_pred <- predict(res_xy$fit, newx = as.matrix(iris[1:5, num_pred]), - s = res_xy$fit$lambda[5])[,1] - expect_equal(all_pred$values[all_pred$lambda == res_xy$fit$lambda[5]], - unname(tmp_pred)) + expect_equal(all_pred, multi_predict(res_xy, new_data = iris[1:5,num_pred ]) %>% unnest()) res_form <- fit( iris_all, @@ -177,10 +214,14 @@ test_that('glmnet prediction, all lambda', { form_pred <- predict(res_form$fit, newx = form_mat) form_pred <- stack(as.data.frame(form_pred)) - form_pred$lambda <- rep(res_form$fit$lambda, each = 5) - form_pred <- form_pred[,-2] - - expect_equal(form_pred, parsnip:::predict_numeric(res_form, iris[1:5, c("Sepal.Width", "Species")])) + form_pred$penalty <- rep(res_form$fit$lambda, each = 5) + form_pred$rows <- rep(1:5, 2) + form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + form_pred <- form_pred[, c("penalty", "values")] + names(form_pred) <- c("penalty", ".pred") + form_pred <- tibble::as_tibble(form_pred) + + expect_equal(form_pred, multi_predict(res_form, iris[1:5, c("Sepal.Width", "Species")]) %>% unnest()) }) diff --git a/tests/testthat/test_linear_reg_spark.R b/tests/testthat/test_linear_reg_spark.R index 28859a030..4b7432d80 100644 --- a/tests/testthat/test_linear_reg_spark.R +++ b/tests/testthat/test_linear_reg_spark.R @@ -42,7 +42,7 @@ test_that('spark execution', { ) expect_error( - spark_pred_num <- parsnip:::predict_numeric(spark_fit, iris_linreg_te), + spark_pred_num <- predict(spark_fit, iris_linreg_te), regexp = NA ) diff --git a/tests/testthat/test_linear_reg_stan.R b/tests/testthat/test_linear_reg_stan.R index 656abe111..e039d10b3 100644 --- a/tests/testthat/test_linear_reg_stan.R +++ b/tests/testthat/test_linear_reg_stan.R @@ -54,10 +54,10 @@ test_that('stan_glm execution', { test_that('stan prediction', { skip_if_not_installed("rstanarm") - uni_stan <- stan_glm(Sepal.Length ~ Sepal.Width + Petal.Width + Petal.Length, data = iris, seed = 123) - uni_pred <- unname(predict(uni_stan, newdata = iris[1:5, ])) - inl_stan <- stan_glm(Sepal.Width ~ log(Sepal.Length) + Species, data = iris, seed = 123, chains = 1) - inl_pred <- unname(predict(inl_stan, newdata = iris[1:5, c("Sepal.Length", "Species")])) + uni_pred <- c(5.01531691055198, 4.6896592504705, 4.74907435900005, 4.82563873798984, + 5.08044844256827) + inl_pred <- c(3.47062722437493, 3.38380776677489, 3.29336980560884, 3.24669710332179, + 3.42765162180813) res_xy <- fit_xy( linear_reg() %>% @@ -67,7 +67,7 @@ test_that('stan prediction', { control = quiet_ctrl ) - expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred]), tolerance = 0.001) + expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred, tolerance = 0.001) res_form <- fit( iris_basic, @@ -75,7 +75,7 @@ test_that('stan prediction', { data = iris, control = quiet_ctrl ) - expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ]), tolerance = 0.001) + expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred, tolerance = 0.001) }) @@ -102,30 +102,24 @@ test_that('stan intervals', { type = "pred_int", level = 0.93) - # prediction_stan <- - # predictive_interval(res_xy$fit, newdata = iris[1:5, ], seed = 13, - # prob = 0.93) - # - # stan_post <- posterior_linpred(res_xy$fit, newdata = iris[1:5, ], - # seed = 13) - # stan_lower <- apply(stan_post, 2, quantile, prob = 0.035) - # stan_upper <- apply(stan_post, 2, quantile, prob = 0.965) - - stan_lower <- c(`1` = 4.93164991101342, `2` = 4.60197941230393, - `3` = 4.6671442757811, `4` = 4.74402724639963, - `5` = 4.99248110476701) - stan_upper <- c(`1` = 5.1002837047058, `2` = 4.77617561853506, - `3` = 4.83183673602725, `4` = 4.90844811805409, - `5` = 5.16979395659009) - - expect_equivalent(confidence_parsnip$.pred_lower, stan_lower) - expect_equivalent(confidence_parsnip$.pred_upper, stan_upper) + ci_lower <- c(4.93164991101342, 4.60197941230393, 4.6671442757811, 4.74402724639963, + 4.99248110476701) + ci_upper <- c(5.1002837047058, 4.77617561853506, 4.83183673602725, 4.90844811805409, + 5.16979395659009) + + pi_lower <- c(4.43202758985944, 4.09957733046886, 4.17664779714598, 4.24948546338885, + 4.50058914781073) + pi_upper <- c(5.59783267637042, 5.25976504318669, 5.33296516452929, 5.41050668003565, + 5.66355828140989) + + expect_equivalent(confidence_parsnip$.pred_lower, ci_lower) + expect_equivalent(confidence_parsnip$.pred_upper, ci_upper) expect_equivalent(prediction_parsnip$.pred_lower, - prediction_stan[, "3.5%"], + pi_lower, tol = 0.01) expect_equivalent(prediction_parsnip$.pred_upper, - prediction_stan[, "96.5%"], + pi_upper, tol = 0.01) }) diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 31e346414..9d2c265a0 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -276,7 +276,7 @@ test_that('glm prediction', { xy_pred <- ifelse(xy_pred >= 0.5, "good", "bad") xy_pred <- factor(xy_pred, levels = levels(lending_club$Class)) xy_pred <- unname(xy_pred) - expect_equal(xy_pred, parsnip:::predict_class(classes_xy, lending_club[1:7, num_pred])) + expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred], type = "class")$.pred_class) }) @@ -289,10 +289,10 @@ test_that('glm probabilities', { ) xy_pred <- predict(classes_xy$fit, newdata = lending_club[1:7, num_pred], type = "response") - xy_pred <- tibble(bad = 1 - xy_pred, good = xy_pred) - expect_equal(xy_pred, parsnip:::predict_classprob(classes_xy, lending_club[1:7, num_pred])) + xy_pred <- tibble(.pred_bad = 1 - xy_pred, .pred_good = xy_pred) + expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred], type = "prob")) - one_row <- parsnip:::predict_classprob(classes_xy, lending_club[1, num_pred]) + one_row <- predict(classes_xy, lending_club[1, num_pred], type = "prob") expect_equal(xy_pred[1,], one_row) }) diff --git a/tests/testthat/test_logistic_reg_glmnet.R b/tests/testthat/test_logistic_reg_glmnet.R index e183b07f6..74165c6dc 100644 --- a/tests/testthat/test_logistic_reg_glmnet.R +++ b/tests/testthat/test_logistic_reg_glmnet.R @@ -2,6 +2,7 @@ library(testthat) library(parsnip) library(rlang) library(tibble) +library(tidyr) # ------------------------------------------------------------------------------ @@ -64,7 +65,7 @@ test_that('glmnet prediction, one lambda', { uni_pred <- factor(uni_pred, levels = levels(lending_club$Class)) uni_pred <- unname(uni_pred) - expect_equal(uni_pred, parsnip:::predict_class(xy_fit, lending_club[1:7, num_pred])) + expect_equal(uni_pred, predict(xy_fit, lending_club[1:7, num_pred])$.pred_class) res_form <- fit( logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), @@ -84,7 +85,10 @@ test_that('glmnet prediction, one lambda', { form_pred <- factor(form_pred, levels = levels(lending_club$Class)) form_pred <- unname(form_pred) - expect_equal(form_pred, parsnip:::predict_class(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + expect_equal( + form_pred, + predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "class")$.pred_class + ) }) @@ -109,10 +113,17 @@ test_that('glmnet prediction, mulitiple lambda', { mult_pred <- stack(as.data.frame(mult_pred)) mult_pred$values <- ifelse(mult_pred$values >= 0.5, "good", "bad") mult_pred$values <- factor(mult_pred$values, levels = levels(lending_club$Class)) - mult_pred$lambda <- rep(lams, each = 7) - mult_pred <- mult_pred[, -2] - - expect_equal(mult_pred, parsnip:::predict_class(xy_fit, lending_club[1:7, num_pred])) + mult_pred$penalty <- rep(lams, each = 7) + mult_pred$rows <- rep(1:7, 2) + mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] + mult_pred <- mult_pred[, c("penalty", "values")] + names(mult_pred) <- c("penalty", ".pred") + mult_pred <- tibble::as_tibble(mult_pred) + + expect_equal( + mult_pred, + multi_predict(xy_fit, lending_club[1:7, num_pred], type = "class") %>% unnest() + ) res_form <- fit( logistic_reg(penalty = lams) %>% set_engine("glmnet"), @@ -127,14 +138,21 @@ test_that('glmnet prediction, mulitiple lambda', { form_pred <- predict(res_form$fit, newx = form_mat, - type = "response") + s = lams) form_pred <- stack(as.data.frame(form_pred)) form_pred$values <- ifelse(form_pred$values >= 0.5, "good", "bad") form_pred$values <- factor(form_pred$values, levels = levels(lending_club$Class)) - form_pred$lambda <- rep(lams, each = 7) - form_pred <- form_pred[, -2] - - expect_equal(form_pred, parsnip:::predict_class(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + form_pred$penalty <- rep(lams, each = 7) + form_pred$rows <- rep(1:7, 2) + form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + form_pred <- form_pred[, c("penalty", "values")] + names(form_pred) <- c("penalty", ".pred") + form_pred <- tibble::as_tibble(form_pred) + + expect_equal( + form_pred, + multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% unnest() + ) }) @@ -156,10 +174,14 @@ test_that('glmnet prediction, no lambda', { mult_pred <- stack(as.data.frame(mult_pred)) mult_pred$values <- ifelse(mult_pred$values >= 0.5, "good", "bad") mult_pred$values <- factor(mult_pred$values, levels = levels(lending_club$Class)) - mult_pred$lambda <- rep(xy_fit$fit$lambda, each = 7) - mult_pred <- mult_pred[, -2] + mult_pred$penalty <- rep(xy_fit$fit$lambda, each = 7) + mult_pred$rows <- rep(1:7, 2) + mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] + mult_pred <- mult_pred[, c("penalty", "values")] + names(mult_pred) <- c("penalty", ".pred") + mult_pred <- tibble::as_tibble(mult_pred) - expect_equal(mult_pred, parsnip:::predict_class(xy_fit, lending_club[1:7, num_pred])) + expect_equal(mult_pred, multi_predict(xy_fit, lending_club[1:7, num_pred]) %>% unnest()) res_form <- fit( logistic_reg() %>% set_engine("glmnet", nlambda = 11), @@ -178,9 +200,17 @@ test_that('glmnet prediction, no lambda', { form_pred <- stack(as.data.frame(form_pred)) form_pred$values <- ifelse(form_pred$values >= 0.5, "good", "bad") form_pred$values <- factor(form_pred$values, levels = levels(lending_club$Class)) - form_pred$lambda <- rep(res_form$fit$lambda, each = 7) - form_pred <- form_pred[, -2] - expect_equal(form_pred, parsnip:::predict_class(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + form_pred$penalty <- rep(res_form$fit$lambda, each = 7) + form_pred$rows <- rep(1:7, 2) + form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + form_pred <- form_pred[, c("penalty", "values")] + names(form_pred) <- c("penalty", ".pred") + form_pred <- tibble::as_tibble(form_pred) + + expect_equal( + form_pred, + multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% unnest() + ) }) @@ -200,9 +230,12 @@ test_that('glmnet probabilities, one lambda', { predict(xy_fit$fit, newx = as.matrix(lending_club[1:7, num_pred]), s = 0.1, type = "response")[,1] - uni_pred <- tibble(bad = 1 - uni_pred, good = uni_pred) + uni_pred <- tibble(.pred_bad = 1 - uni_pred, .pred_good = uni_pred) - expect_equal(uni_pred, parsnip:::predict_classprob(xy_fit, lending_club[1:7, num_pred])) + expect_equal( + uni_pred, + predict(xy_fit, lending_club[1:7, num_pred], type = "prob") + ) res_form <- fit( logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), @@ -218,10 +251,14 @@ test_that('glmnet probabilities, one lambda', { predict(res_form$fit, newx = form_mat, s = 0.1, type = "response")[, 1] - form_pred <- tibble(bad = 1 - form_pred, good = form_pred) - expect_equal(form_pred, parsnip:::predict_classprob(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + form_pred <- tibble(.pred_bad = 1 - form_pred, .pred_good = form_pred) + + expect_equal( + form_pred, + predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") + ) - one_row <- parsnip:::predict_classprob(res_form, lending_club[1, c("funded_amnt", "int_rate")]) + one_row <- predict(res_form, lending_club[1, c("funded_amnt", "int_rate")], type = "prob") expect_equal(form_pred[1,], one_row) }) @@ -244,10 +281,19 @@ test_that('glmnet probabilities, mulitiple lambda', { newx = as.matrix(lending_club[1:7, num_pred]), s = lams, type = "response") mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred <- tibble(bad = 1 - mult_pred$values, good = mult_pred$values) - mult_pred$lambda <- rep(lams, each = 7) - - expect_equal(mult_pred, parsnip:::predict_classprob(xy_fit, lending_club[1:7, num_pred])) + mult_pred$penalty <- rep(lams, each = 7) + mult_pred$rows <- rep(1:7, 2) + mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] + mult_pred$.pred_bad <- 1 - mult_pred$values + mult_pred <- mult_pred[, c("penalty", ".pred_bad", "values")] + names(mult_pred) <- c("penalty", ".pred_bad", ".pred_good") + mult_pred <- tibble::as_tibble(mult_pred) + + expect_equal( + mult_pred, + multi_predict(xy_fit, lending_club[1:7, num_pred], lambda = lams, type = "prob") %>% + unnest() + ) res_form <- fit( logistic_reg(penalty = lams) %>% set_engine("glmnet"), @@ -264,10 +310,19 @@ test_that('glmnet probabilities, mulitiple lambda', { newx = form_mat, s = lams, type = "response") form_pred <- stack(as.data.frame(form_pred)) - form_pred <- tibble(bad = 1 - form_pred$values, good = form_pred$values) - form_pred$lambda <- rep(lams, each = 7) - - expect_equal(form_pred, parsnip:::predict_classprob(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + form_pred$penalty <- rep(lams, each = 7) + form_pred$rows <- rep(1:7, 2) + form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + form_pred$.pred_bad <- 1 - form_pred$values + form_pred <- form_pred[, c("penalty", ".pred_bad", "values")] + names(form_pred) <- c("penalty", ".pred_bad", ".pred_good") + form_pred <- tibble::as_tibble(form_pred) + + expect_equal( + form_pred, + multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") %>% + unnest() + ) }) @@ -288,10 +343,18 @@ test_that('glmnet probabilities, no lambda', { newx = as.matrix(lending_club[1:7, num_pred]), type = "response") mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred <- tibble(bad = 1 - mult_pred$values, good = mult_pred$values) - mult_pred$lambda <- rep(xy_fit$fit$lambda, each = 7) - - expect_equal(mult_pred, parsnip:::predict_classprob(xy_fit, lending_club[1:7, num_pred])) + mult_pred$penalty <- rep(xy_fit$fit$lambda, each = 7) + mult_pred$rows <- rep(1:7, length(xy_fit$fit$lambda)) + mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] + mult_pred$.pred_bad <- 1 - mult_pred$values + mult_pred <- mult_pred[, c("penalty", ".pred_bad", "values")] + names(mult_pred) <- c("penalty", ".pred_bad", ".pred_good") + mult_pred <- tibble::as_tibble(mult_pred) + + expect_equal( + mult_pred, + multi_predict(xy_fit, lending_club[1:7, num_pred], type = "prob") %>% unnest() + ) res_form <- fit( logistic_reg() %>% set_engine("glmnet"), @@ -308,10 +371,18 @@ test_that('glmnet probabilities, no lambda', { newx = form_mat, type = "response") form_pred <- stack(as.data.frame(form_pred)) - form_pred <- tibble(bad = 1 - form_pred$values, good = form_pred$values) - form_pred$lambda <- rep(res_form$fit$lambda, each = 7) - - expect_equal(form_pred, parsnip:::predict_classprob(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + form_pred$penalty <- rep(res_form$fit$lambda, each = 7) + form_pred$rows <- rep(1:7, length(res_form$fit$lambda)) + form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] + form_pred$.pred_bad <- 1 - form_pred$values + form_pred <- form_pred[, c("penalty", ".pred_bad", "values")] + names(form_pred) <- c("penalty", ".pred_bad", ".pred_good") + form_pred <- tibble::as_tibble(form_pred) + + expect_equal( + form_pred, + multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") %>% unnest() + ) }) diff --git a/tests/testthat/test_logistic_reg_spark.R b/tests/testthat/test_logistic_reg_spark.R index b9ac7a1fa..6c47487c8 100644 --- a/tests/testthat/test_logistic_reg_spark.R +++ b/tests/testthat/test_logistic_reg_spark.R @@ -56,7 +56,7 @@ test_that('spark execution', { ) expect_error( - spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, churn_logit_te), + spark_class_pred_class <- predict(spark_class_fit, churn_logit_te), regexp = NA ) @@ -73,7 +73,7 @@ test_that('spark execution', { ) expect_error( - spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, churn_logit_te), + spark_class_prob_classprob <- predict(spark_class_fit, churn_logit_te, type = "prob"), regexp = NA ) diff --git a/tests/testthat/test_logistic_reg_stan.R b/tests/testthat/test_logistic_reg_stan.R index 19d2ce4d2..18940994c 100644 --- a/tests/testthat/test_logistic_reg_stan.R +++ b/tests/testthat/test_logistic_reg_stan.R @@ -56,15 +56,9 @@ test_that('stan_glm prediction', { y = lending_club$Class ) - xy_pred <- - predict(xy_fit$fit, - newdata = lending_club[1:7, num_pred]) - xy_pred <- xy_fit$fit$family$linkinv(xy_pred) - xy_pred <- ifelse(xy_pred >= 0.5, "good", "bad") - xy_pred <- factor(xy_pred, levels = levels(lending_club$Class)) - xy_pred <- unname(xy_pred) + xy_pred <- structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("bad", "good"), class = "factor") - expect_equal(xy_pred, parsnip:::predict_class(xy_fit, lending_club[1:7, num_pred])) + expect_equal(xy_pred, parsnip:::predict_class.model_fit(xy_fit, lending_club[1:7, num_pred])) res_form <- fit( logistic_reg() %>% @@ -74,19 +68,11 @@ test_that('stan_glm prediction', { control = ctrl ) - - # form_pred <- - # predict(res_form$fit, - # newdata = lending_club[1:7, c("funded_amnt", "int_rate")]) - # form_pred <- xy_fit$fit$family$linkinv(form_pred) - # form_pred <- unname(form_pred) - # form_pred <- ifelse(form_pred >= 0.5, "good", "bad") - # form_pred <- factor(form_pred, levels = levels(lending_club$Class)) form_pred <- structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("bad", "good"), class = "factor") - expect_equal(form_pred, parsnip:::predict_class(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) + expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) }) @@ -104,12 +90,21 @@ test_that('stan_glm probability', { ) xy_pred <- - predict(xy_fit$fit, - newdata = lending_club[1:7, num_pred]) - xy_pred <- xy_fit$fit$family$linkinv(xy_pred) - xy_pred <- tibble(bad = 1 - xy_pred, good = xy_pred) + tibble::tribble( + ~bad, ~good, + 0.0173511241321764, 0.982648875867824, + 0.0550090130462705, 0.94499098695373, + 0.0292445716644468, 0.970755428335553, + 0.0516116810109397, 0.94838831898906, + 0.0142530690940691, 0.985746930905931, + 0.0184806465081366, 0.981519353491863, + 0.0253642111906806, 0.974635788809319 + ) - expect_equal(xy_pred, parsnip:::predict_classprob(xy_fit, lending_club[1:7, num_pred])) + expect_equal( + xy_pred %>% as.data.frame(), + parsnip:::predict_classprob.model_fit(xy_fit, lending_club[1:7, num_pred]) %>% as.data.frame() + ) res_form <- fit( logistic_reg() %>% @@ -119,11 +114,6 @@ test_that('stan_glm probability', { control = ctrl ) - # form_pred <- - # predict(res_form$fit, - # newdata = lending_club[1:7, c("funded_amnt", "int_rate")]) - # form_pred <- xy_fit$fit$family$linkinv(form_pred) - # form_pred <- tibble(bad = 1 - form_pred, good = form_pred) form_pred <- tibble::tribble( ~bad, ~good, @@ -137,7 +127,7 @@ test_that('stan_glm probability', { ) expect_equal( form_pred %>% as.data.frame(), - parsnip:::predict_classprob(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% + parsnip:::predict_classprob.model_fit(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% as.data.frame() ) }) @@ -170,14 +160,6 @@ test_that('stan intervals', { level = 0.93, std_error = TRUE) - # stan_post <- - # posterior_linpred(res_form$fit, newdata = lending_club[1:5, ], seed = 13, - # prob = 0.93, transform = TRUE) - # - # stan_lower <- apply(stan_post, 2, quantile, prob = 0.035) - # stan_upper <- apply(stan_post, 2, quantile, prob = 0.965) - # stan_std <- apply(stan_post, 2, sd) - stan_lower <- c(`1` = 0.913925483690233, `2` = 0.841801274737206, `3` = 0.91056642931229, `4` = 0.913619668586545, `5` = 0.987780279394871) @@ -194,21 +176,13 @@ test_that('stan intervals', { expect_equivalent(confidence_parsnip$.pred_upper_bad, 1 - stan_lower) expect_equivalent(confidence_parsnip$.std_error, stan_std) - # stan_pred_post <- - # posterior_predict(res_form$fit, newdata = lending_club[1:5, ], seed = 13, - # prob = 0.93) - # - # stan_pred_lower <- apply(stan_pred_post, 2, quantile, prob = 0.035) - # stan_pred_upper <- apply(stan_pred_post, 2, quantile, prob = 0.965) - # stan_pred_std <- apply(stan_pred_post, 2, sd) - stan_pred_lower <- c(`1` = 0, `2` = 0, `3` = 0, `4` = 0, `5` = 1) stan_pred_upper <- c(`1` = 1, `2` = 1, `3` = 1, `4` = 1, `5` = 1) stan_pred_std <- c(`1` = 0.211744742168102, `2` = 0.265130711714607, `3` = 0.209589904165081, `4` = 0.198389410902796, `5` = 0.0446989708829856) - expect_equivalent(prediction_parsnip$.pred_lower, stan_pred_lower) - expect_equivalent(prediction_parsnip$.pred_upper, stan_pred_upper) + expect_equivalent(prediction_parsnip$.pred_lower_good, stan_pred_lower) + expect_equivalent(prediction_parsnip$.pred_upper_good, stan_pred_upper) expect_equivalent(prediction_parsnip$.std_error, stan_pred_std, tolerance = 0.1) }) diff --git a/tests/testthat/test_mars.R b/tests/testthat/test_mars.R index 7ea9213a3..ef1b16254 100644 --- a/tests/testthat/test_mars.R +++ b/tests/testthat/test_mars.R @@ -185,7 +185,7 @@ test_that('mars prediction', { control = ctrl ) - expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) res_form <- fit( iris_basic, @@ -193,7 +193,7 @@ test_that('mars prediction', { data = iris, control = ctrl ) - expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ])) + expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) res_mv <- fit( iris_basic, @@ -201,7 +201,10 @@ test_that('mars prediction', { data = iris, control = ctrl ) - expect_equal(mv_pred, parsnip:::predict_numeric(res_mv, iris[1:5,])) + expect_equal( + setNames(mv_pred, paste0(".pred_", names(mv_pred))) %>% as.data.frame(), + predict(res_mv, iris[1:5,]) %>% as.data.frame() + ) }) @@ -264,18 +267,19 @@ test_that('classification', { skip_if_not_installed("earth") expect_error( - glm_mars <- mars(mode = "classification") %>% + glm_mars <- + mars(mode = "classification") %>% set_engine("earth") %>% fit(Class ~ ., data = lending_club[-(1:5),]), regexp = NA ) expect_true(!is.null(glm_mars$fit$glm.list)) - parsnip_pred <- parsnip:::predict_classprob(glm_mars, new_data = lending_club[1:5, -ncol(lending_club)]) + parsnip_pred <- predict(glm_mars, new_data = lending_club[1:5, -ncol(lending_club)], type = "prob") earth_pred <- c(0.95631355972526, 0.971917781277731, 0.894245392500336, 0.962667553751077, 0.985827594261896) - expect_equal(parsnip_pred[["good"]], earth_pred) + expect_equal(parsnip_pred$.pred_good, earth_pred) }) diff --git a/tests/testthat/test_mlp_keras.R b/tests/testthat/test_mlp_keras.R index fead31752..e143c0464 100644 --- a/tests/testthat/test_mlp_keras.R +++ b/tests/testthat/test_mlp_keras.R @@ -218,10 +218,8 @@ test_that('multivariate nnet formula', { data = nn_dat[-(1:5),] ) expect_equal(length(unlist(keras::get_weights(nnet_form$fit))), 24) - nnet_form_pred <- parsnip:::predict_numeric(nnet_form, new_data = nn_dat[1:5, -(1:3)]) - expect_equal(ncol(nnet_form_pred), 3) - expect_equal(nrow(nnet_form_pred), 5) - expect_equal(names(nnet_form_pred), c("V1", "V2", "V3")) + nnet_form_pred <- predict(nnet_form, new_data = nn_dat[1:5, -(1:3)]) + expect_equal(names(nnet_form_pred), paste0(".pred_", c("V1", "V2", "V3"))) keras::backend()$clear_session() @@ -233,10 +231,9 @@ test_that('multivariate nnet formula', { y = nn_dat[-(1:5), 1:3 ] ) expect_equal(length(unlist(keras::get_weights(nnet_xy$fit))), 24) - nnet_form_xy <- parsnip:::predict_numeric(nnet_xy, new_data = nn_dat[1:5, -(1:3)]) - expect_equal(ncol(nnet_form_xy), 3) - expect_equal(nrow(nnet_form_xy), 5) - expect_equal(names(nnet_form_xy), c("V1", "V2", "V3")) + nnet_form_xy <- predict(nnet_xy, new_data = nn_dat[1:5, -(1:3)]) + expect_equal(names(nnet_form_pred), paste0(".pred_", c("V1", "V2", "V3"))) + keras::backend()$clear_session() }) diff --git a/tests/testthat/test_mlp_nnet.R b/tests/testthat/test_mlp_nnet.R index 6a5022786..4172364b1 100644 --- a/tests/testthat/test_mlp_nnet.R +++ b/tests/testthat/test_mlp_nnet.R @@ -8,7 +8,7 @@ context("simple neural network execution with nnet") num_pred <- names(iris)[1:4] iris_nnet <- - mlp(mode = "classification", hidden_units = 2) %>% + mlp(mode = "classification", hidden_units = 5) %>% set_engine("nnet") ctrl <- fit_control(verbosity = 1, catch = FALSE) @@ -64,7 +64,7 @@ test_that('nnet classification prediction', { xy_pred <- predict(xy_fit$fit, newdata = iris[1:8, num_pred], type = "class") xy_pred <- factor(xy_pred, levels = levels(iris$Species)) - expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = iris[1:8, num_pred])) + expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class) form_fit <- fit( iris_nnet, @@ -75,7 +75,7 @@ test_that('nnet classification prediction', { form_pred <- predict(form_fit$fit, newdata = iris[1:8, num_pred], type = "class") form_pred <- factor(form_pred, levels = levels(iris$Species)) - expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = iris[1:8, num_pred])) + expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred])$.pred_class) }) @@ -141,7 +141,7 @@ test_that('nnet regression prediction', { xy_pred <- predict(xy_fit$fit, newdata = mtcars[1:8, -1])[,1] xy_pred <- unname(xy_pred) - expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = mtcars[1:8, -1])) + expect_equal(xy_pred, predict(xy_fit, new_data = mtcars[1:8, -1])$.pred) form_fit <- fit( car_basic, @@ -152,7 +152,7 @@ test_that('nnet regression prediction', { form_pred <- predict(form_fit$fit, newdata = mtcars[1:8, -1])[,1] form_pred <- unname(form_pred) - expect_equal(form_pred, parsnip:::predict_numeric(form_fit, new_data = mtcars[1:8, -1])) + expect_equal(form_pred, predict(form_fit, new_data = mtcars[1:8, -1])$.pred) }) # ------------------------------------------------------------------------------ @@ -175,10 +175,8 @@ test_that('multivariate nnet formula', { data = nn_dat[-(1:5),] ) expect_equal(length(nnet_form$fit$wts), 24) - nnet_form_pred <- parsnip:::predict_numeric(nnet_form, new_data = nn_dat[1:5, -(1:3)]) - expect_equal(ncol(nnet_form_pred), 3) - expect_equal(nrow(nnet_form_pred), 5) - expect_equal(names(nnet_form_pred), c("V1", "V2", "V3")) + nnet_form_pred <- predict(nnet_form, new_data = nn_dat[1:5, -(1:3)]) + expect_equal(names(nnet_form_pred), paste0(".pred_", c("V1", "V2", "V3"))) nnet_xy <- mlp( @@ -192,10 +190,8 @@ test_that('multivariate nnet formula', { y = nn_dat[-(1:5), 1:3 ] ) expect_equal(length(nnet_xy$fit$wts), 24) - nnet_form_xy <- parsnip:::predict_numeric(nnet_xy, new_data = nn_dat[1:5, -(1:3)]) - expect_equal(ncol(nnet_form_xy), 3) - expect_equal(nrow(nnet_form_xy), 5) - expect_equal(names(nnet_form_xy), c("V1", "V2", "V3")) + nnet_form_xy <- predict(nnet_xy, new_data = nn_dat[1:5, -(1:3)]) + expect_equal(names(nnet_form_xy), paste0(".pred_", c("V1", "V2", "V3"))) }) diff --git a/tests/testthat/test_multinom_reg_spark.R b/tests/testthat/test_multinom_reg_spark.R index e28238207..7a2d81712 100644 --- a/tests/testthat/test_multinom_reg_spark.R +++ b/tests/testthat/test_multinom_reg_spark.R @@ -45,7 +45,7 @@ test_that('spark execution', { ) expect_error( - spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, iris_te), + spark_class_pred_class <- predict(spark_class_fit, iris_te), regexp = NA ) @@ -62,7 +62,7 @@ test_that('spark execution', { ) expect_error( - spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, iris_te), + spark_class_prob_classprob <- predict(spark_class_fit, iris_te, type = "prob"), regexp = NA ) diff --git a/tests/testthat/test_nearest_neighbor_kknn.R b/tests/testthat/test_nearest_neighbor_kknn.R index d0d1846af..fa153d12f 100644 --- a/tests/testthat/test_nearest_neighbor_kknn.R +++ b/tests/testthat/test_nearest_neighbor_kknn.R @@ -75,7 +75,7 @@ test_that('kknn prediction', { newdata = iris[1:5, num_pred] ) - expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred])) + expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred) # nominal res_xy_nom <- fit_xy( @@ -90,7 +90,10 @@ test_that('kknn prediction', { newdata = iris[1:5, c("Sepal.Length", "Petal.Width")] ) - expect_equal(uni_pred_nom, parsnip:::predict_class(res_xy_nom, iris[1:5, c("Sepal.Length", "Petal.Width")])) + expect_equal( + uni_pred_nom, + predict(res_xy_nom, iris[1:5, c("Sepal.Length", "Petal.Width")], type = "class")$.pred_class + ) # continuous - formula interface res_form <- fit( @@ -105,5 +108,5 @@ test_that('kknn prediction', { newdata = iris[1:5,] ) - expect_equal(form_pred, parsnip:::predict_numeric(res_form, iris[1:5, c("Sepal.Width", "Species")])) + expect_equal(form_pred, predict(res_form, iris[1:5, c("Sepal.Width", "Species")])$.pred) }) diff --git a/tests/testthat/test_nullmodel.R b/tests/testthat/test_nullmodel.R index da0294d1f..b3eb4ee51 100644 --- a/tests/testthat/test_nullmodel.R +++ b/tests/testthat/test_nullmodel.R @@ -109,7 +109,7 @@ test_that('nullmodel prediction', { Petal.Length ~ log(Sepal.Width) + Species, data = iris ) - expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ])) + expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred) # Multivariate y res <- fit( @@ -118,7 +118,10 @@ test_that('nullmodel prediction', { data = mtcars ) - expect_equal(mw_pred, parsnip:::predict_numeric(res, mtcars[1:5, ])) + expect_equal( + setNames(mw_pred, paste0(".pred_", names(mw_pred))), + predict(res, mtcars[1:5, ]) + ) }) # ------------------------------------------------------------------------------ diff --git a/tests/testthat/test_predict_formats.R b/tests/testthat/test_predict_formats.R index 2588d47f3..eb83643cf 100644 --- a/tests/testthat/test_predict_formats.R +++ b/tests/testthat/test_predict_formats.R @@ -32,31 +32,31 @@ lr_fit_2 <- test_that('regression predictions', { expect_true(is_tibble(predict(lm_fit, new_data = iris[1:5,-1]))) - expect_true(is.vector(parsnip:::predict_numeric(lm_fit, new_data = iris[1:5,-1]))) + expect_true(is.vector(parsnip:::predict_numeric.model_fit(lm_fit, new_data = iris[1:5,-1]))) expect_equal(names(predict(lm_fit, new_data = iris[1:5,-1])), ".pred") }) test_that('classification predictions', { expect_true(is_tibble(predict(lr_fit, new_data = class_dat[1:5,-1]))) - expect_true(is.factor(parsnip:::predict_class(lr_fit, new_data = class_dat[1:5,-1]))) + expect_true(is.factor(parsnip:::predict_class.model_fit(lr_fit, new_data = class_dat[1:5,-1]))) expect_equal(names(predict(lr_fit, new_data = class_dat[1:5,-1])), ".pred_class") expect_true(is_tibble(predict(lr_fit, new_data = class_dat[1:5,-1], type = "prob"))) - expect_true(is_tibble(parsnip:::predict_classprob(lr_fit, new_data = class_dat[1:5,-1]))) + expect_true(is_tibble(parsnip:::predict_classprob.model_fit(lr_fit, new_data = class_dat[1:5,-1]))) expect_equal(names(predict(lr_fit, new_data = class_dat[1:5,-1], type = "prob")), c(".pred_high", ".pred_low")) }) test_that('non-standard levels', { expect_true(is_tibble(predict(lr_fit, new_data = class_dat[1:5,-1]))) - expect_true(is.factor(parsnip:::predict_class(lr_fit, new_data = class_dat[1:5,-1]))) + expect_true(is.factor(parsnip:::predict_class.model_fit(lr_fit, new_data = class_dat[1:5,-1]))) expect_equal(names(predict(lr_fit, new_data = class_dat[1:5,-1])), ".pred_class") expect_true(is_tibble(predict(lr_fit_2, new_data = class_dat2[1:5,-1], type = "prob"))) - expect_true(is_tibble(parsnip:::predict_classprob(lr_fit_2, new_data = class_dat2[1:5,-1]))) + expect_true(is_tibble(parsnip:::predict_classprob.model_fit(lr_fit_2, new_data = class_dat2[1:5,-1]))) expect_equal(names(predict(lr_fit_2, new_data = class_dat2[1:5,-1], type = "prob")), c(".pred_2low", ".pred_high+values")) - expect_equal(names(parsnip:::predict_classprob(lr_fit_2, new_data = class_dat2[1:5,-1])), + expect_equal(names(parsnip:::predict_classprob.model_fit(lr_fit_2, new_data = class_dat2[1:5,-1])), c("2low", "high+values")) }) @@ -79,20 +79,4 @@ test_that('non-factor classification', { ) }) -# ------------------------------------------------------------------------------ - -test_that('bad predict args', { - lm_model <- - linear_reg() %>% - set_engine("lm") %>% - fit(mpg ~ ., data = mtcars %>% dplyr::slice(11:32)) - - pred_cars <- - mtcars %>% - dplyr::slice(1:10) %>% - dplyr::select(-mpg) - - # expect_error(predict(lm_model, pred_cars, yes = "no")) - # expect_error(predict(lm_model, pred_cars, type = "conf_int", level = 0.95, yes = "no")) -}) diff --git a/tests/testthat/test_rand_forest_randomForest.R b/tests/testthat/test_rand_forest_randomForest.R index 5305cb464..5988de6c2 100644 --- a/tests/testthat/test_rand_forest_randomForest.R +++ b/tests/testthat/test_rand_forest_randomForest.R @@ -91,7 +91,7 @@ test_that('randomForest classification prediction', { xy_pred <- predict(xy_fit$fit, newdata = lending_club[1:6, num_pred]) xy_pred <- unname(xy_pred) - expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = lending_club[1:6, num_pred])) + expect_equal(xy_pred, predict(xy_fit, new_data = lending_club[1:6, num_pred])$.pred_class) form_fit <- fit( lc_basic, @@ -102,7 +102,10 @@ test_that('randomForest classification prediction', { form_pred <- predict(form_fit$fit, newdata = lending_club[1:6, c("funded_amnt", "int_rate")]) form_pred <- unname(form_pred) - expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])) + expect_equal( + form_pred, + predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])$.pred_class + ) }) test_that('randomForest classification probabilities', { @@ -118,9 +121,10 @@ test_that('randomForest classification probabilities', { xy_pred <- predict(xy_fit$fit, newdata = lending_club[1:6, num_pred], type = "prob") xy_pred <- as_tibble(as.data.frame(xy_pred)) - expect_equal(xy_pred, parsnip:::predict_classprob(xy_fit, new_data = lending_club[1:6, num_pred])) + names(xy_pred) <- paste0(".pred_", names(xy_pred)) + expect_equal(xy_pred, predict(xy_fit, new_data = lending_club[1:6, num_pred], type = "prob")) - one_row <- parsnip:::predict_classprob(xy_fit, new_data = lending_club[1, num_pred]) + one_row <- predict(xy_fit, new_data = lending_club[1, num_pred], type = "prob") expect_equivalent(xy_pred[1,], one_row) form_fit <- fit( @@ -132,7 +136,11 @@ test_that('randomForest classification probabilities', { form_pred <- predict(form_fit$fit, newdata = lending_club[1:6, c("funded_amnt", "int_rate")], type = "prob") form_pred <- as_tibble(as.data.frame(form_pred)) - expect_equal(form_pred, parsnip:::predict_classprob(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])) + names(form_pred) <- paste0(".pred_", names(form_pred)) + expect_equal( + form_pred, + predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")], type = "prob") + ) }) @@ -210,6 +218,6 @@ test_that('randomForest regression prediction', { xy_pred <- predict(xy_fit$fit, newdata = tail(mtcars)) xy_pred <- unname(xy_pred) - expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = tail(mtcars))) + expect_equal(xy_pred, predict(xy_fit, new_data = tail(mtcars))$.pred) }) diff --git a/tests/testthat/test_rand_forest_ranger.R b/tests/testthat/test_rand_forest_ranger.R index ee340df04..5e8300400 100644 --- a/tests/testthat/test_rand_forest_ranger.R +++ b/tests/testthat/test_rand_forest_ranger.R @@ -95,7 +95,10 @@ test_that('ranger classification prediction', { xy_pred <- predict(xy_fit$fit, data = lending_club[1:6, num_pred])$prediction xy_pred <- colnames(xy_pred)[apply(xy_pred, 1, which.max)] xy_pred <- factor(xy_pred, levels = levels(lending_club$Class)) - expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = lending_club[1:6, num_pred])) + expect_equal( + xy_pred, + predict(xy_fit, new_data = lending_club[1:6, num_pred], type = "class")$.pred_class + ) form_fit <- fit( rand_forest() %>% set_mode("classification") %>% set_engine("ranger"), @@ -108,7 +111,10 @@ test_that('ranger classification prediction', { form_pred <- predict(form_fit$fit, data = lending_club[1:6, c("funded_amnt", "int_rate")])$prediction form_pred <- colnames(form_pred)[apply(form_pred, 1, which.max)] form_pred <- factor(form_pred, levels = levels(lending_club$Class)) - expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])) + expect_equal( + form_pred, + predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])$.pred_class + ) }) @@ -127,9 +133,13 @@ test_that('ranger classification probabilities', { xy_pred <- predict(xy_fit$fit, data = lending_club[1:6, num_pred])$predictions xy_pred <- as_tibble(xy_pred) - expect_equal(xy_pred, parsnip:::predict_classprob(xy_fit, new_data = lending_club[1:6, num_pred])) + names(xy_pred) <- paste0(".pred_", names(xy_pred)) + expect_equal( + xy_pred, + predict(xy_fit, new_data = lending_club[1:6, num_pred], type = "prob") + ) - one_row <- parsnip:::predict_classprob(xy_fit, new_data = lending_club[1, num_pred]) + one_row <- predict(xy_fit, new_data = lending_club[1, num_pred], type = "prob") expect_equivalent(xy_pred[1,], one_row) form_fit <- fit( @@ -142,7 +152,11 @@ test_that('ranger classification probabilities', { form_pred <- predict(form_fit$fit, data = lending_club[1:6, c("funded_amnt", "int_rate")])$predictions form_pred <- as_tibble(form_pred) - expect_equal(form_pred, parsnip:::predict_classprob(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")])) + names(form_pred) <- paste0(".pred_", names(form_pred)) + expect_equal( + form_pred, + predict(form_fit, new_data = lending_club[1:6, c("funded_amnt", "int_rate")], type = "prob") + ) no_prob_model <- fit_xy( rand_forest() %>% set_engine("ranger", probability = FALSE), @@ -152,7 +166,7 @@ test_that('ranger classification probabilities', { ) expect_error( - parsnip:::predict_classprob(no_prob_model, new_data = lending_club[1:6, num_pred]) + parsnip:::predict_classprob.model_fit(no_prob_model, new_data = lending_club[1:6, num_pred]) ) }) @@ -227,7 +241,7 @@ test_that('ranger regression prediction', { xy_pred <- predict(xy_fit$fit, data = tail(mtcars[, -1]))$prediction - expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = tail(mtcars[, -1]))) + expect_equal(xy_pred, predict(xy_fit, new_data = tail(mtcars[, -1]))$.pred) }) diff --git a/tests/testthat/test_rand_forest_spark.R b/tests/testthat/test_rand_forest_spark.R index da0b5fab6..499049390 100644 --- a/tests/testthat/test_rand_forest_spark.R +++ b/tests/testthat/test_rand_forest_spark.R @@ -58,7 +58,7 @@ test_that('spark execution', { ) expect_error( - spark_reg_pred_num <- parsnip:::predict_numeric(spark_reg_fit, iris_rf_te), + spark_reg_pred_num <- predict(spark_reg_fit, iris_rf_te), regexp = NA ) @@ -68,7 +68,7 @@ test_that('spark execution', { ) expect_error( - spark_reg_num_dup <- parsnip:::predict_numeric(spark_reg_fit_dup, iris_rf_te), + spark_reg_num_dup <- predict(spark_reg_fit_dup, iris_rf_te), regexp = NA ) @@ -124,7 +124,7 @@ test_that('spark execution', { ) expect_error( - spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, churn_rf_te), + spark_class_pred_class <- predict(spark_class_fit, churn_rf_te), regexp = NA ) @@ -134,7 +134,7 @@ test_that('spark execution', { ) expect_error( - spark_class_dup_class <- parsnip:::predict_class(spark_class_fit_dup, churn_rf_te), + spark_class_dup_class <- predict(spark_class_fit_dup, churn_rf_te), regexp = NA ) @@ -156,17 +156,16 @@ test_that('spark execution', { ) expect_error( - spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, churn_rf_te), + spark_class_dup <- predict(spark_class_fit_dup, churn_rf_te, type = "prob"), regexp = NA ) expect_error( - spark_class_dup <- predict(spark_class_fit_dup, churn_rf_te, type = "prob"), + spark_class_dup_classprob <- predict(spark_class_fit_dup, churn_rf_te, type = "prob"), regexp = NA ) - expect_error( - spark_class_dup_classprob <- parsnip:::predict_classprob(spark_class_fit_dup, churn_rf_te), + spark_class_prob_classprob <- predict(spark_class_fit, churn_rf_te, type = "prob"), regexp = NA )