tidymodels · topepo · Mar 22, 2019 · Mar 21, 2019 · Mar 21, 2019 · Mar 21, 2019
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -9,4 +9,5 @@
 ^R/README\.md$
 derby.log
 ^logs$
-^tests/testthat/logs$
+^tests/testthat/logs$
+^revdep$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,11 +17,11 @@ VignetteBuilder: knitr
 Depends:
     R (>= 2.10)
 Imports: 
-    dplyr,
+    dplyr (>= 0.8.0.1),
     rlang (>= 0.3.1),
     purrr,
     utils,
-    tibble,
+    tibble (>= 2.1.1),
     generics,
     glue,
     magrittr,
@@ -39,7 +39,7 @@ Suggests:
     xgboost,
     covr,
     C50,
-    sparklyr,
+    sparklyr (>= 1.0.0),
     earth,
     glmnet,
     kernlab,

diff --git a/NEWS.md b/NEWS.md
@@ -21,6 +21,7 @@ that are actually varying).
 
 * The prediction modules (e.g. `predict_class`, `predict_numeric`, etc) were de-exported. These were internal functions that were not to be used by the users and the users were using them. 
 
+ * An event time data set (`check_times`) was included that contains the time (in seconds) to run `R CMD check` using the `r-devel-windows-ix86+x86_64` flavor. Packages that errored are censored.
 
 ## Bug Fixes
 

diff --git a/R/aaa_spark_helpers.R b/R/aaa_spark_helpers.R
@@ -4,9 +4,9 @@
 format_spark_probs <- function(results, object) {
   results <- dplyr::select(results, starts_with("probability_"))
   p <- ncol(results)
-  lvl <- paste0("probability_", 0:(p - 1))
-  names(lvl) <- paste0("pred_", object$fit$.index_labels)
-  results %>% rename(!!!syms(lvl))
+  lvl <- colnames(results)
+  names(lvl) <- paste0("pred_", object$fit$index_labels)
+  results %>% dplyr::rename(!!!syms(lvl))
 }
 
 format_spark_class <- function(results, object) {

diff --git a/R/data.R b/R/data.R
@@ -44,3 +44,62 @@ NULL
 #' data(wa_churn)
 #' str(wa_churn)
 NULL
+
+#' Execution Time Data
+#'
+#' These data were collected from the CRAN web page for 13,626 R
+#' packages. The time to complete the standard package checking
+#' routine was collected. In some cases, the package checking
+#' process is stopped due to errors and these data are treated as
+#' censored. Fewer than 1 percent of the packages are censored.
+#'
+#' The associated package source code was downloaded and parsed
+#' to create predictors, including:
+#'
+#' * `authors`: The number of authors in the author field.
+#' * `imports`: The number of imported packages.
+#' * `suggests`: The number of packages suggested.
+#' * `depends`: The number of hard dependencies.
+#' * `Roxygen`: a binary indicator for whether Roxygen was used
+#'   for documentation.
+#' * `gh`: a binary indicator for whether the URL field contained
+#'   a GitHub link.
+#' * `rforge`: a binary indicator for whether the URL field
+#'   contained a link to R-forge.
+#' * `descr`: The number of characters (or, in some cases, bytes)
+#'   in the description field.
+#' * `r_count`: The number of R files in the R directory.
+#' * `r_size`: The total disk size of the R files.
+#' * `ns_import`: Estimated number of imported functions or methods.
+#' * `ns_export`: Estimated number of exported functions or methods.
+#' * `s3_methods`: Estimated number of S3 methods.
+#' * `s4_methods`: Estimated number of S4 methods.
+#' * `doc_count`: How many Rmd or Rnw files in the vignettes
+#'   directory.
+#' * `doc_size`: The disk size of the Rmd or Rnw files.
+#' * `src_count`: The number of files in the `src` directory.
+#' * `src_size`: The size on disk of files in the `src` directory.
+#' * `data_count`: The number of files in the `data` directory.
+#' * `data_size`: The size on disk of files in the `data` directory.
+#' * `testthat_count`: The number of files in the `testthat`
+#'   directory.
+#' * `testthat_size`: The size on disk of files in the `testthat`
+#'   directory.
+#' * `check_time`: The time (in seconds) to run `R CMD check`
+#'   using the `r-devel-windows-ix86+x86_64` flavor.
+#' * `status`: An indicator for whether the tests completed.
+#'
+#' Data were collected on 2019-01-20.
+#' @name check_times
+#' @aliases check_times
+#' @docType data
+#' @return \item{check_times}{a data frame}
+#'
+#' @source CRAN
+#'
+#' @keywords datasets
+#' @examples
+#' data(check_times)
+#' str(check_times)
+NULL
+
diff --git a/R/predict.R b/R/predict.R
@@ -245,7 +245,6 @@ prepare_data <- function(object, new_data) {
 #' @return A tibble with the same number of rows as the data being predicted.
 #'  Mostly likely, there is a list-column named `.pred` that is a tibble with
 #'  multiple rows per sub-model.
-#' @keywords internal
 #' @export
 multi_predict <- function(object, ...) {
   if (inherits(object$fit, "try-error")) {
@@ -255,7 +254,6 @@ multi_predict <- function(object, ...) {
   UseMethod("multi_predict")
 }
 
-#' @keywords internal
 #' @export
 #' @rdname multi_predict
 multi_predict.default <- function(object, ...)

diff --git a/data/check_times.rda b/data/check_times.rda
diff --git a/data/datalist b/data/datalist
@@ -1,2 +1,3 @@
 lending_club: lending_club
 wa_churn: wa_churn
+check_times: check_times
diff --git a/man/check_times.Rd b/man/check_times.Rd
diff --git a/man/multi_predict.Rd b/man/multi_predict.Rd
diff --git a/tests/testthat/test_boost_tree_C50.R b/tests/testthat/test_boost_tree_C50.R
@@ -88,7 +88,7 @@ test_that('C5.0 prediction', {
   )
 
   xy_pred <- predict(classes_xy$fit, newdata = lending_club[1:7, num_pred])
-  expect_equal(xy_pred, parsnip:::predict_class(classes_xy, lending_club[1:7, num_pred]))
+  expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred])$.pred_class)
 
 })
 
@@ -105,9 +105,10 @@ test_that('C5.0 probabilities', {
 
   xy_pred <- predict(classes_xy$fit, newdata = as.data.frame(lending_club[1:7, num_pred]), type = "prob")
   xy_pred <- as_tibble(xy_pred)
-  expect_equal(xy_pred, parsnip:::predict_classprob(classes_xy, lending_club[1:7, num_pred]))
+  names(xy_pred) <- c(".pred_bad", ".pred_good")
+  expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred], type = "prob"))
 
-  one_row <- parsnip:::predict_classprob(classes_xy, lending_club[1, num_pred])
+  one_row <- predict(classes_xy, lending_club[1, num_pred], type = "prob")
   expect_equal(xy_pred[1,], one_row)
 
 })

diff --git a/tests/testthat/test_boost_tree_spark.R b/tests/testthat/test_boost_tree_spark.R
@@ -58,7 +58,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_reg_pred_num <- parsnip:::predict_numeric(spark_reg_fit, iris_bt_te),
+    spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te),
     regexp = NA
   )
 
@@ -68,7 +68,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_reg_num_dup <- parsnip:::predict_numeric(spark_reg_fit_dup, iris_bt_te),
+    spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te),
     regexp = NA
   )
 
@@ -124,7 +124,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, churn_bt_te),
+    spark_class_pred_class <- parsnip:::predict_class.model_fit(spark_class_fit, churn_bt_te),
     regexp = NA
   )
 
@@ -134,7 +134,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_class_dup_class <- parsnip:::predict_class(spark_class_fit_dup, churn_bt_te),
+    spark_class_dup_class <- parsnip:::predict_class.model_fit(spark_class_fit_dup, churn_bt_te),
     regexp = NA
   )
 
@@ -156,7 +156,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, churn_bt_te),
+    spark_class_prob_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit, churn_bt_te),
     regexp = NA
   )
 
@@ -166,7 +166,7 @@ test_that('spark execution', {
   )
 
   expect_error(
-    spark_class_dup_classprob <- parsnip:::predict_classprob(spark_class_fit_dup, churn_bt_te),
+    spark_class_dup_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit_dup, churn_bt_te),
     regexp = NA
   )
 

diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R
@@ -66,7 +66,7 @@ test_that('xgboost classification prediction', {
   xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class")
   xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE)
   xy_pred <- factor(levels(iris$Species)[apply(xy_pred, 1, which.max)], levels = levels(iris$Species))
-  expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = iris[1:8, num_pred]))
+  expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class)
 
   form_fit <- fit(
     iris_xgboost,
@@ -78,7 +78,7 @@ test_that('xgboost classification prediction', {
   form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class")
   form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE)
   form_pred <- factor(levels(iris$Species)[apply(form_pred, 1, which.max)], levels = levels(iris$Species))
-  expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = iris[1:8, num_pred]))
+  expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class)
 })
 
 
@@ -141,7 +141,7 @@ test_that('xgboost regression prediction', {
   )
 
   xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1])))
-  expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = mtcars[1:8, -1]))
+  expect_equal(xy_pred, predict(xy_fit, new_data = mtcars[1:8, -1])$.pred)
 
   form_fit <- fit(
     car_basic,
@@ -151,7 +151,7 @@ test_that('xgboost regression prediction', {
   )
 
   form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1])))
-  expect_equal(form_pred, parsnip:::predict_numeric(form_fit, new_data = mtcars[1:8, -1]))
+  expect_equal(form_pred, predict(form_fit, new_data = mtcars[1:8, -1])$.pred)
 })
 
 
@@ -188,9 +188,9 @@ test_that('submodel prediction', {
   mp_res <- multi_predict(class_fit, new_data = wa_churn[1:4, vars], trees = 5, type = "prob")
   mp_res <- do.call("rbind", mp_res$.pred)
   expect_equal(mp_res[[".pred_No"]], pred_class)
-  
+
   expect_error(
-    multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"), 
+    multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"),
     "Did you mean"
   )
 })

diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R
@@ -1,6 +1,7 @@
 library(testthat)
 library(parsnip)
 library(rlang)
+library(tibble)
 
 # ------------------------------------------------------------------------------
 
@@ -260,7 +261,9 @@ test_that('lm prediction', {
   inl_lm <- lm(Sepal.Length ~ log(Sepal.Width) + Species, data = iris)
   inl_pred <- unname(predict(inl_lm, newdata = iris[1:5, ]))
   mv_lm <- lm(cbind(Sepal.Width, Petal.Width) ~ ., data = iris)
-  mv_pred <- as.data.frame(predict(mv_lm, newdata = iris[1:5, ]))
+  mv_pred <- as_tibble(predict(mv_lm, newdata = iris[1:5, ]))
+  names(mv_pred) <- c(".pred_Sepal.Width", ".pred_Petal.Width")
+
 
   res_xy <- fit_xy(
     iris_basic,
@@ -269,23 +272,23 @@ test_that('lm prediction', {
     control = ctrl
   )
 
-  expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred]))
+  expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred)
 
   res_form <- fit(
     iris_basic,
     Sepal.Length ~ log(Sepal.Width) + Species,
     data = iris,
     control = ctrl
   )
-  expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ]))
+  expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred)
 
   res_mv <- fit(
     iris_basic,
     cbind(Sepal.Width, Petal.Width) ~ .,
     data = iris,
     control = ctrl
   )
-  expect_equal(mv_pred, parsnip:::predict_numeric(res_mv, iris[1:5,]))
+  expect_equal(mv_pred, predict(res_mv, iris[1:5,]))
 })
 
 test_that('lm intervals', {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -21,6 +21,7 @@ that are actually varying).

		* The prediction modules (e.g. `predict_class`, `predict_numeric`, etc) were de-exported. These were internal functions that were not to be used by the users and the users were using them.

		* An event time data set (`check_times`) was included that contains the time (in seconds) to run `R CMD check` using the `r-devel-windows-ix86+x86_64` flavor. Packages that errored are censored.

		## Bug Fixes

Expand Down