Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
^R/README\.md$
derby.log
^logs$
^tests/testthat/logs$
^tests/testthat/logs$
^revdep$
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ VignetteBuilder: knitr
Depends:
R (>= 2.10)
Imports:
dplyr,
dplyr (>= 0.8.0.1),
rlang (>= 0.3.1),
purrr,
utils,
tibble,
tibble (>= 2.1.1),
generics,
glue,
magrittr,
Expand All @@ -39,7 +39,7 @@ Suggests:
xgboost,
covr,
C50,
sparklyr,
sparklyr (>= 1.0.0),
earth,
glmnet,
kernlab,
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ that are actually varying).

* The prediction modules (e.g. `predict_class`, `predict_numeric`, etc.) were de-exported. These were internal functions that were never intended to be called directly by users, but users were calling them anyway.

* An event time data set (`check_times`) was included that contains the time (in seconds) to run `R CMD check` using the `r-devel-windows-ix86+x86_64` flavor. Packages that errored are treated as censored.

## Bug Fixes

Expand Down
6 changes: 3 additions & 3 deletions R/aaa_spark_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
format_spark_probs <- function(results, object) {
results <- dplyr::select(results, starts_with("probability_"))
p <- ncol(results)
lvl <- paste0("probability_", 0:(p - 1))
names(lvl) <- paste0("pred_", object$fit$.index_labels)
results %>% rename(!!!syms(lvl))
lvl <- colnames(results)
names(lvl) <- paste0("pred_", object$fit$index_labels)
results %>% dplyr::rename(!!!syms(lvl))
}

format_spark_class <- function(results, object) {
Expand Down
59 changes: 59 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,62 @@ NULL
#' data(wa_churn)
#' str(wa_churn)
NULL

#' Execution Time Data
#'
#' These data were collected from the CRAN web page for 13,626 R
#' packages. The time to complete the standard package checking
#' routine was collected. In some cases, the package checking
#' process is stopped due to errors and these data are treated as
#' censored. Less than 1 percent of the packages are censored.
#'
#' The associated package source code was downloaded and parsed
#' to create predictors, including:
#'
#' * `authors`: The number of authors in the author field.
#' * `imports`: The number of imported packages.
#' * `suggests`: The number of packages suggested.
#' * `depends`: The number of hard dependencies.
#' * `Roxygen`: a binary indicator for whether Roxygen was used
#' for documentation.
#' * `gh`: a binary indicator for whether the URL field contained
#' a GitHub link.
#' * `rforge`: a binary indicator for whether the URL field
#' contained a link to R-forge.
#' * `descr`: The number of characters (or, in some cases, bytes)
#' in the description field.
#' * `r_count`: The number of R files in the R directory.
#' * `r_size`: The total disk size of the R files.
#' * `ns_import`: Estimated number of imported functions or methods.
#' * `ns_export`: Estimated number of exported functions or methods.
#' * `s3_methods`: Estimated number of S3 methods.
#' * `s4_methods`: Estimated number of S4 methods.
#' * `doc_count`: The number of Rmd or Rnw files in the vignettes
#' directory.
#' * `doc_size`: The disk size of the Rmd or Rnw files.
#' * `src_count`: The number of files in the `src` directory.
#' * `src_size`: The size on disk of files in the `src` directory.
#' * `data_count`: The number of files in the `data` directory.
#' * `data_size`: The size on disk of files in the `data` directory.
#' * `testthat_count`: The number of files in the `testthat`
#' directory.
#' * `testthat_size`: The size on disk of files in the `testthat`
#' directory.
#' * `check_time`: The time (in seconds) to run `R CMD check`
#' using the `r-devel-windows-ix86+x86_64` flavor.
#' * `status`: An indicator for whether the tests completed.
#'
#' Data were collected on 2019-01-20.
#' @name check_times
#' @aliases check_times
#' @docType data
#' @return \item{check_times}{a data frame}
#'
#' @source CRAN
#'
#' @keywords datasets
#' @examples
#' data(check_times)
#' str(check_times)
NULL

2 changes: 0 additions & 2 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ prepare_data <- function(object, new_data) {
#' @return A tibble with the same number of rows as the data being predicted.
#' Most likely, there is a list-column named `.pred` that is a tibble with
#' multiple rows per sub-model.
#' @keywords internal
#' @export
multi_predict <- function(object, ...) {
if (inherits(object$fit, "try-error")) {
Expand All @@ -255,7 +254,6 @@ multi_predict <- function(object, ...) {
UseMethod("multi_predict")
}

#' @keywords internal
#' @export
#' @rdname multi_predict
multi_predict.default <- function(object, ...)
Expand Down
Binary file added data/check_times.rda
Binary file not shown.
1 change: 1 addition & 0 deletions data/datalist
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
lending_club: lending_club
wa_churn: wa_churn
check_times: check_times
64 changes: 64 additions & 0 deletions man/check_times.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/multi_predict.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions tests/testthat/test_boost_tree_C50.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ test_that('C5.0 prediction', {
)

xy_pred <- predict(classes_xy$fit, newdata = lending_club[1:7, num_pred])
expect_equal(xy_pred, parsnip:::predict_class(classes_xy, lending_club[1:7, num_pred]))
expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred])$.pred_class)

})

Expand All @@ -105,9 +105,10 @@ test_that('C5.0 probabilities', {

xy_pred <- predict(classes_xy$fit, newdata = as.data.frame(lending_club[1:7, num_pred]), type = "prob")
xy_pred <- as_tibble(xy_pred)
expect_equal(xy_pred, parsnip:::predict_classprob(classes_xy, lending_club[1:7, num_pred]))
names(xy_pred) <- c(".pred_bad", ".pred_good")
expect_equal(xy_pred, predict(classes_xy, lending_club[1:7, num_pred], type = "prob"))

one_row <- parsnip:::predict_classprob(classes_xy, lending_club[1, num_pred])
one_row <- predict(classes_xy, lending_club[1, num_pred], type = "prob")
expect_equal(xy_pred[1,], one_row)

})
Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test_boost_tree_spark.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ test_that('spark execution', {
)

expect_error(
spark_reg_pred_num <- parsnip:::predict_numeric(spark_reg_fit, iris_bt_te),
spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te),
regexp = NA
)

Expand All @@ -68,7 +68,7 @@ test_that('spark execution', {
)

expect_error(
spark_reg_num_dup <- parsnip:::predict_numeric(spark_reg_fit_dup, iris_bt_te),
spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te),
regexp = NA
)

Expand Down Expand Up @@ -124,7 +124,7 @@ test_that('spark execution', {
)

expect_error(
spark_class_pred_class <- parsnip:::predict_class(spark_class_fit, churn_bt_te),
spark_class_pred_class <- parsnip:::predict_class.model_fit(spark_class_fit, churn_bt_te),
regexp = NA
)

Expand All @@ -134,7 +134,7 @@ test_that('spark execution', {
)

expect_error(
spark_class_dup_class <- parsnip:::predict_class(spark_class_fit_dup, churn_bt_te),
spark_class_dup_class <- parsnip:::predict_class.model_fit(spark_class_fit_dup, churn_bt_te),
regexp = NA
)

Expand All @@ -156,7 +156,7 @@ test_that('spark execution', {
)

expect_error(
spark_class_prob_classprob <- parsnip:::predict_classprob(spark_class_fit, churn_bt_te),
spark_class_prob_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit, churn_bt_te),
regexp = NA
)

Expand All @@ -166,7 +166,7 @@ test_that('spark execution', {
)

expect_error(
spark_class_dup_classprob <- parsnip:::predict_classprob(spark_class_fit_dup, churn_bt_te),
spark_class_dup_classprob <- parsnip:::predict_classprob.model_fit(spark_class_fit_dup, churn_bt_te),
regexp = NA
)

Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test_boost_tree_xgboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ test_that('xgboost classification prediction', {
xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class")
xy_pred <- matrix(xy_pred, ncol = 3, byrow = TRUE)
xy_pred <- factor(levels(iris$Species)[apply(xy_pred, 1, which.max)], levels = levels(iris$Species))
expect_equal(xy_pred, parsnip:::predict_class(xy_fit, new_data = iris[1:8, num_pred]))
expect_equal(xy_pred, predict(xy_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class)

form_fit <- fit(
iris_xgboost,
Expand All @@ -78,7 +78,7 @@ test_that('xgboost classification prediction', {
form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(iris[1:8, num_pred])), type = "class")
form_pred <- matrix(form_pred, ncol = 3, byrow = TRUE)
form_pred <- factor(levels(iris$Species)[apply(form_pred, 1, which.max)], levels = levels(iris$Species))
expect_equal(form_pred, parsnip:::predict_class(form_fit, new_data = iris[1:8, num_pred]))
expect_equal(form_pred, predict(form_fit, new_data = iris[1:8, num_pred], type = "class")$.pred_class)
})


Expand Down Expand Up @@ -141,7 +141,7 @@ test_that('xgboost regression prediction', {
)

xy_pred <- predict(xy_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1])))
expect_equal(xy_pred, parsnip:::predict_numeric(xy_fit, new_data = mtcars[1:8, -1]))
expect_equal(xy_pred, predict(xy_fit, new_data = mtcars[1:8, -1])$.pred)

form_fit <- fit(
car_basic,
Expand All @@ -151,7 +151,7 @@ test_that('xgboost regression prediction', {
)

form_pred <- predict(form_fit$fit, newdata = xgb.DMatrix(data = as.matrix(mtcars[1:8, -1])))
expect_equal(form_pred, parsnip:::predict_numeric(form_fit, new_data = mtcars[1:8, -1]))
expect_equal(form_pred, predict(form_fit, new_data = mtcars[1:8, -1])$.pred)
})


Expand Down Expand Up @@ -188,9 +188,9 @@ test_that('submodel prediction', {
mp_res <- multi_predict(class_fit, new_data = wa_churn[1:4, vars], trees = 5, type = "prob")
mp_res <- do.call("rbind", mp_res$.pred)
expect_equal(mp_res[[".pred_No"]], pred_class)

expect_error(
multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"),
multi_predict(class_fit, newdata = wa_churn[1:4, vars], trees = 5, type = "prob"),
"Did you mean"
)
})
Expand Down
11 changes: 7 additions & 4 deletions tests/testthat/test_linear_reg.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(testthat)
library(parsnip)
library(rlang)
library(tibble)

# ------------------------------------------------------------------------------

Expand Down Expand Up @@ -260,7 +261,9 @@ test_that('lm prediction', {
inl_lm <- lm(Sepal.Length ~ log(Sepal.Width) + Species, data = iris)
inl_pred <- unname(predict(inl_lm, newdata = iris[1:5, ]))
mv_lm <- lm(cbind(Sepal.Width, Petal.Width) ~ ., data = iris)
mv_pred <- as.data.frame(predict(mv_lm, newdata = iris[1:5, ]))
mv_pred <- as_tibble(predict(mv_lm, newdata = iris[1:5, ]))
names(mv_pred) <- c(".pred_Sepal.Width", ".pred_Petal.Width")


res_xy <- fit_xy(
iris_basic,
Expand All @@ -269,23 +272,23 @@ test_that('lm prediction', {
control = ctrl
)

expect_equal(uni_pred, parsnip:::predict_numeric(res_xy, iris[1:5, num_pred]))
expect_equal(uni_pred, predict(res_xy, iris[1:5, num_pred])$.pred)

res_form <- fit(
iris_basic,
Sepal.Length ~ log(Sepal.Width) + Species,
data = iris,
control = ctrl
)
expect_equal(inl_pred, parsnip:::predict_numeric(res_form, iris[1:5, ]))
expect_equal(inl_pred, predict(res_form, iris[1:5, ])$.pred)

res_mv <- fit(
iris_basic,
cbind(Sepal.Width, Petal.Width) ~ .,
data = iris,
control = ctrl
)
expect_equal(mv_pred, parsnip:::predict_numeric(res_mv, iris[1:5,]))
expect_equal(mv_pred, predict(res_mv, iris[1:5,]))
})

test_that('lm intervals', {
Expand Down
Loading