Skip to content

Commit

Permalink
spell check and small patches
Browse files Browse the repository at this point in the history
  • Loading branch information
bgreenwell committed Aug 26, 2018
1 parent f8d9c94 commit f6915f3
Show file tree
Hide file tree
Showing 17 changed files with 214 additions and 100 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Expand Up @@ -6,7 +6,7 @@

* Changed `truncate_feature_names` argument of `vi()` to `abbreviate_feature_names` which abbreviates all feature names, rather than just truncating them.

* Added CRAN badge [(#32)](https://github.com/koalaverse/vip/issues/32).
* Added CRAN-related badges [(#32)](https://github.com/koalaverse/vip/issues/32).

* New generic `vi_permute()` for constructing permutation-based variable importance scores [(#19)](https://github.com/koalaverse/vip/issues/19).

Expand Down
4 changes: 3 additions & 1 deletion R/get_predictions.R
Expand Up @@ -5,7 +5,9 @@ get_predictions <- function(object, type = c("raw", "prob")) {


#' @keywords internal
get_predictions.default <- stats::predict
get_predictions.default <- function(object, type = c("raw", "prob")) {
stats::predict
}


#' @keywords internal
Expand Down
22 changes: 17 additions & 5 deletions R/metrics.R
Expand Up @@ -10,7 +10,7 @@ perf_rmse <- ModelMetrics::rmse

#' @keywords internal
perf_rsquared <- function(actual, predicted) {
stats::cor(actual, predicted)^2
stats::cor(x = actual, y = predicted) ^ 2
}


Expand All @@ -27,7 +27,10 @@ perf_auc <- function(actual, predicted) {
if (NCOL(predicted) != 2L) {
stop("Expected a 2 column matrix of predicted class probabilities.")
}
ModelMetrics::auc(actual = actual, predicted = predicted[, 1L, drop = TRUE])
ModelMetrics::auc(
actual = actual,
predicted = predicted[, 1L, drop = TRUE]
)
}


Expand All @@ -36,7 +39,10 @@ perf_logLoss <- function(actual, predicted) {
if (NCOL(predicted) != 2L) {
stop("Expected a 2 column matrix of predicted class probabilities.")
}
ModelMetrics::logLoss(actual = actual, predicted = predicted[, 1L, drop = TRUE])
ModelMetrics::logLoss(
actual = actual,
predicted = predicted[, 1L, drop = TRUE]
)
}


Expand All @@ -47,7 +53,10 @@ perf_mauc <- function(actual, predicted) {
if (NCOL(predicted) <= 2L) {
stop("Expected a >2 column matrix of predicted class probabilities.")
}
ModelMetrics::mauc(actual = actual, predicted = predicted)$mauc
ModelMetrics::mauc(
actual = actual,
predicted = predicted
)$mauc
}


Expand All @@ -56,7 +65,10 @@ perf_mlogLoss <- function(actual, predicted) {
if (NCOL(predicted) <= 2L) {
stop("Expected a >2 column matrix of predicted class probabilities.")
}
ModelMetrics::mlogLoss(actual = actual, predicted = predicted)
ModelMetrics::mlogLoss(
actual = actual,
predicted = predicted
)
}


Expand Down
14 changes: 8 additions & 6 deletions R/vi.R
Expand Up @@ -14,7 +14,7 @@
#' @param feature_names Character string giving the names of the predictor
#' variables (i.e., features) of interest.
#'
#' @param FUN List with two componenets, \code{"cat"} and \code{"con"},
#' @param FUN List with two components, \code{"cat"} and \code{"con"},
#' containing the functions to use for categorical and continuous features,
#' respectively. If \code{NULL}, the standard deviation is used for continuous
#' features. For categorical features, the range statistic is used (i.e.,
Expand Down Expand Up @@ -74,17 +74,19 @@ vi <- function(

# Construct VI scores
method <- match.arg(method)
if (method %in% c("pdp", "ice", "permute")) {
if (method %in% c("pdp", "ice")) {
if (missing(feature_names)) {
feature_names <- get_feature_names(object)
}
}

# Construct tibble of VI scores
tib <- switch(method,
"model" = vi_model(object, ...),
"pdp" = vi_pdp(object, feature_names = feature_names, FUN = FUN, ...),
"ice" = vi_ice(object, feature_names = feature_names, FUN = FUN, ...),
vi_permute(object, feature_names = feature_names, ...))
"model" = vi_model(object, ...),
"pdp" = vi_pdp(object, feature_names = feature_names, FUN = FUN, ...),
"ice" = vi_ice(object, feature_names = feature_names, FUN = FUN, ...),
vi_permute(object, feature_names = feature_names, ...)
)

# Save attribute
vi_type <- attr(tib, which = "type")
Expand Down
2 changes: 1 addition & 1 deletion R/vi_ice.R
Expand Up @@ -8,7 +8,7 @@
#' @param feature_names Character string giving the names of the predictor
#' variables (i.e., features) of interest.
#'
#' @param FUN List with two componenets, \code{"cat"} and \code{"con"},
#' @param FUN List with two components, \code{"cat"} and \code{"con"},
#' containing the functions to use for categorical and continuous features,
#' respectively. If \code{NULL}, the standard deviation is used for continuous
#' features. For categorical features, the range statistic is used (i.e.,
Expand Down
2 changes: 1 addition & 1 deletion R/vi_pdp.R
Expand Up @@ -8,7 +8,7 @@
#' @param feature_names Character string giving the names of the predictor
#' variables (i.e., features) of interest.
#'
#' @param FUN List with two componenets, \code{"cat"} and \code{"con"},
#' @param FUN List with two components, \code{"cat"} and \code{"con"},
#' containing the functions to use for categorical and continuous features,
#' respectively. If \code{NULL}, the standard deviation is used for continuous
#' features. For categorical features, the range statistic is used (i.e.,
Expand Down
103 changes: 55 additions & 48 deletions R/vi_permute.R
Expand Up @@ -5,34 +5,40 @@
#'
#' @param object A fitted model object (e.g., a \code{"randomForest"} object).
#'
#' @param train Data frame containing the original training data.
#' @param train A matrix-like R object (e.g., a data frame or matrix)
#' containing the training data.
#'
#' @param response_name Character string giving the name (or position) of the
#' traget column in \code{train}.
#'
#' @param pred_fun Optional prediction function that requires two arguments,
#' \code{object} and \code{newdata}. Default is \code{NULL}.
#' @param target Either a character string giving the name (or position) of the
#' target column in \code{train} or, if \code{train} only contains feature
#' columns, a vector containing the target values used to train \code{object}.
#'
#' @param metric Either a function or character string specifying the
#' performancefor metric to use in computing model performance (e.g.,
#' RMSE for regression or accuracy for binary classification). If \code{metric}
#' is a function, then it requires two arguments, \code{actual} and
#' \code{predicted}, and should return a single, numeric value.
#' performance metric to use in computing model performance (e.g., RMSE for
#' regression or accuracy for binary classification). If \code{metric} is a
#' function, then it requires two arguments, \code{actual} and \code{predicted},
#' and should return a single, numeric value.
#'
#' @param smaller_is_better Logical indicating whether or not a smaller value
#' of \code{metric} is better. Default is \code{NULL}. Must be supplied if
#' \code{metric} is a user-supplied function.
#'
#' @param pos_class Character string specifying which category in `obs`
#' represents the "positive" class (i.e., the class for which the predicted
#' class probabilties correspond to). Only needed for binary classification
#' @param reference_class Character string specifying which response category
#' represents the "reference" class (i.e., the class for which the predicted
#' class probabilities correspond to). Only needed for binary classification
#' problems.
#'
#' @param pred_fun Optional prediction function that requires two arguments,
#' \code{object} and \code{newdata}. Default is \code{NULL}. Must be supplied
#' whenever \code{metric} is a custom function.
#'
#' @return A tidy data frame (i.e., a \code{"tibble"} object) with two columns:
#' \code{Variable} and \code{Importance}. For \code{"glm"}-like object, an
#' additional column, called \code{Sign}, is also included which gives the sign
#' (i.e., POS/NEG) of the original coefficient.
#'
#' @param verbose Logical indicating whether or not to print information during
#' the construction of variable importance scores. Default is \code{FALSE}.
#'
#' @param progress Character string giving the name of the progress bar to use.
#' See \code{\link[plyr]{create_progress_bar}} for details. Default is
#' \code{"none"}.
Expand Down Expand Up @@ -62,7 +68,7 @@
#'
#' # Simulate training data
#' set.seed(101) # for reproducibility
#' trn <- as.data.frame(mlbench.friedman1(500) # ?mlbench.friedman1
#' trn <- as.data.frame(mlbench.friedman1(500)) # ?mlbench.friedman1
#'
#' # Inspect data
#' tibble::as.tibble(trn)
Expand All @@ -76,9 +82,9 @@
#'
#' # Plot VI scores
#' set.seed(2021) # for reproducibility
#' p1 <- vip(pp, method = "permute", response_name = "y", metric = "rsquared",
#' p1 <- vip(pp, method = "permute", target = "y", metric = "rsquared",
#' pred_fun = predict) + ggtitle("PPR")
#' p2 <- vip(nn, method = "permute", response_name = "y", metric = "rsquared",
#' p2 <- vip(nn, method = "permute", target = "y", metric = "rsquared",
#' pred_fun = predict) + ggtitle("NN")
#' grid.arrange(p1, p2, ncol = 2)
#'
Expand All @@ -89,12 +95,11 @@
#'
#' # Permutation-based VIP with user-defined MAE metric
#' set.seed(1101) # for reproducibility
#' vip(pp, method = "permute",
#' response_name = "y",
#' metric = mae,
#' vip(pp, method = "permute", target = "y", metric = mae,
#' smaller_is_better = TRUE,
#' pred_fun = function(object, newdata) predict(object, newdata) # wrapper
#' ) + ggtitle("PPR")
#' }
vi_permute <- function(object, ...) {
UseMethod("vi_permute")
}
Expand All @@ -103,19 +108,9 @@ vi_permute <- function(object, ...) {
#' @rdname vi_permute
#'
#' @export
vi_permute.default <- function(
object,
train,
response_name,
# perf_fun = NULL,
metric = "auto", # add log loss, auc, mae, mape, etc.
smaller_is_better = NULL,
pos_class = NULL,
pred_fun = NULL,
progress = "none",
parallel = FALSE,
paropts = NULL,
...
vi_permute.default <- function(object, train, target, metric = "auto",
smaller_is_better = NULL, reference_class = NULL, pred_fun = NULL,
verbose = FALSE, progress = "none", parallel = FALSE, paropts = NULL, ...
) {

# Issue warning until this function is complete!
Expand All @@ -127,11 +122,16 @@ vi_permute.default <- function(
train <- get_training_data(object)
}

# Feature names
feature_names <- setdiff(names(train), response_name)

# Observed (training) response values
obs <- train[[response_name]]
# Extract feature names and separate features from target (if necessary)
if (is.character(target)) {
feature_names <- setdiff(colnames(train), target)
train_x <- train[, feature_names]
train_y <- train[, target, drop = TRUE]
} else {
feature_names <- colnames(train)
train_x <- train
train_y <- target
}

# Metric
if (is.function(metric)) { # user-supplied function
Expand All @@ -150,7 +150,7 @@ vi_permute.default <- function(
call. = FALSE)
} else {
# Check prediction function arguments
if (!identical(c("object", "newdata"), names(formals(pred_fun)))) {
if (!all(c("object", "newdata") %in% names(formals(pred_fun)))) {
stop("`pred_fun()` must be a function with arguments `object` and ",
"`newdata`.", call. = FALSE)
}
Expand Down Expand Up @@ -229,17 +229,21 @@ vi_permute.default <- function(
pred_fun <- get_predictions(object, type = type)
}

}
# Determine reference class (binary classification only)
if (is.null(reference_class) && metric %in% c("auc", "logloss")) {
stop("Please specify the reference class via the `reference_class` ",
"argument when using \"auc\" or \"logloss\".")
}
if (!is.null(reference_class) && metric %in% c("auc", "logloss")) {
train_y <- ifelse(train_y == reference_class, yes = 1, no = 0)
}

# Determine reference class (classification only)
if (!is.null(pos_class)) {
obs <- ifelse(obs == pos_class, yes = 1, no = 0)
}

# Compute baseline metric for comparison
baseline <- perf_fun(
actual = obs,
predicted = pred_fun(object, newdata = train)
actual = train_y,
predicted = pred_fun(object, newdata = train_x)
)

# Construct VI scores
Expand All @@ -253,11 +257,14 @@ vi_permute.default <- function(
vis <- unlist(plyr::llply(feature_names, .progress = progress,
.parallel = parallel, .paropts = paropts,
.fun = function(x) {
copy <- train # make copy
copy[[x]] <- sample(copy[[x]]) # permute values
if (verbose && !parallel) {
message("Computing variable importance for ", x, "...")
}
train_x_permuted <- train_x # make copy
train_x_permuted[[x]] <- sample(train_x_permuted[[x]]) # permute values
permuted <- perf_fun(
actual = obs,
predicted = pred_fun(object, newdata = copy)
actual = train_y,
predicted = pred_fun(object, newdata = train_x_permuted)
)
if (smaller_is_better) {
permuted - baseline
Expand Down
4 changes: 2 additions & 2 deletions R/vip.R
Expand Up @@ -16,8 +16,8 @@
#' @param horizontal Logical indicating whether or not to plot the importance
#' scores on the x-axis (\code{TRUE}). Default is \code{TRUE}.
#'
#' @param alpha Numeric value between 0 and 1 giving the trasparency of the
#' bars.
#' @param alpha Numeric value between 0 and 1 giving the transparency of the
#' bars (\code{bar = TRUE}) or points (\code{bar = FALSE}).
#'
#' @param color Character string specifying the color to use for the borders of
#' the bars. Could also be a function, such as
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Expand Up @@ -41,4 +41,4 @@ if (!requireNamespace("devtools")) {
devtools::install_github("koalaverse/vip")
```

For details and example usage, click the [Get started](https://koalaverse.github.io/vip/articles/vip.html) tab on the [`vip` package website](https://koalaverse.github.io/vip/index.html).
For details and example usage, visit the [**vip** package website](https://koalaverse.github.io/vip/index.html).
2 changes: 1 addition & 1 deletion man/vi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/vi_ice.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/vi_pdp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f6915f3

Please sign in to comment.