Skip to content

Commit

Permalink
Merge pull request #4 from mattheaphy/trees
Browse files Browse the repository at this point in the history
Tree models
  • Loading branch information
mattheaphy committed Apr 4, 2024
2 parents 97ec2d8 + 59fb97b commit 9ab4aee
Show file tree
Hide file tree
Showing 32 changed files with 1,698 additions and 73 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ next_actions.md
docs
CRAN-SUBMISSION
cran-comments.md
inst/doc
15 changes: 11 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: offsetreg
Title: An Extension of 'Tidymodels' Supporting Offset Terms
Version: 1.0.0
Version: 1.1.0
Authors@R:
person("Matt", "Heaphy", email = "mattrmattrs@gmail.com", role = c("aut", "cre", "cph"))
Maintainer: Matt Heaphy <mattrmattrs@gmail.com>
Expand All @@ -20,17 +20,24 @@ RoxygenNote: 7.3.0
Imports:
generics,
glue,
parsnip,
parsnip (>= 1.2.0),
poissonreg,
rlang,
stats
Suggests:
Suggests:
broom,
glmnet,
knitr,
recipes,
rmarkdown,
rpart,
testthat (>= 3.0.0),
tune,
workflows
workflows,
rsample,
xgboost
Config/testthat/edition: 3
Depends:
R (>= 4.1)
LazyData: true
VignetteBuilder: knitr
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
# Generated by roxygen2: do not edit by hand

S3method(check_args,boost_tree_offset)
S3method(check_args,poisson_reg_offset)
S3method(min_grid,boost_tree_offset)
S3method(min_grid,decision_tree_exposure)
S3method(min_grid,poisson_reg_offset)
S3method(print,boost_tree_offset)
S3method(print,decision_tree_exposure)
S3method(print,poisson_reg_offset)
S3method(translate,poisson_reg_offset)
S3method(update,boost_tree_offset)
S3method(update,decision_tree_exposure)
S3method(update,poisson_reg_offset)
export(boost_tree_offset)
export(check_args)
export(decision_tree_exposure)
export(glm_offset)
export(glmnet_offset)
export(min_grid)
export(poisson_reg_offset)
export(rpart_exposure)
export(translate)
export(xgb_predict_offset)
export(xgb_train_offset)
import(parsnip)
import(poissonreg)
importFrom(generics,min_grid)
importFrom(glue,glue)
importFrom(parsnip,check_args)
importFrom(parsnip,translate)
importFrom(rlang,eval_tidy)
importFrom(rlang,expr)
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# offsetreg 1.1.0

- `boost_tree_offset()` - new model specification for boosted ensembles of decision trees. Currently xgboost ("xgboost_offset") is supported.
- `decision_tree_exposure()` - new model specification for weighted decision trees with weighted exposures. Currently rpart ("rpart_exposure") is supported.
- Added a vignette on when offsetreg should and shouldn't be used.
- Added `check_args()` methods to various model specifications.


# offsetreg 1.0.0

- Initial CRAN release
Expand Down
140 changes: 140 additions & 0 deletions R/boost_tree_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Register the "xgboost_offset" engine for the "boost_tree_offset" model:
# engine/mode, package dependencies, argument translations, the fit module,
# predictor encoding, and the "numeric" and "raw" prediction modules.
# Called for its side effects on the parsnip model environment.
make_boost_tree_xgboost_offset <- function() {
  model <- "boost_tree_offset"
  eng <- "xgboost_offset"
  reg_mode <- "regression"

  set_model_engine(model, mode = reg_mode, eng = eng)

  # Packages required to fit and predict with this engine.
  for (dep in c("xgboost", "offsetreg")) {
    set_dependency(model, eng = eng, pkg = dep, mode = reg_mode)
  }

  # parsnip argument name -> engine argument name. Every argument is paired
  # with the dials function of the same name as the parsnip argument.
  arg_map <- c(
    tree_depth = "max_depth",
    trees = "nrounds",
    learn_rate = "eta",
    mtry = "colsample_bynode",
    min_n = "min_child_weight",
    loss_reduction = "gamma",
    sample_size = "subsample",
    stop_iter = "early_stop"
  )

  for (arg in names(arg_map)) {
    set_model_arg(
      model = model,
      eng = eng,
      parsnip = arg,
      original = arg_map[[arg]],
      func = list(pkg = "dials", fun = arg),
      # `trees` is the only argument registered with has_submodel = TRUE
      has_submodel = identical(arg, "trees")
    )
  }

  set_fit(
    model = model,
    eng = eng,
    mode = reg_mode,
    value = list(
      interface = "matrix",
      protect = c("x", "y", "weights"),
      func = c(pkg = "offsetreg", fun = "xgb_train_offset"),
      defaults = list(nthread = 1, verbose = 0, offset_col = "offset")
    )
  )

  set_encoding(
    model = model,
    eng = eng,
    mode = reg_mode,
    options = list(
      predictor_indicators = "one_hot",
      compute_intercept = FALSE,
      remove_intercept = TRUE,
      allow_sparse_x = TRUE
    )
  )

  # The "numeric" and "raw" prediction modules are identical apart from type.
  # The prediction function is namespace-qualified (as the fit function is)
  # so the generated call resolves even when offsetreg is loaded but not
  # attached to the search path.
  for (pred_type in c("numeric", "raw")) {
    set_pred(
      model = model,
      eng = eng,
      mode = reg_mode,
      type = pred_type,
      value = list(
        pre = .predict_pre_offset_rename,
        post = NULL,
        func = c(pkg = "offsetreg", fun = "xgb_predict_offset"),
        args = list(
          object = expr(object$fit),
          new_data = expr(new_data),
          offset_col = "offset"
        )
      )
    )
  }

}
131 changes: 131 additions & 0 deletions R/boost_tree_offset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#' Boosted Poisson Trees with Offsets
#'
#' `boost_tree_offset()` defines a model that creates a series of Poisson
#' decision trees with pre-defined offsets forming an ensemble. Each tree
#' depends on the results of previous trees. All trees in the ensemble are
#' combined to produce a final prediction. This function can be used for count
#' regression models only.
#'
#' This function is similar to [parsnip::boost_tree()] except that
#' specification of an offset column is required.
#'
#' @param mode A single character string for the type of model. The only
#' possible value for this model is "regression".
#' @inheritParams parsnip::boost_tree
#'
#' @return A model specification object with the classes `boost_tree_offset` and
#' `model_spec`.
#'
#' @examples
#' parsnip::show_model_info("boost_tree_offset")
#'
#' boost_tree_offset()
#'
#' @seealso [parsnip::boost_tree()]
#' @export
boost_tree_offset <- function(mode = "regression",
                              engine = "xgboost_offset",
                              mtry = NULL,
                              trees = NULL,
                              min_n = NULL,
                              tree_depth = NULL,
                              learn_rate = NULL,
                              loss_reduction = NULL,
                              sample_size = NULL,
                              stop_iter = NULL) {

  # "regression" is the only mode accepted by this specification.
  if (mode != "regression") {
    rlang::abort("`mode` should be 'regression'")
  }

  # Capture every main argument as a quosure rather than evaluating it here.
  captured_args <- list(
    mtry = rlang::enquo(mtry),
    trees = rlang::enquo(trees),
    min_n = rlang::enquo(min_n),
    tree_depth = rlang::enquo(tree_depth),
    learn_rate = rlang::enquo(learn_rate),
    loss_reduction = rlang::enquo(loss_reduction),
    sample_size = rlang::enquo(sample_size),
    stop_iter = rlang::enquo(stop_iter)
  )

  # `eng_args` and `method` start out empty; they are placeholders for later
  # parts of the specification.
  new_model_spec(
    "boost_tree_offset",
    args = captured_args,
    eng_args = NULL,
    mode = mode,
    method = NULL,
    engine = engine
  )
}

# Register the "boost_tree_offset" model and its "regression" mode, unless the
# model environment already has an entry under that name.
make_boost_tree_offset <- function() {
  model_name <- "boost_tree_offset"

  # Nothing to do when the model has already been registered.
  if (!is.null(get_model_env()[[model_name]])) {
    return(invisible(NULL))
  }

  set_new_model(model_name)
  set_model_mode(model = model_name, mode = "regression")
}

#' @export
print.boost_tree_offset <- function(x, ...) {
  # Delegate the printing to print_model_spec() with this model's description,
  # then hand the specification back invisibly.
  spec_description <- "Boosted Tree with Offsets"
  print_model_spec(x, desc = spec_description, ...)
  invisible(x)
}

# code from the parsnip package
#' @export
update.boost_tree_offset <- function(object,
                                     parameters = NULL,
                                     mtry = NULL,
                                     trees = NULL,
                                     min_n = NULL,
                                     tree_depth = NULL,
                                     learn_rate = NULL,
                                     loss_reduction = NULL,
                                     sample_size = NULL,
                                     stop_iter = NULL,
                                     fresh = FALSE,
                                     ...) {

  # Capture the replacement values as quosures; update_spec() receives them
  # together with `parameters` and `fresh`.
  new_values <- list(
    mtry = rlang::enquo(mtry),
    trees = rlang::enquo(trees),
    min_n = rlang::enquo(min_n),
    tree_depth = rlang::enquo(tree_depth),
    learn_rate = rlang::enquo(learn_rate),
    loss_reduction = rlang::enquo(loss_reduction),
    sample_size = rlang::enquo(sample_size),
    stop_iter = rlang::enquo(stop_iter)
  )

  update_spec(
    object = object,
    parameters = parameters,
    args_enquo_list = new_values,
    fresh = fresh,
    cls = "boost_tree_offset",
    ...
  )
}

# code from the parsnip package
#' @export
check_args.boost_tree_offset <- function(object) {

  # Evaluate each stored argument so its value can be inspected.
  args <- lapply(object$args, rlang::eval_tidy)

  # Only numeric values are validated; anything else (including NULL) is
  # skipped. The checks reject negative values, so the messages state the
  # bound that is actually enforced (>= 0).
  if (is.numeric(args$trees) && args$trees < 0) {
    rlang::abort("`trees` should be >= 0.")
  }
  if (is.numeric(args$sample_size) &&
      (args$sample_size < 0 || args$sample_size > 1)) {
    rlang::abort("`sample_size` should be within [0,1].")
  }
  if (is.numeric(args$tree_depth) && args$tree_depth < 0) {
    rlang::abort("`tree_depth` should be >= 0.")
  }
  if (is.numeric(args$min_n) && args$min_n < 0) {
    rlang::abort("`min_n` should be >= 0.")
  }

  # Return the unchanged specification invisibly.
  invisible(object)
}

#' @export
min_grid.boost_tree_offset <- function(x, grid, ...) {
  # tune is used only by this method, so confirm it is installed before
  # delegating to it.
  rlang::check_installed("tune")

  tune::fit_max_value(x, grid, ...)
}
Loading

0 comments on commit 9ab4aee

Please sign in to comment.