Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zero-inflated Beta-binomial distribution #1311

Merged
merged 10 commits into from
Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: brms
Encoding: UTF-8
Type: Package
Title: Bayesian Regression Models using 'Stan'
Version: 2.16.10
Version: 2.16.11
Date: 2022-03-07
Authors@R:
c(person("Paul-Christian", "Bürkner", email = "paul.buerkner@gmail.com",
Expand All @@ -13,7 +13,8 @@ Authors@R:
person("Martin", "Modrak", role = c("ctb")),
person("Hamada S.", "Badr", role = c("ctb")),
person("Frank", "Weber", role = c("ctb")),
person("Mattan S.", "Ben-Shachar", role = c("ctb")))
person("Mattan S.", "Ben-Shachar", role = c("ctb")),
person("Hayden", "Rabel", role = c("ctb")))
Depends:
R (>= 3.5.0),
Rcpp (>= 0.12.0),
Expand Down Expand Up @@ -48,6 +49,8 @@ Suggests:
projpred (>= 2.0.0),
RWiener,
rtdists,
extraDistr,
processx,
mice,
spdep,
mnormt,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ export(dstudent_t)
export(dvon_mises)
export(dwiener)
export(dzero_inflated_beta)
export(dzero_inflated_beta_binomial)
export(dzero_inflated_binomial)
export(dzero_inflated_negbinomial)
export(dzero_inflated_poisson)
Expand Down Expand Up @@ -516,6 +517,7 @@ export(pskew_normal)
export(pstudent_t)
export(pvon_mises)
export(pzero_inflated_beta)
export(pzero_inflated_beta_binomial)
export(pzero_inflated_binomial)
export(pzero_inflated_negbinomial)
export(pzero_inflated_poisson)
Expand Down Expand Up @@ -587,6 +589,7 @@ export(waic)
export(weibull)
export(wiener)
export(zero_inflated_beta)
export(zero_inflated_beta_binomial)
export(zero_inflated_binomial)
export(zero_inflated_negbinomial)
export(zero_inflated_poisson)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* Add family `logistic_normal` for simplex responses. (#1274)
* Add argument `future_args` to `kfold` and `reloo` for additional
control over parallel execution via futures.
* Add family `zero_inflated_beta_binomial` for potentially over-dispersed and
zero-inflated binomial response models thanks to Hayden Rabel. (#1311)

### Other changes

Expand Down
1 change: 1 addition & 0 deletions R/backends.R
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ file_refit_options <- function() {
"--canonicalize=deprecations,braces,parentheses"
)
if (cmdstanr::cmdstan_version() >= "2.29.0") {
require_package("processx")
res <- processx::run(
command = stanc_cmd,
args = c(stan_file, stanc_flags),
Expand Down
30 changes: 30 additions & 0 deletions R/distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,7 @@ pcox <- function(q, mu, bhaz, cbhaz, lower.tail = TRUE, log.p = FALSE) {
#' @param zi zero-inflation probability
#' @param mu,lambda location parameter
#' @param shape,shape1,shape2 shape parameter
#' @param phi precision parameter
#' @param size number of trials
#' @param prob probability of success on each trial
#'
Expand Down Expand Up @@ -1736,6 +1737,21 @@ pzero_inflated_binomial <- function(q, size, prob, zi, lower.tail = TRUE,
.pzero_inflated(q, "binom", zi, pars, lower.tail, log.p)
}

#' @rdname ZeroInflated
#' @export
dzero_inflated_beta_binomial <- function(x, size, mu, phi, zi, log = FALSE) {
pars <- nlist(size, mu, phi)
.dzero_inflated(x, "beta_binom", zi, pars, log)
}

#' @rdname ZeroInflated
#' @export
pzero_inflated_beta_binomial <- function(q, size, mu, phi, zi,
lower.tail = TRUE, log.p = FALSE) {
pars <- nlist(size, mu, phi)
.pzero_inflated(q, "beta_binom", zi, pars, lower.tail, log.p)
}

#' @rdname ZeroInflated
#' @export
dzero_inflated_beta <- function(x, shape1, shape2, zi, log = FALSE) {
Expand Down Expand Up @@ -2461,6 +2477,20 @@ pordinal <- function(q, eta, thres, disc = 1, family = NULL, link = "logit") {
cblapply(q, .fun)
}

# CDF of the beta-binomial distribution
dbeta_binom <- function(x, size, mu = 1, phi = 1, log = FALSE) {
require_package("extraDistr")
extraDistr::dbbinom(x, size, alpha = mu * phi, beta = (1 - mu) * phi, log = log)
}

# PMF of the beta-binomial distribution
pbeta_binom <- function(q, size, mu = 1, phi = 1, lower.tail = TRUE,
log.p = FALSE) {
require_package("extraDistr")
extraDistr::pbbinom(q, size, alpha = mu * phi, beta = (1 - mu) * phi,
lower.tail = lower.tail, log.p = log.p)
}

# helper functions to shift arbitrary distributions
dshifted <- function(dist, x, shift = 0, ...) {
do_call(paste0("d", dist), list(x - shift, ...))
Expand Down
32 changes: 21 additions & 11 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#' \code{multinomial}, \code{cumulative}, \code{cratio}, \code{sratio},
#' \code{acat}, \code{hurdle_poisson}, \code{hurdle_negbinomial},
#' \code{hurdle_gamma}, \code{hurdle_lognormal},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta},
#' \code{zero_inflated_negbinomial}, \code{zero_inflated_poisson}, and
#' \code{zero_one_inflated_beta}.
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
#' \code{zero_inflated_beta}, \code{zero_inflated_negbinomial},
#' \code{zero_inflated_poisson}, and \code{zero_one_inflated_beta}.
#' @param link A specification for the model link function. This can be a
#' name/expression or character string. See the 'Details' section for more
#' information on link functions supported by each family.
Expand Down Expand Up @@ -108,10 +108,10 @@
#' \item{Families \code{hurdle_poisson}, \code{hurdle_negbinomial},
#' \code{hurdle_gamma}, \code{hurdle_lognormal}, \code{zero_inflated_poisson},
#' \code{zero_inflated_negbinomial}, \code{zero_inflated_binomial},
#' \code{zero_inflated_beta}, and \code{zero_one_inflated_beta}
#' allow to estimate zero-inflated and hurdle models.
#' These models can be very helpful when there are many zeros in the data
#' (or ones in case of one-inflated models)
#' \code{zero_inflated_beta_binomial}, \code{zero_inflated_beta}, and
#' \code{zero_one_inflated_beta} allow to estimate zero-inflated and hurdle
#' models. These models can be very helpful when there are many zeros in the
#' data (or ones in case of one-inflated models)
#' that cannot be explained by the primary distribution of the response.}
#' }
#'
Expand All @@ -129,9 +129,9 @@
#' \code{log}, \code{identity}, \code{sqrt}, and \code{softplus}.}
#'
#' \item{Families \code{binomial}, \code{bernoulli}, \code{Beta},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta},
#' and \code{zero_one_inflated_beta} support \code{logit},
#' \code{probit}, \code{probit_approx}, \code{cloglog},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
#' \code{zero_inflated_beta}, and \code{zero_one_inflated_beta} support
#' \code{logit}, \code{probit}, \code{probit_approx}, \code{cloglog},
#' \code{cauchit}, and \code{identity}.}
#'
#' \item{Families \code{cumulative}, \code{cratio}, \code{sratio},
Expand Down Expand Up @@ -761,6 +761,15 @@ zero_inflated_binomial <- function(link = "logit", link_zi = "logit") {
link_zi = link_zi)
}

#' @rdname brmsfamily
#' @export
zero_inflated_beta_binomial <- function(link = "logit", link_phi = "log",
link_zi = "logit") {
slink <- substitute(link)
.brmsfamily("zero_inflated_beta_binomial", link = link, slink = slink,
link_phi = link_phi, link_zi = link_zi)
}

#' @rdname brmsfamily
#' @export
categorical <- function(link = "logit", refcat = NULL) {
Expand Down Expand Up @@ -1834,7 +1843,8 @@ family_bounds.brmsterms <- function(x, ...) {
out <- list(lb = 0, ub = 1)
} else if (family %in% c("categorical", ordinal_families)) {
out <- list(lb = 1, ub = paste0("ncat", resp))
} else if (family %in% c("binomial", "zero_inflated_binomial")) {
} else if (family %in% c("binomial", "zero_inflated_binomial",
"zero_inflated_beta_binomial")) {
out <- list(lb = 0, ub = paste0("trials", resp))
} else if (family %in% "von_mises") {
out <- list(lb = -pi, ub = pi)
Expand Down
17 changes: 17 additions & 0 deletions R/family-lists.R
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,23 @@
)
}

.family_zero_inflated_beta_binomial <- function() {
list(
links = c(
"logit", "probit", "probit_approx",
"cloglog", "cauchit", "softit", "identity"
),
dpars = c("mu", "phi", "zi"),
type = "int",
ybounds = c(0, Inf),
closed = c(TRUE, NA),
ad = c("weights", "subset", "trials", "cens", "trunc", "index"),
include = "fun_zero_inflated_beta_binomial.stan",
specials = c("sbi_zi_logit"),
normalized = ""
)
}

.family_zero_inflated_beta <- function() {
list(
links = c(
Expand Down
13 changes: 12 additions & 1 deletion R/log_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -728,12 +728,23 @@ log_lik_zero_inflated_binomial <- function(i, prep) {
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i)
zi <- get_dpar(prep, "zi", i)
args <- list(size = trials, prob = mu, zi)
args <- list(size = trials, prob = mu, zi = zi)
out <- log_lik_censor("zero_inflated_binomial", args, i, prep)
out <- log_lik_truncate(out, pzero_inflated_binomial, args, i, prep)
log_lik_weight(out, i = i, prep = prep)
}

log_lik_zero_inflated_beta_binomial <- function(i, prep) {
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i)
phi <- get_dpar(prep, "phi", i)
zi <- get_dpar(prep, "zi", i)
args <- nlist(size = trials, mu, phi, zi)
out <- log_lik_censor("zero_inflated_beta_binomial", args, i, prep)
out <- log_lik_truncate(out, pzero_inflated_beta_binomial, args, i, prep)
log_lik_weight(out, i = i, prep = prep)
}

log_lik_zero_inflated_beta <- function(i, prep) {
zi <- get_dpar(prep, "zi", i)
mu <- get_dpar(prep, "mu", i)
Expand Down
6 changes: 6 additions & 0 deletions R/posterior_epred.R
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ posterior_epred_zero_inflated_binomial <- function(prep) {
prep$dpars$mu * trials * (1 - prep$dpars$zi)
}

posterior_epred_zero_inflated_beta_binomial <- function(prep) {
# same as zero_inflated_binom as beta part is included in mu
trials <- data2draws(prep$data$trials, dim_mu(prep))
prep$dpars$mu * trials * (1 - prep$dpars$zi)
}

posterior_epred_zero_inflated_beta <- function(prep) {
with(prep$dpars, mu * (1 - zi))
}
Expand Down
16 changes: 15 additions & 1 deletion R/posterior_predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ posterior_predict_zero_inflated_negbinomial <- function(i, prep, ...) {
}

posterior_predict_zero_inflated_binomial <- function(i, prep, ...) {
# theta is the bernoulii zero-inflation parameter
# theta is the bernoulli zero-inflation parameter
theta <- get_dpar(prep, "zi", i = i)
trials <- prep$data$trials[i]
prob <- get_dpar(prep, "mu", i = i)
Expand All @@ -819,6 +819,20 @@ posterior_predict_zero_inflated_binomial <- function(i, prep, ...) {
ifelse(zi < theta, 0, rbinom(ndraws, size = trials, prob = prob))
}

posterior_predict_zero_inflated_beta_binomial <- function(i, prep, ...) {
# theta is the bernoulli zero-inflation parameter
theta <- get_dpar(prep, "zi", i = i)
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i = i)
phi <- get_dpar(prep, "phi", i = i)
ndraws <- prep$ndraws
# beta location-scale probabilities
probs <- rbeta(ndraws, mu * phi, (1 - mu) * phi)
# compare with theta to incorporate the zero-inflation process
zi <- runif(ndraws, 0, 1)
ifelse(zi < theta, 0, rbinom(ndraws, size = trials, prob = probs))
}

posterior_predict_categorical <- function(i, prep, ...) {
eta <- get_Mu(prep, i = i)
eta <- insert_refcat(eta, refcat = prep$refcat)
Expand Down
12 changes: 12 additions & 0 deletions R/stan-likelihood.R
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,18 @@ stan_log_lik_zero_inflated_binomial <- function(bterms, resp = "", mix = "",
sdist(lpdf, p$trials, p$mu, p$zi)
}

stan_log_lik_zero_inflated_beta_binomial <- function(bterms, resp = "",
mix = "", threads = NULL,
...) {
p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
n <- stan_nn(threads)
p$trials <- paste0("trials", resp, n)
lpdf <- "zero_inflated_beta_binomial"
lpdf <- stan_log_lik_simple_lpdf(lpdf, "logit", bterms, sep = "_b")
lpdf <- paste0(lpdf, stan_log_lik_dpar_usc_logit("zi", bterms))
sdist(lpdf, p$trials, p$mu, p$phi, p$zi)
}

stan_log_lik_zero_inflated_beta <- function(bterms, resp = "", mix = "", ...) {
p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
usc_logit <- stan_log_lik_dpar_usc_logit("zi", bterms)
Expand Down
97 changes: 97 additions & 0 deletions inst/chunks/fun_zero_inflated_beta_binomial.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/* zero-inflated beta-binomial log-PDF of a single response
* Args:
* y: the response value
* trials: number of trials of the binomial part
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_lpmf(int y, int trials,
real mu, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_lpmf(1 | zi),
bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
mu * phi,
(1 - mu) * phi));
} else {
return bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, mu * phi, (1 - mu) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_logit_lpmf(int y, int trials,
real mu, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_logit_lpmf(1 | zi),
bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
mu * phi,
(1 - mu) * phi));
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, mu * phi, (1 - mu) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the binomial part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* eta: linear predictor for the beta part
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_blogit_lpmf(int y, int trials,
real eta, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_lpmf(1 | zi),
bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
eta * phi,
(1 - eta) * phi));
} else {
return bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, eta * phi, (1 - eta) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the binomial part
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* eta: linear predictor for the beta part
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_blogit_logit_lpmf(int y, int trials,
real eta, real phi,
real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_logit_lpmf(1 | zi),
bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
eta * phi,
(1 - eta) * phi));
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, eta * phi, (1 - eta) * phi);
}
}