Skip to content

Commit

Permalink
Merge pull request #1311 from hdrab127/master
Browse files Browse the repository at this point in the history
Add zero-inflated Beta-binomial distribution
  • Loading branch information
paul-buerkner committed Mar 10, 2022
2 parents 197b8eb + 6d672f6 commit 9a038e5
Show file tree
Hide file tree
Showing 21 changed files with 299 additions and 29 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: brms
Encoding: UTF-8
Type: Package
Title: Bayesian Regression Models using 'Stan'
Version: 2.16.10
Version: 2.16.11
Date: 2022-03-07
Authors@R:
c(person("Paul-Christian", "Bürkner", email = "paul.buerkner@gmail.com",
Expand All @@ -13,7 +13,8 @@ Authors@R:
person("Martin", "Modrak", role = c("ctb")),
person("Hamada S.", "Badr", role = c("ctb")),
person("Frank", "Weber", role = c("ctb")),
person("Mattan S.", "Ben-Shachar", role = c("ctb")))
person("Mattan S.", "Ben-Shachar", role = c("ctb")),
person("Hayden", "Rabel", role = c("ctb")))
Depends:
R (>= 3.5.0),
Rcpp (>= 0.12.0),
Expand Down Expand Up @@ -48,6 +49,8 @@ Suggests:
projpred (>= 2.0.0),
RWiener,
rtdists,
extraDistr,
processx,
mice,
spdep,
mnormt,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ export(dstudent_t)
export(dvon_mises)
export(dwiener)
export(dzero_inflated_beta)
export(dzero_inflated_beta_binomial)
export(dzero_inflated_binomial)
export(dzero_inflated_negbinomial)
export(dzero_inflated_poisson)
Expand Down Expand Up @@ -516,6 +517,7 @@ export(pskew_normal)
export(pstudent_t)
export(pvon_mises)
export(pzero_inflated_beta)
export(pzero_inflated_beta_binomial)
export(pzero_inflated_binomial)
export(pzero_inflated_negbinomial)
export(pzero_inflated_poisson)
Expand Down Expand Up @@ -587,6 +589,7 @@ export(waic)
export(weibull)
export(wiener)
export(zero_inflated_beta)
export(zero_inflated_beta_binomial)
export(zero_inflated_binomial)
export(zero_inflated_negbinomial)
export(zero_inflated_poisson)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* Add family `logistic_normal` for simplex responses. (#1274)
* Add argument `future_args` to `kfold` and `reloo` for additional
control over parallel execution via futures.
* Add family `zero_inflated_beta_binomial` for potentially over-dispersed and
zero-inflated binomial response models thanks to Hayden Rabel. (#1311)

### Other changes

Expand Down
1 change: 1 addition & 0 deletions R/backends.R
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ file_refit_options <- function() {
"--canonicalize=deprecations,braces,parentheses"
)
if (cmdstanr::cmdstan_version() >= "2.29.0") {
require_package("processx")
res <- processx::run(
command = stanc_cmd,
args = c(stan_file, stanc_flags),
Expand Down
30 changes: 30 additions & 0 deletions R/distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,7 @@ pcox <- function(q, mu, bhaz, cbhaz, lower.tail = TRUE, log.p = FALSE) {
#' @param zi zero-inflation probability
#' @param mu,lambda location parameter
#' @param shape,shape1,shape2 shape parameter
#' @param phi precision parameter
#' @param size number of trials
#' @param prob probability of success on each trial
#'
Expand Down Expand Up @@ -1736,6 +1737,21 @@ pzero_inflated_binomial <- function(q, size, prob, zi, lower.tail = TRUE,
.pzero_inflated(q, "binom", zi, pars, lower.tail, log.p)
}

#' @rdname ZeroInflated
#' @export
dzero_inflated_beta_binomial <- function(x, size, mu, phi, zi, log = FALSE) {
pars <- nlist(size, mu, phi)
.dzero_inflated(x, "beta_binom", zi, pars, log)
}

#' @rdname ZeroInflated
#' @export
pzero_inflated_beta_binomial <- function(q, size, mu, phi, zi,
lower.tail = TRUE, log.p = FALSE) {
pars <- nlist(size, mu, phi)
.pzero_inflated(q, "beta_binom", zi, pars, lower.tail, log.p)
}

#' @rdname ZeroInflated
#' @export
dzero_inflated_beta <- function(x, shape1, shape2, zi, log = FALSE) {
Expand Down Expand Up @@ -2461,6 +2477,20 @@ pordinal <- function(q, eta, thres, disc = 1, family = NULL, link = "logit") {
cblapply(q, .fun)
}

# CDF of the beta-binomial distribution
dbeta_binom <- function(x, size, mu = 1, phi = 1, log = FALSE) {
require_package("extraDistr")
extraDistr::dbbinom(x, size, alpha = mu * phi, beta = (1 - mu) * phi, log = log)
}

# PMF of the beta-binomial distribution
pbeta_binom <- function(q, size, mu = 1, phi = 1, lower.tail = TRUE,
log.p = FALSE) {
require_package("extraDistr")
extraDistr::pbbinom(q, size, alpha = mu * phi, beta = (1 - mu) * phi,
lower.tail = lower.tail, log.p = log.p)
}

# helper functions to shift arbitrary distributions
dshifted <- function(dist, x, shift = 0, ...) {
do_call(paste0("d", dist), list(x - shift, ...))
Expand Down
32 changes: 21 additions & 11 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#' \code{multinomial}, \code{cumulative}, \code{cratio}, \code{sratio},
#' \code{acat}, \code{hurdle_poisson}, \code{hurdle_negbinomial},
#' \code{hurdle_gamma}, \code{hurdle_lognormal},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta},
#' \code{zero_inflated_negbinomial}, \code{zero_inflated_poisson}, and
#' \code{zero_one_inflated_beta}.
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
#' \code{zero_inflated_beta}, \code{zero_inflated_negbinomial},
#' \code{zero_inflated_poisson}, and \code{zero_one_inflated_beta}.
#' @param link A specification for the model link function. This can be a
#' name/expression or character string. See the 'Details' section for more
#' information on link functions supported by each family.
Expand Down Expand Up @@ -108,10 +108,10 @@
#' \item{Families \code{hurdle_poisson}, \code{hurdle_negbinomial},
#' \code{hurdle_gamma}, \code{hurdle_lognormal}, \code{zero_inflated_poisson},
#' \code{zero_inflated_negbinomial}, \code{zero_inflated_binomial},
#' \code{zero_inflated_beta}, and \code{zero_one_inflated_beta}
#' allow to estimate zero-inflated and hurdle models.
#' These models can be very helpful when there are many zeros in the data
#' (or ones in case of one-inflated models)
#' \code{zero_inflated_beta_binomial}, \code{zero_inflated_beta}, and
#' \code{zero_one_inflated_beta} allow to estimate zero-inflated and hurdle
#' models. These models can be very helpful when there are many zeros in the
#' data (or ones in case of one-inflated models)
#' that cannot be explained by the primary distribution of the response.}
#' }
#'
Expand All @@ -129,9 +129,9 @@
#' \code{log}, \code{identity}, \code{sqrt}, and \code{softplus}.}
#'
#' \item{Families \code{binomial}, \code{bernoulli}, \code{Beta},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta},
#' and \code{zero_one_inflated_beta} support \code{logit},
#' \code{probit}, \code{probit_approx}, \code{cloglog},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
#' \code{zero_inflated_beta}, and \code{zero_one_inflated_beta} support
#' \code{logit}, \code{probit}, \code{probit_approx}, \code{cloglog},
#' \code{cauchit}, and \code{identity}.}
#'
#' \item{Families \code{cumulative}, \code{cratio}, \code{sratio},
Expand Down Expand Up @@ -761,6 +761,15 @@ zero_inflated_binomial <- function(link = "logit", link_zi = "logit") {
link_zi = link_zi)
}

#' @rdname brmsfamily
#' @export
zero_inflated_beta_binomial <- function(link = "logit", link_phi = "log",
link_zi = "logit") {
slink <- substitute(link)
.brmsfamily("zero_inflated_beta_binomial", link = link, slink = slink,
link_phi = link_phi, link_zi = link_zi)
}

#' @rdname brmsfamily
#' @export
categorical <- function(link = "logit", refcat = NULL) {
Expand Down Expand Up @@ -1834,7 +1843,8 @@ family_bounds.brmsterms <- function(x, ...) {
out <- list(lb = 0, ub = 1)
} else if (family %in% c("categorical", ordinal_families)) {
out <- list(lb = 1, ub = paste0("ncat", resp))
} else if (family %in% c("binomial", "zero_inflated_binomial")) {
} else if (family %in% c("binomial", "zero_inflated_binomial",
"zero_inflated_beta_binomial")) {
out <- list(lb = 0, ub = paste0("trials", resp))
} else if (family %in% "von_mises") {
out <- list(lb = -pi, ub = pi)
Expand Down
17 changes: 17 additions & 0 deletions R/family-lists.R
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,23 @@
)
}

.family_zero_inflated_beta_binomial <- function() {
list(
links = c(
"logit", "probit", "probit_approx",
"cloglog", "cauchit", "softit", "identity"
),
dpars = c("mu", "phi", "zi"),
type = "int",
ybounds = c(0, Inf),
closed = c(TRUE, NA),
ad = c("weights", "subset", "trials", "cens", "trunc", "index"),
include = "fun_zero_inflated_beta_binomial.stan",
specials = c("sbi_zi_logit"),
normalized = ""
)
}

.family_zero_inflated_beta <- function() {
list(
links = c(
Expand Down
13 changes: 12 additions & 1 deletion R/log_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -728,12 +728,23 @@ log_lik_zero_inflated_binomial <- function(i, prep) {
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i)
zi <- get_dpar(prep, "zi", i)
args <- list(size = trials, prob = mu, zi)
args <- list(size = trials, prob = mu, zi = zi)
out <- log_lik_censor("zero_inflated_binomial", args, i, prep)
out <- log_lik_truncate(out, pzero_inflated_binomial, args, i, prep)
log_lik_weight(out, i = i, prep = prep)
}

log_lik_zero_inflated_beta_binomial <- function(i, prep) {
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i)
phi <- get_dpar(prep, "phi", i)
zi <- get_dpar(prep, "zi", i)
args <- nlist(size = trials, mu, phi, zi)
out <- log_lik_censor("zero_inflated_beta_binomial", args, i, prep)
out <- log_lik_truncate(out, pzero_inflated_beta_binomial, args, i, prep)
log_lik_weight(out, i = i, prep = prep)
}

log_lik_zero_inflated_beta <- function(i, prep) {
zi <- get_dpar(prep, "zi", i)
mu <- get_dpar(prep, "mu", i)
Expand Down
6 changes: 6 additions & 0 deletions R/posterior_epred.R
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ posterior_epred_zero_inflated_binomial <- function(prep) {
prep$dpars$mu * trials * (1 - prep$dpars$zi)
}

posterior_epred_zero_inflated_beta_binomial <- function(prep) {
# same as zero_inflated_binom as beta part is included in mu
trials <- data2draws(prep$data$trials, dim_mu(prep))
prep$dpars$mu * trials * (1 - prep$dpars$zi)
}

posterior_epred_zero_inflated_beta <- function(prep) {
with(prep$dpars, mu * (1 - zi))
}
Expand Down
16 changes: 15 additions & 1 deletion R/posterior_predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ posterior_predict_zero_inflated_negbinomial <- function(i, prep, ...) {
}

posterior_predict_zero_inflated_binomial <- function(i, prep, ...) {
# theta is the bernoulii zero-inflation parameter
# theta is the bernoulli zero-inflation parameter
theta <- get_dpar(prep, "zi", i = i)
trials <- prep$data$trials[i]
prob <- get_dpar(prep, "mu", i = i)
Expand All @@ -819,6 +819,20 @@ posterior_predict_zero_inflated_binomial <- function(i, prep, ...) {
ifelse(zi < theta, 0, rbinom(ndraws, size = trials, prob = prob))
}

posterior_predict_zero_inflated_beta_binomial <- function(i, prep, ...) {
# theta is the bernoulli zero-inflation parameter
theta <- get_dpar(prep, "zi", i = i)
trials <- prep$data$trials[i]
mu <- get_dpar(prep, "mu", i = i)
phi <- get_dpar(prep, "phi", i = i)
ndraws <- prep$ndraws
# beta location-scale probabilities
probs <- rbeta(ndraws, mu * phi, (1 - mu) * phi)
# compare with theta to incorporate the zero-inflation process
zi <- runif(ndraws, 0, 1)
ifelse(zi < theta, 0, rbinom(ndraws, size = trials, prob = probs))
}

posterior_predict_categorical <- function(i, prep, ...) {
eta <- get_Mu(prep, i = i)
eta <- insert_refcat(eta, refcat = prep$refcat)
Expand Down
12 changes: 12 additions & 0 deletions R/stan-likelihood.R
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,18 @@ stan_log_lik_zero_inflated_binomial <- function(bterms, resp = "", mix = "",
sdist(lpdf, p$trials, p$mu, p$zi)
}

stan_log_lik_zero_inflated_beta_binomial <- function(bterms, resp = "",
mix = "", threads = NULL,
...) {
p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
n <- stan_nn(threads)
p$trials <- paste0("trials", resp, n)
lpdf <- "zero_inflated_beta_binomial"
lpdf <- stan_log_lik_simple_lpdf(lpdf, "logit", bterms, sep = "_b")
lpdf <- paste0(lpdf, stan_log_lik_dpar_usc_logit("zi", bterms))
sdist(lpdf, p$trials, p$mu, p$phi, p$zi)
}

stan_log_lik_zero_inflated_beta <- function(bterms, resp = "", mix = "", ...) {
p <- stan_log_lik_dpars(bterms, TRUE, resp, mix)
usc_logit <- stan_log_lik_dpar_usc_logit("zi", bterms)
Expand Down
97 changes: 97 additions & 0 deletions inst/chunks/fun_zero_inflated_beta_binomial.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/* zero-inflated beta-binomial log-PDF of a single response
* Args:
* y: the response value
* trials: number of trials of the binomial part
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_lpmf(int y, int trials,
real mu, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_lpmf(1 | zi),
bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
mu * phi,
(1 - mu) * phi));
} else {
return bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, mu * phi, (1 - mu) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_logit_lpmf(int y, int trials,
real mu, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_logit_lpmf(1 | zi),
bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
mu * phi,
(1 - mu) * phi));
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, mu * phi, (1 - mu) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the binomial part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* eta: linear predictor for the beta part
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_blogit_lpmf(int y, int trials,
real eta, real phi, real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_lpmf(1 | zi),
bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
eta * phi,
(1 - eta) * phi));
} else {
return bernoulli_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, eta * phi, (1 - eta) * phi);
}
}
/* zero-inflated beta-binomial log-PDF of a single response
* logit parameterization of the binomial part
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* trials: number of trials of the binomial part
* eta: linear predictor for the beta part
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_binomial_blogit_logit_lpmf(int y, int trials,
real eta, real phi,
real zi) {
if (y == 0) {
return log_sum_exp(bernoulli_logit_lpmf(1 | zi),
bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(0 | trials,
eta * phi,
(1 - eta) * phi));
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_binomial_lpmf(y | trials, eta * phi, (1 - eta) * phi);
}
}

0 comments on commit 9a038e5

Please sign in to comment.