Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for Regression and Other Stories #432

Merged
merged 27 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
8d43c7f
prior scale for intercept now 2.5 instead of 10
jgabry Apr 27, 2020
8bfe1d2
change autoscale usage
jgabry Apr 27, 2020
0973436
add default_prior_intercept, default_prior_coef
jgabry Apr 27, 2020
dd800ab
Update priors.R
jgabry Apr 27, 2020
76a3806
Update NAMESPACE
jgabry Apr 27, 2020
fa19d13
introduce posterior_epred
bgoodri May 3, 2020
7add53f
predict.R: remove error message for RAOS
bgoodri May 3, 2020
90d49bc
add (currently unused) family argument to default_prior_*
jgabry May 14, 2020
b469f1e
update posterior_linpred/epred doc
jgabry May 14, 2020
92d10a6
import posterior_epred generic from rstantools
jgabry May 14, 2020
fef4102
use posterior_epred instead of posterior_linpred in *_R2() functions
jgabry May 15, 2020
dfd7d33
use posterior_epred instead of linpred with transform
jgabry May 15, 2020
6ebfa90
logit and invlogit convenience functions
jgabry May 18, 2020
cc050a0
Update DESCRIPTION
jgabry Jun 3, 2020
64126fc
Merge branch 'master' into new-default-prior
jgabry Jun 10, 2020
c5b67a4
Update posterior_linpred.R
jgabry Jun 21, 2020
f97753c
always divide by sd(x) (unless x is constant)
jgabry Jun 26, 2020
eb5cbd4
Update priors.Rmd
jgabry Jun 26, 2020
4c3b8e2
Merge branch 'new-default-prior' of https://github.com/stan-dev/rstan…
jgabry Jun 26, 2020
dcf587a
more doc updates after updating default priors
jgabry Jun 26, 2020
5a43cad
Add note at top of priors vignette about changes
jgabry Jun 26, 2020
27e9988
remove autoscale=FALSE from vignettes
jgabry Jun 26, 2020
efc6d16
Update zzz.R
jgabry Jun 27, 2020
781a131
mention version number in vignette
jgabry Jun 29, 2020
3a412a9
averages -> expectations in posterior_epred doc
jgabry Jun 29, 2020
8708832
update message and doc for posterior_linpred(transform=TRUE)
jgabry Jul 1, 2020
5e8f452
Merge branch 'master' into new-default-prior
bgoodri Jul 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Imports:
Matrix (>= 1.2-13),
nlme (>= 3.1-124),
rstan (>= 2.19.1),
rstantools (>= 2.0.0),
rstantools (>= 2.1.0),
shinystan (>= 2.3.0),
stats,
survival (>= 2.40.1),
Expand All @@ -68,4 +68,4 @@ LazyData: true
NeedsCompilation: yes
URL: https://mc-stan.org/rstanarm/, https://discourse.mc-stan.org
BugReports: https://github.com/stan-dev/rstanarm/issues
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ S3method(pairs,stanreg)
S3method(plot,predict.stanjm)
S3method(plot,stanreg)
S3method(plot,survfit.stanjm)
S3method(posterior_epred,stanreg)
S3method(posterior_interval,stanreg)
S3method(posterior_linpred,stanreg)
S3method(posterior_predict,stanmvreg)
Expand Down Expand Up @@ -95,6 +96,8 @@ export(bayes_R2)
export(cauchy)
export(compare_models)
export(decov)
export(default_prior_coef)
export(default_prior_intercept)
export(dirichlet)
export(exponential)
export(fixef)
Expand All @@ -103,12 +106,14 @@ export(get_y)
export(get_z)
export(hs)
export(hs_plus)
export(invlogit)
export(kfold)
export(laplace)
export(lasso)
export(launch_shinystan)
export(lkj)
export(log_lik)
export(logit)
export(loo)
export(loo_R2)
export(loo_compare)
Expand All @@ -124,6 +129,7 @@ export(pairs_condition)
export(pairs_style_np)
export(plot_nonlinear)
export(plot_stack_jm)
export(posterior_epred)
export(posterior_interval)
export(posterior_linpred)
export(posterior_predict)
Expand Down
6 changes: 3 additions & 3 deletions R/bayes_R2.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @param re.form For models with group-level terms, \code{re.form} is
#' passed to \code{\link{posterior_linpred}} if specified.
#' passed to \code{\link{posterior_epred}} if specified.
#' @param ... Currently ignored.
#'
#' @return A vector of R-squared values with length equal to the posterior
Expand Down Expand Up @@ -53,7 +53,7 @@ bayes_R2.stanreg <- function(object, ..., re.form = NULL) {
stop("bayes_R2 is only available for Gaussian and binomial models.")
}

mu_pred <- posterior_linpred(object, transform = TRUE, re.form = re.form)
mu_pred <- posterior_epred(object, re.form = re.form)
if (is.binomial(fam)) {
y <- get_y(object)
if (NCOL(y) == 2) {
Expand Down Expand Up @@ -99,7 +99,7 @@ loo_R2.stanreg <- function(object, ...) {
psis_object <- loo::psis(log_ratios, r_eff = NA)
}

mu_pred <- posterior_linpred(object, transform = TRUE)
mu_pred <- posterior_epred(object)
if (is.binomial(fam)) {
if (is.factor(y)) {
y <- fac2bin(y)
Expand Down
2 changes: 1 addition & 1 deletion R/doc-rstanarm-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' @import bayesplot
#' @import shinystan
#' @import rstantools
#' @export log_lik posterior_linpred posterior_predict posterior_interval
#' @export log_lik posterior_linpred posterior_epred posterior_predict posterior_interval
#' @export predictive_interval predictive_error prior_summary bayes_R2
#' @export loo_linpred loo_predict loo_predictive_interval loo_R2
#' @export loo waic kfold loo_compare
Expand Down
12 changes: 12 additions & 0 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.


#' Logit and inverse logit
#'
#' @export
#' @param x Numeric vector.
#' @return A numeric vector the same length as \code{x}.
logit <- function(x) stats::qlogis(x)

#' @rdname logit
#' @export
invlogit <- function(x) stats::plogis(x)

# Set arguments for sampling
#
# Prepare a list of arguments to use with \code{rstan::sampling} via
Expand Down
39 changes: 33 additions & 6 deletions R/posterior_linpred.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,24 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

#' Posterior distribution of the linear predictor
#' Posterior distribution of the (possibly transformed) linear predictor
#'
#' Extract the posterior draws of the linear predictor, possibly transformed by
#' the inverse-link function. This function is occasionally useful, but it
#' should be used sparingly. Inference and model checking should generally be
#' carried out using the posterior predictive distribution (i.e., using
#' \code{\link{posterior_predict}}).
#'
#' @aliases posterior_linpred
#' @aliases posterior_linpred posterior_epred
#' @export
#'
#' @templateVar stanregArg object
#' @template args-stanreg-object
#' @param transform Should the linear predictor be transformed using the
#' inverse-link function? The default is \code{FALSE}, in which case the
#' untransformed linear predictor is returned.
#' untransformed linear predictor is returned. The non-default behavior
#' of \code{TRUE} is somewhat deprecated since \code{posterior_epred}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "somewhat deprecated" mean? Is it possible to be more specific?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. @bgoodri by "somewhat deprecated" did you just mean that we don't recommend it anymore but we're not going to throw a warning (I think it's just a message currently not a warning)? Is there a reason not to deprecate it with a proper warning?

#' should be called instead.
#' @param newdata,draws,re.form,offset Same as for \code{\link{posterior_predict}}.
#' @param XZ If \code{TRUE} then instead of computing the linear predictor the
#' design matrix \code{X} (or \code{cbind(X,Z)} for models with group-specific
Expand All @@ -43,6 +45,14 @@
#' transformed) linear predictor. The exception is if the argument \code{XZ}
#' is set to \code{TRUE} (see the \code{XZ} argument description above).
#'
#' @details The \code{posterior_linpred} function returns the posterior
#' distribution of the linear predictor, while the \code{posterior_epred}
#' function returns the posterior distribution of the conditional expectation.
#' In the special case of a Gaussian likelihood with an identity link
#' function, these two concepts are the same. The \code{posterior_epred}
#' function is a less noisy way obtain averages over the output of
jgabry marked this conversation as resolved.
Show resolved Hide resolved
#' \code{\link{posterior_predict}}.
#'
#' @note For models estimated with \code{\link{stan_clogit}}, the number of
#' successes per stratum is ostensibly fixed by the research design. Thus,
#' when calling \code{posterior_linpred} with new data and \code{transform =
Expand All @@ -63,11 +73,11 @@
#' colMeans(linpred)
#'
#' # probabilities
#' probs <- posterior_linpred(example_model, transform = TRUE)
#' probs <- posterior_epred(example_model)
#' colMeans(probs)
#'
#' # not conditioning on any group-level parameters
#' probs2 <- posterior_linpred(example_model, transform = TRUE, re.form = NA)
#' probs2 <- posterior_epred(example_model, re.form = NA)
#' apply(probs2, 2, median)
#'
posterior_linpred.stanreg <-
Expand Down Expand Up @@ -102,6 +112,11 @@ posterior_linpred.stanreg <-
} else {
colnames(eta) <- rownames(newdata)
}

if (isTRUE(transform)) {
message("transform = TRUE is somewhat deprecated. ",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like use of "somewhat"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah "somewhat" is a bit strange. I think the idea was to distinguish between a message and warning, but I'm not sure. @bgoodri Are you ok if we change this message to a warning and get rid of "somewhat"?

"Please call posterior_epred(), which provides equivalent functionality.")
}

if (!transform || is.nlmer(object)) {
return(eta)
Expand All @@ -115,7 +130,19 @@ posterior_linpred.stanreg <-
return(g(eta))
}


#' @rdname posterior_linpred.stanreg
#' @export
posterior_epred.stanreg <-
function(object,
newdata = NULL,
draws = NULL,
re.form = NULL,
offset = NULL,
XZ = FALSE,
...) {
return(suppressMessages(posterior_linpred(object, transform = TRUE, newdata,
draws, re.form, offset, XZ, ...)))
}

# internal ----------------------------------------------------------------
clogit_linpred_transform <- function(object, newdata = NULL, eta = NULL) {
Expand Down
2 changes: 1 addition & 1 deletion R/pp_check.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ is_binomial_ppc <- function(object, ...) {
...) {
y <- get_y(object, ...)
if (binned_resid_plot) {
yrep <- posterior_linpred(object, transform = TRUE, ...)
yrep <- posterior_epred(object, ...)
yrep <- yrep[1:nreps, , drop = FALSE]
} else {
yrep <- posterior_predict(object, draws = nreps, seed = seed, ...)
Expand Down
9 changes: 1 addition & 8 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,7 @@ predict.stanreg <- function(object,
object$linear.predictors else object$fitted.values
return(preds)
}
if (!used.optimizing(object) && type == "response")
stop(
"type='response' should not be used for models estimated by MCMC",
"\nor variational approximation. Instead, use the 'posterior_predict' ",
"function to draw \nfrom the posterior predictive distribution.",
call. = FALSE
)


if (isTRUE(object$stan_function == "stan_betareg") &&
!is.null(newdata)) {
# avoid false positive warnings about missing z variables in newdata
Expand Down
78 changes: 50 additions & 28 deletions R/priors.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#'
#' The default priors used in the various \pkg{rstanarm} modeling functions
#' are intended to be \emph{weakly informative} in that they provide moderate
#' regularlization and help stabilize computation. For many applications the
#' regularization and help stabilize computation. For many applications the
#' defaults will perform well, but prudent use of more informative priors is
#' encouraged. Uniform prior distributions are possible (e.g. by setting
#' \code{\link{stan_glm}}'s \code{prior} argument to \code{NULL}) but, unless
Expand All @@ -36,10 +36,11 @@
#' More information on priors is available in the vignette
#' \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior
#' Distributions for rstanarm Models}} as well as the vignettes for the
#' various modeling functions. In particular, for details on the
#' priors used for multilevel models see the vignette
#' various modeling functions. For details on the
#' priors used for multilevel models in particular see the vignette
#' \href{http://mc-stan.org/rstanarm/articles/glmer.html}{\emph{Estimating
#' Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}}.
#' Generalized (Non-)Linear Models with Group-Specific Terms with rstanarm}}
#' and also the \strong{Covariance matrices} section lower down on this page.
#'
#'
#' @param location Prior location. In most cases, this is the prior mean, but
Expand Down Expand Up @@ -72,18 +73,18 @@
#' or equal to two, the mode of this Beta distribution does not exist
#' and an error will prompt the user to specify another choice for
#' \code{what}.
#' @param autoscale A logical scalar, defaulting to \code{TRUE}. If \code{TRUE}
#' then the scales of the priors on the intercept and regression coefficients
#' may be additionally modified internally by \pkg{rstanarm} in the following
#' cases. First, for Gaussian models only, the prior scales for the intercept,
#' coefficients, and the auxiliary parameter \code{sigma} (error standard
#' deviation) are multiplied by \code{sd(y)}. Additionally --- not only for
#' Gaussian models --- if the \code{QR} argument to the model fitting function
#' (e.g. \code{stan_glm}) is \code{FALSE} then: for a predictor with only one
#' value nothing is changed; for a predictor \code{x} with exactly two unique
#' values, we take the user-specified (or default) scale(s) for the selected
#' priors and divide by the range of \code{x}; for a predictor \code{x} with
#' more than two unique values, we divide the prior scale(s) by \code{sd(x)}.
#' @param autoscale If \code{TRUE} then the scales of the priors on the
#' intercept and regression coefficients may be additionally modified
#' internally by \pkg{rstanarm} in the following cases. First, for Gaussian
#' models only, the prior scales for the intercept, coefficients, and the
#' auxiliary parameter \code{sigma} (error standard deviation) are multiplied
#' by \code{sd(y)}. Additionally --- not only for Gaussian models --- if the
#' \code{QR} argument to the model fitting function (e.g. \code{stan_glm}) is
#' \code{FALSE} then we also divide the prior scale(s) by \code{sd(x)}.
#' Prior autoscaling is also discussed in the vignette
#' \href{http://mc-stan.org/rstanarm/articles/priors.html}{\emph{Prior
#' Distributions for rstanarm Models}}
#'
#'
#' @details The details depend on the family of the prior being used:
#' \subsection{Student t family}{
Expand All @@ -104,12 +105,11 @@
#' approaches the normal distribution and if the degrees of freedom are one,
#' then the Student t distribution is the Cauchy distribution.
#'
#' If \code{scale} is not specified it will default to \eqn{10} for the
#' intercept and \eqn{2.5} for the other coefficients, unless the probit link
#' function is used, in which case these defaults are scaled by a factor of
#' \code{dnorm(0)/dlogis(0)}, which is roughly \eqn{1.6}.
#' If \code{scale} is not specified it will default to \eqn{2.5}, unless the
#' probit link function is used, in which case these defaults are scaled by a
#' factor of \code{dnorm(0)/dlogis(0)}, which is roughly \eqn{1.6}.
#'
#' If the \code{autoscale} argument is \code{TRUE} (the default), then the
#' If the \code{autoscale} argument is \code{TRUE}, then the
#' scales will be further adjusted as described above in the documentation of
#' the \code{autoscale} argument in the \strong{Arguments} section.
#' }
Expand Down Expand Up @@ -174,7 +174,7 @@
#'
#' It is also common in supervised learning to standardize the predictors
#' before training the model. We do not recommend doing so. Instead, it is
#' better to specify \code{autoscale = TRUE} (the default value), which
#' better to specify \code{autoscale = TRUE}, which
#' will adjust the scales of the priors according to the dispersion in the
#' variables. See the documentation of the \code{autoscale} argument above
#' and also the \code{\link{prior_summary}} page for more information.
Expand Down Expand Up @@ -436,22 +436,22 @@ NULL

#' @rdname priors
#' @export
normal <- function(location = 0, scale = NULL, autoscale = TRUE) {
normal <- function(location = 0, scale = NULL, autoscale = FALSE) {
validate_parameter_value(scale)
nlist(dist = "normal", df = NA, location, scale, autoscale)
}

#' @rdname priors
#' @export
student_t <- function(df = 1, location = 0, scale = NULL, autoscale = TRUE) {
student_t <- function(df = 1, location = 0, scale = NULL, autoscale = FALSE) {
validate_parameter_value(scale)
validate_parameter_value(df)
nlist(dist = "t", df, location, scale, autoscale)
}

#' @rdname priors
#' @export
cauchy <- function(location = 0, scale = NULL, autoscale = TRUE) {
cauchy <- function(location = 0, scale = NULL, autoscale = FALSE) {
student_t(df = 1, location = location, scale = scale, autoscale)
}

Expand Down Expand Up @@ -485,13 +485,13 @@ hs_plus <- function(df1 = 1, df2 = 1, global_df = 1, global_scale = 0.01,

#' @rdname priors
#' @export
laplace <- function(location = 0, scale = NULL, autoscale = TRUE) {
laplace <- function(location = 0, scale = NULL, autoscale = FALSE) {
nlist(dist = "laplace", df = NA, location, scale, autoscale)
}

#' @rdname priors
#' @export
lasso <- function(df = 1, location = 0, scale = NULL, autoscale = TRUE) {
lasso <- function(df = 1, location = 0, scale = NULL, autoscale = FALSE) {
nlist(dist = "lasso", df, location, scale, autoscale)
}

Expand All @@ -510,7 +510,7 @@ product_normal <- function(df = 2, location = 0, scale = 1) {
#' \code{1}. For the exponential distribution, the rate parameter is the
#' \emph{reciprocal} of the mean.
#'
exponential <- function(rate = 1, autoscale = TRUE) {
exponential <- function(rate = 1, autoscale = FALSE) {
stopifnot(length(rate) == 1)
validate_parameter_value(rate)
nlist(dist = "exponential",
Expand Down Expand Up @@ -562,7 +562,29 @@ R2 <- function(location = NULL, what = c("mode", "mean", "median", "log")) {
list(dist = "R2", location = location, what = what, df = 0, scale = 0)
}

#' @rdname priors
#' @export
#' @param family Not currently used.
default_prior_intercept = function(family) {
# family arg not used, but we can use in the future to do different things
# based on family if necessary
out <- normal(0, 2.5, autoscale = TRUE)
out$location <- NULL # not determined yet
out$default <- TRUE
out$version <- utils::packageVersion("rstanarm")
out
}

#' @rdname priors
#' @export
default_prior_coef = function(family) {
# family arg not used, but we can use in the future to do different things
# based on family if necessary
out <- normal(0, 2.5, autoscale = TRUE)
out$default <- TRUE
out$version <- utils::packageVersion("rstanarm")
out
}


# internal ----------------------------------------------------------------
Expand Down
Loading