Skip to content

Commit

Permalink
reorganize documentation and internal functions
Browse files Browse the repository at this point in the history
  • Loading branch information
jgabry committed Oct 19, 2016
1 parent 9ad61cc commit db4aef9
Show file tree
Hide file tree
Showing 23 changed files with 371 additions and 302 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: loo
Type: Package
Title: Efficient Leave-One-Out Cross-Validation and WAIC for Bayesian Models
Version: 0.1.6.9999
Version: 0.1.7
Date: 2016-06-13
Authors@R: c(person("Aki", "Vehtari", email = "Aki.Vehtari@aalto.fi", role = c("aut")),
person("Andrew", "Gelman", email = "gelman@stat.columbia.edu", role = c("aut")),
Expand Down
8 changes: 3 additions & 5 deletions NEWS.md
@@ -1,12 +1,10 @@
# loo 0.1.7
* `pareto_k_ids` convenience function for quickly identifying problematic
observations
* warning messages about pareto k values are now issued by `psislw` instead of
`print.loo`
* `pareto_k_table` and `pareto_k_ids` convenience functions for quickly identifying problematic observations
* pareto k values now grouped into `(-Inf, 0.5]`, `(0.5, 0.7]`, `(0.7, 1]`,
`(1, Inf)` (didn't used to include 0.7)
* warning messages are now issued by `psislw` instead of `print.loo`
* `print.loo` shows a table of pareto k estimates (if any k > 0.7)
* Add argument to `compare` to allows loo objects to be provided in a list
* Add argument to `compare` to allows loo objects to be provided in a list rather than in '...'

# loo 0.1.6
* GitHub repository moved from @jgabry to @stan-dev
Expand Down
23 changes: 15 additions & 8 deletions R/compare.R
Expand Up @@ -8,12 +8,14 @@
#' @param x A list of at least two objects returned by \code{\link{loo}} or
#' \code{\link{waic}}. This argument can be used as an alternative to
#' specifying the models in \code{...}.
#' @return A vector or matrix with class \code{'compare.loo'}. If exactly two
#' objects are provided in \code{...} or \code{x}, then the difference in
#' expected predictive accuracy and the standard error of the difference are
#' returned (see Details). \emph{The difference will be positive if the
#' expected predictive accuracy for the second model is higher.} If more than
#' two objects are provided then a matrix of summary information is returned.
#'
#' @return A vector or matrix with class \code{'compare.loo'} that has its own
#' print method. If exactly two objects are provided in \code{...} or
#' \code{x}, then the difference in expected predictive accuracy and the
#' standard error of the difference are returned (see Details). \emph{The
#' difference will be positive if the expected predictive accuracy for the
#' second model is higher.} If more than two objects are provided then a
#' matrix of summary information is returned.
#'
#' @details When comparing two fitted models, we can estimate the difference in
#' their expected predictive accuracy by the difference in \code{elpd_waic} or
Expand All @@ -35,9 +37,8 @@
#' currently working on something similar to these weights that also accounts
#' for uncertainty, which will be included in future versions of \pkg{loo}.
#'
#' @template loo-paper-reference
#' @template loo-and-psis-references
#'
#' @seealso \code{\link{print.compare.loo}}
#' @examples
#' \dontrun{
#' loo1 <- loo(log_lik1)
Expand Down Expand Up @@ -100,3 +101,9 @@ compare <- function(..., x) {
comp
}
}

#' @export
print.compare.loo <- function(x, ..., digits = 1) {
print(.fr(x, digits), quote = FALSE)
invisible(x)
}
6 changes: 4 additions & 2 deletions R/gpdfit.R
Expand Up @@ -13,8 +13,8 @@
#'
#' @template internal-function-note
#'
#' @seealso \code{\link{psislw}}, \code{\link{loo}},
#' \code{\link{loo-package}}
#' @seealso \code{\link{psislw}}, \code{\link{pareto-k-diagnostic}},
#' \code{\link{loo-package}}
#'
#' @references
#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method
Expand All @@ -38,6 +38,8 @@ gpdfit <- function(x) {
nlist(k, sigma)
}


# internal ----------------------------------------------------------------
lx <- function(a,x) {
a <- -a
k <- sapply(a, FUN = function(y) mean(log1p(y * x)))
Expand Down
77 changes: 1 addition & 76 deletions R/helpers.R
Expand Up @@ -72,43 +72,11 @@ pointwise_loo <- function(psis, log_lik, llfun = NULL, llargs = NULL) {
out
}

# psis helpers ------------------------------------------------------------

# inverse-CDF of generalized Pareto distribution (formula from Wikipedia)
qgpd <- function(p, xi = 1, mu = 0, sigma = 1, lower.tail = TRUE) {
if (is.nan(sigma) || sigma <= 0)
return(rep(NaN, length(p)))
if (!lower.tail)
p <- 1 - p

mu + sigma * ((1 - p)^(-xi) - 1) / xi
}

lw_cutpoint <- function(y, wcp, min_cut) {
if (min_cut < log(.Machine$double.xmin))
min_cut <- -700
cp <- quantile(y, 1 - wcp, names = FALSE)
max(cp, min_cut)
}

lw_truncate <- function(y, wtrunc) {
if (wtrunc == 0)
return(y)
logS <- log(length(y))
lwtrunc <- wtrunc * logS - logS + logSumExp(y)
y[y > lwtrunc] <- lwtrunc
y
}

#' @importFrom matrixStats logSumExp
lw_normalize <- function(y) {
y - logSumExp(y)
}

# print and warning helpers -----------------------------------------------
.fr <- function(x, digits) format(round(x, digits), nsmall = digits)
.warn <- function(..., call. = FALSE) warning(..., call. = call.)
.k_help <- function() c("See PSIS-LOO description (?'loo-package') for more information")
.k_help <- function() "See help('pareto-k-diagnostic') for details."
.k_cut <- function(k) {
cut(
k,
Expand Down Expand Up @@ -153,49 +121,6 @@ pwaic_warnings <- function(p, digits = 1) {
}


# plot pareto k estimates -------------------------------------------------
#' @importFrom graphics abline axis plot points text
plot_k <- function(k, ..., label_points = FALSE) {
inrange <- function(a, rr) a >= rr[1L] & a <= rr[2L]
plot(k, xlab = "Data point", ylab = "Shape parameter k",
type = "n", bty = "l", yaxt = "n")
axis(side = 2, las = 1)
krange <- range(k)
breaks <- c(0, 0.5, 0.7, 1)
hex_clrs <- c("#C79999", "#A25050", "#7C0000")
ltys <- c(3, 4, 2, 1)
for (j in seq_along(breaks)) {
val <- breaks[j]
if (inrange(val, krange))
abline(h = val, col = ifelse(val == 0, "darkgray", hex_clrs[j-1]),
lty = ltys[j], lwd = 1)
}

breaks <- c(-Inf, 0.5, 1)
hex_clrs <- c("#6497b1", "#005b96", "#03396c")
clrs <- ifelse(inrange(k, breaks[1:2]), hex_clrs[1],
ifelse(inrange(k, breaks[2:3]), hex_clrs[2], hex_clrs[3]))
if (all(k < 0.5) || !label_points) {
points(k, col = clrs, pch = 3, cex = .6)
return(invisible())
} else {
points(k[k < 0.5], col = clrs[k < 0.5], pch = 3, cex = .6)
sel <- !inrange(k, breaks[1:2])
dots <- list(...)
txt_args <- c(list(x = seq_along(k)[sel], y = k[sel],
labels = seq_along(k)[sel]),
if (length(dots)) dots)
if (!("adj" %in% names(txt_args)))
txt_args$adj <- 2/3
if (!("cex" %in% names(txt_args)))
txt_args$cex <- 0.75
if (!("col" %in% names(txt_args)))
txt_args$col <- clrs[sel]

do.call("text", txt_args)
}
}


# convenience functions ---------------------------------------------------
is.loo <- function(x) {
Expand Down
10 changes: 7 additions & 3 deletions R/loo.R
Expand Up @@ -64,11 +64,15 @@
#' \code{\link{loo-package}} for details on the Pareto Smoothed Importance
#' Sampling (PSIS) procedure used for approximating LOO.
#'
#' \code{\link{print.loo}} for the \code{print} and \code{plot} methods for
#' \code{'loo'} objects.
#'
#' \code{\link{compare}} for model comparison.
#'
#' \code{\link{pareto-k-diagnostic}} for convenience functions for looking at
#' diagnostics.
#'
#' \code{\link{print.loo}} for the \code{print} method for \code{'loo'} objects.
#'
#' @template loo-and-psis-references
#'
#' @examples
#' \dontrun{
#' ### Usage with stanfit objects
Expand Down
25 changes: 9 additions & 16 deletions R/loo_package.R
Expand Up @@ -5,13 +5,9 @@
#'
#' @importFrom stats sd var quantile
#'
#' @description This package implements the methods described in
#'
#' Vehtari, A., Gelman, A., and Gabry, J. (2016). Practical Bayesian model
#' evaluation using leave-one-out cross-validation and WAIC. arXiv preprint:
#' \url{http://arxiv.org/abs/1507.04544}.
#'
#' The package documentation is largely based on excerpts from the paper.
#'@description This package implements the methods described in Vehtari, Gelman,
#' and Gabry (2016a). The package documentation is largely based on excerpts
#' from the paper.
#'
#' @section Summary: Leave-one-out cross-validation (LOO) and the widely
#' applicable information criterion (WAIC) are methods for estimating
Expand All @@ -21,12 +17,12 @@
#' estimates of predictive error such as AIC and DIC but are less used in
#' practice because they involve additional computational steps. This package
#' implements the fast and stable computations for LOO and WAIC laid out in
#' Vehtari, Gelman, and Gabry (2015). From existing posterior simulation
#' Vehtari, Gelman, and Gabry (2016a). From existing posterior simulation
#' draws, we compute LOO using Pareto Smoothed Importance Sampling (PSIS;
#' Vehtari and Gelman, 2015), a new procedure for regularizing importance
#' weights. As a byproduct of our calculations, we also obtain approximate
#' standard errors for estimated predictive errors and for comparing of
#' predictive errors between two models.
#' Vehtari, Gelman, and Gabry, 2016b), a new procedure for regularizing
#' importance weights. As a byproduct of our calculations, we also obtain
#' approximate standard errors for estimated predictive errors and for
#' comparing of predictive errors between two models.
#'
#' @section Details: After fitting a Bayesian model we often want to measure its
#' predictive accuracy, for its own sake or for purposes of model comparison,
Expand Down Expand Up @@ -144,7 +140,7 @@
#' influential observations. A robust model may reduce the sensitivity to highly
#' influential observations.
#'
#' @template loo-paper-reference
#' @template loo-and-psis-references
#' @references
#' Epifani, I., MacEachern, S. N., and Peruggia, M. (2008). Case-deletion
#' importance sampling estimators: Central limit theorems and related results.
Expand Down Expand Up @@ -179,9 +175,6 @@
#' Stan Development Team (2016). RStan: the R interface to Stan, Version 2.10.1
#' \url{http://mc-stan.org/interfaces/rstan.html}.
#'
#' Vehtari, A., and Gelman, A. (2015). Pareto smoothed importance sampling.
#' arXiv:1507.02646.
#'
#' Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and
#' widely application information criterion in singular learning theory.
#' \emph{Journal of Machine Learning Research} \strong{11}, 3571-3594.
Expand Down

0 comments on commit db4aef9

Please sign in to comment.