reorganize documentation and internal functions

stan-dev · Oct 19, 2016 · db4aef9 · db4aef9
1 parent 9ad61cc
commit db4aef9
Show file tree

Hide file tree

Showing 23 changed files with 371 additions and 302 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: loo
 Type: Package
 Title: Efficient Leave-One-Out Cross-Validation and WAIC for Bayesian Models
-Version: 0.1.6.9999
+Version: 0.1.7
 Date: 2016-06-13
 Authors@R: c(person("Aki", "Vehtari", email = "Aki.Vehtari@aalto.fi", role = c("aut")),
              person("Andrew", "Gelman", email = "gelman@stat.columbia.edu", role = c("aut")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,10 @@
 # loo 0.1.7
-* `pareto_k_ids` convenience function for quickly identifying problematic
-observations
-* warning messages about pareto k values are now issued by `psislw` instead of
-`print.loo`
+* `pareto_k_table` and `pareto_k_ids` convenience functions for quickly identifying problematic observations
 * pareto k values now grouped into `(-Inf, 0.5]`, `(0.5, 0.7]`, `(0.7, 1]`, 
 `(1, Inf)` (didn't used to include 0.7)
+* warning messages are now issued by `psislw` instead of `print.loo`
 * `print.loo` shows a table of pareto k estimates (if any k > 0.7)
-* Add argument to `compare` to allows loo objects to be provided in a list
+* Add argument to `compare` to allows loo objects to be provided in a list rather than in '...'
 
 # loo 0.1.6
 * GitHub repository moved from @jgabry to @stan-dev

diff --git a/R/compare.R b/R/compare.R
@@ -8,12 +8,14 @@
 #' @param x A list of at least two objects returned by \code{\link{loo}} or
 #'   \code{\link{waic}}. This argument can be used as an alternative to
 #'   specifying the models in \code{...}.
-#' @return A vector or matrix with class \code{'compare.loo'}. If exactly two
-#'   objects are provided in \code{...} or \code{x}, then the difference in
-#'   expected predictive accuracy and the standard error of the difference are
-#'   returned (see Details). \emph{The difference will be positive if the
-#'   expected predictive accuracy for the second model is higher.} If more than
-#'   two objects are provided then a matrix of summary information is returned.
+#'
+#' @return A vector or matrix with class \code{'compare.loo'} that has its own
+#'   print method. If exactly two objects are provided in \code{...} or
+#'   \code{x}, then the difference in expected predictive accuracy and the
+#'   standard error of the difference are returned (see Details). \emph{The
+#'   difference will be positive if the expected predictive accuracy for the
+#'   second model is higher.} If more than two objects are provided then a
+#'   matrix of summary information is returned.
 #'
 #' @details When comparing two fitted models, we can estimate the difference in
 #'   their expected predictive accuracy by the difference in \code{elpd_waic} or
@@ -35,9 +37,8 @@
 #'   currently working on something similar to these weights that also accounts
 #'   for uncertainty, which will be included in future versions of \pkg{loo}.
 #'
-#' @template loo-paper-reference
+#' @template loo-and-psis-references
 #'
-#' @seealso \code{\link{print.compare.loo}}
 #' @examples
 #' \dontrun{
 #' loo1 <- loo(log_lik1)
@@ -100,3 +101,9 @@ compare <- function(..., x) {
     comp
   }
 }
+
+#' @export
+print.compare.loo <- function(x, ..., digits = 1) {
+  print(.fr(x, digits), quote = FALSE)
+  invisible(x)
+}
diff --git a/R/gpdfit.R b/R/gpdfit.R
@@ -13,8 +13,8 @@
 #'
 #' @template internal-function-note
 #'
-#' @seealso \code{\link{psislw}}, \code{\link{loo}},
-#' \code{\link{loo-package}}
+#' @seealso \code{\link{psislw}}, \code{\link{pareto-k-diagnostic}},
+#'   \code{\link{loo-package}}
 #'
 #' @references
 #' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method
@@ -38,6 +38,8 @@ gpdfit <- function(x) {
   nlist(k, sigma)
 }
 
+
+# internal ----------------------------------------------------------------
 lx <- function(a,x) {
   a <- -a
   k <- sapply(a, FUN = function(y) mean(log1p(y * x)))

diff --git a/R/helpers.R b/R/helpers.R
@@ -72,43 +72,11 @@ pointwise_loo <- function(psis, log_lik, llfun = NULL, llargs = NULL) {
   out
 }
 
-# psis helpers ------------------------------------------------------------
-
-# inverse-CDF of generalized Pareto distribution (formula from Wikipedia)
-qgpd <- function(p, xi = 1, mu = 0, sigma = 1, lower.tail = TRUE) {
-  if (is.nan(sigma) || sigma <= 0)
-    return(rep(NaN, length(p)))
-  if (!lower.tail)
-    p <- 1 - p
-
-  mu + sigma * ((1 - p)^(-xi) - 1) / xi
-}
-
-lw_cutpoint <- function(y, wcp, min_cut) {
-  if (min_cut < log(.Machine$double.xmin))
-    min_cut <- -700
-  cp <- quantile(y, 1 - wcp, names = FALSE)
-  max(cp, min_cut)
-}
-
-lw_truncate <- function(y, wtrunc) {
-  if (wtrunc == 0)
-    return(y)
-  logS <- log(length(y))
-  lwtrunc <- wtrunc * logS - logS + logSumExp(y)
-  y[y > lwtrunc] <- lwtrunc
-  y
-}
-
-#' @importFrom matrixStats logSumExp
-lw_normalize <- function(y) {
-  y - logSumExp(y)
-}
 
 # print and warning helpers -----------------------------------------------
 .fr <- function(x, digits) format(round(x, digits), nsmall = digits)
 .warn <- function(..., call. = FALSE) warning(..., call. = call.)
-.k_help <- function() c("See PSIS-LOO description (?'loo-package') for more information")
+.k_help <- function() "See help('pareto-k-diagnostic') for details."
 .k_cut <- function(k) {
   cut(
     k,
@@ -153,49 +121,6 @@ pwaic_warnings <- function(p, digits = 1) {
 }
 
 
-# plot pareto k estimates -------------------------------------------------
-#' @importFrom graphics abline axis plot points text
-plot_k <- function(k, ..., label_points = FALSE) {
-  inrange <- function(a, rr) a >= rr[1L] & a <= rr[2L]
-  plot(k, xlab = "Data point", ylab = "Shape parameter k",
-       type = "n", bty = "l", yaxt = "n")
-  axis(side = 2, las = 1)
-  krange <- range(k)
-  breaks <- c(0, 0.5, 0.7, 1)
-  hex_clrs <- c("#C79999", "#A25050", "#7C0000")
-  ltys <- c(3, 4, 2, 1)
-  for (j in seq_along(breaks)) {
-    val <- breaks[j]
-    if (inrange(val, krange))
-      abline(h = val, col = ifelse(val == 0, "darkgray", hex_clrs[j-1]),
-             lty = ltys[j], lwd = 1)
-  }
-
-  breaks <- c(-Inf, 0.5, 1)
-  hex_clrs <- c("#6497b1", "#005b96", "#03396c")
-  clrs <- ifelse(inrange(k, breaks[1:2]), hex_clrs[1],
-                 ifelse(inrange(k, breaks[2:3]), hex_clrs[2], hex_clrs[3]))
-  if (all(k < 0.5) || !label_points) {
-    points(k, col = clrs, pch = 3, cex = .6)
-    return(invisible())
-  } else {
-    points(k[k < 0.5], col = clrs[k < 0.5], pch = 3, cex = .6)
-    sel <- !inrange(k, breaks[1:2])
-    dots <- list(...)
-    txt_args <- c(list(x = seq_along(k)[sel], y = k[sel],
-                       labels = seq_along(k)[sel]),
-                  if (length(dots)) dots)
-    if (!("adj" %in% names(txt_args)))
-      txt_args$adj <- 2/3
-    if (!("cex" %in% names(txt_args)))
-      txt_args$cex <- 0.75
-    if (!("col" %in% names(txt_args)))
-      txt_args$col <- clrs[sel]
-
-    do.call("text", txt_args)
-  }
-}
-
 
 # convenience functions ---------------------------------------------------
 is.loo <- function(x) {

diff --git a/R/loo.R b/R/loo.R
@@ -64,11 +64,15 @@
 #' \code{\link{loo-package}} for details on the Pareto Smoothed Importance
 #' Sampling (PSIS) procedure used for approximating LOO.
 #'
-#' \code{\link{print.loo}} for the \code{print} and \code{plot} methods for
-#' \code{'loo'} objects.
-#'
 #' \code{\link{compare}} for model comparison.
 #'
+#' \code{\link{pareto-k-diagnostic}} for convenience functions for looking at
+#' diagnostics.
+#'
+#' \code{\link{print.loo}} for the \code{print} method for \code{'loo'} objects.
+#'
+#' @template loo-and-psis-references
+#'
 #' @examples
 #' \dontrun{
 #' ### Usage with stanfit objects

diff --git a/R/loo_package.R b/R/loo_package.R
@@ -5,13 +5,9 @@
 #'
 #' @importFrom stats sd var quantile
 #'
-#' @description This package implements the methods described in
-#'
-#' Vehtari, A., Gelman, A., and Gabry, J. (2016). Practical Bayesian model
-#' evaluation using leave-one-out cross-validation and WAIC. arXiv preprint:
-#' \url{http://arxiv.org/abs/1507.04544}.
-#'
-#' The package documentation is largely based on excerpts from the paper.
+#'@description This package implements the methods described in Vehtari, Gelman,
+#'  and Gabry (2016a). The package documentation is largely based on excerpts
+#'  from the paper.
 #'
 #' @section Summary: Leave-one-out cross-validation (LOO) and the widely
 #'   applicable information criterion (WAIC) are methods for estimating
@@ -21,12 +17,12 @@
 #'   estimates of predictive error such as AIC and DIC but are less used in
 #'   practice because they involve additional computational steps. This package
 #'   implements the fast and stable computations for LOO and WAIC laid out in
-#'   Vehtari, Gelman, and Gabry (2015). From existing posterior simulation
+#'   Vehtari, Gelman, and Gabry (2016a). From existing posterior simulation
 #'   draws, we compute LOO using Pareto Smoothed Importance Sampling (PSIS;
-#'   Vehtari and Gelman, 2015), a new procedure for regularizing importance
-#'   weights. As a byproduct of our calculations, we also obtain approximate
-#'   standard errors for estimated predictive errors and for comparing of
-#'   predictive errors between two models.
+#'   Vehtari, Gelman, and Gabry, 2016b), a new procedure for regularizing
+#'   importance weights. As a byproduct of our calculations, we also obtain
+#'   approximate standard errors for estimated predictive errors and for
+#'   comparing of predictive errors between two models.
 #'
 #' @section Details: After fitting a Bayesian model we often want to measure its
 #'   predictive accuracy, for its own sake or for purposes of model comparison,
@@ -144,7 +140,7 @@
 #' influential observations. A robust model may reduce the sensitivity to highly
 #' influential observations.
 #'
-#' @template loo-paper-reference
+#' @template loo-and-psis-references
 #' @references
 #' Epifani, I., MacEachern, S. N., and Peruggia, M. (2008). Case-deletion
 #' importance sampling estimators: Central limit theorems and related results.
@@ -179,9 +175,6 @@
 #' Stan Development Team (2016). RStan: the R interface to Stan, Version 2.10.1
 #' \url{http://mc-stan.org/interfaces/rstan.html}.
 #'
-#' Vehtari, A., and Gelman, A. (2015).  Pareto smoothed importance sampling.
-#' arXiv:1507.02646.
-#'
 #' Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and
 #' widely application information criterion in singular learning theory.
 #' \emph{Journal of Machine Learning Research} \strong{11}, 3571-3594.