Add pp_check functionality for stanjm objects

stan-dev · Mar 23, 2017 · b9fe9e0 · b9fe9e0
1 parent 49ab027
commit b9fe9e0
Show file tree

Hide file tree

Showing 14 changed files with 165 additions and 67 deletions.
diff --git a/R/datasets.R b/R/datasets.R
@@ -20,7 +20,7 @@
 #' Small datasets for use in \pkg{rstanarm} examples and vignettes.
 #'
 #' @name rstanarm-datasets
-#' @aliases kidiq roaches wells bball1970 bball2006 mortality tumors radon
+#' @aliases kidiq roaches wells bball1970 bball2006 mortality tumors radon pbcLong pbcSurv
 #' @format 
 #' \describe{
 #' \item{\code{bball1970}}{
@@ -76,6 +76,34 @@
 #' \item \code{K} Number of surgeries
 #' }
 #' }
+#' \item{\code{pbcLong,pbcSurv}}{
+#' Longitudinal biomarker and time-to-event survival data for 40 patients 
+#' with primary biliary cirrhosis who participated in a randomised 
+#' placebo controlled trial of D-penicillamine conducted at the Mayo
+#' Clinic between 1974 and 1984.
+#' 
+#' Source: Therneau and Grambsch (2000)
+#' 
+#' 304 obs. of 8 variables (\code{pbcLong}) and 40 obs. of 7 variables (\code{pbcSurv})
+#' \itemize{
+#' \item \code{age} {in years}
+#' \item \code{albumin} {serum albumin (g/dl)}
+#' \item \code{logBili} {logarithm of serum bilirubin}
+#' \item \code{death} {indicator of death at endpoint} 
+#' \item \code{futimeYears} {time (in years) between baseline and  
+#'     the earliest of death, transplantion or censoring}
+#' \item \code{id} {numeric ID unique to each individual}
+#' \item \code{platelet} {platelet count}
+#' \item \code{sex} {gender (m = male, f = female)}
+#' \item \code{status} {status at endpoint (0 = censored, 
+#'     1 = transplant, 2 = dead)}
+#' \item \code{trt} {binary treatment code (0 = placebo, 1 = 
+#'     D-penicillamine)}
+#' \item \code{year} {time (in years) of the longitudinal measurements,
+#'     taken as time since baseline)}
+#' }
+#' }
+#' 
 #' \item{\code{radon}}{
 #' Data on radon levels in houses in the state of Minnesota. 
 #' 
@@ -159,6 +187,9 @@
 #' Tarone, R. E. (1982) The use of historical control information in testing for
 #' a trend in proportions. \emph{Biometrics} \strong{38}(1):215--220.
 #' 
+#' Therneau, T. and Grambsch, P. (2000) \emph{Modeling Survival Data: Extending 
+#' the Cox Model}. Springer-Verlag, New York, US.
+#' 
 #' @examples 
 #' # Using 'kidiq' dataset 
 #' fit <- stan_lm(kid_score ~ mom_hs * mom_iq, data = kidiq, 

diff --git a/R/example_jm.R b/R/example_jm.R
@@ -0,0 +1,45 @@
+# Part of the rstanarm package for estimating model parameters
+# Copyright (C) 2015, 2016 Trustees of Columbia University
+# Copyright (C) 2016 Sam Brilleman
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 3
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+#' Example joint longitudinal and time-to-event model
+#' 
+#' A model for use in the \pkg{rstanarm} examples related to \code{\link{stan_jm}}. 
+#' 
+#' @name example_jm
+#' @format Calling \code{example("example_jm")} will run the model in the 
+#'   Examples section, below, and the resulting stanjm object will then be
+#'   available in the global environment. The \code{chains} and \code{iter}
+#'   arguments are specified to make this example be small in size. In practice,
+#'   we recommend that they be left unspecified in order to use the default
+#'   values or increased if there are convergence problems. The \code{cores} 
+#'   argument is optional and on a multicore system, the user may well want 
+#'   to set that equal to the number of chains being executed.
+#'   
+#' @examples
+#'   set.seed(123)
+#'   example_jm <- 
+#'      stan_jm(formulaLong = logBili ~ year + (1 | id), 
+#'              dataLong = pbcLong,
+#'              formulaEvent = Surv(futimeYears, death) ~ sex + trt, 
+#'              dataEvent = pbcSurv,
+#'              time_var = "year",
+#'              # this next line is only to keep the example small in size!
+#'              chains = 1, cores = 1, seed = 12345, iter = 1000)
+#' 
+#' 
+NULL
diff --git a/R/posterior_linpred.R b/R/posterior_linpred.R
@@ -78,14 +78,15 @@ posterior_linpred.stanreg <-
     dat <- pp_data(object,
                    newdata = newdata,
                    re.form = re.form,
-                   offset = offset)
+                   offset = offset,
+                   ...)
     if (XZ) {
       XZ <- dat[["x"]]
       if (is.mer(object))
         XZ <- cbind(XZ, t(dat[["Zt"]]))
       return(XZ)
     }
-    eta <- pp_eta(object, data = dat, draws = NULL)[["eta"]]
+    eta <- pp_eta(object, data = dat, draws = NULL, ...)[["eta"]]
     if (!transform)
       return(eta)
 

diff --git a/R/posterior_predict.R b/R/posterior_predict.R
@@ -143,15 +143,19 @@ posterior_predict.stanreg <- function(object, newdata = NULL, draws = NULL,
     fun <- match.fun(fun)
 
   dots <- list(...)
-  m <- if (is.stanjm(object) && is.null(dots$m)) 1 else dots$m 
+  if (is.stanjm(object)) {
+    dots <- list(...)
+    m <- dots[["m"]]
+    if (is.null(m))
+      stop("Argument 'm' must be provided for stanjm objects.")
+  } else m <- NULL
 
   newdata <- validate_newdata(newdata)
   dat <-
     pp_data(object,
             newdata = newdata,
             re.form = re.form,
             offset = offset,
-            m = m,
             ...)
   if (is_scobit(object)) {
     data <- pp_eta(object, dat, NULL)

diff --git a/R/posterior_survfit.R b/R/posterior_survfit.R
@@ -92,7 +92,7 @@
 #'   percentiles will be provided.
 #' @param times A numeric vector of length 1 or a character string. Specifies the  
 #'   times at which to obtain the estimated survival probabilities. 
-#'   If \code{newdata} is not provided, then the 
+#'   If \code{newdata} is \code{NULL}, then the 
 #'   \code{times} argument is optional; if it is not provided then \code{times} 
 #'   will default to the last known event or censoring time for each individual,
 #'   whereas if it is provided then it must be a numeric vector of length 1, and 
@@ -161,13 +161,13 @@
 #' 
 #' @seealso \code{\link{plot.survfit.stanjm}} for plotting the estimated survival  
 #'   probabilities, \code{\link{ps_check}} for for graphical checks of the estimated 
-#'   survival function, and \code{\link{posterior_predict}} for estimating the
+#'   survival function, and \code{\link{posterior_traj}} for estimating the
 #'   marginal or subject-specific longitudinal trajectories.
 #'   
 #' @examples
 #' 
 #'   # Run example model if not already loaded
-#'   if (!exists("examplejm")) example(examplejm)
+#'   if (!exists("example_jm")) example(example_jm)
 #'   
 #'   # Obtain subject-specific survival probabilities for a few
 #'   # selected individuals in the estimation dataset who were  
@@ -176,8 +176,8 @@
 #'   # survival probabilities, that is, conditional on having survived
 #'   # until the event or censoring time, and then by default will
 #'   # extrapolate the survival predictions forward from there.  
-#'   head(pbcSurv_subset[pbcSurv_subset$status == 0,])
-#'   ps1 <- posterior_survfit(examplejm, ids = c(7,13,16))
+#'   head(pbcSurv[pbcSurv$status == 0,])
+#'   ps1 <- posterior_survfit(example_jm, ids = c(7,13,16))
 #'   head(ps1)
 #'   # We can plot the estimated survival probabilities using the
 #'   # associated plot function
@@ -193,8 +193,8 @@
 #'   # is to specify that we want the survival time estimated at time 0
 #'   # and then extrapolated forward 5 years. We also specify that we
 #'   # do not want to condition on their last known survival time.
-#'   ps2 <- posterior_survfit(examplejm, ids = c(7,13,16), times == 0,
-#'     extrapolate = TRUE, control = list(ext_distance = 5, condition = FALSE))
+#'   ps2 <- posterior_survfit(example_jm, ids = c(7,13,16), times == 0,
+#'     extrapolate = TRUE, control = list(edist = 5, condition = FALSE))
 #'   ps2
 #'   
 #'   # Instead of estimating survival probabilities for a specific individual 
@@ -215,8 +215,8 @@
 #'   nd <- data.frame(id = c("new1", "new2"),
 #'                    sex = c("f", "f"), 
 #'                    trt = c(1, 0))
-#'   ps3 <- posterior_survfit(examplejm, newdata = nd, times = 0,
-#'     extrapolate = TRUE, control = list(ext_distance = 5, condition = FALSE))
+#'   ps3 <- posterior_survfit(example_jm, newdata = nd, times = 0,
+#'     extrapolate = TRUE, control = list(edist = 5, condition = FALSE))
 #'   ps3
 #'   
 #'   # We can then plot the estimated survival functions to compare
@@ -229,7 +229,7 @@
 #'   # for individuals in our estimation sample) then we can specify
 #'   # 'standardise = TRUE'. We can then plot the resulting standardised
 #'   # survival curve.
-#'   ps4 <- posterior_survfit(examplejm, standardise = TRUE, 
+#'   ps4 <- posterior_survfit(example_jm, standardise = TRUE, 
 #'                            times = 0, extrapolate = TRUE)
 #'   plot(ps4)                         
 #' 
@@ -482,16 +482,16 @@ posterior_survfit <- function(object, newdata = NULL, extrapolate = TRUE,
 #'   It can also be passed to the function \code{\link{plot_stack}}.
 #'   
 #' @seealso \code{\link{posterior_survfit}}, \code{\link{plot_stack}},
-#'   \code{\link{posterior_predict}}, \code{\link{plot.predict.stanjm}}      
+#'   \code{\link{posterior_traj}}, \code{\link{plot.predict.stanjm}}      
 #'   
 #' @examples 
 #' 
 #'   # Run example model if not already loaded
-#'   if (!exists("examplejm")) example(examplejm)
+#'   if (!exists("example_jm")) example(example_jm)
 #'   
 #'   # Obtain subject-specific conditional survival probabilities
 #'   # for all individuals in the estimation dataset.
-#'   ps1 <- posterior_survfit(examplejm, extrapolate = TRUE)
+#'   ps1 <- posterior_survfit(example_jm, extrapolate = TRUE)
 #'   
 #'   # We then plot the conditional survival probabilities for
 #'   # a subset of individuals
@@ -516,17 +516,16 @@ posterior_survfit <- function(object, newdata = NULL, extrapolate = TRUE,
 #'   # subject-specific survival functions, with plot(s) 
 #'   # of the estimated longitudinal trajectories for the
 #'   # same individuals
-#'   ps1 <- posterior_survfit(examplejm, ids = c(7,13,16))
-#'   pt1 <- posterior_predict(examplejm, , ids = c(7,13,16),
-#'                            interpolate = TRUE, extrapolate = TRUE)
+#'   ps1 <- posterior_survfit(example_jm, ids = c(7,13,16))
+#'   pt1 <- posterior_traj(example_jm, , ids = c(7,13,16))
 #'   plot_surv <- plot(ps1) 
 #'   plot_traj <- plot(pt1, vline = TRUE, plot_observed = TRUE)
 #'   plot_stack(plot_traj, plot_surv)
 #'    
 #'   # Lastly, let us plot the standardised survival function
 #'   # based on all individuals in our estimation dataset
-#'   ps2 <- posterior_survfit(examplejm, standardise = TRUE, times = 0,
-#'                           control = list(ext_points = 20))
+#'   ps2 <- posterior_survfit(example_jm, standardise = TRUE, times = 0,
+#'                           control = list(epoints = 20))
 #'   plot(ps2)   
 #' 
 #'    

diff --git a/R/posterior_traj.R b/R/posterior_traj.R
@@ -118,30 +118,30 @@
 #' @examples
 #' \donttest{
 #'   # Run example model if not already loaded
-#'   if (!exists("examplejm")) example(examplejm)
+#'   if (!exists("example_jm")) example(example_jm)
 #'   
 #'   # Obtain subject-specific predictions for all individuals 
 #'   # in the estimation dataset
-#'   pt1 <- posterior_traj(examplejm, interpolate = FALSE, extrapolate = FALSE)
+#'   pt1 <- posterior_traj(example_jm, interpolate = FALSE, extrapolate = FALSE)
 #'   head(pt1)
 #'   
 #'   # Obtain subject-specific predictions only for a few selected individuals
-#'   pt2 <- posterior_traj(examplejm, ids = c(1,3,8))
+#'   pt2 <- posterior_traj(example_jm, ids = c(1,3,8))
 #'   
 #'   # If we wanted to obtain subject-specific predictions in order to plot the 
 #'   # longitudinal trajectories, then we might want to ensure a full trajectory 
 #'   # is obtained by interpolating and extrapolating time. We can then use the 
 #'   # generic plot function to plot the subject-specific predicted trajectories
 #'   # for the first three individuals. Interpolation and extrapolation is 
 #'   # carried out by default.
-#'   pt3 <- posterior_traj(examplejm)
+#'   pt3 <- posterior_traj(example_jm)
 #'   head(pt3) # predictions at additional time points compared with pt1 
 #'   plot(pt3, ids = 1:3)
 #'   
 #'   # If we wanted to extrapolate further in time, but decrease the number of 
 #'   # discrete time points at which we obtain predictions for each individual, 
 #'   # then we could specify a named list in the 'control' argument
-#'   pt4 <- posterior_traj(examplejm, control = list(ipoints = 10, epoints = 10, eprop = 0.5))
+#'   pt4 <- posterior_traj(example_jm, control = list(ipoints = 10, epoints = 10, eprop = 0.5))
 #'   
 #'   # Alternatively we may want to estimate the marginal longitudinal
 #'   # trajectory for a given set of covariates. To do this, we can pass
@@ -155,7 +155,7 @@
 #'   # Our marginal prediction will therefore capture the between-individual 
 #'   # variation associated with the random effects.)
 #'   nd <- data.frame(id = rep("new1", 11), year = (0:10 / 2))
-#'   pt5 <- posterior_traj(examplejm, newdata = nd)
+#'   pt5 <- posterior_traj(example_jm, newdata = nd)
 #'   head(pt5)  # note the greater width of the uncertainty interval compared 
 #'              # with the subject-specific predictions in pt1, pt2, etc
 #'   
@@ -176,7 +176,7 @@
 #'   # the fixed effect component of the model, we simply specify 're.form = NA'. 
 #'   # (We will use the same covariate values as used in the prediction for 
 #'   # example for pt5).
-#'   pt6 <- posterior_traj(examplejm, newdata = nd, re.form = NA)
+#'   pt6 <- posterior_traj(example_jm, newdata = nd, re.form = NA)
 #'   head(pt6)  # note the much narrower ci, compared with pt5
 #' }
 #' 
@@ -339,18 +339,18 @@ posterior_traj <- function(object, m = 1, newdata = NULL,
 #' @examples 
 #' 
 #'   # Run example model if not already loaded
-#'   if (!exists("examplejm")) example(examplejm)
+#'   if (!exists("example_jm")) example(example_jm)
 #'   
 #'   # For a subset of individuals in the estimation dataset we will
 #'   # obtain subject-specific predictions for the longitudinal submodel 
 #'   # at evenly spaced times between 0 and their event or censoring time.
-#'   pt1 <- posterior_traj(examplejm, ids = c(7,13,16), interpolate = TRUE)
+#'   pt1 <- posterior_traj(example_jm, ids = c(7,13,16), interpolate = TRUE)
 #'   plot(pt1)                  # credible interval for mean response
 #'   plot(pt1, limits = "pi")   # prediction interval for raw response
 #'   plot(pt1, limits = "none") # no uncertainty interval
 #'   
 #'   # We can also extrapolate the longitudinal trajectories.
-#'   pt2 <- posterior_traj(examplejm, ids = c(7,13,16), interpolate = TRUE,
+#'   pt2 <- posterior_traj(example_jm, ids = c(7,13,16), interpolate = TRUE,
 #'                            extrapolate = TRUE)
 #'   plot(pt2)
 #'   plot(pt2, vline = TRUE)    # add line indicating event or censoring time