Permalink
Browse files

stanpredict functional, but still changing rapidly

  • Loading branch information...
rmcelreath committed Apr 26, 2013
1 parent 4985a5c commit 784798f7c77885ae22917da925e68f34a3dd5f71
Showing with 136 additions and 16 deletions.
  1. +1 −0 R/glmer2stan.R
  2. +55 −16 R/predict.R
  3. +8 −0 R/zzz.R
  4. +13 −0 data/UCBadmit.csv
  5. +59 −0 man/stanpredict.Rd
View
@@ -1,4 +1,5 @@
# to do:
# (*) families: use built-in families and links (the closures) instead of strings
# (*) turbo option, to apply matt trick and use matrix operations for glm (design matrix)...reduces readability of model, but potentially large speed benefits
# (*) eventually make default chains=4, but need to monitor Rhat
# (*) make "conjugate" priors for traditional inv_wishart/inv_gamma...these priors suck, but people like them
View
@@ -1,10 +1,13 @@
########################################################################
# functions to compute posterior predictions from glmer2stan fit models
# by default, computes predictions for observed cases
# when new data is provided, computes for new cases
# to do
# (*) coerce cluster variables to integer
# (*) replace missing variables with means from data used in fitting
# (*) when data missing entirely, use data used in fitting
# (*) structure result as data frame?
stanpredict <- function( stanfit , data=NA , vary_prefix="vary_" , fixed_prefix="beta_" , probs=c(0.025,0.975) , nsims=1e4 , ... ) {
stanpredict <- function( stanfit , data , vary_prefix="vary_" , fixed_prefix="beta_" , probs=c(0.025,0.975) , nsims=1e4 ) {
undot <- function( astring ) {
astring <- gsub( "." , "_" , astring , fixed=TRUE )
@@ -20,18 +23,24 @@ stanpredict <- function( stanfit , data=NA , vary_prefix="vary_" , fixed_prefix=
quantile(samples, probs = c(a, 1 - a))
}
logistic <- function (x)
{
p <- 1/(1 + exp(-x))
p <- ifelse(x == Inf, 1, p)
p
}
# check params
if ( missing(data) ) {
# extract cases from fit
stop("no data")
}
if ( class(stanfit) != "stanfit" ) {
stop("requires stanfit object")
}
#if ( is.na( attr(stanfit,"formulas") ) ) {
# stop("stanfit object not fit by glmer2stan (cannot parse formulas)")
#}
if ( is.null( attr(stanfit,"formulas") ) ) {
stop("stanfit object not fit by glmer2stan (cannot parse formulas)")
}
# compute vary and fixef components of linear model
# need:
@@ -48,6 +57,27 @@ stanpredict <- function( stanfit , data=NA , vary_prefix="vary_" , fixed_prefix=
family <- attr( stanfit , "formulas" )$family
formula <- attr( stanfit , "formulas" )$formula
# check for missing outcome values in data
# don't need outcomes, but design.matrix wants them for its input
# so can just add placeholders
for ( f in 1:num_formulas ) {
data[[ fp[[f]]$yname ]] <- 0
# for binomial, also need failure count
# so stop and warn user
if ( family[[f]]=="binomial" ) {
if ( length( formula[[f]][[2]] )==3 ) {
# cbind construction
bintotname <- deparse( formula[[f]][[2]][[3]] )
# check for presence
if ( is.null( data[[ bintotname ]] ) )
stop( paste( "Failure count '" , bintotname , "' required as element of data list." , sep="" ) )
} else {
# bernoulli construction
# do nothing for now
}
}
}
# build data (design) matrix by re-parsing formulas with new data frame
# this is mainly needed so user doesn't have to manually construct interactions
@@ -163,24 +193,27 @@ stanpredict <- function( stanfit , data=NA , vary_prefix="vary_" , fixed_prefix=
if ( family[[f]] == "gaussian" ) {
# gaussian noise around mean
parname <- paste( "sigma" , var_suffix[f] , sep="" )
outsim <- sapply( 1:nrow(glm2) , function(i) PCI(
rnorm( nsims , glm2[i,] , post[[parname]] ) ) )
outsim <- sapply( 1:nrow(glm2) , function(i) quantile(
rnorm( nsims , glm2[i,] , post[[parname]] ) , probs=probs ) )
}
if ( family[[f]] == "binomial" ) {
# apply inverse link
glm2 <- logistic( glm2 )
# simulate
bintotname <- paste( "bin_total" , var_suffix[f] , sep="" )
outsim <- sapply( 1:nrow(glm2) , function(i) PCI(
rbinom( nsims , prob=logistic(glm2[i,]) , size=data_list[[bintotname]][i] )/data_list[[bintotname]][i] ) )
outsim <- sapply( 1:nrow(glm2) , function(i) quantile(
rbinom( nsims , prob=glm2[i,] , size=data_list[[bintotname]][i] )/data_list[[bintotname]][i] , probs=probs ) )
}
# store results
# compute summary stats
mu <- apply( glm2 , 1 , mean )
ci <- apply( glm2 , 1 , PCI )
ci <- apply( glm2 , 1 , quantile , probs=probs )
obs <- outsim
# result
# name by outcome variable
result[[ fp[[f]]$yname ]] <- list( vary=vary , glm=glm , glm2=glm2 , mu=mu , obs=obs , upper=ci[2,] , lower=ci[1,] , ci=ci )
result[[ fp[[f]]$yname ]] <- list( mu=mu , mu.ci=ci , obs.ci=obs )
} #f
@@ -198,6 +231,12 @@ stanpredict <- function( stanfit , data=NA , vary_prefix="vary_" , fixed_prefix=
# z <- stanpredict( m , data=UCBadmit )[[1]]
# UCBadmit$p.admit <- UCBadmit$admit / UCBadmit$applications
# plot( UCBadmit$p.admit , ylim=c(0,1) , pch=ifelse(UCBadmit$male==1,16,1) )
# lines( 1:nrow(UCBadmit) , logistic(z$mu) )
# shade( logistic(z$ci) , 1:nrow(UCBadmit) )
# shade( z$obs , 1:nrow(UCBadmit) )
# lines( 1:nrow(UCBadmit) , z$mu )
# shade( z$mu.ci , 1:nrow(UCBadmit) )
# shade( z$obs.ci , 1:nrow(UCBadmit) )
# z <- stanpredict( m , data=list( reject=100 , dept=as.integer(3) , male=c(0,1) ) )[[1]]
# plot( 0:1 , type="n" , xlim=c(0,1) , ylim=c(0,1) , xlab="female/male" , ylab="prob admit" )
# lines( 0:1 , z$mu )
# shade( z$mu.ci , 0:1 )
# shade( z$obs.ci , 0:1 )
View
@@ -0,0 +1,8 @@
.onLoad <- function(libname, pkgname) { }
.onAttach <- function(...) {
rstanLib <- dirname(system.file(package = "glmer2stan"))
pkgdesc <- packageDescription("glmer2stan", lib.loc = rstanLib)
builddate <- gsub(';.*$', '', pkgdesc$Packaged)
packageStartupMessage(paste("glmer2stan (Version ", pkgdesc$Version, ", packaged: ", builddate, ")", sep = ""))
}
View
@@ -0,0 +1,13 @@
"dept";"male";"admit";"reject";"applications"
1;1;512;313;825
1;0;89;19;108
2;1;353;207;560
2;0;17;8;25
3;1;120;205;325
3;0;202;391;593
4;1;138;279;417
4;0;131;244;375
5;1;53;138;191
5;0;94;299;393
6;1;22;351;373
6;0;24;317;341
View
@@ -0,0 +1,59 @@
\name{stanpredict}
\alias{stanpredict}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Compute predictions from a model fit with glmer2stan}
\description{
This function computes predictions and prediction intervals from a model fit by \code{glmer2stan}. Predictions use the entire posterior for inference. Uncertainty due to both posterior probability and sampling distribution are included.
}
\usage{
stanpredict( stanfit , data , vary_prefix="vary_" , fixed_prefix="beta_" ,
probs=c(0.025,0.975) , nsims=1e4 )
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{fit}{\code{stanfit} object, produced with \code{\link{glmer2stan}}}
\item{data}{data frame of cases to use for prediction}
\item{fixed_prefix}{Identifying prefix for fixed effects, in the samples}
\item{vary_prefix}{Identifying prefix for varying effects, in the samples}
\item{probs}{Prediction quantiles to report (default is 95 percent interval)}
\item{nsims}{Number of simulations, for generating sampling distributions}
}
\details{
This function uses the posterior samples from a \code{stanfit} object, fit with \code{glmer2stan}, to simulate model predictions.
The returned value is a list with a named entry for each outcome variable in the model. Under each outcome variable is a list with: (1) \code{mu}, expectation of mean; (2) \code{mu.ci}, quantiles for the mean; and (3) \code{obs.ci}, quantiles for the predicted sampling distribution.
When passing new data to \code{stanpredict}, you can use a list with different length vectors. Short vectors will be grown to match the length of the longest vector, using repetition. For cluster variables, using a value of zero will be interpreted as omitting varying effects from predictions, generating predictions for average cases in the population. Otherwise, cluster variable values should be integers.
}
\references{}
\author{Richard McElreath}
\seealso{\code{\link{glmer2stan}}}
\examples{
# simple linear regression
data(cars)
m <- glmer2stan( dist ~ speed , data=cars )
speed.seq <- seq( from=min(cars$speed) , to=max(cars$speed) , length.out=20 )
pred.dist <- stanpredict( m , data=list( speed=speed.seq ) )$dist
plot( dist ~ speed , cars )
lines( speed.seq , pred.dist$mu )
lines( speed.seq , pred.dist$mu.ci[1,] , lty=2 )
lines( speed.seq , pred.dist$mu.ci[2,] , lty=2 )
# binomial example
data(UCBadmit)
m <- glmer2stan( cbind(admit,reject) ~ (1|dept) + male ,
data=UCBadmit , family="binomial" )
pred.admit <- stanpredict( m , data=UCBadmit )$admit
prop.admit <- UCBadmit$admit / UCBadmit$applications
plot( prop.admit , ylim=c(0,1) , pch=ifelse(UCBadmit$male==1,2,1) ,
ylab="probability admit" , xlab="case" )
lines( 1:12 , pred.admit$mu )
lines( 1:12 , pred.admit$mu.ci[1,] , lty=2 )
lines( 1:12 , pred.admit$mu.ci[2,] , lty=2 )
lines( 1:12 , pred.admit$obs.ci[1,] , lty=3 )
lines( 1:12 , pred.admit$obs.ci[2,] , lty=3 )
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ }

0 comments on commit 784798f

Please sign in to comment.