Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
3207 lines (2792 sloc) 120 KB
## ----ch1, child = "src/ch1.Rnw"------------------------------------------
## ----init1, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch01-', self.contained = FALSE)
suppressPackageStartupMessages(library(mice, warn.conflicts = FALSE, quietly = TRUE))
source("R/mi.hist.R")
## ----mean1---------------------------------------------------------------
y <- c(1, 2, 4)
mean(y)
## ----mean2---------------------------------------------------------------
y <- c(1, 2, NA)
mean(y)
## ----mean3---------------------------------------------------------------
mean(y, na.rm = TRUE)
## ----air2, eval = FALSE--------------------------------------------------
fit <- lm(Ozone ~ Wind, data = airquality)
# Error in na.fail.default: missing values in object}
## ----air3----------------------------------------------------------------
fit <- lm(Ozone ~ Wind, data = airquality, na.action = na.omit)
## ----air3b---------------------------------------------------------------
options(na.action = na.omit)
## ----air4, eval = FALSE--------------------------------------------------
airquality2 <- cbind(airquality, predict(fit))
# Error: arguments imply differing number of rows: 153, 116
## ----air5----------------------------------------------------------------
head(na.action(fit))
## ----air6----------------------------------------------------------------
naprint(na.action(fit))
## ----air7----------------------------------------------------------------
colSums(is.na(airquality))
## ----air8, include = FALSE-----------------------------------------------
airquality2 <- cbind(na.omit(airquality[, c("Ozone", "Wind")]),
predicted = predict(fit))
## ----plotair9, echo=FALSE, fig.width=4.5, fig.height=2.25, duo = TRUE----
lwd <- 0.6
plot(predicted ~ Ozone, data = airquality2, type = "n",
lwd = lwd, ylim = c(-20, 165), xlim = c(-20, 165),
ylab = "Ozone predicted (ppb)",
xlab = "Ozone measured (ppb)", axes = FALSE)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
abline(0, 1, lty = 3, lwd = 0.6)
points(predicted ~ Ozone, data = airquality2,
lwd = 1.5, col = mdc(1))
box(lwd = lwd)
fit2 <- lm(Ozone ~ Wind, data = airquality, na.action = na.exclude)
plot(predicted ~ Ozone, data = airquality2, type = "n",
ylim = c(-20, 165), xlim = c(-20, 165),
ylab = " Ozone predicted (ppb)",
xlab = "Ozone measured (ppb)",
axes = FALSE)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
abline(0, 1, lty = 3, lwd = 0.6)
pred2 <- cbind(1, airquality$Wind) %*% coef(fit)
abline(h = pred2[ici(predict(fit2))], col = mdc(2), lwd = 0.6, lty = 1)
box(lwd = lwd)
## ----airx6---------------------------------------------------------------
fit2 <- lm(Ozone ~ Wind + Solar.R, data = airquality)
naprint(na.action(fit2))
## ----pairwise1, eval=FALSE-----------------------------------------------
data <- airquality[, c("Ozone", "Solar.R", "Wind")]
mu <- colMeans(data, na.rm = TRUE)
cv <- cov(data, use = "pairwise")
## ----pairwise2, eval = FALSE---------------------------------------------
library(lavaan)
fit <- lavaan("Ozone ~ 1 + Wind + Solar.R
Ozone ~~ Ozone",
sample.mean = mu, sample.cov = cv,
sample.nobs = sum(complete.cases(data)))
## ----install, eval=FALSE-------------------------------------------------
install.packages("mice")
## ----load, eval = FALSE--------------------------------------------------
library("mice")
## ----meanimp, echo=TRUE--------------------------------------------------
imp <- mice(airquality, method = "mean", m = 1, maxit = 1)
## ----plotmeanimp, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25----
lwd <- 0.6
data <- complete(imp)
Yobs <- airquality[,"Ozone"]
Yimp <- data[,"Ozone"]
mi.hist(Yimp, Yobs,b=seq(-20,200,10),type="continuous",
gray=F,lwd = lwd,
obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5,
obs.col=mdc(4), mis.col=mdc(5), imp.col="transparent",
mlt=0.08,main="",xlab="Ozone (ppb)",
axes = FALSE)
box(lwd = 1)
plot(data[cci(imp),2:1],col=mdc(1), lwd=1.5,ylab="Ozone (ppb)",
xlab="Solar Radiation (lang)",ylim=c(-10,170),
axes = FALSE)
points(data[ici(imp),2:1],col=mdc(2),lwd=1.5)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = 1)
## ----regimp, echo=TRUE---------------------------------------------------
fit <- lm(Ozone ~ Solar.R, data = airquality)
pred <- predict(fit, newdata = ic(airquality))
## ----plotregimp, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25----
lwd <- 0.6
Yobs <- airquality[,"Ozone"]
Yimp <- Yobs
Yimp[ici(airquality)] <- pred
ss <- cci(airquality$Solar.R)
data <- data.frame(Ozone=Yimp, Solar.R=airquality$Solar.R)
mi.hist(Yimp[ss], Yobs[ss],b=seq(-20,200,10),type="continuous",
gray=F, lwd = lwd,
obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5,
obs.col=mdc(4),mis.col=mdc(5), imp.col="transparent",
mlt=0.08,main="",xlab="Ozone (ppb)", axes = FALSE)
box(lwd = 1)
plot(data[cci(imp),2:1],col=mdc(1),lwd=1.5,
ylab="Ozone (ppb)", xlab="Solar Radiation (lang)",
ylim=c(-10,170), axes = FALSE)
points(data[ici(imp),2:1],col=mdc(2),lwd=1.5)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = 1)
## ----regimp2, eval = FALSE-----------------------------------------------
data <- airquality[, c("Ozone", "Solar.R")]
imp <- mice(data, method = "norm.predict", seed = 1,
m = 1, print = FALSE)
xyplot(imp, Ozone ~ Solar.R)
## ----sri-----------------------------------------------------------------
data <- airquality[, c("Ozone", "Solar.R")]
imp <- mice(data, method = "norm.nob", m = 1, maxit = 1,
seed = 1, print = FALSE)
## ----plotsri, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25-----
lwd <- 0.6
data <- complete(imp)
Yobs <- airquality[, "Ozone"]
Yimp <- data[, "Ozone"]
mi.hist(Yimp, Yobs,
b = seq(-20, 200, 10), type = "continuous",
gray = FALSE, lwd = lwd,
obs.lwd = 1.5, mis.lwd = 1.5, imp.lwd = 1.5,
obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent",
mlt = 0.08, main = "", xlab = "Ozone (ppb)")
box(lwd = 1)
plot(data[cci(imp), 2:1], col = mdc(1),
lwd = 1.5, ylab = "Ozone (ppb)",
xlab = "Solar Radiation (lang)", ylim = c(-10, 170),
axes = FALSE)
points(data[ici(imp), 2:1], col = mdc(2), lwd = 1.5)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = 1)
## ----locf1---------------------------------------------------------------
airquality2 <- tidyr::fill(airquality, Ozone)
## ----locf2,echo=FALSE,fig.width=4.5,fig.height=2.5, solo=TRUE------------
lwd <- 0.6
Oz <- airquality$Oz
Ozi <- airquality2$Oz
colvec <- ifelse(is.na(Oz), mdc(2), mdc(1))
plot(Ozi[1:80], col = colvec, type = "l",
xlab = "Day number", ylab = "Ozone (ppb)",
lwd = lwd, axes = FALSE)
points(Ozi[1:80], col = colvec, pch = 20)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
## ----indicator1----------------------------------------------------------
imp <- mice(airquality, method = "mean", m = 1,
maxit = 1, print = FALSE)
airquality2 <- cbind(complete(imp),
r.Ozone = is.na(airquality[, "Ozone"]))
fit <- lm(Wind ~ Ozone + r.Ozone, data = airquality2)
## ----airx8---------------------------------------------------------------
imp <- mice(airquality, seed = 1, m = 20, print = FALSE)
fit <- with(imp, lm(Ozone ~ Wind + Temp + Solar.R))
summary(pool(fit))
## ----airx9---------------------------------------------------------------
fit <- lm(Ozone ~ Wind + Temp + Solar.R, data = airquality)
coef(summary(fit))
## ----plotairmi, echo=FALSE, fig.width=4.5, fig.height=2.25, duo = TRUE----
lwd <- 0.6
data <- complete(imp)
Yobs <- airquality[,"Ozone"]
Yimp <- data[,"Ozone"]
mi.hist(Yimp, Yobs,b=seq(-20,200,10),type="continuous",
gray=F,lwd = lwd,
obs.lwd=1.5, mis.lwd=1.5, imp.lwd=1.5,
obs.col=mdc(4),mis.col=mdc(5), imp.col="transparent",
mlt=0.08,main="",xlab="Ozone (ppb)")
box(lwd = 1)
plot(data[cci(imp),2:1],
col = mdc(1),lwd = 1.5,
ylab = "Ozone (ppb)",
xlab = "Solar Radiation (lang)",
ylim = c(-10, 170), axes = FALSE)
points(data[ici(imp),2:1],
col = mdc(2), lwd = 1.5)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = 1)
## ----air10, echo=FALSE,fig.width=4.5,fig.height=2.5, solo=TRUE-----------
Oz <- airquality$Ozone
colvec <- ifelse(is.na(Oz),mdc(2),mdc(1))
plot(Oz[1:80],col=mdc(1),type="l",xlab="Day number",ylab="Ozone (ppb)",
axes = FALSE,lwd = lwd)
points(Oz[1:80],col=mdc(1),pch=20)
lwd <- 0.6
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
idx <- ici(airquality$Ozone) & (1:153)<81
x <- (1:153)[idx]
points(x=x,y=complete(imp,1)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20)
points(x=x,y=complete(imp,2)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20)
points(x=x,y=complete(imp,3)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20)
points(x=x,y=complete(imp,4)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20)
points(x=x,y=complete(imp,5)$Ozone[idx],col=mdc(2),lwd=lwd,pch=20)
## ----acf, echo=FALSE-----------------------------------------------------
par(mfrow=c(2,5))
acf.ozone <- with(imp, acf(Ozone, plot = FALSE))
model <- expression(acf(resid(lm(Ozone ~ Wind + Temp + Solar.R)), plot = FALSE))
acf.resid <- with(imp,model, plot = FALSE)
calcacf <- function(acf.list) {
k <- length(acf.list)
acc <- acf.list[[1]]$acf
for (i in 2:k) acc <- acc + acf.list[[i]]$acf
return(acc/k)
}
oz <- round(calcacf(acf.ozone$analyses),2)
re <- round(calcacf(acf.resid$analyses),2)
## ----ch2, child = "src/ch2.Rnw"------------------------------------------
## ----init2, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch02-', self.contained = FALSE)
pkg <- c("mice", "lattice")
loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
## ----publications, echo=FALSE, fig.width=4.5, fig.height=2.25, solo=TRUE----
cit <- c( 2017, 75, 381, NA,
2016, 81, 328, NA,
2015, 59, 306, NA,
2014, 55, 281, NA,
2013, 45, 233, NA,
2012, 47, 214, NA,
2011, 55, 181, NA,
2010, 44, 157, NA,
2009, 38, 111, NA,
2008, 28, 102, NA,
2007, 34, 113, NA,
2006, 19, 75, NA,
2005, 21, 63, NA,
2004, 7, 44, NA,
2003, 18, 38, NA,
2002, 15, 36, NA,
2001, 14, 35, 57,
2000, 8, 19, 33,
1999, 6, 18, 47,
1998, 6, 12, 22,
1997, 6, 16, 29,
1996, 5, 12, 28,
1995, 3, 5, 20,
1994, 4, 7, 34,
1993, 3, 6, 15,
1992, NA, 4, NA,
1991, 3, 4, 19,
1990, 2, 3, 15,
1989, NA, 2, 11,
1988, NA, 1, 13,
1987, NA, 3, 10,
1986, 2, 3, 5,
1985, NA, NA, 1,
1984, NA, 1, 2,
1983, NA, NA, 5,
1982, NA, NA, 2,
1981, NA, NA, 1,
1980, NA, NA, 5,
1979, NA, NA, 2,
1978, NA, NA, 1,
1977, NA, NA, 2)
cit <- matrix(cit, nr=2018-1977, nc=4, byrow=TRUE)
cit <- as.data.frame(cit)
names(cit) <- c("Year","Title","Abstract","All")
par(cex = 0.7, lwd = 0.5)
plot(x = cit$Year, y = cit$Abstract, type="o",log="y",
xlim = c(1975,2017), ylim = c(1,400),
ylab="Number of publications (log)", xlab="Year",
pch=24, bg = "white",
axes=FALSE)
axis(1, at = seq(1977, 2017, 5), lwd = par("lwd"))
axis(2, lwd = par("lwd"), las=1)
lines(x=cit$Year, y=cit$Title, pch=15, type="o")
lines(x=cit$Year, y=cit$All, pch=16, type="o")
legend(x=1975,y=200,legend=c('early publications',
'"multiple imputation" in abstract',
'"multiple imputation" in title'),
pch=c(16,2,15), bty="n")
## ----mar-----------------------------------------------------------------
logistic <- function(x) exp(x) / (1 + exp(x))
set.seed(80122)
n <- 300
y <- MASS::mvrnorm(n = n, mu = c(0, 0),
Sigma = matrix(c(1, 0.5, 0.5, 1), nrow = 2))
r2.mcar <- 1 - rbinom(n, 1, 0.5)
r2.mar <- 1 - rbinom(n, 1, logistic(y[, 1]))
r2.mnar <- 1 - rbinom(n, 1, logistic(y[, 2]))
## ----marplot, echo=FALSE, fig.height=4, fig.width=6, solo = TRUE---------
library(lattice)
y1 <- y[, 1]
y2 <- y[, 2]
y3 <- rbind(y,y,y)
r2 <- c(r2.mcar,r2.mar,r2.mnar)
r2 <- factor(r2, labels=c("Ymis","Yobs"))
typ <- factor(rep(3:1,each=n),labels=c("MNAR","MAR","MCAR"))
d <- data.frame(y1=y3[,1],y2=y3[,2],r2=r2,typ=typ)
trellis.par.set(box.rectangle=list(col=c(mdc(2),mdc(1)),lwd=1.2))
trellis.par.set(box.umbrella=list(col=c(mdc(2),mdc(1)),lwd=1.2))
trellis.par.set(plot.symbol=list(col=mdc(3),lwd=1))
tp <- bwplot(r2~y2|typ, data=d,
horizontal=TRUE, layout=c(1,3),
xlab=expression(Y[2]),
col=c(mdc(2),mdc(1)),strip=FALSE, xlim=c(-3,3),
strip.left = strip.custom(bg="grey95"))
print(tp)
## ----theory1-------------------------------------------------------------
library(mice)
imp <- mice(nhanes, print = FALSE, m = 10, seed = 24415)
fit <- with(imp, lm(bmi ~ age))
est <- pool(fit)
est
## ----theory2-------------------------------------------------------------
summary(est, conf.int = TRUE)
## ----lin.sim1------------------------------------------------------------
create.data <- function(beta = 1, sigma2 = 1, n = 50,
run = 1) {
set.seed(seed = run)
x <- rnorm(n)
y <- beta * x + rnorm(n, sd = sqrt(sigma2))
cbind(x = x, y = y)
}
## ----lin.sim2------------------------------------------------------------
make.missing <- function(data, p = 0.5){
rx <- rbinom(nrow(data), 1, p)
data[rx == 0, "x"] <- NA
data
}
## ----lin.sim3------------------------------------------------------------
test.impute <- function(data, m = 5, method = "norm", ...) {
imp <- mice(data, method = method, m = m, print = FALSE, ...)
fit <- with(imp, lm(y ~ x))
tab <- summary(pool(fit), "all", conf.int = TRUE)
as.numeric(tab["x", c("estimate", "2.5 %", "97.5 %")])
}
## ----lin.sim4------------------------------------------------------------
simulate <- function(runs = 10) {
res <- array(NA, dim = c(2, runs, 3))
dimnames(res) <- list(c("norm.predict", "norm.nob"),
as.character(1:runs),
c("estimate", "2.5 %","97.5 %"))
for(run in 1:runs) {
data <- create.data(run = run)
data <- make.missing(data)
res[1, run, ] <- test.impute(data, method = "norm.predict",
m = 2)
res[2, run, ] <- test.impute(data, method = "norm.nob")
}
res
}
## ----lin.sim6a, cache=TRUE-----------------------------------------------
res <- simulate(1000)
## ----lin.sim6c-----------------------------------------------------------
apply(res, c(1, 3), mean, na.rm = TRUE)
## ----lin.bias------------------------------------------------------------
true <- 1
RB <- rowMeans(res[,, "estimate"]) - true
PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true)
CR <- rowMeans(res[,, "2.5 %"] < true & true < res[,, "97.5 %"])
AW <- rowMeans(res[,, "97.5 %"] - res[,, "2.5 %"])
RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2))
data.frame(RB, PB, CR, AW, RMSE)
## ----mse1----------------------------------------------------------------
rmse <- function(truedata, imp, v = "x") {
mx <- is.na(mice::complete(imp, 0))[, v]
mse <- rep(NA, imp$m)
for (k in seq_len(imp$m)) {
filled <- mice::complete(imp, k)[mx, v]
true <- truedata[mx, v]
mse[k] <- mean((filled - true)^2)
}
sqrt(mean(mse))
}
## ----mse2----------------------------------------------------------------
simulate2 <- function(runs = 10) {
res <- array(NA, dim = c(2, runs, 1))
dimnames(res) <- list(c("norm.predict", "norm.nob"),
as.character(1:runs),
"RMSE")
for(run in 1:runs) {
truedata <- create.data(run = run)
data <- make.missing(truedata)
imp <- mice(data, method = "norm.predict", m = 1,
print = FALSE)
res[1, run, ] <- rmse(truedata, imp)
imp <- mice(data, method = "norm.nob", print = FALSE)
res[2, run, ] <- rmse(truedata, imp)
}
res
}
## ----mse.sim6a, cache = TRUE---------------------------------------------
res2 <- simulate2(1000)
apply(res2, c(1, 3), mean, na.rm = TRUE)
## ----ch3, child = "src/ch3.Rnw"------------------------------------------
## ----init3, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch03-', self.contained = FALSE)
suppressPackageStartupMessages(library(gamlss, warn.conflicts = FALSE, quietly = TRUE))
pkg <- c("mice", "lattice", "MASS", "ImputeRobust", "rpart")
loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
source("R/mice.impute.x.R")
## ----gas1, six=TRUE, echo=FALSE, fig.width=4.5, fig.height=6.75---------
library("MASS")
data <- whiteside
lwd <- 1.5
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
points(x=5, y=3.6, pch=4, cex=2, lwd=lwd, col=mdc(2))
legend(x="bottomleft", legend="deleted observation", pch=4, col=mdc(2),
pt.lwd=lwd, bty="n", pt.cex=2, cex = 1.6)
text(x=9, y=6.5, label="a",cex=2)
data[47,"Gas"] <- NA
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4))
points(5,4.04, lwd=lwd, col=mdc(2),pch=19)
text(x=9, y=6.5, label="b",cex=2)
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
imp <- mice(data, m=1, maxit=0)
pred <- imp$pred
pred["Gas","Insul"] <- 0
imp <- mice(data, m=5, pred=pred, meth="norm.nob", maxit=1, print=FALSE, seed=45433)
abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4))
points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19)
text(x=9, y=6.5, label="c",cex=2)
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
imp <- mice(data, m=1, maxit=0)
pred <- imp$pred
pred["Gas","Insul"] <- 0
betadump <- vector("list", 0)
imp <- mice(data, m=5, pred=pred, meth="normdump", maxit=1, print=FALSE, seed=83126)
abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit), col=mdc(4))
betadump <- matrix(betadump, nc=2, byrow=TRUE)
for (i in 1:5) abline(coef=unlist(betadump[i,]), col=mdc(5))
points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19)
text(x=9, y=6.5, label="d",cex=2)
pch <- c(rep(3,26),rep(1,30))
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
imp <- mice(data, m=5, meth="norm", maxit=1, print=FALSE, seed=11727)
abline(m1<-lm(Gas~Temp, data=data, na.action=na.omit, subset=Insul=="Before"), col=mdc(4))
abline(m2<-lm(Gas~Temp, data=data, na.action=na.omit, subset=Insul=="After"), col=mdc(4))
points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2),pch=19)
legend(x="bottomleft", legend=c("before insulation","after insulation"), pch=c(3,1),bty="n", pt.lwd=lwd, cex = 1.6, pt.cex = 1, y.intersp = 0.6)
text(x=9, y=6.5, label="e",cex=2)
pch <- c(rep(3,26),rep(1,30))
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", cex.lab = 1.6,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
betadump <- vector("list", 0)
imp <- mice(data, m=5, meth="pmmdump", maxit=1, print=FALSE, seed=68006)
betadump <- matrix(betadump, nc=3, byrow=TRUE)
m1<-lm(Gas~Temp+Insul, data=data, na.action=na.omit)
an <- coef(m1)[1]
ai <- an + coef(m1)[3]
b <- coef(m1)[2]
abline(a=ai, b=b, col=mdc(4))
abline(a=an, b=b, col=mdc(4))
# for (i in 1:1) {
# abline(a=unlist(betadump[i,1]), b=unlist(betadump[i,2]), col=mdc(5))
# abline(a=unlist(betadump[i,1])+unlist(betadump[i,3]), b=unlist(betadump[i,2]), col=mdc(5))
# }
# points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2), pch=20)
eta <- 0.6
ylo <- ai+b*(5-eta)
yhi <- ai+b*(5+eta)
lines(x=c(5-eta,5+eta),y=c(ylo,yhi),lwd=3,col=mdc(5))
xlo <- (ylo-an)/b
xhi <- (yhi-an)/b
lines(x=c(xlo,xhi),y=c(ylo,yhi),lwd=3,col=mdc(5))
donors <- subset(data, (Insul=="After"&Temp>5-eta&Temp<5+eta)
| (Insul=="Before"&Temp>xlo&Temp<xhi))
points(x=donors$Temp, y=donors$Gas, cex=1.8, col=mdc(5), lwd=lwd)
legend(x="bottomleft", legend=c("before insulation","after insulation"), pch=c(3,1),bty="n", pt.lwd=lwd, cex = 1.6, pt.cex = 1, y.intersp = 0.6)
text(x=9, y=6.5, label="f",cex=2)
## ----generate1-----------------------------------------------------------
set.seed(1)
n <- 10000
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2)
cmp <- MASS::mvrnorm(n = n, mu = c(5, 5), Sigma = sigma)
p2.marright <- 1 - plogis(-5 + cmp[, 1])
r2.marright <- rbinom(n, 1, p2.marright)
yobs <- cmp
yobs[r2.marright == 0, 2] <- NA
## ----generateplot1, echo=FALSE, solo=TRUE, fig.width=4.5, fig.height=2.5----
grid <- seq(0,10,0.1)
mr <- plogis(-5 + grid)
mm <- plogis( 0.75-abs(grid-5))
mt <- plogis(-0.75+abs(grid-5))
z <- data.frame(grid, mr, mm, mt)
matplot(x=z[,1],y=z[,2:4], type="l", lty=1:3, col=mdc(5) , lwd=2,
xlab=expression(italic(Y)[1]),
ylab=expression(paste("Missingness in", italic(Y)[2])), las=1,
axes = FALSE)
axis(1, lwd = 0.7)
axis(2, lwd = 0.7, las = 1)
box(lwd = 0.7)
legend(x="top", bty="n",
legend=c("MARRIGHT","MARMID","MARTAIL"), lty=1:3, lwd=2, col=mdc(5), cex=0.8)
## ----generateplot2, echo=FALSE, solo=TRUE, fig.width=6, fig.height=4----
library(lattice)
y <- cmp
y3 <- rbind(y,y,y)
p2.marmid <- 1 - plogis(0.75-abs(y[,1]-5))
p2.martail <- 1 - plogis(0.75+abs(y[,1]-5))
r2.marmid <- rbinom(n, 1, p2.marmid)
r2.martail <- rbinom(n, 1, p2.martail)
r2 <- c(r2.marright,r2.marmid,r2.martail)
r2 <- factor(r2, labels=c("Ymis","Yobs"))
typ <- factor(rep(3:1,each=n),labels=c("MARTAIL","MARMID","MARRIGHT"))
d <- data.frame(y1=y3[,1],y2=y3[,2],r2=r2,typ=typ)
trellis.par.set(box.rectangle=list(col=c(mdc(2),mdc(1)),lwd=1.2))
trellis.par.set(box.umbrella=list(col=c(mdc(2),mdc(1)),lwd=1.2))
trellis.par.set(plot.symbol=list(col="transparent",lwd=1))
tp <- bwplot(r2~y2|typ, data=d,
horizontal=TRUE, layout=c(1,3),
xlab=expression(italic(Y)[2]),
col=c(mdc(2),mdc(1)),strip=FALSE, xlim=c(2,8),
strip.left = strip.custom(bg="grey95"))
print(tp)
## ----ampute--------------------------------------------------------------
amp <- ampute(cmp, type = "RIGHT")
apply(amp$amp, 2, mean, na.rm = TRUE)
## ----lingamlss, echo = FALSE---------------------------------------------
mice.impute.TF <- function(y, ry, x,
gamlss.trace = FALSE, ...)
{
# prepare data
xobs <- x[ry, , drop = FALSE]
xmis <- x[!ry, , drop = FALSE]
yobs <- y[ry]
n1 <- sum(ry)
n0 <- sum(!ry)
# draw bootstrap sample
s <- sample(n1, n1, replace = TRUE)
dotxobs <- xobs[s, , drop = FALSE]
dotyobs <- yobs[s]
row.names(dotxobs) <- row.names(dotyobs) <- NULL
dotxy <- data.frame(dotxobs, y = dotyobs)
xmis <- data.frame(xmis)
# fit the gamlss model
fit <- gamlss(y ~ ., data = dotxy, family = TF,
trace = gamlss.trace, ...)
yhat <- predict(fit, data = dotxy, newdata = xmis)
sigma <- exp(coef(fit, "sigma"))
nu <- exp(coef(fit, "nu"))
# draw the imputations
return(rTF(n0, yhat, sigma, nu))
}
## ----linhc0, echo=FALSE, results="hide"----------------------------------
library(gamlss)
data(db)
## ----linhc1, echo=FALSE, duo = TRUE, fig.width=4.5, fig.height=2.25-----
#allboys <- read.table(file="~/Documents/Sync/Impute/mice/data/db7482-2.txt", header=TRUE, sep="\t")[,1:9]
if (empty_figure) {
plot.new()
plot.new()
} else {
db <- as.data.frame(db)
data <- subset(db, age > 1 & age < 2, c("age","head"))
names(data) <- c("age", "hc")
truehist(data$hc, col = mdc(1), border = "white",
xlab="Head circumference (cm)",
ylab="Density",
ylim=c(0,0.3), xlim=c(38,60),
h = 0.5, axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
mn <- gamlss(hc~1, data=na.omit(data), family=NO, trace=FALSE)
mu <- coef(mn)
sigma <- exp(coef(mn, "sigma"))
cmfine <- seq(38,60,0.1)
lines(cmfine, dNO(cmfine, mu, sigma), lwd=0.8, lty=2)
mt <- gamlss(hc~1, data=data, family=TF, trace=FALSE)
mu <- coef(mt)
sigma <- exp(coef(mt, "sigma"))
nu <- exp(coef(mt, "nu"))
lines(cmfine, dTF(cmfine, mu, sigma, nu), lwd=0.8, lty=1)
legend(x="right",legend=c("normal","t, df=6.7"),
lwd=0.8, lty=c(2,1), bty="n", cex=0.7)
plot(x=data$age, y=data$hc, col=mdc(1), cex=0.3,
xlab="Age (in years)",
ylab="Head circumference (cm)",
ylim=c(39,60), axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
box(lwd = 0.7)
}
## ----linhc2, cache=TRUE, warning = FALSE---------------------------------
library("ImputeRobust")
library("gamlss")
data(db)
data <- subset(db, age > 1 & age < 2, c("age", "head"))
names(data) <- c("age", "hc")
synthetic <- rep(c(FALSE, TRUE), each = nrow(data))
data2 <- rbind(data, data)
row.names(data2) <- 1:nrow(data2)
data2[synthetic, "hc"] <- NA
imp <- mice(data2, m = 1, meth = "gamlssTF", seed = 88009,
print = FALSE)
syn <- subset(mice::complete(imp), synthetic)
## ----linhc3, echo=FALSE, duo = TRUE, fig.width=4.5, fig.height=2.25-----
if (empty_figure) {
plot.new()
plot.new()
} else {
data <- syn
truehist(data$hc, col=mdc(2), border="white",
xlab="Head circumference (cm)",
ylab="Density",
ylim=c(0,0.3), xlim=c(38,60),
h=0.5, axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
mn <- gamlss(hc~1, data=na.omit(data), family=NO, trace=FALSE)
mu <- coef(mn)
sigma <- exp(coef(mn, "sigma"))
cmfine <- seq(38,60,0.1)
lines(cmfine, dNO(cmfine, mu, sigma), lwd=0.8, lty=2)
mt <- gamlss(hc~1, data=data, family=TF, trace=FALSE)
mu <- coef(mt)
sigma <- exp(coef(mt, "sigma"))
nu <- exp(coef(mt, "nu"))
lines(cmfine, dTF(cmfine, mu, sigma, nu), lwd=0.8, lty=1)
legend(x="right",legend=c("normal",paste("t, df=",round(nu,1),sep="")),
lwd=0.8, lty=c(2,1), bty="n", cex=0.7)
plot(x=data$age, y=data$hc, col=mdc(2), cex=0.3,
xlab="Age (in years)",
ylab="Head circumference (cm)",
ylim=c(39,60), axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
box(lwd = 0.7)
}
## ----misspecify, echo=FALSE, duo = TRUE, fig.height=2.25, fig.width=4.5----
data <- boys[boys$age<=2,c("age","bmi")]
set.seed(87120)
data[sample(92:136,10),"bmi"] <- NA
imp <- mice(data, meth="norm", m=1, seed=32212, print=FALSE)
cd1 <- mice::complete(imp)
imp <- mice(data, m=1, seed=32212, print=FALSE)
cd2 <- mice::complete(imp)
r <- !is.na(data$bmi)
plot(data, col=mdc(1), xlab="Age", ylab="BMI", cex = 0.7, axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
box(lwd = 0.7)
points(cd1[!r,], col=mdc(2), pch=19, cex=0.7)
plot(data, col=mdc(1), xlab="Age", ylab="BMI", cex = 0.7, axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.7)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.7)
box(lwd = 0.7)
points(cd2[!r,], col=mdc(2), pch=19, cex=0.7)
## ----pmmfigure, echo=FALSE, solo=TRUE, fig.height=3.5, fig.width=3.5-----
data <- whiteside
lwd <- 1.5
data[47,"Gas"] <- NA
pch <- c(rep(3,26),rep(1,30))
plot(x=data$Temp, y=data$Gas, col=mdc(1), lwd=lwd, pch=pch,
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 1)
axis(2, lwd = 0.7, las = 1, cex.axis = 1)
box(lwd = 0.7)
betadump <- vector("list", 0)
imp <- mice(data, m=5, meth="pmmdump", maxit=1, print=FALSE, seed=68006)
betadump <- matrix(unlist(betadump), nc=3, byrow=TRUE)
m1<-lm(Gas~Temp+Insul, data=data, na.action=na.omit)
an <- coef(m1)[1]
ai <- an + coef(m1)[3]
b <- coef(m1)[2]
abline(a=ai, b=b, col=mdc(4))
abline(a=an, b=b, col=mdc(4))
# for (i in 56:56) {
# abline(a=unlist(betadump[i,1]), b=unlist(betadump[i,2]), col=mdc(5))
# abline(a=unlist(betadump[i,1])+unlist(betadump[i,3]), b=unlist(betadump[i,2]), col=mdc(5))
# }
# points(rep(5,5),imp$imp$Gas, lwd=lwd, col=mdc(2), pch=20)
eta <- 0.6
ylo <- ai+b*(5-eta)
yhi <- ai+b*(5+eta)
lines(x=c(5-eta,5+eta),y=c(ylo,yhi),lwd=3,col=mdc(4))
an <- 7.05; ai<-an-1.7; b <- -0.38
xlo1 <- (ylo-ai)/b
xhi1 <- (yhi-ai)/b
xlo2 <- (ylo-an)/b
xhi2 <- (yhi-an)/b
abline(a=an, b=b, col=mdc(5))
abline(a=ai, b=b, col=mdc(5))
lines(x=c(xlo1,xhi1),y=c(ylo,yhi),lwd=3,col=mdc(5))
lines(x=c(xlo2,xhi2),y=c(ylo,yhi),lwd=3,col=mdc(5))
abline(v=c(5-eta,5+eta),h=c(ylo,yhi),col=mdc(4),lty=3)
rect(xlo1,0,xhi1,8,col=hcl(0,100,40,0.05),border=NA)
rect(xlo2,0,xhi2,8,col=hcl(0,100,40,0.05),border=NA)
donors <- subset(data, (Insul=="After"&Temp>xlo1&Temp<xhi1)
| (Insul=="Before"&Temp>xlo2&Temp<xhi2))
points(x=donors$Temp, y=donors$Gas, cex=1.8, col=mdc(5), lwd=lwd)
legend(x="bottomleft", legend=c("before insulation","after insulation"), pch=c(3,1),bty="n", pt.lwd=lwd)
## ----pmmsim, eval=FALSE, echo=FALSE--------------------------------------
simulate <- function(nsim=10, seed=41872){
set.seed(seed)
res <- array(NA,dim=c(3, nsim, 6))
for(i in 1:nsim){
data <- createdata()
data <- makemissing(data)
res[1,i,] <- test.impute(data, m=5)
res[2,i,] <- test.impute(data, m=5, donors=1)
res[3,i,] <- test.impute(data, m=5, donors=10)
}
return(res)
}
res.pmm <- simulate(10000)
res <- res.pmm
## ----cart, echo=FALSE, duo=TRUE, fig.width=4.5, fig.height=2.25---------
library("rpart")
fit <- rpart(Gas ~ Temp + Insul, data=whiteside)
plot(fit, branch=0, margin=0.15)
text(fit, use=T,pretty=0,dig=3,cex=0.8)
leaf <- row.names(fit$frame)[fit$where]
label <- factor(leaf, labels=c(2,3,1,4,5))
plot(x=whiteside$Temp, y=whiteside$Gas, type="n",
xlab=expression(paste("Temperature (", degree, "C)")),
ylab="Gas consumption (cubic feet)", axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.8)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.8)
box(lwd = 0.7)
text(x=whiteside$Temp, y=whiteside$Gas, label=label, col=mdc(4), cex=0.6)
## ----echo=FALSE, eval=FALSE----------------------------------------------
# library(survival)
# library(foreign)
# library(car)
# file <- "~/Documents/Sync/Impute/ice/datasets/breastcancer.dta"
# breastcancer <- read.dta(file)
#
# data <- breastcancer
# nstage <- recode(data$nodes, '0=0;1:3=1;4:9=2;10:hi=3;else=NA')
# lonodes <- 0.5*(nstage==1) + 3.5*(nstage==2) + 9.5*(nstage==3)
# upnodes <- 3.5*(nstage==1) + 9.5*(nstage==2) + 55*(nstage==3)
# llnodes <- log(lonodes)
# lunodes <- log(upnodes)
# lnodes <- as.numeric(NA)
# data <- data.frame(data[,c("size","gradd1","gradd2","nodes")], nstage, lonodes, upnodes, lnodes)
# imp <- mice(data)
#
# fit <- survreg(Surv(lonodes, upnodes, rep(3, nrow(data)),type="interval") ~ size + gradd1 + gradd2, data=data)
#
## ----c85sensfig, duo = TRUE, echo=FALSE, fig.width=4.5, fig.height=2.25----
s <- c(
100, 0.015, 0.058, 0.02, 0.35,
110, 0.024, 0.074, 0.03, 0.30,
120, 0.043, 0.103, 0.05, 0.25,
130, 0.091, 0.164, 0.10, 0.20,
140, 0.145, 0.185, 0.15, 0.15,
150, 0.307, 0.247, 0.30, 0.10,
160, 0.157, 0.099, 0.15, 0.08,
170, 0.107, 0.049, 0.10, 0.06,
180, 0.055, 0.016, 0.05, 0.04,
190, 0.033, 0.005, 0.03, 0.02,
200, 0.023, 0.000, 0.02, 0.00,
210, 0.023, 0.000, 0.02, 0.00
)
sm <- matrix(s, nr=12, nc=5, byrow=TRUE)
snug <- 1.5
xx <- cbind(sm[,1]-5+snug,
sm[,1]-5-snug,
sm[,1]-5)
matplot(x=xx[,3], y=cbind(1-sm[,5]),
col=mdc(1), type="p", lwd=2, lty=1, pch=20,
xlab="Systolic BP (mmHg)",
ylab="Observation probability",
axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.8)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.8)
box(lwd = 0.7)
matplot(x=xx, y=sm[,2:4], type="s",
col=c(mdc(4),mdc(5),mdc(6)), lwd=2, lty=1,
xlab="Systolic BP (mmHg)",
ylab="Density", axes = FALSE)
axis(1, lwd = 0.7, cex.axis = 0.8)
axis(2, lwd = 0.7, las = 1, cex.axis = 0.8)
box(lwd = 0.7)
## ----ch4, child = "src/ch4.Rnw"------------------------------------------
## ----init4, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch04-', self.contained = FALSE)
pkg <- c("mice", "lattice")
loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
suppressPackageStartupMessages(library(gam, warn.conflicts = FALSE, quietly = TRUE))
source("R/functions.R")
## ----patterns, echo=FALSE------------------------------------------------
data <- matrix(sample(1:100,4*8*3,replace=TRUE),nrow=8*4,
dimnames=list(NULL,c("A","B","C")))
data <- as.data.frame(data)
data[c(31:32),"A"] <- NA
data[c(15:16,22:24,30:32),"B"] <- NA
data[c(6:8,12:16,17:21,27:29),"C"] <- NA
mdpat <- cbind(expand.grid(rec = 8:1, pat = 1:4, var = 1:3), r=as.numeric(as.vector(is.na(data))))
pattern1 <- data[1:8,]
pattern2 <- data[9:16,]
pattern3 <- data[17:24,]
pattern4 <- data[25:32,]
types <- c("Univariate","Monotone","File matching","General")
levelplot(r~var+rec|as.factor(pat), data=mdpat,
as.table=TRUE, aspect="iso",
shrink=c(0.9),
col.regions = mdc(1:2),
colorkey=FALSE,
scales=list(draw=FALSE),
xlab="", ylab="",
between = list(x=1,y=0),
strip = strip.custom(bg = "grey95", style = 1,
factor.levels = types))
## ----pattern-------------------------------------------------------------
md.pattern(pattern4, plot = FALSE)
## ----mdpairs-------------------------------------------------------------
p <- md.pairs(pattern4)
p
## ----puc-----------------------------------------------------------------
p$mr/(p$mr+p$mm)
## ----outbound------------------------------------------------------------
p$rm/(p$rm+p$rr)
## ----figfourflux, echo=FALSE, four=TRUE, results='hide', fig.width=4.5, fig.height=4.5----
library("MASS")
fluxplot(pattern1, main="", xlim=c(-0.1,1.1), ylim=c(-0.1,1.1), lwd = 0.8)
text(x=0.5,y=1,label="Univariate")
fluxplot(pattern2, main="", xlim=c(-0.1,1.1), ylim=c(-0.1,1.1), lwd = 0.8)
text(x=0.5,y=1,label="Monotone")
fluxplot(pattern3, main="", xlim=c(-0.1,1.1), ylim=c(-0.1,1.1), lwd = 0.8)
text(x=0.5,y=1,label="File matching")
fluxplot(pattern4, main="", xlim=c(-0.1,1.1), ylim=c(-0.1,1.1), lwd = 0.8)
text(x=0.5,y=1,label="General")
## ----flux----------------------------------------------------------------
flux(pattern4)[,1:3]
## ----mon-----------------------------------------------------------------
data <- nhanes2[, 1:3]
md.pattern(data, plot = FALSE)
imp <- mice(data, visit = "monotone", maxit = 1, m = 2,
print = FALSE)
## ----monda---------------------------------------------------------------
where <- make.where(nhanes2, "none")
where[6, "hyp"] <- TRUE
where[c(3, 6), "bmi"] <- TRUE
imp1 <- mice(nhanes2, where = where, method = "sample",
seed = 21991, maxit = 1, print = FALSE)
data <- mice::complete(imp1)
imp2 <- mice(data, maxit = 1, visitSequence = "monotone",
print = FALSE)
## ----slow1---------------------------------------------------------------
generate <- function(n = c(1000, 4500, 4500, 0),
cor = matrix(c(1.0, 0.9, 0.9,
0.9, 1.0, 0.7,
0.9, 0.7, 1.0), nrow = 3)) {
require(MASS)
nt <- sum(n)
cs <- cumsum(n)
data <- mvrnorm(nt, mu = rep(0,3), Sigma = cor)
dimnames(data) <- list(1:nt, c("X", "Y1", "Y2"))
if (n[2] > 0) data[(cs[1]+1):cs[2],"Y1"] <- NA
if (n[3] > 0) data[(cs[2]+1):cs[3],"Y2"] <- NA
if (n[4] > 0) data[(cs[3]+1):cs[4],c("Y1","Y2")] <- NA
return(data)
}
## ----slow2---------------------------------------------------------------
impute <- function(data, m = 5, method = "norm",
print = FALSE, maxit = 10, ...) {
statistic <- matrix(NA, nrow = maxit, ncol = m)
for (iter in 1:maxit) {
if (iter==1) imp <- mice(data, m = m, method = method,
print = print, maxit = 1, ...)
else imp <- mice.mids(imp, maxit = 1, print = print, ...)
statistic[iter, ] <- unlist(with(imp, cor(Y1, Y2))$analyses)
}
return(list(imp = imp, statistic = statistic))
}
## ----slow3---------------------------------------------------------------
simulate <- function(
ns = matrix(c(1000, 500, 250, 100, 50, 0,
rep(c(4500, 4750, 4875, 4950, 4975, 5000), 2),
rep(0, 6)), nrow = 6),
m = 5, maxit = 10, seed = 1, ...) {
if (!missing(seed)) set.seed(seed)
s <- cbind(rep(1:nrow(ns), each = maxit * m),
apply(ns, 2, rep, each = maxit * m),
rep(1:maxit, each = m), 1:m, NA)
colnames(s) <- c("k", "n111", "n101", "n110", "n100",
"iteration", "m", "rY1Y2")
for (k in 1:nrow(ns)) {
data <- generate(ns[k, ], ...)
r <- impute(data, m = m, maxit = maxit, ...)
s[s[,"k"] == k, "rY1Y2"] <- t(r$statistic)
}
return(data.frame(s))
}
## ----slow4, cache = TRUE-------------------------------------------------
slow.demo <- simulate(maxit = 150, seed = 62771)
## ----slowplot, echo=FALSE, fig.height=5----------------------------------
labels <- c("90% missing", "95% missing", "97.5% missing",
"99% missing", "99.5% missing", "100% missing")
xyplot(rY1Y2 ~ iteration | as.factor(k), group = m,
data = slow.demo, layout = c(3,2),
type="l", as.table = TRUE,
ylab = "Correlation between Y1 and Y2",
xlab = "Iteration", col = mdc(3),
scales = list(y = list(alternating = 1, tck = c(1, 0))),
strip = strip.custom(bg = "grey95", style = 1,
factor.levels = labels))
## ----pubimpjm, eval=TRUE, cache=TRUE-------------------------------------
select <- with(boys, age >= 8 & age <= 21.0)
djm <- boys[select, -4]
djm$gen <- as.integer(djm$gen)
djm$phb <- as.integer(djm$phb)
djm$reg <- as.integer(djm$reg)
dfcs <- boys[select, -4]
## impute under jm and fcs
jm <- mice(djm, method = "norm", seed = 93005, m = 10,
print = FALSE)
pmm <- mice(djm, method = "pmm", seed = 71332, m = 10,
print = FALSE)
fcs <- mice(dfcs, seed = 81420, m = 10, print = FALSE)
## ----pubfigjm, echo=TRUE, fig.width=6, fig.height=5--------------------
xyplot(jm, gen ~ age | as.factor(.imp), subset = .imp < 6,
xlab = "Age", ylab = "Genital stage", col = mdc(1:2),
ylim = c(0, 6))
## ----pubfigfcs, echo=FALSE, fig.width=6, fig.height=5-------------------
xyplot(fcs, gen ~ age | as.factor(.imp), subset = .imp < 6,
xlab = "Age", ylab = "Genital stage", col = mdc(1:2),
ylim = c(0, 6))
## ----pubfig2, echo=FALSE, cache=TRUE------------------------------------
drawgen <- function(imp, var="gen", doround=FALSE) {
m <- imp$m
ires <- vector("list", m)
for (i in 1:m){
cda <- mice::complete(imp,i)
if (doround) cda[,var] <- round(squeeze(cda[,var],c(1,5)))
count.boys(cda)
ires[[i]] <-fitTannerStages(data=cda,var=var,nstages=5,counts=boy.count.gen)
}
## plot the fitted models for the CCA and imputed analyses
plot(c(8,21),c(0,1),type="n",xlab="Age (years)",
ylab="Proportion",lab=c(10,12,5), las=1,cex=1,tck=-0.01)
abline(0.1,0,lty=2)
abline(0.5,0,lty=2)
abline(0.9,0,lty=2)
x <- seq(8.75,20.5,0.25)
for (s in 1:4) {
lines(x=x,y=boys.gen.cc[[s]][[2]]$predicted,lwd=6,col=mdc(1))
for (i in 1:m) {
lines(x=x,y=ires[[i]][[s]][[3]]$predicted,lwd=1,col=mdc(2))
}
}
}
four.gen <- function(formula=gen~age|reorder(factor(c("JM: multivariate normal","JM: rounded","FCS: predictive mean matching","FCS: proportional odds")),1:4),
data=list(jm=jm, jmr=jm, pmm=pmm, fcs=fcs),
doround=FALSE, layout=c(1,4),
xlab="Age (years)", ylab="Proportion",
as.table=TRUE,
...) {
prepanel.gen <- function(x, y, ...){
list(xlim=c(9,20), ylim=c(0,1))
}
panel.gen <- function(x, y, ...){
pan <- panel.number()
doround <- ifelse(pan==2, TRUE, FALSE)
imp <- data[[pan]]
m <- imp$m
ires <- vector("list", m)
for (i in 1:m){
cda <- mice::complete(imp,i)
if (doround) cda[,"gen"] <- round(squeeze(cda[,"gen"],c(1,5)))
count.boys(cda)
ires[[i]] <-fitTannerStages(data=cda,var="gen",nstages=5,counts=boy.count.gen)
}
panel.grid(h=-1, v=-1)
# panel.abline(0.1,0,lty=2,...)
# panel.abline(0.5,0,lty=2,...)
# panel.abline(0.9,0,lty=2,...)
x <- seq(8.75,20.5,0.25)
for (s in 1:4) {
panel.lines(x=x,y=boys.gen.cc[[s]][[2]]$predicted,lwd=4,col=mdc(1))
for (i in 1:m) {
panel.lines(x=x,y=ires[[i]][[s]][[3]]$predicted,lwd=1,col=mdc(2))
}
}
}
gen <- 0; age <- 0; x <- 0
## complete cases
idat <- data[[1]]$data
idat <- idat[cci(idat),]
count.boys(idat)
boys.gen.cc <-fitTannerStages(data=idat,var="gen",nstages=5,counts=boy.count.gen)
tp <- xyplot(formula, data, layout=layout,
prepanel=prepanel.gen, panel=panel.gen,
xlab=xlab, ylab=ylab, as.table=as.table,
...)
return(tp)
}
tp<-four.gen(layout=c(2,2),ylab="",strip=function(...,bg) strip.default(..., bg="grey95"))
print(tp)
## ----ex.slow1, eval=FALSE------------------------------------------------
slow2 <- simulate(ns = ns2, maxit = 50, seed = 62771)
## ----ch5, child = "src/ch5.Rnw"------------------------------------------
## ----init5, echo=FALSE, results='hide'-----------------------------------
opts_chunk$set(fig.path = 'fig/ch05-', self.contained = FALSE)
pkg <- c("mice", "magrittr", "purrr", "dplyr")
loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
## ----workflow1, warning = FALSE------------------------------------------
# mids workflow using saved objects
library(mice)
imp <- mice(nhanes, seed = 123, print = FALSE)
fit <- with(imp, lm(chl ~ age + bmi + hyp))
est1 <- pool(fit)
## ----workflow2-----------------------------------------------------------
# mids workflow using pipes
library(magrittr)
est2 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
with(lm(chl ~ age + bmi + hyp)) %>%
pool()
## ----workflow3-----------------------------------------------------------
# mild workflow using base::lapply
est3 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("all") %>%
lapply(lm, formula = chl ~ age + bmi + hyp) %>%
pool()
## ----workflow4-----------------------------------------------------------
# mild workflow using pipes and base::Map
est4 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("all") %>%
Map(f = lm, MoreArgs = list(f = chl ~ age + bmi + hyp)) %>%
pool()
## ----workflow5, warning = FALSE------------------------------------------
# mild workflow using purrr::map
library(purrr)
est5 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("all") %>%
map(lm, formula = chl ~ age + bmi + hyp) %>%
pool()
## ----workflow6-----------------------------------------------------------
# long workflow using base::by
est6 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("long") %>%
by(as.factor(.$.imp), lm, formula = chl ~ age + bmi + hyp) %>%
pool()
## ----workflow7-----------------------------------------------------------
# long workflow using a dplyr list-column
library(dplyr)
est7 <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("long") %>%
group_by(.imp) %>%
do(model = lm(formula = chl ~ age + bmi + hyp, data = .)) %>%
as.list() %>%
.[[-1]] %>%
pool()
## ----workflow8-----------------------------------------------------------
# incorrect workflow: averaging data, no pooling
ave <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("long") %>%
group_by(.id) %>%
summarise_all(.funs = mean) %>%
select(-.id, -.imp)
est8 <- lm(formula = chl ~ age + bmi + hyp, data = ave)
## ----corave--------------------------------------------------------------
cor(ave)
## ----corimp--------------------------------------------------------------
cor <- nhanes %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("all") %>%
lapply(cor)
Reduce("+", cor) / length(cor)
## ----stack---------------------------------------------------------------
est9 <- nhanes2 %>%
mice(seed = 123, print = FALSE) %>%
mice::complete("long") %>%
lm(formula = chl ~ age + bmi + hyp)
## ----repeatedanalyses----------------------------------------------------
fit <- with(imp, lm(chl~bmi+age))
coef(fit$analyses[[1]])
coef(fit$analyses[[2]])
## ----poolstandard--------------------------------------------------------
est <- pool(fit)
summary(est)
## ----frequency-----------------------------------------------------------
expr <- expression(freq <- table(hyp), freq[1] - freq[2])
fit <- with(imp, eval(expr))
unlist(fit$analyses)
## ----d1-1----------------------------------------------------------------
imp <- mice(nhanes2, m = 10, print = FALSE, seed = 71242)
m2 <- with(imp, lm(chl ~ age + bmi))
pool(m2)
## ----d1-2----------------------------------------------------------------
m1 <- with(imp, lm(chl ~ bmi))
summary(D1(m2, m1))
## ----d1-3----------------------------------------------------------------
D2(m2, m1)
## ----d3-1----------------------------------------------------------------
D3(m2, m1)
## ----select3, cache = TRUE, results = 'hide'-----------------------------
data <- boys[boys$age >= 8, -4]
imp <- mice(data, seed = 28382, m = 10, print = FALSE)
scope <- list(upper = ~ age + hgt + wgt + hc + gen + phb + reg,
lower = ~1)
expr <- expression(f1 <- lm(tv ~ 1),
f2 <- step(f1, scope = scope))
fit <- with(imp, expr)
## ----select3votes--------------------------------------------------------
formulas <- lapply(fit$analyses, formula)
terms <- lapply(formulas, terms)
votes <- unlist(lapply(terms, labels))
table(votes)
## ----select5-------------------------------------------------------------
fit.without <- with(imp, lm(tv ~ age + gen + reg + phb))
fit.with <- with(imp, lm(tv ~ age + gen + reg + phb + hgt))
D1(fit.with, fit.without)
## ----select6-------------------------------------------------------------
fit.without <- with(imp, lm(tv ~ age + gen + reg))
fit.with <- with(imp, lm(tv ~ age + gen + reg + phb))
D1(fit.with, fit.without)
## ----ch6, child = "src/ch6.Rnw"------------------------------------------
## ----init6, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch6-', self.contained = FALSE)
pkg <- c("mice", "lattice", "gamlss", "smcfcs")
loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE)
suppressPackageStartupMessages(library(AGD, warn.conflicts = FALSE, quietly = TRUE))
source("R/mi.hist.R")
## ----pred1---------------------------------------------------------------
imp <- mice(nhanes, print = FALSE)
imp$predictorMatrix
## ----multi---------------------------------------------------------------
imp <- mice(cbind(nhanes, chl2 = 2 * nhanes$chl),
print = FALSE, maxit = 1, m = 3, seed = 1)
imp$loggedEvents
## ----multi2--------------------------------------------------------------
imp <- mice(cbind(nhanes, chl2 = 2 * nhanes$chl),
print = FALSE, maxit = 1, m = 3, seed = 1,
remove.collinear = FALSE)
imp$loggedEvents
## ----chlchl, solo = TRUE, fig.width =7, echo = FALSE, fig.height = 7----
xyplot(imp, chl2 ~ chl | as.factor(.imp), pch = 21)
## ----derived1, cache=TRUE------------------------------------------------
data <- boys[, c("age", "hgt", "wgt", "hc", "reg")]
imp <- mice(data, print = FALSE, seed = 71712)
long <- mice::complete(imp, "long", include = TRUE)
long$whr <- with(long, 100 * wgt / hgt)
imp.itt <- as.mids(long)
## ----derived2, cache = TRUE----------------------------------------------
data$whr <- 100 * data$wgt / data$hgt
imp.jav1 <- mice(data, seed = 32093, print = FALSE)
## ----derived3------------------------------------------------------------
tail(imp.jav1$loggedEvents, 3)
## ----derivedjavpred------------------------------------------------------
pred <- make.predictorMatrix(data)
pred[c("wgt", "whr"), c("wgt", "whr")] <- 0
pred[c("hgt", "whr"), c("hgt", "whr")] <- 0
pred
imp.jav2 <- mice(data, pred = pred, seed = 32093, print = FALSE)
## ----derived5, cache = TRUE----------------------------------------------
data <- boys[, c("age", "hgt", "wgt", "hc", "reg")]
data$whr <- 100 * data$wgt / data$hgt
meth <- make.method(data)
meth["whr"] <- "~I(100 * wgt / hgt)"
pred <- make.predictorMatrix(data)
pred[c("wgt", "hgt"), "whr"] <- 0
imp.pas <- mice(data, meth = meth, pred = pred,
print = FALSE, seed = 32093)
## ----passive, echo=FALSE, fig.width=7,fig.height=3.5------------------
if (empty_figure) {
plot.new()
} else {
c0 <- cbind(model = "Impute, then transform", mice::complete(imp.itt))
c1 <- cbind(model = "JAV", mice::complete(imp.jav2))
c2 <- cbind(model = "Passive imputation", mice::complete(imp.pas))
cd <- rbind(c0, c1, c2)
trellis.par.set(mice.theme())
tp <- xyplot(I(wgt / hgt * 100) ~ whr | model, data = cd, layout = c(3, 1),
groups = rep(is.na(imp.itt$data$whr), 3), pch = c(21, 20), cex = c(0.3, 1),
ylab = "Calculated Weight/Height (kg/m)", xlab = "Imputed Weight/Height (kg/m)")
print(tp)
}
## ----simratio------------------------------------------------------------
pop <- na.omit(boys[, c("age", "hgt", "wgt", "hc", "reg")])
pop$whr <- with(pop, 100 * wgt / hgt)
broom::tidy(lm(hc ~ age + hgt + wgt + whr, data = pop))
## ----ratiofcs, eval = FALSE----------------------------------------------
library(smcfcs)
data <- pop
data[sample(nrow(data), size = 100), "wgt"] <- NA
data[sample(nrow(data), size = 100), "hgt"] <- NA
data$whr <- 100 * data$wgt / data$hgt
meth <- c("", "norm", "norm", "", "", "norm")
imps <- smcfcs(originaldata = data, meth = meth, smtype = "lm",
smformula = "hc ~ age + hgt + wgt + whr")
fit <- lapply(imps$impDatasets, lm,
formula = hc ~ age + hgt + wgt + whr)
summary(pool(fit))
## ----inter1, cache = TRUE------------------------------------------------
expr <- expression((wgt - 40) * (hc - 50))
boys$wgt.hc <- with(boys, eval(expr))
meth <- make.method(boys)
meth["wgt.hc"] <- paste("~I(", expr, ")", sep = "")
meth["bmi"] <- ""
pred <- make.predictorMatrix(boys)
pred[c("wgt", "hc"), "wgt.hc"] <- 0
imp.int <- mice(boys, m = 1, meth = meth, pred = pred,
print = FALSE, seed = 62587, maxit = 10)
## ----interaction, echo=FALSE, duo=TRUE, fig.width=4.5, fig.height=2.25--------
if (empty_figure) {
plot.new()
} else {
cd <- mice::complete(imp.int)
lwd <- 0.6
miss <- is.na(imp.int$data$wgt.hc)
plot(x=cd[!miss,c("wgt.hc","wgt")],col=mdc(1),lwd=0.8,cex=0.3,
ylab="Weight (kg)", xlab="Interaction",pch=1,
axes = FALSE)
points(x=cd[miss,c("wgt.hc","wgt")],col=mdc(2),lwd=0.8,cex=0.8,
pch=20)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
plot(x=cd[!miss,c("wgt.hc","hc")],col=mdc(1),lwd=0.8,cex=0.3,
ylab="Head circumference (cm)", xlab="Interaction",
axes = FALSE)
points(x=cd[miss,c("wgt.hc","hc")],col=mdc(2),lwd=0.8,cex=0.8,
pch=20)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
}
## ----compos1-------------------------------------------------------------
set.seed(43112)
n <- 400
Y1 <- sample(1:10, size = n, replace = TRUE)
Y2 <- sample(1:20, size = n, replace = TRUE)
Y3 <- 10 + 2 * Y1 + 0.6 * Y2 + sample(-10:10, size = n,
replace = TRUE)
Y <- data.frame(Y1, Y2, Y3)
Y[1:100, 1:2] <- NA
md.pattern(Y, plot = FALSE)
## ----compos2-------------------------------------------------------------
Y123 <- Y1 + Y2 + Y3
Y12 <- Y123 - Y[,3]
P1 <- Y[,1] / Y12
data <- data.frame(Y, Y123, Y12, P1)
## ----compos3-------------------------------------------------------------
meth <- make.method(data)
meth["Y1"] <- "~ I(P1 * Y12)"
meth["Y2"] <- "~ I((1 - P1) * Y12)"
meth["Y12"] <- "~ I(Y123 - Y3)"
pred <- make.predictorMatrix(data)
pred["P1", ] <- 0
pred[c("P1"), c("Y12", "Y3")] <- 1
imp1 <- mice(data, meth = meth, pred = pred, m = 10,
print = FALSE)
## ----compos4-------------------------------------------------------------
round(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], 2)
## ----composition, echo=FALSE, fig.width=7, fig.height=4---------------------
meth <- make.method(data)
meth["Y1"] <- "~ I(P1 * Y12)"
meth["Y2"] <- "~ I((1 - P1) * Y12)"
meth["Y12"] <- "~ I(Y123 - Y3)"
pred <- make.predictorMatrix(data)
pred["P1", ] <- 0
pred[c("P1"), c("Y12", "Y3")] <- 1
imp1 <- mice(data, meth = meth, pred = pred, m = 1,
print = FALSE)
pred["P1", "Y3"] <- 0
imp2 <- mice(data, meth = meth, pred = pred, m = 1,
print = FALSE)
imp <- rbind(imp1, imp2)
model <- rep(c("Y12 and Y3", "Y12 only"),each=n)
tp <-xyplot(imp, P1~Y12|model, pch=c(1,19),
xlab=expression(italic(Y)[1] + italic(Y)[2]),
ylab = expression(italic(P)[1]),
col=mdc(1:2),
strip=strip.custom(factor.levels=c(expression(italic(Y)[12] + italic(Y)[3]),
expression(italic(Y)[12]))))
plot(tp)
## ----squeezeprep, eval=TRUE, echo=TRUE, results='hide'-----------------------
data <- airquality[, 1:2]
post <- make.post(data)
post["Ozone"] <-
"imp[[j]][, i] <- squeeze(imp[[j]][, i], c(1, 200))"
imp <- mice(data, method = "norm.nob", m = 1,
maxit = 1, seed = 1, post = post)
## ----squeeze, echo=FALSE, duo=TRUE, fig.width=4.5, fig.height=2.25----
lwd <- 1.5
data <- mice::complete(imp)
Yobs <- airquality[, "Ozone"]
Yimp <- data[, "Ozone"]
mi.hist(Yimp, Yobs, b=seq(-20, 200, 10), type = "continuous",
gray = FALSE,
obs.lwd = lwd, mis.lwd = lwd, imp.lwd = lwd,
obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent",
mlt = 0.08, main = "", xlab = "Ozone (ppb)")
box(lwd = 1)
plot(data[cci(imp),2:1],col=mdc(1),lwd=lwd,cex=0.75,
ylab="Ozone (ppb)", xlab="Solar Radiation (lang)",ylim=c(-15,170),
axes=FALSE)
points(data[ici(imp),2:1],col=mdc(2),lwd=lwd,cex=0.75)
axis(1, lwd = 1)
axis(2, lwd = 1, las = 1)
box(lwd = 1)
## ----doif----------------------------------------------------------------
post["Ozone"] <- "ifdo(c(Ozone < 1, Ozone > 200), c(1, 200))"
## ----post2, eval=TRUE, echo=FALSE, cache = TRUE--------------------------
post <- make.post(boys)
post["gen"] <-
"imp[[j]][data$age[!r[, j]] < 8, i] <- levels(boys$gen)[1]"
post["phb"] <-
"imp[[j]][data$age[!r[, j]] < 8, i] <- levels(boys$phb)[1]"
post["tv"] <- "imp[[j]][data$age[!r[, j]] < 8, i] <- 1"
free <- mice(boys, m = 1, seed = 85444, print = FALSE)
restricted <- mice(boys, m = 1, post = post, seed = 85444,
print = FALSE)
## ----plotgen, echo=FALSE, fig.width=7, fig.height=4---------------------
imp3 <- rbind(free, restricted)
model <- rep(c("Free","Restricted"),each=nrow(boys))
tp <-xyplot(imp3, gen~age|model, pch=c(3,1), cex=c(3,1.5),
ylab="Genital development", xlab="Age (years)",
col=mdc(1:2))
print(tp)
## ----visit1--------------------------------------------------------------
imp.int$visitSequence
## ----visit2a, echo=FALSE-------------------------------------------------
rm(boys)
## ----visit2--------------------------------------------------------------
vis <- c("hgt", "wgt", "hc", "wgt.hc", "gen", "phb",
"tv", "reg")
expr <- expression((wgt - 40) * (hc - 50))
boys$wgt.hc <- with(boys, eval(expr))
imp.int2 <- mice(boys, m = 1, maxit = 1, visitSequence = vis,
meth = imp.int$meth, pred = imp.int$pred,
seed = 23390)
## ----visit3--------------------------------------------------------------
imp.int2 <- mice(boys, m = 1, maxit = 1, vis = "monotone",
meth = imp.int$meth, pred = imp.int$pred,
seed = 23390)
## ----convergence1, echo = FALSE, cache = TRUE-----------------------------
imp <- mice(nhanes, seed = 62006, maxit = 20, print = FALSE)
print(plot(imp, lwd = 1))
## ----convergence0, eval = FALSE, echo = TRUE---------------------------------
imp <- mice(nhanes, seed = 62006, maxit = 20, print = FALSE)
plot(imp)
## ----convergence2, cache = TRUE------------------------------------------
meth <- make.method(boys)
meth["bmi"] <- "~I(wgt / (hgt / 100)^2)"
imp.bmi1 <- mice(boys, meth = meth, maxit = 20,
print = FALSE, seed = 60109)
## ----convergence3, echo=FALSE-------------------------------------------
print(plot(imp.bmi1, c("hgt", "wgt", "bmi")))
## ----convergence4, cache = TRUE------------------------------------------
pred <- make.predictorMatrix(boys)
pred[c("hgt", "wgt"), "bmi"] <- 0
imp.bmi2 <- mice(boys, meth = meth, pred = pred, maxit = 20,
print = FALSE, seed = 60109)
## ----convergence5, echo=FALSE-------------------------------------------
print(plot(imp.bmi2, c("hgt", "wgt", "bmi")))
## ----wormplot5, echo=FALSE, results = 'hide'-----------------------------
set.seed(24417)
rm(boys)
boys$wgt[sample(1:nrow(boys), nrow(boys) / 2)] <- NA
imp <- mice(boys, m = 1, seed = 53882, print = FALSE)
cd <- mice::complete(imp)[, -4]
isobs <- !is.na(boys$wgt)
cdobs <- cd[isobs, ]
cdmis <- cd[!isobs, ]
obs <- gamlss(wgt ~ age + hgt + hc + gen + phb + tv + reg, data = cdobs)
mis <- gamlss(wgt ~ age + hgt + hc + gen + phb + tv + reg, data = cdmis)
## ----wormplot6, solo=TRUE, echo=FALSE, fig.width = 4.4, fig.height=4.4----
wp.twin(obs, mis, xvar = NULL, xvar.column = 2, n.inter = 9,
col1 = mdc(4), col2 = mdc(5), ylim = 0.9, cex = 0.7, pch = 1)
## ----stripplot, echo=FALSE, fig.width = 7, fig.height=5----------------
imp <- mice(nhanes, seed = 29981, print = FALSE)
stripplot(imp, pch = c(21, 20), cex = c(1, 1.5))
## ----stripplot1, eval=FALSE, echo=TRUE-----------------------------------
imp <- mice(nhanes, seed = 29981)
stripplot(imp, pch = c(21, 20), cex = c(1, 1.5))
## ----densityplot1, eval=FALSE, echo=TRUE---------------------------------
densityplot(imp, layout = c(3, 1))
## ----densityplot, solo=TRUE, echo=FALSE, fig.width = 7, fig.height=3----
densityplot(imp, layout = c(3, 1), col = c(mdc(4), rep(mdc(5), 5)))
## ----propensityplot1, eval = FALSE---------------------------------------
fit <- with(imp, glm(ici(imp) ~ age + bmi + hyp + chl,
family = binomial))
ps <- rep(rowMeans(sapply(fit$analyses, fitted.values)),
imp$m + 1)
xyplot(imp, bmi ~ ps | as.factor(.imp),
xlab = "Probability that record is incomplete",
ylab = "BMI", pch = c(1, 19), col = mdc(1:2))
## ----propensityplot, solo=TRUE, echo=FALSE, fig.width = 7, fig.height=5----
fit <- with(imp, glm(ici(imp)~age+bmi+hyp+chl,family=binomial))
ps <- rep(rowMeans(sapply(fit$analyses, fitted.values)),imp$m+1)
tp <- xyplot(imp, bmi~ps|as.factor(.imp), pch=c(1,19),
xlab="Probability that record is incomplete",
ylab="BMI", col=mdc(1:2))
print(tp)
## ----ch7, child = "src/ch7.Rnw"------------------------------------------
## ----init7, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch7-', self.contained = FALSE)
pkg <- c("mice", "micemd", "lme4", "tidyr",
"dplyr", "purrr", "gridExtra")
loaded <- sapply(pkg, require, character.only = TRUE,
warn.conflicts = FALSE, quietly = TRUE)
suppressPackageStartupMessages(library(miceadds, warn.conflicts = FALSE, quietly = TRUE))
suppressPackageStartupMessages(library(DPpackage, warn.conflicts = FALSE, quietly = TRUE))
suppressPackageStartupMessages(library(mitml, warn.conflicts = FALSE, quietly = TRUE))
## ----mla.data0-----------------------------------------------------------
library(mice)
data("brandsma", package = "mice")
d <- brandsma[, c("sch", "lpo", "sex", "den")]
## ----mla.data1-----------------------------------------------------------
d <- brandsma[, c("sch", "lpo")]
## ----mla.data2-----------------------------------------------------------
md.pattern(d, plot = FALSE)
## ----mla.empty1, cache=TRUE----------------------------------------------
library(miceadds)
methods <- c("sample", "pmm", "2l.pan", "2l.norm", "2l.pmm")
result <- vector("list", length(methods))
names(result) <- methods
for (meth in methods) {
d <- brandsma[, c("sch", "lpo")]
pred <- make.predictorMatrix(d)
pred["lpo", "sch"] <- -2
result[[meth]] <- mice(d, pred = pred, meth = meth,
m = 10, maxit = 1,
print = FALSE, seed = 82828)
}
## ----mlsd, fig.width=4.5, fig.height=2.25, solo=TRUE, echo = FALSE----
group_by(d, sch) %>%
summarise(sdg = sd(lpo, na.rm = TRUE)) %>%
pull(sdg) %>%
hist(main = "", xlab = "SD(language score) per school")
## ----mldist, echo = FALSE, fig.width = 6, fig.height = 8, solo = TRUE----
imp <- result[[1]]
imp <- cbind(imp, mis = is.na(imp$data$lpo))
yobs <- mice::complete(imp, "long") %>%
filter(.imp == 1) %>%
group_by(sch) %>%
mutate(
nm = sum(mis),
ng = cut(nm, c(0:2, 4, 100), right = FALSE,
labels = c("0", "1", "2-3", ">3")),
method = "observed") %>%
filter(!mis) %>%
ungroup()
dl <- vector("list", length(methods))
names(dl) <- methods
for (meth in methods) {
imp <- result[[meth]]
imp <- cbind(imp, mis = is.na(imp$data$lpo))
ym <- mice::complete(imp, "long") %>%
group_by(sch, .imp) %>%
mutate(
nm = sum(mis),
ng = cut(nm, c(0:2, 4, 100), right = FALSE,
labels = c("0", "1", "2-3", ">3")),
method = meth) %>%
filter(mis) %>%
ungroup() %>%
mutate(
sch = as.integer(sch)
)
dl[[meth]] <- ym
}
ymis <- do.call(rbind.data.frame, dl)
yom <- rbind.data.frame(yobs, ymis)
yom$method <- factor(yom$method,
levels = c("observed", "sample", "pmm",
"2l.pan", "2l.norm", "2l.pmm"))
trellis.par.set(box.rectangle = list(col = c(rep(mdc(1), 4), mdc(2)),lwd = 0))
trellis.par.set(box.umbrella = list(col = c(mdc(3)), lwd = 1.2))
trellis.par.set(plot.symbol = list(col = "white", lwd = 1))
colors <- c(mdc(4), rep(mdc(5), 5))
bwplot(ng ~ lpo | method, data = yom, do.out = FALSE,
as.table = TRUE, layout = c( 1, 6),
xlim = c(16, 64),
xlab = "Language score (grade 8)",
ylab = "Number of missing values per school",
col = colors,
pch = "|",
panel = function(x, y, col = col, ...) {
panel.grid(v = -1, h = 0)
panel.bwplot(x, y, col = col, fill = col[packet.number()], ...)
meds <- tapply(x, y, median)
ylocs <- seq_along(meds)
panel.segments(meds, ylocs - 1/4,
meds, ylocs + 1/4,
lwd = 2, col = "white")
}
)
## ----mldens, echo = FALSE, fig.width = 6, fig.height = 6, solo = TRUE----
imp <- result[["2l.pan"]]
imp2 <- cbind(imp, mis = is.na(imp$data$lpo))
ym <- mice::complete(imp2, "long", include = TRUE) %>%
group_by(sch, .imp) %>%
mutate(
nm = sum(mis),
ng = cut(nm, c(0:2, 4, 100), right = FALSE,
labels = c("0", "1", "2-3", ">3")),
method = meth) %>%
ungroup()
imp3 <- as.mids(cbind(mice::complete(imp2, "long", include = TRUE), ng = ym$ng))
p.pan <- densityplot(imp3, ~ lpo | ng, subset = ng %in% c("1", "2-3"),
xlim = c(16, 64), ylim = c(0, 0.06),
xlab = "Language score (grade 8)",
ylab = "Density (2l.pan)")
imp <- result[["2l.pmm"]]
imp2 <- cbind(imp, mis = is.na(imp$data$lpo))
ym <- mice::complete(imp2, "long", include = TRUE) %>%
group_by(sch, .imp) %>%
mutate(
nm = sum(mis),
ng = cut(nm, c(0:2, 4, 100), right = FALSE,
labels = c("0", "1", "2-3", ">3")),
method = meth) %>%
ungroup()
imp3 <- as.mids(cbind(mice::complete(imp2, "long", include = TRUE), ng = ym$ng))
p.pmm <- densityplot(imp3, ~ lpo | ng, subset = ng %in% c("1", "2-3"),
xlim = c(16, 64), ylim = c(0, 0.06),
xlab = "Language score (grade 8)",
ylab = "Density (2l.pmm)")
grid.arrange(p.pan, p.pmm)
## ----toenail.2l.3, cache=TRUE--------------------------------------------
library(tidyr)
data("toenail", package = "DPpackage")
data <- tidyr::complete(toenail, ID, visit) %>%
tidyr::fill(treatment) %>%
dplyr::select(-month)
table(data$outcome, useNA = "always")
## ----toenail.2l.4, cache=TRUE--------------------------------------------
pred <- make.predictorMatrix(data)
pred["outcome", "ID"] <- -2
imp <- mice(data, method = "2l.bin", pred = pred, seed = 12102,
maxit = 1, m = 5, print = FALSE)
table(mice::complete(imp)$outcome, useNA = "always")
## ----toenailprofiles, echo = FALSE, solo = TRUE, fig.width = 6, fig.height = 6----
trellis.par.set(strip.background=list(col="grey95"))
ids <- data %>%
group_by(ID) %>%
summarize(n = sum(is.na(outcome))) %>%
filter(n >= 4) %>% pull(ID)
long <- mice::complete(imp, "long", include = TRUE)
miss <- is.na(long[long$.imp == 0, "outcome"])
miss <- ifelse(miss, 2, 1)
long <- cbind(long, miss = miss)
long <- long[long$.imp != 0, ]
ids <- c(9, 12, 13, 117, 51, 168, 188, 230, 21, 31, 214, 309, 41, 45, 48, 99)
tp1 <- xyplot(outcome ~ visit | factor(ID),
data = long, type = "p",
groups = miss,
subset = ID %in% ids,
col = mdc(1:2),
ylab = "Severe infection (Y/N)",
xlab = "Visit",
pch = 19, cex = 0.9, as.table = TRUE)
fit <- with(imp,
lme4::glmer(outcome ~ treatment * visit + (1 | ID),
family = binomial))
prob <- imp %>%
mice::complete("all") %>%
purrr::map(lme4::glmer,
formula = outcome ~ treatment * visit + (1 | ID),
family = binomial) %>%
purrr:::map(predict, type = "response") %>%
unlist()
long$prob <- prob
tp2 <- xyplot(prob ~ visit | factor(ID),
data = long, type = "l",
lwd = 1.5,
groups = factor(.imp),
subset = ID %in% ids,
col = c(mdc(3), mdc(3), "#009E73", mdc(3), mdc(3)),
ylab = "Severe infection (Y/N)",
xlab = "Visit", as.table = TRUE)
print(tp2)
print(tp1, newpage = FALSE)
## ----toenail.est, cache=TRUE---------------------------------------------
library(purrr)
mice::complete(imp, "all") %>%
purrr::map(lme4::glmer,
formula = outcome ~ treatment * visit + (1 | ID),
family = binomial) %>%
pool() %>%
summary()
## ----mla.data3-----------------------------------------------------------
data("brandsma", package = "mice")
dat <- brandsma[, c("sch", "pup", "lpo",
"iqv", "ses", "ssi")]
## ----mdp, echo = FALSE, solo = TRUE, fig.height=5------------------
z <- md.pattern(dat)
## ----mla.empty10, cache=TRUE---------------------------------------------
d <- brandsma[, c("sch", "lpo")]
pred <- make.predictorMatrix(d)
pred["lpo", "sch"] <- -2
imp <- mice(d, pred = pred, meth = "2l.pmm", m = 10, maxit = 1,
print = FALSE, seed = 152)
## ----mla.empty5----------------------------------------------------------
library(lme4)
fit <- with(imp, lmer(lpo ~ (1 | sch), REML = FALSE))
summary(pool(fit))
## ----mla.empty6----------------------------------------------------------
library(mitml)
testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp
## ----mla.ri1, cache=TRUE-------------------------------------------------
d <- brandsma[, c("sch", "lpo", "iqv")]
pred <- make.predictorMatrix(d)
pred["lpo", ] <- c(-2, 0, 3)
pred["iqv", ] <- c(-2, 3, 0)
imp <- mice(d, pred = pred, meth = "2l.pmm", seed = 919,
m = 10, print = FALSE)
## ----mla.ri1b, eval = FALSE----------------------------------------------
d$lpo <- as.vector(scale(d$lpo, scale = FALSE))
## ----mla.ri2, cache = TRUE-----------------------------------------------
fm1 <- lpo + iqv ~ 1 + (1 | sch)
mit <- mitml::panImpute(data = d, formula = fm1, m = 5,
silent = TRUE)
## ----mla.ri3, cache = TRUE-----------------------------------------------
blk <- make.blocks(d, "collect")
fm2 <- list(collect = fm1)
imp2 <- mice(d, meth = "panImpute", blocks = blk, form = fm2,
print = FALSE, seed = 711)
## ----mla.ri4-------------------------------------------------------------
fit <- with(imp, lmer(lpo ~ iqv + (1 | sch), REML = FALSE))
summary(pool(fit))
testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp
## ----mla.riwb1-----------------------------------------------------------
res <- mice::complete(imp, "long") %>%
group_by(sch, .imp) %>%
mutate(iqm = mean(iqv)) %>%
group_by(.imp) %>%
do(model = lmer(lpo ~ iqv + iqm + (1 | sch),
REML = FALSE, data = .)) %>%
as.list() %>% .[[-1]]
summary(pool(res))
testEstimates(res, var.comp = TRUE)$var.comp
## ----mla.ril2p1----------------------------------------------------------
d <- brandsma[, c("sch", "lpo", "iqv", "den")]
meth <- make.method(d)
meth[c("lpo", "iqv", "den")] <- c("2l.pmm", "2l.pmm",
"2lonly.pmm")
pred <- make.predictorMatrix(d)
pred["lpo", ] <- c(-2, 0, 3, 1)
pred["iqv", ] <- c(-2, 3, 0, 1)
pred["den", ] <- c(-2, 1, 1, 0)
imp <- mice(d, pred = pred, meth = meth, seed = 418,
m = 10, print = FALSE)
## ----mla.ril2p3, cache = TRUE--------------------------------------------
d$den <- as.factor(d$den)
fml <- list(lpo + iqv ~ 1 + (1 | sch), den ~ 1)
mit <- mitml::jomoImpute(data = d, formula = fml, m = 10,
silent = TRUE)
## ----mla.ril2p4, cache = TRUE--------------------------------------------
blk <- make.blocks(d, "collect")
fm2 <- list(collect = fml)
imp2 <- mice(d, meth = "jomoImpute", blocks = blk, form = fm2,
print = FALSE, seed = 418, maxit = 1,
m = 10, n.burn = 100)
## ----mladens, echo = FALSE, solo = TRUE, fig.height=6, fig.width=6----
# convert den back to numeric for plotting
z <- mice::complete(imp2, "long", include = TRUE)
z$den <- as.numeric(levels(z$den))[z$den]
imp3 <- as.mids(z)
p.jomo <- densityplot(imp3, ~ lpo + den, ylab = "jomoImpute", ylim = list(c(0, 0.05), c(0, 1.5)))
p.pmm <- densityplot(imp, ~ lpo + den, ylab = "2l.pmm", ylim = list(c(0, 0.05), c(0, 1.5)))
p.jomo2 <- update(p.jomo, xlim = list(c(16, 64), c(0, 5)))
p.pmm2 <- update(p.pmm, xlim = list(c(16, 64), c(0, 5)))
grid.arrange(p.jomo2, p.pmm2)
## ----mla.ril2p6----------------------------------------------------------
fit <- with(imp, lmer(lpo ~ 1 + iqv + as.factor(den)
+ (1 | sch), REML = FALSE))
summary(pool(fit))
testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp
## ----mla.int1------------------------------------------------------------
d <- brandsma[, c("sch", "lpo", "iqv", "sex", "den")]
d <- data.frame(d, lpm = NA, iqm = NA, sxm = NA,
iqd = NA, lpd = NA,
iqd.sex = NA, lpd.sex = NA, iqd.lpd = NA,
iqd.den = NA, sex.den = NA, lpd.den = NA,
iqm.den = NA, sxm.den = NA, lpm.den = NA)
## ----mla.int2------------------------------------------------------------
# level-1 variables
meth <- make.method(d)
meth[c("lpo", "iqv", "sex")] <- "2l.pmm"
pred <- make.predictorMatrix(d)
pred[,] <- 0
pred[, "sch"] <- -2
codes <- c(3, 3, rep(1, 6))
pred["lpo", c("iqv", "sex", "iqd.sex", "sex.den", "iqd.den",
"den", "iqm.den", "sxm.den")] <- codes
pred["iqv", c("lpo", "sex", "lpd.sex", "sex.den", "lpd.den",
"den", "lpm.den", "sxm.den")] <- codes
pred["sex", c("lpo", "iqv", "iqd.lpd", "lpd.den", "iqd.den",
"den", "iqm.den", "lpm.den")] <- codes
## ----mla.int2b-----------------------------------------------------------
# level-2 variables
meth["den"] <- "2lonly.pmm"
pred["den", c("lpo", "iqv", "sex",
"iqd.sex", "lpd.sex", "iqd.lpd")] <- 1
## ----mla.int2c, echo = FALSE---------------------------------------------
t(pred[c("lpo", "iqv", "sex", "den"), c("sch", "lpo", "iqv", "sex", "den", "iqd.sex", "lpd.sex", "iqd.lpd", "iqd.den", "sex.den", "lpd.den", "iqm.den", "sxm.den", "lpm.den")])
## ----mla.int4------------------------------------------------------------
# derive group means
meth[c("iqm", "sxm", "lpm")] <- "2l.groupmean"
pred[c("iqm", "sxm", "lpm"), c("iqv", "sex", "lpo")] <- diag(3)
# derive deviations from cluster mean
meth["iqd"] <- "~ I(iqv - iqm)"
meth["lpd"] <- "~ I(lpo - lpm)"
## ----mla.int4a-----------------------------------------------------------
# derive interactions
meth["iqd.sex"] <- "~ I(iqd * sex)"
meth["lpd.sex"] <- "~ I(lpd * sex)"
meth["iqd.lpd"] <- "~ I(iqd * lpd)"
meth["iqd.den"] <- "~ I(iqd * den)"
meth["sex.den"] <- "~ I(sex * den)"
meth["lpd.den"] <- "~ I(lpd * den)"
meth["iqm.den"] <- "~ I(iqm * den)"
meth["sxm.den"] <- "~ I(sxm * den)"
meth["lpm.den"] <- "~ I(lpm * den)"
## ----mla.int5, cache=TRUE------------------------------------------------
visit <- c("lpo", "lpm", "lpd",
"lpd.sex", "iqd.lpd", "lpd.den", "lpm.den",
"iqv", "iqm", "iqd",
"iqd.sex", "iqd.lpd", "iqd.den", "iqm.den",
"sex", "sxm",
"iqd.sex", "lpd.sex", "sex.den", "sxm.den",
"den", "iqd.den", "sex.den", "lpd.den",
"iqm.den", "sxm.den", "lpm.den")
imp <- mice(d, pred = pred, meth = meth, seed = 188,
visit = visit, m = 10, print = FALSE,
allow.na = TRUE)
## ----mla.int7------------------------------------------------------------
long <- mice::complete(imp, "long", include = TRUE)
long$den <- as.factor(long$den)
imp2 <- as.mids(long)
fit <- with(imp2, lmer(lpo ~ 1 + iqv*sex + iqm*den + sex*den
+ (1 | sch), REML = FALSE))
summary(pool(fit))
## ----mla.rs1, cache=TRUE-------------------------------------------------
d <- brandsma[, c("sch", "lpo", "iqv")]
d$lpo <- as.vector(scale(d$lpo, scale = FALSE))
pred <- make.predictorMatrix(d)
pred["lpo", ] <- c(-2, 0, 4)
pred["iqv", ] <- c(-2, 4, 0)
pred
imp <- mice(d, pred = pred, meth = "2l.pmm", seed = 441,
m = 10, print = FALSE, maxit = 20)
## ----mla.rs2-------------------------------------------------------------
imp2 <- mice::complete(imp, "long", include = TRUE) %>%
group_by(sch) %>%
mutate(iqm = mean(iqv, na.rm = TRUE),
lpo = lpo + mean(brandsma$lpo, na.rm = TRUE)) %>%
as.mids()
fit <- with(imp2, lmer(lpo ~ iqv + iqm + (1 + iqv | sch),
REML = FALSE))
summary(pool(fit))
testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp
## ----mla.rs3-------------------------------------------------------------
d <- brandsma[, c("sch", "lpo", "iqv", "ses")]
d$lpo <- as.vector(scale(d$lpo, scale = FALSE))
d <- data.frame(d,
iqv.ses = NA, ses.lpo = NA, iqv.lpo = NA,
lpm = NA, iqm = NA, sem = NA,
iqv.iqm = NA, ses.sem = NA, lpo.lpm = NA,
iqv.sem = NA, iqv.lpm = NA,
ses.iqm = NA, ses.lpm = NA,
lpo.iqm = NA, lpo.sem = NA,
iqm.sem = NA, lpm.sem = NA, iqm.lpm = NA)
## ----mla.rs4, echo = FALSE-----------------------------------------------
pred <- make.predictorMatrix(d)
meth <- make.method(d)
# level-1 variables
meth[c("lpo", "iqv", "ses")] <- "2l.pmm"
pred["lpo", ] <- c(-2, 0, 3, 3, 1, 0, 0, 0, 0, 0, 1, 1, 0,
1, 0, 1, 0, 0, 0, 1, 0, 0)
pred["iqv", ] <- c(-2, 3, 0, 3, 0, 1, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 1, 0, 1, 0, 1, 0)
pred["ses", ] <- c(-2, 3, 3, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 1)
t(pred[c("lpo", "iqv", "ses"), -(8:10)])
## ----mla.rs6-------------------------------------------------------------
meth[c("iqm", "sem", "lpm")] <- "2l.groupmean"
pred[c("iqm", "sem", "lpm"), ] <- 0
pred["iqm", c("sch", "iqv")] <- c(-2, 1)
pred["sem", c("sch", "ses")] <- c(-2, 1)
pred["lpm", c("sch", "lpo")] <- c(-2, 1)
## ----mla.rs5-------------------------------------------------------------
meth["iqv.ses"] <- "~ I(iqv * ses)"
meth["iqv.lpo"] <- "~ I(iqv * lpo)"
meth["ses.lpo"] <- "~ I(ses * lpo)"
## ----mla.rs7, echo = FALSE-----------------------------------------------
meth["iqv.iqm"] <- "~ I(iqv * iqm)"
meth["ses.sem"] <- "~ I(ses * sem)"
meth["lpo.lpm"] <- "~ I(lpo * lpm)"
meth["iqv.sem"] <- "~ I(iqv * sem)"
meth["iqv.lpm"] <- "~ I(iqv * lpm)"
meth["ses.iqm"] <- "~ I(ses * iqm)"
meth["ses.lpm"] <- "~ I(ses * lpm)"
meth["lpo.iqm"] <- "~ I(lpo * iqm)"
meth["lpo.sem"] <- "~ I(lpo * sem)"
meth["iqm.sem"] <- "~ I(iqm * sem)"
meth["iqm.lpm"] <- "~ I(iqm * lpm)"
meth["lpm.sem"] <- "~ I(lpm * sem)"
## ----mla.rs8, cache=TRUE-------------------------------------------------
visit <- c("lpo", "iqv.lpo", "ses.lpo",
"lpm", "lpo.lpm", "iqv.lpm", "ses.lpm",
"lpo.iqm", "lpo.sem", "iqm.lpm", "lpm.sem",
"iqv", "iqv.ses", "iqv.lpo",
"iqm", "iqv.iqm", "iqv.sem", "iqv.lpm",
"ses.iqm", "lpo.iqm", "iqm.sem", "iqm.lpm",
"ses", "iqv.ses", "ses.lpo",
"sem", "ses.sem", "iqv.sem", "ses.iqm",
"ses.lpm", "lpo.sem", "iqm.sem", "lpm.sem")
imp <- mice(d, pred = pred, meth = meth, seed = 211,
visit = visit, m = 10, print = FALSE, maxit = 10,
allow.na = TRUE)
## ----mla.rs9, warning=FALSE----------------------------------------------
fit <- with(imp, lmer(lpo ~ iqv * ses + iqm * sem +
iqv * iqm + iqv * sem +
ses * iqm + ses * sem + (1 + ses + iqv | sch),
REML = FALSE))
## ----mla.rs10------------------------------------------------------------
summary(pool(fit))
testEstimates(as.mitml.result(fit), var.comp = TRUE)$var.comp
## ----ch8, child = "src/ch8.Rnw"------------------------------------------
## ----init8, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch8-', self.contained = FALSE)
pkg <- c("mice", "dplyr", "gridExtra")
loaded <- sapply(pkg, require, character.only = TRUE,
warn.conflicts = FALSE, quietly = TRUE)
## ----ice.definedata------------------------------------------------------
ideal <- data.frame(
x = c(68, 76, 66, 81, 70, 72, 81, 72),
y1 = c(14, 0, 1, 2, 3, 1, 10, 9),
y0 = c(13, 6, 4, 5, 6, 6, 8, 8),
row.names = c("John", "Caren", "Joyce", "Robert",
"Ruth", "Nick", "Peter", "Torey")
)
# assign first three units to trt
data <- ideal
data[1:3, "y0"] <- NA
data[4:8, "y1"] <- NA
## ----ice.firsttry--------------------------------------------------------
library(mice)
data2 <- data[, -1]
imp <- mice(data2, method = "norm", seed = 188, print = FALSE)
## ----iceplot1, fig.width=7, fig.height=3.5, echo=FALSE, solo=TRUE-------
trellis.par.set(list(layout.heights = list(strip = 1.3)))
stripplot(imp, y1 + y0 ~ as.factor(.imp), pch = c(1, 19),
col = c(mdc(1:2)), ylim = c(-100, 100))
## ----ice.corr1-----------------------------------------------------------
sapply(mice::complete(imp, "all"), function(x) cor(x$y1, x$y0))
## ----ice.makeprior-------------------------------------------------------
set.seed(84409)
rho <- 0.9
mu <- mean(unlist(data2[, c("y1", "y0")]), na.rm = TRUE)
sigma2 <- var(unlist(data2), na.rm = TRUE)
cv <- rho * sigma2
s2 <- matrix(c(sigma2, cv, cv, sigma2), nrow = 2)
prior <- data.frame(MASS::mvrnorm(n = 100, mu = rep(mu, 2),
Sigma = s2))
names(prior) <- c("y1", "y0")
## ----ice.stack-----------------------------------------------------------
# combine data and prior
stacked <- dplyr::bind_rows(prior, data2, .id = "d")
stacked$d <- as.numeric(stacked$d) - 1
## ----ice.impute----------------------------------------------------------
stacked$tau <- stacked$y1 - stacked$y0
pred <- make.predictorMatrix(stacked)
pred[, "tau"] <- 0
meth <- c("", "norm", "norm", "~ I(y1 - y0)")
imp <- mice(stacked, maxit = 100, pred = pred,
meth = meth, print = FALSE)
## ----icetrace, fig.width=7, fig.height=5, echo=FALSE, solo=TRUE----
plot(imp)
## ----iceplot2, fig.width=7, fig.height=3.5, echo=FALSE, solo=TRUE-------
stripplot(imp, y1 + y0 ~ as.factor(.imp), pch = c(1, 19),
col = c("grey70", mdc(2)), ylim = c(-5, 15))
## ----ice.corr2-----------------------------------------------------------
sapply(mice::complete(imp, "all"), function(x) {
x <- x[x$d == 1, ]; cor(x$y0, x$y1)})
## ----potential1, fig.height = 5, echo = FALSE---------------------------
xyplot(imp, y1 ~ y0 | as.factor(.imp), layout = c(3, 2),
groups = d, col = c("grey70", mdc(2)), pch = c(1, 19))
## ----potential1.show, eval = FALSE--------------------------------------
xyplot(imp, y1 ~ y0 | as.factor(.imp), layout = c(3, 2),
groups = d, col = c("grey70", mdc(2)), pch = c(1, 19))
## ----ice.iceplotfunction, echo = FALSE-----------------------------------
iceplot <- function(data, rho, ynames = c("y1", "y0"),
maxit = 100, m =100, seed = NULL) {
# no covariates
if (!is.null(seed)) set.seed(seed)
# define prior dataset
mu <- mean(unlist(data[, ynames]), na.rm = TRUE)
s2 <- matrix(c(1, rho, rho, 1), nrow = 2)
prior <- data.frame(MASS::mvrnorm(n = 100, mu = rep(mu, 2), Sigma = s2))
names(prior) <- ynames
# combine data and prior
stacked <- dplyr::bind_rows(prior, data[, ynames], .id = "d")
stacked$d <- as.numeric(stacked$d) - 1
stacked$tau <- stacked[, ynames[1]] - stacked[, ynames[2]]
# define imputation model
pred <- make.predictorMatrix(stacked)
pred[, "tau"] <- 0
meth <- c("", "norm", "norm", "~ I(y1 - y0)")
imp <- mice(stacked, maxit = maxit, pred = pred,
meth = meth, m = m, print = FALSE)
long <- mice::complete(imp, "long")
long <- long[long$d == 1, ]
long$pat <- factor(long$.id, labels = row.names(data))
p <- parallelplot(~ long[, 4:5] | pat, data = long,
horizontal.axis = FALSE, common.scale = TRUE,
layout = c(8, 1), as.table = TRUE,
between = list(x = 0.8, y = 0.5),
par.settings = mice.theme(), col = mdc(2), lwd = 1,
varnames = c("1", "0"),
scales = list(cex = 0.8))
p
}
## ----ice.calciceplots, cache=TRUE, echo = FALSE--------------------------
p50 <- iceplot(data, rho = 0.5, maxit = 10, seed = 1)
p90 <- iceplot(data, rho = 0.9, seed = 1)
p99 <- iceplot(data, rho = 0.99, seed = 1)
## ----fanplot, fig.width=7, fig.height=10, solo = TRUE, echo = FALSE----
library(gridExtra, warn.conflicts = FALSE)
if (empty_figure) {
plot.new()
} else {
grid.arrange(p50, p90, p99, nrow = 3)
}
## ----ch9, child = "src/ch9.Rnw"------------------------------------------
## ----init9, echo = FALSE, results = 'hide'-------------------------------
opts_chunk$set(fig.path = 'fig/ch9-', self.contained = FALSE)
pkg <- c("mice", "MASS", "foreign")
loaded <- sapply(pkg, require, character.only = TRUE,
warn.conflicts = FALSE, quietly = TRUE)
## ----c85imputeblind, eval=FALSE------------------------------------------
library(mice)
## DO NOT DO THIS
imp <- mice(data) # not recommended
## ----c85readdata1--------------------------------------------------------
library(foreign)
file.sas <- "data/c85/master85.xport"
original.sas <- read.xport(file.sas)
names(original.sas) <- tolower(names(original.sas))
dim(original.sas)
## ----c85readdata2--------------------------------------------------------
# remove 15 columns (text, administrative)
all <- names(original.sas)
drop <- c(3, 22, 58, 162:170, 206:208)
keep <- !(1:length(all) %in% drop)
leiden85 <- original.sas[original.sas$abr == "1", keep]
data <- leiden85
## ----c85inspect1---------------------------------------------------------
ini <- mice(data, maxit = 0) # recommended
table(ini$nmis)
## ----c85inspect2---------------------------------------------------------
table(data$beroep1, useNA = "always")
## ----c85inspect4a--------------------------------------------------------
v1 <- names(ini$nmis[ini$nmis == 0])
outlist1 <- v1[c(1, 3:5, 7:10, 16:47, 51:60, 62, 64:65, 69:72)]
length(outlist1)
## ----c85flux, solo=TRUE, echo=FALSE, fig.height=4, fig.width=4----------
if (empty_figure) {
fx <- fluxplot(data, main=NULL, cex=0.7, lwd = 0.6, col = mdc(4), plot = FALSE)
plot.new()
} else {
fx <- fluxplot(data, main=NULL, cex=0.7, lwd = 0.6, col = mdc(4))
}
## ----c85inspect6---------------------------------------------------------
outlist2 <- row.names(fx)[fx$outflux < 0.5]
length(outlist2)
## ----c85inspect7, eval=TRUE----------------------------------------------
data2 <- data[, !names(data) %in% outlist2]
fx2 <- flux(data2)
outlist3 <- row.names(fx2)[fx2$outflux < 0.5]
## ----c85inspect8---------------------------------------------------------
head(ini$loggedEvents, 2)
tail(ini$loggedEvents, 2)
## ----c85inspect9---------------------------------------------------------
outlist4 <- as.character(ini$loggedEvents[, "out"])
## ----c85inspect10--------------------------------------------------------
outlist <- unique(c(outlist1, outlist2, outlist4))
length(outlist)
## ----c85inspect11--------------------------------------------------------
data2 <- data[, !names(data) %in% outlist]
## ----c85quickpred--------------------------------------------------------
inlist <- c("sex", "lftanam", "rrsyst", "rrdiast")
pred <- quickpred(data2, minpuc = 0.5, include = inlist)
## ----c85predinspect1-----------------------------------------------------
table(rowSums(pred))
## ----c85predinspect2-----------------------------------------------------
rowSums(pred[c("rrsyst", "rrdiast"),])
## ----c85predinspect3, eval=FALSE-----------------------------------------
names(data2)[pred["rrsyst", ] == 1]
## ----c85predinspect4, eval=FALSE-----------------------------------------
vname <- "rrsyst"
y <- cbind(data2[vname], r =! is.na(data2[, vname]))
vdata <- data2[,pred[vname,] == 1]
round(cor(y = y, x = vdata, use = "pair"), 2)
## ----c85fetch, echo=FALSE------------------------------------------------
load("data/c85/imp.blind")
load("data/c85/imp.smart")
imp.qp <- imp.smart
imp <- imp.blind
rm(imp.blind, imp.smart)
## ----c85imputesmart, eval=FALSE------------------------------------------
imp.qp <- mice(data2, pred = pred, seed = 29725)
## ----blindvsquickpred, echo=FALSE, fig.width=6, fig.height=4--------------
vnames <- c("rrsyst", "rrdiast")
cd1 <- mice::complete(imp)[, vnames]
cd2 <- mice::complete(imp.qp)[, vnames]
typ <- factor(rep(c("blind imputation","quickpred"), each = nrow(cd1)))
mis <- ici(data2[,vnames])
mis <- is.na(imp$data$rrsyst)|is.na(imp$data$rrdiast)
cd <- data.frame(typ=typ,mis=mis,rbind(cd1, cd2))
if (empty_figure) {
plot.new()
} else {
tp <- xyplot(jitter(rrdiast,10) ~ jitter(rrsyst,10) | typ, data = cd, groups = mis,
xlab = "Systolic BP (mmHg)", ylab = "Diastolic BP (mmHg)",
col=c(mdc(1),mdc(2)), pch=c(1,19),type=c("g","p"),
strip = strip.custom(bg="grey95"),
scales = list(alternating=1, tck=c(1,0)))
print(tp)
}
## ----c85trellis, eval=FALSE----------------------------------------------
vnames <- c("rrsyst", "rrdiast")
cd1 <- mice::complete(imp)[, vnames]
cd2 <- mice::complete(imp.qp)[, vnames]
typ <- factor(rep(c("blind imputation", "quickpred"),
each = nrow(cd1)))
mis <- ici(data2[, vnames])
mis <- is.na(imp$data$rrsyst) | is.na(imp$data$rrdiast)
cd <- data.frame(typ = typ, mis = mis, rbind(cd1, cd2))
xyplot(jitter(rrdiast, 10) ~ jitter(rrsyst, 10) | typ,
data = cd, groups = mis,
col = c(mdc(1), mdc(2)),
xlab = "Systolic BP (mmHg)",
type = c("g","p"), ylab = "Diastolic BP (mmHg)",
pch = c(1, 19),
strip = strip.custom(bg = "grey95"),
scales = list(alternating = 1, tck = c(1, 0)))
## ----c85nelson-----------------------------------------------------------
dat <- cbind(data2, dead = 1 - data2$dwa)
hazard <- nelsonaalen(dat, survda, dead)
## ----c85nelsoncorrelation, eval=FALSE, echo=FALSE------------------------
tmp <- data.frame(hazard, t = data2$survda,
logt = log(data2$survda),
SBP = data2$rrsyst, DBP = data2$rrdiast)
round(cor(tmp, use = "pair"), 3)
## ----c85km, echo=FALSE, fig.height=4-------------------------------------
library(survival)
fit <- survfit(Surv(survda / 365, 1 - dwa) ~ is.na(rrsyst),
data = data2)
lwd <- 0.6
plot(fit, lty = 1, lwd = 0.9,
xlab = "Years since Intake",
ylab = "K-M Survival Probability",
col = c(mdc(4), mdc(5)), mark.time = FALSE,
axes = FALSE)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
text(4, 0.7, "BP measured")
text(2, 0.3, "BP missing")
## ----c85undamped, eval=FALSE---------------------------------------------
delta <- c(0, -5, -10, -15, -20)
post <- imp.qp$post
imp.all.undamped <- vector("list", length(delta))
for (i in 1:length(delta)) {
d <- delta[i]
cmd <- paste("imp[[j]][,i] <- imp[[j]][,i] +", d)
post["rrsyst"] <- cmd
imp <- mice(data2, pred = pred, post = post, maxit = 10,
seed = i * 22)
imp.all.undamped[[i]] <- imp
}
## ----c85damper, eval=FALSE-----------------------------------------------
cmd <- paste("fit <- lm(y ~ as.matrix(x));
damp <- sqrt(1 - summary(fit)$r.squared);
imp[[j]][, i] <- imp[[j]][, i] + damp * ", d)
## ----c85readimp, echo=FALSE----------------------------------------------
load("data/c85/imp.all.damper2")
imp.all.damped <- imp.all.damper2
rm(imp.all.damper2)
## ----c85cda--------------------------------------------------------------
cda <- expression(
sbpgp <- cut(rrsyst, breaks = c(50, 124, 144, 164, 184, 200,
500)),
agegp <- cut(lftanam, breaks = c(85, 90, 95, 110)),
dead <- 1 - dwa,
coxph(Surv(survda, dead)
~ C(sbpgp, contr.treatment(6, base = 3))
+ strata(sexe, agegp)))
imp <- imp.all.damped[[1]]
fit <- with(imp, cda)
## ----c85hazardratio------------------------------------------------------
as.vector(exp(summary(pool(fit))[, 1]))
## ----readdata7a, echo=FALSE----------------------------------------------
library(mice)
bmi <- function(h, w) w / (h / 100)^2
krul <- selfreport[selfreport$src == "krul",]
males <- krul[krul$sex=="Male",]
## ----plotbmi, fig.width=4.5, fig.height=3,solo=TRUE,echo=FALSE----------
lwd <- 0.6
xy <- xy.coords(krul$bm, krul$br-krul$bm)
if (empty_figure) {
plot.new()
} else {
plot(xy, col = mdc(1),
xlab = "Measured BMI", ylab = "Self-Reported - Measured BMI",
xlim = c(17, 45), ylim = c(-5,5), type = "n",lwd = lwd,
axes = FALSE)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
polygon(x = c(30, 20, 30), y = c(0, 10, 10),
col = "grey95", border = NA)
polygon(x = c(30, 40, 30), y = c(0, -10, -10),
col = "grey95", border = NA)
abline(0, 0, lty = 2, lwd = lwd)
points(xy, col = mdc(1),cex = 0.7)
lines(lowess(xy), lwd = 1.5, col = mdc(4))
text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3)
box(lwd = lwd)
}
## ----plotexplain,fig.height=3, fig.width=3,solo=TRUE,echo=FALSE----------
lwd = 0.6
fit <- lm(bm~br,data=males)
plot(x=males$br,y=males$bm,
xlim=c(26,34),ylim=c(26,34),
xlab="Self-Reported BMI", ylab="Measured BMI",col=mdc(1),
cex.lab=1, cex = 0.7, bty = "n", axes = FALSE)
axis(side = 1, lwd = lwd)
axis(side = 2, lwd = lwd, las = 1)
box(lwd = lwd)
abline(h=30,v=30,lty=2, lwd = lwd)
abline(coef(fit),col=mdc(4), lwd = lwd)
abline(v=(30-coef(fit)[1])/coef(fit)[2],col=mdc(4), lwd = lwd)
text(1:4,x=c(33.8,33.8,26.2,26.2),y=c(33.8,26.2,26.2,33.8),cex=2.5)
text(c("a","b"),x=c(29.1,29.7,29.1,29.7),y=c(34.1,34.1,26.4,26.4),cex=1.8,adj=c(0.5,1))
## ----srcpattern----------------------------------------------------------
data <- selfreport[, c("age", "sex", "hm", "hr", "wm", "wr")]
md.pattern(data, plot = FALSE)
## ----imputebmi, cache=TRUE-----------------------------------------------
bmi <- function(h, w) w / (h / 100)^2
meth <- make.method(selfreport)
meth[c("prg", "edu", "etn")] <- ""
meth["bm"] <- "~ bmi(hm, wm)"
pred <- make.predictorMatrix(selfreport)
pred[, c("src", "id", "pop", "prg", "edu", "etn",
"web", "bm", "br")] <- 0
imp <- mice(selfreport, pred = pred, meth = meth, m = 10,
seed = 66573, maxit = 20, print = FALSE)
## ----plotimpbmi, fig.width=4.5, fig.height=3,solo=TRUE,echo=FALSE-------
lwd <- 0.6
cd <- mice::complete(imp, 1)
xy <- xy.coords(cd$bm, cd$br - cd$bm)
if (empty_figure) {
plot.new()
} else {
plot(xy, col = mdc(2),
xlab = "Measured BMI", ylab = "Self-Reported - Measured BMI",
xlim = c(17, 45), ylim = c(-5, 5), type = "n", lwd = lwd,
axes = FALSE)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
polygon(x = c(30, 20, 30), y = c(0, 10, 10),
col = "grey95", border = NA)
polygon(x = c(30, 40, 30), y = c(0, -10, -10),
col = "grey95", border = NA)
abline(0, 0, lty = 2, lwd = lwd)
idx <- cd$src == "krul"
xyc <- xy; xyc$x <- xy$x[idx]; xyc$y <- xy$y[idx]
xys <- xy; xys$x <- xy$x[!idx]; xys$y <- xy$y[!idx]
points(xyc, col = mdc(1), cex=0.7)
points(xys, col = mdc(2), cex=0.7)
lines(lowess(xyc), col = mdc(4), lwd = 1.5)
lines(lowess(xys), col = mdc(5), lwd = 1.5)
text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3)
box(lwd = lwd)
}
## ----walkinginit, echo=FALSE---------------------------------------------
library(mice)
library(foreign)
dataproject <- "Data/wad"
.store <- file.path(dataproject,"R/store/")
file <- file.path(dataproject, "original/walklink.sav")
original <- read.spss(file=file, to.data.frame=TRUE)
data <- original
names(data) <- c("src","sex","age","sip1","haq8","grs9")
data$src <- factor(data$src, levels=2:3, labels=c("Ergo","Euri"))
data$sip1 <- as.factor(data$sip1)
data$haq8 <- as.ordered(data$haq8)
data$grs9 <- as.ordered(data$grs9)
## ----walkingimpute1------------------------------------------------------
fA <- c(242, 43, 15, 0, 6)
fB <- c(145, 110, 29, 8)
YA <- rep(ordered(c(0:3, NA)), fA)
YB <- rep(ordered(c(0:3)), fB)
Y <- rbind(data.frame(YA, YB = ordered(NA)),
data.frame(YB, YA = ordered(NA)))
## ----walkingpattern, echo=FALSE, fig.height=2.5--------------------------
z <- md.pattern(Y)
## ----walkingimpute2------------------------------------------------------
ra <- function(x, simplify = FALSE) {
if (!is.mira(x)) return(NULL)
ra <- x$analyses
if (simplify) ra <- unlist(ra)
return(ra)
}
micemill <- function(n) {
for (i in 1:n) {
imp <<- mice.mids(imp, print = FALSE)
cors <- with(imp, cor(as.numeric(YA), as.numeric(YB),
method = "kendall"))
tau <<- rbind(tau, ra(cors, simplify = TRUE))
}
}
## ----walkingimpute3, cache = TRUE----------------------------------------
tau <- NULL
imp <- mice(Y, maxit = 0, m = 10, seed = 32662, print = FALSE)
micemill(50)
## ----walkingimpute5a, eval = FALSE---------------------------------------
plotit <- function()
matplot(x = 1:nrow(tau), y = tau,
ylab = expression(paste("Kendall's ", tau)),
xlab = "Iteration", type = "l", las = 1)
plotit()
## ----walkingimpute5b, solo=TRUE, echo=FALSE, fig.width = 6, fig.height = 3----
plotit2 <- function(lwd = 0.6) {
matplot(x = 1:nrow(tau), y = tau,
ylab = expression(paste("Kendall's ", tau)),
xlab = "Iteration", type = "l", axes = FALSE,
lwd = lwd, cex.lab = 1.25)
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
}
plotit2()
## ----walkingimpute5d, fig.height=3, echo=FALSE---------------------------
z <- md.pattern(walking)
## ----wasimpute7a, cache = TRUE-------------------------------------------
tau <- NULL
pred <- make.predictorMatrix(walking)
pred[, c("src", "age", "sex")] <- 0
imp <- mice(walking, maxit = 0, m = 10, pred = pred,
seed = 92786, print = FALSE)
micemill(20)
## ----walkingimpute7b, echo=FALSE, fig.height=3, fig.width=6, solo=TRUE----
plotit2()
## ----walkingimpute8, eval = FALSE----
props <- with(imp, mean(YB[src == "A"] == '0'))
thetaAB <<- rbind(thetaAB, ra(props, simplify = TRUE))
## ----walkingimpute9, echo=FALSE------------------------------------------
micemill <- function(n) {
for (i in 1:n) {
imp <<- mice.mids(imp, print = FALSE)
cors <- with(imp, cor(as.numeric(YA[src=="A"]),
as.numeric(YB[src=="A"]),
method="kendall"))
tau <<- rbind(tau, ra(cors, s=TRUE)) # global assignment
means <- with(imp, mean(as.numeric(YA[src=="A"]), na.rm=TRUE))
thetaBA <<- rbind(thetaBA, ra(means, s=TRUE)-1)
props <- with(imp, mean(YB[src=="A"]=='0'))
thetaAB <<- rbind(thetaAB, ra(props, s=TRUE))
tabs <- with(imp, ftable(addmargins(
table(YA[src=="A"],YB[src=="A"],
useNA="ifany", dnn=c("YA","YB")))))
}
}
thetaBA <- NULL
## ----walkingimpute10, cache = TRUE---------------------------------------
tau <- NULL
thetaAB <- NULL
pred2 <- pred1 <- make.predictorMatrix(walking)
pred1[, c("src", "age", "sex")] <- 0
pred2[, "src"] <- 0
imp <- mice(walking, maxit = 0, m = 10, pred = pred1,
seed = 99786)
micemill(20)
imp <- mice(walking, maxit = 0, m = 10, pred = pred2)
micemill(20)
## ----walkingplotthetaAB, fig.height=3, fig.width=6, solo=TRUE, echo=FALSE----
lwd <- 0.6
matplot(x = 1:nrow(thetaAB), y = thetaAB,
xlab = "Iteration",
ylab = expression(hat(theta)[AB]),
type = "l", axes = FALSE,
lwd = lwd, cex.lab = 1.25,
ylim = c(0.42, 0.67))
axis(1, lwd = lwd)
axis(2, lwd = lwd, las = 1)
box(lwd = lwd)
abline(h = 0.497, v