# LME and Creatinine

## Joining Data

In [1]:
library(data.table)
library(nlme)
library(parallel)

# Read the raw measurements, then key (sort and index) the table by `group`.
creatinine.data <- data.table(read.csv('creat.csv'))
setkey(creatinine.data, group)
# Preview the first rows.
head(creatinine.data)

id,group,age,scr
1,1,35.765,0.182
1,1,37.99,0.088
3,1,51.083,0.156
3,1,52.386,0.116
3,1,52.805,0.087
3,1,52.997,0.067


In [2]:
# Lookup table: one row per group code with its two 0/1 indicators.
group.data <- data.table(
    group=c(1,2,3,4),
    kidney.disease=c(1,1,0,0),
    hypertension=c(1,0,1,0))
# Key by `group` so the table can be used in keyed joins.
setkey(group.data, group)
group.data

group,kidney.disease,hypertension
1,1,1
2,1,0
3,0,1
4,0,0


In [3]:
# Keyed join: both tables are keyed on `group`, so this attaches the
# kidney.disease and hypertension indicators to every measurement row.
creatinine.group.data <- creatinine.data[group.data]
# Re-key by subject id; the functions below look subjects up with J(id).
setkey(creatinine.group.data, id)
head(creatinine.group.data)

id,group,age,scr,kidney.disease,hypertension
1,1,35.765,0.182,1,1
1,1,37.99,0.088,1,1
2,2,24.997,1.429,1,0
2,2,27.441,1.111,1,0
2,2,30.524,1.429,1,0
3,1,51.083,0.156,1,1


## Fitting LME Model

In [280]:
# Fit a linear mixed-effects model of serum creatinine (scr) by maximum
# likelihood.
#
# data:             table with columns scr, age, kidney.disease and id.
# interaction.term: when TRUE, the age:kidney.disease interaction is added to
#                   the fixed effects scr ~ age + kidney.disease.
#
# Returns the fitted lme object.
fit.scr.model <- function(data, interaction.term=FALSE) {
    # The local name `formula` is kept on purpose: lme() records its call, so
    # the name shows up in the printed summary.
    formula <- if (interaction.term) {
        update(scr ~ age + kidney.disease, . ~ . + age:kidney.disease)
    } else {
        scr ~ age + kidney.disease
    }
    lme(formula,
        # Random intercept and age slope per subject, diagonal (pdDiag) structure.
        random=reStruct(~age|id, pdClass='pdDiag'),
        # Exponential within-subject correlation in age, with a nugget effect.
        correlation=corExp(form=~age|id, nugget=TRUE),
        method='ML',
        data=data,
        # Iteration limits passed to the optimizer via lmeControl.
        control=lmeControl(maxIter=100, msMaxIter=100, niterEM=50))
}

## Covariance for Subject $i$

In [220]:
# Marginal covariance matrix of the observations of subject `i`.
#
# model: fitted lme object; model$data must support the keyed lookup
#        model$data[J(i), age] (a data.table keyed by id).
# i:     subject id; as.character(i) must name the subject's entry in the
#        list returned by corMatrix().
#
# Returns the n x n matrix (Z G Z' + R) * sigma^2, where n is the number of
# observations of subject i.
make.covariance <- function(model, i) {
    # Random effects design matrix: an intercept column and the subject's ages.
    Z <- cbind(1, model$data[J(i), age])
    # Error term, usually denoted epsilon.
    error.correlation <- corMatrix(model$modelStruct$corStruct)[[as.character(i)]]
    # No stored correlation matrix for this subject: fall back to uncorrelated
    # errors. This must be an identity matrix, not the scalar 1, because `+`
    # would add a scalar to every entry (including the off-diagonals) of the
    # n x n random effects term below.
    if (is.null(error.correlation)) { error.correlation <- diag(nrow(Z)) }
    # Random effects correlation, Z * G * transpose(Z).
    random.correlation <- Z %*% as.matrix(model$modelStruct$reStruct$id) %*% t(Z)
    # Convert correlation matrix into covariance matrix.
    (random.correlation + error.correlation)*(model$sigma*model$sigma)
}

## $\beta_j$ Covariance 

### Maximum Likelihood Estimate

This assumes that the random effects model is correct. It can also be used as the *bread* part of the sandwich estimator.

In [286]:
# Fixed-effects design matrix for the rows of `data`, built from the terms
# stored on the fitted model.
#
# model: object with a `terms` component.
# data:  table containing the variables named in those terms.
make.covariates <- function(model, data) {
    model.matrix(model$terms, data)
}

# Model-based covariance of the fixed-effect estimates:
# inverse of sum_i X_i' V_i^{-1} X_i, where V_i = make.covariance(model, i).
#
# model: fitted lme object whose data supports keyed lookup by subject id.
make.ml.parameter.covariance <- function(model) {
    # Contribution of a single subject to the information matrix.
    subject.information <- function(i) {
        X <- make.covariates(model, model$data[J(i)])
        precision <- chol2inv(chol(make.covariance(model, i)))
        t(X) %*% precision %*% X
    }
    subject.ids <- unique(model$groups)$id
    information <- Reduce(`+`, lapply(subject.ids, subject.information))
    # Invert through the Cholesky factor.
    chol2inv(chol(information))
}

### Sandwich Estimate

In [285]:
# Response vector: the `scr` column of `data`.
make.response <- function(data) {
    data$scr
}

# Sandwich (robust) covariance of the fixed-effect estimates:
# bread %*% meat %*% bread, where bread is the model-based covariance and
# meat = sum_i X_i' W_i r_i r_i' W_i X_i with W_i = V_i^{-1} and r_i the
# residuals of subject i about the fixed-effects fit.
#
# model: fitted lme object whose data supports keyed lookup by subject id.
make.sandwich.parameter.covariance <- function(model) {
    bread <- make.ml.parameter.covariance(model)
    # Use the coefficients of the model passed in, not a global model object.
    fixed.effects <- model$coefficients$fixed
    meat <- Reduce(`+`, lapply(unique(model$groups)$id, function(i) {
        # Look the subject's rows up once and reuse them.
        subject.data <- model$data[J(i)]
        X <- make.covariates(model, subject.data)
        weights <- chol2inv(chol(make.covariance(model, i)))
        residuals <- as.numeric(make.response(subject.data) - X %*% fixed.effects)
        # Outer product of residuals: empirical covariance for this subject.
        empirical.covariance <- outer(residuals, residuals)
        t(X) %*% weights %*% empirical.covariance %*% weights %*% X
    }))
    bread %*% meat %*% bread
}

### Bootstrap Estimate

To account for clustering of the data, we resample clusters.

In [274]:
# Draw a cluster bootstrap sample: subject ids are sampled with replacement
# and every sampled subject contributes all of its rows.
#
# data: data.table keyed by `id`.
#
# Returns a new data.table keyed by `id`, where `id` now numbers the sampled
# clusters 1..n, so a subject drawn twice appears as two distinct clusters.
resample.clusters <- function(data) {
    cluster.ids <- unique(data$id)
    # Index through sample.int() rather than calling sample(cluster.ids):
    # sample(x) samples from 1:x when x is a single number, which would draw
    # the wrong ids for data with only one cluster.
    sampled.ids <- cluster.ids[sample.int(length(cluster.ids), replace=TRUE)]
    # Keyed join: each sampled id pulls in all rows of that subject.
    resampled.data <- data[
        data.table(id=sampled.ids,
                   new.id=seq_along(sampled.ids),
                   key='id')]
    # Replace the original id with the new cluster number.
    resampled.data[, id:=NULL]
    setnames(resampled.data, 'new.id', 'id')
    setkey(resampled.data, id)
    resampled.data
}

# Cluster bootstrap of the fixed effects: 256 resamples, each refit with
# fit.scr.model, run through mclapply with mc.cores=4. The result has one row
# of coefficients per resample.
bootstrap.samples <- do.call(rbind, mclapply(
    seq_len(256), function(replicate.index) {
        fit.scr.model(resample.clusters(creatinine.group.data))$coefficients$fixed
    }, mc.cores=4))

## Models

### Without Interaction Term

In [282]:
# Fit the model without the age:kidney.disease interaction (the default).
scr.model <- fit.scr.model(creatinine.group.data)
# adjustSigma=FALSE is passed so the summary is not adjusted; the standard
# errors it prints agree with the ML standard errors tabulated further down.
summary(scr.model, adjustSigma=FALSE)

Linear mixed-effects model fit by maximum likelihood
 Data: data 
        AIC       BIC   logLik
  -53.94986 -11.00314 34.97493

Random effects:
 Formula: ~age | id
 Structure: Diagonal
        (Intercept)        age  Residual
StdDev:  0.04643211 0.00522239 0.2633414

Correlation Structure: Exponential spatial correlation
 Formula: ~age | id 
 Parameter estimate(s):
    range    nugget 
7.8894707 0.1759323 
Fixed effects: list(formula) 
                    Value  Std.Error  DF   t-value p-value
(Intercept)     1.5322224 0.03955663 965  38.73490       0
age            -0.0129154 0.00096383 965 -13.40006       0
kidney.disease -0.2819162 0.02626024 617 -10.73548       0
 Correlation: 
               (Intr) age   
age            -0.849       
kidney.disease -0.352 -0.082

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.49081741 -0.56731338 -0.07887231  0.43113427  4.95897388 

Number of Observations: 1585
Number of Groups: 619 

In [287]:
# Model-based (maximum likelihood) covariance of the fixed effects.
ml.parameter.covariance <- make.ml.parameter.covariance(scr.model)

# Sandwich covariance of the fixed effects.
sandwich.parameter.covariance <- make.sandwich.parameter.covariance(scr.model)

# Cluster bootstrap: refit on 256 resampled data sets (mclapply, mc.cores=4)
# and take the sample covariance of the resulting coefficient vectors.
bootstrap.parameter.samples <- do.call(rbind, mclapply(
    seq_len(256), function(replicate.index) {
        fit.scr.model(resample.clusters(creatinine.group.data))$coefficients$fixed
    }, mc.cores=4))
bootstrap.parameter.covariance <- cov(bootstrap.parameter.samples)

In [294]:


# Standard errors (square roots of the covariance diagonals) from the three
# estimators, one column per estimator and one row per fixed effect.
standard.errors <- data.frame(
    sqrt(diag(ml.parameter.covariance)),
    sqrt(diag(sandwich.parameter.covariance)),
    sqrt(diag(bootstrap.parameter.covariance)))
names(standard.errors) <- c('ML Standard Error',
                            'Sandwich Standard Error',
                            'Bootstrap Standard Error')

standard.errors

Unnamed: 0,ML Standard Error,Sandwich Standard Error,Bootstrap Standard Error
(Intercept),0.0395566329,0.0393479582,0.0391103317
age,0.0009638288,0.0009409584,0.0009455051
kidney.disease,0.0262602392,0.0250273809,0.0356029577


In [279]:
# Spot check: draw one resample and show the rows of the cluster numbered 3.
resample.clusters(creatinine.group.data)[J(3)]

group,age,scr,kidney.disease,hypertension,id
1,64.498,0.098,1,1,3
1,64.531,0.083,1,1,3
1,64.542,0.081,1,1,3
1,64.556,0.094,1,1,3


In [53]:
# Looking the same id up twice returns that subject's rows twice (see the
# output below); resample.clusters depends on this to duplicate clusters.
creatinine.group.data[J(c(1,1))]

id,group,age,scr,kidney.disease,hypertension
1,1,35.765,0.182,1,1
1,1,37.99,0.088,1,1
1,1,35.765,0.182,1,1
1,1,37.99,0.088,1,1


In [275]:
# Show the bootstrap fixed-effect estimates, one row per resample.
bootstrap.samples

(Intercept),age,kidney.disease
1.592835,-0.01562243,-0.2216814
1.512682,-0.01267716,-0.2492987
1.524710,-0.01214914,-0.3250322
1.597048,-0.01522402,-0.2400272
1.490458,-0.01180992,-0.2912710
1.608163,-0.01491729,-0.2800163
1.583784,-0.01455206,-0.2543530
1.598911,-0.01498603,-0.2606407
1.541182,-0.01353970,-0.2493905
1.527865,-0.01359189,-0.2425840


In [276]:
# Bootstrap standard errors: square roots of the diagonal of the sample
# covariance of the bootstrap estimates.
sqrt(diag(cov(bootstrap.samples)))

In [259]:
# Copy the table (presumably so later by-reference changes leave the original
# untouched -- confirm).
copy.data <- copy(creatinine.group.data)
# NOTE(review): the right-hand side of this assignment is missing in the
# source; the cell output below reports that 'tmp' was not found.
tmp <- 

ERROR: Error in eval(expr, envir, enclos): object 'tmp' not found


In [150]:
# Mean squared deviation of the first row of tmp about its mean, rescaled by
# 16/15.
# NOTE(review): 16/15 looks like an n/(n-1) correction for n = 16 values, and
# tmp is not defined anywhere in the visible source -- confirm both.
mean((tmp[1,] - mean(tmp[1,]))^2)*16/15