From 870cf1168d3a74bc974a8c58f7e6af4db53ad7b0 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Fri, 18 Mar 2016 15:13:34 -0400
Subject: [PATCH 01/10] write mapper and reducer for parallel computing; add
 diagcov approximation functionality.

---
 DESCRIPTION          |   1 +
 NAMESPACE            |   1 +
 R/mhglm.R            |   7 +-
 R/mhglm.fit.R        |  39 +--
 R/moment.est.R       | 593 +++++++++++++++++++++++++++----------------
 R/unpivotRp.R        |  10 +
 man/mhglm.control.Rd |   5 +-
 7 files changed, 402 insertions(+), 254 deletions(-)
 create mode 100644 R/unpivotRp.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 7eb56e4..8b78f53 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,6 +15,7 @@ Imports:
     lme4,
     bigmemory,
     foreach,
+    doParallel,
     logging
 Suggests:
     testthat
diff --git a/NAMESPACE b/NAMESPACE
index 36aa2b1..79f9462 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,6 +25,7 @@ importFrom("bigmemory", as.big.matrix)
 importFrom("logging", loginfo)
 importFrom("foreach", foreach, "%dopar%")
 
+
 ## Exports
 export(VarCorr.mhglm)
 export(firthglm.control)
diff --git a/R/mhglm.R b/R/mhglm.R
index eee7d02..69f7d8a 100644
--- a/R/mhglm.R
+++ b/R/mhglm.R
@@ -14,7 +14,8 @@
 
 
 
-mhglm.control <- function(standardize = TRUE, steps = 1, parallel = FALSE,
+mhglm.control <- function(standardize = TRUE, steps = 1, 
+			  parallel = FALSE, diagcov = FALSE,
                           fit.method = "firthglm.fit",
                           fit.control = list(...), ...)
 {
@@ -24,6 +25,8 @@ mhglm.control <- function(standardize = TRUE, steps = 1, parallel = FALSE,
         stop("number of steps must be >= 0")
     if (!is.logical(parallel) || is.na(parallel))
         stop("value of 'parallel' must be TRUE or FALSE")
+    if (!is.logical(diagcov) || is.na(diagcov))
+      stop("value of 'diagcov' must be TRUE or FALSE")
 
     if (!is.character(fit.method) && !is.function(fit.method))
         stop("invalid 'fit.method' argument")
@@ -32,7 +35,7 @@ mhglm.control <- function(standardize = TRUE, steps = 1, parallel = FALSE,
     if (identical(fit.method, "glm.fit"))
         fit.control <- do.call("glm.control", fit.control)
 
-    list(standardize = standardize, steps = steps, parallel = parallel,
+    list(standardize = standardize, steps = steps, parallel = parallel, diagcov = diagcov,
          fit.method = fit.method, fit.control = fit.control)
 }
 
diff --git a/R/mhglm.fit.R b/R/mhglm.fit.R
index 26b4def..3844067 100644
--- a/R/mhglm.fit.R
+++ b/R/mhglm.fit.R
@@ -67,27 +67,14 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
 
     if(control$parallel) {
         Rp <- foreach(i = seq_len(ngroups)) %dopar% {
-            qr.i <- m$qr[[i]]
-            rank.i <- qr.i$rank
-            Rp <- matrix(0, rank.i, nvars)
-            if (rank.i > 0L) {
-                R.i <- qr.R(qr.i)
-                pivot.i <- qr.i$pivot
-                Rp[, pivot.i] <- R.i[seq_len(rank.i), ]
-            }
-            return(Rp)
-        }
+	  qr.i <- m$qr[[i]]
+	  return(unpivotRp(qr.i,nvars))
+	}
     } else {
         for(i in seq_len(ngroups)) {
-            qr.i <- m$qr[[i]]
-            rank.i <- qr.i$rank
-            Rp[[i]] <- matrix(0, rank.i, nvars)
-            if (rank.i > 0L) {
-                R.i <- qr.R(qr.i)
-                pivot.i <- qr.i$pivot
-                Rp[[i]][,pivot.i] <- R.i[seq_len(rank.i),]
-            }
-        }
+	  qr.i <- m$qr[[i]]
+	  Rp[[i]] <- unpivotRp(qr.i,nvars)
+	}
     }
 
     # change coordinates so that average precision is identity
@@ -96,11 +83,11 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
         # compute averge precision of estimates
         prec.avg <- matrix(0, nvars, nvars)
         if(control$parallel) {
-            prec.avg <- foreach(i = seq_len(ngroups)) %dopar% {
-                scale.i <- sqrt(1/dispersion[i])
-                return(crossprod(scale.i * Rp[[i]]))
-            }
-            prec.avg <- Reduce('+', prec.avg)
+	  prec.avg <- foreach(i = seq_len(ngroups)) %dopar% {
+	    scale.i <- sqrt(1/dispersion[i])
+	    return(crossprod(scale.i * Rp[[i]]))
+	  }
+	  prec.avg <- Reduce('+', prec.avg)
         } else {
             for (i in seq_len(ngroups)) {
                 scale.i <- sqrt(1/dispersion[i])
@@ -200,13 +187,13 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
         for (s in seq_len(control$steps)) {
             est0 <- moment.est(coef, nfixed=rank.fixed, subspace, precision,
                                dispersion, start.cov=est0$cov, 
-                               parallel=control$parallel)
+                               parallel=control$parallel,diagcov = control$diagcov)
             logging::loginfo("Refining mean and covariance estimates",
                              logger="mbest.mhglm.fit")
         }
     })
     est <- moment.est(coef, nfixed=rank.fixed, subspace, precision, dispersion,
-                      start.cov=est0$cov, parallel=control$parallel)
+                      start.cov=est0$cov, parallel=control$parallel, diagcov = control$diagcov)
     mean <- est$mean
     mean.cov <- est$mean.cov
     cov <- est$cov
diff --git a/R/moment.est.R b/R/moment.est.R
index d0e81a3..1b59585 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -13,246 +13,389 @@
 # limitations under the License.
 
 
-moment.est <- function(coefficients, nfixed, subspace, precision, dispersion,
-                       start.cov = NULL, parallel = FALSE)
+
+# Compute 'sufficient statitics' for estimating fixed coef mean using moment method.
+# Usually it will be used separately on each cores.
+moment.est.mean.mapper <- function(coefficients, nfixed, subspace, precision, dispersion, start.cov = NULL)
 {
-    logging::loginfo("Estimating moments", logger="mbest.mhglm.fit")
-    ngroups <- nrow(coefficients)
-    dim <- ncol(coefficients)
-    nrandom <- dim - nfixed
-    names <- colnames(coefficients)
-
-    if (ngroups == 0L || dim == 0L){
-        cov <- matrix(0, dim, dim)
-        dimnames(cov) <- list(names, names)
-        return(cov)
-    }
 
-    fixed <- seq_len(nfixed)
-    random  <- nfixed + seq_len(nrandom)
-
-    if (is.null(start.cov))
-        start.cov <- diag(nrandom)
-
-    logging::loginfo("Computing mean estimate and covariance bias correction",
-                     logger="mbest.mhglm.fit")
-    if(parallel) {
-        results <- foreach(i = seq_len(ngroups)) %dopar% {
-            u <- subspace[[i]]
-            u1 <- u[fixed,,drop=FALSE]
-            u2 <- u[random,,drop=FALSE]
-            l <- precision[[i]]
-            sigma2 <- dispersion[i]
-            r <- length(l)
-            if (r == 0L) {
-                return(NULL)
-            }
-            s <- sqrt(l)
-            us <- u %*% diag(s, r, r)
-            u1s <- us[fixed,,drop=FALSE]
-            u2s <- us[random,,drop=FALSE]
-
-            cov22 <- t(u2s) %*% start.cov %*% u2s
-            w.inv <- cov22 + diag(sigma2, r, r)
-            R <- chol(w.inv)
-            R.u1s.t <- backsolve(R, t(u1s), transpose=TRUE)
-            R.u2s.t <- backsolve(R, t(u2s), transpose=TRUE)
-
-            w11 <- t(R.u1s.t) %*% R.u1s.t
-            w12 <- t(R.u1s.t) %*% R.u2s.t
-            w22 <- t(R.u2s.t) %*% R.u2s.t
-
-            R2.u2s.t <- backsolve(R, R.u2s.t)
-            B <- sigma2 * t(R2.u2s.t) %*% R2.u2s.t
-
-            w1b <- w11 %*% coefficients[i, fixed]
-                    + w12 %*% coefficients[i, random]
-
-            return(list(w11, w12, w22, w1b, B))
-        }
-        wtot <- (Reduce('+', lapply(results, function(x) x[[1]]))
-                 / length(results))
-        weight12 <- lapply(results, function(x) x[[2]])
-        weight22 <- lapply(results, function(x) x[[3]])
-        weight1.coef <- t(sapply(results, function(x) x[[4]]))
-        wt.bias <- (Reduce('+', lapply(results, function(x) x[[5]]))
-                    / length(results))
-        rm(results); gc()
-    } else {
-        weight11 <- array(0, dim=c(ngroups, nfixed, nfixed))
-        weight12 <- array(0, dim=c(ngroups, nfixed, nrandom))
-        weight22 <- array(0, dim=c(ngroups, nrandom, nrandom))
-        bias <- array(0, dim=c(ngroups, nrandom, nrandom))
-        weight1.coef <- matrix(0, ngroups, nfixed)
-
-        for (i in seq_len(ngroups)) {
-            u <- subspace[[i]]
-            u1 <- u[fixed,,drop=FALSE]
-            u2 <- u[random,,drop=FALSE]
-            l <- precision[[i]]
-            sigma2 <- dispersion[i]
-            r <- length(l)
-
-            if (r == 0L) {
-                next
-            }
-            # implementation trick to avoid 1/li:
-            #
-            # S = L^{1/2}
-            #
-            # W = (U2^T Sigma U2 + a L^{-1})^{-1}
-            #   = [L^{-1/2} (L^{1/2} U2^T Sigma U2 L^{1/2} + a I) L^{-1/2}]^{-1}
-            #   = S (S U2^T Sigma U2 S + A I)^{-1} S
-            #
-            # W[kl] = U[k] W U[l]^T
-            #       = (U[k] S) (U2s^T Sigma U2s + a I)^{-1} (U[l] S)^T
-            #
-            # B = U2 W (a L^{-1}) W U2^T
-            #
-            #   = U2 S (S U2^T Sigma U2 S + A I)^{-1} S
-            #        (a S^(-2))
-            #        S (S U2^T Sigma U2 S + A I)^{-1} S U2^T
-            #
-            #   =  a U2 S (S U2^T Sigma U2 S + a I)^{-2} S U2^T
-            #
-            s <- sqrt(l)
-            us <- u %*% diag(s, r, r)
-            u1s <- us[fixed,,drop=FALSE]
-            u2s <- us[random,,drop=FALSE]
-
-            cov22 <- t(u2s) %*% start.cov %*% u2s
-            w.inv <- cov22 + diag(sigma2, r, r)
-            R <- chol(w.inv)
-            R.u1s.t <- backsolve(R, t(u1s), transpose=TRUE)
-            R.u2s.t <- backsolve(R, t(u2s), transpose=TRUE)
-
-            w11 <- t(R.u1s.t) %*% R.u1s.t
-            w12 <- t(R.u1s.t) %*% R.u2s.t
-            w22 <- t(R.u2s.t) %*% R.u2s.t
-
-            R2.u2s.t <- backsolve(R, R.u2s.t)
-            B <- sigma2 * t(R2.u2s.t) %*% R2.u2s.t
-
-            w1b <- (w11 %*% coefficients[i,fixed]
-                    + w12 %*% coefficients[i,random])
-            weight11[i,,] <- w11
-            weight12[i,,] <- w12
-            weight22[i,,] <- w22
-            weight1.coef[i,] <- w1b
-            bias[i,,] <- B
-        }
-
-        wtot <- apply(weight11, c(2,3), mean)
+
+  ngroups <- nrow(coefficients)
+  dim <- ncol(coefficients)
+  nrandom <- dim - nfixed
+  names <- colnames(coefficients)
+
+  fixed <- seq_len(nfixed)
+  random  <- nfixed + seq_len(nrandom)
+
+  if (is.null(start.cov))
+    start.cov <- diag(nrandom)
+
+  # compute mean estimate and covariance bias correction
+  weight11.sum <- matrix(0,  nfixed, nfixed)
+  weight1.coef.sum <- matrix(0,  nfixed, 1)
+
+  for (i in seq_len(ngroups)) {
+    u <- subspace[[i]]
+    u1 <- u[fixed,,drop=FALSE]
+    u2 <- u[random,,drop=FALSE]
+    l <- precision[[i]]
+
+    sigma2 <- dispersion[i]
+    r <- length(l)
+
+    if (r == 0L) {
+      next
     }
 
-    mean <- pseudo.solve(wtot, colMeans(weight1.coef))
-    if (attr(mean, "deficient")) {
-        warning(paste0("cannot solve fixed effect moment equation",
-                       " due to rank deficiency"))
+    s <- sqrt(l)
+    us <- u %*% diag(s, r, r)
+    u1s <- us[fixed,,drop=FALSE]
+    u2s <- us[random,,drop=FALSE]
+
+    cov22 <- t(u2s) %*% start.cov %*% u2s
+    w.inv <- cov22 + diag(sigma2, r, r)
+    R.w.inv <- chol(w.inv)
+    R.u1s.t <- backsolve(R.w.inv, t(u1s), transpose=TRUE)
+    R.u2s.t <- backsolve(R.w.inv, t(u2s), transpose=TRUE)
+
+    w11 <- t(R.u1s.t) %*% R.u1s.t
+    w12 <- t(R.u1s.t) %*% R.u2s.t
+
+    R2.u2s.t <- backsolve(R.w.inv, R.u2s.t)
+
+    w1b <- (w11 %*% coefficients[i,fixed]
+	    + w12 %*% coefficients[i,random])
+
+    weight11.sum <- weight11.sum + w11
+    weight1.coef.sum <- weight1.coef.sum + w1b
+  }
+
+  return(list(ngroups = ngroups, 
+	      weight11.sum  = weight11.sum ,
+	      weight1.coef.sum  = weight1.coef.sum ))
+
+}
+
+# Estimate fixed effect's coefficient. 
+# Usually used as the 'combine' function in foreach function.
+moment.est.mean.reducer <- function(...)
+{
+
+  ret <- list(...)
+  if(length(ret)==0)
+    stop("The input is empty.")
+
+
+  ngroups <- 0
+  wtot <- 0
+  meanSum <- 0
+
+  for(i in seq_len(length(ret))){
+    ngroups <- ngroups + ret[[i]]$ngroups
+    wtot <- wtot + ret[[i]]$weight11.sum
+    meanSum <- meanSum + ret[[i]]$weight1.coef.sum
+  }
+  wtot <- wtot / ngroups
+  meanSum <- meanSum / ngroups
+
+  mean <- pseudo.solve(wtot, meanSum)
+  if (attr(mean, "deficient")) {
+    warning("cannot solve fixed effect moment equation due to rank deficiency")
+  }
+  mean.cov <- pseudo.solve(wtot) / ngroups
+  attr(mean, "deficient") <- attr(mean.cov, "deficient") <- NULL
+
+  est.mean <- list(mean = mean, mean.cov = mean.cov)
+  return(est.mean)
+}
+
+# Compute 'sufficient statitics' for estimating ranef cov using moment method.
+# Usually it will be used separately on each cores.
+moment.est.cov.mapper <- function(coefficients, nfixed, subspace, precision, dispersion,
+                       start.cov = NULL, diagcov = FALSE, mean)
+{
+
+  dim <- ncol(coefficients)
+  ngroups <- nrow(coefficients)
+  nrandom <- dim - nfixed
+  fixed <- seq_len(nfixed)
+  random  <- nfixed + seq_len(nrandom)
+
+  names <- colnames(coefficients)
+
+  if (is.null(start.cov))
+    start.cov <- diag(nrandom)
+
+  # compute mean estimate and covariance bias correction
+  if(diagcov){
+    bias.sum <- matrix(0, nrandom, 1)
+    weight22.2.sum <- matrix(0,  nrandom, nrandom)
+    weight22.coef.2.sum <- matrix(0, nrandom, 1)
+  } else {
+    bias.sum <- matrix(0,  nrandom, nrandom)
+    weight22.2.sum <- matrix(0,  nrandom^2, nrandom^2)
+    weight22.coef.2.sum <- matrix(0,  nrandom, nrandom)
+  }
+
+  for (i in seq_len(ngroups)) {
+    u <- subspace[[i]]
+    u1 <- u[fixed,,drop=FALSE]
+    u2 <- u[random,,drop=FALSE]
+    l <- precision[[i]]
+
+    sigma2 <- dispersion[i]
+    r <- length(l)
+  
+    if (r == 0L) {
+      next
     }
-    mean.cov <- pseudo.solve(wtot) / ngroups
-
-    attr(mean, "deficient") <- attr(mean.cov, "deficient") <- NULL
-
-    logging::loginfo("Computing covariance estimate", logger="mbest.mhglm.fit")
-    if(parallel) {
-        results <- foreach(i=seq_len(ngroups)) %dopar% {
-            w12 <- weight12[[i]]
-            w22 <- weight22[[i]]
-            weight22.2 <- kronecker(w22, w22)
-            diff <- (t(w12) %*% (coefficients[i, fixed] - mean)
-                     + w22 %*% coefficients[i,random])
-            weight22.coef.2 <- diff %*% t(diff)
-            return(list(weight22.2, weight22.coef.2))
-        }
-        wtot2 <- (Reduce('+', lapply(results, function(x) x[[1]]))
-                  / length(results))
-        wt.cov <- (Reduce('+', lapply(results, function(x) x[[2]]))
-                   / length(results))
-        rm(results); gc()
+
+    s <- sqrt(l)
+    us <- u %*% diag(s, r, r)
+    u1s <- us[fixed,,drop=FALSE]
+    u2s <- us[random,,drop=FALSE]
+
+    cov22 <- t(u2s) %*% start.cov %*% u2s
+    w.inv <- cov22 + diag(sigma2, r, r)
+    R.w.inv <- chol(w.inv)
+    R.u1s.t <- backsolve(R.w.inv, t(u1s), transpose=TRUE)
+    R.u2s.t <- backsolve(R.w.inv, t(u2s), transpose=TRUE)
+
+    w12 <- t(R.u1s.t) %*% R.u2s.t
+    w22 <- t(R.u2s.t) %*% R.u2s.t
+
+    R2.u2s.t <- backsolve(R.w.inv, R.u2s.t)
+
+    if(diagcov)
+      B <- sigma2 * apply((R2.u2s.t)^2,2,sum)
+    else 
+      B <- sigma2 * t(R2.u2s.t) %*% R2.u2s.t
+
+    bias.sum <- bias.sum + B
+
+
+
+    if(diagcov){
+      diff <- (t(w12) %*% (coefficients[i,fixed] - mean)
+	       + w22 %*% coefficients[i,random])
+
+      weight22.2.sum <- weight22.2.sum + w22^2
+      weight22.coef.2.sum  <- weight22.coef.2.sum + diff^2
     } else {
-        weight22.2 <- array(NA, dim=c(ngroups, nrandom^2, nrandom^2))
-        weight22.coef.2 <- array(NA, dim=c(ngroups, nrandom, nrandom))
-
-        for (i in seq_len(ngroups)) {
-            w12 <- matrix(weight12[i,,], nfixed, nrandom)
-            w22 <- matrix(weight22[i,,], nrandom, nrandom)
-
-            weight22.2[i,,] <- kronecker(w22, w22)
-
-            diff <- (t(w12) %*% (coefficients[i,fixed] - mean)
-                     + w22 %*% coefficients[i,random])
-            weight22.coef.2[i,,] <- diff %*% t(diff)
-        }
-        wtot2 <- apply(weight22.2, c(2,3), mean)
-        wt.cov <- apply(weight22.coef.2, c(2, 3), mean)
-        wt.bias <- apply(bias, c(2, 3), mean)
-    }
 
-    logging::loginfo("Constructing an orthonormal basis for symmetric space",
-                     logger="mbest.mhglm.fit")
-    q <- nrandom
-    FF <- matrix(0, q^2, q * (q + 1) / 2)
-    j <- 0
-    for (k in seq_len(q)) {
-        for (l in seq_len(k)) {
-            j <- j + 1
-            f <- matrix(0, q, q)
-            if (k == l) {
-                f[k,l] <- 1
-            } else {
-                f[k,l] <- 1/sqrt(2)
-                f[l,k] <- 1/sqrt(2)
-            }
-            FF[,j] <- as.vector(f)
-        }
+      weight22.2.sum <- weight22.2.sum + kronecker(w22, w22)
+
+      diff <- (t(w12) %*% (coefficients[i,fixed] - mean)
+	       + w22 %*% coefficients[i,random])
+      weight22.coef.2.sum <- weight22.coef.2.sum + diff %*% t(diff)
     }
 
-    logging::loginfo("Solving moment equations", logger="mbest.mhglm.fit")
-    tF.wtot2.F <- t(FF) %*% wtot2 %*% FF
-    cov.vec <- pseudo.solve(tF.wtot2.F, t(FF) %*% as.vector(wt.cov))
-    bias.vec <- pseudo.solve(tF.wtot2.F, t(FF) %*% as.vector(wt.bias))
+  }
+
+  ret <- list(ngroups = ngroups,
+	      weight22.2.sum = weight22.2.sum, 
+	      weight22.coef.2.sum = weight22.coef.2.sum,
+	      bias.sum = bias.sum)
+  return(ret)
 
-    if (attr(cov.vec, "deficient")) {
-        warning(paste0("cannot solve covariance moment equation",
-                       " due to rank deficiency"))
+}
+
+
+# Estimate ranef covariance matrix, if control$diagcov is TRUE, i.e. only estimate diagonal element in covariance matrix.
+# Usually used as the 'combine' function in foreach function.
+moment.est.cov.reducer.diag<- function(...)
+{
+
+  ret <- list(...)
+  if(length(ret)==0)
+    stop("The input is empty.")
+
+  ngroups <- 0
+  wtot2 <- 0
+  wt.cov <- 0
+  wt.bias <- 0
+
+  for(i in seq_len(length(ret))){
+    ngroups <- ngroups + ret[[i]]$ngroups
+    wtot2 <- wtot2 + ret[[i]]$weight22.2.sum
+    wt.cov <- wt.cov + ret[[i]]$weight22.coef.2.sum
+    wt.bias <- wt.bias + ret[[i]]$bias.sum
+  }
+
+  nrandom <- nrow(wt.bias)
+
+  LHSinv <- solve(wtot2)
+  diag_1 <- LHSinv %*% wt.cov
+  diag_2 <- LHSinv %*% wt.bias
+#  gamma <- min(min(diag_1 / diag_2),1)
+#  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
+  cov <- diag( diag_1 -  diag_2 , nrow = nrandom)
+
+  cov <- proj.psd(cov)  # ensure positive definite
+  if (attr(cov, "modified") )
+    warning(paste("moment-based covariance matrix estimate is not positive"
+                  , " semi-definite; using projection"
+                  , sep=""))
+  attr(cov, "modified") <- NULL
+  cov <- matrix(cov,nrow = nrandom)
+
+  return(cov)
+
+}
+
+# Estimate ranef covariance matrix, if control$diagcov is FALSE, i.e. estimate every element in covariance matrix. 
+# Usually used as the 'combine' function in foreach function.
+moment.est.cov.reducer.exact <- function(...)
+{
+
+  ret <- list(...)
+  if(length(ret)==0)
+    stop("The input is empty.")
+
+
+  ngroups <- 0
+  wtot2 <- 0
+  wt.cov <- 0
+  wt.bias <- 0
+
+  for(i in seq_len(length(ret))){
+    ngroups <- ngroups + ret[[i]]$ngroups
+    wtot2 <- wtot2 + ret[[i]]$weight22.2.sum
+    wt.cov <- wt.cov + ret[[i]]$weight22.coef.2.sum
+    wt.bias <- wt.bias + ret[[i]]$bias.sum
+  }
+
+  wtot2 <- wtot2/ngroups
+  wt.cov <- wt.cov/ngroups
+  wt.bias <- wt.bias/ngroups
+
+  nrandom <- nrow(wt.bias)
+
+  # construct an orthonormal basis for the space of symmetric
+  # matrices
+  q <- nrandom
+  F <- matrix(0, q^2, q * (q + 1) / 2)
+  j <- 0
+  for (k in seq_len(q)) {
+    for (l in seq_len(k)) {
+      j <- j + 1
+      f <- matrix(0, q, q)
+      if (k == l) {
+	f[k,l] <- 1
+      } else {
+	f[k,l] <- 1/sqrt(2)
+	f[l,k] <- 1/sqrt(2)
+      }
+      F[,j] <- as.vector(f)
     }
+  }
+
+  # solve the moment equations
+  tF.wtot2.F <- t(F) %*% wtot2 %*% F
+  cov.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.cov))
+  bias.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.bias))
+
+  if (attr(cov.vec, "deficient")) {
+    warning("cannot solve covariance moment equation due to rank deficiency")
+  }
+
+  # change back to original space
+  cov <- matrix(F %*% cov.vec, nrandom, nrandom)
+  bias <- matrix(F %*% bias.vec, nrandom, nrandom)
+
+  # remove asymmetry arising from numerical errors
+  cov <- 0.5 * (cov + t(cov))
+  bias <- 0.5 * (bias + t(bias))
+
+  eigen.cov <- eigen(cov, symmetric=TRUE)
+  l <- eigen.cov$values
+  u <- eigen.cov$vectors[,l > 0, drop=FALSE]
+  l <- l[l > 0]
+  s <- sqrt(l)
+  s.u.t <- t(u) * s
+  sinv.u.t <- t(u) / s
+  cov.bias <- sinv.u.t %*% bias %*% t(sinv.u.t)
+  eigen.cov.bias <- eigen(cov.bias, symmetric=TRUE)
+  l.bias <- eigen.cov.bias$values
+  u.bias.t <- t(eigen.cov.bias$vectors) %*% s.u.t
+  scale <- max(1, l.bias[1])
+  cov.adj <- (t(u.bias.t)
+	      %*% diag((scale - l.bias) / scale, length(l.bias))
+	      %*% u.bias.t)
+
+  cov <- proj.psd(cov.adj)  # ensure positive definite
+  if (attr(cov, "modified") || length(l) < nrow(cov) || scale != 1)
+    warning(paste("moment-based covariance matrix estimate is not positive"
+		  , " semi-definite; using projection"
+		  , sep=""))
+  attr(cov, "modified") <- NULL
+  return(cov)
+}
 
-    # change back to original space
-    cov <- matrix(FF %*% cov.vec, nrandom, nrandom)
-    bias <- matrix(FF %*% bias.vec, nrandom, nrandom)
 
-    # remove asymmetry arising from numerical errors
-    cov <- 0.5 * (cov + t(cov))
-    bias <- 0.5 * (bias + t(bias))
+moment.est <- function(coefficients, nfixed, subspace, precision, dispersion,
+                       start.cov = NULL, parallel = FALSE, diagcov = FALSE)
+{
 
-    eigen.cov <- eigen(cov, symmetric=TRUE)
-    l <- eigen.cov$values
-    u <- eigen.cov$vectors[,l > 0, drop=FALSE]
-    l <- l[l > 0]
-    s <- sqrt(l)
-    s.u.t <- t(u) * s
-    sinv.u.t <- t(u) / s
-    cov.bias <- sinv.u.t %*% bias %*% t(sinv.u.t)
-    eigen.cov.bias <- eigen(cov.bias, symmetric=TRUE)
-    l.bias <- eigen.cov.bias$values
-    u.bias.t <- t(eigen.cov.bias$vectors) %*% s.u.t
-    scale <- max(1, l.bias[1])
-    cov.adj <- (t(u.bias.t)
-                %*% diag((scale - l.bias) / scale, length(l.bias))
-                %*% u.bias.t)
-
-    cov <- proj.psd(cov.adj)  # ensure positive definite
-    if (attr(cov, "modified") || length(l) < nrow(cov) || scale != 1) {
-        warning(paste("moment-based covariance matrix estimate is not positive"
-                    , " semi-definite; using projection"
-                    , sep=""))
+  logging::loginfo("Estimating moments", logger="mbest.mhglm.fit")
+  ngroups <- nrow(coefficients)
+  dim <- ncol(coefficients)
+  names <- colnames(coefficients)
+  if (ngroups == 0L || dim == 0L){
+    cov <- matrix(0, dim, dim)
+    dimnames(cov) <- list(names, names)
+    return(cov)
+  }
+
+
+  logging::loginfo("Computing mean estimate", logger="mbest.mhglm.fit")
+  i <- NULL
+  # fixef
+  if(parallel){
+    est.mean <- foreach(i=seq_len(ngroups),
+			.combine = 'moment.est.mean.reducer',
+			.multicombine=TRUE ) %dopar% {
+      moment.est.mean.mapper(coefficients[i,,drop = FALSE], nfixed, 
+			     list(subspace[[i]]),list(precision[[i]]),
+			     dispersion[i], start.cov = start.cov) 
+    }
+  } else {
+    mean.info <- moment.est.mean.mapper(coefficients, nfixed, subspace,precision,
+					dispersion, start.cov = start.cov) 
+    est.mean <- moment.est.mean.reducer(mean.info)
+  }
+
+  logging::loginfo("Computing covariance estimate", logger="mbest.mhglm.fit")
+  # ranef
+  if(parallel){
+    if(diagcov){
+      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.diag', .multicombine = TRUE) %dopar%{
+	moment.est.cov.mapper(coefficients[i,,drop = FALSE],nfixed,
+			      list(subspace[[i]]),list(precision[[i]]),dispersion[i],
+			      start.cov = start.cov, diagcov = diagcov, 
+			      est.mean$mean) }
+    } else {
+      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.exact', .multicombine = TRUE) %dopar%{
+	moment.est.cov.mapper(coefficients[i,,drop = FALSE], nfixed, 
+			      list(subspace[[i]]),list(precision[[i]]),dispersion[i],
+			      start.cov = start.cov, diagcov = diagcov, 
+			      est.mean$mean) }
     }
-    attr(cov, "modified") <- NULL
+  } else {
+    cov.info <- moment.est.cov.mapper(coefficients, nfixed,
+				      subspace,precision,dispersion,
+				      start.cov = start.cov, diagcov = diagcov, 
+				      est.mean$mean) 
+    if(diagcov){
+      est.cov <- moment.est.cov.reducer.diag(cov.info)
+    } else {
+      est.cov <- moment.est.cov.reducer.exact(cov.info)
+    }
+  }
 
-    names(mean) <- names[fixed]
-    dimnames(mean.cov) <- list(names[fixed], names[fixed])
-    dimnames(cov) <- list(names[random], names[random])
+  list(mean=est.mean$mean, mean.cov=est.mean$mean.cov, cov=est.cov)
 
-    list(mean=mean, mean.cov=mean.cov, cov=cov)
 }
+
+
diff --git a/R/unpivotRp.R b/R/unpivotRp.R
new file mode 100644
index 0000000..ff1832d
--- /dev/null
+++ b/R/unpivotRp.R
@@ -0,0 +1,10 @@
+unpivotRp <- function(qr,nvars){
+  rank <- qr$rank
+  Rp <- matrix(0, rank, nvars)
+  if (rank > 0L) {
+    R <- qr.R(qr)
+    pivot <- qr$pivot
+    Rp[,pivot] <- R[seq_len(rank),]
+  }
+  return(Rp)
+}
diff --git a/man/mhglm.control.Rd b/man/mhglm.control.Rd
index e07035f..791d206 100644
--- a/man/mhglm.control.Rd
+++ b/man/mhglm.control.Rd
@@ -9,7 +9,7 @@ internally by \code{\link{mhglm.fit}}, but may be used to construct a
 control argument to either function.
 }
 \usage{
-mhglm.control(standardize = TRUE, steps = 1, parallel = FALSE,
+mhglm.control(standardize = TRUE, steps = 1, parallel = FALSE, diagcov = FALSE,
               fit.method = "firthglm.fit", fit.control = list(...), ...)
 }
 \arguments{
@@ -22,6 +22,9 @@ mhglm.control(standardize = TRUE, steps = 1, parallel = FALSE,
 }
   \item{parallel}{
     fit the group-specific estimates in parallel rather than sequentially
+}
+  \item{diagcov}{
+    estimate random effect covairance matrix with diagonal approximation 
 }
   \item{fit.method}{
     method for obtaining group-specific effect estimates

From 2d9101408b7e900e9da1ab5f3ec7e1ee07ba5d88 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Sat, 19 Mar 2016 19:23:14 -0400
Subject: [PATCH 02/10] fix a bug in diagcov reducer

---
 R/moment.est.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/moment.est.R b/R/moment.est.R
index 1b59585..2073b9d 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -230,7 +230,7 @@ moment.est.cov.reducer.diag<- function(...)
   diag_2 <- LHSinv %*% wt.bias
 #  gamma <- min(min(diag_1 / diag_2),1)
 #  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
-  cov <- diag( diag_1 -  diag_2 , nrow = nrandom)
+  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
 
   cov <- proj.psd(cov)  # ensure positive definite
   if (attr(cov, "modified") )

From 2ea92026e3e26bbe560dba34691ff1ef77f58cde Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <zhangns07@gmail.com>
Date: Tue, 22 Mar 2016 13:27:49 -0400
Subject: [PATCH 03/10] preprocess ranef.cov before proj.psd

---
 R/moment.est.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/moment.est.R b/R/moment.est.R
index 2073b9d..45d0227 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -228,9 +228,9 @@ moment.est.cov.reducer.diag<- function(...)
   LHSinv <- solve(wtot2)
   diag_1 <- LHSinv %*% wt.cov
   diag_2 <- LHSinv %*% wt.bias
-#  gamma <- min(min(diag_1 / diag_2),1)
-#  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
-  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
+  gamma <- min(min(diag_1 / diag_2),1)
+  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
+# cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
 
   cov <- proj.psd(cov)  # ensure positive definite
   if (attr(cov, "modified") )

From bdba57158ddce6caef359a4a88b2bc0aed152956 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Wed, 4 May 2016 16:19:36 -0400
Subject: [PATCH 04/10] fix an edge case in covariance estimate

---
 R/moment.est.R | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/R/moment.est.R b/R/moment.est.R
index 2073b9d..78e7277 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -228,9 +228,16 @@ moment.est.cov.reducer.diag<- function(...)
   LHSinv <- solve(wtot2)
   diag_1 <- LHSinv %*% wt.cov
   diag_2 <- LHSinv %*% wt.bias
-#  gamma <- min(min(diag_1 / diag_2),1)
-#  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
-  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
+
+  idx.use <- diag_2 !=0
+  if( any(idx.use)){
+    gamma <- min(min(diag_1[idx.use] / diag_2[idx.use]),1)
+  } else {
+    gamma <- 0
+  }
+
+  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
+#  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
 
   cov <- proj.psd(cov)  # ensure positive definite
   if (attr(cov, "modified") )

From 0191b708bf73d9ab58d086d2217a393007af7525 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Tue, 24 May 2016 21:31:06 -0400
Subject: [PATCH 05/10] refactor moment.est function

---
 R/moment.est.R | 221 ++++++++++++++++++++-----------------------------
 1 file changed, 90 insertions(+), 131 deletions(-)

diff --git a/R/moment.est.R b/R/moment.est.R
index 78e7277..63d495e 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -79,10 +79,10 @@ moment.est.mean.mapper <- function(coefficients, nfixed, subspace, precision, di
 
 # Estimate fixed effect's coefficient. 
 # Usually used as the 'combine' function in foreach function.
-moment.est.mean.reducer <- function(...)
+moment.est.mean.reducer <- function(ret)
 {
 
-  ret <- list(...)
+#  ret <- list(...)
   if(length(ret)==0)
     stop("The input is empty.")
 
@@ -204,10 +204,10 @@ moment.est.cov.mapper <- function(coefficients, nfixed, subspace, precision, dis
 
 # Estimate ranef covariance matrix, if control$diagcov is TRUE, i.e. only estimate diagonal element in covariance matrix.
 # Usually used as the 'combine' function in foreach function.
-moment.est.cov.reducer.diag<- function(...)
+moment.est.cov.reducer <- function(ret, diagcov)
 {
 
-  ret <- list(...)
+  #  ret <- list(...)
   if(length(ret)==0)
     stop("The input is empty.")
 
@@ -225,121 +225,85 @@ moment.est.cov.reducer.diag<- function(...)
 
   nrandom <- nrow(wt.bias)
 
-  LHSinv <- solve(wtot2)
-  diag_1 <- LHSinv %*% wt.cov
-  diag_2 <- LHSinv %*% wt.bias
-
-  idx.use <- diag_2 !=0
-  if( any(idx.use)){
-    gamma <- min(min(diag_1[idx.use] / diag_2[idx.use]),1)
-  } else {
-    gamma <- 0
-  }
-
-  cov <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
-#  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
-
-  cov <- proj.psd(cov)  # ensure positive definite
-  if (attr(cov, "modified") )
-    warning(paste("moment-based covariance matrix estimate is not positive"
-                  , " semi-definite; using projection"
-                  , sep=""))
-  attr(cov, "modified") <- NULL
-  cov <- matrix(cov,nrow = nrandom)
-
-  return(cov)
-
-}
-
-# Estimate ranef covariance matrix, if control$diagcov is FALSE, i.e. estimate every element in covariance matrix. 
-# Usually used as the 'combine' function in foreach function.
-moment.est.cov.reducer.exact <- function(...)
-{
+  if(diagcov){
+    LHSinv <- solve(wtot2)
+    diag_1 <- LHSinv %*% wt.cov
+    diag_2 <- LHSinv %*% wt.bias
 
-  ret <- list(...)
-  if(length(ret)==0)
-    stop("The input is empty.")
+    idx.use <- diag_2 !=0
+    if( any(idx.use)){
+      gamma <- min(min(diag_1[idx.use] / diag_2[idx.use]),1)
+    } else {
+      gamma <- 0
+    }
 
+    #  cov <- diag( c(diag_1 -  diag_2) , nrow = nrandom)
+    cov.adj <- diag( c(diag_1 - gamma * diag_2) , nrow = nrandom)
 
-  ngroups <- 0
-  wtot2 <- 0
-  wt.cov <- 0
-  wt.bias <- 0
-
-  for(i in seq_len(length(ret))){
-    ngroups <- ngroups + ret[[i]]$ngroups
-    wtot2 <- wtot2 + ret[[i]]$weight22.2.sum
-    wt.cov <- wt.cov + ret[[i]]$weight22.coef.2.sum
-    wt.bias <- wt.bias + ret[[i]]$bias.sum
-  }
-
-  wtot2 <- wtot2/ngroups
-  wt.cov <- wt.cov/ngroups
-  wt.bias <- wt.bias/ngroups
+  } else {
+    # construct an orthonormal basis for the space of symmetric
+    # matrices
+    q <- nrandom
+    F <- matrix(0, q^2, q * (q + 1) / 2)
+    j <- 0
+    for (k in seq_len(q)) {
+      for (l in seq_len(k)) {
+	j <- j + 1
+	f <- matrix(0, q, q)
+	if (k == l) {
+	  f[k,l] <- 1
+	} else {
+	  f[k,l] <- 1/sqrt(2)
+	  f[l,k] <- 1/sqrt(2)
+	}
+	F[,j] <- as.vector(f)
+      }
+    }
 
-  nrandom <- nrow(wt.bias)
+    # solve the moment equations
+    tF.wtot2.F <- t(F) %*% wtot2 %*% F
+    cov.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.cov))
+    bias.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.bias))
 
-  # construct an orthonormal basis for the space of symmetric
-  # matrices
-  q <- nrandom
-  F <- matrix(0, q^2, q * (q + 1) / 2)
-  j <- 0
-  for (k in seq_len(q)) {
-    for (l in seq_len(k)) {
-      j <- j + 1
-      f <- matrix(0, q, q)
-      if (k == l) {
-	f[k,l] <- 1
-      } else {
-	f[k,l] <- 1/sqrt(2)
-	f[l,k] <- 1/sqrt(2)
-      }
-      F[,j] <- as.vector(f)
+    if (attr(cov.vec, "deficient")) {
+      warning("cannot solve covariance moment equation due to rank deficiency")
     }
-  }
 
-  # solve the moment equations
-  tF.wtot2.F <- t(F) %*% wtot2 %*% F
-  cov.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.cov))
-  bias.vec <- pseudo.solve(tF.wtot2.F, t(F) %*% as.vector(wt.bias))
+    # change back to original space
+    cov <- matrix(F %*% cov.vec, nrandom, nrandom)
+    bias <- matrix(F %*% bias.vec, nrandom, nrandom)
 
-  if (attr(cov.vec, "deficient")) {
-    warning("cannot solve covariance moment equation due to rank deficiency")
-  }
+    # remove asymmetry arising from numerical errors
+    cov <- 0.5 * (cov + t(cov))
+    bias <- 0.5 * (bias + t(bias))
 
-  # change back to original space
-  cov <- matrix(F %*% cov.vec, nrandom, nrandom)
-  bias <- matrix(F %*% bias.vec, nrandom, nrandom)
-
-  # remove asymmetry arising from numerical errors
-  cov <- 0.5 * (cov + t(cov))
-  bias <- 0.5 * (bias + t(bias))
-
-  eigen.cov <- eigen(cov, symmetric=TRUE)
-  l <- eigen.cov$values
-  u <- eigen.cov$vectors[,l > 0, drop=FALSE]
-  l <- l[l > 0]
-  s <- sqrt(l)
-  s.u.t <- t(u) * s
-  sinv.u.t <- t(u) / s
-  cov.bias <- sinv.u.t %*% bias %*% t(sinv.u.t)
-  eigen.cov.bias <- eigen(cov.bias, symmetric=TRUE)
-  l.bias <- eigen.cov.bias$values
-  u.bias.t <- t(eigen.cov.bias$vectors) %*% s.u.t
-  scale <- max(1, l.bias[1])
-  cov.adj <- (t(u.bias.t)
-	      %*% diag((scale - l.bias) / scale, length(l.bias))
-	      %*% u.bias.t)
+    eigen.cov <- eigen(cov, symmetric=TRUE)
+    l <- eigen.cov$values
+    u <- eigen.cov$vectors[,l > 0, drop=FALSE]
+    l <- l[l > 0]
+    s <- sqrt(l)
+    s.u.t <- t(u) * s
+    sinv.u.t <- t(u) / s
+    cov.bias <- sinv.u.t %*% bias %*% t(sinv.u.t)
+    eigen.cov.bias <- eigen(cov.bias, symmetric=TRUE)
+    l.bias <- eigen.cov.bias$values
+    u.bias.t <- t(eigen.cov.bias$vectors) %*% s.u.t
+    scale <- max(1, l.bias[1])
+    cov.adj <- (t(u.bias.t)
+		%*% diag((scale - l.bias) / scale, length(l.bias))
+		%*% u.bias.t)
+  }
 
   cov <- proj.psd(cov.adj)  # ensure positive definite
-  if (attr(cov, "modified") || length(l) < nrow(cov) || scale != 1)
+  if (attr(cov, "modified") )
     warning(paste("moment-based covariance matrix estimate is not positive"
 		  , " semi-definite; using projection"
 		  , sep=""))
   attr(cov, "modified") <- NULL
+
   return(cov)
-}
 
+}
 
 moment.est <- function(coefficients, nfixed, subspace, precision, dispersion,
                        start.cov = NULL, parallel = FALSE, diagcov = FALSE)
@@ -358,51 +322,46 @@ moment.est <- function(coefficients, nfixed, subspace, precision, dispersion,
 
   logging::loginfo("Computing mean estimate", logger="mbest.mhglm.fit")
   i <- NULL
+
   # fixef
   if(parallel){
-    est.mean <- foreach(i=seq_len(ngroups),
-			.combine = 'moment.est.mean.reducer',
-			.multicombine=TRUE ) %dopar% {
+#    est.mean <- foreach(i=seq_len(ngroups), .combine = 'moment.est.mean.reducer', .multicombine=TRUE ) %dopar% {
+    mean.info <- foreach(i=seq_len(ngroups)) %dopar% {
       moment.est.mean.mapper(coefficients[i,,drop = FALSE], nfixed, 
 			     list(subspace[[i]]),list(precision[[i]]),
 			     dispersion[i], start.cov = start.cov) 
     }
+
   } else {
-    mean.info <- moment.est.mean.mapper(coefficients, nfixed, subspace,precision,
-					dispersion, start.cov = start.cov) 
-    est.mean <- moment.est.mean.reducer(mean.info)
+    mean.info <- list(moment.est.mean.mapper(coefficients, nfixed, subspace,precision,
+					dispersion, start.cov = start.cov))
   }
 
-  logging::loginfo("Computing covariance estimate", logger="mbest.mhglm.fit")
+  est.mean <- moment.est.mean.reducer(mean.info)
+
+
   # ranef
+  logging::loginfo("Computing covariance estimate", logger="mbest.mhglm.fit")
   if(parallel){
-    if(diagcov){
-      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.diag', .multicombine = TRUE) %dopar%{
-	moment.est.cov.mapper(coefficients[i,,drop = FALSE],nfixed,
-			      list(subspace[[i]]),list(precision[[i]]),dispersion[i],
-			      start.cov = start.cov, diagcov = diagcov, 
-			      est.mean$mean) }
-    } else {
-      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.exact', .multicombine = TRUE) %dopar%{
-	moment.est.cov.mapper(coefficients[i,,drop = FALSE], nfixed, 
-			      list(subspace[[i]]),list(precision[[i]]),dispersion[i],
-			      start.cov = start.cov, diagcov = diagcov, 
-			      est.mean$mean) }
-    }
+    #      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.exact', .multicombine = TRUE) %dopar%{
+    #      est.cov <- foreach(i=seq_len(ngroups), .combine = 'moment.est.cov.reducer.diag', .multicombine = TRUE) %dopar%{
+    cov.info <- foreach(i=seq_len(ngroups)) %dopar%{
+      moment.est.cov.mapper(coefficients[i,,drop = FALSE], nfixed, 
+			    list(subspace[[i]]),list(precision[[i]]),dispersion[i],
+			    start.cov = start.cov, diagcov = diagcov, 
+			    est.mean$mean) }
   } else {
-    cov.info <- moment.est.cov.mapper(coefficients, nfixed,
-				      subspace,precision,dispersion,
-				      start.cov = start.cov, diagcov = diagcov, 
-				      est.mean$mean) 
-    if(diagcov){
-      est.cov <- moment.est.cov.reducer.diag(cov.info)
-    } else {
-      est.cov <- moment.est.cov.reducer.exact(cov.info)
-    }
+    cov.info <- list(moment.est.cov.mapper(coefficients, nfixed,
+					   subspace,precision,dispersion,
+					   start.cov = start.cov, diagcov = diagcov, 
+					   est.mean$mean))
   }
 
+  est.cov <- moment.est.cov.reducer(cov.info, diagcov = diagcov)
+
   list(mean=est.mean$mean, mean.cov=est.mean$mean.cov, cov=est.cov)
 
+
 }
 
 

From 36ac0a17941f14a331b142d1dbe96c1aa27dbb4b Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Tue, 24 May 2016 21:31:48 -0400
Subject: [PATCH 06/10] name the lists of Rp, subspace, precision by group
 names

---
 R/mhglm.fit.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/mhglm.fit.R b/R/mhglm.fit.R
index 3844067..8797dc0 100644
--- a/R/mhglm.fit.R
+++ b/R/mhglm.fit.R
@@ -59,8 +59,8 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
     df.residual.tot <- sum(m$df.residual)
     dispersion <- rep(dispersion.tot, ngroups)
 
-    if (dispersion.tot == 0)
-        stop("cannot estimate dispersion (no residual degrees of freedom)")
+#    if (dispersion.tot == 0)
+#        stop("cannot estimate dispersion (no residual degrees of freedom)")
 
     # compute group-sqecific precision square root (unpivoted)
     Rp <- as.list(rep(NULL, ngroups))
@@ -76,6 +76,7 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
 	  Rp[[i]] <- unpivotRp(qr.i,nvars)
 	}
     }
+    names(Rp) <- names(m$qr)
 
     # change coordinates so that average precision is identity
 
@@ -178,6 +179,8 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
             }
         }
     }
+    names(subspace) <- names(Rp)
+    names(precision) <- names(Rp)
 
     # compute coefficient mean and covariance estimates
     logging::loginfo("Computing mean and covariance estimates",
@@ -224,6 +227,7 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
                 R = R, rank = rank, rank.fixed = rank.fixed,
                 rank.random = rank.random, pivot = pivot, y = y, group = group,
                 prior.weights = weights, offset = offset, nobs = nobs)
+		
     fit
 }
 

From 3bdc960dd24d8de86c2277968207eadf893180c6 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Tue, 24 May 2016 21:32:57 -0400
Subject: [PATCH 07/10] demostrate how to fit very large data by map/reduce
 method

improve the comments
---
 demo/0.sim.R      | 118 ++++++++++++++++++++++++++++++
 demo/1.glmfit.R   | 179 ++++++++++++++++++++++++++++++++++++++++++++++
 demo/2.fixef.R    |  51 +++++++++++++
 demo/3.ranef.R    |  39 ++++++++++
 demo/4.finalize.R |  71 ++++++++++++++++++
 demo/demo.R       |  75 +++++++++++++++++++
 6 files changed, 533 insertions(+)
 create mode 100644 demo/0.sim.R
 create mode 100644 demo/1.glmfit.R
 create mode 100644 demo/2.fixef.R
 create mode 100644 demo/3.ranef.R
 create mode 100644 demo/4.finalize.R
 create mode 100644 demo/demo.R

diff --git a/demo/0.sim.R b/demo/0.sim.R
new file mode 100644
index 0000000..269f5b8
--- /dev/null
+++ b/demo/0.sim.R
@@ -0,0 +1,118 @@
+library(devtools)
+library(MASS)
+
+gen.replicate <- function 
+( ngroup, nobs, nobs.dispersion,
+ nfixef, fixef.mean.df, fixef.scale,
+ nranef, ranef.cov.df, ranef.scale,
+ dispersion, family
+) {
+
+
+    # we need to be strict about integer vs. numeric for the md5sum to match
+    ngroup <- as.integer(ngroup)
+    nobs <- as.integer(nobs)
+    nobs.dispersion <- as.numeric(nobs.dispersion)
+    nfixef <- as.integer(nfixef)
+    fixef.mean.df <- as.numeric(fixef.mean.df)
+    fixef.scale <- as.numeric(fixef.scale)
+    nranef <- as.integer(nranef)
+    ranef.cov.df <- as.numeric(ranef.cov.df)
+    ranef.scale <- as.numeric(ranef.scale)
+    dispersion <- as.numeric(dispersion)
+
+    # family
+    if (is.character(family)) {
+        family <- get(family, mode="function", envir=parent.frame())()
+    } else if (is.function(family)) {
+        family <- family()
+    } else if (is.null(family$family)) {
+        stop("'family' not recognized")
+    }
+
+    if (family$family %in% c("binomial", "poisson") && dispersion != 1) {
+        warning(sprintf("dispersion parameter is ignored for '%s' family",
+                        family$family))
+    }
+
+    # generate coefficient mean from t
+    if (is.na(fixef.mean.df)) {
+        fixef <- numeric(nfixef)
+    } else {
+        fixef <- fixef.scale * rt(nfixef, df=fixef.mean.df)
+    }
+
+    # generate global coefficient covariance from inverse Wishart
+    if (is.na(ranef.cov.df)) {
+        ranef.cov <- diag(ranef.scale^2, nranef)
+        ranef.cov.sqrt <- diag(ranef.scale, nranef)
+    } else {
+	ranef.cov <- diag(diag(ranef.scale^2
+			       * solve(rWishart(1, ranef.cov.df, diag(nranef))[,,1])) ,nrow = nranef)
+	ranef.cov.sqrt <- chol(ranef.cov)
+    }
+
+
+    # generate coefficients
+    u <- matrix(rnorm(ngroup * nranef), ngroup, nranef)
+    ranef <- u %*% ranef.cov.sqrt
+
+    # generate sampling rates
+    rate.mean <- nobs / ngroup
+    if (nobs.dispersion == Inf) {
+        sample.rate <- rep(rate.mean, ngroup)
+    } else {
+        sample.rate <- rgamma(ngroup, nobs.dispersion,
+                              nobs.dispersion / rate.mean)
+        stopifnot(all(sample.rate > 0))
+    }
+
+    # generate group
+    suppressWarnings({ # ignore warning about using Walker's alias method
+      group <- sample.int(ngroup, nobs, replace=TRUE, prob=sample.rate)
+    })
+
+
+    # generate feature  matrices with Pr(x[i,j] = +1) = P(x[i,j] = -1) = 1/2,
+    z <- matrix(sample(c(-1, +1), nobs * nranef, replace=TRUE), nobs, nranef)
+    x <- z
+
+    # compute linear predictors and generate observations
+    eta <- drop(x %*% fixef) + rowSums(z * ranef[group,])
+    mu <- family$linkinv(eta)
+
+    if (family$family == "gaussian") {
+        y <- rnorm(nobs, mean=mu, sd=sqrt(dispersion))
+    } else if (family$family == "binomial") {
+        y <- as.numeric(rbinom(nobs, 1, mu))
+    } else if (family$family == "poisson") {
+        y <- rpois(nobs, mu)
+    } else {
+        stop(sprintf("family '%s' not supported", family$family))
+    }
+
+    list(ngroup = ngroup, nobs = nobs,
+         fixef = fixef, ranef = ranef, ranef.cov.sqrt = ranef.cov.sqrt,
+         dispersion = dispersion, sample.rate = sample.rate,
+         group = group, x = x, z = z, y.mean = mu, y = y, family=family)
+}
+
+set.seed(0)
+r <- gen.replicate(ngroup=10,  nobs=10000, nobs.dispersion=1, nfixef=5,
+		   fixef.mean.df=4, fixef.scale=1, nranef=5, ranef.cov.df=10,
+		   ranef.scale=1, dispersion=1, family=family)
+
+
+group <- as.factor(r$group)
+ngroups <- nlevels(group)
+levels <- levels(group)
+subsets <- .Call(C_group_subsets, group, ngroups) # group => indices
+
+for(i in seq_len(length(subsets))){
+  idx <- subsets[[i]]
+  data_filename <- paste0(datafile_folder,'data.',(i),'.rds',sep = "")
+  saveRDS(list(x = r$x[idx,],z = r$z[idx,],y = r$y[idx],group = factor(r$group[idx])),file = data_filename)
+}
+
+nfiles <- length(subsets)
+
diff --git a/demo/1.glmfit.R b/demo/1.glmfit.R
new file mode 100644
index 0000000..8b2bb3f
--- /dev/null
+++ b/demo/1.glmfit.R
@@ -0,0 +1,179 @@
+# 1.glmfit.R
+# (Map step) For each piece of data indexed by i
+# - fit glm to it
+# - compute dispersion.sum, df.residual.tot, coef, Rp, subspace, precision
+# - write out info.i.rds, subspace.i.rds, disp.i.csv
+# 
+# (Reduce step) Pool information together
+# - read in all disp.*.csv 
+# - compute dispersion.tot, df.residual.tot
+
+
+start <- NULL ;etastart <- NULL ; mustart <- NULL;intercept <- TRUE;
+control <- do.call("mhglm.control", control)
+nfiles <- length(datafile_list)
+
+for (dataid in seq_len(nfiles)){
+
+  # load one piece of data 
+  datafile <- paste0(datafile_folder,datafile_list[dataid])
+  data <- readRDS(datafile)
+  x <- data$x
+  y <- data$y
+  z <- x
+  group <- data$group
+
+  # set parameters
+  nobs <- NROW(y)
+  weights <-  rep(1, nobs)
+  offset <- rep(0, nobs)
+
+  # save data specific information 
+  infofilename <- paste0(datafile_folder,'info.',dataid,".rds")
+  saveRDS(list(nobs = nobs, weights = weights, offset = offset, y = y, group = group),file = infofilename)
+
+  # start mhglm.fit
+  xnames <- dimnames(x)[[2L]]
+  znames <- dimnames(z)[[2L]]
+  ynames <- if (is.matrix(y)) rownames(y) else names(y)
+  nfixed <- ncol(x)
+  nrandom <- ncol(z)
+  nvars <- nfixed + nrandom
+  fixed <- (seq_len(nvars) <= nfixed)
+  random <- !fixed
+
+  # doesn't support standardized
+  R.fixed <- diag(nfixed)
+  attr(R.fixed, "pivot") <- seq_len(nfixed)
+  attr(R.fixed, "rank") <- nfixed
+  R.random <- diag(nrandom)
+  attr(R.random, "pivot") <- seq_len(nrandom)
+  attr(R.random, "rank") <- nrandom
+
+  rank.fixed <- attr(R.fixed, "rank")
+  rank.random <- attr(R.random, "rank")
+  rank <- rank.fixed + rank.random
+  r1.fixed <- seq_len(rank.fixed)
+  r1.random <- seq_len(rank.random)
+  r1 <- seq_len(rank)
+  pivot.fixed <- which(fixed)[attr(R.fixed, "pivot")]
+  pivot.random <- which(random)[attr(R.random, "pivot")]
+  pivot <- c(pivot.fixed[r1.fixed], pivot.random[r1.random],
+	     pivot.fixed[-r1.fixed], pivot.random[-r1.random])
+  R <- matrix(0, rank, rank)
+  R[r1.fixed, r1.fixed] <- R.fixed[r1.fixed, r1.fixed]
+  (R[rank.fixed + r1.random, rank.fixed + r1.random] <- R.random[r1.random, r1.random])
+
+
+  if (!is.null(start)) {
+    if (length(start) != nfixed) {
+      stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s",
+		    nfixed, paste(deparse(xnames), collapse=", ")),
+	   domain=NA)
+    }
+    start <- c(start, numeric(nrandom))
+  }
+
+
+  # fit group specific glm
+  m <- rdglm.group.fit(x = cbind(x, z), y = y, group = group,
+		       weights = weights, start = start,
+		       etastart = etastart, mustart = mustart,
+		       offset = offset, family = family, 
+		       parallel = control$parallel,
+		       control = control$fit.control,
+		       method = control$fit.method,
+		       intercept = intercept)
+
+  # compute and save dispersion information 
+  dispfilename <- paste0(datafile_folder, "disp.",dataid,".csv")
+
+  df.residual.tot = sum(m$df.residual)
+  dispersion.sum = sum(m$df.residual * m$dispersion)
+  ngroups <- m$ngroups
+
+  estdispersion <- data.frame(ngroups, dispersion.sum, df.residual.tot)
+  write.table(estdispersion ,file = dispfilename, sep = ",", quote = FALSE, col.names = FALSE,row.names = FALSE)
+
+  # compute group-sqecific precision square root (unpivoted)
+  Rp <- as.list(rep(NULL, m$ngroups))
+  if(control$parallel) {
+    Rp <- foreach(i = seq_len(m$ngroups)) %dopar% {
+      qr.i <- m$qr[[i]]
+      return(unpivotRp(qr.i,nvars))
+    }
+  } else {
+    for(i in seq_len(m$ngroups)) {
+      qr.i <- m$qr[[i]]
+      Rp[[i]] <- unpivotRp(qr.i,nvars)
+    }
+  }
+  names(Rp) <- names(m$qr)
+
+  # compute standardized coeficients:
+  coef <- m$coefficients[,pivot[r1],drop=FALSE] %*% t(R)
+
+  # compute group-specific subspace and precisions
+  if(control$parallel) {
+    results <- foreach(i = seq_len(m$ngroups)) %dopar% {
+      if (nrow(Rp[[i]]) > 0L) {
+	prec.sqrt <- backsolve(R, t(Rp[[i]][,pivot[r1],drop=FALSE]),
+			       transpose=TRUE)
+	prec.sqrt.svd <- svd(prec.sqrt)
+	return(list(subspace=prec.sqrt.svd$u,
+		    precision=(prec.sqrt.svd$d)^2))
+      } else {
+	return(list(subspace=matrix(0, rank, 0), precision=numeric()))
+      }
+    }
+    subspace <- lapply(results, function(x) x[['subspace']])
+    precision <- lapply(results, function(x) x[['precision']])
+    rm(results)
+  } else {
+    subspace <- as.list(rep(NULL,m$ngroups))
+    precision <- as.list(rep(NULL, m$ngroups))
+    for (i in seq_len(m$ngroups)) {
+      if (nrow(Rp[[i]]) > 0L) {
+	prec.sqrt <- backsolve(R, t(Rp[[i]][,pivot[r1],drop=FALSE]),
+			       transpose=TRUE)
+	prec.sqrt.svd <- svd(prec.sqrt)
+	subspace[[i]] <- prec.sqrt.svd$u
+	precision[[i]] <- (prec.sqrt.svd$d)^2
+      } else {
+	subspace[[i]] <- matrix(0, rank, 0)
+	precision[[i]] <- numeric()
+      }
+    }
+  }
+  names(subspace) <- names(Rp)
+  names(precision) <- names(Rp)
+
+  # save group specific coef, Rp, subspace, precision
+  subspacefilename <- paste(datafile_folder,'subspace.',dataid,'.rds',sep = "")
+  saveRDS(list(ngroups = m$ngroups, coef = coef, Rp = Rp, 
+	       subspace = subspace, precision = precision), file = subspacefilename)
+
+}
+
+
+#----------
+# pool dispersion information togeter and estimate global dispersion
+dispfiles <- grep('disp',list.files(datafile_folder,full.names = TRUE),value = TRUE)
+disp.summary <- data.frame()
+for (i in seq_len(length(dispfiles))){
+  disp.summary <- rbind(disp.summary,
+			read.table(dispfiles[i],sep = ","))
+}
+
+disp.summary <- colSums(disp.summary)
+names(disp.summary) <- c('ngroups','dispersion.sum','df.residual.tot')
+
+dispersion.tot <- if(disp.summary['df.residual.tot'] >0 ) 
+{  disp.summary['dispersion.sum'] / disp.summary['df.residual.tot']
+} else { disp.summary['dispersion.sum']/disp.summary['ngroups']}
+df.residual.tot <- disp.summary['df.residual.tot']
+
+
+
+
+
diff --git a/demo/2.fixef.R b/demo/2.fixef.R
new file mode 100644
index 0000000..a0e0d0a
--- /dev/null
+++ b/demo/2.fixef.R
@@ -0,0 +1,51 @@
+# 3.fixef.R 
+# Read in momentest.rds (contains estimate for ranef covariance), if it exists. 
+# Otherwise set the initial covariance estimate to NULL.
+# 
+# (Map step) For each piece of data indexed by i
+# - read in subspace.i.rds
+# - compute fixef's sufficient statistics
+# - write out fixef.i.rds
+# 
+# (Reduce step) Pool information together
+# - read in all fixef.i.rds 
+# - estimate fixef
+
+#---------- 
+# read in initial covariance estimate.
+# if no initial estimate is provided, set to NULL.
+mestfile<- paste0(datafile_folder,'momentest.rds',sep = "")
+if(file.exists(mestfile)){
+  est0 <- readRDS(mestfile)
+} else {
+  est0 <- NULL
+} 
+start.cov <- est0$cov
+
+#---------- 
+# compute and save sufficient statistics for estimating fixef
+for(dataid in seq_len(nfiles)){
+
+  subspacefile <- paste0(datafile_folder,'subspace.',dataid,'.rds')
+  s <- readRDS(subspacefile)
+
+  dispersion <- rep(dispersion.tot, s$ngroups)
+  fixef.sep <- moment.est.mean.mapper(s$coef, rank.fixed, s$subspace, s$precision, dispersion, start.cov)
+
+  fixeffilename <- paste(datafile_folder,'fixef.',dataid,'.rds',sep = "")
+  saveRDS(fixef.sep, fixeffilename)
+}
+
+#---------- 
+# estimate fixef
+fixeffiles <- grep('fixef.*.rds',list.files(datafile_folder,full.names = TRUE),value = TRUE)
+mean.info <- as.list(rep(NULL,nfiles))
+
+for (i in seq_len(nfiles)){
+  fixef.ret <- readRDS(fixeffiles[i])
+  mean.info[[i]] <- fixef.ret
+}
+
+est.mean <- moment.est.mean.reducer(mean.info)
+
+
diff --git a/demo/3.ranef.R b/demo/3.ranef.R
new file mode 100644
index 0000000..e1d8750
--- /dev/null
+++ b/demo/3.ranef.R
@@ -0,0 +1,39 @@
+# 4.ranef.R
+# (Map step) For each piece of data indexed by i
+# - read in subspace.i.rds
+# - compute ranef cov's sufficient statistics
+# - write out ranef.i.rds 
+# 
+# (Reduce step) Pool information together
+# - estimate ranef cov
+# - write out momentest.rds
+
+#----------
+# compute and save sufficient statistics for estimating ranef cov
+for(dataid in seq_len(nfiles)){
+
+  subspacefile <- paste0(datafile_folder,'subspace.',dataid,'.rds')
+  s <- readRDS(subspacefile)
+
+  dispersion <- rep(dispersion.tot, s$ngroups)
+  ranef.sep <- moment.est.cov.mapper(s$coef, rank.fixed, s$subspace, s$precision, dispersion, 
+				     start.cov , diagcov = control$diagcov, mean = est.mean$mean)
+
+  raneffilename <- paste0(datafile_folder,'ranef.',dataid,'.rds')
+  saveRDS(ranef.sep, raneffilename)
+}
+
+#---------- 
+# estimate ranef cov
+raneffiles <- grep('ranef.*.rds',list.files(datafile_folder,full.names = TRUE),value = TRUE)
+cov.info <- as.list(rep(NULL,nfiles))
+for(i in seq_len(nfiles)){
+  ranef.ret <- readRDS(raneffiles[i])
+  cov.info[[i]] <- ranef.ret
+}
+est.cov <- moment.est.cov.reducer(cov.info,control$diagcov)
+
+# save to disk
+est <- list(mean=est.mean$mean, mean.cov=est.mean$mean.cov, cov=est.cov)
+mestfile<- paste0(datafile_folder,'momentest.rds',sep = "")
+saveRDS(est,file = mestfile)
diff --git a/demo/4.finalize.R b/demo/4.finalize.R
new file mode 100644
index 0000000..052ab0c
--- /dev/null
+++ b/demo/4.finalize.R
@@ -0,0 +1,71 @@
+# 6.finalize.R
+# Construct the same return object as mhglm.fit.
+# Save the fit object to mhglmfit.rds.
+
+#--------------------
+# combind subspace, precision, 
+# y, group, weights, offset, nobs 
+subspace <- c()
+precision <- c()
+y <- c()
+group <- c()
+weights <- c()
+offset <- c()
+nobs <- 0
+for(dataid in seq_len(nfiles)){
+
+  infofilename <- paste0(datafile_folder,'info.',dataid,".rds")
+  info <- readRDS(infofilename)
+  subspacefile <- paste0(datafile_folder,'subspace.',dataid,'.rds')
+  s <- readRDS(subspacefile)
+
+  subspace <- c(subspace, s$subspace)
+  precision <- c(precision, s$precision)
+  y <- c(y,info$y)
+  group <- c(group,list(info$group))
+  weights <- c(weights, info$weights)
+  offset <- c(offset, info$offset)
+  nobs <- nobs + info$nobs
+}
+group <- unlist(group)
+
+
+#--------------------
+# change back to original coordinates
+mean <- est$mean
+mean.cov <- est$mean.cov
+cov <- est$cov
+
+coef.mean <- rep(NA, nfixed)
+coef.mean.cov <- matrix(NA, nfixed, nfixed)
+coef.cov <- matrix(NA, nrandom, nrandom)
+(coef.mean[attr(R.fixed, "pivot")[r1.fixed]]
+ <- backsolve(R.fixed, mean))
+(coef.mean.cov[attr(R.fixed, "pivot")[r1.fixed],
+ attr(R.fixed, "pivot")[r1.fixed]]
+<- backsolve(R.fixed, t(backsolve(R.fixed, mean.cov))))
+(coef.cov[attr(R.random, "pivot")[r1.random],
+ attr(R.random, "pivot")[r1.random]]
+<- backsolve(R.random, t(backsolve(R.random, cov))))
+
+#--------------------
+# set coordinate names
+names(coef.mean) <- xnames
+dimnames(coef.mean.cov) <- list(xnames, xnames)
+dimnames(coef.cov) <- list(znames, znames)
+
+#--------------------
+# construct the fit object and save to disk
+fit <- list(family = family, coefficient.mean = coef.mean,
+	    coefficient.mean.cov = coef.mean.cov,
+	    coefficient.cov = coef.cov, coefficients = coef,
+	    subspace = subspace, precision = precision,
+	    dispersion = dispersion.tot, df.residual = df.residual.tot,
+	    R = R, rank = rank, rank.fixed = rank.fixed,
+	    rank.random = rank.random, pivot = pivot, y = y, group = group,
+	    prior.weights = weights, offset = offset, nobs = nobs)
+
+
+fitfile <- paste0(datafile_folder,"mhglmfit.rds")
+saveRDS(fit,file = fitfile)
+
diff --git a/demo/demo.R b/demo/demo.R
new file mode 100644
index 0000000..e648e37
--- /dev/null
+++ b/demo/demo.R
@@ -0,0 +1,75 @@
+library(devtools)
+load_all("../R/",export_all = TRUE) # export all functions, for convenience
+
+#----------
+# generate simulation samples; 
+# split data by group and write to separate files
+#----------
+family <- gaussian()
+datafile_folder <- "./"
+source('0.sim.R')
+
+
+#----------
+# The following lines of code demonstrate how to run
+# mhglm.fit on very large dataset.
+#
+# The main idea is to process one trunk of data at 
+# a time, save the necessary information and move on to
+# the next trunk. After one pass of data, combine the 
+# saved information to make final estimate. This is the 
+# classic Map/Reduce method. 
+# 
+# This method requires that data from same group must be 
+# processed together, therefore should be saved in same file.
+# 
+# The detailed procedures are as below.
+# 1. Set parameters. In addition to parameters like control and family, 
+# one also need to specify where the data lives and the filename list.
+#
+# 2. Fit glm to each trunk of data. For each trunk of data, run rdglm.group.fit, 
+# and then compute dispersion.sum, df.residual.tot, Rp, coef, 
+# subspace, precision. Save the above information for each trunk.
+# Once all the data are processed, combine dispersion.sum and df.residual 
+# to estimate the overall dispersion.
+# 
+# 3. Compute coefficient mean and covariance estimates. 
+# - 2.fixed.R: for each trunk of data, compute and save the sufficient statistics for 
+# computing fixef (coefficient mean). Once all the data are processed, combine the 
+# saved statistics to estimate fixef. 
+# - 3. ranef.R: for each trunk of data, compute and save the sufficient statistics for
+# computing ranef covariance matrix. Once all the data are processed, combine the 
+# saved statistics to estimate ranef.cov.
+# - iterate over the above two steps. 
+# 
+# 4. Save the same output as mhglm.fit. Construct the same object as mhglm.fit would return.
+# Save to disk.
+#----------
+
+# 1. Set parameters
+control <- list(diagcov = TRUE, standardize = FALSE)
+family <- gaussian()
+
+# where are the partitioned data files
+datafile_folder <- "./"
+
+# the list of data files to process
+datafile_list <- grep('data',list.files(datafile_folder), value = TRUE)
+
+
+# 2. Fit glm to each trunk of data.
+source('1.glmfit.R')
+
+# 3. Compute coefficient mean and covariance estimates
+iter <- 0
+while(iter < control$steps){
+  source('2.fixef.R')
+  source('3.ranef.R')
+  iter <- iter + 1
+}
+source('2.fixef.R')
+source('3.ranef.R')
+
+# 4. Save the same output as mhglm.fit
+source('4.finalize.R')
+

From 53d7ccde5838ce4496278030b22694131bcb92c6 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MacBook-Pro.local>
Date: Thu, 9 Jun 2016 18:07:18 -0400
Subject: [PATCH 08/10] use pseudo solve when computing ranef cov in diagcov
 case

---
 R/moment.est.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/moment.est.R b/R/moment.est.R
index 63d495e..4cecdc4 100644
--- a/R/moment.est.R
+++ b/R/moment.est.R
@@ -31,6 +31,8 @@ moment.est.mean.mapper <- function(coefficients, nfixed, subspace, precision, di
   if (is.null(start.cov))
     start.cov <- diag(nrandom)
 
+
+
   # compute mean estimate and covariance bias correction
   weight11.sum <- matrix(0,  nfixed, nfixed)
   weight1.coef.sum <- matrix(0,  nfixed, 1)
@@ -127,6 +129,7 @@ moment.est.cov.mapper <- function(coefficients, nfixed, subspace, precision, dis
   if (is.null(start.cov))
     start.cov <- diag(nrandom)
 
+
   # compute mean estimate and covariance bias correction
   if(diagcov){
     bias.sum <- matrix(0, nrandom, 1)
@@ -226,7 +229,7 @@ moment.est.cov.reducer <- function(ret, diagcov)
   nrandom <- nrow(wt.bias)
 
   if(diagcov){
-    LHSinv <- solve(wtot2)
+    LHSinv <- pseudo.solve(wtot2)
     diag_1 <- LHSinv %*% wt.cov
     diag_2 <- LHSinv %*% wt.bias
 

From dbf2a3a49e5ad5161f72a373b722dfadd5b8a6d3 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MacBook-Pro.local>
Date: Thu, 9 Jun 2016 18:08:56 -0400
Subject: [PATCH 09/10] allow useage of || to indicate uncorrelated predictors
 (i.e.set diagcov = TRUE) in mhglm function

---
 R/mhglm.R | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/R/mhglm.R b/R/mhglm.R
index 69f7d8a..e44b516 100644
--- a/R/mhglm.R
+++ b/R/mhglm.R
@@ -103,7 +103,15 @@ mhglm <- function(formula, family = gaussian, data, weights, subset,
     logging::loginfo("Grouping factor and random effect design matrix",
                      logger="mbest.mhglm")
 
+    if(lme4::expandDoubleVerts(formula) != formula){
+      control$diagcov <- TRUE
+      formula <- singlebars(formula)
+    } else {
+      control$diagcov <- FALSE
+    }
+
     bars <- lme4::findbars(formula)
+
     if (length(bars) >= 2L)
         stop("Can specify at most one random effect term")
     if (length(bars) == 1L) {
@@ -603,3 +611,19 @@ print.mhglm <- function(x, digits = max(3L, getOption("digits") - 3L),
 
     cat("\n")
 }
+
+
+singlebars <-  function (term)
+{
+  if (is.name(term) || !is.language(term))
+    return(term)
+  if (length(term) == 2) {
+    term[[2]] <- singlebars(term[[2]])
+    return(term)
+  }
+  stopifnot(length(term) >= 3)
+  if (is.call(term) && term[[1]] == as.name("||"))
+    term[[1]] <- as.name("|")
+  for (j in 2:length(term)) term[[j]] <- singlebars(term[[j]])
+  term
+}

From 30f4f2b618c797b9da7c97bc63872fd8049faed4 Mon Sep 17 00:00:00 2001
From: Ningshan Zhang <nzhang@Ningshans-MBP.home>
Date: Wed, 22 Jun 2016 12:44:41 -0400
Subject: [PATCH 10/10] enable standardize =T for diagonal covariance
 assumption

---
 R/mhglm.fit.R | 70 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/R/mhglm.fit.R b/R/mhglm.fit.R
index 8797dc0..ddd858b 100644
--- a/R/mhglm.fit.R
+++ b/R/mhglm.fit.R
@@ -59,8 +59,8 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
     df.residual.tot <- sum(m$df.residual)
     dispersion <- rep(dispersion.tot, ngroups)
 
-#    if (dispersion.tot == 0)
-#        stop("cannot estimate dispersion (no residual degrees of freedom)")
+    if (dispersion.tot == 0)
+        warning("cannot estimate dispersion (no residual degrees of freedom)")
 
     # compute group-sqecific precision square root (unpivoted)
     Rp <- as.list(rep(NULL, ngroups))
@@ -79,38 +79,52 @@ mhglm.fit <- function(x, z, y, group, weights = rep(1, nobs),
     names(Rp) <- names(m$qr)
 
     # change coordinates so that average precision is identity
-
     if (control$standardize) {
-        # compute averge precision of estimates
-        prec.avg <- matrix(0, nvars, nvars)
-        if(control$parallel) {
+      # compute averge precision of estimates
+
+      if(control$diagcov){
+	# if assuming independent covariates,
+	# standardize each column separately.
+	prec.avg <- rep(0, nvars)
+	for (i in seq_len(ngroups)) {
+	  scale.i <- sqrt(1/dispersion[i])
+	  prec.avg <- prec.avg + apply((scale.i * Rp[[i]])^2, 2,sum)
+	}
+	prec.avg <- diag(prec.avg,nrow = nvars)
+
+      } else {
+
+	prec.avg <- matrix(0, nvars, nvars)
+	if(control$parallel) {
 	  prec.avg <- foreach(i = seq_len(ngroups)) %dopar% {
 	    scale.i <- sqrt(1/dispersion[i])
 	    return(crossprod(scale.i * Rp[[i]]))
 	  }
 	  prec.avg <- Reduce('+', prec.avg)
-        } else {
-            for (i in seq_len(ngroups)) {
-                scale.i <- sqrt(1/dispersion[i])
-                prec.avg <- prec.avg + crossprod(scale.i * Rp[[i]])
-            }
-        }
-        prec.avg <- prec.avg / ngroups
-
-        # cov(coef) = (t(R) R)^{-1} = R^{-1} R^{-T}
-        # cov(R x) = R cov(x) R^T
-        # [cov(R x)]^{-1} = R^{-T} [cov(x)]^{-1} R^{-1}
-        suppressWarnings({
-            R.fixed <- chol(prec.avg[fixed,fixed], pivot = TRUE)
-            R.random <- chol(prec.avg[random,random], pivot = TRUE)
-        })
-
-        #pivot <- attr(R, "pivot")
-        #rank <- attr(R, "rank")
-        #r1.fixed <- seq_len(attr(R.fixed, "rank"))
-        #r1.random <- seq_len(attr(R.random, "rank"))
-        #R.fixed <- R.fixed[r1.fixed,r1.fixed,drop=FALSE]
-        #R.random <- R.random[r1.random,r1.random,drop=FALSE]
+	} else {
+	  for (i in seq_len(ngroups)) {
+	    scale.i <- sqrt(1/dispersion[i])
+	    prec.avg <- prec.avg + crossprod(scale.i * Rp[[i]])
+	  }
+	}
+      }
+
+      prec.avg <- prec.avg / ngroups
+
+      # cov(coef) = (t(R) R)^{-1} = R^{-1} R^{-T}
+      # cov(R x) = R cov(x) R^T
+      # [cov(R x)]^{-1} = R^{-T} [cov(x)]^{-1} R^{-1}
+      suppressWarnings({
+	R.fixed <- chol(prec.avg[fixed,fixed], pivot = TRUE)
+	R.random <- chol(prec.avg[random,random], pivot = TRUE)
+      })
+
+      #pivot <- attr(R, "pivot")
+      #rank <- attr(R, "rank")
+      #r1.fixed <- seq_len(attr(R.fixed, "rank"))
+      #r1.random <- seq_len(attr(R.random, "rank"))
+      #R.fixed <- R.fixed[r1.fixed,r1.fixed,drop=FALSE]
+      #R.random <- R.random[r1.random,r1.random,drop=FALSE]
     }
     else { # control.standardize==FALSE
         R.fixed <- diag(nfixed)