In [1]:
##################################################################
# Public functions
##################################################################
#' Multivariate Normal Density 
#' 
#' Calculates mutlivariate normal densities. This is different to the dmvnorm function 
#' in the mvtnorm package in that it takes a matrix for both x and mean. It then calculates 
#' a vector of densities according to dmvnorm(x[i,],mean[i,],sigma,log = FALSE).
#' To aid computation the mahalanobis distances are calculated in parallel using mclapply.
#' @importFrom parallel mclapply
#' @param x A matrix of values
#' @param mean A matrix of means
#' @param sigma A covariance matrix
#' @param log Boolean for whether we want log densities or not
#' @details My own implementation of the multivariate normal density function
#' for increased efficiency for application in this package
#' because there are so many repeated calls to densitymvnorm using a 
#' given sigma matrix it made sense to have one that could take a matrix 
#' of means as well as a matrix of x's and treat them as 
#' paired, returning the density of x[1,] given mean[1,], x[2,] 
#' given mean[2,] also stops repeated inversions of the matrix sigma, and
#' calculates the densities in parallel
#' @return A vector of densities
#' @export
densityMvNorm = function (x, mean, sigma, log = FALSE) 
{
  ## takes a matrix of means rather than a single vector
  if (missing(sigma)) sigma = diag(ncol(x))
  
  if (NCOL(x) != NCOL(sigma)) {
    stop("x and sigma have non-conforming size")
  }
  if (!isSymmetric(sigma, tol = sqrt(.Machine$double.eps), 
                   check.attributes = FALSE)) {
    stop("sigma must be a symmetric matrix")
  }
  if (NCOL(mean) != NROW(sigma)) {
    stop("mean and sigma have non-conforming size")
  }
  ## invert matrix before hand so only do this once
  prec = solve(sigma)
  means = lapply(1:dim(mean)[1],function(i){mean[i,]})
  distval = do.call(rbind, 
                     mclapply(means, 
                              mahalanobis,x = x, cov = prec,inverted = TRUE))
  logdet = sum(log(eigen(sigma, symmetric = TRUE, only.values = TRUE)$values))
  logretval = -(ncol(x) * log(2 * pi) + logdet + distval)/2
  
  if (log) 
    return(logretval)
  else 
    return(exp(logretval))
}

In [83]:
Estep <- function(data, G, para){
  # Your Code
  # Return the n-by-G probability matrix
    n = dim(data)[1]
    prob = matrix(0,n,G)
    
    A = t(densityMvNorm(faithful,t(para0$mean),para0$Sigma))
    
    for (x in 1:n){
        
        prob[x,1] = A[x,1]/(A[x,1] + A[x,2])
        prob[x,2] = A[x,2]/(A[x,1] + A[x,2])
    }
    
    
    return (prob)
  }

Mstep <- function(data, G, para, post.prob){ 
  # Your Code
  # Return the updated parameters
    
    para$prob[1] = mean(post.prob[,1])
    para$prob[2] = 1 - para$prob[1]
    
    para$mean[1,1] = sum(post.prob[,1]*data[,1])/sum(post.prob[,1])
    para$mean[1,2] = sum(post.prob[,2]*data[,1])/sum(post.prob[,2])
    para$mean[2,1] = sum(post.prob[,1]*data[,2])/sum(post.prob[,1])
    para$mean[2,2] = sum(post.prob[,2]*data[,2])/sum(post.prob[,2])
    
    para$Sigma[1,1] = sqrt(sum(post.prob[,1]*(data[,1]^2))/sum(post.prob[,1])-(para$mean[1,1])^2)
    para$Sigma[1,2] = sqrt(sum(post.prob[,2]*(data[,1]^2))/sum(post.prob[,2])-(para$mean[1,2])^2)
    para$Sigma[2,1] = sqrt(sum(post.prob[,1]*(data[,2]^2))/sum(post.prob[,1])-(para$mean[2,1])^2)
    para$Sigma[2,2] = sqrt(sum(post.prob[,2]*(data[,2]^2))/sum(post.prob[,2])-(para$mean[2,2])^2)
    
    return(para)
    
    
    
  }

myEM <- function(data, itmax, G, para){
  # itmax: num of iterations
  # G:     num of components
  # para:  list of parameters (prob, mean, Sigma)
  for(t in 1:itmax){
    post.prob <- Estep(data, G, para)
    para <- Mstep(data, G, para, post.prob)
  }
  return(para)
}

In [84]:
myEM(data=faithful, itmax=10, G=2, para=para0)

0,1,2
eruptions,3.476344,3.499221
waiting,70.675984,71.118114

Unnamed: 0,eruptions,waiting
eruptions,1.142794,1.135622
waiting,13.603982,13.532245


In [70]:
x = Estep(faithful,2,para0)

In [76]:
sum(x[,2]*faithful[,1])/sum((x[,2]))

In [80]:
sqrt(sum(x[,1]*(faithful[,1]^2))/sum(x[,1])-(3.475)^2)

In [None]:
a = matrix(4,4,4)

In [None]:
a

In [None]:
densityMvNorm(faithful,)

In [None]:
a

In [44]:
library(mclust)
library(parallel)
dim(faithful)

In [45]:
faithful

eruptions,waiting
3.600,79
1.800,54
3.333,74
2.283,62
4.533,85
2.883,55
4.700,88
3.600,85
1.950,51
4.350,85


In [46]:
library(mclust)
dim(faithful)

In [47]:
n <- nrow(faithful)

In [64]:
set.seed(234)  # replace 234 by the last 4-dig of your University ID
Z <- matrix(0, n, 2) 
Z[sample(1:n, 120), 1] <- 1 
Z[, 2] <- 1 - Z[, 1]
ini0 <- mstep(modelName="EEE", faithful , Z)$parameters

In [65]:
para0 <- list(prob = ini0$pro, 
              mean = ini0$mean, 
              Sigma = ini0$variance$Sigma)
para0

0,1,2
eruptions,3.475,3.497875
waiting,70.65,71.092105

Unnamed: 0,eruptions,waiting
eruptions,1.29781,13.92393
waiting,13.92393,184.09563


In [79]:
para0$Sigma[1,1]

In [51]:
t(densityMvNorm(faithful,t(para0$mean),para0$Sigma))

0,1
0.011615652,0.012103748
0.007846028,0.007669177
0.016712034,0.017124050
0.010717542,0.010735605
0.013534373,0.014014828
0.005962381,0.005593383
0.010307940,0.010778877
0.002058848,0.002219558
0.008283514,0.007908938
0.012383241,0.012924538


In [36]:
t(para0$mean[,1])

eruptions,waiting
3.475,70.65


In [9]:
install.packages("parallel")

"package 'parallel' is a base package, and should not be updated"

In [None]:
dmnorm(faithful[3,], para0$mean[,1], para0$Sigma)

In [53]:
Estep(faithful,2,para0)

0,1
0.4897110,0.5102890
0.5056993,0.4943007
0.4939116,0.5060884
0.4995790,0.5004210
0.4912800,0.5087200
0.5159659,0.4840341
0.4888334,0.5111666
0.4812184,0.5187816
0.5115664,0.4884336
0.4893057,0.5106943


In [None]:
faithful[1,1]

In [57]:
x = c(2,2 : 3,4)

In [58]:
x

In [59]:
faithful$waiting

In [61]:
densityMvNorm(faithful$waiting,60,2)

ERROR: Error in UseMethod("isSymmetric"): no applicable method for 'isSymmetric' applied to an object of class "c('double', 'numeric')"
