## Tools for building Henderson's Mixed Model Equations

Use of a Gibbs sampler to compute the posterior means of the effects and variance components.


In [1]:
using PedModule
using DataFrames
using Distributions

##### Code from MTBVModelsGibbs is in MTModule now. Only new code is shown below.

In [2]:
include("../../MTModule/MME.jl")
using MTModule

##### Type MME is modified to keep the members X and ySparse.

In [3]:
# Container for a multi-trait model description together with the mixed model
# equations built from it.
# NOTE(review): the field notes below are taken from how sampleMCMC in this
# notebook reads and writes the fields; fields it does not touch are hedged and
# should be confirmed against MTModule.
type MME
    modelVec::Array{AbstractString,1}                       # presumably one model equation string per trait - TODO confirm
    modelTerms::Array{MTModule.ModelTerm,1}                 # presumably every term of every equation - TODO confirm
    modelTermDict::Dict{AbstractString,MTModule.ModelTerm}  # term name => term; sampleMCMC reads .startPos and .nLevels from the values
    lhsVec::Array{Symbol,1}                                 # its length is used as the number of traits
    covVec::Array{Symbol,1}                                 # presumably covariate names - TODO confirm
    pedTrmVec::Array{AbstractString,1}                      # names of the pedigree-based terms; used as keys into modelTermDict
    X                                                       # design matrix: used in ySparse - X*sol and in X'Ri*X
    ySparse                                                 # response vector: residuals are sliced from it in runs of nObs per trait
    mmeLhs                                                  # left-hand side of the equations; rebuilt as X'Ri*X every sample
    mmeRhs                                                  # right-hand side of the equations; rebuilt as X'Ri*ySparse every sample
    ped                                                     # pedigree object; compared with 0 to detect "no pedigree"
    Gi::Array{Float64,2}                                    # inverse of the covariance matrix of the pedigree terms (set to inv(G0))
    R::Array{Float64,2}                                     # residual covariance matrix; overwritten with each sampled R0
    Ai                                                      # matrix of the quadratic forms sol'*Ai*sol; presumably the inverse relationship matrix - TODO confirm
    mmePos::Int64                                           # not used in the visible code; presumably a running equation position - TODO confirm
end

LoadError: LoadError: UndefVarError: ModelTerm not defined
while loading In[3], in expression starting on line 1

In [None]:
# Build the pedigree object from the pedigree file; it is handed to
# MTModule.setAsRandom further down.
ped = PedModule.mkPed("../../data/sim.ped");

In [None]:
# Read the space-separated file into a DataFrame. Column 1 is later used as the
# animal ID; the remaining columns are converted to the numeric matrix Q.
dfGen = readtable("../../data/sim.gen", separator = ' ');

In [None]:
# Numeric matrix made of every column of dfGen except the first (the ID column).
Q = convert(Array{Float64,2},dfGen[:,collect(2:end)]);
# One standard-normal effect per column of Q for each of the two traits.
# The length is taken from Q instead of being hard-coded, so the products
# below are conformable whatever the number of columns in the input file.
α1 = randn(size(Q,2))
α2 = randn(size(Q,2))
# Simulated values for the two traits: linear combinations of the columns of Q.
a1 = Q*α1
a2 = Q*α2;

In [None]:
# Sample covariance matrix of the two simulated vectors (columns are the
# variables). It is reused below as the residual covariance R and as the
# covariance passed to setAsRandom.
G0=cov([a1 a2])

In [None]:
# Simulate residuals whose covariance is G0 and add them to the simulated values.
# chol returns the UPPER triangular factor U with U'U = G0, so the factor has to
# be transposed: with L = U' we get cov(L*z) = L*L' = U'U = G0 for z ~ N(0, I).
# (Multiplying by U itself would give covariance U*U', which is not G0.)
L  = chol(G0)'
e  = L*randn(2,size(Q,1))
# e holds one column per record, so it is transposed to line up with [a1 a2].
y = [a1 a2] + e';

In [None]:
# Phenotype table: IDs from the first column of dfGen plus the two simulated
# traits, each rounded to 3 decimals.
df2 = DataFrame(Animal = dfGen[:,1], y1=round(y[:,1],3),y2=round(y[:,2],3));

In [None]:
# Show the first rows of the phenotype table as a quick sanity check.
head(df2)

In [None]:
# Two-trait model: each trait has an intercept and an Animal term.
models = "y1 = intercept + Animal;
          y2 = intercept + Animal"
# The covariance computed above is also used as the residual covariance;
# sampleMCMC reads it back from mme.R as its starting value and prior scale.
R = G0
mme = MTModule.initMME(models,R)
# Tie the Animal term to the pedigree with covariance G0.
# NOTE(review): exact semantics of setAsRandom live in MTModule - confirm there.
MTModule.setAsRandom(mme,"Animal", ped,G0)

In [None]:
"""
    sampleMCMC(nIter, mme, df; outFreq=100)

Run `nIter` rounds of Gibbs sampling on the mixed model equations of `mme`,
built from the data frame `df`, and return the running posterior means.

# Arguments
- `nIter`: number of samples to draw.
- `mme`: model object; its `R`, `Gi`, `mmeLhs` and `mmeRhs` fields are
  overwritten on every round.
- `df`: data frame holding the records; its row count is used as the number of
  observations per trait.
- `outFreq`: progress is printed every `outFreq` samples.

# Returns
A `Dict` with the keys `"posteriorMeanLocationParms"` and `"posteriorMeanR0"`,
plus `"posteriorMeanG0"` when `mme.ped != 0`.
"""
function sampleMCMC(nIter,mme,df;outFreq=100)
    # Build the equations from the data frame that was passed in, not from a
    # global that merely happens to exist in the notebook session.
    getMME(mme,df)
    p = size(mme.mmeLhs,1)
    sol = fill(0.0,p)
    solMean = fill(0.0,p)
    # Starting values for the sampler.
    GaussSeidel(mme.mmeLhs,sol,mme.mmeRhs,tol=0.000001)
    ν = 10
    nObs    = size(df,1)
    nTraits = size(mme.lhsVec,1)
    # Residual covariance: the prior scale is chosen so that its mean is the
    # residual covariance currently stored in mme.
    νR0 = ν + nTraits
    R0 = mme.R
    PRes = R0*(νR0 - nTraits - 1)
    SRes   = zeros(Float64,nTraits,nTraits)
    R0Mean = zeros(Float64,nTraits,nTraits)
    # Everything related to the pedigree terms exists only when a pedigree was
    # set, so every later use is guarded by the same flag.
    hasPed = mme.ped != 0
    if hasPed
        pedTrmVec = mme.pedTrmVec
        k = size(pedTrmVec,1)
        νG0 = ν + k
        G0 = inv(mme.Gi)
        P = G0*(νG0 - k - 1)
        S = zeros(Float64,k,k)
        G0Mean = zeros(Float64,k,k)
    end
    for iter=1:nIter
        if iter%outFreq==0
            println("at sample: ",iter)
            if hasPed
                println(G0Mean)
            end
        end
        Gibbs(mme.mmeLhs,sol,mme.mmeRhs)
        if hasPed
            # Quadratic forms sol_i' * Ai * sol_j for every pair of pedigree terms.
            # can make this more efficient by taking advantage of symmetry
            for (i,trmi) = enumerate(pedTrmVec)
                pedTrmi  = mme.modelTermDict[trmi]
                startPosi  = pedTrmi.startPos
                endPosi    = startPosi + pedTrmi.nLevels - 1
                for (j,trmj) = enumerate(pedTrmVec)
                    pedTrmj  = mme.modelTermDict[trmj]
                    startPosj  = pedTrmj.startPos
                    endPosj    = startPosj + pedTrmj.nLevels - 1
                    S[i,j] = (sol[startPosi:endPosi]'*mme.Ai*sol[startPosj:endPosj])[1,1]
                end
            end
        end
        # Residual cross-products; the residual vector holds nObs entries per trait.
        resVec = mme.ySparse - mme.X*sol
        for traiti = 1:nTraits
            startPosi = (traiti-1)*nObs + 1
            endPosi   = startPosi + nObs - 1
            for traitj = traiti:nTraits
                startPosj = (traitj-1)*nObs + 1
                endPosj   = startPosj + nObs - 1
                SRes[traiti,traitj] = (resVec[startPosi:endPosi]'resVec[startPosj:endPosj])[1,1]
                # Mirror the freshly computed upper-triangle entry into the lower
                # triangle (the reverse assignment would wipe it out again).
                SRes[traitj,traiti] = SRes[traiti,traitj]
            end
        end
        # Sample the residual covariance and rebuild the equations with it.
        R0 = rand(InverseWishart(νR0 + nObs, PRes + SRes))
        mme.R = R0
        Ri = mkRi(mme,df)
        X = mme.X
        mme.mmeLhs = X'Ri*X
        mme.mmeRhs = X'Ri*mme.ySparse
        if hasPed
            # Sample the covariance of the pedigree terms and add the
            # corresponding contribution back into the equations.
            pedTrm1 = mme.modelTermDict[pedTrmVec[1]]
            q = pedTrm1.nLevels
            G0 = rand(InverseWishart(νG0 + q, P + S))
            mme.Gi = inv(G0)
            addA(mme)
            G0Mean  += (G0  - G0Mean )/iter
        end
        # Running means over the samples drawn so far.
        solMean += (sol - solMean)/iter
        R0Mean  += (R0  - R0Mean )/iter
    end
    output = Dict()
    output["posteriorMeanLocationParms"] = solMean
    if hasPed
        output["posteriorMeanG0"] = G0Mean
    end
    output["posteriorMeanR0"] = R0Mean
    return output
end

In [None]:
# Draw 2000 samples and time the run; outFreq=1 prints progress at every sample.
@time res = sampleMCMC(2000,mme,df2,outFreq=1)
# Posterior mean of the covariance matrix of the pedigree terms.
res["posteriorMeanG0"]

In [None]:
# Posterior mean of the residual covariance matrix.
res["posteriorMeanR0"]

In [None]:
# Per-column sample variances of the simulated values, for comparison with the
# posterior means above.
var([a1 a2],1)

In [None]:
# Per-trait sample variances of the simulated residuals.
var(e',1)

In [None]:
# Sum of the two variance components per trait.
var([a1 a2],1) + var(e',1)

In [None]:
# Scratch cell: repeats, at top level, the quadratic-form computation and the
# covariance draw that sampleMCMC performs for the pedigree terms.
# NOTE(review): pedTrmVec and sol are not assigned at top level in the cells
# visible here (inside sampleMCMC they are locals) - this cell presumably relies
# on definitions from elsewhere in the session; confirm before running.
S = zeros(Float64,2,2)
for (i,trmi) = enumerate(pedTrmVec)
    pedTrmi  = mme.modelTermDict[trmi]
    startPosi  = pedTrmi.startPos
    endPosi    = startPosi + pedTrmi.nLevels - 1
    for (j,trmj) = enumerate(pedTrmVec)
        pedTrmj  = mme.modelTermDict[trmj]
        startPosj  = pedTrmj.startPos
        endPosj    = startPosj + pedTrmj.nLevels - 1
        # Quadratic form sol_i' * Ai * sol_j for the pair of terms (i, j).
        S[i,j] = (sol[startPosi:endPosi]'*mme.Ai*sol[startPosj:endPosj])[1,1]
    end
end

# Here ν is used directly as the prior degrees of freedom with 2 traits, so the
# scale G0*(ν - 2 - 1) gives a prior mean of G0.
ν = 10
P = G0*(ν - 2 - 1)
# NOTE(review): 4200 is hard-coded; presumably the number of levels of the
# pedigree term (q in sampleMCMC) - confirm.
rand(InverseWishart(ν + 4200, P + S))