## Tools for building Henderson's Mixed Model Equations

This notebook shows how Henderson's mixed model equations (HMME) can be built from a data set and a model string.


In [7]:
include ("../../PedModule.jl/src/PedModule.jl")
using DataFrames



In [184]:
# Solve A*x = b by Gauss-Seidel iteration, overwriting `x` with the solution.
#
# Arguments:
#   A        square coefficient matrix; A[i,i] is used as a divisor
#   x        starting values; updated in place and also returned
#   b        right-hand side, read element-wise as b[i]
#   tol      iteration stops once the mean squared residual
#            sum((A*x-b).^2)/n is no longer above this value
#   maxiter  maximum number of monitored sweeps (previously a hard-coded 1000)
#
# One sweep is always performed before the first convergence check. After each
# monitored sweep the sweep number and the mean squared residual are printed.
function GaussSeidel(A,x,b;tol=0.000001,maxiter=1000)
    n = size(x,1)
    # One full sweep: x[i] is corrected using the entries of x that were
    # already updated earlier in the same sweep. The row product is
    # accumulated as a scalar so no row slice is allocated per unknown.
    function sweep()
        for i=1:n
            rowDot = 0.0
            for j=1:n
                rowDot += A[i,j]*x[j]
            end
            x[i] += (b[i] - rowDot)/A[i,i]
        end
    end
    sweep()
    diff = sum((A*x-b).^2)
    iter = 0
    while diff/n > tol && iter < maxiter
        iter += 1
        sweep()
        diff = sum((A*x-b).^2)
        println(iter," ",diff/n)
    end
    return x
end

# Solve A*x = b by damped (weighted) Jacobi iteration.
#
# Arguments:
#   A        square coefficient matrix; its diagonal is used as a divisor
#   x        starting values; the name is rebound inside the function, so the
#            caller's array is not modified
#   b        right-hand side
#   p        weight given to the new Jacobi candidate; (1-p) is given to the
#            current solution
#   tol      iteration stops once the mean squared residual sum(resid.^2)/n
#            is no longer above this value
#   maxiter  maximum number of iterations (previously a hard-coded 1000)
#
# Returns the final solution. Each iteration prints its number and the mean
# squared residual.
function Jacobi(A,x,b,p;tol=0.000001,maxiter=1000)
    D       = diag(A)
    n       = size(A,1)
    resid   = b - A*x
    # The Jacobi candidate is x + D^-1*(b - A*x). The initial candidate used to
    # omit "+ x", which is only correct when the starting values are all zero.
    tempSol = resid./D + x
    diff    = sum(resid.^2)
    iter    = 0
    while diff/n > tol && iter < maxiter
        iter += 1
        x       = p*tempSol + (1-p)*x
        resid   = b - A*x
        tempSol = resid./D + x
        diff    = sum(resid.^2)
        println(iter," ",diff/n)
    end
    return x
end


# Pairs a string with a numeric value.
# Not referenced by any other code visible in this file.
type TermStrVal
    str::String
    value::Float64
end

# Pairs a level label with a numeric value.
# Not referenced by any other code visible in this file.
type TermLvlVal
    level::String
    value::Float64
end

# One right-hand-side term of one model equation, together with the data and
# incidence matrix derived for it (filled in by getTerm, getData and getX).
type ModelTerm 
    iModel::Int64                           # index of the model equation this term belongs to
    trmStr::String                          # label "<iModel>:<term>" as built in getTerm
    nFactors::Int64                         # number of "*"-separated factors in the term
    factors::Array{Symbol,1}                # factor names as symbols
    str::Array{String,1}                    # used to store the data for this term as strings
    val::Array{Float64,1}                   # numeric value per observation (set in getData)
    startPos::Int64                         # start pos in HMME
    nLevels::Int64                          # number of levels of this term (set in getX)
    X::SparseMatrixCSC{Float64,Int64}       # sparse incidence matrix (set in getX)
    names::Array{Any,1}                     # level names (set in getX)
end

# State needed to build and hold the mixed model equations.
# initMME creates it; covList, setAsRandom and getMME fill in the rest.
type MME
    modelVec::Array{String,1}               # the individual model equation strings
    modelTerms::Array{ModelTerm,1}          # all terms of all models, in order
    modelTermDict::Dict{String,ModelTerm}   # term lookup by its trmStr label
    lhsVec::Array{Symbol,1}                 # left-hand-side names, one per model
    covVec::Array{Symbol,1}                 # names treated as covariates (set by covList)
    pedTrmVec::Array{String,1}              # labels of pedigree-based terms (set by setAsRandom)
    mmeLhs                                  # left-hand side; 0 until getMME runs
    mmeRhs                                  # right-hand side; 0 until getMME runs
    ped                                     # pedigree; 0 until setAsRandom runs
    Gi::Array{Float64,2}                    # inverse of G (set by setAsRandom)
    Ri::Array{Float64,2}                    # inverse of R (set by initMME)
    Ai                                      # HAi'HAi from PedModule.HAi (set by getMME)
    mmePos::Int64                           # next free position while terms are being placed
end

# Number the distinct entries of `a` in order of first appearance.
# Returns (lookup, levels): lookup maps each distinct entry to its 1-based
# position, and levels lists the distinct entries in that same order.
function mkDict(a)
    levels = Any[]
    lookup = Dict()
    for item in unique(a)
        push!(levels,item)
        lookup[item] = length(levels)
    end
    return lookup,levels
end

# Create the ModelTerm for term string `trmStr` of model number `m`.
# The term is labelled "<m>:<trmStr>" and its factors are the "*"-separated
# pieces of `trmStr`, stripped of whitespace and converted to symbols.
# Data, level and matrix fields start out empty or zero.
function getTerm(trmStr,m)
    factorSyms = [symbol(strip(piece)) for piece in split(trmStr,"*")]
    label      = string(m)*":"*trmStr
    return ModelTerm(m,label,length(factorSyms),factorSyms,[],[],0,0,spzeros(0,0),[])
end

# Return an MME object for building the mixed model equations described by
# `models`.
#
# Arguments:
#   models  one or more model equations "lhs = term + term + ...", separated by
#           ';' or newlines
#   R       square matrix with one row and column per model equation; its
#           inverse is stored in the returned object
#
# Prints a message and returns nothing when `models` is the empty string.
# Raises an error when the size of R does not match the number of equations.
function initMME(models::String,R::Array{Float64,2})
    if models==""
        println("modelEquation is empty\n")
        return
    end
    modelVec = split(models,[';','\n'],false)
    nModels  = size(modelVec,1)
    # getMME combines inv(R) with a block of rows per model, so a mismatch
    # could only fail later with a less helpful dimension error.
    if size(R) != (nModels,nModels)
        msg = string("R is ",size(R,1),"x",size(R,2)," but ",nModels,
                     " model equation(s) were given")
        error(msg)
    end
    lhsVec     = Symbol[]
    modelTerms = ModelTerm[]
    dict       = Dict{String,ModelTerm}()
    for (m,model) = enumerate(modelVec)
        lhsRhs = split(model,"=")
        push!(lhsVec,symbol(strip(lhsRhs[1])))
        for trmStr in split(strip(lhsRhs[2]),"+")
            trm = getTerm(strip(trmStr),m)
            push!(modelTerms,trm)
            # Register each term once, as it is created; earlier terms no
            # longer need to be revisited for every model.
            dict[trm.trmStr] = trm
        end
    end
    Ri = inv(R)
    return MME(modelVec,modelTerms,dict,lhsVec,[],[],0,0,0,Array(Float64,1,1),Ri,0,1)
end 

# Fill trm.str and trm.val with one label and one numeric value per row of `df`.
#
# - A term whose first factor is :intercept gets the label "intercept" and the
#   value 1.0 on every row.
# - A factor listed in mme.covVec is a covariate: it contributes its own name
#   to the label and its column values to the value.
# - Any other factor is a class variable: it contributes the row's level
#   (as a string) to the label and leaves the value unchanged.
# For terms with several factors the label pieces are joined with "*" and the
# covariate values are multiplied together.
function getData(trm::ModelTerm,df::DataFrame,mme::MME)
    nObs = size(df,1)
    if(trm.factors[1] == :intercept)                     # from Melanie's HW
        str = fill(string(trm.factors[1]),nObs)
        val = fill(1.0,nObs)
    else
        if trm.factors[1] in mme.covVec
            str = fill(string(trm.factors[1]),nObs)
            val = df[trm.factors[1]]
        else
            str = [string(i) for i in df[trm.factors[1]]]
            val = fill(1.0,nObs)
        end
        for i=2:trm.nFactors
            if trm.factors[i] in mme.covVec
                str = str .* fill("*"*string(trm.factors[i]),nObs)
                val = val .* df[trm.factors[i]]
            else
                # class factor: only the label changes, the value is untouched
                str = str .* fill("*",nObs) .* [string(j) for j in df[trm.factors[i]]]
            end
        end
    end
    trm.str = str
    trm.val = val
end

# Return the first "*"-separated piece of `str` with surrounding whitespace removed.
getFactor1(str) = strip(split(str,"*")[1])

# Build the sparse incidence matrix of `trm` and place the term in the equations.
#
# Sets trm.names, trm.nLevels, trm.X and trm.startPos, then advances
# mme.mmePos by trm.nLevels.
#
# - For a term listed in mme.pedTrmVec the columns follow the pedigree: the
#   column of an observation is the seqID that mme.ped.idMap gives for the
#   first factor of its label, and the number of columns is the pedigree size.
# - For any other term the columns are the distinct labels in trm.str, in
#   order of first appearance.
# Rows of model k occupy positions (k-1)*nObs+1 : k*nObs.
function getX(trm,mme::MME)
    nObs    = size(trm.str,1)
    nModels = size(mme.lhsVec,1)
    if trm.trmStr in mme.pedTrmVec
        trm.names   = PedModule.getIDs(mme.ped)
        trm.nLevels = length(mme.ped.idMap)
        xj = int([mme.ped.idMap[getFactor1(i)].seqID for i in trm.str])
    else
        dict,trm.names  = mkDict(trm.str)
        trm.nLevels     = length(dict)
        xj    = int([dict[i] for i in trm.str])
    end
    xi    = (trm.iModel-1)*nObs + [1:nObs]
    # Explicit dimensions give X nObs*nModels rows and one column per level,
    # even when the last rows or the last pedigree columns have no entries.
    # This replaces the dummy zero entries previously appended for that purpose.
    trm.X = sparse(xi,xj,trm.val,nObs*nModels,trm.nLevels)
    trm.startPos = mme.mmePos
    mme.mmePos  += trm.nLevels
end

# Build the left- and right-hand sides of the mixed model equations from `df`
# and store them in mme.mmeLhs and mme.mmeRhs. When a pedigree has been set
# (mme.ped is no longer 0), mme.Ai is also computed and addA is applied.
function getMME(mme::MME, df::DataFrame)
    # Place every term, starting again from position 1.
    mme.mmePos = 1
    for term in mme.modelTerms
        getData(term,df,mme)
        getX(term,mme)
    end
    # Incidence matrices of all terms, side by side.
    X = mme.modelTerms[1].X
    for k=2:size(mme.modelTerms,1)
        X = [X mme.modelTerms[k].X]
    end
    # Responses of all models, stacked model after model.
    y = df[mme.lhsVec[1]]
    for k=2:size(mme.lhsVec,1)
        y    = [y, df[mme.lhsVec[k]] ] 
    end
    nStacked = size(y,1)
    ySparse  = sparse(1:nStacked,fill(1,nStacked),y)
    Ri = kron(mme.Ri,speye(size(df,1)))
    mme.mmeLhs = X'Ri*X
    mme.mmeRhs = X'Ri*ySparse
    if mme.ped != 0
        rows,cols,vals = PedModule.HAi(mme.ped)
        HAi = sparse(rows,cols,vals)
        mme.Ai = HAi'HAi
        addA(mme::MME)
    end   
end

# Return one label per equation, "<trmStr>: <level name>", listing the levels
# of each model term in the order the terms appear in mme.modelTerms.
function getNames(mme)
    labels = String[]
    for trm in mme.modelTerms
        prefix = trm.trmStr*": "
        append!(labels,[prefix*lvl for lvl in trm.names])
    end
    return labels
end  

# Declare which variables are covariates: the space-separated names in
# `covStr` are stored as symbols in mme.covVec. Returns nothing.
function covList(mme::MME, covStr::String)
    mme.covVec = [symbol(name) for name in split(covStr," ",false)]
    return nothing
end

# Solve the mixed model equations with Jacobi (weight 0.3), starting from zeros.
# The equations are built first if mme.mmeRhs is still its scalar placeholder.
# Returns a two-column array: equation labels and solutions.
function getSolJ(mme::MME, df::DataFrame)
    size(mme.mmeRhs)==() && getMME(mme,df)
    nEq    = size(mme.mmeRhs,1)
    labels = getNames(mme)
    sol    = Jacobi(mme.mmeLhs,fill(0.0,nEq),mme.mmeRhs,0.3,tol=0.000001)
    return [labels sol]
end

# Solve the mixed model equations with Gauss-Seidel, starting from zeros.
# The equations are built first if mme.mmeRhs is still its scalar placeholder.
# Returns a two-column array: equation labels and solutions.
function getSolG(mme::MME, df::DataFrame)
    size(mme.mmeRhs)==() && getMME(mme,df)
    nEq    = size(mme.mmeRhs,1)
    labels = getNames(mme)
    sol    = GaussSeidel(mme.mmeLhs,fill(0.0,nEq),mme.mmeRhs,tol=0.000001)
    return [labels sol]
end

# Mark the terms named in `randomStr` as pedigree-based random effects.
#
# Arguments:
#   mme        equations object created by initMME
#   randomStr  space-separated term names, e.g. "Animal mat"
#   ped        pedigree used for these terms
#   G          square matrix with one row and column per matched (model, term)
#              pair, ordered term by term and, within a term, model by model;
#              its inverse is stored in mme.Gi
#
# Stores the matched labels "<model number>:<term>" in mme.pedTrmVec and the
# pedigree in mme.ped. Raises an error when the size of G does not equal the
# number of matched pairs; this also covers the case where nothing matched.
# Returns nothing.
function setAsRandom(mme::MME,randomStr::String,ped::PedModule.Pedigree, G::Array{Float64,2})
    pedTrmVec = split(randomStr," ",false)
    res = String[]
    for trm in pedTrmVec
        for (m,model) = enumerate(mme.modelVec)
            # Only the right-hand side holds model terms; the response name
            # on the left of "=" must not be matched.
            rhs     = split(model,"=")[end]
            strpVec = [strip(i) for i in split(rhs,"+")]
            if trm in strpVec
                push!(res,string(m)*":"*trm)
            end
        end
    end
    # addA reads Gi[i,j] by position in this list, so a larger G would be
    # silently truncated and a smaller one would fail with an index error.
    nTrms = length(res)
    if size(G) != (nTrms,nTrms)
        msg = string("G is ",size(G,1),"x",size(G,2)," but ",nTrms,
                     " model term(s) matched \"",randomStr,"\"")
        error(msg)
    end
    mme.pedTrmVec = res
    mme.ped = ped
    mme.Gi = inv(G)
    nothing
end

# Add mme.Ai*mme.Gi[i,j] to the block of mme.mmeLhs that pairs the i-th and
# j-th pedigree terms, for every pair of terms in mme.pedTrmVec.
function addA(mme::MME)
    pedKeys = mme.pedTrmVec
    for (i,rowKey) = enumerate(pedKeys)
        rowTrm = mme.modelTermDict[rowKey]
        rows   = rowTrm.startPos:(rowTrm.startPos + rowTrm.nLevels - 1)
        for (j,colKey) = enumerate(pedKeys)
            colTrm = mme.modelTermDict[colKey]
            cols   = colTrm.startPos:(colTrm.startPos + colTrm.nLevels - 1)
            mme.mmeLhs[rows,cols] = mme.mmeLhs[rows,cols] + mme.Ai*mme.Gi[i,j]
        end
    end
end

addA (generic function with 1 method)

In [142]:
# Create the pedigree via PedModule.mkPed("small.ped") and display its idMap
# (animal ID => pedigree node).
ped = PedModule.mkPed("small.ped")
ped.idMap

Dict{Any,Any} with 5 entries:
  "O1" => PedNode(3,"S1","D1",0.0)
  "S1" => PedNode(1,"0","0",0.0)
  "O3" => PedNode(4,"S1","D1",0.0)
  "D1" => PedNode(2,"0","0",0.0)
  "O2" => PedNode(5,"S1","D1",0.0)

In [9]:
# Toy records: four animal IDs and two response vectors (traits y1 and y2).
ID = ["S1","D1","O1","O3"]
y1 = [100.0, 50.0, 150.0, 40.0]
y2 = [10.0,  12.9, 13.0,  5.0]

4-element Array{Float64,1}:
 10.0
 12.9
 13.0
  5.0

In [10]:
# Combine the records into a DataFrame with columns Animal, y1 and y2.
dfMT = DataFrame(Animal=ID,y1=y1,y2=y2)

Unnamed: 0,Animal,y1,y2
1,S1,100.0,10.0
2,D1,50.0,12.9
3,O1,150.0,13.0
4,O3,40.0,5.0


### Multiple Trait Animal Model 

In [189]:
# Two-trait model: each trait has an intercept and an Animal effect.
# R goes to initMME, which stores its inverse; G0 goes to setAsRandom, which
# stores its inverse and attaches the pedigree to the Animal terms.
models = "y1 = intercept + Animal;
          y2 = intercept + Animal"
R = [10 2; 2 1.0]
mme = initMME(models,R)
G0 = [5 1;1 1.0]
setAsRandom(mme,"Animal", ped,G0)

In [190]:
# Show the model-qualified labels of the terms registered by setAsRandom.
mme.pedTrmVec

2-element Array{String,1}:
 "1:Animal"
 "2:Animal"

In [191]:
# Build the mixed model equations; the trailing semicolon suppresses the cell output.
getMME(mme,dfMT);

In [192]:
# Starting position of each model term within the equations.
[trm.startPos for trm in mme.modelTerms]

4-element Array{Any,1}:
 1
 2
 7
 8

In [164]:
# Display the left-hand side as a dense matrix, rounded to two decimals.
round(full(mme.mmeLhs),2)

12x12 Array{Float64,2}:
  0.67   0.17   0.17   0.17   0.17  …  -0.33  -0.33  -0.33  -0.33   0.0 
  0.17   0.79   0.38  -0.25  -0.25     -0.96  -0.38   0.25   0.25   0.25
  0.17   0.38   0.79  -0.25  -0.25     -0.38  -0.96   0.25   0.25   0.25
  0.17  -0.25  -0.25   0.67   0.0       0.25   0.25  -0.83   0.0    0.0 
  0.17  -0.25  -0.25   0.0    0.67      0.25   0.25   0.0   -0.83   0.0 
  0.0   -0.25  -0.25   0.0    0.0   …   0.25   0.25   0.0    0.0   -0.5 
 -1.33  -0.33  -0.33  -0.33  -0.33      1.67   1.67   1.67   1.67   0.0 
 -0.33  -0.96  -0.38   0.25   0.25      4.79   1.88  -1.25  -1.25  -1.25
 -0.33  -0.38  -0.96   0.25   0.25      1.88   4.79  -1.25  -1.25  -1.25
 -0.33   0.25   0.25  -0.83   0.0      -1.25  -1.25   4.17   0.0    0.0 
 -0.33   0.25   0.25   0.0   -0.83  …  -1.25  -1.25   0.0    4.17   0.0 
  0.0    0.25   0.25   0.0    0.0      -1.25  -1.25   0.0    0.0    2.5 

In [165]:
# Solve by Gauss-Seidel; each sweep prints its number and the mean squared residual.
getSolG(mme, dfMT)

1 1.2875568344653632
2 0.2528246005613293
3 0.08485177832746348
4 0.04088157239897236
5 0.02268498965332805
6 0.013124767212561242
7 0.007717507963154283
8 0.004598760338486969
9 0.0027832772766863863
10 0.0017157231019855155
11 0.0010797697648282868
12 0.0006948950048624239
13 0.00045770267427933125
14 0.00030856772541401796
15 0.0002127754000971464
16 0.0001498725201304047
17 0.00010764060172580496
18 7.86661708372215e-5
19 5.837400647329841e-5
20 4.3888600044963984e-5
21 3.336781045516102e-5
22 2.5608058414697016e-5
23 1.9807227977380018e-5
24 1.5420186379885268e-5
25 1.2069364507001945e-5
26 9.488504825399561e-6
27 7.486671560918541e-6
28 5.924823660484019e-6
29 4.700297514686299e-6
30 3.736349193358576e-6
31 2.974985523207473e-6
32 2.371967962714919e-6
33 1.893275365695588e-6
34 1.5125620791333358e-6
35 1.209305817196696e-6
36 9.674408469882774e-7


12x2 Array{Any,2}:
 "1:intercept: intercept"  83.8789    
 "1:Animal: S1"             8.33904   
 "1:Animal: D1"            -8.32588   
 "1:Animal: O1"            13.2295    
 "1:Animal: O3"            -8.77054   
 "1:Animal: O2"             0.00658015
 "2:intercept: intercept"  10.8045    
 "2:Animal: S1"            -2.01601   
 "2:Animal: D1"             2.02271   
 "2:Animal: O1"            -1.76437   
 "2:Animal: O3"            -0.564367  
 "2:Animal: O2"             0.00335109

In [166]:
# Keep rows 3:4 of dfMT and append a "mat" column holding "D1" for both rows.
dfMTMat = [dfMT[3:4,:]  DataFrame(mat = ["D1","D1"])]

Unnamed: 0,Animal,y1,y2,mat
1,O1,150.0,13.0,D1
2,O3,40.0,5.0,D1


###  Multiple Trait Model with Direct and Maternal Effects

In [185]:
# Trait y1 gets an extra "mat" term; both Animal and mat are passed to
# setAsRandom. G0 is 3x3, matching the three registered terms shown below.
models = "y1 = intercept + Animal + mat;
          y2 = intercept + Animal"
R = [10 2; 2 1.0]
mme = initMME(models,R)
G0 = [5   1    0.1
      1   1    0.01
      0.1 0.01 0.5]
setAsRandom(mme,"Animal mat", ped,G0)
mme.pedTrmVec

3-element Array{String,1}:
 "1:Animal"
 "2:Animal"
 "1:mat"   

In [186]:
# Build the equations for the maternal-effects data and display the dense,
# rounded left-hand side.
getMME(mme,dfMTMat)
round(full(mme.mmeLhs),2)

17x17 Array{Float64,2}:
  0.33   0.0    0.0    0.17   0.17  …   0.0    0.0   -0.33  -0.33   0.0 
  0.0    0.63   0.38  -0.25  -0.25     -0.63  -0.38   0.25   0.25   0.25
  0.0    0.38   0.63  -0.25  -0.25     -0.38  -0.63   0.25   0.25   0.25
  0.17  -0.25  -0.25   0.67   0.0       0.25   0.25  -0.83   0.0    0.0 
  0.17  -0.25  -0.25   0.0    0.67      0.25   0.25   0.0   -0.83   0.0 
  0.0   -0.25  -0.25   0.0    0.0   …   0.25   0.25   0.0    0.0   -0.5 
  0.0   -0.11  -0.07   0.05   0.05      0.06   0.04  -0.03  -0.03  -0.03
  0.33  -0.07  -0.11   0.21   0.21      0.04   0.06  -0.36  -0.36  -0.03
  0.0    0.05   0.05  -0.09   0.0      -0.03  -0.03   0.05   0.0    0.0 
  0.0    0.05   0.05   0.0   -0.09     -0.03  -0.03   0.0    0.05   0.0 
  0.0    0.05   0.05   0.0    0.0   …  -0.03  -0.03   0.0    0.0    0.05
 -0.67   0.0    0.0   -0.33  -0.33      0.0    0.0    1.67   1.67   0.0 
  0.0   -0.63  -0.38   0.25   0.25      3.13   1.88  -1.25  -1.25  -1.25
  0.0   -0.38  -0.63   0.25

In [187]:
# Solve the maternal-effects equations by Gauss-Seidel.
getSolG(mme, dfMTMat)

1 0.30589459550773695
2 0.048850839090560726
3 0.007810301666182477
4 0.0012492796765922309
5 0.0001998614552352077
6 3.1976360836851596e-5
7 5.116124710902228e-6
8 8.185740751630386e-7


17x2 Array{Any,2}:
 "1:intercept: intercept"   94.9882     
 "1:Animal: S1"              3.38982e-15
 "1:Animal: D1"              2.77061e-15
 "1:Animal: O1"             11.0        
 "1:Animal: O3"            -11.0        
 "1:Animal: O2"              3.08021e-15
 "1:mat: S1"                 1.56027e-16
 "1:mat: D1"                 1.97215e-31
 "1:mat: O1"                 0.255      
 "1:mat: O3"                -0.255      
 "1:mat: O2"                 7.80133e-17
 "2:intercept: intercept"    8.99764    
 "2:Animal: S1"              3.14096e-16
 "2:Animal: D1"             -4.3318e-16 
 "2:Animal: O1"             -0.599998   
 "2:Animal: O3"              0.599998   
 "2:Animal: O2"             -5.95421e-17

In [188]:
# List the methods of xtabs. NOTE(review): xtabs is not defined in the visible
# part of this file; confirm where it comes from.
methods(xtabs)