## Continuation of Tools for building Henderson's Mixed Model Equations

Here we will see how the mixed model equations (HMME) can be built given a data set and a model string. 

We will start by building the "OLS" part of the equations. 

### Adding code for models with covariates (analysis of covariance).

In [22]:
using DataFrames

### A small data set to test the tools

In [23]:
# Toy records: six observations, one entry per observation in each vector.
# `weight` and `age` are the columns later declared as covariates via covList.
sex   = ["s","s","s","d","d","d"]
breed = ["Angus","Angus","Hereford","Hereford","Angus","Angus"]
age   = [40,38,38,38,40,40]
weight= [110,103,142,100,112,101];

In [24]:
# Assemble the test DataFrame. The response y is six standard-normal draws rounded
# to 3 decimals; no seed is set here, so its values change from run to run.
df1 = DataFrame(sex=sex,breed=breed,age=age,weight=weight,y=round(randn(6),3))

Unnamed: 0,sex,breed,age,weight,y
1,s,Angus,40,110,-0.649
2,s,Angus,38,103,1.447
3,s,Hereford,38,142,0.41
4,d,Hereford,38,100,-1.289
5,d,Angus,40,112,-0.206
6,d,Angus,40,101,-1.655


### <font color="red"> Types and functions below are the same as before.</font>

In [25]:
# Map the distinct values of `a` to consecutive integer codes.
#
# Returns the tuple `(d, names)`:
#   d     - Dict sending each distinct value to its 1-based code, numbered in
#           order of first appearance in `a`
#   names - Array{Any,1} of the distinct values, so that names[d[s]] == s
function mkDict(a)
    # unique() keeps first-occurrence order; the typed comprehension copies the
    # levels into an Any vector without allocating an uninitialised array first.
    names = Any[s for s in unique(a)]
    d = Dict()
    for (i,s) in enumerate(names)
        d[s] = i
    end
    return d,names
end

# Collect one display label per level of every model term in `mme`.
# Each label is "<term string>: <level name>"; labels follow the order of
# mme.modelTerms and, within a term, the order of that term's `names`.
function getNames(mme)
    labels = AbstractString[]
    for term in mme.modelTerms
        prefix = term.trmStr * ": "
        append!(labels, AbstractString[prefix * level for level in term.names])
    end
    return labels
end

getNames (generic function with 1 method)

In [1]:
include("../../Module/solver.jl");

### <font color="red"> Types and functions below are modified to work with covariates.</font>

In [27]:
# One right-hand-side term of the model equation (e.g. "sex" or "breed*weight").
# getTerm fills trmStr/nFactors/factors; getData fills str/val; getX fills X/names.
type ModelTerm 
    trmStr::AbstractString            # the term exactly as written in the model string
    nFactors::Int64                   # number of "*"-separated factors in the term
    factors::Array{Symbol,1}          # factor names as column symbols
    str::Array{AbstractString,1}      # per-observation level label (new for covariates)
    val::Array{Float64,1}             # per-observation value stored in X (new for covariates)
    X::SparseMatrixCSC{Float64,Int64} # design-matrix block: one row per observation,
                                      # one column per distinct label in str
    names::Array{Any,1}               # distinct labels, in column order of X
end

# Container for a parsed model and the equations built from it.
# initMME creates it with mmeLhs, mmeRhs and X set to the scalar 0;
# getMME replaces them with X'X, X'y and the full design matrix X.
type MME
    modelEquation::AbstractString    # the model string as given to initMME
    modelTerms::Array{ModelTerm,1}   # one ModelTerm per "+"-separated term
    lhs::Symbol                      # response column name
    covVec::Array{Symbol,1}          # columns declared as covariates by covList (new)
    mmeLhs                           # left-hand side X'X once built
    mmeRhs                           # right-hand side X'y once built
    X                                # horizontally concatenated term blocks once built
end

In [28]:
# Parse one right-hand-side term such as "sex" or "breed*weight" into a ModelTerm.
#
# trmStr - the term text; factors are separated by "*", surrounding blanks ignored
#
# Returns a ModelTerm with trmStr, nFactors and factors set; str, val, X and
# names are left empty for getData/getX to fill.
# Raises an error when a factor name is empty (e.g. "" or "a*").
function getTerm(trmStr)
    # split() returns a one-element vector when there is no "*", so main effects
    # and interactions share a single code path.
    factorNames = [strip(f) for f in split(trmStr,"*")]
    for f in factorNames
        if f == ""
            # otherwise Symbol("") only fails later, at the data-frame lookup
            error("empty factor name in model term \"$trmStr\"\n")
        end
    end
    trm = ModelTerm(trmStr,0,[],[],[],spzeros(0,0),[])
    trm.nFactors = length(factorNames)
    trm.factors  = Symbol[symbol(f) for f in factorNames]
    return trm
end

# Parse a model string of the form "y = a + b*c + ..." into an MME.
#
# modelEquation - response name, one "=", then "+"-separated terms
#
# Returns an MME holding the parsed terms, an empty covariate list, and the
# scalar 0 as placeholder for mmeLhs, mmeRhs and X (getSolJ/getSolG test for
# that placeholder before building the equations).
# Raises an error when the string is empty, does not contain exactly one "=",
# or has no response name.
function initMME(modelEquation::AbstractString)
    if modelEquation==""
        error("modelEquation is empty\n")
    end
    lhsRhs = split(modelEquation,"=")
    if length(lhsRhs) != 2
        # zero "=" would hit a BoundsError below; extra ones would be silently ignored
        error("modelEquation must contain exactly one \"=\": $modelEquation\n")
    end
    lhsStr = strip(lhsRhs[1])
    if lhsStr == ""
        error("modelEquation has no response before \"=\": $modelEquation\n")
    end
    lhs = symbol(lhsStr)
    rhs = strip(lhsRhs[2])
    rhsVec = split(rhs,"+")
    modelTerms = [getTerm(strip(trmStr)) for trmStr in rhsVec]
    return MME(modelEquation,modelTerms,lhs,[],0,0,0)
end 

initMME (generic function with 1 method)

In [29]:
# Declare which data columns are covariates (numeric regressors) for `mme`.
#
# covStr - whitespace-separated column names, e.g. "weight age"
#
# Overwrites mme.covVec and returns nothing. getData treats every factor not
# listed here as a class factor.
function covList(mme::MME, covStr::AbstractString)
    # split() without a delimiter breaks on any whitespace and drops empty
    # pieces, so tabs/newlines no longer produce one bogus combined name.
    mme.covVec = Symbol[symbol(s) for s in split(covStr)]
    nothing
end

covList (generic function with 1 method)

In [30]:
# Fill trm.str and trm.val for one model term from the rows of `df`.
#
# For every observation:
#   trm.str - label of the column of X the observation falls in: the level for a
#             class factor, the factor name for a covariate, joined by " x "
#             across the factors of an interaction (e.g. "Angus x weight")
#   trm.val - value stored in X: 1.0 for class factors, the column value for a
#             covariate, multiplied across the factors of an interaction
#
# A term whose first factor is `intercept` gets the label "intercept" and the
# value 1.0 for every observation and needs no column in `df`.
function getData(trm::ModelTerm,df::DataFrame,mme::MME)
    nObs = size(df,1)

    if trm.factors[1] == :intercept   # modified from Melanie's HW
        trm.str = fill(string(trm.factors[1]),nObs)
        trm.val = fill(1.0,nObs)
        return
    end

    lead = trm.factors[1]
    if lead in mme.covVec             # e.g. factors = [:B], covVec = [:B]
        str = fill(string(lead),nObs)
        # same conversion the assignment to trm.val performs; doing it here keeps
        # every later product in floating point
        val = convert(Array{Float64,1},df[lead])
    else
        str = [string(i) for i in df[lead]]   # ["A1","A1","A2",...]
        val = fill(1.0,nObs)                  # [1,1,1,...]
    end
    for i=2:trm.nFactors
        factor = trm.factors[i]
        if factor in mme.covVec
            str = str .* fill(" x "*string(factor),nObs)
            # ["A1 x B","A1 x B","A2 x B",...]
            val = val .* df[factor]
            # [1*3.2,1*2.5,1*2.1,...]
        else
            # a class factor only extends the label; its multiplier is 1
            str = str .* fill(" x ",nObs) .* [string(j) for j in df[factor]]
        end
    end
    trm.str = str
    trm.val = val
end

getData (generic function with 1 method)

In [31]:
# Build the sparse design-matrix block of one term from its per-observation
# labels (trm.str) and values (trm.val). Also stores the distinct labels in
# trm.names; column j of trm.X belongs to trm.names[j].
function getX(trm)
    levelCode,levelNames = mkDict(trm.str)
    trm.names = levelNames
    rows = 1:length(trm.str)                       # one row per observation
    cols = Int64[levelCode[s] for s in trm.str]    # column = code of the label
    trm.X = sparse(rows,cols,trm.val)
end

getX (generic function with 1 method)

In [32]:
# Build the least-squares equations for `mme` from the data in `df`.
# Fills every term's data and design block, concatenates the blocks side by
# side in term order, and stores X'X, X'y and X on `mme`. Returns nothing.
function getMME(mme::MME, df::DataFrame)
    for term in mme.modelTerms
        getData(term,df,mme)
        getX(term)
    end
    X = mme.modelTerms[1].X
    for term in mme.modelTerms[2:end]
        X = hcat(X,term.X)
    end
    y = convert(Array,df[mme.lhs])   # response column as a plain array
    mme.mmeLhs = X'X
    mme.mmeRhs = X'y
    mme.X = X
    nothing
end

getMME (generic function with 1 method)

In [33]:
# Solve the equations of `mme` with the Jacobi solver and return a two-column
# array: effect labels from getNames next to the solver's result.
# The equations are built from `df` first if that has not happened yet.
function getSolJ(mme::MME, df::DataFrame)
    # initMME leaves mmeRhs as the scalar 0, whose size is the empty tuple
    size(mme.mmeRhs)==() && getMME(mme,df)
    nEq   = size(mme.mmeRhs,1)
    start = fill(0.0,nEq)            # all-zero starting values
    sol   = Jacobi(mme.mmeLhs,start,mme.mmeRhs,0.3,tol=0.000001)
    return hcat(getNames(mme),sol)
end

getSolJ (generic function with 1 method)

In [34]:
# Solve the equations of `mme` with the Gauss-Seidel solver and return a
# two-column array: effect labels from getNames next to the solver's result.
# The equations are built from `df` first if that has not happened yet.
# `output` is passed straight through to GaussSeidel.
function getSolG(mme::MME, df::DataFrame;output=10)
    # initMME leaves mmeRhs as the scalar 0, whose size is the empty tuple
    size(mme.mmeRhs)==() && getMME(mme,df)
    nEq   = size(mme.mmeRhs,1)
    start = fill(0.0,nEq)            # all-zero starting values
    sol   = GaussSeidel(mme.mmeLhs,start,mme.mmeRhs,tol=0.000001,output=output)
    return hcat(getNames(mme),sol)
end

getSolG (generic function with 1 method)

### run it !!

In [35]:
# Parse a five-term model for response y: two main effects, the weight term,
# and two interaction terms (one two-way, one three-way).
mme = initMME("y = sex + breed + breed*weight + weight + sex*weight*age");

In [36]:
# Declare weight and age as covariates; sex and breed stay class factors.
covList(mme,"weight age")

In [37]:
# First solve call: builds the equations from df1, then runs Gauss-Seidel.
# output=200 is forwarded to the solver (presumably the progress-print interval;
# see solver.jl to confirm).
resG = getSolG(mme,df1,output=200);

200 2.5166975440012465
400 1.2388866743607556
600 0.6098573376040108
800 0.30015505199895753
1000 0.1477029878071148


In [30]:
# Show the sparse design matrix stored by getMME as a dense array.
full(mme.X)

6x9 Array{Float64,2}:
 1.0  0.0  1.0  0.0  110.0    0.0  110.0  4400.0     0.0
 1.0  0.0  1.0  0.0  103.0    0.0  103.0  3914.0     0.0
 1.0  0.0  0.0  1.0    0.0  142.0  142.0  5396.0     0.0
 0.0  1.0  0.0  1.0    0.0  100.0  100.0     0.0  3800.0
 0.0  1.0  1.0  0.0  112.0    0.0  112.0     0.0  4480.0
 0.0  1.0  1.0  0.0  101.0    0.0  101.0     0.0  4040.0