## Continuation of Tools for building Henderson's Mixed Model Equations

Here we will see how Henderson's mixed model equations (HMME) can be built from a data set and a model string. 

We will start by building the "OLS" part of the equations. 

### Adding code for covariance models.

In [1]:
using DataFrames

### A small data set to test the tools

In [2]:
# Columns for a toy data set of 10 observations.
# inter is a vector of ten ones (intercept column).
inter = fill(1,10)
# A, B and C each take the values 1 and 2.
A = [1,1,1,1,1,2,2,2,2,2]
B = [1,1,2,2,2,1,1,1,2,2]
# The trailing semicolon suppresses the notebook output of this cell.
C = [1,2,1,2,2,1,2,2,1,2];

In [19]:
# Test data frame: intercept, A and B come from the vectors defined above;
# X, W and y are drawn with randn, so their values change on every run.
df1 = DataFrame(intercept=inter,A=A,B=B,X=randn(10),W=randn(10),y=randn(10))

Unnamed: 0,intercept,A,B,X,W,y
1,1,1,1,0.7189426021040723,-0.0833683992471847,-0.9909367209555008
2,1,1,1,-1.5861267731463162,0.7203198283160005,0.5550645574551433
3,1,1,2,-0.3333275147150664,-0.3278045958269871,1.6493156428673463
4,1,1,2,0.2728451113346467,0.6201457739646946,-0.6028398192493416
5,1,1,2,-0.5973884875071214,-0.6775460473339241,-0.0436692865656575
6,1,2,1,0.4336774052952167,1.0783861739656544,0.1739452818377391
7,1,2,1,-0.7370558238853403,1.1234927496113367,-1.2117031075813884
8,1,2,1,-0.3745559244493643,-0.6866300829840629,0.6019501570207837
9,1,2,2,0.3444360114324716,-0.3348760699486519,-0.2333409788912871
10,1,2,2,-0.2098536898641819,-0.1243157473935887,-0.9801875371062198


In [4]:
# Solve A*x = b by Gauss-Seidel iteration, updating x in place.
#
# Arguments:
#   A       - square coefficient matrix; A[i,i] is used as divisor
#   x       - starting values; overwritten with the solution and returned
#   b       - right-hand side
#   tol     - sweeps continue while the mean squared residual
#             sum((A*x-b).^2)/n is greater than tol
#   maxiter - upper bound on the number of sweeps (default 1000)
#
# Prints the sweep number and the mean squared residual after every sweep.
function GaussSeidel(A,x,b;tol=0.000001,maxiter=1000)
    n = size(x,1)
    iter = 0
    # Seed the convergence test with the residual of the starting values.
    # A fixed seed of 1.0 makes the first test compare 1/n with tol, so the
    # loop would never be entered once n >= 1/tol.
    diff = sum((A*x-b).^2)
    while diff/n > tol && iter < maxiter
        iter += 1
        for i=1:n
            # A[i:i,:]*x is a 1-element vector: row i of A times the current x
            x[i] = (b[i] - (A[i:i,:]*x)[1])/A[i,i] + x[i]
        end
        diff = sum((A*x-b).^2)
        println(iter," ",diff/n)
    end
    return x
end

# Solve A*x = b by damped (relaxed) Jacobi iteration.
#
# Arguments:
#   A       - square coefficient matrix; its diagonal is used as divisor
#   x       - starting values (not modified; a new vector is returned
#             whenever at least one iteration is performed)
#   b       - right-hand side
#   p       - relaxation weight: each update is p*(Jacobi update) + (1-p)*x
#   tol     - iterations continue while the mean squared residual
#             sum((b-A*x).^2)/n is greater than tol
#   maxiter - upper bound on the number of iterations (default 1000)
#
# Prints the iteration number and the mean squared residual after every
# iteration.
function Jacobi(A,x,b,p;tol=0.000001,maxiter=1000)
    D       = diag(A)
    resid   = b - A*x
    # The Jacobi update is x + D^{-1}*resid. The "+ x" term must also be
    # present before the first iteration, otherwise a nonzero starting
    # vector yields a wrong first step.
    tempSol = resid./D + x
    diff    = sum(resid.^2)
    n       = size(A,1)
    iter    = 0
    while diff/n > tol && iter < maxiter
        iter += 1
        x       = p*tempSol + (1-p)*x
        resid   = b - A*x
        tempSol = resid./D + x
        diff    = sum(resid.^2)
        println(iter," ",diff/n)
    end
    return x
end

# One term of the right-hand side of a model equation (e.g. "A" or "A*X").
type ModelTerm 
    # term text as passed to getTerm
    trmStr::String
    # number of factors in the term (pieces separated by "*")
    nFactors::Int64
    # factor names as symbols; used to index data frame columns in getData
    factors::Array{Symbol,1}
    # per-observation label built in getData; its unique values become the
    # columns of X
    str::Array{String,1}
    # per-observation value built in getData and stored in X
    val::Array{Float64,1}
    # sparse matrix for this term, filled in by getX
    X::SparseMatrixCSC{Float64,Int64}
    # unique labels of str in column order, filled in by getX
    names::Array{Any,1}
end

# Container for a model and the equations built from it.
type MME
    # model string as passed to initMME
    modelEquation::String
    # one ModelTerm per "+"-separated term of the right-hand side
    modelTerms::Array{ModelTerm,1}
    # name of the response column (text left of "=")
    lhs::Symbol
    # names of the columns to treat as covariates; set by covList
    covVec::Array{Symbol,1}
    # X'X; initMME stores the placeholder 0 until getMME fills it in
    mmeLhs
    # X'y; initMME stores the placeholder 0 until getMME fills it in
    mmeRhs
    # matrices of all terms joined column-wise; placeholder 0 until getMME runs
    X
end

# Number the distinct values of a in order of first appearance.
#
# Returns (d, names): d maps each distinct value to its position and
# names[k] is the value that received position k.
function mkDict(a)
    d = Dict()
    names = Any[]
    for s in unique(a)
        push!(names, s)
        # the position of s is the number of distinct values seen so far
        d[s] = length(names)
    end
    return d,names
end

# Build a ModelTerm from the text of one term.
#
# A one-character term is taken as a single factor; any other term is split
# on "*" and each stripped piece becomes one factor symbol. The data-related
# fields (str, val, X, names) start out empty.
function getTerm(trmStr)
    if length(trmStr)==1
        factors = [symbol(strip(trmStr))]
    else
        factors = [symbol(strip(piece)) for piece in split(trmStr,"*")]
    end
    return ModelTerm(trmStr,length(factors),factors,[],[],spzeros(0,0),[])
end

# Return an MME object for building the mixed model equations that
# correspond to the model string.
#
# modelEquation has the form "lhs = term + term + ...", where each term is
# a factor name or several names joined by "*".
#
# An empty string prints a message and returns nothing. A string without
# "=" raises an error. In the returned MME, covVec is empty and mmeLhs,
# mmeRhs and X hold the placeholder 0.
function initMME(modelEquation::String)
    if modelEquation==""
        println("modelEquation is empty\n")
        return
    end
    lhsRhs = split(modelEquation,"=")
    # Without "=" there is no right-hand side; fail with a clear message
    # instead of an out-of-bounds index on lhsRhs[2].
    if length(lhsRhs) < 2
        error(string("modelEquation must have the form \"lhs = term + term\": ",
                     modelEquation))
    end
    lhs = symbol(strip(lhsRhs[1]))
    rhs = strip(lhsRhs[2])
    rhsVec = split(rhs,"+")
    modelTerms = [getTerm(strip(trmStr)) for trmStr in rhsVec]
    return MME(modelEquation,modelTerms,lhs,[],0,0,0)
end 

# Fill trm.str and trm.val with one entry per row of df.
#
# For every factor of the term:
#   - a factor listed in mme.covVec contributes its own name to the label
#     and its column values to the value;
#   - any other factor contributes string(level) to the label and leaves
#     the value unchanged (a multiplier of 1).
# Label pieces of successive factors are joined with "x".
function getData(trm::ModelTerm,df::DataFrame,mme::MME)
    nObs = size(df,1)
    if trm.factors[1] in mme.covVec
        str = fill(string(trm.factors[1]),nObs)
        val = df[trm.factors[1]]
    else
        str = [string(i) for i in df[trm.factors[1]]]
        val = fill(1.0,nObs)
    end
    for i=2:trm.nFactors
        if trm.factors[i] in mme.covVec
            str = str .* fill("x"*string(trm.factors[i]),nObs)
            val = val .* df[trm.factors[i]]
        else
            # only the label changes for a non-covariate factor
            str = str .* fill("x",nObs) .* [string(j) for j in df[trm.factors[i]]]
        end
    end
    trm.str = str
    trm.val = val
end

# Build the sparse matrix of a term from trm.str and trm.val.
#
# Row k belongs to observation k; the column is the position of trm.str[k]
# among the unique labels (stored in trm.names); the entry is trm.val[k].
function getX(trm)
    position,labels = mkDict(trm.str)
    trm.names = labels
    rows = 1:size(trm.str,1)
    cols = int([position[s] for s in trm.str])
    trm.X = sparse(rows,cols,trm.val)
end

# Build the equations for mme from the data in df.
#
# Every term gets its labels, values and sparse matrix (getData, getX);
# the term matrices are then joined column-wise into X, and X'X, X'y and X
# are stored in mme.mmeLhs, mme.mmeRhs and mme.X.
function getMME(mme::MME, df::DataFrame)
    terms = mme.modelTerms
    for t in terms
        getData(t,df,mme)
        getX(t)
    end
    # join the term matrices left to right, in model order
    design = terms[1].X
    for k=2:size(terms,1)
        design = [design terms[k].X]
    end
    response = df[mme.lhs]
    mme.mmeLhs = design'*design
    mme.mmeRhs = design'*response
    mme.X = design
end

# Return one label per column of the equations, in term order, formatted as
# "<term text>: <level name>".
function getNames(mme)
    labels = String[]
    for trm in mme.modelTerms
        prefix = trm.trmStr*": "
        for level in trm.names
            push!(labels,prefix*level)
        end
    end
    return labels
end  

# Declare which columns are covariates.
#
# covStr holds column names separated by single spaces (empty pieces are
# dropped); they are stored as symbols in mme.covVec. Returns nothing.
function covList(mme::MME, covStr::String)
    covariates = Symbol[]
    for name in split(covStr," ",false)
        push!(covariates,symbol(name))
    end
    mme.covVec = covariates
    return nothing
end

# Solve the equations of mme with the Jacobi solver (relaxation 0.3).
#
# The equations are built from df first if mme.mmeRhs still has size (),
# i.e. is still a scalar rather than an array. Returns a two-column array:
# column labels and solutions.
function getSolJ(mme::MME, df::DataFrame)
    built = size(mme.mmeRhs) != ()
    if !built
        getMME(mme,df)
    end
    nEq    = size(mme.mmeRhs,1)
    labels = getNames(mme)
    sol    = Jacobi(mme.mmeLhs,zeros(nEq),mme.mmeRhs,0.3,tol=0.000001)
    return [labels sol]
end

# Solve the equations of mme with the Gauss-Seidel solver.
#
# The equations are built from df first if mme.mmeRhs still has size (),
# i.e. is still a scalar rather than an array. Returns a two-column array:
# column labels and solutions.
function getSolG(mme::MME, df::DataFrame)
    built = size(mme.mmeRhs) != ()
    if !built
        getMME(mme,df)
    end
    nEq    = size(mme.mmeRhs,1)
    labels = getNames(mme)
    sol    = GaussSeidel(mme.mmeLhs,zeros(nEq),mme.mmeRhs,tol=0.000001)
    return [labels sol]
end

getSolG (generic function with 1 method)

In [34]:
# Model with an intercept, A, A*X, W and A*X*W; X and W are declared
# covariates, so their column values enter the equations directly.
mme = initMME("y = intercept + A + A*X + W + A*X*W")
covList(mme,"X W")
# suppress the cell output
nothing

In [35]:
# Build the equations from df1 (if not built yet) and solve by Gauss-Seidel;
# the solver prints the iteration number and mean squared residual.
resG = getSolG(mme,df1)

1 0.21454965983221413
2 0.032588650559486815
3 0.011963209295140997
4 0.004253631832754928
5 0.0015122530666900474
6 0.0005377790294499956
7 0.0001910252123092954
8 6.774654625370356e-5
9 2.3988749803011822e-5
10 8.48271383034087e-6
11 2.996156913449167e-6
12 1.057272192574704e-6
13 3.728034892696454e-7


8x2 Array{Any,2}:
 "intercept: 1"  -0.176094 
 "A: 1"           0.0728728
 "A: 2"          -0.0728728
 "A*X: 1xX"      -0.869703 
 "A*X: 2xX"      -0.0684133
 "W: W"          -0.210396 
 "A*X*W: 1xXxW"   0.365859 
 "A*X*W: 2xXxW"   1.13714  

In [36]:
# Show the matrix stored in mme.X as a dense array rounded to 2 decimals
round(full(mme.X),2)

10x8 Array{Float64,2}:
 1.0  1.0  0.0   0.72   0.0   -0.08  -0.06   0.0 
 1.0  1.0  0.0  -1.59   0.0    0.72  -1.14   0.0 
 1.0  1.0  0.0  -0.33   0.0   -0.33   0.11   0.0 
 1.0  1.0  0.0   0.27   0.0    0.62   0.17   0.0 
 1.0  1.0  0.0  -0.6    0.0   -0.68   0.4    0.0 
 1.0  0.0  1.0   0.0    0.43   1.08   0.0    0.47
 1.0  0.0  1.0   0.0   -0.74   1.12   0.0   -0.83
 1.0  0.0  1.0   0.0   -0.37  -0.69   0.0    0.26
 1.0  0.0  1.0   0.0    0.34  -0.33   0.0   -0.12
 1.0  0.0  1.0   0.0   -0.21  -0.12   0.0    0.03

In [32]:
# Show the left-hand side X'X as a dense array rounded to 2 decimals
round(full(mme.mmeLhs),2)

7x7 Array{Float64,2}:
 10.0    5.0    5.0   -1.53  -0.54   1.31  -0.71
  5.0    5.0    0.0   -1.53   0.0    0.25  -0.52
  5.0    0.0    5.0    0.0   -0.54   1.06  -0.19
 -1.53  -1.53   0.0    3.58   0.0   -0.52   1.54
 -0.54   0.0   -0.54   0.0    1.03  -0.19   0.67
  1.31   0.25   1.06  -0.52  -0.19   4.5   -1.59
 -0.71  -0.52  -0.19   1.54   0.67  -1.59   2.5 

In [8]:
# Second model: intercept, A, and A*C with C declared a covariate.
# NOTE(review): df1 as constructed in this notebook has the columns
# intercept, A, B, X, W and y only -- confirm that the data frame used with
# this model carries a column C.
mme = initMME("y = intercept + A + A*C")
covList(mme,"C")
# suppress the cell output
nothing

In [37]:
# Run the Gauss-Seidel solution under the sampling profiler
@profile resG = getSolG(mme,df1)

1 0.21454965983221413
2 0.032588650559486815
3 0.011963209295140997
4 0.004253631832754928
5 0.0015122530666900474
6 0.0005377790294499956
7 0.0001910252123092954
8 6.774654625370356e-5
9 2.3988749803011822e-5
10 8.48271383034087e-6
11 2.996156913449167e-6
12 1.057272192574704e-6
13 3.728034892696454e-7


8x2 Array{Any,2}:
 "intercept: 1"  -0.176094 
 "A: 1"           0.0728728
 "A: 2"          -0.0728728
 "A*X: 1xX"      -0.869703 
 "A*X: 2xX"      -0.0684133
 "W: W"          -0.210396 
 "A*X*W: 1xXxW"   0.365859 
 "A*X*W: 2xXxW"   1.13714  

In [38]:
# Print the profile data collected by the @profile call
Profile.print()

4 task.jl; anonymous; line: 340
 4 ...3/IJulia/src/IJulia.jl; eventloop; line: 123
  4 ...src/execute_request.jl; execute_request_0x535c5df2; line: 157
   4 loading.jl; include_string; line: 97
    4 profile.jl; anonymous; line: 14
     3 In[4]; getSolG; line: 170
      1 ...a/lib/julia/sys.dylib; typeinf_ext; (unknown line)
       1 ...a/lib/julia/sys.dylib; typeinf; (unknown line)
        1 .../lib/julia/sys.dylib; inlining_pass; (unknown line)
         1 .../lib/julia/sys.dylib; inlining_pass; (unknown line)
          1 ...lib/julia/sys.dylib; inlining_pass; (unknown line)
           1 ...lib/julia/sys.dylib; inlining_pass; (unknown line)
            1 ...lib/julia/sys.dylib; inlining_pass; (unknown line)
             1 ...ib/julia/sys.dylib; inlineable; (unknown line)
              1 ...ib/julia/sys.dylib; effect_free; (unknown line)
               1 ...b/julia/sys.dylib; effect_free; (unknown line)
                1 ...b/julia/sys.dylib; effect_free; (unknown line)
               

In [18]:
# Show the matrix stored in mme.X as a dense array rounded to 3 decimals
round(full(mme.X),3)

10x5 Array{Float64,2}:
 1.0  1.0  0.0   0.72    0.0  
 1.0  1.0  0.0  -1.03    0.0  
 1.0  1.0  0.0   1.148   0.0  
 1.0  1.0  0.0   2.996   0.0  
 1.0  1.0  0.0   0.75    0.0  
 1.0  0.0  1.0   0.0    -1.395
 1.0  0.0  1.0   0.0     0.923
 1.0  0.0  1.0   0.0    -0.695
 1.0  0.0  1.0   0.0     0.29 
 1.0  0.0  1.0   0.0    -2.72 

In [10]:
# Show the left-hand side X'X as a dense array rounded to 2 decimals
round(full(mme.mmeLhs),2)

5x5 Array{Float64,2}:
 10.0   5.0    5.0   4.58  -3.6 
  5.0   5.0    0.0   4.58   0.0 
  5.0   0.0    5.0   0.0   -3.6 
  4.58  4.58   0.0  12.43   0.0 
 -3.6   0.0   -3.6   0.0   10.76