## Tools for building Henderson's Mixed Model Equations

Here we will see how the mixed model equations (HMME) can be built given a data set and a model string. 

We will start by building the "OLS" part of the equations. 

In [1]:
using DataFrames

### A small data set to test the tools

In [2]:
# Three factor columns for the ten test observations; each takes the levels 1 and 2.
A = [1,1,1,1,1,2,2,2,2,2]
B = [1,1,2,2,2,1,1,1,2,2]
C = [1,2,1,2,2,1,2,2,1,2];

In [3]:
# Combine the factors with a response column y of ten standard-normal draws.
# No seed is set in this notebook, so y changes every time this cell is run.
df1 = DataFrame(A=A,B=B,C=C,y=randn(10))

Unnamed: 0,A,B,C,y
1,1,1,1,-1.785891152275221
2,1,1,2,-1.2194008831830074
3,1,2,1,-0.435663787553561
4,1,2,2,1.7595424301837093
5,1,2,2,0.5510667585704071
6,2,1,1,0.5378586487855133
7,2,1,2,0.4601523490443731
8,2,1,2,0.0355505509260308
9,2,2,1,-0.7610095411188301
10,2,2,2,1.2997947651474415


In [4]:
# Solve A*x = b by weighted (damped) Jacobi iteration.
#
# Arguments
#   A : square coefficient matrix; every diagonal element is used as a divisor
#   x : starting values for the solution
#   b : right-hand side
#   p : relaxation weight; each step sets x = p*(Jacobi update) + (1-p)*x
#
# Keywords
#   tol     : iteration stops once the mean squared residual
#             sum((b - A*x).^2)/n is no larger than tol
#   maxIter : upper bound on the number of iterations
#
# Returns the last iterate. The iteration number and the mean squared residual
# are printed after every iteration.
function Jacobi(A,x,b,p;tol=0.000001,maxIter=1000)
    D       = diag(A)
    n       = size(A,1)
    resid   = b - A*x
    # Jacobi update is x + D^-1*(b - A*x); the "+ x" matters whenever the
    # starting values are not all zero.
    tempSol = resid./D + x
    diff    = sum(resid.^2)
    iter    = 0
    while diff/n > tol && iter < maxIter
        iter   += 1
        x       = p*tempSol + (1-p)*x
        resid   = b - A*x
        tempSol = resid./D + x
        diff    = sum(resid.^2)
        println(iter," ",diff/n)
    end
    return x
end

# Assign consecutive integer codes (1, 2, ...) to the distinct values of `a`.
#
# Returns the tuple (d, names):
#   d     : Dict mapping each distinct value to its code
#   names : Array{Any,1} with names[i] equal to the value whose code is i
# Codes follow the order in which `unique(a)` returns the values.
function mkDict(a)
    d     = Dict()
    # Grown with push! rather than pre-allocated with Array(Any,n), which is
    # not available in every Julia version.
    names = Any[]
    for s in unique(a)
        push!(names,s)
        d[s] = length(names)
    end
    return d,names
end

# One right-hand-side term of the model equation (e.g. "A" or "A*B*C").
# Created by getTerm with empty data/X/names, which getData and getX fill in.
type ModelTerm 
    # term text as passed to getTerm
    trmStr::String
    # number of "*"-separated factors in trmStr
    nFactors::Int64
    # factor names as symbols; getData uses them to select data frame columns
    factors::Array{Symbol,1}
    # one label per observation: the factor values joined by "x" (set by getData)
    data::Array{String,1}
    # sparse matrix with a 1.0 at (observation, label code) (set by getX)
    X::SparseMatrixCSC{Float64,Int64}
    # distinct labels, indexed by the column codes used in X (set by getX)
    names::Array{Any,1}
end

# Container for a model equation and the equations built from it.
type MME
    # the model string exactly as given to initMME
    modelEquation::String
    # one ModelTerm per "+"-separated term on the right of "="
    modelTerms::Array{ModelTerm,1}
    # name of the response column (text left of "=")
    lhs::Symbol
    # X'X once getMME has run; initMME stores 0 as a placeholder
    mmeLhs
    # X'y once getMME has run; initMME stores 0 as a placeholder
    mmeRhs
end

# Build the ModelTerm for one term of the model equation.
#
# trmStr is the term text (e.g. "A" or "A*B*C"). The returned ModelTerm has
# trmStr, nFactors and factors set; data, X and names are left empty.
function getTerm(trmStr)
    # A one-character term is used whole as a single factor; anything longer
    # is broken into its factors at every "*".
    pieces = length(trmStr)==1 ? [trmStr] : split(trmStr,"*")
    trm = ModelTerm(trmStr,0,[],[],spzeros(0,0),[])
    trm.nFactors = length(pieces)
    trm.factors  = [symbol(strip(piece)) for piece in pieces]
    return trm
end

# Parse a model equation such as "y = A + B + A*B*C" into an MME object.
#
# The text left of "=" becomes the response name; the text right of it is
# split on "+" and each piece is turned into a ModelTerm by getTerm.
# mmeLhs and mmeRhs are initialised to 0.
#
# An empty string prints a message and returns nothing.
# Raises an error when the equation does not contain exactly one "=".
function initMME(modelEquation::String)
    if modelEquation==""
        println("modelEquation is empty\n")
        return
    end
    lhsRhs = split(modelEquation,"=")
    # Without this check a missing "=" fails with an index error below and
    # anything after a second "=" would be silently ignored.
    if length(lhsRhs) != 2
        error("modelEquation must contain exactly one \"=\": ",modelEquation)
    end
    lhs = symbol(strip(lhsRhs[1]))
    rhs = strip(lhsRhs[2])
    rhsVec = split(rhs,"+")
    modelTerms = [getTerm(strip(trmStr)) for trmStr in rhsVec]
    return MME(modelEquation,modelTerms,lhs,0,0)
end 

# Fill trm.data with one label per row of df.
#
# For every observation the values of the term's factor columns are converted
# to strings and joined with "x" in factor order (e.g. "1x2x1").
# Returns nothing; the result is stored in trm.data.
function getData(trm::ModelTerm,df::DataFrame)
    cols   = df[trm.factors]
    labels = String[]
    for row in 1:size(df,1)
        parts = [string(cols[row,k]) for k in 1:trm.nFactors]
        push!(labels,join(parts,"x"))
    end
    trm.data = labels
    return nothing
end

# Build the sparse matrix for a term from its labels in trm.data.
#
# mkDict assigns a column code to each distinct label; row i of the matrix
# gets a 1.0 in the column of observation i's label. The distinct labels are
# stored in trm.names and the matrix in trm.X; the matrix is also returned.
function getX(trm)
    levelCode,levelNames = mkDict(trm.data)
    trm.names = levelNames
    cols = int([levelCode[label] for label in trm.data])
    rows = 1:size(trm.data,1)
    X = sparse(rows,cols,1.0)
    trm.X = X
    return X
end

# Build the equations for mme from the data in df.
#
# Each model term gets its labels (getData) and its sparse matrix (getX); the
# term matrices are then placed side by side, in term order, to form X.
# With y the column of df named by mme.lhs, X'X is stored in mme.mmeLhs and
# X'y in mme.mmeRhs. Returns X'y.
function getMME(mme::MME, df::DataFrame)
    terms = mme.modelTerms
    for term in terms
        getData(term,df)
        getX(term)
    end
    X = terms[1].X
    for k in 2:size(terms,1)
        X = hcat(X,terms[k].X)
    end
    obs = df[mme.lhs]
    mme.mmeLhs = X'X
    rhs = X'obs
    mme.mmeRhs = rhs
    return rhs
end

# Return one label per equation, in term order and, within a term, in the
# order of trm.names. Each label has the form "<trmStr>: <level name>",
# e.g. "A*B*C: 1x2x1".
function getNames(mme)
    names = String[]
    for trm in mme.modelTerms, name in trm.names
        # string() also accepts level names that are not strings, which "*"
        # concatenation does not.
        push!(names,string(trm.trmStr,": ",name))
    end
    return names
end    

getNames (generic function with 1 method)

In [7]:
# Response y with three right-hand-side terms: A, B and the combined term A*B*C.
mme = initMME("y = A + B + A*B*C");

In [8]:
# Fill mme.mmeLhs (X'X) and mme.mmeRhs (X'y) from df1; the value displayed is X'y.
getMME(mme,df1)

12-element Array{Float64,1}:
 -1.13035 
  1.57235 
 -1.97173 
  2.41373 
 -1.78589 
 -1.2194  
 -0.435664
  2.31061 
  0.537859
  0.495703
 -0.76101 
  1.29979 

In [10]:
# Show each equation's label next to its right-hand-side value.
[getNames(mme) mme.mmeRhs]

12x2 Array{Any,2}:
 "A: 1"          -1.13035 
 "A: 2"           1.57235 
 "B: 1"          -1.97173 
 "B: 2"           2.41373 
 "A*B*C: 1x1x1"  -1.78589 
 "A*B*C: 1x1x2"  -1.2194  
 "A*B*C: 1x2x1"  -0.435664
 "A*B*C: 1x2x2"   2.31061 
 "A*B*C: 2x1x1"   0.537859
 "A*B*C: 2x1x2"   0.495703
 "A*B*C: 2x2x1"  -0.76101 
 "A*B*C: 2x2x2"   1.29979 

### Homework

1. Add code to check whether any of the model terms are missing from the DataFrame.
1. xxx

In [20]:
# Same model string as before, parsed into a second MME object.
mme1 = initMME("y = A + B + A*B*C");

In [21]:
# Build the equations for mme1 from the same data frame; the value displayed is X'y.
getMME(mme1,df1)

12-element Array{Float64,1}:
 -1.13035 
  1.57235 
 -1.97173 
  2.41373 
 -1.78589 
 -1.2194  
 -0.435664
  2.31061 
  0.537859
  0.495703
 -0.76101 
  1.29979 

In [25]:
# NOTE(review): scratch arithmetic that uses nothing defined above — confirm
# whether this cell belongs in the notebook.
25.20*4 + 0.36*2*454

427.68