## Mixed Model Equations (MME) from Data and Model

Here, we will see how the MME can be constructed from data in a DataFrame and a model given as a string. In the previous [notebook](3.6.DesignMatrix.ipynb), we constructed the left-hand side (LHS) and the right-hand side (RHS) of the normal equations for a fixed-effects model. To construct the MME for a mixed model, we need to identify the random effects in the model and also determine the row and column positions of these random effects in the LHS of the MME. To store the information needed to construct the MME, we will use the following structures (composite types).

### Structures for Building MME

In [1]:
using DataFrames, SparseArrays, LinearAlgebra, Random, JWAS, CSV

In [2]:
# One term of the model equation (e.g. "A" or "A*B") together with its column
# range in the design matrix and how the term is treated.
mutable struct ModelTerm
    trmString::AbstractString   # term as written in the model equation, e.g. "A*B"
    startPos::Int64             # first column of the term; 0 until getLhsRhs sets it
    endPos::Int64               # last column of the term; 0 until getLhsRhs sets it
    randomType::String          # "fixed" from the one-argument constructor; setRandom writes "I"
    var::Float64                # value attached to the term; 1.0 by default, overwritten by setRandom
end

# Container for everything needed to set up the mixed model equations:
# the parsed model, the design matrix and the LHS/RHS/solution arrays.
mutable struct MME
    modelEquation::AbstractString           #"y = A + B + A*B"
    covVec::Array{String}                   #["age","age*age"]
    modelTermVec::Array{ModelTerm,1}        #[modelTerm("A") , modelTerm("A*B")]
    modelTermDict::Dict{AbstractString,ModelTerm}  # same ModelTerm objects, keyed by term string
    depVar::Symbol                          #:y 
    X::SparseMatrixCSC{Float64,Int64}       #design matrix
    mmeLhs::SparseMatrixCSC{Float64,Int64}  #left-hand side for MME
    mmeRhs::Array{Float64,1}                #right-hand side for MME
    mmeSol::Array{Float64,1}                #a solution of the MME 
    varRes::Float64                         # presumably the residual variance (passed to initMME) -- confirm
end  
  

### Functions for Building MME

In [18]:
"""
    ModelTerm(str::AbstractString)

Create a `ModelTerm` for the term `str` with default settings: start and end
positions 0, random type `"fixed"` and variance 1.0.
"""
ModelTerm(str::AbstractString) = ModelTerm(str, 0, 0, "fixed", 1.0)

ModelTerm

In [4]:
"""
    initMME(modelEquation::AbstractString, varRes)

Parse a model equation such as `"y = A + A*B"` and return an `MME` object.

The text left of `=` becomes the dependent variable (`depVar`); the right-hand
side is split on `+` into terms, each wrapped in a `ModelTerm` with default
settings. The same `ModelTerm` objects are stored in `modelTermVec` (in
equation order) and in `modelTermDict` (keyed by term string). The covariate
list, design matrix, LHS, RHS and solution are initialised empty, and `varRes`
is stored unchanged.

Throws an `ErrorException` if the equation is empty or blank, does not contain
exactly one `=`, or has an empty dependent variable or term.
"""
function initMME(modelEquation::AbstractString, varRes)  # "y = A + A*B"
    if isempty(strip(modelEquation))
        error("modelEquation is empty\n")
    end
    leftRight = split(modelEquation, "=")           # "y ", " A + A*B"
    if length(leftRight) != 2
        # Without this check a missing '=' surfaces as an opaque BoundsError.
        error("modelEquation must contain exactly one '=': \"$modelEquation\"")
    end
    depStr   = strip(leftRight[1])                  # "y"
    termsVec = strip.(split(leftRight[2], "+"))     # "A", "A*B"
    if isempty(depStr) || any(isempty, termsVec)
        error("modelEquation has an empty dependent variable or term: \"$modelEquation\"")
    end
    depVar = Symbol(depStr)                         # :y

    modelTermVec  = [ModelTerm(trmStr) for trmStr in termsVec]
    modelTermDict = Dict{AbstractString,ModelTerm}()
    for term in modelTermVec
        modelTermDict[term.trmString] = term
    end

    # Typed empty containers matching the MME field types.
    covVec = String[]
    X      = spzeros(0, 0)
    mmeLhs = spzeros(0, 0)
    mmeRhs = Float64[]
    mmeSol = Float64[]
    return MME(modelEquation, covVec, modelTermVec, modelTermDict, depVar,
               X, mmeLhs, mmeRhs, mmeSol, varRes)
end

initMME (generic function with 1 method)

In [5]:
# Build an MME object for a three-term model and declare "age" as a covariate
# (getX treats names listed in covVec as covariates rather than class variables).
varRes = 1.0
mme = initMME("y = sex + breed + age",varRes)
mme.covVec = ["age"];

In [6]:
# Show the model equation string stored in the MME object.
mme.modelEquation

"y = sex + breed + age"

In [7]:
# Show the parsed terms; positions are still 0 because getLhsRhs has not run.
mme.modelTermVec

3-element Array{ModelTerm,1}:
 ModelTerm("sex", 0, 0, "fixed", 1.0)
 ModelTerm("breed", 0, 0, "fixed", 1.0)
 ModelTerm("age", 0, 0, "fixed", 1.0)

In [8]:
# Show the dependent variable parsed from the left-hand side of the equation.
mme.depVar

:y

In [9]:
"""
    mkDict(a)

Number the distinct values of `a` in order of first appearance.

Returns a tuple `(d, names)`:
- `d` maps each distinct value of `a` (unchanged, as the key) to its sequential
  number 1, 2, ….
- `names` is a `Vector{String}` with the string form of those values in the
  same order, so `names[d[x]] == string(x)`.

Values need not be strings; numeric levels such as integer IDs are converted
with `string` when building `names`.
"""
function mkDict(a)
    levels = unique(a)
    # string(...) lets non-String levels (e.g. Int IDs) be used as labels.
    names = String[string(level) for level in levels]
    d = Dict{eltype(levels),Int}(level => i for (i, level) in enumerate(levels))
    return d, names
end

# Labels and multipliers that one factor of a model term contributes per row.
# A covariate contributes its own name as the label and its column as the
# multiplier; a class variable contributes its (stringified) level and 1.0.
function _factorColumns(factor, isCovariate, df, n)
    column = df[:, Symbol(factor)]
    if isCovariate
        return fill(factor, n), column
    end
    if any(ismissing, column)
        error("class variable \"$factor\" contains missing values")
    end
    # string.(...) so numeric levels (e.g. integer IDs) can be concatenated
    # into column labels just like String levels.
    return string.(column), 1.0
end

"""
    getX(modelTerm, covariables, df)

Build the block of the design matrix for one model term.

# Arguments
- `modelTerm`: term string such as `"A"`, `"age"` or `"A*age"`; the special
  term `"intercept"` yields a dense `n × 1` column of ones.
- `covariables`: collection of column names to be treated as covariates; all
  other factors are treated as class variables.
- `df`: table indexed as `df[:, Symbol(name)]`, with `size(df, 1)` rows.

# Returns
`(X, colNames)`. For non-intercept terms `X` is a sparse matrix with one row
per observation and one column per distinct combination of class levels; the
stored value is the product of the covariate values in the term (1.0 if there
are none). `colNames` are labels of the form `"term: level x level"`.
"""
function getX(modelTerm, covariables, df)
    n = size(df, 1)
    if modelTerm == "intercept"
        return ones(n, 1), ["intercept"]
    end
    factors = strip.(split(modelTerm, "*"))

    str, val = _factorColumns(factors[1], factors[1] in covariables, df, n)
    for factor in factors[2:end]
        strI, valI = _factorColumns(factor, factor in covariables, df, n)
        str = str .* " x " .* strI
        val = val .* valI
    end

    dict, colNames = mkDict(str)
    ii = 1:n                          # row numbers
    jj = Int[dict[s] for s in str]    # column numbers
    # Explicit dimensions keep the shape n × (number of levels) in all cases.
    X = sparse(ii, jj, val, n, length(colNames))
    return X, strip(modelTerm) * ": " .* colNames
end

getX (generic function with 1 method)

In [10]:
"""
    getLhsRhs(mme, df)

Assemble the design matrix `X` for all terms in `mme.modelTermVec` by
concatenating the blocks returned by `getX`, and return `(X'X, X'y, colNames)`
where `y` is the column `mme.depVar` of `df`.

As a side effect, `startPos` and `endPos` of every model term are set to the
first and last column that the term occupies in `X`.
"""
function getLhsRhs(mme, df)
    terms = mme.modelTermVec

    # First term starts the matrix at column 1.
    head = terms[1]
    X, colNames = getX(head.trmString, mme.covVec, df)
    head.startPos = 1
    head.endPos = size(X, 2)

    # Every later term is appended directly after its predecessor.
    for (prev, term) in zip(terms, Iterators.drop(terms, 1))
        block, blockNames = getX(term.trmString, mme.covVec, df)
        X = hcat(X, block)
        term.startPos = prev.endPos + 1
        term.endPos = prev.endPos + size(block, 2)
        colNames = vcat(colNames, blockNames)
    end

    response = df[:, mme.depVar]
    return X' * X, X' * response, colNames
end

getLhsRhs (generic function with 1 method)

In [11]:
# Small example data set: six animals with two class variables (sex, breed),
# one covariate (age) and a simulated response y.
# The seed is fixed before randn so that y is reproducible.
Random.seed!(31415)
animal= ["animal1","animal2","animal3","animal4","animal5","animal6"]
sex   = ["m","f","f","m","f","f"]
breed = ["Angus","Angus","Hereford","Hereford","Angus","Angus"]
age   = [40,36,38,42,40,36]
df    = DataFrame(animal=animal,sex=sex,breed=breed,age=age,y=round.(randn(6),digits=3))

Unnamed: 0_level_0,animal,sex,breed,age,y
Unnamed: 0_level_1,String,String,String,Int64,Float64
1,animal1,m,Angus,40,-0.859
2,animal2,f,Angus,36,0.631
3,animal3,f,Hereford,38,1.775
4,animal4,m,Hereford,42,-0.17
5,animal5,f,Angus,40,-0.561
6,animal6,f,Angus,36,0.214


In [12]:
# Fit-ready equations for a model with an intercept, two class variables and
# the covariate age; the last line shows labels, dense LHS and RHS side by side.
mme = initMME("y = intercept + sex + breed + age",varRes)
mme.covVec = ["age"];
lhs,rhs,names = getLhsRhs(mme,df)
[names Matrix(lhs) rhs]

6×8 Array{Any,2}:
 "intercept"          6.0   2.0    4.0    4.0   2.0   232.0   1.03 
 "sex: m"             2.0   2.0    0.0    1.0   1.0    82.0  -1.029
 "sex: f"             4.0   0.0    4.0    3.0   1.0   150.0   2.059
 "breed: Angus"       4.0   1.0    3.0    4.0   0.0   152.0  -0.575
 "breed: Hereford"    2.0   1.0    1.0    0.0   2.0    80.0   1.605
 "age: age"         232.0  82.0  150.0  152.0  80.0  9000.0  33.93 

In [13]:
# After getLhsRhs, each term carries the column range it occupies in X.
mme.modelTermVec

4-element Array{ModelTerm,1}:
 ModelTerm("intercept", 1, 1, "fixed", 1.0)
 ModelTerm("sex", 2, 3, "fixed", 1.0)
 ModelTerm("breed", 4, 5, "fixed", 1.0)
 ModelTerm("age", 6, 6, "fixed", 1.0)

In [14]:
# Read the pedigree from the comma-separated, header-less file "pedFile".
# NOTE(review): get_pedigree is presumably provided by JWAS -- confirm.
pedigree   = get_pedigree("pedFile",separator=",",header=false);

[32mThe delimiter in pedFile is ','.[39m


[32mcoding pedigree... 100%|████████████████████████████████| Time: 0:00:00[39m
[32mcalculating inbreeding... 100%|█████████████████████████| Time: 0:00:00[39m


Finished!


In [15]:
# Read the phenotype file into a DataFrame. Current CSV.jl releases require the
# sink type as second argument; CSV.read with only a path raises an error.
data = CSV.read("twoTraitMaternal.phen", DataFrame)

Unnamed: 0_level_0,Ind,Mat,y1,y2
Unnamed: 0_level_1,Int64,Int64,Float64,Float64
1,3,2,8.9,11.9
2,4,2,9.7,10.8
3,5,4,8.8,11.9


In [19]:
# New MME object for trait y1 with an intercept and the term Ind.
varRes = 1.0
mme = initMME("y1 = intercept + Ind",varRes);

MME("y1 = intercept + Ind", String[], ModelTerm[ModelTerm("intercept", 0, 0, "fixed", 1.0), ModelTerm("Ind", 0, 0, "fixed", 1.0)], Dict{AbstractString,ModelTerm}("intercept"=>ModelTerm("intercept", 0, 0, "fixed", 1.0),"Ind"=>ModelTerm("Ind", 0, 0, "fixed", 1.0)), :y1, 0×0 SparseMatrixCSC{Float64,Int64} with 0 stored entries, 0×0 SparseMatrixCSC{Float64,Int64} with 0 stored entries, Float64[], Float64[], 1.0)

In [21]:
# Both terms start out with randomType "fixed".
mme.modelTermVec

2-element Array{ModelTerm,1}:
 ModelTerm("intercept", 0, 0, "fixed", 1.0)
 ModelTerm("Ind", 0, 0, "fixed", 1.0)      

In [26]:
"""
    setRandom(mme::MME, trmString::AbstractString, var::Real)

Mark the model term `trmString` of `mme` as random: its `randomType` is set to
`"I"` and its `var` field to `var` (converted to `Float64`).

The term is looked up in `mme.modelTermDict`; because the dictionary holds the
same objects as `mme.modelTermVec`, the change is visible through both.
Throws a `KeyError` if `trmString` is not a term of the model. Returns the
stored variance.
"""
function setRandom(mme::MME, trmString::AbstractString, var::Real)
    modelTerm = mme.modelTermDict[trmString]
    modelTerm.randomType = "I"
    modelTerm.var = var
    return modelTerm.var
end

setRandom (generic function with 2 methods)

In [29]:
# Declare the term "Ind" as random with variance 1.5.
setRandom(mme,"Ind",1.5)

1.5

In [30]:
# The "Ind" term now shows randomType "I" and the variance set above.
mme.modelTermVec

2-element Array{ModelTerm,1}:
 ModelTerm("intercept", 0, 0, "fixed", 1.0)
 ModelTerm("Ind", 0, 0, "I", 1.5)          