## Module for building Henderson's Mixed Model Equations for a single trait, including marker and polygenic effects



### <font color="red"> Data</font>

In [2]:
using DataFrames
using Distributions

In [3]:
# Path to the pedigree file; it is handed to PedModule.mkPed in a later cell.
FILE = "../../data/small.ped";

In [48]:
# Four animal IDs and one phenotype value y per animal (same order).
Animal = ["S1","D1","O1","O3"]
y = [1.09, 2.68, 1.64, -1.48];
# Two-column DataFrame with columns Animal and y.
df = DataFrame(Animal=Animal,y=y);

In [49]:
# Seed the global RNG so the genotype draws below are reproducible.
srand(123)
# Every genotype is a draw from Binomial(2, 0.5), i.e. one of 0, 1, 2.
d = Binomial(2,0.5)
nObs     = 4
nMarkers = 5
# nObs x nMarkers matrix of genotype draws, converted to floating point.
M        = float(rand(d,(nObs,nMarkers)));

In [50]:
# Append the marker matrix to df column-wise; the new columns show up as x1..x5
# in the table printed below.
df=[df DataFrame(M)]

Unnamed: 0,Animal,y,x1,x2,x3,x4,x5
1,S1,1.09,1.0,0.0,1.0,1.0,1.0
2,D1,2.68,2.0,0.0,2.0,2.0,1.0
3,O1,1.64,1.0,2.0,0.0,1.0,0.0
4,O3,-1.48,0.0,0.0,2.0,1.0,1.0


In [51]:
# NOTE(review): α is not defined in any cell visible in this notebook; it has to
# come from a cell that is not shown (presumably a vector of nMarkers marker
# effects) — confirm before re-running.
a        = M*α
stdGen   = std(a)
# Rescale a so that its sample standard deviation is 1.
a        = a/stdGen
# Add one standard-normal deviate per observation.
# NOTE(review): this rebinds the global y only; df is not touched in this cell,
# so the y column built earlier keeps its original values.
y        = a + randn(nObs)
# Sample variance of the noise that was just added (y - a).
var(y-a)

0.4236157749220397

### <font color="red"> Run module</font>

In [54]:
# Load the module source file. Presumably this is what provides PedModule and
# MMEModule, which the following cells call — confirm against MME.jl.
MODULE_PATH="../../Module/MME.jl";
include(MODULE_PATH);

In [55]:
# Residual variance (passed to initMME) and total genetic variance.
varRes = 1.0
varGen = 1.0
# Fraction of varGen assigned to the markers.
markerProp = 0.99
# Marker share of the genetic variance (later passed to addMarkers).
MVarGen= varGen*markerProp
# Remaining share (later used as the variance of the pedigree-based "Animal" term).
PVarGen= varGen*(1-markerProp);

In [56]:
# Build ped from the pedigree file at FILE via PedModule.mkPed.
ped = PedModule.mkPed(FILE);

In [57]:
# Create the model object from the model-equation string and the residual variance.
mme = MMEModule.initMME("y = intercept + Animal",varRes);

In [58]:
# 1x1 matrix whose single entry is PVarGen.
G = reshape([PVarGen],1,1)
# Register the "Animal" term as random, passing the pedigree and the 1x1 matrix G.
MMEModule.setAsRandom(mme,"Animal",ped,G)

### <font color="red"> work with markers</font>

### add marker information

In [61]:
# Attach marker genotypes to the model object.
#
# `df` is converted to a plain array, wrapped together with `G` in a
# `MarkerMatrix`, and stored in `mme.M`. The newly built `MarkerMatrix` is also
# the value of the function (the assignment is the last expression).
function addMarkers(mme,df,G::Float64)
    genotypes = convert(Array,df)
    markers   = MarkerMatrix(genotypes,G)
    mme.M = markers
end

addMarkers (generic function with 1 method)

##### functions for markers

In [62]:
# Return column `j` of the matrix `X` as a vector of length `nrow` built directly
# on a raw pointer into `X`'s storage (no copy is made, so writes through the
# returned vector are writes into `X`).
#
# Raises an error ("column number is wrong!") when `j` is outside 1:ncol.
# The lower bound has to be 1, not 0: for j == 0 the linear index below would be
# 1 - nrow, i.e. a pointer in front of the array.
#
# NOTE(review): the result is created from a bare pointer, so it presumably does
# not keep `X` alive for the garbage collector — callers should hold on to `X`.
function get_column(X,j)
    nrow,ncol = size(X)
    if j<1 || j>ncol
        error("column number is wrong!")
    end
    indx = 1 + (j-1)*nrow       # linear index of X[1,j] in column-major storage
    ptr = pointer(X,indx)
    pointer_to_array(ptr,nrow)
end

# Collect one vector per column of `X`, each obtained from `get_column`, into an
# array whose element type is Array{Float64,1}.
function get_column_ref(X)
    nrow,ncol = size(X)
    return Array{Float64,1}[get_column(X,j) for j=1:ncol]
end

# Subtract from every column of `X` that column's mean, modifying `X` in place.
# Returns the column means as computed by mean(X,1), taken before centring.
function center!(X)
    nObs,_ = size(X)
    colMeans = mean(X,1)
    # Each row of meanMatrix is a copy of colMeans, so it has the same shape as X.
    meanMatrix = ones(nObs)*colMeans
    # X <- X + (-1)*meanMatrix
    BLAS.axpy!(-1,meanMatrix,X)
    return colMeans
end

center! (generic function with 1 method)

##### types for markers

In [63]:
# Marker genotype matrix together with quantities derived from it.
#
# The inner constructor centres `X` IN PLACE via center!, so the matrix passed by
# the caller is modified and then stored (not copied) in the new object.
type MarkerMatrix
    X::Array{Float64,2}                 # genotype matrix, column-centred by the constructor
    xArray::Array{Array{Float64,1},1}   # one vector per column of X, from get_column_ref
    markerMeans::Array{Float64,2}       # column means returned by center!
    mean2pq::Float64                    # (2*p*(1-p)')[1,1] with p = markerMeans/2; despite the
                                        # name this is the SUM over markers of 2p(1-p)
    G::Float64                          # stored exactly as passed to the constructor
    centered::Bool                      # the constructor always sets this to true
    function MarkerMatrix(X::Array{Float64,2},G::Float64)
        colMeans   = center!(X)                      # centres X in place
        alleleFreq = colMeans/2.0
        twoPQ      = 2*alleleFreq*(1-alleleFreq)'    # 1x1 matrix
        columns    = get_column_ref(X)
        return new(X,columns,colMeans,twoPQ[1,1],G,true)
    end
end

### variance components

In [64]:
# Draw a variance value: (x'x + df*scale) divided by one draw from Chisq(n + df).
function sampleVariance(x, n, df, scale)
    sumSquares = dot(x,x)
    chisqDraw  = rand(Chisq(n+df))
    return (sumSquares + df*scale)/chisqDraw
end

sampleVariance (generic function with 1 method)

### Gibbs sampler

In [65]:
# Run `nIter` iterations of the sampler for the model `mme` (with markers attached
# in `mme.M`) and return the posterior means.
#
# Arguments
#   nIter   : number of iterations
#   mme     : model object; read fields are mmeRhs, mmeLhs, X, ySparse, R, M, ped,
#             pedTrmVec, modelTermDict, Ai, Gi (Gi is overwritten every iteration
#             when a pedigree is present)
#   df      : data passed to MMEModule.getMME when the equations are not built yet
#   outFreq : a progress line is printed every `outFreq` iterations
#
# Returns a Dict with two entries:
#   "posteriorMeanLocationParms" : [names  running-mean-of-sol]
#   "posteriorMeanMarkerEffects" : meanAlpha
#
# Fix relative to the earlier version: the block that fills `S` now lives inside
# the `mme.ped != 0` guard. `pedTrmVec` and `S` only exist when that guard holds,
# so running a model without a pedigree used to fail on the first iteration.
function sampleMCMC(nIter,mme,df;outFreq=100)
    # size(...) == () means mmeRhs is still a scalar; presumably that is the
    # "not built yet" placeholder — confirm in MMEModule.
    if size(mme.mmeRhs)==()
        MMEModule.getMME(mme,df)
    end
    p = size(mme.mmeRhs,1)
    sol = zeros(p)
    solMean = zeros(p)

    # Starting values and prior hyper-parameters for the two variances.
    vEff=mme.M.G/mme.M.mean2pq
    vRes=mme.R
    dfEffectVar=4
    nuRes=4
    scaleVar   = vEff*(dfEffectVar-2)/dfEffectVar        # scale factor for locus effects
    scaleRes   = vRes*(nuRes-2)/nuRes

    nObs,nLoci = size(mme.M.X)
    α  = zeros(Float64,nLoci)
    meanAlpha = zeros(Float64,nLoci)
    mArray = mme.M.xArray
    # x_i'x_i for every marker column.
    mpm = [dot(mme.M.X[:,i],mme.M.X[:,i]) for i=1:size(mme.M.X,2)]
    # sol and α start at zero, so ycorr starts as the dense copy of mme.ySparse.
    ycorr = vec(full(mme.ySparse))
    M = mme.M.X

    ν = 10
    # NOTE(review): mme.ped appears to be 0 when no pedigree was set — confirm.
    if mme.ped != 0
        pedTrmVec = mme.pedTrmVec
        k = size(pedTrmVec,1)
        νG0 = ν + k
        G0 = inv(mme.Gi)
        P = G0*(νG0 - k - 1)
        S = zeros(Float64,k,k)
    end

    for iter=1:nIter
        # --- non-marker part ---
        # Add the current X*sol back, sample sol, then subtract the new X*sol.
        ycorr = ycorr + mme.X*sol
        rhs = mme.X'ycorr

        # sol is not reassigned here, so Gibbs presumably updates it in place.
        MMEModule.Gibbs(mme.mmeLhs,sol,rhs,vRes)
        ycorr = ycorr - mme.X*sol

        # Running mean of sol over the iterations done so far.
        solMean += (sol - solMean)/iter

        # --- marker part ---
        # Presumably updates α, meanAlpha and ycorr in place (trailing `!`).
        MMEModule.sample_effects_ycorr!(M,mArray,mpm,ycorr,α,meanAlpha,vRes,vEff,iter)

        # --- covariance matrix of the pedigree terms (only with a pedigree) ---
        if mme.ped != 0
            # S[i,j] = sol_i' * Ai * sol_j over the solution sub-vectors of the
            # pedigree terms i and j.
            for (i,trmi) = enumerate(pedTrmVec)
                pedTrmi  = mme.modelTermDict[trmi]
                startPosi  = pedTrmi.startPos
                endPosi    = startPosi + pedTrmi.nLevels - 1
                for (j,trmj) = enumerate(pedTrmVec)
                    pedTrmj  = mme.modelTermDict[trmj]
                    startPosj  = pedTrmj.startPos
                    endPosj    = startPosj + pedTrmj.nLevels - 1
                    S[i,j] = (sol[startPosi:endPosi]'*mme.Ai*sol[startPosj:endPosj])[1,1]
                end
            end

            pedTrm1 = mme.modelTermDict[pedTrmVec[1]]
            q = pedTrm1.nLevels
            # NOTE(review): the original author left a question mark on the degrees
            # of freedom used here ("ν+q?") — confirm νG0 + q is intended.
            G0 = rand(InverseWishart(νG0 + q, P + S))
            mme.Gi = inv(G0)
            # Presumably refreshes the pedigree contribution to mmeLhs with the
            # new Gi — confirm in MMEModule.
            MMEModule.addA(mme)
        end

        # --- residual and marker-effect variances ---
        vRes = sampleVariance(ycorr, nObs, nuRes, scaleRes)
        vEff = sampleVariance(α, nLoci, dfEffectVar, scaleVar)

        if iter%outFreq==0
            println("at sample: ",iter)
        end
    end
    output = Dict()
    output["posteriorMeanLocationParms"] = [MMEModule.getNames(mme) solMean]
    output["posteriorMeanMarkerEffects"] = meanAlpha
    return output
end

sampleMCMC (generic function with 1 method)

### run it

In [66]:
# Columns 3:7 of df are the marker columns x1..x5 (see the table printed earlier);
# attach them to the model with MVarGen as the marker variance.
addMarkers(mme,df[:,3:7],MVarGen);

In [70]:
# Run 100000 iterations, printing a progress line every 10000 iterations.
output=sampleMCMC(100000,mme,df,outFreq=10000)

at sample: 10000
at sample: 20000
at sample: 30000
at sample: 40000
at sample: 50000

Dict{Any,Any} with 2 entries:
  "posteriorMeanLocationP… => 6x2 Array{Any,2}:…
  "posteriorMeanMarkerEff… => [0.8980368198994362,0.10394292492512452,-0.274417…


