# Example of Least Squares Estimation

## Function to simulate data for training and validation

In [3]:
using Distributions
function simDat(nObs,nLoci,bMean,bStd,resStd)
    X  = [ones(nObs,1) rand(Binomial(2,0.5),(nObs,nLoci))]
    Xv = [ones(nObs,1) rand(Binomial(2,0.5),(nObs,nLoci))] # for validation
    b = rand(Normal(bMean,bStd),size(X,2))
    y = X*b + rand(Normal(0.0, resStd),nObs)
    return (y,X,Xv,b)
end

simDat (generic function with 1 method)

In [41]:
nObs     = 1000
nLoci    = 5
bMean    = 0.0
bStd     = 0.5
resStd   = 1.0
y,X,Xv,b = simDat(nObs,nLoci,bMean,bStd,resStd);

### Using simulated data for genomic prediction

In [42]:
bHat = inv(X'X)*X'y
sel = 2:size(X,2)
BV   = X[:,sel]*b[sel]
EBV  = X[:,sel]*bHat[sel]
BVv   = Xv[:,sel]*b[sel]
EBVv  = Xv[:,sel]*bHat[sel]
r1 = cor(BV,EBV)
b1 = cov(BV,EBV)/var(EBV)
b2 = cov(BV,EBV)/var(EBV)
r2 = cor(BVv,EBVv)
h2 = var(BV)/var(y)
println("heritability: ",h2)
println("correlations: ", [r1 r2])
println("regressions:  ", [b1 b2])

heritability: 0.646035893973575
correlations: [0.9993554008857737 0.9994346956051486]
regressions:  [1.0034192207277033 1.0034192207277033]
