### Construct the H matrix: the relationship matrix for both genotyped and non-genotyped individuals

* Input:
 * genotype file, not centered (missing values denoted as NA)
 * pedigree file (every animal in the genotype file must also appear in the pedigree file)

* Output:
 * relationship matrix for both genotyped and non-genotyped individuals

In [None]:
##Data in Box/Data

In [1]:
using JWAS:SSBR,misc,PedModule
using DataFrames,CSV

In [2]:
# Directory prefix that is concatenated in front of the pedigree and genotype
# file names used in this notebook.
PATH="./";

#### 1. get genotype file and pedigree file ready 

* remove the header from the pedigree file and change the separator to ' '

In [8]:
# Read the pedigree CSV, then write it back out as "pedfile.txt" with no
# header row and a single space as the delimiter.
pedfile=PATH*"Trial16and20_Pedigree.csv"
temp=CSV.read(pedfile);
CSV.write(PATH*"pedfile.txt",temp,header=false,delim=' ');

* genotype file: remove the header and replace missing values with column means

In [9]:
# Path of the raw genotype file.
genofile=PATH*"AllPigs_not_centered_SingleStep.txt";

In [None]:
# Load the genotype table: the first line is treated as a header and fields
# are split on single spaces.
temp=readtable(genofile,header=true,separator=' ');
#temp=CSV.read(genofile,header=true,delim=' ',null="NA");

In [None]:
# Write the table back out without its header row. No separator is given
# here; NOTE(review): the later re-read of this file uses ',' -- confirm that
# is what writetable emits by default for a ".txt" file name.
writetable(PATH*"genofile.txt",temp,header=false);

In [None]:
# Column element types used when the genotype file is re-read: the first
# column is read as String and every other column as Float64.
ncol = size(temp, 2)
# `fill(Float64, ncol)` directly yields a Vector{DataType}. It replaces the
# positional `Array(DataType, ncol)` constructor (deprecated, and removed in
# later Julia releases) followed by `fill!`.
etv = fill(Float64, ncol)
etv[1] = String;

In [None]:
# Re-read the header-less, comma-separated file, passing `etv` as the
# per-column element types instead of letting readtable infer them.
temp=readtable(PATH*"genofile.txt",header=false,separator=',',eltypes=etv);

> function to replace missing values with the column mean

In [None]:
"""
    missing2mean!(X::DataFrames.DataFrame; id4row=true)

Replace, in place, every missing entry of `X` with the mean of the observed
(non-missing) entries of the same column.

# Keywords
- `id4row`: when `true` (the default) the first column is left untouched and
  imputation starts at column 2; otherwise every column is processed.

# Returns
The mutated `X`.

# Throws
- `ArgumentError` if a processed column contains no observed value at all,
  since no mean can be computed for it.
"""
function missing2mean!(X::DataFrames.DataFrame; id4row=true)
    nrow, ncol = size(X)
    first_col = id4row == true ? 2 : 1
    for i = first_col:ncol
        column = X[:, i]
        missing_rows = [r for r in 1:nrow if ismissing(column[r])]
        # Nothing to impute in this column (also covers a table with no rows).
        isempty(missing_rows) && continue
        if length(missing_rows) == nrow
            throw(ArgumentError(
                "column $i has no observed values; cannot impute its mean"))
        end
        observed_rows = [r for r in 1:nrow if !ismissing(column[r])]
        #X[missing_rows,i]=round(Int,mean(X[observed_rows,i]))
        X[missing_rows, i] = mean(X[observed_rows, i])
    end
    return X
end

In [None]:
# Check whether the entry in row 1, column 2 is a missing value.
typeof(temp[1,2])==Missings.Missing

In [None]:
# Impute missing entries in place; with the default id4row=true the first
# column is skipped.
missing2mean!(temp);

In [None]:
# Overwrite genofile.txt with the imputed table: no header, space-separated.
writetable(PATH*"genofile.txt",temp,header=false,separator=' ');

#### 2. construct H matrix

In [None]:
# Numbers object created with seven zeros; it is handed to the SSBR calls
# below and its `pedn` field is read when H is updated.
# NOTE(review): presumably the counts are filled in by calc_Ai -- confirm.
num  =SSBR.Numbers(0,0,0,0,0,0,0);

In [None]:
# Load the imputed, header-less genotype file written above, with the
# `center=true` option of make_genotypes.
genofile=PATH*"genofile.txt"
geno = misc.make_genotypes(genofile,header=false,center=true);

In [None]:
# List the fields of the genotype object (`sum2pq` is used below).
fieldnames(geno)

In [None]:
# Build the pedigree object and the `amats` container from the pedigree file
# and the genotypes; `amats.nn` is used below.
ped,amats =SSBR.calc_Ai(PATH*"pedfile.txt",geno,num);

In [None]:
# Build the `mmats` container; its `full` field is used as M below.
mmats = SSBR.make_MMats(geno,num,amats,ped);

In [None]:
# First term of H: M*M' divided by geno.sum2pq.
M = mmats.full
H = M*M'/geno.sum2pq;

In [None]:
# Inverse of amats.nn after conversion with full().
# NOTE(review): presumably amats.nn is stored sparse -- confirm.
res=inv(full(amats.nn));

In [None]:
# Add `res` to the leading num.pedn x num.pedn block of H.
# NOTE(review): this assumes the first num.pedn rows/columns of H are in the
# same order as amats.nn -- confirm against SSBR.
H[1:num.pedn,1:num.pedn]=H[1:num.pedn,1:num.pedn]+res;

In [None]:
# Write the bare numeric matrix. The same file name is written again in the
# last cell of this group, which overwrites this output.
writedlm("Hmatrix.txt",H)

In [None]:
# Average diagonal element of H.
mean(diag(H))

In [None]:
# Number of rows of H.
size(H,1)

In [None]:
# IDs obtained from the pedigree object.
# NOTE(review): the cells below assume they are in the row order of H -- confirm.
IDs=PedModule.getIDs(ped);

In [None]:
# Element-wise self-comparison of the IDs, summed.
sum(IDs.== IDs)

In [None]:
# DataFrame version of H; `out` is reassigned two cells further down.
out=convert(DataFrame, H);

In [None]:
# Display H.
H

In [None]:
# Put the IDs in front of H as a first column, then stack a header row
# ("H_matrix" followed by the IDs) on top.
temp=hcat(IDs,H)
header=["H_matrix"; IDs]
out=vcat(reshape(header,1,length(header)),temp);

In [None]:
# Write the labelled matrix, replacing the earlier Hmatrix.txt.
writedlm("Hmatrix.txt",out)

In [None]:
# find missing ID
# Returns the elements of `genoid` that do not occur in `pedid`.
# NOTE(review): `pedid` and `genoid` are not defined in the cells shown
# above; they have to be created before this cell can run.
a=Set(pedid)
b=Set(genoid)
c=intersect(a, b)
setdiff(b,c)