# PedModule from JWAS

I have slightly modified it for the workshop. 

#### Need to install ProgressMeter package

Uncomment and run the next two lines the first time you use this notebook; they install the ProgressMeter package.

In [1]:
#using Pkg
#Pkg.add("ProgressMeter")

In [32]:
using DataFrames,CSV
using SparseArrays
using ProgressMeter
using Printf

In [3]:
# One individual in the pedigree, stored as the value type of Pedigree.idMap.
mutable struct PedNode
    # sequence number assigned by code!; 0 means "not coded yet"
    seqID::Int64
    # ID of the sire; the string "0" means the sire is unknown
    sire::String
    # ID of the dam; the string "0" means the dam is unknown
    dam::String
    # inbreeding coefficient; created as -1.0 ("not computed yet") and filled in by calcInbreeding!
    f::Float64
end

# Container for a whole pedigree plus the caches used while processing it.
mutable struct Pedigree
    # next sequence number that code! will hand out (mkPed starts it at 1)
    currentID::Int64
    # individual ID => PedNode for that individual
    idMap::Dict{AbstractString,PedNode}
    # cache of additive relationships filled by calcAddRel!, keyed by an integer
    # derived from the two individuals' sequence numbers
    aij::Dict{Int64, Float64}
    # NOTE(review): setNG and setG are not used by any function in this notebook;
    # presumably the non-genotyped / genotyped ID sets from JWAS - TODO confirm
    setNG::Set
    setG::Set
end

In [4]:
"""
    code!(ped::Pedigree, id::AbstractString)

Assign the next free sequence number (`ped.currentID`) to individual `id`,
after first recursively numbering its sire and dam if they are known and
not yet numbered. Parents therefore always receive a smaller `seqID` than
their offspring. Individuals that already have a non-zero `seqID` are left
untouched.
"""
function code!(ped::Pedigree,id::AbstractString)
    # The idea for this function came from a perl script by Bernt Guldbrandtsen
    node = ped.idMap[id]
    node.seqID == 0 || return            # already coded, nothing to do

    # Code the sire first, then the dam; "0" marks an unknown parent.
    for parentID in (node.sire, node.dam)
        if parentID != "0" && ped.idMap[parentID].seqID == 0
            code!(ped,parentID)
        end
    end

    node.seqID = ped.currentID
    ped.currentID += 1
end

code! (generic function with 1 method)

In [5]:
"""
    fillMap!(ped::Pedigree, df)

Populate `ped.idMap` from a three-column table `df` whose columns are
individual ID, sire ID and dam ID (in that order), with `"0"` marking an
unknown parent.

Every sire and dam that is mentioned is first entered as a founder
(unknown parents), so that parents which never appear in the ID column are
still present in the map. Then every row of the table is entered with its
recorded parents, overwriting any founder placeholder for the same ID.
All nodes start uncoded (`seqID == 0`) and with `f == -1.0`.

Columns are read with `df[:, k]`, which works for data frames (old and
current DataFrames.jl) as well as for a plain matrix of strings.
"""
function fillMap!(ped::Pedigree,df)
    ids   = df[:,1]
    sires = df[:,2]
    dams  = df[:,3]

    # Placeholders for all sires, then all dams; skip "0" and IDs already present.
    for parents in (sires, dams), p in parents
        if p != "0" && !haskey(ped.idMap,p)
            ped.idMap[p] = PedNode(0,"0","0",-1.0)
        end
    end

    # Real records: walk the three columns in lockstep instead of keeping a row counter.
    for (id, sire, dam) in zip(ids, sires, dams)
        ped.idMap[id] = PedNode(0,sire,dam,-1.0)
    end
end

fillMap! (generic function with 1 method)

In [6]:
# Demo: create an empty dictionary with Int64 keys and String values.
myDict = Dict{Int64,String}()

Dict{Int64,String} with 0 entries

In [7]:
# Demo: store a value under key 1.
myDict[1] = "Rohan"

"Rohan"

In [8]:
# Demo: haskey tests for a key without throwing; key 2 was never inserted.
haskey(myDict,2)

false

In [33]:
"""
    calcAddRel!(ped::Pedigree, id1::AbstractString, id2::AbstractString)

Return the additive relationship between individuals `id1` and `id2`,
computed recursively from the pedigree. Every value that is computed is
stored in `ped.aij`, so each pair is evaluated at most once.

If either ID is `"0"` (unknown individual) the result is `0.0`.
The individual with the larger `seqID` is treated as the younger one and
the recursion expands *its* parents; this relies on parents having smaller
sequence numbers than their offspring, which is how `code!` numbers them.

Each call prints a trace line (kept for the workshop).
"""
function calcAddRel!(ped::Pedigree,id1::AbstractString,id2::AbstractString)
    @printf "calcRel between %s and %s \n" id1 id2
    if id1=="0" || id2=="0"
        return 0.0
    end
    old,yng = ped.idMap[id1].seqID < ped.idMap[id2].seqID ? (id1,id2) : (id2,id1)
    oldID = ped.idMap[old].seqID
    yngID = ped.idMap[yng].seqID

    # Position of element (yngID, oldID) in a row-packed lower triangle.
    # n*(n+1) is always even, so integer division is exact and keeps the key
    # an Int64, matching the key type of ped.aij (no Float64 round trip).
    n = yngID - 1
    aijKey = div(n*(n+1),2) + oldID

    # If already done, return the stored value (single dictionary lookup).
    cached = get(ped.aij,aijKey,nothing)
    if cached !== nothing
        return cached
    end

    sireOfYng = ped.idMap[yng].sire
    damOfYng  = ped.idMap[yng].dam

    if old==yng                       # aii = 1 + F = 1 + 0.5*a(sire,dam)
        aii = 1.0 + 0.5*calcAddRel!(ped,sireOfYng,damOfYng)
        ped.aij[aijKey] = aii
        return aii
    end

    # a(old,yng) is the average of a(old,sire of yng) and a(old,dam of yng).
    # `old` cannot be "0" here (handled above), so only the parent is checked.
    # The dam term is evaluated first so the printed trace keeps its order.
    aOldDamYoung  = damOfYng =="0" ? 0.0 : calcAddRel!(ped,old,damOfYng)
    aOldSireYoung = sireOfYng=="0" ? 0.0 : calcAddRel!(ped,old,sireOfYng)
    aijVal = 0.5*(aOldSireYoung + aOldDamYoung)
    ped.aij[aijKey] = aijVal

    return aijVal
end

calcAddRel! (generic function with 1 method)

In [10]:
# Demo of the ordering idiom used in calcAddRel!: the ternary picks the tuple
# with the smaller value first, and the tuple is destructured into two names.
x = 7
y = 4
old,young = x < y ? (x,y) : (y,x)

(4, 7)

In [11]:
# Same idiom with the values already in ascending order: the first branch is taken.
x = 1
y = 4
old,young = x < y ? (x,y) : (y,x)

(1, 4)

In [38]:
"""
    calcInbreeding!(ped::Pedigree, id::AbstractString)

Return the inbreeding coefficient of individual `id`, computing and storing
it in the individual's `f` field on first use.

A stored `f` greater than `-1.0` is taken as already computed and returned
as is. Otherwise `f` is `0.0` when either parent is unknown (`"0"`), and
half the additive relationship between sire and dam when both are known.

Each call prints a trace line (kept for the workshop).
"""
function calcInbreeding!(ped::Pedigree,id::AbstractString)
    @printf "calcInbreeding for: %s \n" id
    node = ped.idMap[id]
    node.f > -1.0 && return node.f       # already computed

    bothParentsKnown = node.sire != "0" && node.dam != "0"
    coefficient = bothParentsKnown ? 0.5*calcAddRel!(ped,node.sire,node.dam) : 0.0
    node.f = coefficient
    return coefficient
end

calcInbreeding! (generic function with 1 method)

In [13]:
"""
    AInverse(ped::Pedigree)

Return the inverse of the additive relationship matrix as a sparse matrix.

The triplets produced by `HAi` are assembled into a sparse factor `H`,
and the result is the product `H'H`. Rows and columns follow the
individuals' `seqID` numbering.
"""
function AInverse(ped::Pedigree)
    rows, cols, vals = HAi(ped)
    factor = sparse(rows,cols,vals)
    return factor' * factor
end

"""
    HAi(ped::Pedigree)

Return the `(ii, jj, vv)` coordinate triplets of the sparse factor whose
cross-product `H'H` is the inverse relationship matrix (see `AInverse`).

For each individual at position `myPos` (its `seqID`) the row contains a
diagonal entry `d` and `-0.5*d` in the column of each known parent, where

- `d = sqrt(4/(2 - f_sire - f_dam))` when both parents are known,
- `d = sqrt(4/(3 - f_parent))` when exactly one parent is known,
- `d = 1` when neither parent is known.

A parent counts as known when its position (`seqID`) is greater than 0.
"""
function HAi(ped::Pedigree)
    ii = Int64[]
    jj = Int64[]
    vv = Float64[]
    for node in values(ped.idMap)
        sirePos = node.sire=="0" ? 0 : ped.idMap[node.sire].seqID
        damPos  = node.dam =="0" ? 0 : ped.idMap[node.dam].seqID
        myPos   = node.seqID
        sireKnown = sirePos>0
        damKnown  = damPos>0

        # Diagonal value depends on how many parents are known.
        d = if sireKnown && damKnown
            sqrt(4.0/(2 - ped.idMap[node.sire].f - ped.idMap[node.dam].f))
        elseif sireKnown
            sqrt(4.0/(3 - ped.idMap[node.sire].f))
        elseif damKnown
            sqrt(4.0/(3 - ped.idMap[node.dam].f))
        else
            1.0
        end

        # Off-diagonal entries: sire column first, then dam column.
        for parentPos in (sirePos, damPos)
            if parentPos>0
                push!(ii,myPos)
                push!(jj,parentPos)
                push!(vv,-0.5*d)
            end
        end

        # Diagonal entry always comes last for the individual.
        push!(ii,myPos)
        push!(jj,myPos)
        push!(vv,d)
    end
    return (ii,jj,vv)
end

HAi (generic function with 1 method)

In [14]:
"""
    mkPed(pedFile::AbstractString; header=false, separator=',')

Read a three-column pedigree file (individual, sire, dam; all read as
strings) and return a fully processed `Pedigree`.

After loading, every individual is given a sequence number with `code!`
and its inbreeding coefficient is computed with `calcInbreeding!`; a
progress bar is shown for each pass and "Finished!" is printed at the end.

# Keywords
- `header`: passed to `CSV.read` as its `header` option (default `false`).
- `separator`: field delimiter passed to `CSV.read` as `delim` (default `','`).
"""
function mkPed(pedFile::AbstractString;header=false,separator=',')
    columnTypes = [String,String,String]
    pedTable = CSV.read(pedFile,types=columnTypes,delim=separator,header=header)

    # Sequence numbering starts at 1; all maps and sets start empty.
    pedigree = Pedigree(1,
                        Dict{AbstractString,PedNode}(),
                        Dict{Int64, Float64}(),
                        Set(),
                        Set())
    fillMap!(pedigree,pedTable)

    @showprogress "coding pedigree... " for animal in keys(pedigree.idMap)
        code!(pedigree,animal)
    end
    @showprogress "calculating inbreeding... " for animal in keys(pedigree.idMap)
        calcInbreeding!(pedigree,animal)
    end
    println("Finished!")
    return pedigree
end

mkPed (generic function with 1 method)

In [15]:
"""
    getIDs(ped::Pedigree)

Return the individual IDs as a vector ordered by sequence number, i.e.
element `k` is the ID of the individual whose `seqID` is `k`. This is the
row/column order of the matrix returned by `AInverse`.
"""
function getIDs(ped::Pedigree)
    ids = Vector{String}(undef,length(ped.idMap))
    for (id, node) in ped.idMap
        ids[node.seqID] = id
    end
    return ids
end

getIDs (generic function with 1 method)

In [49]:
# Demo: allocate a 2x3 Int64 array without initialising it; the values shown
# are whatever happened to be in memory and must be overwritten before use.
a = Array{Int64}(undef,2,3)

2×3 Array{Int64,2}:
 0  0  1
 1  1  1

In [17]:
# Demo: allocate a 2x3 array with every entry set to 25.
b = fill(25,2,3)

2×3 Array{Int64,2}:
 25  25  25
 25  25  25

In [39]:
# Build the example pedigree from the space-delimited file "ped1".
# The trailing semicolon suppresses display of the returned object; the lines
# printed below are the trace output of calcInbreeding! and calcAddRel!.
ped1 = mkPed("ped1",separator=' ');

calcInbreeding for: A 
calcInbreeding for: C 
calcRel between A and B1 
calcInbreeding for: B1 
calcInbreeding for: D 
calcRel between A and B1 
calcInbreeding for: E 
calcRel between C and D 
calcRel between C and B1 
calcRel between B1 and B1 
calcRel between 0 and 0 
calcRel between B1 and A 
calcRel between C and A 
calcRel between A and B1 
calcRel between A and A 
calcRel between 0 and 0 
calcInbreeding for: F 
calcRel between A and D 
calcRel between A and B1 
calcRel between A and A 
Finished!


In [36]:
# Inspect the ID => PedNode(seqID, sire, dam, f) map after coding and inbreeding.
ped1.idMap

Dict{AbstractString,PedNode} with 6 entries:
  "A"  => PedNode(1, "0", "0", 0.0)
  "C"  => PedNode(3, "A", "B1", 0.0)
  "B1" => PedNode(2, "0", "0", 0.0)
  "D"  => PedNode(4, "A", "B1", 0.0)
  "E"  => PedNode(5, "C", "D", 0.25)
  "F"  => PedNode(6, "A", "D", 0.25)

In [37]:
# IDs in dictionary iteration order, which is not the seqID order.
[id for id in keys(ped1.idMap)]

6-element Array{String,1}:
 "A" 
 "C" 
 "B1"
 "D" 
 "E" 
 "F" 

In [26]:
# IDs ordered by seqID; compare with the dictionary order above.
getIDs(ped1)

6-element Array{String,1}:
 "A" 
 "B1"
 "C" 
 "D" 
 "E" 
 "F" 

In [42]:
# Sparse inverse of the additive relationship matrix for the example pedigree.
AInverse(ped1)

6×6 SparseMatrixCSC{Float64,Int64} with 26 stored entries:
  [1, 1]  =  2.5
  [2, 1]  =  1.0
  [3, 1]  =  -1.0
  [4, 1]  =  -0.5
  [6, 1]  =  -1.0
  [1, 2]  =  1.0
  [2, 2]  =  2.0
  [3, 2]  =  -1.0
  [4, 2]  =  -1.0
  [1, 3]  =  -1.0
  [2, 3]  =  -1.0
  [3, 3]  =  2.5
  [4, 3]  =  0.5
  [5, 3]  =  -1.0
  [1, 4]  =  -0.5
  [2, 4]  =  -1.0
  [3, 4]  =  0.5
  [4, 4]  =  3.0
  [5, 4]  =  -1.0
  [6, 4]  =  -1.0
  [3, 5]  =  -1.0
  [4, 5]  =  -1.0
  [5, 5]  =  2.0
  [1, 6]  =  -1.0
  [4, 6]  =  -1.0
  [6, 6]  =  2.0

In [59]:
# Check: convert the sparse inverse to a dense matrix and invert it to recover
# the relationship matrix A itself (only sensible for a tiny example).
Ai = Matrix(AInverse(ped1))
A  = inv(Ai)
# Show A rounded to 3 digits with the individual IDs as a leading column.
[getIDs(ped1) round.(A,digits=3)]

6×7 Array{Any,2}:
 "A"   1.0   0.0   0.5   0.5   0.5    0.75 
 "B1"  0.0   1.0   0.5   0.5   0.5    0.25 
 "C"   0.5   0.5   1.0   0.5   0.75   0.5  
 "D"   0.5   0.5   0.5   1.0   0.75   0.75 
 "E"   0.5   0.5   0.75  0.75  1.25   0.625
 "F"   0.75  0.25  0.5   0.75  0.625  1.25 

In [60]:
# Vector with weight 0.5 on positions 1 and 4 and zero elsewhere.
# NOTE(review): positions 1 and 4 are A and D in getIDs order (the parents of F);
# presumably used to illustrate a parent-average contribution - confirm.
q = [0.5;0;0;0.5;0;0]

6-element Array{Float64,1}:
 0.5
 0.0
 0.0
 0.5
 0.0
 0.0

In [56]:
# Demo of the coordinate (triplet) constructor used in AInverse:
# entry k of the matrix is vv[k] at row ii[k], column jj[k].
ii = [1;2;3;4]
jj = [1;2;3;1]
vv = [1;2;3;5]
# The dimensions are inferred from the largest row and column index (4x3 here).
sm = sparse(ii,jj,vv)

4×3 SparseMatrixCSC{Int64,Int64} with 4 stored entries:
  [1, 1]  =  1
  [4, 1]  =  5
  [2, 2]  =  2
  [3, 3]  =  3

In [57]:
# Convert the sparse matrix to a dense one; unstored entries become zeros.
Matrix(sm)

4×3 Array{Int64,2}:
 1  0  0
 0  2  0
 0  0  3
 5  0  0