# Pedigree Module of JWAS

#### The exercises here will be used to introduce concepts and features used in the Pedigree Module of JWAS


In this exercise, you will be exposed to:

1. DataFrames
1. Loops
1. Composite types
1. Dictionaries
1. Recursive functions
1. Sparse matrices

In [1]:
using CSV
using SparseArrays
using Printf

# One individual of the pedigree. Mutable because seqID and f are filled in after construction.
mutable struct PedNode
    seqID::Int64    # sequential number assigned by code!; 0 means "not numbered yet"
    sire::String    # ID of the sire; "0" stands for an unknown sire
    dam::String     # ID of the dam; "0" stands for an unknown dam
    f::Float64      # inbreeding coefficient; -1.0 means "not computed yet" (see calcInbreeding!)
end

# The whole pedigree plus the bookkeeping used while numbering individuals and
# computing additive relationships.
mutable struct Pedigree
    currentID::Int64                       # next sequential number that code! will hand out
    idMap::Dict{AbstractString,PedNode}    # individual ID => its PedNode
    aij::Dict{Int64, Float64}              # cache of additive relationships, filled by calcAddRel!
end

#### 1. Read in the data from "ped1.txt" (a space-delimited file without a header row) into a DataFrame called ``df``

In [2]:
# Space-delimited file with no header row; all three columns are read as String.
# fillMap! below uses column 1 as the individual, column 2 as its sire and column 3 as its dam.
df  = CSV.read("ped1.txt",types=[String,String,String], delim=' ', header=false)

Unnamed: 0_level_0,Column1,Column2,Column3
Unnamed: 0_level_1,String,String,String
1,F,A,D
2,C,A,B1
3,D,A,B1
4,E,C,D


#### 2. Initialize ``ped`` as an instance of the composite type Pedigree with 1 for the currentID and empty Dictionaries of the appropriate types for idMap and aij

#### Answer:

In [3]:
# Numbering starts at 1; idMap is populated later by fillMap! and aij by calcAddRel!.
ped = Pedigree(1,Dict{AbstractString,PedNode}(),
                     Dict{Int64, Float64}())

Pedigree(1, Dict{AbstractString,PedNode}(), Dict{Int64,Float64}())

In [4]:
"""
    fillMap!(ped::Pedigree, df)

Populate `ped.idMap` from the pedigree table `df`, whose columns 1, 2 and 3 hold the
individual, its sire and its dam. The ID `"0"` denotes an unknown parent and gets no entry.

Every node is created with `seqID = 0` and `f = -1.0`. Returns `nothing`.
"""
function fillMap!(ped::Pedigree,df)
    # Sires (column 2) first, then dams (column 3): a parent that is not yet in the map
    # is registered as a founder with unknown parents.
    for col in (2, 3), parent in df[:,col]
        if parent != "0"
            get!(() -> PedNode(0,"0","0",-1.0), ped.idMap, parent)
        end
    end
    # Individuals with their own row get (or overwrite) an entry carrying their recorded parents.
    for (row, id) in enumerate(df[:,1])
        ped.idMap[id] = PedNode(0,df[row,2],df[row,3],-1.0)
    end
    return nothing
end

fillMap! (generic function with 1 method)

#### 3. Run fillMap!(ped::Pedigree,df) to populate idMap.

In [5]:
fillMap!(ped,df)

#### 4. Examine values stored in ped.idMap

In [6]:
ped.idMap

Dict{AbstractString,PedNode} with 6 entries:
  "A"  => PedNode(0, "0", "0", -1.0)
  "C"  => PedNode(0, "A", "B1", -1.0)
  "B1" => PedNode(0, "0", "0", -1.0)
  "D"  => PedNode(0, "A", "B1", -1.0)
  "E"  => PedNode(0, "C", "D", -1.0)
  "F"  => PedNode(0, "A", "D", -1.0)

#### A couple of print statements have been added to code! to show how the recursion progresses.

In [7]:
"""
    code!(ped::Pedigree, id::AbstractString)

Assign the next sequential number (`ped.currentID`) to `id`, after first numbering any
parent that does not have a number yet, so parents always receive a smaller number than
their offspring. Individuals that already have a number are left unchanged.

Prints a trace of the recursion and returns `nothing`.
"""
function code!(ped::Pedigree,id::AbstractString)
# The idea for this function came from a perl script by Bernt Guldbrandtsen
    node = ped.idMap[id]
    println("assign sequential number for $id with parents $(node.sire) and $(node.dam)")
    node.seqID == 0 || return
    # Sire before dam; the check is repeated per parent because numbering the sire
    # may already have numbered the dam.
    for parent in (node.sire, node.dam)
        if parent != "0" && ped.idMap[parent].seqID == 0
            code!(ped,parent)
        end
    end
    node.seqID = ped.currentID
    println("assigned $(ped.currentID) to $id")
    ped.currentID += 1
    return nothing
end

code! (generic function with 1 method)

#### 5. Use the function code! to assign a sequential number for "E"

Examine ped.idMap after calling code!

In [8]:
code!(ped,"E")

assign sequential number for E with parents C and D
assign sequential number for C with parents A and B1
assign sequential number for A with parents 0 and 0
assigned 1 to A
assign sequential number for B1 with parents 0 and 0
assigned 2 to B1
assigned 3 to C
assign sequential number for D with parents A and B1
assigned 4 to D
assigned 5 to E


#### 6. Use the function code! to assign a sequential number for "F"

Examine ped.idMap after calling code!

In [9]:
code!(ped,"F")

assign sequential number for F with parents A and D
assigned 6 to F


#### 7. Use the function code! to assign a sequential number for "A"

Examine ped.idMap after calling code!

In [10]:
# "A" already has a number, so code! prints its first message and returns without changing ped.
code!(ped,"A")

assign sequential number for A with parents 0 and 0


In [25]:
ped.idMap

Dict{AbstractString,PedNode} with 6 entries:
  "A"  => PedNode(1, "0", "0", -1.0)
  "C"  => PedNode(3, "A", "B1", -1.0)
  "B1" => PedNode(2, "0", "0", -1.0)
  "D"  => PedNode(4, "A", "B1", -1.0)
  "E"  => PedNode(5, "C", "D", -1.0)
  "F"  => PedNode(6, "A", "D", -1.0)

In [11]:
"""
    calcAddRel!(ped::Pedigree, id1::AbstractString, id2::AbstractString)

Return the additive relationship coefficient between `id1` and `id2`. The value is
computed recursively from the parents of the younger individual (the one with the larger
`seqID`) and cached in `ped.aij`.

An ID of `"0"` denotes an unknown parent and yields `0.0`. Both individuals must already
have been numbered with `code!`, because the cache key is derived from their `seqID`s.

# Throws
- `ArgumentError` if either individual has not been numbered yet (`seqID == 0`).
- `KeyError` if an ID is not present in `ped.idMap`.
"""
function calcAddRel!(ped::Pedigree,id1::AbstractString,id2::AbstractString)
    @printf "calcRel between %s and %s \n" id1 id2
    if id1=="0" || id2=="0"
        return 0.0
    end
    seq1 = ped.idMap[id1].seqID
    seq2 = ped.idMap[id2].seqID
    # Un-numbered individuals would all map to the same cache key and silently
    # produce wrong coefficients, so refuse them explicitly.
    if seq1==0 || seq2==0
        throw(ArgumentError("calcAddRel!: \"$id1\" and \"$id2\" must both be numbered with code! first"))
    end
    old,yng = seq1 < seq2 ? (id1,id2) : (id2,id1)
    oldID = ped.idMap[old].seqID
    yngID = ped.idMap[yng].seqID

    n = yngID - 1
    # Key used in ped.aij to store the additive relationship coefficient: the position of
    # (yngID, oldID) in a row-wise packed lower triangle. n*(n+1) is always even, so the
    # integer division is exact and the key stays an Int64 like the Dict's key type.
    aijKey = div(n*(n+1),2) + oldID
    if haskey(ped.aij,aijKey)         # if already done, return stored value
        return ped.aij[aijKey]
    end

    sireOfYng = ped.idMap[yng].sire
    damOfYng  = ped.idMap[yng].dam

    if old==yng                       # aii = 1 + half the relationship between the parents
        aii = 1.0 + 0.5*calcAddRel!(ped,sireOfYng,damOfYng)
        ped.aij[aijKey] = aii
        return aii
    end

    # old cannot be "0" here (handled by the early return above); only the parents of the
    # younger individual may be unknown.
    aOldDamYoung  = damOfYng =="0" ? 0.0 : calcAddRel!(ped,old,damOfYng)
    aOldSireYoung = sireOfYng=="0" ? 0.0 : calcAddRel!(ped,old,sireOfYng)
    aijVal = 0.5*(aOldSireYoung + aOldDamYoung)
    ped.aij[aijKey] = aijVal

    return aijVal
end


"""
    calcInbreeding!(ped::Pedigree, id::AbstractString)

Return the inbreeding coefficient of `id`, computing it and storing it in
`ped.idMap[id].f` on first use. A stored value greater than `-1.0` is returned as is.

An individual with at least one unknown parent (`"0"`) gets `0.0`; otherwise the
coefficient is half the additive relationship between its sire and dam.
"""
function calcInbreeding!(ped::Pedigree,id::AbstractString)
    @printf "calcInbreeding for: %s \n" id
    node = ped.idMap[id]
    node.f > -1.0 && return node.f          # already computed
    bothParentsKnown = node.sire != "0" && node.dam != "0"
    # founders have an inbreeding coeff. of zero
    node.f = bothParentsKnown ? 0.5*calcAddRel!(ped,node.sire,node.dam) : 0.0
    return node.f
end

calcInbreeding! (generic function with 1 method)

#### 8. Use the function calcInbreeding! to compute the inbreeding of "F"

Examine ``ped.idMap`` and ``ped.aij`` before and after calling calcInbreeding! 

In [31]:
calcInbreeding!(ped,"F")

calcInbreeding for: F 
calcRel between A and D 
calcRel between A and B1 
calcRel between A and A 
calcRel between 0 and 0 


0.25

#### 9. Add more print statements to calcAddRel! to more closely follow the recursion

Reinitialize ``ped`` and repeat inbreeding calculation for "F"

#### 10. Run the code in the next cell to compute inbreeding for all members of the pedigree

In [12]:
# Compute (and store) the inbreeding coefficient of every individual in the pedigree.
foreach(id -> calcInbreeding!(ped,id), keys(ped.idMap))

calcInbreeding for: A 
calcInbreeding for: C 
calcRel between A and B1 
calcInbreeding for: B1 
calcInbreeding for: D 
calcRel between A and B1 
calcInbreeding for: E 
calcRel between C and D 
calcRel between C and B1 
calcRel between B1 and B1 
calcRel between 0 and 0 
calcRel between B1 and A 
calcRel between C and A 
calcRel between A and B1 
calcRel between A and A 
calcRel between 0 and 0 
calcInbreeding for: F 
calcRel between A and D 
calcRel between A and B1 
calcRel between A and A 


#### Run the code in the next three cells.

You should see the contributions from individual "F" to the inverse of the additive relationship matrix. The code below assumes that either both parents are known or both are unknown.

In [16]:
# Contributions of a single individual to the inverse of the additive relationship
# matrix, collected as (row, column, value) triplets in ii, jj and vv.
ind = "F"
ii, jj, vv = Int64[], Int64[], Float64[]
sire, dam = ped.idMap[ind].sire, ped.idMap[ind].dam
sirePos = sire=="0" ? 0 : ped.idMap[sire].seqID
damPos  = dam =="0" ? 0 : ped.idMap[dam ].seqID
myPos   = ped.idMap[ind].seqID
if sirePos<=0 || damPos<=0
    # handled as "both parents unknown": only the individual's own diagonal element
    d = 1.0
    push!(ii,myPos)
    push!(jj,myPos)
    push!(vv,d)
else
    d = 4.0/(2 - ped.idMap[sire].f - ped.idMap[dam].f)
    for (row,col,val) in (
            (sirePos, sirePos,  0.25*d),    # sireXsire
            (damPos,  damPos,   0.25*d),    # damXdam
            (sirePos, damPos,   0.25*d),    # sireXdam
            (damPos,  sirePos,  0.25*d),    # damXsire
            (sirePos, myPos,   -0.5*d),     # sireXind
            (myPos,   sirePos, -0.5*d),     # indXsire
            (damPos,  myPos,   -0.5*d),     # damXind
            (myPos,   damPos,  -0.5*d),     # indXdam
            (myPos,   myPos,    d),         # indXind
        )
        push!(ii,row)
        push!(jj,col)
        push!(vv,val)
    end
end
nothing

In [None]:
# Assemble the triplets into a sparse matrix; sparse adds up values that share the same (row, column).
sparseAi = sparse(ii,jj,vv)

In [None]:
Matrix(sparseAi)

In [None]:
# Intended to append the contributions of individual `ind` to the inverse of the additive
# relationship matrix as (row, column, value) triplets in ii, jj and vv.
# NOTE: deliberately incomplete; exercise 11 asks the reader to fill in the body.
# As written it only looks up the positions and returns nothing.
function toAi!(ind,ped,ii,jj,vv)
    sire = ped.idMap[ind].sire
    dam  = ped.idMap[ind].dam
    sirePos = sire=="0" ? 0 : ped.idMap[sire].seqID    # 0 marks an unknown sire
    damPos  = dam =="0" ? 0 : ped.idMap[dam ].seqID    # 0 marks an unknown dam
    myPos   = ped.idMap[ind].seqID
    
    # TODO (exercise 11): push the triplets for `ind` onto ii, jj and vv here,
    # following the single-individual cell above.
    
    nothing
end

#### 11. Complete the function in the cell above such that the code in the next two cells gives the inverse of the additive relationship matrix

In [14]:
# Start from empty triplet vectors and let every individual add its contributions.
ii, jj, vv = Int64[], Int64[], Float64[]
foreach(ind -> toAi!(ind,ped,ii,jj,vv), keys(ped.idMap))

In [None]:
# sparse adds up values that share the same (row, column), which merges the
# per-individual contributions into one matrix.
sparseAi = sparse(ii,jj,vv);

In [None]:
Matrix(sparseAi)