## Brief Description of the Pedigree Module

In [3]:
using DataFrames  

### Pedigree node type:
Data for each pedigree member will be stored in a pedigree node (PedNode). The "seqID" of each PedNode will be assigned an integer value going from 1 to n, the number of pedigree members, such that ancestors will have lower IDs than their descendants. The string IDs of the parents will be stored in "sire" and "dam". If the individual is a founder, "sire" and "dam" will both contain "0". The inbreeding coefficient of an individual is stored in "f".

In [2]:
# One member of the pedigree. Declared with `type` (mutable) because seqID is
# assigned after the node has been constructed.
type PedNode
    seqID::Int64          # sequential ID; nodes are created with 0, which code! treats as "not yet assigned"
    sire::UTF8String      # string ID of the sire; "0" when there is none
    dam::UTF8String       # string ID of the dam; "0" when there is none
    f::Float64            # inbreeding coefficient; nodes are created with -1.0
end

### The Pedigree type:
The pedigree nodes will be stored in "idMap", which is an associative array that can be used to store and retrieve key-value pairs. An associative array in Julia has type "Dict". The key in idMap is the string ID of the individual, and the value associated with the key is the PedNode of this pedigree member. The "currentID" is used to assign the sequential IDs, and, as described later, "aij" is a sparse matrix that is used to store relationship coefficients used to compute inbreeding.

In [4]:
# Container for a whole pedigree.
type Pedigree
    currentID::Int64                       # next sequential ID to hand out; code! assigns it and then increments it
    idMap::Dict{UTF8String,PedNode}        # string ID of an individual => its PedNode
    aij::SparseMatrixCSC{Float64,Int64}    # sparse storage for relationship coefficients (not filled in by the cells shown here)
end

### Read the pedigree file into a DataFrame object

In [5]:
# Space-separated pedigree file without a header line.
# The cells below use column 1 as the individual, column 2 as the sire and column 3 as the dam.
pedFile = "../data/ex1.ped"
df = readtable(pedFile,
               header    = false,
               separator = ' ',
               eltypes   = fill(UTF8String,3))   # read all three columns as strings

Unnamed: 0,x1,x2,x3
1,FFG1,C2,C99
2,AA92,A1,FFG1
3,AA93,A1,AA92
4,A1,C2,223G


### Initialize a Pedigree object

In [6]:
aij   = spzeros(Float64,1,1)              # 1x1 sparse placeholder matrix with no stored entries
idMap = Dict{UTF8String,PedNode}()        # empty "Dict": keys are UTF8String IDs, values are PedNodes
ped   = Pedigree(1,idMap,aij)             # the Pedigree object we will call ped; currentID starts at 1

Pedigree(1,Dict{UTF8String,PedNode}(),1x1 sparse matrix with 0 Float64 entries:)

#### Make entries for sires in the idMap Dict
The PedNode value will be set to (0,"0","0",-1.0) for each sire

In [7]:
n = size(df,1)                                   # number of rows read from the pedigree file
for sireID in df[:,2]                            # column 2: sire of each row
    # nothing to do for the "0" placeholder or for a sire that already has an entry
    (sireID == "0" || haskey(ped.idMap,sireID)) && continue
    ped.idMap[sireID] = PedNode(0,"0","0",-1.0)  # placeholder node: seqID 0, parents "0", f -1.0
end

In [8]:
ped.idMap    # display the current contents of the map

Dict{UTF8String,PedNode} with 2 entries:
  "C2" => PedNode(0,"0","0",-1.0)
  "A1" => PedNode(0,"0","0",-1.0)

#### Make entries for dams in the idMap Dict
The PedNode value will be set to (0,"0","0",-1.0) for each dam

In [10]:
for damID in df[:,3]                                      # column 3: dam of each row
    if damID != "0"                                       # "0" is a placeholder, not an individual
        # get! inserts the placeholder node only when damID has no entry yet
        get!(ped.idMap, damID, PedNode(0,"0","0",-1.0))
    end
end

In [11]:
ped.idMap    # display the current contents of the map

Dict{UTF8String,PedNode} with 6 entries:
  "FFG1" => PedNode(0,"0","0",-1.0)
  "C2"   => PedNode(0,"0","0",-1.0)
  "C99"  => PedNode(0,"0","0",-1.0)
  "AA92" => PedNode(0,"0","0",-1.0)
  "A1"   => PedNode(0,"0","0",-1.0)
  "223G" => PedNode(0,"0","0",-1.0)

#### Make entries for individuals in the pedigree and fill in sire and dam strings of PedNode

In [12]:
j = 1                                             # row counter, advanced by hand inside the loop
for id in df[:,1]                                 # column 1: the individual itself
    sire, dam = df[j,2], df[j,3]                  # parents recorded on the same row
    ped.idMap[id] = PedNode(0,sire,dam,-1.0)      # creates the entry or replaces an earlier placeholder
    j += 1
end
ped.idMap                                         # display the updated map

Dict{UTF8String,PedNode} with 7 entries:
  "FFG1" => PedNode(0,"C2","C99",-1.0)
  "C2"   => PedNode(0,"0","0",-1.0)
  "C99"  => PedNode(0,"0","0",-1.0)
  "AA92" => PedNode(0,"A1","FFG1",-1.0)
  "AA93" => PedNode(0,"A1","AA92",-1.0)
  "A1"   => PedNode(0,"C2","223G",-1.0)
  "223G" => PedNode(0,"0","0",-1.0)

#### Can use enumerate

In [13]:
# enumerate supplies the row number j together with the ID, so no manual counter is needed
for (j,id) in enumerate(df[:,1])
    node = PedNode(0,df[j,2],df[j,3],-1.0)   # sire and dam come from row j
    ped.idMap[id] = node
end
ped.idMap                                    # display the updated map

Dict{UTF8String,PedNode} with 7 entries:
  "FFG1" => PedNode(0,"C2","C99",-1.0)
  "C2"   => PedNode(0,"0","0",-1.0)
  "C99"  => PedNode(0,"0","0",-1.0)
  "AA92" => PedNode(0,"A1","FFG1",-1.0)
  "AA93" => PedNode(0,"A1","AA92",-1.0)
  "A1"   => PedNode(0,"C2","223G",-1.0)
  "223G" => PedNode(0,"0","0",-1.0)

### Recursive function to assign sequential IDs

In [21]:
# Assign a sequential ID to individual `id`, first numbering its sire and then its
# dam (recursively) if they do not have one yet. An individual whose seqID is
# already nonzero is left untouched. Uses and advances ped.currentID.
# The idea for this function came from a perl script by Bernt Guldbrandtsen
function code!(ped::Pedigree,id::UTF8String)
    node = ped.idMap[id]
    node.seqID != 0 && return                  # already numbered: nothing more to do
    for parentID in (node.sire, node.dam)      # sire first, then dam
        # "0" means there is no parent to visit; only recurse into unnumbered parents
        if parentID != "0" && ped.idMap[parentID].seqID == 0
            code!(ped,parentID)
        end
    end
    node.seqID = ped.currentID                 # give this individual ped.currentID ...
    ped.currentID += 1                         # ... and advance the counter
end

code! (generic function with 1 method)

In [22]:
# Visit every individual in the map; code! returns immediately for one that already has a seqID.
for entry in ped.idMap
    code!(ped,entry[1])      # entry[1] is the key, i.e. the string ID
end

In [23]:
ped.idMap    # display the map with the sequential IDs filled in

Dict{UTF8String,PedNode} with 7 entries:
  "FFG1" => PedNode(3,"C2","C99",-1.0)
  "C2"   => PedNode(1,"0","0",-1.0)
  "AA92" => PedNode(6,"A1","FFG1",-1.0)
  "C99"  => PedNode(2,"0","0",-1.0)
  "AA93" => PedNode(7,"A1","AA92",-1.0)
  "A1"   => PedNode(5,"C2","223G",-1.0)
  "223G" => PedNode(4,"0","0",-1.0)

In [28]:
# Print the pedigree ordered by sequential ID: one line per individual with
# its seqID, string ID, sire, dam and inbreeding coefficient.
n = length(ped.idMap)
a = Array(String,n,3)            # row k: (ID, sire, dam) of the individual with seqID k
f = Array(Float64,n)             # f[k]: inbreeding coefficient of the individual with seqID k
for (id,node) in ped.idMap       # iterate entries directly instead of looking each key up again
    k = node.seqID

    a[k,1] = id
    a[k,2] = node.sire
    a[k,3] = node.dam
    f[k]   = node.f
end
for i=1:n                        # all n individuals, not a hard-coded count
    @printf("%4d %10s %10s %10s %6.3f \n", i, a[i,1], a[i,2], a[i,3], f[i])
end

   1         C2          0          0 -1.000 
   2        C99          0          0 -1.000 
   3       FFG1         C2        C99 -1.000 
   4       223G          0          0 -1.000 
   5         A1         C2       223G -1.000 
   6       AA92         A1       FFG1 -1.000 
   7       AA93         A1       AA92 -1.000 
