# Heritability
### Kelly Swarts
## Heritability is central to genomic prediction (it sets the upper bound for predictive ability)
## There are something like 52 ways to calculate heritability; one of them is to estimate the proportion of phenotypic variance explained by the K matrix

# 1.  Initial setup steps

## 1a. Prepare environment
Loading packages and functions into R

In [None]:
# Attach the packages used throughout this notebook (load order kept as-is).
library(sommer)       # Mixed effects models package
library(rTASSEL)
library(plot.matrix)

# Use a larger default size for figures rendered in the notebook.
options(repr.plot.width = 12, repr.plot.height = 5)

## 1b. Define input variables

In [None]:
# Keep a copy of the graphics parameters in effect at this point.
default.par <- par()

# Genotype data for maize and arabidopsis (in the "hdf5" format).
zmG <- readGenotypeTableFromPath("./data/282.poly_thinned30kbp.h5")
atG <- readGenotypeTableFromPath("./data/1001genomes_snp-short-indel_only_ACGTN.subsamp170_poly_minCov50_thinned30kpb.h5")
# Second arabidopsis genotype file (the "_KOfri" variant of the file above).
atGKO <- readGenotypeTableFromPath("./data/1001genomes_snp-short-indel_only_ACGTN.subsamp170_poly_minCov50_thinned30kpb_KOfri.h5")

# Phenotype data for maize and arabidopsis.
zmP <- readPhenotypeFromPath("./data/282_traits.txt")
atP <- readPhenotypeFromPath("./data/Arabidopsis_Phenotypes.trait")

# Site (SS) and taxa (TS) summary tables for maize and arabidopsis.
# These are tab-delimited files with a header row; as.is = TRUE keeps
# character columns as character. TRUE is spelled out because the shorthand
# T is an ordinary variable that can be reassigned.
zmSS <- read.table("./data/282.poly_thinned30kbp_SiteSummary.txt", header = TRUE, as.is = TRUE, sep = "\t")
atSS <- read.table("./data/1001genomes_snp-short-indel_only_ACGTN.subsamp170_poly_minCov50_thinned30kpb_SiteSummary.txt", header = TRUE, as.is = TRUE, sep = "\t")
zmTS <- read.table("./data/282.poly_thinned30kbp_TaxaSummary.txt", header = TRUE, as.is = TRUE, sep = "\t")
atTS <- read.table("./data/1001genomes_snp-short-indel_only_ACGTN.subsamp170_poly_minCov50_thinned30kpb_TaxaSummary.txt", header = TRUE, as.is = TRUE, sep = "\t")

# 2.  Generate K (kinship/genetic similarity) matrices
### We will calculate these in two different ways, each with different assumptions regarding population expectations for inbreeding
### "Centered" assumes Hardy-Weinberg equilibrium and is calculated after J. Yang, S. H. Lee, M. E. Goddard, P. M. Visscher, GCTA: a tool for genome-wide complex trait analysis. Am. J. Hum. Genet. 88, 76–82 (2011).
### "Normalized" allows for inbreeding and is calculated after J. B. Endelman, J.-L. Jannink, Shrinkage estimation of the realized relationship matrix. G3 2, 1405–1413 (2012).

In [None]:
# Centered K matrices (maize, then arabidopsis)
zm_cent <- kinshipMatrix(zmG, method = "Centered_IBS")
at_cent <- kinshipMatrix(atG, method = "Centered_IBS")

# Normalized K matrices (maize, then arabidopsis)
zm_norm <- kinshipMatrix(zmG, method = "Normalized_IBS")
at_norm <- kinshipMatrix(atG, method = "Normalized_IBS")

# 3. Calculate heritability with the sommer package, a really flexible mixed-effects modelling package

In [None]:
# ---- Arabidopsis ----
# Combine genotypes and phenotypes, then extract the phenotype table as a
# plain data frame for model fitting.
atGP <- readGenotypePhenotype(genoPathOrObj = atG, phenoPathDFOrObj = atP)
curP <- as.data.frame(getPhenotypeDF(tasObj = atGP)@listData)

# Flowering at 16 degrees: intercept-only fixed part, Taxa random effect with
# the centered kinship matrix supplied as its covariance structure.
atCent <- mmer(
  fixed = mean_ft16 ~ 1,
  random = ~ vs(Taxa, Gu = kinshipToRMatrix(at_cent)),
  data = curP
)
summary(atCent)
# Ratio of the first variance component to the sum of the first two.
# Presumably sigma[[1]] is the Taxa (kinship) component and sigma[[2]] the
# residual -- confirm against the summary output above.
vc <- atCent$sigma
print(paste("narrow sense heritability from the marker matrix:", vc[[1]] / (vc[[1]] + vc[[2]])))

# Flowering at 10 degrees: same model, different response.
atCent <- mmer(
  fixed = mean_ft10 ~ 1,
  random = ~ vs(Taxa, Gu = kinshipToRMatrix(at_cent)),
  data = curP
)
summary(atCent)
vc <- atCent$sigma
print(paste("narrow sense heritability from the marker matrix:", vc[[1]] / (vc[[1]] + vc[[2]])))

# ---- Maize ----
zmGP <- readGenotypePhenotype(genoPathOrObj = zmG, phenoPathDFOrObj = zmP)
curP <- as.data.frame(getPhenotypeDF(tasObj = zmGP)@listData)

# DTA trait, modelled the same way with the maize centered kinship matrix.
zmCent <- mmer(
  fixed = DTA ~ 1,
  random = ~ vs(Taxa, Gu = kinshipToRMatrix(zm_cent)),
  data = curP
)
summary(zmCent)
vc <- zmCent$sigma
print(paste("narrow sense heritability from the marker matrix:", vc[[1]] / (vc[[1]] + vc[[2]])))


### Why might heritability differ? Is anything surprising?

# 4. Calculate heritability using only the arabidopsis accessions that don't require vernalization (those that contain a deleterious FRIGIDA variant). Why might these differ?
# Rebuild the genotype/phenotype join from the "_KOfri" genotype table and
# extract the phenotype table as a plain data frame.
atGP <- readGenotypePhenotype(genoPathOrObj = atGKO, phenoPathDFOrObj = atP)
curP <- as.data.frame(getPhenotypeDF(tasObj = atGP)@listData)

# NOTE(review): the kinship matrix passed below is still at_cent, which was
# computed from atG rather than atGKO -- confirm this is intended.

# Flowering at 16 degrees
atCent <- mmer(
  fixed = mean_ft16 ~ 1,
  random = ~ vs(Taxa, Gu = kinshipToRMatrix(at_cent)),
  data = curP
)
summary(atCent)
# Ratio of the first variance component to the sum of the first two
# (presumably kinship vs. residual -- confirm against the summary output).
vc <- atCent$sigma
print(paste("narrow sense heritability from the marker matrix:", vc[[1]] / (vc[[1]] + vc[[2]])))

# Flowering at 10 degrees
atCent <- mmer(
  fixed = mean_ft10 ~ 1,
  random = ~ vs(Taxa, Gu = kinshipToRMatrix(at_cent)),
  data = curP
)
summary(atCent)
vc <- atCent$sigma
print(paste("narrow sense heritability from the marker matrix:", vc[[1]] / (vc[[1]] + vc[[2]])))
