# Produce synthetic phenotypes

Synthetic phenotypes include principal components, growth rates, and timepoint of first organ appearance. We will run GWAS over each of these synthetic phenotypes in addition to the raw traits used to calculate them. Later, we will also run multivariate GWAS over traits used to compute synthetic phenotypes, and see how these results compare.

In [None]:
library(factoextra) # used to obtain eigenvectors from PCA

In [None]:
library(data.table)
library(ggplot2) # For scree plot
library(magrittr) # For clean code
library(gplots) # For heat map

## Callus and shoot phenotypes from stem regeneration GWAS

### Load phenotypes and merge into a single dataframe

In [None]:
# Callus phenotype tables, one per weekly timepoint (weeks 2-5).
# Each file is tab-delimited with a header row; the first two columns are read
# as character and the third as numeric.
callus_2w <- read.delim("pheno_files/stem_regen/callus_2w.header.pheno",
                        colClasses = c("character", "character", "numeric"))
callus_3w <- read.delim("pheno_files/stem_regen/callus_3w.header.pheno",
                        colClasses = c("character", "character", "numeric"))
callus_4w <- read.delim("pheno_files/stem_regen/callus_4w.header.pheno",
                        colClasses = c("character", "character", "numeric"))
callus_5w <- read.delim("pheno_files/stem_regen/callus_5w.header.pheno",
                        colClasses = c("character", "character", "numeric"))

In [None]:
# Shoot phenotype tables, one per weekly timepoint (weeks 2-5).
# Each file is tab-delimited with a header row; the first two columns are read
# as character and the third as numeric.
shoot_2w <- read.delim("pheno_files/stem_regen/shoot_2w.header.pheno",
                       colClasses = c("character", "character", "numeric"))
shoot_3w <- read.delim("pheno_files/stem_regen/shoot_3w.header.pheno",
                       colClasses = c("character", "character", "numeric"))
shoot_4w <- read.delim("pheno_files/stem_regen/shoot_4w.header.pheno",
                       colClasses = c("character", "character", "numeric"))
shoot_5w <- read.delim("pheno_files/stem_regen/shoot_5w.header.pheno",
                       colClasses = c("character", "character", "numeric"))

In [None]:
# Guard against silent misalignment: cbind() pairs rows purely by position, so
# every table must list the same samples (first two columns) in the same order
# as callus_2w before their phenotype columns can be placed side by side.
ids_aligned <- vapply(
    list(callus_3w, callus_4w, callus_5w,
         shoot_2w, shoot_3w, shoot_4w, shoot_5w),
    function(tbl) {
        identical(tbl[[1]], callus_2w[[1]]) &&
            identical(tbl[[2]], callus_2w[[2]])
    },
    logical(1)
)
if (!all(ids_aligned)) {
    stop("Phenotype files do not list the same samples in the same order; ",
         "cannot combine them column-wise.", call. = FALSE)
}

# callus_2w contributes its two ID columns plus its own phenotype column.
# Every other table contributes only its third (phenotype) column, named
# explicitly after the table it came from.
all_pheno <- cbind(callus_2w,
                   callus_3w = callus_3w[, 3],
                   callus_4w = callus_4w[, 3],
                   callus_5w = callus_5w[, 3],
                   shoot_2w = shoot_2w[, 3],
                   shoot_3w = shoot_3w[, 3],
                   shoot_4w = shoot_4w[, 3],
                   shoot_5w = shoot_5w[, 3])

In [None]:
# Remove any "[, 3]" indexing suffix that cbind() left in the column names, so
# that each appended column is named after its source table.
names(all_pheno) <- gsub("\\[, 3\\]", "", names(all_pheno))

In [None]:
# Preview the first rows of the combined phenotype table.
head(all_pheno)

### Calculate PCs

We must first omit rows containing NAs, since `prcomp` cannot handle missing values. Note that a sample is dropped if *any* of its eight traits is missing. We then record the labels of the samples that remain. We will need these labels to add the dropped samples back in (as NA) and to order genotypes properly after PCA, since phenotype files must contain all genotypes in a fixed order.

In [None]:
# Drop every row of all_pheno that has an NA in any column.
all_pheno_no_NA <- na.omit(all_pheno)

In [None]:
# Keep the first two columns of the NA-free table as the labels of the samples
# that go into PCA.
labels <- all_pheno_no_NA[,1:2]

#### Callus

In [None]:
# PCA over columns 3-6 of the NA-free table (the four callus timepoints),
# using prcomp() with its default settings.
pca_callus <- prcomp(all_pheno_no_NA[,3:6])

In [None]:
# Per-sample coordinates on each PC, extracted with get_pca_ind().
ind_callus <- get_pca_ind(pca_callus)$coord

In [None]:
# Inspect the rotation matrix (trait loadings on each PC).
head(pca_callus$rotation)

In [None]:
# Write each of the 4 callus PCs to its own phenotype file (with and without a
# header line).
#
# Scores are placed back onto the full sample list of callus_2w, in that
# table's original row order; samples that were dropped before PCA get NA.
# match() is used instead of merge() because merge() sorts rows by the key
# columns, which would not preserve the genotype order of the input files.
# IDs were read from tab-delimited files, so a tab is used as the separator of
# the composite FID/IID key.
sample_key <- paste(callus_2w$FID, callus_2w$IID, sep = "\t")
pca_key <- paste(labels$FID, labels$IID, sep = "\t")
row_in_pca <- match(sample_key, pca_key) # NA for samples absent from the PCA

for (i in seq_len(4)) {
    PC_pheno_callus <- data.frame(FID = callus_2w$FID,
                                  IID = callus_2w$IID,
                                  score = unname(ind_callus[row_in_pca, i]),
                                  stringsAsFactors = FALSE)
    colnames(PC_pheno_callus)[3] <- paste0("callus_PC", i)

    # na = "NA": fwrite() otherwise writes missing values as empty fields.
    fwrite(PC_pheno_callus, paste0("pheno_files/stem_regen/Callus_PC", i, ".header.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = TRUE, na = "NA")
    fwrite(PC_pheno_callus, paste0("pheno_files/stem_regen/Callus_PC", i, ".noheader.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = FALSE, na = "NA")
}

##### Provide a function for plotting PCA results, which we will use now and also later for other PC traits

In [None]:
library(RColorBrewer)

In [None]:
# Display entries 3-9 of the reversed 11-colour "RdBu" palette.
rev(brewer.pal(11,"RdBu"))[3:9]

In [None]:
# Set all four base-graphics plot margins to 1.
par(mar=c(1,1,1,1))

In [None]:
# Plot the results of a PCA fitted with prcomp().
#
# Draws a heat map of the rotation matrix (loadings), annotated with the
# loading values rounded to 3 digits, and then builds a scree plot of the
# proportion of variance explained by each PC.
#
# Args:
#   PC_object: object returned by prcomp(); its $rotation, $sdev and $scale
#              elements are used.
#
# Returns:
#   The scree plot as a ggplot object. It is the value of the last expression,
#   so it is displayed automatically only when the function is called at top
#   level; wrap the call in print() inside loops or other functions.
plot_PC <- function(PC_object){

    loadings <- PC_object$rotation

    # Heat map is drawn as a side effect; no clustering or reordering.
    heatmap.2(loadings,
              Rowv=FALSE,
              Colv=FALSE,
              cellnote = round(loadings, digits = 3),
              dendrogram = 'none',
              notecol="black",
              trace='none',
              key=TRUE,
              col=rev(brewer.pal(11,"RdBu")[2:10]),
              lwid = c(.01,.99),
              lhei = c(.01,.99),
              margins = c(5,15 ),
              notecex = 1.4)

    # One label per component actually present in the fit. The count comes
    # from $sdev because $center has one entry per input variable (or is FALSE
    # when centering was disabled), which need not equal the number of PCs.
    variances <- PC_object$sdev^2
    pc_names <- paste0("PC", seq_along(variances))

    # A factor with explicit levels keeps PC1, PC2, ..., PC10 in numeric order
    # on the x axis instead of alphabetical order.
    var_explained_df <- data.frame(PC = factor(pc_names, levels = pc_names),
                                   var_explained = variances / sum(variances))

    # prcomp() stores the scaling it applied as a numeric vector, or FALSE
    # when no scaling was done.
    data_kind <- if (is.numeric(PC_object$scale)) "scaled" else "unscaled"

    # Base-graphics alternative: screeplot(PC_object)
    ggplot(var_explained_df, aes(x=PC, y=var_explained, group=1)) +
      geom_point(size=4) +
      geom_line() +
      labs(title=paste0("Scree plot: PCA on ", data_kind, " data")) +
      ylab("Variance explained") +
      theme(text = element_text(size=20))
}



In [None]:
# Loadings heat map and scree plot for the callus PCA.
plot_PC(pca_callus)

In [None]:
library(data.table)

#### Shoot

In [None]:
# PCA over columns 7-10 of the NA-free table (the four shoot timepoints),
# followed by extraction of the per-sample coordinates on each PC.
pca_shoot <- prcomp(all_pheno_no_NA[, 7:10])
ind_shoot <- get_pca_ind(pca_shoot)[["coord"]]

In [None]:
# Write each of the 4 shoot PCs to its own phenotype file (with and without a
# header line).
#
# Scores are placed back onto the full sample list of callus_2w, in that
# table's original row order; samples that were dropped before PCA get NA.
# match() is used instead of merge() because merge() sorts rows by the key
# columns, which would not preserve the genotype order of the input files.
# IDs were read from tab-delimited files, so a tab is used as the separator of
# the composite FID/IID key.
sample_key <- paste(callus_2w$FID, callus_2w$IID, sep = "\t")
pca_key <- paste(labels$FID, labels$IID, sep = "\t")
row_in_pca <- match(sample_key, pca_key) # NA for samples absent from the PCA

for (i in seq_len(4)) {
    PC_pheno_shoot <- data.frame(FID = callus_2w$FID,
                                 IID = callus_2w$IID,
                                 score = unname(ind_shoot[row_in_pca, i]),
                                 stringsAsFactors = FALSE)
    colnames(PC_pheno_shoot)[3] <- paste0("shoot_PC", i)

    # na = "NA": fwrite() otherwise writes missing values as empty fields.
    fwrite(PC_pheno_shoot, paste0("pheno_files/stem_regen/Shoot_PC", i, ".header.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = TRUE, na = "NA")
    fwrite(PC_pheno_shoot, paste0("pheno_files/stem_regen/Shoot_PC", i, ".noheader.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = FALSE, na = "NA")
}

In [None]:
# Loadings heat map and scree plot for the shoot PCA.
plot_PC(pca_shoot)

#### Callus *and* shoot

In [None]:
# PCA over columns 3-10 of the NA-free table (all callus and shoot
# timepoints), followed by extraction of the per-sample coordinates on each PC.
pca <- prcomp(all_pheno_no_NA[, 3:10])
ind <- get_pca_ind(pca)[["coord"]]

In [None]:
# Write the top 4 PCs of the combined callus + shoot PCA, each to its own
# phenotype file (with and without a header line).
#
# Scores are placed back onto the full sample list of callus_2w, in that
# table's original row order; samples that were dropped before PCA get NA.
# match() is used instead of merge() because merge() sorts rows by the key
# columns, which would not preserve the genotype order of the input files.
# IDs were read from tab-delimited files, so a tab is used as the separator of
# the composite FID/IID key.
sample_key <- paste(callus_2w$FID, callus_2w$IID, sep = "\t")
pca_key <- paste(labels$FID, labels$IID, sep = "\t")
row_in_pca <- match(sample_key, pca_key) # NA for samples absent from the PCA

for (i in seq_len(4)) {
    PC_pheno <- data.frame(FID = callus_2w$FID,
                           IID = callus_2w$IID,
                           score = unname(ind[row_in_pca, i]),
                           stringsAsFactors = FALSE)
    colnames(PC_pheno)[3] <- paste0("CallusShoot_PC", i)

    # na = "NA": fwrite() otherwise writes missing values as empty fields.
    fwrite(PC_pheno, paste0("pheno_files/stem_regen/CallusShoot_PC", i, ".header.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = TRUE, na = "NA")
    fwrite(PC_pheno, paste0("pheno_files/stem_regen/CallusShoot_PC", i, ".noheader.pheno"), sep="\t", row.names = FALSE, quote =FALSE, col.names = FALSE, na = "NA")
}

In [None]:
# Loadings heat map and scree plot for the combined callus + shoot PCA.
plot_PC(pca)

### Calculate growth rate

In [None]:
# Growth per sample: week-5 value minus week-2 value. An NA at either
# timepoint yields NA. Computed on all_pheno (not the NA-free table), so every
# sample keeps its row.
# NOTE(review): all_pheno$callus_2w takes its name from the header of
# callus_2w.header.pheno; this assumes that header is "callus_2w" (or a name
# that `$` can partially match uniquely) - TODO confirm.
callus_growth <- all_pheno$callus_5w - all_pheno$callus_2w
shoot_growth <- all_pheno$shoot_5w - all_pheno$shoot_2w

In [None]:
# Assemble the growth phenotype tables as data frames.
# cbind() on character IDs and numeric growth values would build a character
# matrix, turning the phenotype into strings before it is written; a data
# frame keeps the IDs as character and the growth values numeric.
callus_out <- data.frame(FID = all_pheno$FID,
                         IID = all_pheno$IID,
                         callus_growth_wk2_wk5 = callus_growth,
                         stringsAsFactors = FALSE)
shoot_out <- data.frame(FID = all_pheno$FID,
                        IID = all_pheno$IID,
                        shoot_growth_wk2_wk5 = shoot_growth,
                        stringsAsFactors = FALSE)

In [None]:
# Name the three output columns: the two ID columns and the growth phenotype.
colnames(callus_out) <- c("FID", "IID", "callus_growth_wk2_wk5")
colnames(shoot_out) <- c("FID", "IID", "shoot_growth_wk2_wk5")

In [None]:
# Callus growth: one file with a header line, one without. Missing values are
# written as "NA".
fwrite(callus_out, "pheno_files/stem_regen/callus_growth_wk2_wk5.header.pheno",
       sep = "\t", row.names = FALSE, quote = FALSE, col.names = TRUE, na = "NA")
fwrite(callus_out, "pheno_files/stem_regen/callus_growth_wk2_wk5.noheader.pheno",
       sep = "\t", row.names = FALSE, quote = FALSE, col.names = FALSE, na = "NA")

# Shoot growth: same pair of files.
fwrite(shoot_out, "pheno_files/stem_regen/shoot_growth_wk2_wk5.header.pheno",
       sep = "\t", row.names = FALSE, quote = FALSE, col.names = TRUE, na = "NA")
fwrite(shoot_out, "pheno_files/stem_regen/shoot_growth_wk2_wk5.noheader.pheno",
       sep = "\t", row.names = FALSE, quote = FALSE, col.names = FALSE, na = "NA")