# Run GMMAT Wald tests over the top SNPs (MAF > 0.01) for each trait

In [None]:
source("R/GMMAT_utils.R")
source("R/misc_utils.R")

In [None]:
library(GMMAT)
library(biclust)
library(tools)
library(data.table)
library(CMplot)
library(ggplot2)
library(ggrepel)

## Load our data

### Set paths for the kinship matrix and SNP matrix, which will be the same for every analysis*

In [None]:
# Kinship matrix file taken from the GEMMA output directory; handed to
# GMMAT_workflow() as `kinship_path` further down.
kinship_path <- "../07_GEMMA/output/1323_cohort_maf01_geno10.cXX.txt"

\* Note that we perform missing-rate filtering AFTER association mapping because we are using the same file as for GEMMA, which applies a missing-rate filter of 0.10. TODO: later, make a new file that is pre-filtered to save time when running GMMAT and FarmCPU, which do not exclude SNPs based on missing rate.

In [None]:
# GDS file of SNP genotypes, handed to GMMAT_workflow() as `gds_path`.
# The path is relative, so it is resolved against the current working directory.
gds_path <- "1323_cohort_maf01_geno10.snp.pass.gds"

In [None]:
# Core count forwarded to GMMAT_workflow() via `ncores`.
# NOTE(review): hard-coded for the original machine -- adjust when running elsewhere.
ncores <- 22

In [None]:
# Output directory for the per-timepoint runs; `outdir` is reassigned further
# down for the PC-trait batch.
outdir <- "Results/maf01_geno10/"

In [None]:
# Make sure the output directory (and any missing parent directories) exists.
if (!dir.exists(outdir)) dir.create(path = outdir, recursive = TRUE)

List of heritable traits

In [None]:
# Load the per-trait heritability table (the next cell reads its
# `raw_trait_name` and `h2` columns).
h2_table <- data.table::fread(
  "/mnt/data/NSF_GWAS/notebooks/InPlantaGWAS/11_Data_mining/sorted_h2_table_with_raw_names_added.csv"
)

In [None]:
# Raw names of traits whose heritability estimate exceeds 0.10.
# Rows with a missing h2 are excluded, matching what which() did.
heritable_traits <- with(
  h2_table,
  raw_trait_name[!is.na(h2) & h2 > 0.10]
)

## Timepoint 4 was not taken in Ph. 1 or Ph. 3

In [None]:
# Covariate file for the timepoint-4 traits. The commented-out line is the
# variant that only omits Ph. 8; the active file name indicates that Ph. 1 and
# Ph. 3 are omitted as well (timepoint 4 was not taken in those phases).
#covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh8.cov"
covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh1Ph3Ph8.cov"

In [None]:
# Collect the full paths of the 4-week binarized phenotype files.
phenotype_paths <- list.files(
  path = "../05_Parsing_phenodata/pheno_files/stem_regen/",
  pattern = "4w_binarized\\.header\\.",
  full.names = TRUE
)

In [None]:
# Disabled: trial run of the workflow on the second 4-week phenotype file only
# (phenotype_paths[2]). Uncomment to re-run.
# for(phenotype in phenotype_paths[2]){
#     GMMAT_workflow(phenotype_path = phenotype,
#                    covariate_path = covariate_path,
#                    kinship_path = kinship_path,
#                    gds_path = gds_path,
#                    ncores = ncores,
#                    n_SNPs = 10000,
#                    pthresh = 1e-4,
#                    outdir = outdir)
# }

## PC traits only exist for genotypes studied in all timepoints

In [None]:
# PC-trait results go in their own directory. Create it up front, mirroring the
# existence check done for the first output directory, so the workflow loop
# below does not depend on GMMAT_workflow() creating it.
outdir <- "Results/batch4_maf01_geno10/"
if (!dir.exists(outdir)) {
  dir.create(outdir, recursive = TRUE)
}

Same covariate file we just used for timepoint 4...

In [None]:
# Covariate file for the PC traits (same path as the timepoint-4 setting).
covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh1Ph3Ph8.cov"

In [None]:
# Gather the full paths of every binary phenotype file, then show how many
# were found.
phenotype_paths <- list.files(
  path = "../05_Parsing_phenodata/pheno_files/stem_regen",
  pattern = "\\.binary\\.header",
  full.names = TRUE
)

length(phenotype_paths)

In [None]:
# Disabled: run over every binary phenotype file.
# NOTE(review): this call omits n_SNPs, pthresh and outdir, so it presumably
# relies on GMMAT_workflow() defaults -- confirm in R/GMMAT_utils.R.
# for(phenotype in phenotype_paths){
#     GMMAT_workflow(phenotype_path = phenotype,
#                    covariate_path = covariate_path,
#                    kinship_path = kinship_path,
#                    gds_path = gds_path,
#                    ncores = ncores)
# }

In [None]:
# Narrow the phenotype files to those matching any of `desired_substrings`.
# NOTE(review): `desired_substrings` is not defined anywhere in the visible
# notebook -- confirm it is created (e.g. by a sourced helper or an earlier
# session) before running this cell.
phenotype_paths_subset <- phenotype_paths[grepl(paste(desired_substrings, collapse = "|"),
                                                phenotype_paths)]

In [None]:
# Then keep only the PC traits. This must run after the cell above, because it
# filters the existing `phenotype_paths_subset` further.
phenotype_paths_subset <- phenotype_paths_subset[grepl("PC", phenotype_paths_subset)]

In [None]:
# Show how many phenotype files remain after filtering.
length(phenotype_paths_subset)

In [None]:
# Call GMMAT_workflow() once per selected phenotype file, passing the shared
# covariate, kinship and GDS paths together with the run settings.
for (pheno_file in phenotype_paths_subset) {
  GMMAT_workflow(
    phenotype_path = pheno_file,
    covariate_path = covariate_path,
    kinship_path = kinship_path,
    gds_path = gds_path,
    ncores = ncores,
    n_SNPs = 1000,
    pthresh = 1e-4,
    outdir = outdir
  )
}

## Timepoint 5 was not taken in Ph. 1

In [None]:
# Covariate file for the timepoint-5 traits. The commented-out line is the
# variant that only omits Ph. 8; the active file name indicates that Ph. 1 is
# omitted as well (timepoint 5 was not taken in Ph. 1).
#covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh8.cov"
covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh1Ph8.cov"

In [None]:
# Collect the full paths of the 5-week binarized phenotype files.
phenotype_paths <- list.files(
  path = "../05_Parsing_phenodata/pheno_files/stem_regen/",
  pattern = "5w_binarized\\.header\\.",
  full.names = TRUE
)

In [None]:
# Disabled: run over every 5-week phenotype file.
# NOTE(review): this call omits n_SNPs, pthresh and outdir, so it presumably
# relies on GMMAT_workflow() defaults -- confirm in R/GMMAT_utils.R.
# for(phenotype in phenotype_paths){
#     GMMAT_workflow(phenotype_path = phenotype,
#                    covariate_path = covariate_path,
#                    kinship_path = kinship_path,
#                    gds_path = gds_path,
#                    ncores = ncores)
# }

## Timepoint 2 and 3 were taken for all phases.

However, note that we still leave Ph. 8 out to avoid a singular matrix: one phase must serve as the reference level that is absorbed into the intercept.

In [None]:
# Covariate file for the timepoint-2 and timepoint-3 traits; the file name
# indicates that only Ph. 8 is omitted.
covariate_path <- "../07_GEMMA/covariates/Stem_regen_diam_and_phases_sansPh8.cov"

In [None]:
# Collect the 2-week phenotype files followed by the 3-week ones (full paths),
# keeping that order.
phenotype_paths <- unlist(lapply(
  c("2w_binarized\\.header\\.", "3w_binarized\\.header\\."),
  function(timepoint_pattern) {
    list.files("../05_Parsing_phenodata/pheno_files/stem_regen/",
               pattern = timepoint_pattern,
               full.names = TRUE)
  }
))

In [None]:
# THIS HAS ALREADY BEEN RUN but with n_SNPs = 100. Re-run last part with n_SNPs = 1000 later...
# NOTE(review): the call below does not pass n_SNPs, pthresh or outdir, so the
# n_SNPs = 100 mentioned above was presumably the GMMAT_workflow() default at
# the time -- pass n_SNPs = 1000 (and outdir) explicitly when re-running.
# for(phenotype in phenotype_paths){
#     GMMAT_workflow(phenotype_path = phenotype,
#                    covariate_path = covariate_path,
#                    kinship_path = kinship_path,
#                    gds_path = gds_path,
#                    ncores = ncores)
# }