# Power Evaluation

### Run SAIGE (GWAS)

#### Step 0 (Create Sparse GRM)

In [None]:
# Step 0: build the sparse GRM with SAIGE's createSparseGRM.R.
# The container is started detached (-d) with the storage volume mounted at the
# same path inside the container, so host paths can be passed through unchanged.
storage=/media/leelabsg-storage0
meta_saige=${storage}/kisung/META-SAIGE

docker run -d -v "${storage}:${storage}" wzhou88/saige:1.1.9 createSparseGRM.R \
    --plinkFile="${meta_saige}/data/genotype/common/WES_common_merged" \
    --nThreads=64 \
    --outputPrefix="${meta_saige}/result/step0/UKB_sparseGRM_200k" \
    --numRandomMarkerforSparseKin=2000 \
    --relatednessCutoff=0.05

#### Step 1 (Fitting the null model)

In [None]:
# Step 1: fit the SAIGE null GLMM for every simulated phenotype of one cohort.
#
# Positional arguments:
#   $1  first simulation index (loop start, inclusive)
#   $2  last simulation index (loop end, inclusive)
#   $3  cohort number (selects group<N>_sampleid.txt and tags the output prefix)

# Abort with a usage message when an argument is missing, instead of failing
# inside the arithmetic loop or silently building paths with empty fields.
sim_start="${1:?usage: SCRIPT SIM_START SIM_END COHORT}"
sim_end="${2:?usage: SCRIPT SIM_START SIM_END COHORT}"
cohort="${3:?usage: SCRIPT SIM_START SIM_END COHORT}"

prev="0.01 0.05"

# Host directory mounted at the same path inside the container.
root=/media/leelabsg-storage0/kisung
base=${root}/META-SAIGE
# Sparse GRM written by step 0 (name encodes the cutoff and marker count).
grm=${base}/result/step0/UKB_sparseGRM_200k_relatednessCutoff_0.05_2000_randomMarkersUsed.sparseGRM.mtx

for ((n = sim_start; n <= sim_end; n++))
do
    # prev is a space-separated list; word splitting is intentional.
    for p in ${prev}
    do
        # ii / jj / kk index the simulation settings encoded in the phenotype
        # file names (their meaning is defined where the phenotypes are made).
        for ii in {1..2}
        do
            for jj in {1..3}
            do
                for kk in {1..2}
                do
                    tag="sim${n}_${ii}_${jj}_${kk}_prev${p}"
                    docker run -v "${root}:${root}" wzhou88/saige:1.1.9 step1_fitNULLGLMM.R \
                        --sparseGRMFile="${grm}" \
                        --sparseGRMSampleIDFile="${grm}.sampleIDs.txt" \
                        --useSparseGRMtoFitNULL=TRUE \
                        --plinkFile="${base}/data/genotype/common/WES_common_merged" \
                        --phenoFile="${base}/data/phenotype/total_231005/pheno_${tag}.txt" \
                        --phenoCol=pheno_binary \
                        --covarColList=cov1,cov2 \
                        --sampleIDColinphenoFile=IID \
                        --SampleIDIncludeFile="${base}/data/group${cohort}_sampleid.txt" \
                        --traitType=binary \
                        --outputPrefix="${base}/result/step1_231007/step1_cohort${cohort}_${tag}" \
                        --nThreads=8 \
                        --isCateVarianceRatio=FALSE \
                        --LOCO=FALSE \
                        --IsOverwriteVarianceRatioFile=TRUE
                done
            done
        done
    done
done

#### Step 2 (Variant-level association test)

In [None]:
# Step 2: run SAIGE step2_SPAtests.R for one simulation replicate of one
# cohort, across every prevalence / ii / jj / kk setting.
#
# Positional arguments:
#   $1  simulation number
#   $2  cohort number

# Abort with a usage message when an argument is missing; otherwise the model
# and output paths below would be built with empty fields.
sim="${1:?usage: SCRIPT SIM COHORT}"
cohort="${2:?usage: SCRIPT SIM COHORT}"

prev="0.01 0.05"

# Host directory mounted at the same path inside the container.
root=/media/leelabsg-storage0/kisung
base=${root}/META-SAIGE
geno=${base}/data/genotype/rare_231005/merged
# Sparse GRM written by step 0.
grm=${base}/result/step0/UKB_sparseGRM_200k_relatednessCutoff_0.05_2000_randomMarkersUsed.sparseGRM.mtx

# prev is a space-separated list; word splitting is intentional.
for p in ${prev}
do
    for ii in {1..2}
    do
        for jj in {1..3}
        do
            for kk in {1..2}
            do
                tag="cohort${cohort}_sim${sim}_${ii}_${jj}_${kk}_prev${p}"
                # Null model and variance ratio come from the step 1 output
                # prefix that carries the same tag.
                step1=${base}/result/step1_231007/step1_${tag}
                docker run -v "${root}:${root}" wzhou88/saige:1.1.9 step2_SPAtests.R \
                    --bedFile="${geno}.bed" \
                    --bimFile="${geno}.bim" \
                    --famFile="${geno}.fam" \
                    --AlleleOrder=alt-first \
                    --minMAF=0 \
                    --minMAC=0.5 \
                    --GMMATmodelFile="${step1}.rda" \
                    --varianceRatioFile="${step1}.varianceRatio.txt" \
                    --sparseGRMFile="${grm}" \
                    --sparseGRMSampleIDFile="${grm}.sampleIDs.txt" \
                    --groupFile="${root}/dnanexus/group_files/UKBexome_all_chr.txt" \
                    --annotation_in_groupTest='lof,missense:lof,missense:lof:synonymous' \
                    --maxMAF_in_groupTest=0.0001,0.001,0.01 \
                    --SAIGEOutputFile="${base}/result/step2_231010/step2_${tag}" \
                    --is_output_markerList_in_groupTest=TRUE \
                    --LOCO=FALSE \
                    --is_fastTest=TRUE
            done
        done
    done
done

### Running Meta-SAIGE

In [None]:
# Run Meta-SAIGE (RV_meta_GC.R) for one simulation replicate, combining four
# cohorts (White British groups 1-3 and white non-British) per chromosome and
# per prevalence / ii / jj / kk setting.
#
# Positional arguments:
#   $1  simulation number

# Abort with a usage message when the simulation number is missing; otherwise
# every GWAS / output path below would contain an empty "sim" field.
sim="${1:?usage: SCRIPT SIM}"

prev="0.01 0.05"
anno=missense_lof
maxMAF=0.001
chrs="2 5 6 9 11 13"
case=1

# Host directory mounted at the same path inside the container.
root=/media/leelabsg-storage0/kisung
data=${root}/META-SAIGE/data
result=${root}/META-SAIGE/result

# chrs and prev are space-separated lists; word splitting is intentional.
for chr in ${chrs}
do
    # Per-chromosome path stems shared by the marker-info files
    # ("<stem>.marker_info.txt") and the gene file prefixes ("<stem>_").
    g1=${data}/group1/group1_${maxMAF}_${anno}/WB_case${case}_group1_chr${chr}
    g2=${data}/group2/group2_${maxMAF}_${anno}/WB_case${case}_group2_chr${chr}
    g3=${data}/group3/group3_${maxMAF}_${anno}/WB_case${case}_group3_chr${chr}
    nonb=${data}/white_nonB/${maxMAF}_${anno}/white_nonB_chr${chr}

    for p in ${prev}
    do
        for ii in {1..2}
        do
            for jj in {1..3}
            do
                for kk in {1..2}
                do
                    tag="sim${sim}_${ii}_${jj}_${kk}_prev${p}"
                    # The four values after each multi-valued option are given
                    # in the same cohort order: group1, group2, group3, white_nonB.
                    docker run -v "${root}:${root}" meta-saige:latest Rscript /app/RV_meta_GC.R \
                        --num_cohorts 4 \
                        --chr "${chr}" \
                        --col_co 10 \
                        --info_file_path "${g1}.marker_info.txt" \
                        "${g2}.marker_info.txt" \
                        "${g3}.marker_info.txt" \
                        "${nonb}.marker_info.txt" \
                        --gene_file_prefix "${g1}_" \
                        "${g2}_" \
                        "${g3}_" \
                        "${nonb}_" \
                        --gwas_path "${result}/step2_single_231020/step2_cohort1_${tag}" \
                        "${result}/step2_single_231020/step2_cohort2_${tag}" \
                        "${result}/step2_single_231020/step2_cohort3_${tag}" \
                        "${result}/step2_single_231106_white_nonB/step2_white_nonB_${tag}" \
                        --output_prefix "${result}/meta_v2_231107/meta_sim${sim}_chr${chr}_${ii}_${jj}_${kk}_prev${p}.txt"
                done
            done
        done
    done
done

#### Power for weighted Fisher

In [None]:
library(data.table)

# Prevalence settings looped over by the power script below.
prev <- c(0.01, 0.05)

# Combine p-values with the weighted Fisher method.
#
# Each p-value is mapped to the upper-tail quantile of a Gamma distribution
# with scale 2 and shape N * weight[i] / sum(weight), where N is the number of
# non-missing p-values. The sum of these quantiles is then compared against a
# Gamma(shape = N, scale = 2) distribution. With equal weights every shape is 1,
# which is the ordinary Fisher combination.
#
# Arguments:
#   p           numeric vector of p-values; NA entries are dropped together
#               with the matching weights (and signs in two-tailed mode).
#   weight      numeric vector of weights, same length as p; NULL (default)
#               means equal weights.
#   is.onetail  TRUE: combine p as given. FALSE: combine once per direction
#               using eff.sign and return twice the smaller result (capped at 1).
#   eff.sign    effect signs, same length as p; required only when
#               is.onetail = FALSE. Entries > 0 / < 0 select the direction;
#               entries equal to 0 keep their original p in both directions.
#
# Returns:
#   list(p = combined p-value) when is.onetail = TRUE, otherwise
#   list(p = ..., overall.eff.direction = "+" or "-").
#   When no non-missing p-value remains, p is NA (and the direction is NA)
#   instead of the error the two-tailed branch used to raise on `if (NA > 1)`.
wFisher = function(p, weight = NULL, is.onetail = TRUE, eff.sign)
{
  if(is.null(weight)){weight = rep(1, length(p))}
  if(!is.onetail)
  {
    if(missing(eff.sign)){stop("eff.sign must be supplied when is.onetail = FALSE.")}
    if(length(eff.sign) != length(p)){stop("The length of p and eff.sign vector must be identical.")}
  }

  # Drop missing p-values together with their weights / signs.
  idx.na = which(is.na(p))
  if(length(idx.na)>0){
    p = p[-idx.na];
    weight = weight[-idx.na];
    if(!is.onetail)
    {
      eff.sign = eff.sign[-idx.na]
    }
  }

  N = length(p)
  if(N != length(weight)){stop("The length of p and weight vector must be identical.")}

  # Nothing left to combine: report NA rather than indexing past the end of
  # empty vectors (which `1:length(p)` would do).
  if(N == 0)
  {
    if(is.onetail){return(list(p = NA_real_))}
    return(list(p = NA_real_, overall.eff.direction = NA_character_))
  }

  # Per-study Gamma shapes: the weights rescaled so that they sum to N.
  Ns = N * weight / sum(weight)

  # Combined upper-tail p-value for one vector of (one-sided) p-values.
  # qgamma is vectorised over p and shape, so no element-wise loop is needed.
  combine = function(pv)
  {
    G = qgamma(p = pv, shape = Ns, scale = 2, lower.tail = FALSE)
    pgamma(q = sum(G), shape = N, scale = 2, lower.tail = FALSE)
  }

  if(is.onetail)
  {
    return(list(p = min(1, combine(p))))
  }

  idx_pos = which(eff.sign > 0)
  idx_neg = which(eff.sign < 0)

  # positive direction: halve p where the effect is positive, mirror otherwise
  p1 = p
  p1[idx_pos] = p[idx_pos]/2
  p1[idx_neg] = 1-p[idx_neg]/2
  resultP1 = combine(p1)

  # negative direction
  p2 = p
  p2[idx_pos] = 1-p[idx_pos]/2
  p2[idx_neg] = p[idx_neg]/2
  resultP2 = combine(p2)

  # Two directions were tested, so double the smaller p-value and cap at 1.
  resultP = min(1, 2 * min(resultP1, resultP2))
  overall.eff.direction =
    if(is.na(resultP1) || is.na(resultP2)){NA_character_}
    else if(resultP1 <= resultP2){"+"}
    else{"-"}

  return(list(p = resultP, overall.eff.direction = overall.eff.direction))
}

# Power of the weighted Fisher combination across five cohorts.
# For every prevalence / ii / jj / kk setting, the per-cohort gene p-values of
# 100 simulation replicates are combined with wFisher, and power is the
# fraction of (replicate, gene) pairs with combined p < 2.5e-6.

gene_list <- c("IGSF9B", "BRCA2", "APOB", "GPRIN1", "CFB", "DDR1", "GPSM3", "HLA-DRB1", "DBH", "IL33")
# Passed to wFisher as weights, positionally matching the order in which the
# p-values are collected below: White British cohorts 1-3, white non-British,
# African.
sample_size <- c(55655, 55653, 55652, 22136, 1240)
setwd("/media/leelabsg-storage0/kisung/META-SAIGE/result/step2_231010")

# P-value of gene `g` for the "missense;lof" mask at max_MAF 0.001 in one SAIGE
# step 2 result table. Returns NA when the gene has no such row, so that one
# cohort missing a gene is skipped by wFisher (which drops NA p-values and
# their weights) instead of aborting the run through its length check.
gene_pval <- function(d, g) {
    hit <- d[which((d$Region == g) & (d$Group == "missense;lof") & (d$max_MAF == 0.001)),]$Pvalue
    if (length(hit) == 0) {
        return(NA_real_)
    }
    hit
}

result <- NULL
for (p in prev) {
    for (ii in 1:2) {
        for (jj in 1:3) {
            for (kk in 1:2) {
                out <- NULL
                for (n in 1:100) {
                    suffix <- paste0("_sim", n, "_", ii, "_", jj, "_", kk, "_prev", p)
                    # NOTE(review): the white non-British file is looked up as
                    # "step2_AFR_sim..." inside the white_nonB directory, while
                    # the Meta-SAIGE cell reads "step2_white_nonB_sim..." from
                    # step2_single_231106_white_nonB. This may be a copy-paste
                    # slip; confirm against the files on disk.
                    fnames <- c(
                        # White British cohorts 1-3 (relative to the working directory)
                        paste0("step2_cohort", 1:3, suffix),
                        # white nonB
                        paste0("/media/leelabsg-storage0/kisung/META-SAIGE/result/step2_231106_white_nonB/step2_AFR", suffix),
                        # African
                        paste0("/media/leelabsg-storage0/kisung/META-SAIGE/result/step2_231103_AFR/step2_AFR", suffix)
                    )
                    # Read each cohort's results once per replicate; the files
                    # do not depend on the gene, so re-reading them inside the
                    # gene loop only multiplied the I/O.
                    cohorts <- lapply(fnames, fread)

                    for (g in gene_list) {
                        # vapply enforces exactly one p-value per cohort.
                        pval <- vapply(cohorts, gene_pval, numeric(1), g = g)
                        weighted_Fisher_pval <- wFisher(p = pval, weight = sample_size, is.onetail = TRUE)$p
                        out <- rbind(out, c(n, g, weighted_Fisher_pval))
                    }
                }
                print(paste(ii, jj, kk, p))
                # rbind of mixed values produced a character matrix; convert
                # the p-value column back to numeric before thresholding.
                out <- as.data.frame(out)
                out$V3 <- as.numeric(out$V3)
                power <- length(which(out$V3 < 2.5e-6)) / nrow(out)
                result <- rbind(result, c(ii, jj, kk, p, power))
            }
        }
    }
}

colnames(result) <- c("ii", "jj", "kk", "prev", "power")
print(result)

#### Power evaluation for Meta-SAIGE

In [None]:
library(data.table)
# library(qqman)
setwd("/media/leelabsg-storage0/kisung/META-SAIGE/result")

# Adjust the first-column p-value of each row using the remaining columns.
#
# For every row, minP = 2 * min(columns 2..k). Where column 1 is below `cutoff`
# and minP is larger than column 1, column 1 is replaced by minP; all other
# rows keep their column-1 value.
#
# Arguments:
#   pval_Matrix  matrix, data.frame or data.table of p-values, one row per
#                test. Column 1 is the value to adjust (named `skato` here,
#                presumably the SKAT-O p-value; confirm against the caller).
#   cutoff       only column-1 values strictly below this are adjusted.
#
# Returns:
#   numeric vector with one (possibly adjusted) p-value per row.
Get_Pval_Adj<-function(pval_Matrix, cutoff=10^-3){

	# Coerce first so that column extraction behaves the same for every input
	# class (a data.table's [ ,1] would otherwise yield a one-column table).
	pval_Matrix<-as.matrix(pval_Matrix)
	skato<-pval_Matrix[,1]
	# Without companion columns there is nothing to adjust against.
	if(ncol(pval_Matrix)<2){
		return(skato)
	}
	# drop=FALSE keeps a matrix even when only one companion column remains.
	minP<-apply(pval_Matrix[,-1,drop=FALSE], 1, min)*2
	# which() discards rows where the comparison is NA.
	idx<-which(skato<cutoff & (minP - skato > 0))
	skato[idx]<-minP[idx]
	return(skato)
}

# Apply Get_Pval_Adj to every available Meta-SAIGE result file and write the
# table, with an added `Pval` column, to the matching "_adj_" directory.
# All paths are relative to the current working directory.
prev <- c(0.01, 0.05)
chr <- c(2, 5, 6, 9, 11, 13)
method <- c("v2", "multi")

for (m in method) {
    in_dir <- paste0("meta_", m, "_231117")
    out_dir <- paste0("meta_", m, "_adj_231119")
    # write.table cannot create directories; make sure the target exists.
    dir.create(out_dir, showWarnings = FALSE)

    for (p in prev) {
        # `chrom` rather than `c`, to avoid masking base::c inside the loop.
        for (chrom in chr) {
            for (i in 1:2) {
                for (j in 1:3) {
                    for (k in 1:2) {
                        for (n in 1:100) {
                            base_name <- paste0("meta_sim", n, "_chr", chrom, "_" , i, "_", j, "_", k, "_prev", p, ".txt")
                            fname <- paste0(in_dir, "/", base_name)
                            # Replicates without a result file are skipped.
                            if (file.exists(fname)) {
                                d <- fread(fname)
                                # Columns 3-10 are handed to Get_Pval_Adj, which
                                # adjusts the first of them using the others.
                                skato <- Get_Pval_Adj(d[,3:10])
                                d$Pval <- skato
                                outname <- paste0(out_dir, "/", base_name)
                                write.table(d, outname, row.names=F, quote=F)
                            }
                        }
                        # Progress marker after each block of 100 replicates.
                        print(paste(m, p, chrom, i, j, k))
                    }
                }
            }
        }
    }
}

# Power evaluation

library(data.table)
setwd("/media/leelabsg-storage0/kisung/META-SAIGE/result/meta_multi_adj_231119")

# Power of Meta-SAIGE: for every prevalence / ii / jj / kk setting, pool the
# adjusted result files of all replicates and chromosomes, keep the genes in
# gene_list, and report the fraction of rows with Pval < 2.5e-6.
prev <- c(0.01, 0.05)
chr <- c(2, 5, 6, 9, 11, 13)
gene_list <- c("IGSF9B", "BRCA2", "APOB", "GPRIN1", "CFB", "DDR1", "GPSM3", "HLA-DRB1", "DBH", "IL33")

result <- NULL
for (p in prev) {
    for (ii in 1:2) {
        for (jj in 1:3) {
            for (kk in 1:2) {
                # Collect the tables in a list and bind once, instead of
                # growing a data.table with rbind on every file.
                tables <- list()
                for (n in 1:100) {
                    # `chrom` rather than `c`, to avoid masking base::c.
                    for (chrom in chr) {
                        fname <- paste0("meta_sim", n, "_chr", chrom, "_", ii, "_", jj, "_", kk, "_prev", p, ".txt")
                        # Replicates / chromosomes without a file are skipped.
                        if (file.exists(fname)) {
                            tables[[length(tables) + 1]] <- fread(fname)
                        }
                    }
                }

                # No file or no matching gene: power is undefined, so record NA
                # rather than a zero-length value that would corrupt `result`.
                power <- NA_real_
                out <- NULL
                if (length(tables) > 0) {
                    # use.names = TRUE matches columns by name, as rbind does.
                    out <- rbindlist(tables, use.names = TRUE)
                    out <- out[which(out$GENE %in% gene_list),]
                    if (nrow(out) > 0) {
                        power <- length(which(out$Pval < 2.5e-6)) / nrow(out)
                    }
                }
                result <- rbind(result, c(ii, jj, kk, p, power))
                print(c(ii, jj, kk, p, power))
            }
        }
    }
}
colnames(result) <- c("ii", "jj", "kk", "prev", "power")
print(result)
