-
Notifications
You must be signed in to change notification settings - Fork 6
/
runFDR_cis.R
executable file
·67 lines (60 loc) · 2.82 KB
/
runFDR_cis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#Load qvalue package
suppressMessages(library(qvalue))
#Read command line arguments
args <- commandArgs(trailingOnly = TRUE)
try(if(length(args) != 3) stop("Incorrect number of arguments, usage> Rscript runFDR.R INPUT FDR OUTPUT"))
opt_input = args[1];
opt_fdr = as.numeric(args[2]);
opt_output = args[3];
#Verbose
cat("\nProcessing QTLtools output.\n");
cat(" * Input = [", opt_input, "]\n");
cat(" * FDR = ", opt_fdr, "\n");
cat(" * Output = [", opt_output, "]\n");
#Read data
cat("\nRead Input data. Note: we expect a file with 19 columns i.e. it is best to use the results from a permutation test.\n");
D = read.table(opt_input, head = FALSE, stringsAsFactors = FALSE)
exon_offset = ifelse(ncol(D) == 19, 0, 2)
if (exon_offset == 2) cat(" * Gene level correction detected\n")
MASK=!is.na(D[,18+exon_offset])
Dnas=D[!MASK,]
D = D[MASK,]
cat(" * Number of molecular phenotypes =" , nrow(D), "\n")
cat(" * Number of NA lines =" , nrow(Dnas), "\n")
cat(" * Correlation between Beta approx. and Empirical p-values =", round(cor(D[, 18+exon_offset], D[, 19+exon_offset]), 4), "\n")
#Run qvalue on pvalues for best signals
cat("\nProcess Input data with Qvalue.\n");
MASK=!is.na(D[,18+exon_offset])
Q = qvalue(D[MASK,19+exon_offset]);
D$qval = NA;
D$qval[MASK] = Q$qvalue;
cat(" * Proportion of significant phenotypes =" , round((1 - Q$pi0) * 100, 2), "%\n")
#Determine significance threshold
cat("\nDetermine significance thresholds\n");
set0 = D[which(D$qval <= opt_fdr),]
set1 = D[which(D$qval > opt_fdr),]
pthreshold = (sort(set1[,19+exon_offset])[1] - sort(-1.0 * set0[,19+exon_offset])[1]) / 2
cat(" * Corrected p-value threshold = ", pthreshold, "\n")
pval0 = qbeta(pthreshold, D[,14+exon_offset], D[,15+exon_offset], ncp = 0, lower.tail = TRUE, log.p = FALSE)
test0 = qf(pval0, 1, D[,13+exon_offset], ncp = 0, lower.tail = FALSE, log.p = FALSE)
corr0 = sqrt(test0 / (D[,13+exon_offset] + test0))
test1 = D[,12+exon_offset] * corr0 * corr0 / (1 - corr0 * corr0)
pval1 = pf(test1, 1, D[,12+exon_offset], ncp = 0, lower.tail = FALSE, log.p = FALSE)
cat(" * pval0 = ", mean(pval0), " +/- ", sd(pval0), "\n")
cat(" * test0 = ", mean(test0), " +/- ", sd(test0), "\n")
cat(" * corr0 = ", mean(corr0), " +/- ", sd(corr0), "\n")
cat(" * test1 = ", mean(test1), " +/- ", sd(test1), "\n")
cat(" * pval1 = ", mean(pval1), " +/- ", sd(pval1), "\n")
D$nthresholds = pval1
#Write significant hits
fout1=paste(opt_output, "significant.txt", sep=".")
cat("\nWrite significant hits in [", fout1, "]\n");
write.table(D[D$qval <= opt_fdr,], fout1, quote=FALSE, row.names=FALSE, col.names=FALSE)
#Write thresholds
fout2=paste(opt_output, "thresholds.txt", sep=".")
cat("\nWrite nominal thresholds in [", fout2, "]\n");
D1=D[, c(1, 21+exon_offset)]
D2=Dnas[, c(1,18)]
names(D2)=names(D1)
D3=rbind(D1, D2)
write.table(D3, fout2, quote=FALSE, row.names=FALSE, col.names=FALSE)