Fixed some warnings, incremented to 0.99.3

leekgroup · Dec 11, 2016 · 38a712d · 38a712d
1 parent 0077a2f
commit 38a712d
Show file tree

Hide file tree

Showing 12 changed files with 60 additions and 39 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: swfdr
 Title: Science-wise false discovery rate estimation
-Version: 0.99.2
+Version: 0.99.3
 Author: Jeffrey T. Leek, Simina M. Boca
 Maintainer: Simina M. Boca <smb310@georgetown.edu>, Jeffrey T. Leek <jtleek@gmail.com>
 Description: This package allows users to estimate the science-wise false discovery rate from Jager and Leek, 
@@ -11,7 +11,7 @@ Description: This package allows users to estimate the science-wise false discov
 Depends:
     R (>= 3.4)
 Imports:
-    stats4, ggplot2, reshape2
+    stats4, ggplot2, reshape2, stats, dplyr
 License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(calculateSwfdr)
 export(lm_pi0)
+import(dplyr)
 import(ggplot2)
 import(reshape2)
 import(stats4)

diff --git a/R/BMI_GIANT_GWAS_sample-data.R b/R/BMI_GIANT_GWAS_sample-data.R
@@ -5,7 +5,9 @@
 #' @docType data
 #'
 #' @usage data(BMI_GIANT_GWAS_sample)
-#'   
+#'
+#' @return Object of class tbl_df, tbl, data.frame.
+#'         
 #' @format A data frame with 50,000 rows and 9 variables:
 #' \describe{
 #'   \item{SNP}{ID for SNP (single nucleotide polymorphism)}
@@ -18,7 +20,7 @@
 #'   \item{N}{Total sample size considered for association of SNP and BMI}
 #'   \item{Freq_MAF_Int_Hapmap}{Three approximately equal intervals for the Hapmap MAFs}
 #' }
-#' 
+#'
 #' @keywords datasets
 #' 
 #' @source \url{https://www.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files#GWAS_Anthropometric_2015_BMI}

diff --git a/R/calculateSwfdr.R b/R/calculateSwfdr.R
@@ -16,6 +16,7 @@
 #' @return n Number of rounded p-values between certain cutpoints (0.005, 0.015, 0.025, 0.035, 0.045, 0.05)
 #' 
 #' @import stats4
+#' @import dplyr
 #' @importFrom stats dbeta lsfit pbeta smooth.spline
 #' 
 #' @examples

diff --git a/R/journals_pVals-data.R b/R/journals_pVals-data.R
@@ -4,9 +4,11 @@
 #'
 #' @docType data
 #'
-#' @usage data(journals_pVals)
+#' @usage journals_pVals
 #'
-#' @format A data frame with 15,653 rows and 7 variables:
+#' @return Object of class tbl_df, tbl, data.frame.
+#' 
+#' @format A tbl data frame with 15,653 rows and 7 variables:
 #' \describe{
 #'   \item{pvalue}{P-value}
 #'   \item{pvalueTruncated}{Equal to 1 if the p-value is truncated, 0 otherwise}

diff --git a/data/journals_pVals.RData b/data/journals_pVals.RData
diff --git a/inst/doc/swfdrTutorial.R b/inst/doc/swfdrTutorial.R
@@ -2,32 +2,35 @@
 library(swfdr)
 
 ## ------------------------------------------------------------------------
-data(journals_pVals)
 colnames(journals_pVals)
 
 ## ------------------------------------------------------------------------
 table(journals_pVals$year)
 table(journals_pVals$journal)
 
 ## ------------------------------------------------------------------------
-journals_pVals1 <- journals_pVals[journals_pVals$year==2005 & 
-                                    journals_pVals$journal == "American Journal of Epidemiology" &
-                                    journals_pVals$pvalue < 0.05,]
+journals_pVals1 <- dplyr::filter(journals_pVals,
+                                 year == 2005,
+                                 journal == "American Journal of Epidemiology",
+                                 pvalue < 0.05)
+
 dim(journals_pVals1)
 
 ## ------------------------------------------------------------------------
-tt <- journals_pVals1[,2]
+tt <- data.frame(journals_pVals1)[,2]
 rr <- rep(0,length(tt))
-rr[tt == 0] <- (journals_pVals1[tt==0,1] == round(journals_pVals1[tt==0,1],2))
-pVals <- journals_pVals1[,1]
-resSwfdr <- calculateSwfdr(pValues = pVals, truncated = tt, rounded = rr, numEmIterations=100)
+rr[tt == 0] <- (data.frame(journals_pVals1)[tt==0,1] == 
+                  round(data.frame(journals_pVals1)[tt==0,1],2))
+pVals <- data.frame(journals_pVals1)[,1]
+resSwfdr <- calculateSwfdr(pValues = pVals, 
+                           truncated = tt, 
+                           rounded = rr, numEmIterations=100)
 names(resSwfdr)
 
 ## ------------------------------------------------------------------------
 resSwfdr
 
 ## ------------------------------------------------------------------------
-data(BMI_GIANT_GWAS_sample)
 head(BMI_GIANT_GWAS_sample)
 dim(BMI_GIANT_GWAS_sample)
 

diff --git a/inst/doc/swfdrTutorial.Rmd b/inst/doc/swfdrTutorial.Rmd
@@ -26,9 +26,8 @@ The science-wise false discovery rate (swfdr) is defined in @JagerEtAl2013 as th
 ### Example: Estimate the swfdr based on p-values from biomedical journals
 
 We include a dataset containing 15,653 p-values from articles in 5 biomedical journals (American Journal of Epidemiology, BMJ, Jama, Lancet, New England Journal of Medicine), over 11 years (2000-2010).
-This is obtained from web-scraping, using the code at \url{https://github.com/jtleek/swfdr/blob/master/getPvalues.R} and can be loaded via:
+This is obtained from web-scraping, using the code at \url{https://github.com/jtleek/swfdr/blob/master/getPvalues.R} and is already loaded in the package.
 ```{r}
-data(journals_pVals)
 colnames(journals_pVals)
 ```
 
@@ -52,19 +51,24 @@ This function estimates the swfdr. It inputs the following parameters:
 
 Given that it runs an EM algorithm, it is somewhat computationally intensive. We show an example of applying it to all the p-values from the abstracts for articles published in the American Journal of Epidemiology in 2005. First, we subset the `journals_pVals` dataset and only consider the p-values below $0.05$, as in @JagerEtAl2013:
 ```{r}
-journals_pVals1 <- journals_pVals[journals_pVals$year==2005 & 
-                                    journals_pVals$journal == "American Journal of Epidemiology" &
-                                    journals_pVals$pvalue < 0.05,]
+journals_pVals1 <- dplyr::filter(journals_pVals,
+                                 year == 2005,
+                                 journal == "American Journal of Epidemiology",
+                                 pvalue < 0.05)
+
 dim(journals_pVals1)
 ```
 
 Next, we define vectors corresponding to the truncation status and the rounding status (defined as rounding to 2 decimal places) and use these vectors, along with the vector of p-values and the number of EM iterations, as inputs to the `calculateSwfdr` function:
 ```{r}
-tt <- journals_pVals1[,2]
+tt <- data.frame(journals_pVals1)[,2]
 rr <- rep(0,length(tt))
-rr[tt == 0] <- (journals_pVals1[tt==0,1] == round(journals_pVals1[tt==0,1],2))
-pVals <- journals_pVals1[,1]
-resSwfdr <- calculateSwfdr(pValues = pVals, truncated = tt, rounded = rr, numEmIterations=100)
+rr[tt == 0] <- (data.frame(journals_pVals1)[tt==0,1] == 
+                  round(data.frame(journals_pVals1)[tt==0,1],2))
+pVals <- data.frame(journals_pVals1)[,1]
+resSwfdr <- calculateSwfdr(pValues = pVals, 
+                           truncated = tt, 
+                           rounded = rr, numEmIterations=100)
 names(resSwfdr)
 ```
 
@@ -97,9 +101,8 @@ prior probability that a hypothesis is true or false.
 
 We consider an example from the meta-analysis of data from a genome-wide association study (GWAS) for
 body mass index (BMI) from @LockeEtAl2015. A subset of this data, corresponding to 50,000 
-single nucleotide polymorphisms (SNPs), can be loaded using:
+single nucleotide polymorphisms (SNPs), is already loaded with the package.
 ```{r}
-data(BMI_GIANT_GWAS_sample)
 head(BMI_GIANT_GWAS_sample)
 dim(BMI_GIANT_GWAS_sample)
 ```

diff --git a/inst/doc/swfdrTutorial.pdf b/inst/doc/swfdrTutorial.pdf
diff --git a/man/BMI_GIANT_GWAS_sample.Rd b/man/BMI_GIANT_GWAS_sample.Rd
diff --git a/man/journals_pVals.Rd b/man/journals_pVals.Rd
diff --git a/vignettes/swfdrTutorial.Rmd b/vignettes/swfdrTutorial.Rmd
@@ -26,9 +26,8 @@ The science-wise false discovery rate (swfdr) is defined in @JagerEtAl2013 as th
 ### Example: Estimate the swfdr based on p-values from biomedical journals
 
 We include a dataset containing 15,653 p-values from articles in 5 biomedical journals (American Journal of Epidemiology, BMJ, Jama, Lancet, New England Journal of Medicine), over 11 years (2000-2010).
-This is obtained from web-scraping, using the code at \url{https://github.com/jtleek/swfdr/blob/master/getPvalues.R} and can be loaded via:
+This is obtained from web-scraping, using the code at \url{https://github.com/jtleek/swfdr/blob/master/getPvalues.R} and is already loaded in the package.
 ```{r}
-data(journals_pVals)
 colnames(journals_pVals)
 ```
 
@@ -52,19 +51,24 @@ This function estimates the swfdr. It inputs the following parameters:
 
 Given that it runs an EM algorithm, it is somewhat computationally intensive. We show an example of applying it to all the p-values from the abstracts for articles published in the American Journal of Epidemiology in 2005. First, we subset the `journals_pVals` dataset and only consider the p-values below $0.05$, as in @JagerEtAl2013:
 ```{r}
-journals_pVals1 <- journals_pVals[journals_pVals$year==2005 & 
-                                    journals_pVals$journal == "American Journal of Epidemiology" &
-                                    journals_pVals$pvalue < 0.05,]
+journals_pVals1 <- dplyr::filter(journals_pVals,
+                                 year == 2005,
+                                 journal == "American Journal of Epidemiology",
+                                 pvalue < 0.05)
+
 dim(journals_pVals1)
 ```
 
 Next, we define vectors corresponding to the truncation status and the rounding status (defined as rounding to 2 decimal places) and use these vectors, along with the vector of p-values and the number of EM iterations, as inputs to the `calculateSwfdr` function:
 ```{r}
-tt <- journals_pVals1[,2]
+tt <- data.frame(journals_pVals1)[,2]
 rr <- rep(0,length(tt))
-rr[tt == 0] <- (journals_pVals1[tt==0,1] == round(journals_pVals1[tt==0,1],2))
-pVals <- journals_pVals1[,1]
-resSwfdr <- calculateSwfdr(pValues = pVals, truncated = tt, rounded = rr, numEmIterations=100)
+rr[tt == 0] <- (data.frame(journals_pVals1)[tt==0,1] == 
+                  round(data.frame(journals_pVals1)[tt==0,1],2))
+pVals <- data.frame(journals_pVals1)[,1]
+resSwfdr <- calculateSwfdr(pValues = pVals, 
+                           truncated = tt, 
+                           rounded = rr, numEmIterations=100)
 names(resSwfdr)
 ```
 
@@ -97,9 +101,8 @@ prior probability that a hypothesis is true or false.
 
 We consider an example from the meta-analysis of data from a genome-wide association study (GWAS) for
 body mass index (BMI) from @LockeEtAl2015. A subset of this data, corresponding to 50,000 
-single nucleotide polymorphisms (SNPs), can be loaded using:
+single nucleotide polymorphisms (SNPs), is already loaded with the package.
 ```{r}
-data(BMI_GIANT_GWAS_sample)
 head(BMI_GIANT_GWAS_sample)
 dim(BMI_GIANT_GWAS_sample)
 ```