Resolved branch conflicts

From: Leonardo Collado Torres <lcollado@jhsph.edu> git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/sva@130117 bc3139a8-67e5-0310-9ffc-ced21a209358
jtleek · Aug 27, 2018 · e6b80f6 · e6b80f6
1 parent fe27681
commit e6b80f6
Show file tree

Hide file tree

Showing 35 changed files with 1,513 additions and 1,527 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -44,4 +44,4 @@ Suggests:
 License: Artistic-2.0
 biocViews: Microarray, StatisticalMethod, Preprocessing,
     MultipleComparison, Sequencing, RNASeq, BatchEffect, Normalization
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-import(mgcv)
-import(BiocParallel)
-import(genefilter)
-import(matrixStats)
-
 export(ComBat)
 export(empirical.controls)
 export(f.pvalue)
@@ -20,4 +15,7 @@ export(sva)
 export(sva.check)
 export(svaseq)
 export(twostepsva.build)
+import(genefilter)
+import(matrixStats)
+import(mgcv)
 useDynLib(sva, .registration = TRUE)
diff --git a/R/empirical.controls.R b/R/empirical.controls.R
@@ -1,38 +1,38 @@
-#' A function for estimating the probability that each gene is an empirical control 
-#' 
-#' This function uses the iteratively reweighted surrogate variable analysis approach
-#' to estimate the probability that each gene is an empirical control. 
-#' 
-#' @param dat The transformed data matrix with the variables in rows and samples in columns
-#' @param mod The model matrix being used to fit the data
-#' @param mod0 The null model being compared when fitting the data
-#' @param n.sv The number of surogate variables to estimate
-#' @param B The number of iterations of the irwsva algorithm to perform
-#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first
-#' 
-#' @return pcontrol A vector of probabilites that each gene is a control. 
-#' 
-#' @examples 
-#' library(bladderbatch)
-#' data(bladderdata)
-#' dat <- bladderEset[1:5000,]
-#' 
-#' pheno = pData(dat)
-#' edata = exprs(dat)
-#' mod = model.matrix(~as.factor(cancer), data=pheno)
-#' 
-#' n.sv = num.sv(edata,mod,method="leek")
-#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm")
-#' 
-#' @export
-#' 
-
-empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) {
-  type = match.arg(type)
-  if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")}
-  if(type=="counts"){dat = log(dat + 1)}
-  svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5) 
-  pcontrol = svobj$pprob.gam
-  return(pcontrol)
-
+#' A function for estimating the probability that each gene is an empirical control 
+#' 
+#' This function uses the iteratively reweighted surrogate variable analysis approach
+#' to estimate the probability that each gene is an empirical control. 
+#' 
+#' @param dat The transformed data matrix with the variables in rows and samples in columns
+#' @param mod The model matrix being used to fit the data
+#' @param mod0 The null model being compared when fitting the data
+#' @param n.sv The number of surogate variables to estimate
+#' @param B The number of iterations of the irwsva algorithm to perform
+#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first
+#' 
+#' @return pcontrol A vector of probabilites that each gene is a control. 
+#' 
+#' @examples 
+#' library(bladderbatch)
+#' data(bladderdata)
+#' dat <- bladderEset[1:5000,]
+#' 
+#' pheno = pData(dat)
+#' edata = exprs(dat)
+#' mod = model.matrix(~as.factor(cancer), data=pheno)
+#' 
+#' n.sv = num.sv(edata,mod,method="leek")
+#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm")
+#' 
+#' @export
+#' 
+
+empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) {
+  type = match.arg(type)
+  if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")}
+  if(type=="counts"){dat = log(dat + 1)}
+  svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5) 
+  pcontrol = svobj$pprob.gam
+  return(pcontrol)
+
 }
diff --git a/R/f.pvalue.R b/R/f.pvalue.R
@@ -1,49 +1,50 @@
-#' A function for quickly calculating f statistic p-values for use in sva
-#' 
-#' This function does simple linear algebra to calculate f-statistics
-#' for each row of a data matrix comparing the nested models
-#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
-#' The columns of mod0 must be a subset of the columns of mod.  
-#' 
-#' @param dat The transformed data matrix with the variables in rows and samples in columns
-#' @param mod The model matrix being used to fit the data
-#' @param mod0 The null model being compared when fitting the data
-#' 
-#' @return p A vector of F-statistic p-values one for each row of dat. 
-#' 
-#' @examples 
-#' library(bladderbatch)
-#' data(bladderdata)
-#' dat <- bladderEset[1:50,]
-#' 
-#' pheno = pData(dat)
-#' edata = exprs(dat)
-#' mod = model.matrix(~as.factor(cancer), data=pheno)
-#' mod0 = model.matrix(~1,data=pheno)
-#' 
-#' pValues = f.pvalue(edata,mod,mod0)
-#' qValues = p.adjust(pValues,method="BH")
-#' 
-#' @export
-#' 
-
-f.pvalue <- function(dat,mod,mod0){
-  n <- dim(dat)[2]
-  m <- dim(dat)[1]
-  df1 <- dim(mod)[2]
-  df0 <- dim(mod0)[2]
-  p <- rep(0,m)
-  Id <- diag(n)
-
-  resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
-  rss1 <- rowSums(resid*resid)
-  rm(resid)
-
-  resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
-  rss0 <- rowSums(resid0*resid0)
-  rm(resid0)
-
-  fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
-  p <-  1-pf(fstats,df1=(df1-df0),df2=(n-df1))
-  return(p)
-}
+
+#' A function for quickly calculating f statistic p-values for use in sva
+#' 
+#' This function does simple linear algebra to calculate f-statistics
+#' for each row of a data matrix comparing the nested models
+#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
+#' The columns of mod0 must be a subset of the columns of mod.  
+#' 
+#' @param dat The transformed data matrix with the variables in rows and samples in columns
+#' @param mod The model matrix being used to fit the data
+#' @param mod0 The null model being compared when fitting the data
+#' 
+#' @return p A vector of F-statistic p-values one for each row of dat. 
+#' 
+#' @examples 
+#' library(bladderbatch)
+#' data(bladderdata)
+#' dat <- bladderEset[1:50,]
+#' 
+#' pheno = pData(dat)
+#' edata = exprs(dat)
+#' mod = model.matrix(~as.factor(cancer), data=pheno)
+#' mod0 = model.matrix(~1,data=pheno)
+#' 
+#' pValues = f.pvalue(edata,mod,mod0)
+#' qValues = p.adjust(pValues,method="BH")
+#' 
+#' @export
+#' 
+
+f.pvalue <- function(dat,mod,mod0){
+  n <- dim(dat)[2]
+  m <- dim(dat)[1]
+  df1 <- dim(mod)[2]
+  df0 <- dim(mod0)[2]
+  p <- rep(0,m)
+  Id <- diag(n)
+
+  resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
+  rss1 <- rowSums(resid*resid)
+  rm(resid)
+
+  resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
+  rss0 <- rowSums(resid0*resid0)
+  rm(resid0)
+
+  fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
+  p <-  1-pf(fstats,df1=(df1-df0),df2=(n-df1))
+  return(p)
+}
diff --git a/R/fstats.R b/R/fstats.R
@@ -1,49 +1,49 @@
-#' A function for quickly calculating f statistics for use in sva
-#' 
-#' This function does simple linear algebra to calculate f-statistics
-#' for each row of a data matrix comparing the nested models
-#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
-#' The columns of mod0 must be a subset of the columns of mod.  
-#' 
-#' @param dat The transformed data matrix with the variables in rows and samples in columns
-#' @param mod The model matrix being used to fit the data
-#' @param mod0 The null model being compared when fitting the data
-#' 
-#' @return fstats A vector of F-statistics one for each row of dat. 
-#' 
-#' @examples 
-#' library(bladderbatch)
-#' data(bladderdata)
-#' dat <- bladderEset[1:50,]
-#' 
-#' pheno = pData(dat)
-#' edata = exprs(dat)
-#' mod = model.matrix(~as.factor(cancer), data=pheno)
-#' mod0 = model.matrix(~1,data=pheno)
-#' 
-#' fs <- fstats(edata, mod, mod0)
-#' 
-#' @export
-#' 
-
-
-fstats <- function(dat,mod,mod0){
-  # A function for calculating F-statistics
-  # on the rows of dat, comparing the models
-  # mod (alternative) and mod0 (null). 
-  n <- dim(dat)[2]
-  m <- dim(dat)[1]
-  df1 <- dim(mod)[2]
-  df0 <- dim(mod0)[2]
-  p <- rep(0,m)
-  Id <- diag(n)
-
-  resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
-  resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
-
-  rss1 <- (resid*resid) %*% rep(1,n)
-  rss0 <- (resid0*resid0) %*% rep(1,n)
-
-  fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
-  return(fstats)
-}
+#' A function for quickly calculating f statistics for use in sva
+#' 
+#' This function does simple linear algebra to calculate f-statistics
+#' for each row of a data matrix comparing the nested models
+#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
+#' The columns of mod0 must be a subset of the columns of mod.  
+#' 
+#' @param dat The transformed data matrix with the variables in rows and samples in columns
+#' @param mod The model matrix being used to fit the data
+#' @param mod0 The null model being compared when fitting the data
+#' 
+#' @return fstats A vector of F-statistics one for each row of dat. 
+#' 
+#' @examples 
+#' library(bladderbatch)
+#' data(bladderdata)
+#' dat <- bladderEset[1:50,]
+#' 
+#' pheno = pData(dat)
+#' edata = exprs(dat)
+#' mod = model.matrix(~as.factor(cancer), data=pheno)
+#' mod0 = model.matrix(~1,data=pheno)
+#' 
+#' fs <- fstats(edata, mod, mod0)
+#' 
+#' @export
+#' 
+
+
+fstats <- function(dat,mod,mod0){
+  # A function for calculating F-statistics
+  # on the rows of dat, comparing the models
+  # mod (alternative) and mod0 (null). 
+  n <- dim(dat)[2]
+  m <- dim(dat)[1]
+  df1 <- dim(mod)[2]
+  df0 <- dim(mod0)[2]
+  p <- rep(0,m)
+  Id <- diag(n)
+
+  resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
+  resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
+
+  rss1 <- (resid*resid) %*% rep(1,n)
+  rss0 <- (resid0*resid0) %*% rep(1,n)
+
+  fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
+  return(fstats)
+}