Skip to content

Commit

Permalink
Resolved branch conflicts
Browse files Browse the repository at this point in the history
From: Leonardo Collado Torres <lcollado@jhsph.edu>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/sva@130117 bc3139a8-67e5-0310-9ffc-ced21a209358
  • Loading branch information
lcolladotor committed Aug 27, 2018
1 parent fe27681 commit e6b80f6
Show file tree
Hide file tree
Showing 35 changed files with 1,513 additions and 1,527 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ Suggests:
License: Artistic-2.0
biocViews: Microarray, StatisticalMethod, Preprocessing,
MultipleComparison, Sequencing, RNASeq, BatchEffect, Normalization
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1
8 changes: 3 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
# Generated by roxygen2: do not edit by hand

import(mgcv)
import(BiocParallel)
import(genefilter)
import(matrixStats)

export(ComBat)
export(empirical.controls)
export(f.pvalue)
Expand All @@ -20,4 +15,7 @@ export(sva)
export(sva.check)
export(svaseq)
export(twostepsva.build)
import(genefilter)
import(matrixStats)
import(mgcv)
useDynLib(sva, .registration = TRUE)
74 changes: 37 additions & 37 deletions R/empirical.controls.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
#' A function for estimating the probability that each gene is an empirical control
#'
#' This function uses the iteratively reweighted surrogate variable analysis approach
#' to estimate the probability that each gene is an empirical control.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#' @param n.sv The number of surogate variables to estimate
#' @param B The number of iterations of the irwsva algorithm to perform
#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first
#'
#' @return pcontrol A vector of probabilites that each gene is a control.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:5000,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#'
#' n.sv = num.sv(edata,mod,method="leek")
#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm")
#'
#' @export
#'

empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) {
type = match.arg(type)
if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")}
if(type=="counts"){dat = log(dat + 1)}
svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5)
pcontrol = svobj$pprob.gam
return(pcontrol)

#' A function for estimating the probability that each gene is an empirical control
#'
#' This function uses the iteratively reweighted surrogate variable analysis approach
#' to estimate the probability that each gene is an empirical control.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#' @param n.sv The number of surogate variables to estimate
#' @param B The number of iterations of the irwsva algorithm to perform
#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first
#'
#' @return pcontrol A vector of probabilites that each gene is a control.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:5000,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#'
#' n.sv = num.sv(edata,mod,method="leek")
#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm")
#'
#' @export
#'

empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) {
type = match.arg(type)
if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")}
if(type=="counts"){dat = log(dat + 1)}
svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5)
pcontrol = svobj$pprob.gam
return(pcontrol)

}
99 changes: 50 additions & 49 deletions R/f.pvalue.R
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
#' A function for quickly calculating f statistic p-values for use in sva
#'
#' This function does simple linear algebra to calculate f-statistics
#' for each row of a data matrix comparing the nested models
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
#' The columns of mod0 must be a subset of the columns of mod.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#'
#' @return p A vector of F-statistic p-values one for each row of dat.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:50,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#' mod0 = model.matrix(~1,data=pheno)
#'
#' pValues = f.pvalue(edata,mod,mod0)
#' qValues = p.adjust(pValues,method="BH")
#'
#' @export
#'

f.pvalue <- function(dat,mod,mod0){
n <- dim(dat)[2]
m <- dim(dat)[1]
df1 <- dim(mod)[2]
df0 <- dim(mod0)[2]
p <- rep(0,m)
Id <- diag(n)

resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
rss1 <- rowSums(resid*resid)
rm(resid)

resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
rss0 <- rowSums(resid0*resid0)
rm(resid0)

fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
p <- 1-pf(fstats,df1=(df1-df0),df2=(n-df1))
return(p)
}

#' A function for quickly calculating f statistic p-values for use in sva
#'
#' This function does simple linear algebra to calculate f-statistics
#' for each row of a data matrix comparing the nested models
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
#' The columns of mod0 must be a subset of the columns of mod.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#'
#' @return p A vector of F-statistic p-values one for each row of dat.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:50,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#' mod0 = model.matrix(~1,data=pheno)
#'
#' pValues = f.pvalue(edata,mod,mod0)
#' qValues = p.adjust(pValues,method="BH")
#'
#' @export
#'

f.pvalue <- function(dat,mod,mod0){
n <- dim(dat)[2]
m <- dim(dat)[1]
df1 <- dim(mod)[2]
df0 <- dim(mod0)[2]
p <- rep(0,m)
Id <- diag(n)

resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
rss1 <- rowSums(resid*resid)
rm(resid)

resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))
rss0 <- rowSums(resid0*resid0)
rm(resid0)

fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
p <- 1-pf(fstats,df1=(df1-df0),df2=(n-df1))
return(p)
}
98 changes: 49 additions & 49 deletions R/fstats.R
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
#' A function for quickly calculating f statistics for use in sva
#'
#' This function does simple linear algebra to calculate f-statistics
#' for each row of a data matrix comparing the nested models
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
#' The columns of mod0 must be a subset of the columns of mod.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#'
#' @return fstats A vector of F-statistics one for each row of dat.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:50,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#' mod0 = model.matrix(~1,data=pheno)
#'
#' fs <- fstats(edata, mod, mod0)
#'
#' @export
#'


fstats <- function(dat,mod,mod0){
# A function for calculating F-statistics
# on the rows of dat, comparing the models
# mod (alternative) and mod0 (null).
n <- dim(dat)[2]
m <- dim(dat)[1]
df1 <- dim(mod)[2]
df0 <- dim(mod0)[2]
p <- rep(0,m)
Id <- diag(n)

resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))

rss1 <- (resid*resid) %*% rep(1,n)
rss0 <- (resid0*resid0) %*% rep(1,n)

fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
return(fstats)
}
#' A function for quickly calculating f statistics for use in sva
#'
#' This function does simple linear algebra to calculate f-statistics
#' for each row of a data matrix comparing the nested models
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases.
#' The columns of mod0 must be a subset of the columns of mod.
#'
#' @param dat The transformed data matrix with the variables in rows and samples in columns
#' @param mod The model matrix being used to fit the data
#' @param mod0 The null model being compared when fitting the data
#'
#' @return fstats A vector of F-statistics one for each row of dat.
#'
#' @examples
#' library(bladderbatch)
#' data(bladderdata)
#' dat <- bladderEset[1:50,]
#'
#' pheno = pData(dat)
#' edata = exprs(dat)
#' mod = model.matrix(~as.factor(cancer), data=pheno)
#' mod0 = model.matrix(~1,data=pheno)
#'
#' fs <- fstats(edata, mod, mod0)
#'
#' @export
#'


fstats <- function(dat,mod,mod0){
# A function for calculating F-statistics
# on the rows of dat, comparing the models
# mod (alternative) and mod0 (null).
n <- dim(dat)[2]
m <- dim(dat)[1]
df1 <- dim(mod)[2]
df0 <- dim(mod0)[2]
p <- rep(0,m)
Id <- diag(n)

resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod))
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0))

rss1 <- (resid*resid) %*% rep(1,n)
rss0 <- (resid0*resid0) %*% rep(1,n)

fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1))
return(fstats)
}
Loading

0 comments on commit e6b80f6

Please sign in to comment.