-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
From: Leonardo Collado Torres <lcollado@jhsph.edu> git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/sva@130117 bc3139a8-67e5-0310-9ffc-ced21a209358
- Loading branch information
1 parent
fe27681
commit e6b80f6
Showing
35 changed files
with
1,513 additions
and
1,527 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,38 @@ | ||
#' A function for estimating the probability that each gene is an empirical control | ||
#' | ||
#' This function uses the iteratively reweighted surrogate variable analysis approach | ||
#' to estimate the probability that each gene is an empirical control. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' @param n.sv The number of surogate variables to estimate | ||
#' @param B The number of iterations of the irwsva algorithm to perform | ||
#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first | ||
#' | ||
#' @return pcontrol A vector of probabilites that each gene is a control. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:5000,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' | ||
#' n.sv = num.sv(edata,mod,method="leek") | ||
#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm") | ||
#' | ||
#' @export | ||
#' | ||
|
||
empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) { | ||
type = match.arg(type) | ||
if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")} | ||
if(type=="counts"){dat = log(dat + 1)} | ||
svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5) | ||
pcontrol = svobj$pprob.gam | ||
return(pcontrol) | ||
|
||
#' A function for estimating the probability that each gene is an empirical control | ||
#' | ||
#' This function uses the iteratively reweighted surrogate variable analysis approach | ||
#' to estimate the probability that each gene is an empirical control. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' @param n.sv The number of surogate variables to estimate | ||
#' @param B The number of iterations of the irwsva algorithm to perform | ||
#' @param type If type is norm then standard irwsva is applied, if type is counts, then the moderated log transform is applied first | ||
#' | ||
#' @return pcontrol A vector of probabilites that each gene is a control. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:5000,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' | ||
#' n.sv = num.sv(edata,mod,method="leek") | ||
#' pcontrol <- empirical.controls(edata,mod,mod0=NULL,n.sv=n.sv,type="norm") | ||
#' | ||
#' @export | ||
#' | ||
|
||
empirical.controls <- function(dat, mod, mod0 = NULL,n.sv,B=5,type=c("norm","counts")) { | ||
type = match.arg(type) | ||
if((type=="counts") & any(dat < 0)){stop("empirical controls error: counts must be zero or greater")} | ||
if(type=="counts"){dat = log(dat + 1)} | ||
svobj = irwsva.build(dat, mod, mod0 = NULL,n.sv,B=5) | ||
pcontrol = svobj$pprob.gam | ||
return(pcontrol) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,50 @@ | ||
#' A function for quickly calculating f statistic p-values for use in sva | ||
#' | ||
#' This function does simple linear algebra to calculate f-statistics | ||
#' for each row of a data matrix comparing the nested models | ||
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases. | ||
#' The columns of mod0 must be a subset of the columns of mod. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' | ||
#' @return p A vector of F-statistic p-values one for each row of dat. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:50,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' mod0 = model.matrix(~1,data=pheno) | ||
#' | ||
#' pValues = f.pvalue(edata,mod,mod0) | ||
#' qValues = p.adjust(pValues,method="BH") | ||
#' | ||
#' @export | ||
#' | ||
|
||
f.pvalue <- function(dat,mod,mod0){ | ||
n <- dim(dat)[2] | ||
m <- dim(dat)[1] | ||
df1 <- dim(mod)[2] | ||
df0 <- dim(mod0)[2] | ||
p <- rep(0,m) | ||
Id <- diag(n) | ||
|
||
resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod)) | ||
rss1 <- rowSums(resid*resid) | ||
rm(resid) | ||
|
||
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0)) | ||
rss0 <- rowSums(resid0*resid0) | ||
rm(resid0) | ||
|
||
fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1)) | ||
p <- 1-pf(fstats,df1=(df1-df0),df2=(n-df1)) | ||
return(p) | ||
} | ||
|
||
#' A function for quickly calculating f statistic p-values for use in sva | ||
#' | ||
#' This function does simple linear algebra to calculate f-statistics | ||
#' for each row of a data matrix comparing the nested models | ||
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases. | ||
#' The columns of mod0 must be a subset of the columns of mod. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' | ||
#' @return p A vector of F-statistic p-values one for each row of dat. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:50,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' mod0 = model.matrix(~1,data=pheno) | ||
#' | ||
#' pValues = f.pvalue(edata,mod,mod0) | ||
#' qValues = p.adjust(pValues,method="BH") | ||
#' | ||
#' @export | ||
#' | ||
|
||
f.pvalue <- function(dat,mod,mod0){ | ||
n <- dim(dat)[2] | ||
m <- dim(dat)[1] | ||
df1 <- dim(mod)[2] | ||
df0 <- dim(mod0)[2] | ||
p <- rep(0,m) | ||
Id <- diag(n) | ||
|
||
resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod)) | ||
rss1 <- rowSums(resid*resid) | ||
rm(resid) | ||
|
||
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0)) | ||
rss0 <- rowSums(resid0*resid0) | ||
rm(resid0) | ||
|
||
fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1)) | ||
p <- 1-pf(fstats,df1=(df1-df0),df2=(n-df1)) | ||
return(p) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,49 @@ | ||
#' A function for quickly calculating f statistics for use in sva | ||
#' | ||
#' This function does simple linear algebra to calculate f-statistics | ||
#' for each row of a data matrix comparing the nested models | ||
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases. | ||
#' The columns of mod0 must be a subset of the columns of mod. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' | ||
#' @return fstats A vector of F-statistics one for each row of dat. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:50,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' mod0 = model.matrix(~1,data=pheno) | ||
#' | ||
#' fs <- fstats(edata, mod, mod0) | ||
#' | ||
#' @export | ||
#' | ||
|
||
|
||
fstats <- function(dat,mod,mod0){ | ||
# A function for calculating F-statistics | ||
# on the rows of dat, comparing the models | ||
# mod (alternative) and mod0 (null). | ||
n <- dim(dat)[2] | ||
m <- dim(dat)[1] | ||
df1 <- dim(mod)[2] | ||
df0 <- dim(mod0)[2] | ||
p <- rep(0,m) | ||
Id <- diag(n) | ||
|
||
resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod)) | ||
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0)) | ||
|
||
rss1 <- (resid*resid) %*% rep(1,n) | ||
rss0 <- (resid0*resid0) %*% rep(1,n) | ||
|
||
fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1)) | ||
return(fstats) | ||
} | ||
#' A function for quickly calculating f statistics for use in sva | ||
#' | ||
#' This function does simple linear algebra to calculate f-statistics | ||
#' for each row of a data matrix comparing the nested models | ||
#' defined by the design matrices for the alternative (mod) and and null (mod0) cases. | ||
#' The columns of mod0 must be a subset of the columns of mod. | ||
#' | ||
#' @param dat The transformed data matrix with the variables in rows and samples in columns | ||
#' @param mod The model matrix being used to fit the data | ||
#' @param mod0 The null model being compared when fitting the data | ||
#' | ||
#' @return fstats A vector of F-statistics one for each row of dat. | ||
#' | ||
#' @examples | ||
#' library(bladderbatch) | ||
#' data(bladderdata) | ||
#' dat <- bladderEset[1:50,] | ||
#' | ||
#' pheno = pData(dat) | ||
#' edata = exprs(dat) | ||
#' mod = model.matrix(~as.factor(cancer), data=pheno) | ||
#' mod0 = model.matrix(~1,data=pheno) | ||
#' | ||
#' fs <- fstats(edata, mod, mod0) | ||
#' | ||
#' @export | ||
#' | ||
|
||
|
||
fstats <- function(dat,mod,mod0){ | ||
# A function for calculating F-statistics | ||
# on the rows of dat, comparing the models | ||
# mod (alternative) and mod0 (null). | ||
n <- dim(dat)[2] | ||
m <- dim(dat)[1] | ||
df1 <- dim(mod)[2] | ||
df0 <- dim(mod0)[2] | ||
p <- rep(0,m) | ||
Id <- diag(n) | ||
|
||
resid <- dat %*% (Id - mod %*% solve(t(mod) %*% mod) %*% t(mod)) | ||
resid0 <- dat %*% (Id - mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0)) | ||
|
||
rss1 <- (resid*resid) %*% rep(1,n) | ||
rss0 <- (resid0*resid0) %*% rep(1,n) | ||
|
||
fstats <- ((rss0 - rss1)/(df1-df0))/(rss1/(n-df1)) | ||
return(fstats) | ||
} |
Oops, something went wrong.