From f1ae9d2b33f9cac9f1a6bb5dd3fedacce15da582 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 15:26:53 +0200 Subject: [PATCH 1/9] add unit test for nQuants --- tests/testthat/test_MSnSet.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R index 8907cc4a3..374fa260f 100644 --- a/tests/testthat/test_MSnSet.R +++ b/tests/testthat/test_MSnSet.R @@ -212,6 +212,19 @@ test_that("Transpose and subset", { expect_identical(dim(bb), c(2L, 2L)) }) +test_that("nQuants", { + data(msnset) + pa <- fData(msnset)$ProteinAccession + upa <- unique(pa) + qu <- matrix(1, nrow=length(upa), ncol=ncol(msnset), + dimnames=list(upa[order(upa)], sampleNames(msnset))) + qu[c("ECA0435", "ECA0469", "ECA3349", "ECA3566", "ECA4026"),] <- 2 + qu["BSA",] <- 3 + qu["ENO",] <- c(4, 4, 3, 4) + qu["ECA4514",] <- 6 + expect_equal(nQuants(msnset, pa), qu) +}) + context("MSnSet identification data") test_that("addIdentificationData", { From 8f4fce57d48de49fe61e976b0b182d92da40839b Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 15:38:11 +0200 Subject: [PATCH 2/9] rewrite nQuants using rowsum instead of utils.applyColumnwiseByGroup --- R/functions-MSnSet.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/functions-MSnSet.R b/R/functions-MSnSet.R index 6c3495933..84339af30 100644 --- a/R/functions-MSnSet.R +++ b/R/functions-MSnSet.R @@ -158,11 +158,9 @@ nQuants <- function(x, groupBy) { if (class(x) != "MSnSet") stop("'x' must be of class 'MSnSet'.") - ans <- utils.applyColumnwiseByGroup(exprs(x), groupBy=groupBy, - FUN=function(y) { - nrow(y)-colSums(is.na(y))}) - colnames(ans) <- sampleNames(x) - ans + e <- !is.na(exprs(x)) + mode(e) <- "numeric" + rowsum(e, group=groupBy, reorder=TRUE) } ##' Subsets \code{MSnSet} instances to their common feature names. From bf2f6dea5010dce76837abcea7153c1247b78a35 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 16:35:11 +0200 Subject: [PATCH 3/9] extend nQuants unit test --- tests/testthat/test_MSnSet.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R index 374fa260f..003f30799 100644 --- a/tests/testthat/test_MSnSet.R +++ b/tests/testthat/test_MSnSet.R @@ -213,6 +213,22 @@ test_that("Transpose and subset", { }) test_that("nQuants", { + m <- new("MSnSet", + exprs=matrix(1:10, nrow=5, ncol=2), + featureData=new("AnnotatedDataFrame", + data=data.frame(accession= + factor(c("A", "A", "A", "B", "B"))))) + qu <- matrix(3:2, nrow=2, ncol=2, dimnames=list(c("A", "B"), 1:2)) + expect_equal(nQuants(m, group=fData(m)$accession), qu) + + ## more levels than items present in the factor + m@featureData <- new("AnnotatedDataFrame", + data=data.frame(accession= + factor(c("A", "A", "A", "B", "B"), + levels=LETTERS[1:10]))) + expect_equal(nQuants(m, group=fData(m)$accession), qu) + + ## real world example data(msnset) pa <- fData(msnset)$ProteinAccession upa <- unique(pa) @@ -223,6 +239,12 @@ test_that("nQuants", { qu["ENO",] <- c(4, 4, 3, 4) qu["ECA4514",] <- 6 expect_equal(nQuants(msnset, pa), qu) + + ## more levels than items present in the factor + qu <- matrix(1, nrow=10, ncol=4, + dimnames=list(as.character(pa[order(pa[1:10])]), + sampleNames(msnset))) + expect_equal(nQuants(msnset[1:10], pa[1:10]), qu) }) context("MSnSet identification data") From 4c2c704532b7a389c6dbc4370ab2e84c43b49164 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 16:55:42 +0200 Subject: [PATCH 4/9] add rowmean with unit tests --- R/utils.R | 22 ++++++++++++++++++++++ tests/testthat/test_utils.R | 15 +++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/R/utils.R b/R/utils.R index e4e546921..396d24fe1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -897,6 +897,28 @@ utils.colSd <- function(x, na.rm = TRUE) { sqrt(colVar * n/(n - 1L)) } +##' Similar to rowsum but calculates the mean. It is slower than colMeans but +##' supports grouping variables. See ?rowsum for details. +##' @param x matrix +##' @param group a vector/factor of grouping +##' @param reorder if TRUE the rows are ordered by `sort(unique(group))` +##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted +##' @return matrix +##' @author Sebastian Gibb +##' @noRd +rowmean <- function(x, group, reorder=FALSE, na.rm=FALSE) { + if (na.rm) { + nna <- !is.na(x) + mode(nna) <- "numeric" + } else { + nna <- x + nna[] <- 1 + } + nna <- rowsum(nna, group=group, reorder=reorder, na.rm=na.rm) + rs <- rowsum(x, group=group, reorder=reorder, na.rm=na.rm) + rs/nna +} + ##' Apply a function groupwise. Similar to tapply but takes a matrix as input ##' and preserve its structure and order. ##' @title applyColumnwiseByGroup diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index ca78b97b5..00699e92c 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -172,6 +172,21 @@ test_that("colSd", { apply(mna, 2, sd, na.rm=TRUE)) }) +test_that("rowmean", { + m <- matrix(1:10, ncol=2) + mna <- m + mna[c(2, 8)] <- NA + group <- c("B", "B", "A", "A", "B") + r <- matrix(c(8/3, 3.5, 23/3, 8.5), ncol=2, dimnames=list(c("B", "A"), c())) + rna <- matrix(c(NA, 3.5, 23/3, NA), ncol=2, dimnames=list(c("B", "A"), c())) + rnarm <- matrix(c(3, 3.5, 23/3, 9), ncol=2, dimnames=list(c("B", "A"), c())) + expect_equal(MSnbase:::rowmean(m, group=group), r) + expect_equal(MSnbase:::rowmean(m, group=group, reorder=TRUE), r[2:1,]) + expect_equal(MSnbase:::rowmean(mna, group=group), rna) + expect_equal(MSnbase:::rowmean(mna, group=group, reorder=TRUE), rna[2:1,]) + expect_equal(MSnbase:::rowmean(mna, group=group, na.rm=TRUE), rnarm) +}) + test_that("applyColumnwiseByGroup", { m <- matrix(1:20, nrow=4, byrow=TRUE, dimnames=list(1:4, LETTERS[1:5])) From 61fd3353e797f3da577ae6dc8b2eedec72a24c56 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 17:21:15 +0200 Subject: [PATCH 5/9] add rowsd and unit tests --- R/utils.R | 24 ++++++++++++++++++++++++ tests/testthat/test_utils.R | 25 ++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 396d24fe1..75fe03185 100644 --- a/R/utils.R +++ b/R/utils.R @@ -919,6 +919,30 @@ rowmean <- function(x, group, reorder=FALSE, na.rm=FALSE) { rs/nna } +##' Similar to rowsum but calculates the sd. +##' See ?rowsum for details. +##' @param x matrix +##' @param group a vector/factor of grouping +##' @param reorder if TRUE the rows are ordered by `sort(unique(group))` +##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted +##' @return matrix +##' @author Sebastian Gibb +##' @noRd +rowsd <- function(x, group, reorder=FALSE, na.rm=FALSE) { + if (na.rm) { + nna <- !is.na(x) + mode(nna) <- "numeric" + } else { + nna <- x + nna[] <- 1 + } + nna <- rowsum(nna, group=group, reorder=reorder, na.rm=na.rm) + nna[nna == 1] <- NA_real_ # return NA if n == 1 (similar to sd) + var <- rowmean(x*x, group=group, reorder=reorder, na.rm=na.rm) - + rowmean(x, group=group, reorder=reorder, na.rm=na.rm)^2L + sqrt(var * nna/(nna - 1L)) +} + ##' Apply a function groupwise. Similar to tapply but takes a matrix as input ##' and preserve its structure and order. ##' @title applyColumnwiseByGroup diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 00699e92c..1a81424c9 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -178,8 +178,10 @@ test_that("rowmean", { mna[c(2, 8)] <- NA group <- c("B", "B", "A", "A", "B") r <- matrix(c(8/3, 3.5, 23/3, 8.5), ncol=2, dimnames=list(c("B", "A"), c())) - rna <- matrix(c(NA, 3.5, 23/3, NA), ncol=2, dimnames=list(c("B", "A"), c())) + rna <- r + rna[c(1, 4)] <- NA rnarm <- matrix(c(3, 3.5, 23/3, 9), ncol=2, dimnames=list(c("B", "A"), c())) + expect_error(MSnbase:::rowmean(m, group=1:2)) expect_equal(MSnbase:::rowmean(m, group=group), r) expect_equal(MSnbase:::rowmean(m, group=group, reorder=TRUE), r[2:1,]) expect_equal(MSnbase:::rowmean(mna, group=group), rna) @@ -187,6 +189,27 @@ test_that("rowmean", { expect_equal(MSnbase:::rowmean(mna, group=group, na.rm=TRUE), rnarm) }) +test_that("rowsd", { + m <- matrix(1:10, ncol=2) + mna <- m + mna[c(2, 8)] <- NA + group <- c("B", "B", "A", "A", "B") + r <- matrix(c(sd(m[c(1, 2, 5), 1]), sd(m[3:4, 1]), + sd(m[c(1, 2, 5), 2]), sd(m[3:4, 2])), + ncol=2, dimnames=list(c("B", "A"), c())) + rna <- r + rna[c(1, 4)] <- NA + rnarm <- matrix(c(sd(mna[c(1, 2, 5), 1], na.rm=TRUE), r[2], + r[3], sd(mna[3:4, 2], na.rm=TRUE)), + ncol=2, dimnames=list(c("B", "A"), c())) + expect_error(MSnbase:::rowsd(m, group=1:2)) + expect_equal(MSnbase:::rowsd(m, group=group), r) + expect_equal(MSnbase:::rowsd(m, group=group, reorder=TRUE), r[2:1,]) + expect_equal(MSnbase:::rowsd(mna, group=group), rna) + expect_equal(MSnbase:::rowsd(mna, group=group, reorder=TRUE), rna[2:1,]) + expect_equal(MSnbase:::rowsd(mna, group=group, na.rm=TRUE), rnarm) +}) + test_that("applyColumnwiseByGroup", { m <- matrix(1:20, nrow=4, byrow=TRUE, dimnames=list(1:4, LETTERS[1:5])) From ecf2a4157f835c15450a23ddc118a3c5fb99f4df Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 17:49:56 +0200 Subject: [PATCH 6/9] rewrite featureCV and add unit test --- R/functions-MSnSet.R | 10 ++++------ tests/testthat/test_MSnSet.R | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/R/functions-MSnSet.R b/R/functions-MSnSet.R index 84339af30..eaa9377d4 100644 --- a/R/functions-MSnSet.R +++ b/R/functions-MSnSet.R @@ -81,12 +81,10 @@ featureCV <- function(x, groupBy, na.rm = TRUE, if (norm != "none") x <- normalise(x, method = norm) - ans <- utils.applyColumnwiseByGroup(exprs(x), groupBy=groupBy, - FUN=function(y, ...) { - utils.colSd(y, ...)/ - colMeans(y, ...)}, na.rm=na.rm) - colnames(ans) <- paste("CV", colnames(ans), sep = ".") - ans + cv <- rowsd(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm) / + rowmean(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm) + colnames(cv) <- paste("CV", colnames(cv), sep=".") + cv } updateFvarLabels <- function(object, label, sep = ".") { diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R index 003f30799..f336b057a 100644 --- a/tests/testthat/test_MSnSet.R +++ b/tests/testthat/test_MSnSet.R @@ -247,6 +247,31 @@ test_that("nQuants", { expect_equal(nQuants(msnset[1:10], pa[1:10]), qu) }) +test_that("featureCV", { + m <- new("MSnSet", + exprs=matrix(1:10, nrow=5, ncol=2), + featureData=new("AnnotatedDataFrame", + data=data.frame(accession= + factor(c("A", "A", "A", "B", "B"))))) + cv <- matrix(c(0.5, 1/4.5, 1/7, 1/9.5), + nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2")) + expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv) + + ## more levels than items present in the factor + m@featureData <- new("AnnotatedDataFrame", + data=data.frame(accession= + factor(c("A", "A", "A", "B", "B"), + levels=LETTERS[1:10]))) + expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv) + + i <- list(1:3, 4:5, 6:8, 9:10) + div <- rowSums(exprs(m)) + cv <- matrix(sapply(i, function(ii) { + sd((exprs(m)/div)[ii]/mean((exprs(m)/div)[ii])) }), + nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2"))) + expect_equal(featureCV(m, group=fData(m)$accession, norm="sum"), cv) +}) + context("MSnSet identification data") test_that("addIdentificationData", { From 006e9571263bdadfc3dce4c7f4e586d23bcd0253 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 17:53:47 +0200 Subject: [PATCH 7/9] remove applyColumnwiseByGroup --- R/utils.R | 30 ------------------------------ tests/testthat/test_utils.R | 12 ------------ 2 files changed, 42 deletions(-) diff --git a/R/utils.R b/R/utils.R index 75fe03185..e96d56875 100644 --- a/R/utils.R +++ b/R/utils.R @@ -943,36 +943,6 @@ rowsd <- function(x, group, reorder=FALSE, na.rm=FALSE) { sqrt(var * nna/(nna - 1L)) } -##' Apply a function groupwise. Similar to tapply but takes a matrix as input -##' and preserve its structure and order. -##' @title applyColumnwiseByGroup -##' @param x matrix -##' @param groupBy factor/grouping index -##' @param FUN function to be applied; must work on columns, e.g. colSums -##' @param ... further arguments to FUN -##' @return modified matrix -##' @author Sebastian Gibb -##' @noRd -utils.applyColumnwiseByGroup <- function(x, groupBy, FUN, ...) { - if (!is.matrix(x)) { - stop("x has to be a matrix!") - } - - groupBy <- as.factor(groupBy) - FUN <- match.fun(FUN) - - j <- split(1L:nrow(x), groupBy) - ans <- matrix(NA_real_, nrow = nlevels(groupBy), ncol = ncol(x), - dimnames = list(levels(groupBy), colnames(x))) - - for (i in seq(along = j)) { - subexprs <- x[j[[i]], , drop = FALSE] - ans[i, ] <- do.call(FUN, list(subexprs, ...)) - } - - ans -} - setMethod("trimws", "data.frame", function(x, which, ...) { for (i in 1:ncol(x)) { diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 1a81424c9..d03770679 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -210,18 +210,6 @@ test_that("rowsd", { expect_equal(MSnbase:::rowsd(mna, group=group, na.rm=TRUE), rnarm) }) -test_that("applyColumnwiseByGroup", { - m <- matrix(1:20, nrow=4, byrow=TRUE, - dimnames=list(1:4, LETTERS[1:5])) - r <- matrix(c(seq(7, 15, by=2), seq(27, 35, by=2)), nrow=2, byrow=TRUE, - dimnames=list(1:2, LETTERS[1:5])) - expect_error(MSnbase:::utils.applyColumnwiseByGroup(1:10, 1:2, sum), - "x has to be a matrix") - expect_equal(MSnbase:::utils.applyColumnwiseByGroup(m, - rep(1:2, each=2), - colSums), r) -}) - test_that("get.amino.acids", { aa <- get.amino.acids() cn <- c("AA", "ResidueMass", "Abbrev3", "ImmoniumIonMass", "Name", From f27f1824bd4745b8d6f0e8f9dad0e5dc9b5c241f Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 17:55:31 +0200 Subject: [PATCH 8/9] update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 603a8ac1f..278a33d72 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ ## Changes in version 2.3.1 - Introduce "NTR" method for `combineFeatures` <2017-04-26 Wed>. +- Rewrite `nQuants` and `featureCV` to avoid returning of rows for empty + factors; see PR #208 for details <2017-04-28 Fri>. ## Changes in version 2.3.0 From d272524c76b09fbf18d9c4e04ddab5f068e47d3c Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 28 Apr 2017 18:11:10 +0200 Subject: [PATCH 9/9] remove utils.colSd --- R/utils.R | 20 -------------------- tests/testthat/test_utils.R | 10 ---------- 2 files changed, 30 deletions(-) diff --git a/R/utils.R b/R/utils.R index e96d56875..8d6f38997 100644 --- a/R/utils.R +++ b/R/utils.R @@ -877,26 +877,6 @@ compareMSnSets <- function(x, y, qual = FALSE, proc = FALSE) { all.equal(x, y) } -##' Similar to colMeans but calculates the sd. Should be identical to -##' apply(x, 2, sd, na.rm). -##' based on: http://stackoverflow.com/questions/17549762/is-there-such-colsd-in-r/17551600#17551600 -##' @title colSd -##' @param x matrix/data.frame -##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted -##' from the calculations? -##' @return double -##' @author Sebastian Gibb -##' @noRd -utils.colSd <- function(x, na.rm = TRUE) { - if (na.rm) { - n <- colSums(!is.na(x)) - } else { - n <- nrow(x) - } - colVar <- colMeans(x*x, na.rm = na.rm) - (colMeans(x, na.rm = na.rm))^2L - sqrt(colVar * n/(n - 1L)) -} - ##' Similar to rowsum but calculates the mean. It is slower than colMeans but ##' supports grouping variables. See ?rowsum for details. ##' @param x matrix diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index d03770679..e004bde60 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -162,16 +162,6 @@ test_that("formatRt", { expect_warning(is.na(formatRt(TRUE))) }) -test_that("colSd", { - set.seed(1) - m <- matrix(rnorm(10), ncol=2) - mna <- m - mna[c(1, 8)] <- NA - expect_equal(MSnbase:::utils.colSd(m), apply(m, 2, sd)) - expect_equal(MSnbase:::utils.colSd(mna, na.rm=TRUE), - apply(mna, 2, sd, na.rm=TRUE)) -}) - test_that("rowmean", { m <- matrix(1:10, ncol=2) mna <- m