From f1ae9d2b33f9cac9f1a6bb5dd3fedacce15da582 Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 15:26:53 +0200
Subject: [PATCH 1/9] add unit test for nQuants

---
 tests/testthat/test_MSnSet.R | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R
index 8907cc4a3..374fa260f 100644
--- a/tests/testthat/test_MSnSet.R
+++ b/tests/testthat/test_MSnSet.R
@@ -212,6 +212,19 @@ test_that("Transpose and subset", {
     expect_identical(dim(bb), c(2L, 2L))
 })
 
+test_that("nQuants", {
+    data(msnset)
+    pa <- fData(msnset)$ProteinAccession
+    upa <- unique(pa)
+    qu <- matrix(1, nrow=length(upa), ncol=ncol(msnset),
+                 dimnames=list(upa[order(upa)], sampleNames(msnset)))
+    qu[c("ECA0435", "ECA0469", "ECA3349", "ECA3566", "ECA4026"),] <- 2
+    qu["BSA",] <- 3
+    qu["ENO",] <- c(4, 4, 3, 4)
+    qu["ECA4514",] <- 6
+    expect_equal(nQuants(msnset, pa), qu)
+})
+
 context("MSnSet identification data")
 
 test_that("addIdentificationData", {

From 8f4fce57d48de49fe61e976b0b182d92da40839b Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 15:38:11 +0200
Subject: [PATCH 2/9] rewrite nQuants using rowsum instead of
 utils.applyColumnwiseByGroup

---
 R/functions-MSnSet.R | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/R/functions-MSnSet.R b/R/functions-MSnSet.R
index 6c3495933..84339af30 100644
--- a/R/functions-MSnSet.R
+++ b/R/functions-MSnSet.R
@@ -158,11 +158,9 @@ nQuants <- function(x, groupBy) {
   if (class(x) != "MSnSet")
     stop("'x' must be of class 'MSnSet'.")
 
-  ans <- utils.applyColumnwiseByGroup(exprs(x), groupBy=groupBy,
-                                      FUN=function(y) {
-                                        nrow(y)-colSums(is.na(y))})
-  colnames(ans) <- sampleNames(x)
-  ans
+  e <- !is.na(exprs(x))
+  mode(e) <- "numeric"
+  rowsum(e, group=groupBy, reorder=TRUE)
 }
 
 ##' Subsets \code{MSnSet} instances to their common feature names.

From bf2f6dea5010dce76837abcea7153c1247b78a35 Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 16:35:11 +0200
Subject: [PATCH 3/9] extend nQuants unit test

---
 tests/testthat/test_MSnSet.R | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R
index 374fa260f..003f30799 100644
--- a/tests/testthat/test_MSnSet.R
+++ b/tests/testthat/test_MSnSet.R
@@ -213,6 +213,22 @@ test_that("Transpose and subset", {
 })
 
 test_that("nQuants", {
+    m <- new("MSnSet",
+             exprs=matrix(1:10, nrow=5, ncol=2),
+             featureData=new("AnnotatedDataFrame",
+                             data=data.frame(accession=
+                                             factor(c("A", "A", "A", "B", "B")))))
+    qu <- matrix(3:2, nrow=2, ncol=2, dimnames=list(c("A", "B"), 1:2))
+    expect_equal(nQuants(m, group=fData(m)$accession), qu)
+
+    ## more levels than items present in the factor
+    m@featureData <- new("AnnotatedDataFrame",
+                         data=data.frame(accession=
+                                         factor(c("A", "A", "A", "B", "B"),
+                                                    levels=LETTERS[1:10])))
+    expect_equal(nQuants(m, group=fData(m)$accession), qu)
+
+    ## real world example
     data(msnset)
     pa <- fData(msnset)$ProteinAccession
     upa <- unique(pa)
@@ -223,6 +239,12 @@ test_that("nQuants", {
     qu["ENO",] <- c(4, 4, 3, 4)
     qu["ECA4514",] <- 6
     expect_equal(nQuants(msnset, pa), qu)
+
+    ## more levels than items present in the factor
+    qu <- matrix(1, nrow=10, ncol=4,
+                 dimnames=list(as.character(pa[order(pa[1:10])]),
+                                            sampleNames(msnset)))
+    expect_equal(nQuants(msnset[1:10], pa[1:10]), qu)
 })
 
 context("MSnSet identification data")

From 4c2c704532b7a389c6dbc4370ab2e84c43b49164 Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 16:55:42 +0200
Subject: [PATCH 4/9] add rowmean with unit tests

---
 R/utils.R                   | 22 ++++++++++++++++++++++
 tests/testthat/test_utils.R | 15 +++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/R/utils.R b/R/utils.R
index e4e546921..396d24fe1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -897,6 +897,28 @@ utils.colSd <- function(x, na.rm = TRUE) {
   sqrt(colVar * n/(n - 1L))
 }
 
+##' Similar to rowsum but calculates the mean. It is slower than colMeans but
+##' supports grouping variables. See ?rowsum for details.
+##' @param x matrix
+##' @param group a vector/factor of grouping
+##' @param reorder if TRUE the rows are ordered by `sort(unique(group))`
+##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted
+##' @return matrix
+##' @author Sebastian Gibb <mail@@sebastiangibb.de>
+##' @noRd
+rowmean <- function(x, group, reorder=FALSE, na.rm=FALSE) {
+  if (na.rm) {
+    nna <- !is.na(x)
+    mode(nna) <- "numeric"
+  } else {
+    nna <- x
+    nna[] <- 1
+  }
+  nna <- rowsum(nna, group=group, reorder=reorder, na.rm=na.rm)
+  rs <- rowsum(x, group=group, reorder=reorder, na.rm=na.rm)
+  rs/nna
+}
+
 ##' Apply a function groupwise. Similar to tapply but takes a matrix as input
 ##' and preserve its structure and order.
 ##' @title applyColumnwiseByGroup
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index ca78b97b5..00699e92c 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -172,6 +172,21 @@ test_that("colSd", {
                  apply(mna, 2, sd, na.rm=TRUE))
 })
 
+test_that("rowmean", {
+    m <- matrix(1:10, ncol=2)
+    mna <- m
+    mna[c(2, 8)] <- NA
+    group <- c("B", "B", "A", "A", "B")
+    r <- matrix(c(8/3, 3.5, 23/3, 8.5), ncol=2, dimnames=list(c("B", "A"), c()))
+    rna <- matrix(c(NA, 3.5, 23/3, NA), ncol=2, dimnames=list(c("B", "A"), c()))
+    rnarm <- matrix(c(3, 3.5, 23/3, 9), ncol=2, dimnames=list(c("B", "A"), c()))
+    expect_equal(MSnbase:::rowmean(m, group=group), r)
+    expect_equal(MSnbase:::rowmean(m, group=group, reorder=TRUE), r[2:1,])
+    expect_equal(MSnbase:::rowmean(mna, group=group), rna)
+    expect_equal(MSnbase:::rowmean(mna, group=group, reorder=TRUE), rna[2:1,])
+    expect_equal(MSnbase:::rowmean(mna, group=group, na.rm=TRUE), rnarm)
+})
+
 test_that("applyColumnwiseByGroup", {
     m <- matrix(1:20, nrow=4, byrow=TRUE,
                 dimnames=list(1:4, LETTERS[1:5]))

From 61fd3353e797f3da577ae6dc8b2eedec72a24c56 Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 17:21:15 +0200
Subject: [PATCH 5/9] add rowsd and unit tests

---
 R/utils.R                   | 24 ++++++++++++++++++++++++
 tests/testthat/test_utils.R | 25 ++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index 396d24fe1..75fe03185 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -919,6 +919,30 @@ rowmean <- function(x, group, reorder=FALSE, na.rm=FALSE) {
   rs/nna
 }
 
+##' Similar to rowsum but calculates the sd.
+##' See ?rowsum for details.
+##' @param x matrix
+##' @param group a vector/factor of grouping
+##' @param reorder if TRUE the rows are ordered by `sort(unique(group))`
+##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted
+##' @return matrix
+##' @author Sebastian Gibb <mail@@sebastiangibb.de>
+##' @noRd
+rowsd <- function(x, group, reorder=FALSE, na.rm=FALSE) {
+  if (na.rm) {
+    nna <- !is.na(x)
+    mode(nna) <- "numeric"
+  } else {
+    nna <- x
+    nna[] <- 1
+  }
+  nna <- rowsum(nna, group=group, reorder=reorder, na.rm=na.rm)
+  nna[nna == 1] <- NA_real_            # return NA if n == 1 (similar to sd)
+  var <- rowmean(x*x, group=group, reorder=reorder, na.rm=na.rm) -
+    rowmean(x, group=group, reorder=reorder, na.rm=na.rm)^2L
+  sqrt(var * nna/(nna - 1L))
+}
+
 ##' Apply a function groupwise. Similar to tapply but takes a matrix as input
 ##' and preserve its structure and order.
 ##' @title applyColumnwiseByGroup
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index 00699e92c..1a81424c9 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -178,8 +178,10 @@ test_that("rowmean", {
     mna[c(2, 8)] <- NA
     group <- c("B", "B", "A", "A", "B")
     r <- matrix(c(8/3, 3.5, 23/3, 8.5), ncol=2, dimnames=list(c("B", "A"), c()))
-    rna <- matrix(c(NA, 3.5, 23/3, NA), ncol=2, dimnames=list(c("B", "A"), c()))
+    rna <- r
+    rna[c(1, 4)] <- NA
     rnarm <- matrix(c(3, 3.5, 23/3, 9), ncol=2, dimnames=list(c("B", "A"), c()))
+    expect_error(MSnbase:::rowmean(m, group=1:2))
     expect_equal(MSnbase:::rowmean(m, group=group), r)
     expect_equal(MSnbase:::rowmean(m, group=group, reorder=TRUE), r[2:1,])
     expect_equal(MSnbase:::rowmean(mna, group=group), rna)
@@ -187,6 +189,27 @@ test_that("rowmean", {
     expect_equal(MSnbase:::rowmean(mna, group=group, na.rm=TRUE), rnarm)
 })
 
+test_that("rowsd", {
+    m <- matrix(1:10, ncol=2)
+    mna <- m
+    mna[c(2, 8)] <- NA
+    group <- c("B", "B", "A", "A", "B")
+    r <- matrix(c(sd(m[c(1, 2, 5), 1]), sd(m[3:4, 1]),
+                  sd(m[c(1, 2, 5), 2]), sd(m[3:4, 2])),
+                  ncol=2, dimnames=list(c("B", "A"), c()))
+    rna <- r
+    rna[c(1, 4)] <- NA
+    rnarm <- matrix(c(sd(mna[c(1, 2, 5), 1], na.rm=TRUE), r[2],
+                      r[3], sd(mna[3:4, 2], na.rm=TRUE)),
+          ncol=2, dimnames=list(c("B", "A"), c()))
+    expect_error(MSnbase:::rowsd(m, group=1:2))
+    expect_equal(MSnbase:::rowsd(m, group=group), r)
+    expect_equal(MSnbase:::rowsd(m, group=group, reorder=TRUE), r[2:1,])
+    expect_equal(MSnbase:::rowsd(mna, group=group), rna)
+    expect_equal(MSnbase:::rowsd(mna, group=group, reorder=TRUE), rna[2:1,])
+    expect_equal(MSnbase:::rowsd(mna, group=group, na.rm=TRUE), rnarm)
+})
+
 test_that("applyColumnwiseByGroup", {
     m <- matrix(1:20, nrow=4, byrow=TRUE,
                 dimnames=list(1:4, LETTERS[1:5]))

From ecf2a4157f835c15450a23ddc118a3c5fb99f4df Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 17:49:56 +0200
Subject: [PATCH 6/9] rewrite featureCV and add unit test

---
 R/functions-MSnSet.R         | 10 ++++------
 tests/testthat/test_MSnSet.R | 25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/R/functions-MSnSet.R b/R/functions-MSnSet.R
index 84339af30..eaa9377d4 100644
--- a/R/functions-MSnSet.R
+++ b/R/functions-MSnSet.R
@@ -81,12 +81,10 @@ featureCV <- function(x, groupBy, na.rm = TRUE,
   if (norm != "none")
     x <- normalise(x, method = norm)
 
-  ans <- utils.applyColumnwiseByGroup(exprs(x), groupBy=groupBy,
-                                      FUN=function(y, ...) {
-                                        utils.colSd(y, ...)/
-                                          colMeans(y, ...)}, na.rm=na.rm)
-  colnames(ans) <- paste("CV", colnames(ans), sep = ".")
-  ans
+  cv <- rowsd(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm) /
+          rowmean(exprs(x), group=groupBy, reorder=TRUE, na.rm=na.rm)
+  colnames(cv) <- paste("CV", colnames(cv), sep=".")
+  cv
 }
 
 updateFvarLabels <- function(object, label, sep = ".") {
diff --git a/tests/testthat/test_MSnSet.R b/tests/testthat/test_MSnSet.R
index 003f30799..f336b057a 100644
--- a/tests/testthat/test_MSnSet.R
+++ b/tests/testthat/test_MSnSet.R
@@ -247,6 +247,31 @@ test_that("nQuants", {
     expect_equal(nQuants(msnset[1:10], pa[1:10]), qu)
 })
 
+test_that("featureCV", {
+    m <- new("MSnSet",
+             exprs=matrix(1:10, nrow=5, ncol=2),
+             featureData=new("AnnotatedDataFrame",
+                             data=data.frame(accession=
+                                             factor(c("A", "A", "A", "B", "B")))))
+    cv <- matrix(c(0.5, 1/4.5, 1/7, 1/9.5),
+                 nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2"))
+    expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv)
+
+    ## more levels than items present in the factor
+    m@featureData <- new("AnnotatedDataFrame",
+                         data=data.frame(accession=
+                                         factor(c("A", "A", "A", "B", "B"),
+                                                    levels=LETTERS[1:10])))
+    expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv)
+
+    i <- list(1:3, 4:5, 6:8, 9:10)
+    div <- rowSums(exprs(m))
+    cv <- matrix(sapply(i, function(ii) {
+                   sd((exprs(m)/div)[ii]/mean((exprs(m)/div)[ii])) }),
+                 nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2")))
+    expect_equal(featureCV(m, group=fData(m)$accession, norm="sum"), cv)
+})
+
 context("MSnSet identification data")
 
 test_that("addIdentificationData", {

From 006e9571263bdadfc3dce4c7f4e586d23bcd0253 Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 17:53:47 +0200
Subject: [PATCH 7/9] remove applyColumnwiseByGroup

---
 R/utils.R                   | 30 ------------------------------
 tests/testthat/test_utils.R | 12 ------------
 2 files changed, 42 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 75fe03185..e96d56875 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -943,36 +943,6 @@ rowsd <- function(x, group, reorder=FALSE, na.rm=FALSE) {
   sqrt(var * nna/(nna - 1L))
 }
 
-##' Apply a function groupwise. Similar to tapply but takes a matrix as input
-##' and preserve its structure and order.
-##' @title applyColumnwiseByGroup
-##' @param x matrix
-##' @param groupBy factor/grouping index
-##' @param FUN function to be applied; must work on columns, e.g. colSums
-##' @param ... further arguments to FUN
-##' @return modified matrix
-##' @author Sebastian Gibb <mail@@sebastiangibb.de>
-##' @noRd
-utils.applyColumnwiseByGroup <- function(x, groupBy, FUN, ...) {
-  if (!is.matrix(x)) {
-    stop("x has to be a matrix!")
-  }
-
-  groupBy <- as.factor(groupBy)
-  FUN <- match.fun(FUN)
-
-  j <- split(1L:nrow(x), groupBy)
-  ans <- matrix(NA_real_, nrow = nlevels(groupBy), ncol = ncol(x),
-                dimnames = list(levels(groupBy), colnames(x)))
-
-  for (i in seq(along = j)) {
-    subexprs <- x[j[[i]], , drop = FALSE]
-    ans[i, ] <- do.call(FUN, list(subexprs, ...))
-  }
-
-  ans
-}
-
 setMethod("trimws", "data.frame",
           function(x, which, ...) {
               for (i in 1:ncol(x)) {
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index 1a81424c9..d03770679 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -210,18 +210,6 @@ test_that("rowsd", {
     expect_equal(MSnbase:::rowsd(mna, group=group, na.rm=TRUE), rnarm)
 })
 
-test_that("applyColumnwiseByGroup", {
-    m <- matrix(1:20, nrow=4, byrow=TRUE,
-                dimnames=list(1:4, LETTERS[1:5]))
-    r <- matrix(c(seq(7, 15, by=2), seq(27, 35, by=2)), nrow=2, byrow=TRUE,
-                dimnames=list(1:2, LETTERS[1:5]))
-    expect_error(MSnbase:::utils.applyColumnwiseByGroup(1:10, 1:2, sum),
-                 "x has to be a matrix")
-    expect_equal(MSnbase:::utils.applyColumnwiseByGroup(m,
-                                                        rep(1:2, each=2),
-                                                        colSums), r)
-})
-
 test_that("get.amino.acids", {
     aa <- get.amino.acids()
     cn <- c("AA", "ResidueMass", "Abbrev3", "ImmoniumIonMass", "Name",

From f27f1824bd4745b8d6f0e8f9dad0e5dc9b5c241f Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 17:55:31 +0200
Subject: [PATCH 8/9] update NEWS.md

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 603a8ac1f..278a33d72 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,8 @@
 ## Changes in version 2.3.1
 
 - Introduce "NTR" method for `combineFeatures` <2017-04-26 Wed>.
+- Rewrite `nQuants` and `featureCV` to avoid returning of rows for empty
+    factors; see PR #208 for details <2017-04-28 Fri>.
 
 ## Changes in version 2.3.0
 

From d272524c76b09fbf18d9c4e04ddab5f068e47d3c Mon Sep 17 00:00:00 2001
From: Sebastian Gibb <mail@sebastiangibb.de>
Date: Fri, 28 Apr 2017 18:11:10 +0200
Subject: [PATCH 9/9] remove utils.colSd

---
 R/utils.R                   | 20 --------------------
 tests/testthat/test_utils.R | 10 ----------
 2 files changed, 30 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index e96d56875..8d6f38997 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -877,26 +877,6 @@ compareMSnSets <- function(x, y, qual = FALSE, proc = FALSE) {
     all.equal(x, y)
 }
 
-##' Similar to colMeans but calculates the sd. Should be identical to
-##' apply(x, 2, sd, na.rm).
-##' based on: http://stackoverflow.com/questions/17549762/is-there-such-colsd-in-r/17551600#17551600
-##' @title colSd
-##' @param x matrix/data.frame
-##' @param na.rm logical. Should missing values (including ‘NaN’) be omitted
-##' from the calculations?
-##' @return double
-##' @author Sebastian Gibb <mail@@sebastiangibb.de>
-##' @noRd
-utils.colSd <- function(x, na.rm = TRUE) {
-  if (na.rm) {
-    n <- colSums(!is.na(x))
-  } else {
-    n <- nrow(x)
-  }
-  colVar <- colMeans(x*x, na.rm = na.rm) - (colMeans(x, na.rm = na.rm))^2L
-  sqrt(colVar * n/(n - 1L))
-}
-
 ##' Similar to rowsum but calculates the mean. It is slower than colMeans but
 ##' supports grouping variables. See ?rowsum for details.
 ##' @param x matrix
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index d03770679..e004bde60 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -162,16 +162,6 @@ test_that("formatRt", {
     expect_warning(is.na(formatRt(TRUE)))
 })
 
-test_that("colSd", {
-    set.seed(1)
-    m <- matrix(rnorm(10), ncol=2)
-    mna <- m
-    mna[c(1, 8)] <- NA
-    expect_equal(MSnbase:::utils.colSd(m), apply(m, 2, sd))
-    expect_equal(MSnbase:::utils.colSd(mna, na.rm=TRUE),
-                 apply(mna, 2, sd, na.rm=TRUE))
-})
-
 test_that("rowmean", {
     m <- matrix(1:10, ncol=2)
     mna <- m