feat: add estimatePrecursorIntensity (issue #202)

rformassspectrometry · May 7, 2021 · 49bf2ec · 49bf2ec
1 parent a6532d5
commit 49bf2ec
Show file tree

Hide file tree

Showing 7 changed files with 233 additions and 128 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Spectra
 Title: Spectra Infrastructure for Mass Spectrometry Data
-Version: 1.1.20
+Version: 1.1.21
 Description: The Spectra package defines an efficient infrastructure
    for storing and handling mass spectrometry spectra and functionality to
    subset, process, visualize and compare spectra data. It provides different

diff --git a/NAMESPACE b/NAMESPACE
@@ -7,6 +7,7 @@ export(applyProcessing)
 export(combinePeaks)
 export(combineSpectra)
 export(concatenateSpectra)
+export(estimatePrecursorIntensity)
 export(joinPeaks)
 export(joinPeaksGnps)
 export(joinSpectraData)
@@ -160,6 +161,7 @@ importFrom(methods,is)
 importFrom(methods,new)
 importFrom(methods,setAs)
 importFrom(methods,validObject)
+importFrom(stats,approx)
 importFrom(stats,cor)
 importFrom(stats,quantile)
 importFrom(stats,weighted.mean)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
 # Spectra 1.1
 
+## Changes in 1.1.21
+
+- Add `estimatePrecursorIntensity` function (issue
+  [#202](https://github.com/rformassspectrometry/Spectra/issues/202)).
+
 ## Changes in 1.1.20
 
 - Fix concatenating empty spectra (issue

diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R
@@ -7,19 +7,6 @@ NULL
     NULL
 }
 
-## .apply_processing_queue <- function(x, msLevel, centroided, queue = NULL) {
-##     if (length(queue)) {
-##         for (i in seq_along(x)) {
-##             for (pStep in queue) {
-##                 x[[i]] <- executeProcessingStep(pStep, x[[i]],
-##                                                 spectrumMsLevel = msLevel[i],
-##                                                 centroided = centroided[i])
-##             }
-##         }
-##     }
-##     x
-## }
-
 #' @description
 #'
 #' Apply the lazy evaluation processing queue to the peaks (i.e. m/z, intensity
@@ -65,28 +52,6 @@ NULL
     x
 }
 
-## .peaksapply <- function(object, FUN = NULL, ..., f = dataStorage(object),
-##                         BPPARAM = bpparam()) {
-##     len <- length(object)
-##     if (!len)
-##         return(list())
-##     if (length(f) != len)
-##         stop("Length of 'f' has to match 'length(object)' (", len, ")")
-##     if (!is.factor(f))
-##         f <- factor(f)
-##     pqueue <- object@processingQueue
-##     if (!is.null(FUN))
-##         pqueue <- c(pqueue, ProcessingStep(FUN, ARGS = list(...)))
-##     if (length(levels(f)) > 1 || length(pqueue)) {
-##         res <- bplapply(split(object@backend, f), function(z, queue) {
-##             metad <- spectraData(z, columns = c("msLevel", "centroided"))
-##             .apply_processing_queue(peaksData(z), metad$msLevel,
-##                                     metad$centroided, queue = queue)
-##         }, queue = pqueue, BPPARAM = BPPARAM)
-##         unsplit(res, f = f, drop = TRUE)
-##     } else peaksData(object@backend)
-## }
-
 .processingQueueVariables <- function(x) {
     if (.hasSlot(x, "processingQueueVariables"))
         x@processingQueueVariables
@@ -623,4 +588,91 @@ joinSpectraData <- function(x, y,
 #' @rdname Spectra
 processingLog <- function(x) {
     x@processing
-}
+}
+
+#' @export
+#'
+#' @rdname Spectra
+estimatePrecursorIntensity <- function(x, ppm = 10, tolerance = 0,
+                                       method = c("previous", "interpolation"),
+                                       msLevel. = 2L, f = dataOrigin(x),
+                                       BPPARAM = bpparam()) {
+    if (is.factor(f))
+        f <- as.character(f)
+    f <- factor(f, levels = unique(f))
+    unlist(bplapply(split(x, f), FUN = .estimate_precursor_intensity, ppm = ppm,
+                    tolerance = tolerance, method = method, msLevel = msLevel.,
+                    BPPARAM = BPPARAM), use.names = FALSE)
+}
+
+#' estimate precursor intensities based on MS1 peak intensity. This function
+#' assumes that `x` is a `Spectra` with data **from a single file/sample**.
+#'
+#' @author Johannes Rainer
+#'
+#' @importFrom stats approx
+#'
+#' @noRd
+.estimate_precursor_intensity <- function(x, ppm = 10, tolerance = 0,
+                                          method = c("previous",
+                                                     "interpolation"),
+                                          msLevel = 2L) {
+    method <- match.arg(method)
+    if (any(msLevel > 2))
+        warning("Estimation of precursor intensities for MS levels > 2 is ",
+                "not yet validated")
+    if (!length(x))
+        return(numeric())
+    if (is.unsorted(rtime(x))) {
+        idx <- order(rtime(x))
+        x <- x[idx]
+    } else idx <- integer()
+    pmz <- precursorMz(x)
+    pmi <- rep(NA_real_, length(pmz))
+    idx_ms2 <- which(!is.na(pmz) & msLevel(x) == msLevel)
+    x_ms1 <- filterMsLevel(x, msLevel = msLevel - 1L)
+    pks <- .peaksapply(x_ms1)
+    for (i in idx_ms2) {
+        rt_i <- rtime(x[i])
+        before_idx <- which(rtime(x_ms1) < rt_i)
+        before_int <- NA_real_
+        before_rt <- NA_real_
+        if (length(before_idx)) {
+            before_idx <- before_idx[length(before_idx)]
+            before_rt <- rtime(x_ms1)[before_idx]
+            peaks <- pks[[before_idx]]
+            peak_idx <- closest(pmz[i], peaks[, "mz"],
+                                tolerance = tolerance,
+                                ppm = ppm, duplicates = "closest",
+                                .check = FALSE)
+            before_int <- unname(peaks[peak_idx, "intensity"])
+        }
+        if (method == "interpolation") {
+            after_idx <- which(rtime(x_ms1) > rt_i)
+            after_int <- NA_real_
+            after_rt <- NA_real_
+            if (length(after_idx)) {
+                after_idx <- after_idx[1L]
+                after_rt <- rtime(x_ms1)[after_idx]
+                peaks <- pks[[after_idx]]
+                peak_idx <- closest(pmz[i], peaks[, "mz"],
+                                    tolerance = tolerance,
+                                    ppm = ppm, duplicates = "closest",
+                                    .check = FALSE)
+                after_int <- unname(peaks[peak_idx, "intensity"])
+            }
+            if (is.na(before_int)) {
+                before_int <- after_int
+            } else {
+                if (!is.na(after_int))
+                    before_int <- approx(c(before_rt, after_rt),
+                                         c(before_int, after_int),
+                                         xout = rt_i)$y
+            }
+        }
+        pmi[i] <- before_int
+    }
+    if (length(idx))
+        pmi <- pmi[order(idx)]
+    pmi
+}
diff --git a/R/Spectra.R b/R/Spectra.R
@@ -474,6 +474,14 @@ NULL
 #'   the comparison of `x[2]` with `y[3]`). If `SIMPLIFY = TRUE` the `matrix`
 #'   is *simplified* to a `numeric` if length of `x` or `y` is one.
 #'
+#' - `estimatePrecursorIntensity`: define the precursor intensities for MS2
+#'   spectra using the intensity of the matching MS1 peak from the
+#'   closest MS1 spectrum (i.e. the last MS1 spectrum measured before the
+#'   respective MS2 spectrum). With `method = "interpolation"` it is also
+#'   possible to calculate the precursor intensity based on an interpolation of
+#'   intensity values (and retention times) of the matching MS1 peaks from the
+#'   previous and next MS1 spectrum. See below for an example.
+#'
 #' - `filterIntensity`: filters each spectrum keeping only peaks with
 #'   intensities that are within the provided range or match the criteria of
 #'   the provided function. For the former, parameter `intensity` has to be a
@@ -590,7 +598,8 @@ NULL
 #'     backends changing this parameter can lead to errors.
 #'     For `combineSpectra`: `factor` defining the grouping of the spectra that
 #'     should be combined. For `spectrapply`: `factor` how `object` should be
-#'     splitted.
+#'     splitted. For `estimatePrecursorIntensity`: defining which spectra belong
+#'     to the same original data file (sample). Defaults to `f = dataOrigin(x)`.
 #'
 #' @param FUN For `addProcessing`: function to be applied to the peak matrix
 #'     of each spectrum in `object`. For `compareSpectra`: function to compare
@@ -638,6 +647,10 @@ NULL
 #'   currently, the Moving-Average- (`method = "MovingAverage"`),
 #'   Weighted-Moving-Average- (`method = "WeightedMovingAverage")`,
 #'   Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported.
+#' - For `estimatePrecursorIntensity`: `character(1)` defining whether the
+#'   precursor intensity should be estimated on the previous MS1 spectrum
+#'   (`method = "previous"`, the default) or based on an interpolation on the
+#'   previous and next MS1 spectrum (`method = "interpolation"`).
 #'
 #' @param metadata For `Spectra`: optional `list` with metadata information.
 #'
@@ -1015,6 +1028,21 @@ NULL
 #' res <- lapply(intensity(sciex_im[1:20]), mean)
 #' head(res)
 #'
+#' ## Calculating the precursor intensity for MS2 spectra:
+#' ##
+#' ## Some MS instrument manufacturer don't report the precursor intensities
+#' ## for MS@ spectra. The `estimatePrecursorIntensity` function can be used
+#' ## in these cases to calculate the precursor intensity on MS1 data. Below
+#' ## we load an mzML file from a vendor providing precursor intensities and
+#' ## compare the estimated and reported precursor intensities.
+#' tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5],
+#'     backend = MsBackendMzR())
+#' pmi <- estimatePrecursorIntensity(tmt)
+#' plot(pmi, precursorIntensity(tmt))
+#'
+#' ## We can also replace the original precursor intensity values with the
+#' ## newly calculated ones
+#' tmt$precursorIntensity <- pmi
 #'
 #' ## ---- DATA EXPORT ----
 #'

diff --git a/man/Spectra.Rd b/man/Spectra.Rd