Skip to content

Commit

Permalink
feat: add estimatePrecursorIntensity (issue #202)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed May 7, 2021
1 parent a6532d5 commit 49bf2ec
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 128 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Spectra
Title: Spectra Infrastructure for Mass Spectrometry Data
Version: 1.1.20
Version: 1.1.21
Description: The Spectra package defines an efficient infrastructure
for storing and handling mass spectrometry spectra and functionality to
subset, process, visualize and compare spectra data. It provides different
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(applyProcessing)
export(combinePeaks)
export(combineSpectra)
export(concatenateSpectra)
export(estimatePrecursorIntensity)
export(joinPeaks)
export(joinPeaksGnps)
export(joinSpectraData)
Expand Down Expand Up @@ -160,6 +161,7 @@ importFrom(methods,is)
importFrom(methods,new)
importFrom(methods,setAs)
importFrom(methods,validObject)
importFrom(stats,approx)
importFrom(stats,cor)
importFrom(stats,quantile)
importFrom(stats,weighted.mean)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Spectra 1.1

## Changes in 1.1.21

- Add `estimatePrecursorIntensity` function (issue
[#202](https://github.com/rformassspectrometry/Spectra/issues/202)).

## Changes in 1.1.20

- Fix concatenating empty spectra (issue
Expand Down
124 changes: 88 additions & 36 deletions R/Spectra-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,6 @@ NULL
NULL
}

## .apply_processing_queue <- function(x, msLevel, centroided, queue = NULL) {
## if (length(queue)) {
## for (i in seq_along(x)) {
## for (pStep in queue) {
## x[[i]] <- executeProcessingStep(pStep, x[[i]],
## spectrumMsLevel = msLevel[i],
## centroided = centroided[i])
## }
## }
## }
## x
## }

#' @description
#'
#' Apply the lazy evaluation processing queue to the peaks (i.e. m/z, intensity
Expand Down Expand Up @@ -65,28 +52,6 @@ NULL
x
}

## .peaksapply <- function(object, FUN = NULL, ..., f = dataStorage(object),
## BPPARAM = bpparam()) {
## len <- length(object)
## if (!len)
## return(list())
## if (length(f) != len)
## stop("Length of 'f' has to match 'length(object)' (", len, ")")
## if (!is.factor(f))
## f <- factor(f)
## pqueue <- object@processingQueue
## if (!is.null(FUN))
## pqueue <- c(pqueue, ProcessingStep(FUN, ARGS = list(...)))
## if (length(levels(f)) > 1 || length(pqueue)) {
## res <- bplapply(split(object@backend, f), function(z, queue) {
## metad <- spectraData(z, columns = c("msLevel", "centroided"))
## .apply_processing_queue(peaksData(z), metad$msLevel,
## metad$centroided, queue = queue)
## }, queue = pqueue, BPPARAM = BPPARAM)
## unsplit(res, f = f, drop = TRUE)
## } else peaksData(object@backend)
## }

.processingQueueVariables <- function(x) {
if (.hasSlot(x, "processingQueueVariables"))
x@processingQueueVariables
Expand Down Expand Up @@ -623,4 +588,91 @@ joinSpectraData <- function(x, y,
#' @rdname Spectra
processingLog <- function(x) {
x@processing
}
}

#' @export
#'
#' @rdname Spectra
estimatePrecursorIntensity <- function(x, ppm = 10, tolerance = 0,
method = c("previous", "interpolation"),
msLevel. = 2L, f = dataOrigin(x),
BPPARAM = bpparam()) {
if (is.factor(f))
f <- as.character(f)
f <- factor(f, levels = unique(f))
unlist(bplapply(split(x, f), FUN = .estimate_precursor_intensity, ppm = ppm,
tolerance = tolerance, method = method, msLevel = msLevel.,
BPPARAM = BPPARAM), use.names = FALSE)
}

#' estimate precursor intensities based on MS1 peak intensity. This function
#' assumes that `x` is a `Spectra` with data **from a single file/sample**.
#'
#' @author Johannes Rainer
#'
#' @importFrom stats approx
#'
#' @noRd
.estimate_precursor_intensity <- function(x, ppm = 10, tolerance = 0,
method = c("previous",
"interpolation"),
msLevel = 2L) {
method <- match.arg(method)
if (any(msLevel > 2))
warning("Estimation of precursor intensities for MS levels > 2 is ",
"not yet validated")
if (!length(x))
return(numeric())
if (is.unsorted(rtime(x))) {
idx <- order(rtime(x))
x <- x[idx]
} else idx <- integer()
pmz <- precursorMz(x)
pmi <- rep(NA_real_, length(pmz))
idx_ms2 <- which(!is.na(pmz) & msLevel(x) == msLevel)
x_ms1 <- filterMsLevel(x, msLevel = msLevel - 1L)
pks <- .peaksapply(x_ms1)
for (i in idx_ms2) {
rt_i <- rtime(x[i])
before_idx <- which(rtime(x_ms1) < rt_i)
before_int <- NA_real_
before_rt <- NA_real_
if (length(before_idx)) {
before_idx <- before_idx[length(before_idx)]
before_rt <- rtime(x_ms1)[before_idx]
peaks <- pks[[before_idx]]
peak_idx <- closest(pmz[i], peaks[, "mz"],
tolerance = tolerance,
ppm = ppm, duplicates = "closest",
.check = FALSE)
before_int <- unname(peaks[peak_idx, "intensity"])
}
if (method == "interpolation") {
after_idx <- which(rtime(x_ms1) > rt_i)
after_int <- NA_real_
after_rt <- NA_real_
if (length(after_idx)) {
after_idx <- after_idx[1L]
after_rt <- rtime(x_ms1)[after_idx]
peaks <- pks[[after_idx]]
peak_idx <- closest(pmz[i], peaks[, "mz"],
tolerance = tolerance,
ppm = ppm, duplicates = "closest",
.check = FALSE)
after_int <- unname(peaks[peak_idx, "intensity"])
}
if (is.na(before_int)) {
before_int <- after_int
} else {
if (!is.na(after_int))
before_int <- approx(c(before_rt, after_rt),
c(before_int, after_int),
xout = rt_i)$y
}
}
pmi[i] <- before_int
}
if (length(idx))
pmi <- pmi[order(idx)]
pmi
}
30 changes: 29 additions & 1 deletion R/Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,14 @@ NULL
#' the comparison of `x[2]` with `y[3]`). If `SIMPLIFY = TRUE` the `matrix`
#' is *simplified* to a `numeric` if length of `x` or `y` is one.
#'
#' - `estimatePrecursorIntensity`: define the precursor intensities for MS2
#' spectra using the intensity of the matching MS1 peak from the
#' closest MS1 spectrum (i.e. the last MS1 spectrum measured before the
#' respective MS2 spectrum). With `method = "interpolation"` it is also
#' possible to calculate the precursor intensity based on an interpolation of
#' intensity values (and retention times) of the matching MS1 peaks from the
#' previous and next MS1 spectrum. See below for an example.
#'
#' - `filterIntensity`: filters each spectrum keeping only peaks with
#' intensities that are within the provided range or match the criteria of
#' the provided function. For the former, parameter `intensity` has to be a
Expand Down Expand Up @@ -590,7 +598,8 @@ NULL
#' backends changing this parameter can lead to errors.
#' For `combineSpectra`: `factor` defining the grouping of the spectra that
#' should be combined. For `spectrapply`: `factor` how `object` should be
#' splitted.
#' splitted. For `estimatePrecursorIntensity`: defining which spectra belong
#' to the same original data file (sample). Defaults to `f = dataOrigin(x)`.
#'
#' @param FUN For `addProcessing`: function to be applied to the peak matrix
#' of each spectrum in `object`. For `compareSpectra`: function to compare
Expand Down Expand Up @@ -638,6 +647,10 @@ NULL
#' currently, the Moving-Average- (`method = "MovingAverage"`),
#' Weighted-Moving-Average- (`method = "WeightedMovingAverage")`,
#' Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported.
#' - For `estimatePrecursorIntensity`: `character(1)` defining whether the
#' precursor intensity should be estimated on the previous MS1 spectrum
#' (`method = "previous"`, the default) or based on an interpolation on the
#' previous and next MS1 spectrum (`method = "interpolation"`).
#'
#' @param metadata For `Spectra`: optional `list` with metadata information.
#'
Expand Down Expand Up @@ -1015,6 +1028,21 @@ NULL
#' res <- lapply(intensity(sciex_im[1:20]), mean)
#' head(res)
#'
#' ## Calculating the precursor intensity for MS2 spectra:
#' ##
#' ## Some MS instrument manufacturer don't report the precursor intensities
#' ## for MS@ spectra. The `estimatePrecursorIntensity` function can be used
#' ## in these cases to calculate the precursor intensity on MS1 data. Below
#' ## we load an mzML file from a vendor providing precursor intensities and
#' ## compare the estimated and reported precursor intensities.
#' tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5],
#' backend = MsBackendMzR())
#' pmi <- estimatePrecursorIntensity(tmt)
#' plot(pmi, precursorIntensity(tmt))
#'
#' ## We can also replace the original precursor intensity values with the
#' ## newly calculated ones
#' tmt$precursorIntensity <- pmi
#'
#' ## ---- DATA EXPORT ----
#'
Expand Down
90 changes: 64 additions & 26 deletions man/Spectra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 49bf2ec

Please sign in to comment.