Skip to content

Commit

Permalink
refactor: improve performance of chromatogram,XcmsExperiment
Browse files Browse the repository at this point in the history
- Improve the performance of the chromatogram,XcmsExperiment for extracting EICs
  with detected chromatographic peaks.
  • Loading branch information
jorainer committed Dec 20, 2023
1 parent e4176ae commit b274757
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 20 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: xcms
Version: 4.0.1
Version: 4.0.2
Title: LC-MS and GC-MS Data Analysis
Description: Framework for processing and visualization of chromatographically
separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF,
Expand Down
99 changes: 80 additions & 19 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,12 @@
#' but there could be room for improvement.
#'
#' @noRd
#' Function to help extracting the *old* MChromatograms and XChromatograms
#' from an XcmsExperiment.
#'
#' @param object `XcmsExperiment` or `XCMSnExp` object.
#'
#' @noRd
.xmse_extract_chromatograms_old <- function(object, rt, mz, aggregationFun,
msLevel, isolationWindow = NULL,
chunkSize, chromPeaks,
Expand All @@ -869,40 +875,47 @@
fd <- fData(chrs)
rtc <- c("rtmin", "rtmax")
mzc <- c("mzmin", "mzmax")
cpd <- chromPeakData(object)
pks_empty <- chromPeaks(object)[integer(), ]
pkd_empty <- cpd[integer(), ]
cpd <- object@chromPeakData
message("Processing chromatographic peaks")
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
"total (:percent) in ",
":elapsed"),
total = nrow(chrs) + 1L, clear = FALSE)
for (i in seq_len(nrow(chrs))) {
pks <- chromPeaks(object, rt = fd[i, rtc], mz = fd[i, mzc],
msLevel = chrs[i, 1]@msLevel, type = chromPeaks)
idx <- .index_chrom_peaks(
object, rt = fd[i, rtc], mz = fd[i, mzc],
msLevel = chrs[i, 1]@msLevel, type = chromPeaks)
f_s <- factor(object@chromPeaks[idx, "sample"], levels = js)
pkl <- split.data.frame(object@chromPeaks[idx, , drop = FALSE], f_s)
cpl <- split.data.frame(cpd[idx, , drop = FALSE], f_s)
pb$tick()
for (j in js) {
sel <- which(pks[, "sample"] == j)
if (length(sel)) {
slot(chrs@.Data[i, j][[1L]],
"chromPeaks", check = FALSE) <- pks[sel, , drop = FALSE]
slot(chrs@.Data[i, j][[1L]],
"chromPeakData", check = FALSE) <-
extractROWS(cpd, rownames(pks)[sel])
} else {
slot(chrs@.Data[i, j][[1L]],
"chromPeaks", check = FALSE) <- pks_empty
slot(chrs@.Data[i, j][[1L]],
"chromPeakData", check = FALSE) <- pkd_empty
}
tmp <- chrs@.Data[i, j][[1L]]
slot(tmp, "chromPeaks", check = FALSE) <- pkl[[j]]
slot(tmp, "chromPeakData", check = FALSE) <- as(cpl[[j]], "DataFrame")
chrs@.Data[i, j][[1L]] <- tmp
}
}
pb$tick()
## Process features - that is not perfect.
if (hasFeatures(object)) {
message("Processing features")
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
"total (:percent) in ",
":elapsed"),
total = nrow(chrs) + 1L, clear = FALSE)
pks_sub <- chromPeaks(chrs)
fts <- lapply(seq_len(nrow(chrs)), function(r) {
fdev <- featureDefinitions(object, mz = fd[r, mzc], rt = fd[r, rtc])
pb$tick()
if (nrow(fdev)) {
fdev$row <- r
.subset_features_on_chrom_peaks(
fdev, chromPeaks(object), pks_sub)
fdev, object@chromPeaks, pks_sub)
} else data.frame()
})
chrs@featureDefinitions <- DataFrame(do.call(rbind, fts))
pb$tick()
}
chrs@.processHistory <- object@processHistory
chrs
Expand Down Expand Up @@ -1006,3 +1019,51 @@ featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min,
.require_spectra()
Spectra::Spectra(res)
}

#' helper function to get indices of chromatographic peaks for eventual
#' subsetting with `rt`, `mz`, `msLevel` and `type`.
#'
#' @param object `XcmsExperiment` or `XCMSnExp` object with `chromPeaks`.
#'
#' @return `integer` vector with the indices of the `chromPeaks` matching the
#' requested filtering.
#'
#' @noRd
.index_chrom_peaks <- function(object, rt = numeric(),
mz = numeric(), ppm = 0,
msLevel = integer(),
type = c("any", "within",
"apex_within")) {
type <- match.arg(type)
pks <- object@chromPeaks
keep <- rep(TRUE, nrow(pks))
if (length(msLevel))
keep <- keep &
chromPeakData(
object, return.type = "data.frame")$ms_level %in% msLevel
## Select peaks within rt range.
if (length(rt)) {
rt <- range(as.numeric(rt))
if (type == "any")
keep <- keep & pks[, "rtmin"] <= rt[2L] & pks[, "rtmax"] >= rt[1L]
if (type == "within")
keep <- keep & pks[, "rtmin"] >= rt[1L] & pks[, "rtmax"] <= rt[2L]
if (type == "apex_within")
keep <- keep & pks[, "rt"] >= rt[1L] & pks[, "rt"] <= rt[2L]
}
## Select peaks within mz range, considering also ppm
if (length(mz)) {
mz <- range(as.numeric(mz))
if (is.finite(mz[1L]))
mz[1L] <- mz[1L] - mz[1L] * ppm / 1e6
if (is.finite(mz[2L]))
mz[2L] <- mz[2L] + mz[2L] * ppm / 1e6
if (type == "any")
keep <- keep & pks[, "mzmin"] <= mz[2L] & pks[, "mzmax"] >= mz[1L]
if (type == "within")
keep <- keep & pks[, "mzmin"] >= mz[1L] & pks[, "mzmax"] <= mz[2L]
if (type == "apex_within")
keep <- keep & pks[, "mz"] >= mz[1L] & pks[, "mz"] <= mz[2L]
}
which(keep)
}
7 changes: 7 additions & 0 deletions inst/NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Changes in version 4.0.2
----------------------

- Improve performance to extract EICs with chrom peaks with `chromatogram`
from an `XcmsExperiment`.


Changes in version 4.0.1
----------------------

Expand Down
28 changes: 28 additions & 0 deletions tests/testthat/test_XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -1330,3 +1330,31 @@ test_that("setAs,XcmsExperiment,xcmsSet works", {
expect_s4_class(res, "xcmsSet")
expect_equal(peaks(res), chromPeaks(xmseg))
})

test_that(".index_chrom_peaks works", {
## xmse
res <- .index_chrom_peaks(xmse)
expect_equal(res, seq_len(nrow(chromPeaks(xmse))))

## MS level
res <- .index_chrom_peaks(xmse, msLevel = c(3, 4))
expect_equal(res, integer())

## rt
res <- .index_chrom_peaks(xmse, rt = c(2500, 2700),
type = "apex_within")
rts <- chromPeaks(xmse)[res, "rt"]
expect_true(all(rts >= 2500 & rts <= 2700))

## mz
res <- .index_chrom_peaks(xmse, mz = c(400, 600), type = "apex_within")
mzs <- chromPeaks(xmse)[res, "mz"]
expect_true(all(mzs >= 400 & mzs <= 600))

## rt and mz
res <- .index_chrom_peaks(xmse, mz = c(400, 600), type = "apex_within",
rt = c(2500, 2700))
pks <- chromPeaks(xmse)[res, c("mz", "rt")]
expect_true(all(pks[, "mz"] >= 400 & pks[, "mz"] <= 600 &
pks[, "rt"] >= 2500 & pks[, "rt"] <= 2700))
})

0 comments on commit b274757

Please sign in to comment.