From b27475738be594668e556c5e785b7d9586027e3f Mon Sep 17 00:00:00 2001 From: jorainer Date: Wed, 20 Dec 2023 14:47:02 +0100 Subject: [PATCH] refactor: improve performance of chromatogram,XcmsExperiment - Improve the performance of the chromatogram,XcmsExperiment for extracting EICs with detected chromatographic peaks. --- DESCRIPTION | 2 +- R/XcmsExperiment-functions.R | 99 ++++++++++++++++++++++------ inst/NEWS | 7 ++ tests/testthat/test_XcmsExperiment.R | 28 ++++++++ 4 files changed, 116 insertions(+), 20 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 49401c52b..311b29f1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.0.1 +Version: 4.0.2 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 44f6e0569..495e9a5fb 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -854,6 +854,12 @@ #' but there could be room for improvement. #' #' @noRd +#' Function to help extracting the *old* MChromatograms and XChromatograms +#' from an XcmsExperiment. +#' +#' @param object `XcmsExperiment` or `XCMSnExp` object. +#' +#' @noRd .xmse_extract_chromatograms_old <- function(object, rt, mz, aggregationFun, msLevel, isolationWindow = NULL, chunkSize, chromPeaks, @@ -869,40 +875,47 @@ fd <- fData(chrs) rtc <- c("rtmin", "rtmax") mzc <- c("mzmin", "mzmax") - cpd <- chromPeakData(object) - pks_empty <- chromPeaks(object)[integer(), ] - pkd_empty <- cpd[integer(), ] + cpd <- object@chromPeakData + message("Processing chromatographic peaks") + pb <- progress_bar$new(format = paste0("[:bar] :current/:", + "total (:percent) in ", + ":elapsed"), + total = nrow(chrs) + 1L, clear = FALSE) for (i in seq_len(nrow(chrs))) { - pks <- chromPeaks(object, rt = fd[i, rtc], mz = fd[i, mzc], - msLevel = chrs[i, 1]@msLevel, type = chromPeaks) + idx <- .index_chrom_peaks( + object, rt = fd[i, rtc], mz = fd[i, mzc], + msLevel = chrs[i, 1]@msLevel, type = chromPeaks) + f_s <- factor(object@chromPeaks[idx, "sample"], levels = js) + pkl <- split.data.frame(object@chromPeaks[idx, , drop = FALSE], f_s) + cpl <- split.data.frame(cpd[idx, , drop = FALSE], f_s) + pb$tick() for (j in js) { - sel <- which(pks[, "sample"] == j) - if (length(sel)) { - slot(chrs@.Data[i, j][[1L]], - "chromPeaks", check = FALSE) <- pks[sel, , drop = FALSE] - slot(chrs@.Data[i, j][[1L]], - "chromPeakData", check = FALSE) <- - extractROWS(cpd, rownames(pks)[sel]) - } else { - slot(chrs@.Data[i, j][[1L]], - "chromPeaks", check = FALSE) <- pks_empty - slot(chrs@.Data[i, j][[1L]], - "chromPeakData", check = FALSE) <- pkd_empty - } + tmp <- chrs@.Data[i, j][[1L]] + slot(tmp, "chromPeaks", check = FALSE) <- pkl[[j]] + slot(tmp, "chromPeakData", check = FALSE) <- as(cpl[[j]], "DataFrame") + chrs@.Data[i, j][[1L]] <- tmp } } + pb$tick() ## Process features - that is not perfect. if (hasFeatures(object)) { + message("Processing features") + pb <- progress_bar$new(format = paste0("[:bar] :current/:", + "total (:percent) in ", + ":elapsed"), + total = nrow(chrs) + 1L, clear = FALSE) pks_sub <- chromPeaks(chrs) fts <- lapply(seq_len(nrow(chrs)), function(r) { fdev <- featureDefinitions(object, mz = fd[r, mzc], rt = fd[r, rtc]) + pb$tick() if (nrow(fdev)) { fdev$row <- r .subset_features_on_chrom_peaks( - fdev, chromPeaks(object), pks_sub) + fdev, object@chromPeaks, pks_sub) } else data.frame() }) chrs@featureDefinitions <- DataFrame(do.call(rbind, fts)) + pb$tick() } chrs@.processHistory <- object@processHistory chrs @@ -1006,3 +1019,51 @@ featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min, .require_spectra() Spectra::Spectra(res) } + +#' helper function to get indices of chromatographic peaks for eventual +#' subsetting with `rt`, `mz`, `msLevel` and `type`. +#' +#' @param object `XcmsExperiment` or `XCMSnExp` object with `chromPeaks`. +#' +#' @return `integer` vector with the indices of the `chromPeaks` matching the +#' requested filtering. +#' +#' @noRd +.index_chrom_peaks <- function(object, rt = numeric(), + mz = numeric(), ppm = 0, + msLevel = integer(), + type = c("any", "within", + "apex_within")) { + type <- match.arg(type) + pks <- object@chromPeaks + keep <- rep(TRUE, nrow(pks)) + if (length(msLevel)) + keep <- keep & + chromPeakData( + object, return.type = "data.frame")$ms_level %in% msLevel + ## Select peaks within rt range. + if (length(rt)) { + rt <- range(as.numeric(rt)) + if (type == "any") + keep <- keep & pks[, "rtmin"] <= rt[2L] & pks[, "rtmax"] >= rt[1L] + if (type == "within") + keep <- keep & pks[, "rtmin"] >= rt[1L] & pks[, "rtmax"] <= rt[2L] + if (type == "apex_within") + keep <- keep & pks[, "rt"] >= rt[1L] & pks[, "rt"] <= rt[2L] + } + ## Select peaks within mz range, considering also ppm + if (length(mz)) { + mz <- range(as.numeric(mz)) + if (is.finite(mz[1L])) + mz[1L] <- mz[1L] - mz[1L] * ppm / 1e6 + if (is.finite(mz[2L])) + mz[2L] <- mz[2L] + mz[2L] * ppm / 1e6 + if (type == "any") + keep <- keep & pks[, "mzmin"] <= mz[2L] & pks[, "mzmax"] >= mz[1L] + if (type == "within") + keep <- keep & pks[, "mzmin"] >= mz[1L] & pks[, "mzmax"] <= mz[2L] + if (type == "apex_within") + keep <- keep & pks[, "mz"] >= mz[1L] & pks[, "mz"] <= mz[2L] + } + which(keep) +} diff --git a/inst/NEWS b/inst/NEWS index 61aa4a9bb..ba2d43a56 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,10 @@ +Changes in version 4.0.2 +---------------------- + +- Improve performance to extract EICs with chrom peaks with `chromatogram` + from an `XcmsExperiment`. + + Changes in version 4.0.1 ---------------------- diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index af2ce41e8..bf238a9b0 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -1330,3 +1330,31 @@ test_that("setAs,XcmsExperiment,xcmsSet works", { expect_s4_class(res, "xcmsSet") expect_equal(peaks(res), chromPeaks(xmseg)) }) + +test_that(".index_chrom_peaks works", { + ## xmse + res <- .index_chrom_peaks(xmse) + expect_equal(res, seq_len(nrow(chromPeaks(xmse)))) + + ## MS level + res <- .index_chrom_peaks(xmse, msLevel = c(3, 4)) + expect_equal(res, integer()) + + ## rt + res <- .index_chrom_peaks(xmse, rt = c(2500, 2700), + type = "apex_within") + rts <- chromPeaks(xmse)[res, "rt"] + expect_true(all(rts >= 2500 & rts <= 2700)) + + ## mz + res <- .index_chrom_peaks(xmse, mz = c(400, 600), type = "apex_within") + mzs <- chromPeaks(xmse)[res, "mz"] + expect_true(all(mzs >= 400 & mzs <= 600)) + + ## rt and mz + res <- .index_chrom_peaks(xmse, mz = c(400, 600), type = "apex_within", + rt = c(2500, 2700)) + pks <- chromPeaks(xmse)[res, c("mz", "rt")] + expect_true(all(pks[, "mz"] >= 400 & pks[, "mz"] <= 600 & + pks[, "rt"] >= 2500 & pks[, "rt"] <= 2700)) +})