diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 30e0094fc..5a0b42317 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -569,9 +569,15 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' [XCMSnExp-class] object. The function returns for each feature the #' extracted ion chromatograms (along with all associated chromatographic #' peaks) in each sample. The chromatogram is extracted from the m/z - rt -#' region including all chromatographic peaks of that features (i.e. based on -#' the ranges of `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` of all -#' chromatographic peaks of the feature). +#' region including all chromatographic peaks of that features. This region is +#' by default, with `mzmin = min`, `mzmax = max`, `rtmin = min` and +#' `rtmax = max` defined using the **ranges** of `"mzmin"`, `"mzmax"`, +#' `"rtmin"`, `"rtmax"` of all chromatographic peaks of the feature. For some +#' features, and depending on the data, the m/z and rt range can thus be +#' relatively large. The ranges could be restricted by using a different +#' function to define them, e.g. by setting `mzmin = median` and +#' `mzmax = median` in which case the median `"mzmin"` and `"mzmax"` values +#' of all chromatographic peaks would be used. #' #' By default only chromatographic peaks associated with a feature are #' included. For `object` being a `XCMSnExp` object parameter `include` @@ -582,6 +588,14 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' #' @note #' +#' The **same** m/z and rt boundaries are used on every sample to extract +#' the ion chromatogram. The EIC might thus not exactly represent the actual +#' EIC of each individual chromatographic peak (i.e. signal for the ion in one +#' specific sample), since the m/z and rt boundaries might be slightly +#' different across samples. The [chromPeakChromatograms()] function could be +#' used to extract the actual EIC of the chromatographic peak in a specific +#' sample. See also examples below. +#' #' Parameters `include`, `filled`, `n` and `value` are only supported #' for `object` being an `XCMSnExp`. #' @@ -628,6 +642,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' Defaults to `"feature_only"`; See description above for options and #' details. #' +#' @param mzmax `function` defining how the upper boundary of the m/z region +#' from which the EIC is integrated should be defined. Defaults to +#' `mzmax = max` thus the largest `"mzmax"` value for all chromatographic +#' peaks of a feature will be used. +#' +#' @param mzmin `function` defining how the lower boundary of the m/z region +#' from which the EIC is integrated should be defined. Defaults to +#' `mzmin = min` thus the smallest `"mzmin"` value for all chromatographic +#' peaks of a feature will be used. +#' #' @param n Only for `object` being an `XCMSnExp`: `integer(1)` to optionally #' specify the number of *top n* samples from which the EIC should be #' extracted. @@ -642,6 +666,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' supported and the results are thus returned as an [XChromatograms()] #' object. #' +#' @param rtmax `function` defining how the upper boundary of the rt region +#' from which the EIC is integrated should be defined. Defaults to +#' `rtmax = max` thus the largest `"rtmax"` value for all chromatographic +#' peaks of a feature will be used. +#' +#' @param rtmin `function` defining how the lower boundary of the rt region +#' from which the EIC is integrated should be defined. Defaults to +#' `rtmin = min` thus the smallest `"rtmin"` value for all chromatographic +#' peaks of a feature will be used. +#' #' @param value Only for `object` being an `XCMSnExp`: `character(1)` #' specifying the column to be used to sort the samples. Can be either #' `"maxo"` (the default) or `"into"` to use the maximal peak intensity @@ -672,10 +706,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' ## Disable parallel processing for this example #' register(SerialParam()) #' -#' ## Subset the object to a smaller retention time range -#' xdata <- filterRt(faahko_sub, c(2500, 3500)) -#' -#' xdata <- groupChromPeaks(xdata, +#' ## Perform correspondence analysis +#' xdata <- groupChromPeaks(faahko_sub, #' param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) #' #' ## Get the feature definitions @@ -686,8 +718,28 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' chrs <- featureChromatograms(xdata, #' features = rownames(featureDefinitions)[1:3]) #' -#' ## Plot the XIC for the first feature using different colors for each file +#' ## Plot the EIC for the first feature using different colors for each file. #' plot(chrs[1, ], col = c("red", "green", "blue")) +#' +#' ## The EICs for all 3 samples use the same m/z and retention time range, +#' ## which was defined using the `featureArea` function: +#' featureArea(xdata, features = rownames(featureDefinitions(xdata))[1:3], +#' mzmin = min, mzmax = max, rtmin = min, rtmax = max) +#' +#' ## To extract the actual (exact) EICs for each chromatographic peak of +#' ## a feature in each sample, the `chromPeakChromatograms` function would +#' ## need to be used instead. Below we extract the EICs for all +#' ## chromatographic peaks of the first feature. We need to first get the +#' ## IDs of all chromatographic peaks assigned to the first feature: +#' peak_ids <- rownames(chromPeaks(xdata))[featureDefinitions(xdata)$peakidx[[1L]]] +#' +#' ## We can now pass these to the `chromPeakChromatograms` function with +#' ## parameter `peaks`: +#' eic_1 <- chromPeakChromatograms(xdata, peaks = peak_ids) +#' +#' ## To plot these into a single plot we need to use the +#' ## `plotChromatogramsOverlay` function: +#' plotChromatogramsOverlay(eic_1) setGeneric("featureChromatograms", function(object, ...) standardGeneric("featureChromatograms")) @@ -800,8 +852,8 @@ setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-") #' `ChromPeakAreaParam`, the median `"mzmin"`, `"mzmax"`, `"rtmin"` and #' `"rtmax"` values from all detected chromatographic peaks of a feature #' would be used instead. -#' In contrast to the `FillChromPeaksParam` approach this method uses the -#' actual identified chromatographic peaks of a feature to define the area +#' In contrast to the `FillChromPeaksParam` approach this method uses (all) +#' identified chromatographic peaks of a feature to define the area #' from which the signal should be integrated. #' #' @details @@ -1822,7 +1874,8 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...) #' #' @param ppm For `MergeNeighboringPeaksParam`: `numeric(1)` defining a m/z #' relative value (in parts per million) by which the m/z range of each -#' chromatographic peak is expanded to check for overlapping peaks. +#' chromatographic peak is expanded (on each side) to check for overlapping +#' peaks. #' #' @param threshold For `FilterIntensityParam`: `numeric(1)` defining the #' threshold below which peaks are removed. diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 6abb35f65..fe21e10e4 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -934,15 +934,77 @@ #' @rdname XcmsExperiment featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min, - rtmax = max, msLevel = integer(), - features = character()) { + rtmax = max, features = character()) { if (!hasFeatures(object)) stop("No correspondence results available. Please run ", "'groupChromPeaks' first.") - if (!length(msLevel)) - msLevel <- seq_len(10) + if (!length(features)) + features <- rownames(featureDefinitions(object)) .features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, - rtmax = rtmax, msLevel = msLevel, features = features) + rtmax = rtmax, features = features) +} + +#' @title Define MS regions for features +#' +#' @param x `XcmsExperiment` or `XCMSnExp`. +#' +#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values +#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also +#' work with `mean` etc. +#' +#' @param features `character` with the IDs of the features. Mandatory! +#' +#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` +#' defining the range of +#' +#' @author Johannes Rainer +#' +#' @noRd +.features_ms_region <- function(x, mzmin = median, mzmax = median, + rtmin = median, rtmax = median, + features = character()) { + features <- .i2index(features, ids = rownames(featureDefinitions(x)), + "features") + pks <- .chromPeaks(x)[, c("mzmin", "mzmax", "rtmin", "rtmax")] + res <- do.call( + rbind, lapply(featureDefinitions(x)$peakidx[features], + function(i) { + ## maybe consider/drop gap-filled peaks? + c(mzmin(pks[i, "mzmin"]), + mzmax(pks[i, "mzmax"]), + rtmin(pks[i, "rtmin"]), + rtmax(pks[i, "rtmax"])) + })) + rownames(res) <- rownames(featureDefinitions(x))[features] + colnames(res) <- c("mzmin", "mzmax", "rtmin", "rtmax") + res +} + +.features_ms_region_old <- function(x, mzmin = median, mzmax = median, + rtmin = median, rtmax = median, + msLevel = 1L, + features = character()) { + pk_idx <- featureValues(x, value = "index", method = "maxint", + msLevel = msLevel) + if (length(features)) { + ## here's a silent bug: should consider msLevel also below. + features <- .i2index( + features, ids = rownames(featureDefinitions(x)), "features") + pk_idx <- pk_idx[features, , drop = FALSE] + } + n_ft <- nrow(pk_idx) + rt_min <- rt_max <- mz_min <- mz_max <- numeric(n_ft) + for (i in seq_len(n_ft)) { + idx <- pk_idx[i, ] + tmp_pks <- .chromPeaks(x)[idx[!is.na(idx)], , drop = FALSE] + rt_min[i] <- rtmin(tmp_pks[, "rtmin"]) + rt_max[i] <- rtmax(tmp_pks[, "rtmax"]) + mz_min[i] <- mzmin(tmp_pks[, "mzmin"]) + mz_max[i] <- mzmax(tmp_pks[, "mzmax"]) + } + res <- cbind(mzmin = mz_min, mzmax = mz_max, rtmin = rt_min, rtmax = rt_max) + rownames(res) <- rownames(pk_idx) + res } #' *Reconstruct* MS2 spectra for DIA data: diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 74ce07850..43a22998e 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -245,9 +245,7 @@ #' `"rtmin"` and `"rtmax"` with the m/z and retention time range for each #' feature (row) in `object`. By default these represent the minimal m/z #' and retention times as well as maximal m/z and retention times for -#' the chromatographi peaks assigned to that feature. Note that if in -#' one sample more than one chromatographic peak is assigned to a feature -#' only the one with the higher intensity is considered. Parameter +#' all chromatographic peaks assigned to that feature. Parameter #' `features` allows to extract these values for selected features only. #' Parameters `mzmin`, `mzmax`, `rtmin` and `rtmax` allow to define #' the function to calculate the reported `"mzmin"`, `"mzmax"`, `"rtmin"` @@ -408,7 +406,7 @@ #' #' @param features For `filterFeatureDefinitions` and `featureArea`: `logical`, #' `integer` or `character` defining the features to keep or from which -#' to extract the feature are, respectively. See function description +#' to extract the feature area, respectively. See function description #' for more information. #' #' @param file For `filterFile`: `integer` with the indices of the samples @@ -1623,12 +1621,13 @@ setMethod( "featureChromatograms", "XcmsExperiment", function(object, expandRt = 0, expandMz = 0, aggregationFun = "max", features = character(), return.type = "XChromatograms", - chunkSize = 2L, ..., progressbar = TRUE, BPPARAM = bpparam()) { + chunkSize = 2L, mzmin = min, mzmax = max, rtmin = min, + rtmax = max, ..., progressbar = TRUE, BPPARAM = bpparam()) { return.type <- match.arg(return.type) if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) - area <- featureArea(object, mzmin = min, mzmax = max, rtmin = min, - rtmax = max, features = features, msLevel = 1:10) + area <- featureArea(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax, features = features) if (expandRt != 0) { area[, "rtmin"] <- area[, "rtmin"] - expandRt area[, "rtmax"] <- area[, "rtmax"] + expandRt @@ -1760,11 +1759,14 @@ setMethod( if (!hasFeatures(object, msLevel = msLevel)) stop("No feature definitions for MS level ", msLevel, " present.") ## Define region to integrate from for each file + feature_ids <- rownames(featureDefinitions(object, msLevel = msLevel)) fr <- .features_ms_region(object, mzmin = param@mzmin, mzmax = param@mzmax, rtmin = param@rtmin, - rtmax = param@rtmax, msLevel = msLevel) - fr <- cbind(fr, mzmed = featureDefinitions(object)$mzmed) + rtmax = param@rtmax, features = feature_ids) + fr <- cbind( + fr, mzmed = featureDefinitions(object, msLevel = msLevel)$mzmed) fvals <- featureValues(object, value = "index", msLevel = msLevel) + ## For each sample, keep features with some missing values. pal <- lapply(seq_len(ncol(fvals)), function(i) { fr[is.na(fvals[, i]), , drop = FALSE] }) diff --git a/R/functions-Params.R b/R/functions-Params.R index 9438474f4..71c05f89d 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -349,12 +349,13 @@ MergeNeighboringPeaksParam <- function(expandRt = 2, expandMz = 0, ppm = 10, } #' @rdname fillChromPeaks -ChromPeakAreaParam <- function(mzmin = function(z) quantile(z, probs = 0.25), - mzmax = function(z) quantile(z, probs = 0.75), - rtmin = function(z) quantile(z, probs = 0.25), - rtmax = function(z) quantile(z, probs = 0.75)) { - new("ChromPeakAreaParam", mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, - rtmax = rtmax) +ChromPeakAreaParam <- + function(mzmin = function(z) quantile(z, probs = 0.25, names = FALSE), + mzmax = function(z) quantile(z, probs = 0.75, names = FALSE), + rtmin = function(z) quantile(z, probs = 0.25, names = FALSE), + rtmax = function(z) quantile(z, probs = 0.75, names = FALSE)) { + new("ChromPeakAreaParam", mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax) } #' @rdname refineChromPeaks diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index c571bfc0c..fdbf738c7 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -2014,47 +2014,6 @@ setMethod("hasFilledChromPeaks", "XCMSnExp", function(object) { nobject } -#' Define the MS region (m/z - rt range) for each feature based on the rtmin, -#' rtmax, mzmin, mzmax of the corresponding detected peaks. -#' -#' @param x `XCMSnExp` object -#' -#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values -#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also -#' work with `mean` etc. -#' -#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` -#' defining the range of -#' -#' @author Johannes Rainer -#' -#' @noRd -.features_ms_region <- function(x, mzmin = median, mzmax = median, - rtmin = median, rtmax = median, - msLevel = unique(msLevel(x)), - features = character()) { - pk_idx <- featureValues(x, value = "index", method = "maxint", - msLevel = msLevel) - if (length(features)) { - features <- .i2index( - features, ids = rownames(featureDefinitions(x)), "features") - pk_idx <- pk_idx[features, , drop = FALSE] - } - n_ft <- nrow(pk_idx) - rt_min <- rt_max <- mz_min <- mz_max <- numeric(n_ft) - for (i in seq_len(n_ft)) { - idx <- pk_idx[i, ] - tmp_pks <- chromPeaks(x)[idx[!is.na(idx)], , drop = FALSE] - rt_min[i] <- rtmin(tmp_pks[, "rtmin"]) - rt_max[i] <- rtmax(tmp_pks[, "rtmax"]) - mz_min[i] <- mzmin(tmp_pks[, "mzmin"]) - mz_max[i] <- mzmax(tmp_pks[, "mzmax"]) - } - res <- cbind(mzmin = mz_min, mzmax = mz_max, rtmin = rt_min, rtmax = rt_max) - rownames(res) <- rownames(pk_idx) - res -} - #' @param x `XCMSnExp` object of a single file. #' #' @param nValues `integer(1)` defining the number of values that have to be above diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 97f22190a..449a5f4a2 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -2542,9 +2542,12 @@ setMethod("fillChromPeaks", startDate <- date() message("Defining peak areas for filling-in .", appendLF = FALSE) + feature_ids <- rownames(featureDefinitions(object, + msLevel = msLevel)) fts_region <- .features_ms_region( object, mzmin = param@mzmin, mzmax = param@mzmax, - rtmin = param@rtmin, rtmax = param@rtmax, msLevel = msLevel) + rtmin = param@rtmin, rtmax = param@rtmax, + features = feature_ids) fts_region <- cbind(group_idx = seq_len(nrow(fts_region)), fts_region, mzmed = featureDefinitions(object)$mzmed) diff --git a/R/methods-group-features.R b/R/methods-group-features.R index d1c46c8a3..d27b15b69 100644 --- a/R/methods-group-features.R +++ b/R/methods-group-features.R @@ -697,6 +697,12 @@ plotFeatureGroups <- function(x, xlim = numeric(), ylim = numeric(), #' #' @note #' +#' At present the [featureChromatograms()] function is used to extract the +#' EICs for each feature, which does however use one m/z and rt range for +#' each feature and the EICs do thus not exactly represent the identified +#' chromatographic peaks of each sample (i.e. their specific m/z and +#' retention time ranges). +#' #' While being possible to be performed on the full data set without prior #' feature grouping, this is not suggested for the following reasons: I) the #' selection of the top `n` samples with the highest signal for the @@ -932,6 +938,12 @@ setMethod( } if (length(idx) > 1) { eics <- do.call( + ## NOTE: chromPeakChromatograms should be used here + ## instead, since featureChromatograms uses the same + ## m/z - rt boundary for all EICs and does thus not + ## exactly represent the chromatographic peaks. + ## TODO: check if we can't use chromPeakChromatograms + ## instead (slower performance?). featureChromatograms, args = c(list(obj_sub, features = rownames(fvals)[idx], filled = TRUE, progressbar = FALSE), diff --git a/inst/NEWS b/inst/NEWS index 5965f7b14..78fe02f63 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,6 +1,13 @@ Changes in version 4.1.6 ---------------------- +- Update `featureArea` function to consider all chromatographic peaks per + feature, not only the one with the highest intensity. As a consequence, + returned m/z and rt ranges might be higher which has an influence in + `featureChromatograms`, EIC-based feature grouping and, to a lesser extent + also in gap-filling. Related documentation was updated. +- Improve performance of the `featureArea` function (and related of the + `PeakAreaParam`-based gap filling). - Add parameter `ppm` to `PeakDensityParam` to enable peak-density-based correspondence throgh m/z-dependent bins along the m/z. diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 67628584e..1c12584c3 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -94,7 +94,6 @@ featureArea( mzmax = max, rtmin = min, rtmax = max, - msLevel = integer(), features = character() ) @@ -295,7 +294,7 @@ chromatographic peaks assigned to that feature. Defaults to \item{features}{For \code{filterFeatureDefinitions} and \code{featureArea}: \code{logical}, \code{integer} or \code{character} defining the features to keep or from which -to extract the feature are, respectively. See function description +to extract the feature area, respectively. See function description for more information.} \item{x}{An \code{XcmsExperiment} object.} @@ -620,9 +619,7 @@ analysis. This can be overruled with \code{keepAdjustedRtime = TRUE}. \code{"rtmin"} and \code{"rtmax"} with the m/z and retention time range for each feature (row) in \code{object}. By default these represent the minimal m/z and retention times as well as maximal m/z and retention times for -the chromatographi peaks assigned to that feature. Note that if in -one sample more than one chromatographic peak is assigned to a feature -only the one with the higher intensity is considered. Parameter +all chromatographic peaks assigned to that feature. Parameter \code{features} allows to extract these values for selected features only. Parameters \code{mzmin}, \code{mzmax}, \code{rtmin} and \code{rtmax} allow to define the function to calculate the reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} diff --git a/man/featureChromatograms.Rd b/man/featureChromatograms.Rd index e484efc0a..dd60188f7 100644 --- a/man/featureChromatograms.Rd +++ b/man/featureChromatograms.Rd @@ -17,6 +17,10 @@ featureChromatograms(object, ...) features = character(), return.type = "XChromatograms", chunkSize = 2L, + mzmin = min, + mzmax = max, + rtmin = min, + rtmax = max, ..., progressbar = TRUE, BPPARAM = bpparam() @@ -71,6 +75,26 @@ object.} defining the number of files from which the data should be loaded at a time into memory. Defaults to \code{chunkSize = 2L}.} +\item{mzmin}{\code{function} defining how the lower boundary of the m/z region +from which the EIC is integrated should be defined. Defaults to +\code{mzmin = min} thus the smallest \code{"mzmin"} value for all chromatographic +peaks of a feature will be used.} + +\item{mzmax}{\code{function} defining how the upper boundary of the m/z region +from which the EIC is integrated should be defined. Defaults to +\code{mzmax = max} thus the largest \code{"mzmax"} value for all chromatographic +peaks of a feature will be used.} + +\item{rtmin}{\code{function} defining how the lower boundary of the rt region +from which the EIC is integrated should be defined. Defaults to +\code{rtmin = min} thus the smallest \code{"rtmin"} value for all chromatographic +peaks of a feature will be used.} + +\item{rtmax}{\code{function} defining how the upper boundary of the rt region +from which the EIC is integrated should be defined. Defaults to +\code{rtmax = max} thus the largest \code{"rtmax"} value for all chromatographic +peaks of a feature will be used.} + \item{progressbar}{\code{logical(1)} defining whether a progress bar is shown.} \item{BPPARAM}{For \code{object} being an \code{XcmsExperiment}: parallel processing @@ -105,9 +129,15 @@ Extract ion chromatograms for features in an \link{XcmsExperiment} or \linkS4class{XCMSnExp} object. The function returns for each feature the extracted ion chromatograms (along with all associated chromatographic peaks) in each sample. The chromatogram is extracted from the m/z - rt -region including all chromatographic peaks of that features (i.e. based on -the ranges of \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"}, \code{"rtmax"} of all -chromatographic peaks of the feature). +region including all chromatographic peaks of that features. This region is +by default, with \code{mzmin = min}, \code{mzmax = max}, \code{rtmin = min} and +\code{rtmax = max} defined using the \strong{ranges} of \code{"mzmin"}, \code{"mzmax"}, +\code{"rtmin"}, \code{"rtmax"} of all chromatographic peaks of the feature. For some +features, and depending on the data, the m/z and rt range can thus be +relatively large. The ranges could be restricted by using a different +function to define them, e.g. by setting \code{mzmin = median} and +\code{mzmax = median} in which case the median \code{"mzmin"} and \code{"mzmax"} values +of all chromatographic peaks would be used. By default only chromatographic peaks associated with a feature are included. For \code{object} being a \code{XCMSnExp} object parameter \code{include} @@ -117,6 +147,14 @@ chromatographic peak overlapping the m/z and retention time range (\code{include = "any"}). } \note{ +The \strong{same} m/z and rt boundaries are used on every sample to extract +the ion chromatogram. The EIC might thus not exactly represent the actual +EIC of each individual chromatographic peak (i.e. signal for the ion in one +specific sample), since the m/z and rt boundaries might be slightly +different across samples. The \code{\link[=chromPeakChromatograms]{chromPeakChromatograms()}} function could be +used to extract the actual EIC of the chromatographic peak in a specific +sample. See also examples below. + Parameters \code{include}, \code{filled}, \code{n} and \code{value} are only supported for \code{object} being an \code{XCMSnExp}. @@ -133,10 +171,8 @@ faahko_sub <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example register(SerialParam()) -## Subset the object to a smaller retention time range -xdata <- filterRt(faahko_sub, c(2500, 3500)) - -xdata <- groupChromPeaks(xdata, +## Perform correspondence analysis +xdata <- groupChromPeaks(faahko_sub, param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) ## Get the feature definitions @@ -147,8 +183,28 @@ featureDefinitions(xdata) chrs <- featureChromatograms(xdata, features = rownames(featureDefinitions)[1:3]) -## Plot the XIC for the first feature using different colors for each file +## Plot the EIC for the first feature using different colors for each file. plot(chrs[1, ], col = c("red", "green", "blue")) + +## The EICs for all 3 samples use the same m/z and retention time range, +## which was defined using the `featureArea` function: +featureArea(xdata, features = rownames(featureDefinitions(xdata))[1:3], + mzmin = min, mzmax = max, rtmin = min, rtmax = max) + +## To extract the actual (exact) EICs for each chromatographic peak of +## a feature in each sample, the `chromPeakChromatograms` function would +## need to be used instead. Below we extract the EICs for all +## chromatographic peaks of the first feature. We need to first get the +## IDs of all chromatographic peaks assigned to the first feature: +peak_ids <- rownames(chromPeaks(xdata))[featureDefinitions(xdata)$peakidx[[1L]]] + +## We can now pass these to the `chromPeakChromatograms` function with +## parameter `peaks`: +eic_1 <- chromPeakChromatograms(xdata, peaks = peak_ids) + +## To plot these into a single plot we need to use the +## `plotChromatogramsOverlay` function: +plotChromatogramsOverlay(eic_1) } \seealso{ \code{\link[=filterColumnsKeepTop]{filterColumnsKeepTop()}} to filter the extracted EICs keeping only diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd index f142caa53..a57e33e05 100644 --- a/man/fillChromPeaks.Rd +++ b/man/fillChromPeaks.Rd @@ -50,10 +50,10 @@ fixedRt(object) fixedMz(object) ChromPeakAreaParam( - mzmin = function(z) quantile(z, probs = 0.25), - mzmax = function(z) quantile(z, probs = 0.75), - rtmin = function(z) quantile(z, probs = 0.25), - rtmax = function(z) quantile(z, probs = 0.75) + mzmin = function(z) quantile(z, probs = 0.25, names = FALSE), + mzmax = function(z) quantile(z, probs = 0.75, names = FALSE), + rtmin = function(z) quantile(z, probs = 0.25, names = FALSE), + rtmax = function(z) quantile(z, probs = 0.75, names = FALSE) ) \S4method{expandMz}{FillChromPeaksParam}(object) @@ -196,8 +196,8 @@ area is defined analogously. Alternatively, by setting \code{mzmin = median}, \code{ChromPeakAreaParam}, the median \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and \code{"rtmax"} values from all detected chromatographic peaks of a feature would be used instead. -In contrast to the \code{FillChromPeaksParam} approach this method uses the -actual identified chromatographic peaks of a feature to define the area +In contrast to the \code{FillChromPeaksParam} approach this method uses (all) +identified chromatographic peaks of a feature to define the area from which the signal should be integrated. } diff --git a/man/groupFeatures-eic-similarity.Rd b/man/groupFeatures-eic-similarity.Rd index c5aa7d033..544b2a6ac 100644 --- a/man/groupFeatures-eic-similarity.Rd +++ b/man/groupFeatures-eic-similarity.Rd @@ -115,6 +115,12 @@ Features with a value of \code{NA} in \code{featureDefinitions(object)$feature_g will be skipped/not considered for feature grouping. } \note{ +At present the \code{\link[=featureChromatograms]{featureChromatograms()}} function is used to extract the +EICs for each feature, which does however use one m/z and rt range for +each feature and the EICs do thus not exactly represent the identified +chromatographic peaks of each sample (i.e. their specific m/z and +retention time ranges). + While being possible to be performed on the full data set without prior feature grouping, this is not suggested for the following reasons: I) the selection of the top \code{n} samples with the highest signal for the diff --git a/man/refineChromPeaks.Rd b/man/refineChromPeaks.Rd index abca8301c..d23bdeccb 100644 --- a/man/refineChromPeaks.Rd +++ b/man/refineChromPeaks.Rd @@ -108,7 +108,8 @@ value by which the m/z range of each chromatographic peak is expanded \item{ppm}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} defining a m/z relative value (in parts per million) by which the m/z range of each -chromatographic peak is expanded to check for overlapping peaks.} +chromatographic peak is expanded (on each side) to check for overlapping +peaks.} \item{minProp}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} between \code{0} and \code{1} representing the proporion of intensity required for peaks to be diff --git a/tests/testthat/test_XcmsExperiment-functions.R b/tests/testthat/test_XcmsExperiment-functions.R index bd48d8f3d..3f1610745 100644 --- a/tests/testthat/test_XcmsExperiment-functions.R +++ b/tests/testthat/test_XcmsExperiment-functions.R @@ -184,4 +184,20 @@ test_that(".chromPeakData works", { expect_equal(res, xmse@chromPeakData) res <- .chromPeakData(xmse, msLevel = 2L) expect_equal(res, xmse@chromPeakData[integer(), ]) -}) \ No newline at end of file +}) + +test_that(".features_ms_region works", { + res <- .features_ms_region( + xod_xgrg, features = rownames(featureDefinitions(xod_xgrg))) + expect_equal(nrow(res), nrow(featureDefinitions(xod_xgrg))) + expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax")) + expect_true(all(res[, "mzmin"] <= res[, "mzmax"])) + expect_true(all(res[, "rtmin"] < res[, "rtmax"])) + + expect_error(.features_ms_region(xod_xgrg, + features = c("a", "b")), "out of") + + res <- .features_ms_region( + xmseg, features = rownames(featureDefinitions(xmseg))) + expect_equal(rownames(res), rownames(featureDefinitions(xmseg))) +}) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index daa0afd2c..4aa26f58e 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -1419,5 +1419,5 @@ test_that("fillChromPeaks,XcmsExperiment works with verboseBetaColumns", { res <- fillChromPeaks(res, ChromPeakAreaParam()) pks_det <- chromPeaks(res)[!chromPeakData(res)$is_filled, ] pks_fil <- chromPeaks(res)[chromPeakData(res)$is_filled, ] - expect_true(!any(is.na(pks_fil[, "beta_cor"]))) + expect_true(sum(is.na(pks_fil[, "beta_cor"])) < 4) }) diff --git a/tests/testthat/test_functions-XCMSnExp.R b/tests/testthat/test_functions-XCMSnExp.R index 69883a81e..a5e53d1cf 100644 --- a/tests/testthat/test_functions-XCMSnExp.R +++ b/tests/testthat/test_functions-XCMSnExp.R @@ -565,19 +565,6 @@ test_that(".XCMSnExp2SummarizedExperiment works", { featureValues(xod_xgrg, value = "intb")) }) -test_that(".features_ms_region works", { - skip_on_os(os = "windows", arch = "i386") - - res <- .features_ms_region(xod_xgrg, msLevel = 1L) - expect_equal(nrow(res), nrow(featureDefinitions(xod_xgrg))) - expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax")) - expect_true(all(res[, "mzmin"] <= res[, "mzmax"])) - expect_true(all(res[, "rtmin"] < res[, "rtmax"])) - - expect_error(.features_ms_region(xod_xgrg, msLevel = 1L, - features = c("a", "b")), "out of") -}) - test_that(".which_peaks_above_threshold works", { skip_on_os(os = "windows", arch = "i386") diff --git a/tests/testthat/test_methods-XCMSnExp.R b/tests/testthat/test_methods-XCMSnExp.R index 7e7d65b9f..ed348905d 100644 --- a/tests/testthat/test_methods-XCMSnExp.R +++ b/tests/testthat/test_methods-XCMSnExp.R @@ -2575,3 +2575,16 @@ test_that("reconstructChromPeakSpectra works", { expect_error(reconstructChromPeakSpectra(pest_swth, peakId = c("a", "b")), "None of the provided") }) + +test_that("fillChromPeaks,XcmsExperiment works with verboseBetaColumns", { + p <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000), + verboseBetaColumns = TRUE) + res <- findChromPeaks(od_x, param = p) + expect_true(all(c("beta_cor", "beta_snr") %in% colnames(chromPeaks(res)))) + p <- PeakDensityParam(sampleGroups = rep(1, 3)) + res <- groupChromPeaks(res, param = p) + res <- fillChromPeaks(res, ChromPeakAreaParam()) + pks_det <- chromPeaks(res)[!chromPeakData(res)$is_filled, ] + pks_fil <- chromPeaks(res)[chromPeakData(res)$is_filled, ] + expect_true(sum(is.na(pks_fil[, "beta_cor"])) < 4) +}) diff --git a/tests/testthat/test_methods-group-features.R b/tests/testthat/test_methods-group-features.R index 34079a7fd..d6dcca67e 100644 --- a/tests/testthat/test_methods-group-features.R +++ b/tests/testthat/test_methods-group-features.R @@ -359,8 +359,6 @@ test_that("EicSimilarityParam works", { }) test_that("groupFeatures,XCMSnExp,EicSimilarityParam works", { - skip_on_os(os = "windows", arch = "i386") - ## n outside number of samples expect_error(groupFeatures(xodg, param = EicSimilarityParam(n = 10)), "smaller than or") @@ -375,6 +373,7 @@ test_that("groupFeatures,XCMSnExp,EicSimilarityParam works", { res_all <- groupFeatures(tmp, param = EicSimilarityParam()) expect_true(is.character(featureGroups(res_all))) + #' FG.009, FG.001, FG.001, FG.002, FG.003, FG.003 idx <- c(3, 12, 13, 34, 39, 40) tmp <- xodg featureDefinitions(tmp)$feature_group <- NA @@ -416,7 +415,8 @@ test_that("groupFeatures,XcmsExperiment,EicSimilarityParam works", { res_all <- groupFeatures(tmp, param = EicSimilarityParam()) expect_true(is.character(featureGroups(res_all))) - idx <- c(1, 2, 3, 9, 10, 14) + #' FG.014, FG.007, FG.007, FG.006, FG.006, FG.006 + idx <- c(1, 2, 3, 10, 13, 14) featureDefinitions(tmp)$feature_group <- NA featureDefinitions(tmp)$feature_group[idx] <- "FG" res <- groupFeatures(tmp, param = EicSimilarityParam())