diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 1c31a3a72..a9d90f242 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -203,6 +203,7 @@ jobs: BiocManager::install("MsBackendMgf") BiocManager::install("MetaboCoreUtils") BiocManager::install("magick") + BiocManager::install("RforMassSpectrometry/MsExperiment") ## For running the checks message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) diff --git a/DESCRIPTION b/DESCRIPTION index 98a638022..995446c84 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,9 +56,9 @@ Imports: robustbase, IRanges, SummarizedExperiment, - MsCoreUtils (>= 1.11.5), + MsCoreUtils (>= 1.15.3), MsFeatures, - MsExperiment (>= 1.1.2), + MsExperiment (>= 1.5.4), Spectra (>= 1.13.2), progress, multtest, diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 index 133eb5e13..aa3563bf3 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,7 +39,7 @@ importFrom("SummarizedExperiment", "rowData") importFrom("SummarizedExperiment", "rowData<-") importFrom("SummarizedExperiment", "assay") importFrom("MsCoreUtils", "rbindFill", "closest", "i2index", "sumi", "between", - "maxi") + "maxi", "breaks_ppm") ## Additional imports proposed by R CMD check: importFrom("graphics", "abline", "barplot", "close.screen", "hist", @@ -575,6 +575,7 @@ importFrom("Spectra", "MsBackendMemory") ## MsExperiment things importClassesFrom("MsExperiment", "MsExperiment") importMethodsFrom("MsExperiment", "spectra") +importMethodsFrom("MsExperiment", "filterSpectra") importMethodsFrom("BiocGenerics", "do.call") importMethodsFrom("BiocGenerics", "rbind") importFrom("MsExperiment", "MsExperiment") diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c97e6b32b..b9cd7618f 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -569,19 +569,30 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' [XCMSnExp-class] object. 
The function returns for each feature the #' extracted ion chromatograms (along with all associated chromatographic #' peaks) in each sample. The chromatogram is extracted from the m/z - rt -#' region including all chromatographic peaks of that features (i.e. based on -#' the ranges of `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` of all -#' chromatographic peaks of the feature). +#' region that includes **all** chromatographic peaks of a feature. By default, +#' this region is defined using the range of the chromatographic peaks' m/z +#' and retention times (with `mzmin = min`, `mzmax = max`, `rtmin = min` and +#' `rtmax = max`). For some features, and depending on the data, the m/z and +#' rt range can thus be relatively large. The boundaries of the m/z - rt +#' region can also be restricted by changing parameters `mzmin`, `mzmax`, +#' `rtmin` and `rtmax` to different functions, such as `median`. #' #' By default only chromatographic peaks associated with a feature are -#' included. For `object` being a `XCMSnExp` object parameter `include` -#' allows also to return all chromatographic peaks with their apex -#' position within the selected region (`include = "apex_within"`) or any -#' chromatographic peak overlapping the m/z and retention time range -#' (`include = "any"`). +#' included in the returned [XChromatograms] object. For `object` being an +#' `XCMSnExp` object parameter `include` allows also to return all +#' chromatographic peaks with their apex position within the selected +#' region (`include = "apex_within"`) or any chromatographic peak overlapping +#' the m/z and retention time range (`include = "any"`). #' #' @note #' +#' The EIC data of a feature is extracted from every sample using the same +#' m/z - rt area. The EIC in a sample does thus not exactly represent the +#' signal of the actually identified chromatographic peak in that sample. 
+#' The [chromPeakChromatograms()] function would allow to extract the actual +#' EIC of the chromatographic peak in a specific sample. See also examples +#' below. +#' #' Parameters `include`, `filled`, `n` and `value` are only supported #' for `object` being an `XCMSnExp`. #' @@ -628,6 +639,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' Defaults to `"feature_only"`; See description above for options and #' details. #' +#' @param mzmax `function` defining how the upper boundary of the m/z region +#' from which the EIC is integrated should be defined. Defaults to +#' `mzmax = max` thus the largest `"mzmax"` value for all chromatographic +#' peaks of a feature will be used. +#' +#' @param mzmin `function` defining how the lower boundary of the m/z region +#' from which the EIC is integrated should be defined. Defaults to +#' `mzmin = min` thus the smallest `"mzmin"` value for all chromatographic +#' peaks of a feature will be used. +#' #' @param n Only for `object` being an `XCMSnExp`: `integer(1)` to optionally #' specify the number of *top n* samples from which the EIC should be #' extracted. @@ -642,6 +663,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' supported and the results are thus returned as an [XChromatograms()] #' object. #' +#' @param rtmax `function` defining how the upper boundary of the rt region +#' from which the EIC is integrated should be defined. Defaults to +#' `rtmax = max` thus the largest `"rtmax"` value for all chromatographic +#' peaks of a feature will be used. +#' +#' @param rtmin `function` defining how the lower boundary of the rt region +#' from which the EIC is integrated should be defined. Defaults to +#' `rtmin = min` thus the smallest `"rtmin"` value for all chromatographic +#' peaks of a feature will be used. +#' #' @param value Only for `object` being an `XCMSnExp`: `character(1)` #' specifying the column to be used to sort the samples. 
Can be either #' `"maxo"` (the default) or `"into"` to use the maximal peak intensity @@ -672,10 +703,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' ## Disable parallel processing for this example #' register(SerialParam()) #' -#' ## Subset the object to a smaller retention time range -#' xdata <- filterRt(faahko_sub, c(2500, 3500)) -#' -#' xdata <- groupChromPeaks(xdata, +#' ## Perform correspondence analysis +#' xdata <- groupChromPeaks(faahko_sub, #' param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) #' #' ## Get the feature definitions @@ -686,8 +715,28 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' chrs <- featureChromatograms(xdata, #' features = rownames(featureDefinitions)[1:3]) #' -#' ## Plot the XIC for the first feature using different colors for each file +#' ## Plot the EIC for the first feature using different colors for each file. #' plot(chrs[1, ], col = c("red", "green", "blue")) +#' +#' ## The EICs for all 3 samples use the same m/z and retention time range, +#' ## which was defined using the `featureArea` function: +#' featureArea(xdata, features = rownames(featureDefinitions(xdata))[1:3], +#' mzmin = min, mzmax = max, rtmin = min, rtmax = max) +#' +#' ## To extract the actual (exact) EICs for each chromatographic peak of +#' ## a feature in each sample, the `chromPeakChromatograms` function would +#' ## need to be used instead. Below we extract the EICs for all +#' ## chromatographic peaks of the first feature. 
We need to first get the +#' ## IDs of all chromatographic peaks assigned to the first feature: +#' peak_ids <- rownames(chromPeaks(xdata))[featureDefinitions(xdata)$peakidx[[1L]]] +#' +#' ## We can now pass these to the `chromPeakChromatograms` function with +#' ## parameter `peaks`: +#' eic_1 <- chromPeakChromatograms(xdata, peaks = peak_ids) +#' +#' ## To plot these into a single plot we need to use the +#' ## `plotChromatogramsOverlay` function: +#' plotChromatogramsOverlay(eic_1) setGeneric("featureChromatograms", function(object, ...) standardGeneric("featureChromatograms")) @@ -800,8 +849,8 @@ setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-") #' `ChromPeakAreaParam`, the median `"mzmin"`, `"mzmax"`, `"rtmin"` and #' `"rtmax"` values from all detected chromatographic peaks of a feature #' would be used instead. -#' In contrast to the `FillChromPeaksParam` approach this method uses the -#' actual identified chromatographic peaks of a feature to define the area +#' In contrast to the `FillChromPeaksParam` approach this method uses (all) +#' identified chromatographic peaks of a feature to define the area #' from which the signal should be integrated. #' #' @details @@ -1235,10 +1284,14 @@ setGeneric("group", function(object, ...) standardGeneric("group")) #' #' - `PeakDensityParam`: correspondence using the *peak density* method #' (Smith 2006) that groups chromatographic peaks along the retention time -#' axis within slices of (partially overlapping) m/z ranges. All peaks (from -#' the same or from different samples) with their apex position being close -#' on the retention time axis are grouped into a LC-MS feature. See in -#' addition [do_groupChromPeaks_density()] for the core API function. +#' axis within slices of (partially overlapping) m/z ranges. By default, +#' these m/z ranges (bins) have a constant size. 
By setting `ppm` to a value +#' larger than 0, m/z dependent bin sizes can be used instead (better +#' representing the m/z dependent measurement error of some MS instruments). +#' All peaks (from the same or from different samples) with their apex +#' position being close on the retention time axis are grouped into a LC-MS +#' feature. See in addition [do_groupChromPeaks_density()] for the core API +#' function. #' #' - `NearestPeaksParam`: performs peak grouping based on the proximity of #' chromatographic peaks from different samples in the m/z - rt space similar @@ -1304,6 +1357,14 @@ setGeneric("group", function(object, ...) standardGeneric("group")) #' #' @param ppm For `MzClustParam`: `numeric(1)` representing the relative m/z #' error for the clustering/grouping (in parts per million). +#' For `PeakDensityParam`: `numeric(1)` to define m/z-dependent, increasing +#' m/z bin sizes. If `ppm = 0` (the default) m/z bins are defined by the +#' sequence of values from the smallest to the largest m/z value with a +#' constant bin size of `binSize`. For `ppm` > 0 the size of each bin is +#' increased in addition by the `ppm` of the (upper) m/z boundary of the +#' bin. The maximal bin size (used for the largest m/z values) would then +#' be `binSize` plus `ppm` parts-per-million of the largest m/z value of +#' all peaks in the data set. #' #' @param param The parameter object selecting and configuring the algorithm. #' @@ -1808,7 +1869,8 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...) #' #' @param ppm For `MergeNeighboringPeaksParam`: `numeric(1)` defining a m/z #' relative value (in parts per million) by which the m/z range of each -#' chromatographic peak is expanded to check for overlapping peaks. +#' chromatographic peak is expanded (on each side) to check for overlapping +#' peaks. #' #' @param threshold For `FilterIntensityParam`: `numeric(1)` defining the #' threshold below which peaks are removed. 
diff --git a/R/DataClasses.R b/R/DataClasses.R index 024caa34e..a48dfcb7a 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -443,13 +443,13 @@ setClass("XProcessHistory", #' method to extend the EIC to a integer base-2 length prior to being passed to #' \code{convolve} rather than the default "reflect" method. See #' https://github.com/sneumann/xcms/issues/445 for more information. -#' +#' #' @param verboseBetaColumns Option to calculate two additional metrics of peak #' quality via comparison to an idealized bell curve. Adds \code{beta_cor} and #' \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson #' correlation coefficient to a bell curve with several degrees of skew as well #' as an estimate of signal-to-noise using the residuals from the best-fitting -#' bell curve. See https://github.com/sneumann/xcms/pull/685 and +#' bell curve. See https://github.com/sneumann/xcms/pull/685 and #' https://doi.org/10.1186/s12859-023-05533-4 for more information. #' #' @details @@ -1316,7 +1316,8 @@ setClass("PeakDensityParam", minFraction = "numeric", minSamples = "numeric", binSize = "numeric", - maxFeatures = "numeric"), + maxFeatures = "numeric", + ppm = "numeric"), contains = "Param", prototype = prototype( sampleGroups = numeric(), @@ -1324,6 +1325,7 @@ setClass("PeakDensityParam", minFraction = 0.5, minSamples = 1, binSize = 0.25, + ppm = 0, maxFeatures = 50), validity = function(object) { msg <- character() diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R index f17df8fd6..930b491c6 100644 --- a/R/MsExperiment-functions.R +++ b/R/MsExperiment-functions.R @@ -247,37 +247,6 @@ USE.NAMES = FALSE, BPPARAM = BPPARAM) } -#' generic method to apply a filtering to the spectra data. The function will -#' apply the filtering and (most importantly) keep/update the link between -#' spectra and samples. -#' -#' @importMethodsFrom Spectra selectSpectraVariables -#' -#' @param x `MsExperiment`. -#' -#' @param FUN filter function. 
-#' -#' @param ... parameters for `FUN`. -#' -#' @author Johannes Rainer -#' -#' @noRd -.mse_filter_spectra <- function(x, FUN, ...) { - ls <- length(spectra(x)) - have_links <- length(x@sampleDataLinks[["spectra"]]) > 0 - if (have_links) - x@spectra$._SPECTRA_IDX <- seq_len(ls) - x@spectra <- FUN(x@spectra, ...) - if (have_links) { - if (ls != length(spectra(x))) - x <- .update_sample_data_links_spectra(x) - svs <- unique(c(spectraVariables(spectra(x)), "mz", "intensity")) - x@spectra <- selectSpectraVariables( - x@spectra, svs[svs != "._SPECTRA_IDX"]) - } - x -} - #' Ensure that each spectrum is assigned to a sample and that we only have 1:1 #' mappings. That is important for most code involving splitting of samples #' etc. diff --git a/R/MsExperiment.R b/R/MsExperiment.R index 2ec042029..ba12ce461 100644 --- a/R/MsExperiment.R +++ b/R/MsExperiment.R @@ -2,8 +2,7 @@ setMethod("filterRt", "MsExperiment", function(object, rt = numeric(), ...) { message("Filter spectra") - object <- .mse_filter_spectra(object, filterRt, rt = rt, ...) - object + filterSpectra(object, filterRt, rt = rt, ...) }) #' @rdname XcmsExperiment @@ -24,8 +23,7 @@ setMethod("filterMz", "MsExperiment", setMethod("filterMsLevel", "MsExperiment", function(object, msLevel. = uniqueMsLevels(object)) { message("Filter spectra") - .mse_filter_spectra(object, filterMsLevel, - msLevel. = msLevel.) + filterSpectra(object, filterMsLevel, msLevel. = msLevel.) 
}) #' @rdname XcmsExperiment @@ -93,7 +91,7 @@ setMethod("polarity", "MsExperiment", function(object) { #' @rdname XcmsExperiment setMethod( "filterIsolationWindow", "MsExperiment", function(object, mz = numeric()) { - .mse_filter_spectra(object, filterIsolationWindow, mz = mz) + filterSpectra(object, filterIsolationWindow, mz = mz) }) #' @rdname XcmsExperiment diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 6384f2a61..e2b2957d5 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -133,7 +133,8 @@ cp, sampleGroups = sampleGroups(param), bw = bw(param), minFraction = minFraction(param), minSamples = minSamples(param), binSize = binSize(param), - maxFeatures = maxFeatures(param), index = index) + maxFeatures = maxFeatures(param), ppm = ppm(param), + index = index) }, MzClustParam = { tmp <- do_groupPeaks_mzClust( @@ -933,15 +934,50 @@ #' @rdname XcmsExperiment featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min, - rtmax = max, msLevel = integer(), - features = character()) { + rtmax = max, features = character()) { if (!hasFeatures(object)) stop("No correspondence results available. Please run ", "'groupChromPeaks' first.") - if (!length(msLevel)) - msLevel <- seq_len(10) + if (!length(features)) + features <- rownames(featureDefinitions(object)) .features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, - rtmax = rtmax, msLevel = msLevel, features = features) + rtmax = rtmax, features = features) +} + +#' @title Define MS regions for features +#' +#' @param x `XcmsExperiment` or `XCMSnExp`. +#' +#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values +#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also +#' work with `mean` etc. +#' +#' @param features `character` with the IDs of the features. Mandatory! 
+#' +#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` +#' defining the m/z and retention time range of each feature. +#' +#' @author Johannes Rainer +#' +#' @noRd +.features_ms_region <- function(x, mzmin = median, mzmax = median, + rtmin = median, rtmax = median, + features = character()) { + features <- .i2index(features, ids = rownames(featureDefinitions(x)), + "features") + pks <- .chromPeaks(x)[, c("mzmin", "mzmax", "rtmin", "rtmax")] + res <- do.call( + rbind, lapply(featureDefinitions(x)$peakidx[features], + function(i) { + ## maybe consider/drop gap-filled peaks? + c(mzmin(pks[i, "mzmin"]), + mzmax(pks[i, "mzmax"]), + rtmin(pks[i, "rtmin"]), + rtmax(pks[i, "rtmax"])) + })) + rownames(res) <- rownames(featureDefinitions(x))[features] + colnames(res) <- c("mzmin", "mzmax", "rtmin", "rtmax") + res } #' *Reconstruct* MS2 spectra for DIA data: diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 74ce07850..fdb94eddc 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -245,9 +245,7 @@ #' `"rtmin"` and `"rtmax"` with the m/z and retention time range for each #' feature (row) in `object`. By default these represent the minimal m/z #' and retention times as well as maximal m/z and retention times for -#' the chromatographi peaks assigned to that feature. Note that if in -#' one sample more than one chromatographic peak is assigned to a feature -#' only the one with the higher intensity is considered. Parameter +#' all chromatographic peaks assigned to that feature. Parameter #' `features` allows to extract these values for selected features only. #' Parameters `mzmin`, `mzmax`, `rtmin` and `rtmax` allow to define #' the function to calculate the reported `"mzmin"`, `"mzmax"`, `"rtmin"` @@ -408,7 +406,7 @@ #' #' @param features For `filterFeatureDefinitions` and `featureArea`: `logical`, #' `integer` or `character` defining the features to keep or from which -#' to extract the feature are, respectively. 
See function description +#' to extract the feature area, respectively. See function description #' for more information. #' #' @param file For `filterFile`: `integer` with the indices of the samples @@ -779,7 +777,7 @@ setMethod( function(object, mz = numeric()) { if (length(mz) > 1L) mz <- mz[1L] - object <- .mse_filter_spectra(object, filterIsolationWindow, mz = mz) + object <- filterSpectra(object, filterIsolationWindow, mz = mz) if (hasChromPeaks(object) && length(mz) && all(c("isolationWindowLowerMz", "isolationWindowUpperMz") %in% colnames(object@chromPeakData))) { @@ -1623,12 +1621,13 @@ setMethod( "featureChromatograms", "XcmsExperiment", function(object, expandRt = 0, expandMz = 0, aggregationFun = "max", features = character(), return.type = "XChromatograms", - chunkSize = 2L, ..., progressbar = TRUE, BPPARAM = bpparam()) { + chunkSize = 2L, mzmin = min, mzmax = max, rtmin = min, + rtmax = max, ..., progressbar = TRUE, BPPARAM = bpparam()) { return.type <- match.arg(return.type) if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) - area <- featureArea(object, mzmin = min, mzmax = max, rtmin = min, - rtmax = max, features = features, msLevel = 1:10) + area <- featureArea(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax, features = features) if (expandRt != 0) { area[, "rtmin"] <- area[, "rtmin"] - expandRt area[, "rtmax"] <- area[, "rtmax"] + expandRt @@ -1760,11 +1759,14 @@ setMethod( if (!hasFeatures(object, msLevel = msLevel)) stop("No feature definitions for MS level ", msLevel, " present.") ## Define region to integrate from for each file + feature_ids <- rownames(featureDefinitions(object, msLevel = msLevel)) fr <- .features_ms_region(object, mzmin = param@mzmin, mzmax = param@mzmax, rtmin = param@rtmin, - rtmax = param@rtmax, msLevel = msLevel) - fr <- cbind(fr, mzmed = featureDefinitions(object)$mzmed) + rtmax = param@rtmax, features = feature_ids) + fr <- cbind( + fr, mzmed = featureDefinitions(object, msLevel = 
msLevel)$mzmed) fvals <- featureValues(object, value = "index", msLevel = msLevel) + ## For each sample, keep features with some missing values. pal <- lapply(seq_len(ncol(fvals)), function(i) { fr[is.na(fvals[, i]), , drop = FALSE] }) diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R index 9676883d7..9bf3668bc 100644 --- a/R/do_groupChromPeaks-functions.R +++ b/R/do_groupChromPeaks-functions.R @@ -8,22 +8,31 @@ #' #' The `do_groupChromPeaks_density` function performs chromatographic peak #' grouping based on the density (distribution) of peaks, found in different -#' samples, along the retention time axis in slices of overlapping mz ranges. +#' samples, along the retention time axis in slices of overlapping m/z ranges. +#' By default (with parameter `ppm = 0`) these m/z ranges have all the same +#' (constant) size (depending on parameter `binSize`). For values of `ppm` +#' larger than 0 the m/z bins (ranges or slices) will have increasing sizes +#' depending on the m/z value. This better models the m/z-dependent +#' measurement error/precision seen on some MS instruments. #' -#' @details For overlapping slices along the mz dimension, the function +#' @details +#' +#' For overlapping slices along the mz dimension, the function #' calculates the density distribution of identified peaks along the #' retention time axis and groups peaks from the same or different samples #' that are close to each other. See (Smith 2006) for more details. #' -#' @note The default settings might not be appropriate for all LC/GC-MS setups, +#' @note +#' +#' The default settings might not be appropriate for all LC/GC-MS setups, #' especially the `bw` and `binSize` parameter should be adjusted #' accordingly. #' #' @param peaks A `matrix` or `data.frame` with the mz values and -#' retention times of the identified chromatographic peaks in all samples of an -#' experiment. Required columns are `"mz"`, `"rt"` and -#' `"sample"`. 
The latter should contain `numeric` values representing -#' the index of the sample in which the peak was found. +#' retention times of the identified chromatographic peaks in all samples +#' of an experiment. Required columns are `"mz"`, `"rt"` and +#' `"sample"`. The latter should contain `numeric` values representing +#' the index of the sample in which the peak was found. #' #' @param index An optional `integer` providing the indices of the peaks in the #' original peak matrix. @@ -83,7 +92,8 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, bw = 30, minFraction = 0.5, minSamples = 1, binSize = 0.25, maxFeatures = 50, sleep = 0, - index = seq_len(nrow(peaks))) { + index = seq_len(nrow(peaks)), + ppm = 0) { if (missing(sampleGroups)) stop("Parameter 'sampleGroups' is missing! This should be a vector of ", "length equal to the number of samples specifying the group ", @@ -120,9 +130,10 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, rtRange <- range(peaks[, "rt"]) ## Define the mass slices and the index in the peaks matrix with an mz - ## value >= mass[i]. - mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize, - by = binSize / 2) + ## value >= mass[i]. If ppm != 0 the size of the individual bins will + ## be dependent on the m/z value. 
+ mass <- breaks_ppm(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize, + by = binSize / 2, ppm = ppm / 2) masspos <- findEqualGreaterM(peaks[, "mz"], mass) densFrom <- rtRange[1] - 3 * bw diff --git a/R/functions-Params.R b/R/functions-Params.R index bea36c2bc..71c05f89d 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -93,7 +93,7 @@ CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, roiScales = roiScales, - extendLengthMSW = extendLengthMSW, + extendLengthMSW = extendLengthMSW, verboseBetaColumns=verboseBetaColumns)) } @@ -223,7 +223,7 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, roiScales = roiScales, - extendLengthMSW = extendLengthMSW, + extendLengthMSW = extendLengthMSW, verboseBetaColumns = verboseBetaColumns, snthreshIsoROIs = snthreshIsoROIs, maxIso = as.integer(maxIso), maxCharge = as.integer(maxCharge), @@ -232,14 +232,14 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, #' @rdname groupChromPeaks PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, - minFraction = 0.5, minSamples = 1, - binSize = 0.25, maxFeatures = 50) { + minFraction = 0.5, minSamples = 1, + binSize = 0.25, ppm = 0, maxFeatures = 50) { if (length(sampleGroups) == 0 | any(is.na(sampleGroups))) stop("Argument 'sampleGroups' has to be defined. 
It should not ", "contain 'NA's") new("PeakDensityParam", sampleGroups = sampleGroups, bw = bw, minFraction = minFraction, minSamples = minSamples, - binSize = binSize, maxFeatures = maxFeatures) + binSize = binSize, ppm = ppm, maxFeatures = maxFeatures) } #' @rdname groupChromPeaks @@ -349,12 +349,13 @@ MergeNeighboringPeaksParam <- function(expandRt = 2, expandMz = 0, ppm = 10, } #' @rdname fillChromPeaks -ChromPeakAreaParam <- function(mzmin = function(z) quantile(z, probs = 0.25), - mzmax = function(z) quantile(z, probs = 0.75), - rtmin = function(z) quantile(z, probs = 0.25), - rtmax = function(z) quantile(z, probs = 0.75)) { - new("ChromPeakAreaParam", mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, - rtmax = rtmax) +ChromPeakAreaParam <- + function(mzmin = function(z) quantile(z, probs = 0.25, names = FALSE), + mzmax = function(z) quantile(z, probs = 0.75, names = FALSE), + rtmin = function(z) quantile(z, probs = 0.25, names = FALSE), + rtmax = function(z) quantile(z, probs = 0.75, names = FALSE)) { + new("ChromPeakAreaParam", mzmin = mzmin, mzmax = mzmax, rtmin = rtmin, + rtmax = rtmax) } #' @rdname refineChromPeaks diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index c571bfc0c..fdbf738c7 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -2014,47 +2014,6 @@ setMethod("hasFilledChromPeaks", "XCMSnExp", function(object) { nobject } -#' Define the MS region (m/z - rt range) for each feature based on the rtmin, -#' rtmax, mzmin, mzmax of the corresponding detected peaks. -#' -#' @param x `XCMSnExp` object -#' -#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values -#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also -#' work with `mean` etc. 
-#' -#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` -#' defining the range of -#' -#' @author Johannes Rainer -#' -#' @noRd -.features_ms_region <- function(x, mzmin = median, mzmax = median, - rtmin = median, rtmax = median, - msLevel = unique(msLevel(x)), - features = character()) { - pk_idx <- featureValues(x, value = "index", method = "maxint", - msLevel = msLevel) - if (length(features)) { - features <- .i2index( - features, ids = rownames(featureDefinitions(x)), "features") - pk_idx <- pk_idx[features, , drop = FALSE] - } - n_ft <- nrow(pk_idx) - rt_min <- rt_max <- mz_min <- mz_max <- numeric(n_ft) - for (i in seq_len(n_ft)) { - idx <- pk_idx[i, ] - tmp_pks <- chromPeaks(x)[idx[!is.na(idx)], , drop = FALSE] - rt_min[i] <- rtmin(tmp_pks[, "rtmin"]) - rt_max[i] <- rtmax(tmp_pks[, "rtmax"]) - mz_min[i] <- mzmin(tmp_pks[, "mzmin"]) - mz_max[i] <- mzmax(tmp_pks[, "mzmax"]) - } - res <- cbind(mzmin = mz_min, mzmax = mz_max, rtmin = rt_min, rtmax = rt_max) - rownames(res) <- rownames(pk_idx) - res -} - #' @param x `XCMSnExp` object of a single file. 
#' #' @param nValues `integer(1)` defining the number of values that have to be above diff --git a/R/methods-Params.R b/R/methods-Params.R index aa1b1e454..d29016e79 100644 --- a/R/methods-Params.R +++ b/R/methods-Params.R @@ -1002,6 +1002,13 @@ setReplaceMethod("maxFeatures", "PeakDensityParam", function(object, value) { return(object) }) +#' @rdname groupChromPeaks +setMethod("ppm", "PeakDensityParam", function(object) { + if (.hasSlot(object, "ppm")) + object@ppm + else 0.0 +}) + ############################################################ ## MzClustParam diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 285859f5b..449a5f4a2 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -1482,6 +1482,7 @@ setMethod("groupChromPeaks", minFraction = minFraction(param), minSamples = minSamples(param), binSize = binSize(param), + ppm = ppm(param), maxFeatures = maxFeatures(param)) xph <- XProcessHistory(param = param, date. = startDate, type. = .PROCSTEP.PEAK.GROUPING, @@ -2541,9 +2542,12 @@ setMethod("fillChromPeaks", startDate <- date() message("Defining peak areas for filling-in .", appendLF = FALSE) + feature_ids <- rownames(featureDefinitions(object, + msLevel = msLevel)) fts_region <- .features_ms_region( object, mzmin = param@mzmin, mzmax = param@mzmax, - rtmin = param@rtmin, rtmax = param@rtmax, msLevel = msLevel) + rtmin = param@rtmin, rtmax = param@rtmax, + features = feature_ids) fts_region <- cbind(group_idx = seq_len(nrow(fts_region)), fts_region, mzmed = featureDefinitions(object)$mzmed) diff --git a/R/methods-group-features.R b/R/methods-group-features.R index d1c46c8a3..d27b15b69 100644 --- a/R/methods-group-features.R +++ b/R/methods-group-features.R @@ -697,6 +697,12 @@ plotFeatureGroups <- function(x, xlim = numeric(), ylim = numeric(), #' #' @note #' +#' At present the [featureChromatograms()] function is used to extract the +#' EICs for each feature, which does however use one m/z and rt range for +#' each feature and the EICs do 
thus not exactly represent the identified +#' chromatographic peaks of each sample (i.e. their specific m/z and +#' retention time ranges). +#' #' While being possible to be performed on the full data set without prior #' feature grouping, this is not suggested for the following reasons: I) the #' selection of the top `n` samples with the highest signal for the @@ -932,6 +938,12 @@ setMethod( } if (length(idx) > 1) { eics <- do.call( + ## NOTE: chromPeakChromatograms should be used here + ## instead, since featureChromatograms uses the same + ## m/z - rt boundary for all EICs and does thus not + ## exactly represent the chromatographic peaks. + ## TODO: check if we can't use chromPeakChromatograms + ## instead (slower performance?). featureChromatograms, args = c(list(obj_sub, features = rownames(fvals)[idx], filled = TRUE, progressbar = FALSE), diff --git a/inst/NEWS b/inst/NEWS index d3f6c26d6..5fc15922b 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,4 +1,4 @@ -Changes in version 4.1.6 +Changes in version 4.1.7 ---------------------- - Implementation of `filterFeatures` function with `filter` parameters: @@ -6,6 +6,21 @@ Changes in version 4.1.6 used ot filter features from `XcmsResult` and `SummarizedExperiment` objects. - Addition of a section in the main xcms vignette to describe how to use it. +Changes in version 4.1.6 +---------------------- + +- Import `filterSpectra` from `MsExperiment`. +- Import `breaks_ppm` from `MsCoreUtils`. +- Update `featureArea` function to consider all chromatographic peaks per + feature, not only the one with the highest intensity. As a consequence, + returned m/z and rt ranges might be higher which has an influence in + `featureChromatograms`, EIC-based feature grouping and, to a lesser extent + also in gap-filling. Related documentation was updated. +- Improve performance of the `featureArea` function (and related of the + `PeakAreaParam`-based gap filling). 
+- Add parameter `ppm` to `PeakDensityParam` to enable peak-density-based + correspondence through m/z-dependent bins along the m/z dimension. + Changes in version 4.1.5 ---------------------- diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 67628584e..1c12584c3 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -94,7 +94,6 @@ featureArea( mzmax = max, rtmin = min, rtmax = max, - msLevel = integer(), features = character() ) @@ -295,7 +294,7 @@ chromatographic peaks assigned to that feature. Defaults to \item{features}{For \code{filterFeatureDefinitions} and \code{featureArea}: \code{logical}, \code{integer} or \code{character} defining the features to keep or from which -to extract the feature are, respectively. See function description +to extract the feature area, respectively. See function description for more information.} \item{x}{An \code{XcmsExperiment} object.} @@ -620,9 +619,7 @@ analysis. This can be overruled with \code{keepAdjustedRtime = TRUE}. \code{"rtmin"} and \code{"rtmax"} with the m/z and retention time range for each feature (row) in \code{object}. By default these represent the minimal m/z and retention times as well as maximal m/z and retention times for -the chromatographi peaks assigned to that feature. Note that if in -one sample more than one chromatographic peak is assigned to a feature -only the one with the higher intensity is considered. Parameter +all chromatographic peaks assigned to that feature. Parameter \code{features} allows to extract these values for selected features only. Parameters \code{mzmin}, \code{mzmax}, \code{rtmin} and \code{rtmax} allow to define the function to calculate the reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} diff --git a/man/do_findChromPeaks_centWave.Rd b/man/do_findChromPeaks_centWave.Rd index 6a38eedd1..9a91e2df9 100644 --- a/man/do_findChromPeaks_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -124,7 +124,7 @@ quality via comparison to an idealized bell curve.
Adds \code{beta_cor} and \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson correlation coefficient to a bell curve with several degrees of skew as well as an estimate of signal-to-noise using the residuals from the best-fitting -bell curve. See https://github.com/sneumann/xcms/pull/685 and +bell curve. See https://github.com/sneumann/xcms/pull/685 and https://doi.org/10.1186/s12859-023-05533-4 for more information.} } \value{ diff --git a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd index 6216aa927..8c1ffb14a 100644 --- a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd +++ b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd @@ -167,7 +167,7 @@ quality via comparison to an idealized bell curve. Adds \code{beta_cor} and \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson correlation coefficient to a bell curve with several degrees of skew as well as an estimate of signal-to-noise using the residuals from the best-fitting -bell curve. See https://github.com/sneumann/xcms/pull/685 and +bell curve. See https://github.com/sneumann/xcms/pull/685 and https://doi.org/10.1186/s12859-023-05533-4 for more information.} \item{peaks.}{A matrix or \code{xcmsPeaks} object such as one returned by diff --git a/man/do_groupChromPeaks_density.Rd b/man/do_groupChromPeaks_density.Rd index ee4d5f950..4b27d8a4e 100644 --- a/man/do_groupChromPeaks_density.Rd +++ b/man/do_groupChromPeaks_density.Rd @@ -14,13 +14,14 @@ do_groupChromPeaks_density( binSize = 0.25, maxFeatures = 50, sleep = 0, - index = seq_len(nrow(peaks)) + index = seq_len(nrow(peaks)), + ppm = 0 ) } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and -retention times of the identified chromatographic peaks in all samples of an -experiment. Required columns are \code{"mz"}, \code{"rt"} and +retention times of the identified chromatographic peaks in all samples +of an experiment. 
Required columns are \code{"mz"}, \code{"rt"} and \code{"sample"}. The latter should contain \code{numeric} values representing the index of the sample in which the peak was found.} @@ -55,6 +56,17 @@ iterations and plot the result from the current iteration.} \item{index}{An optional \code{integer} providing the indices of the peaks in the original peak matrix.} + +\item{ppm}{For \code{MzClustParam}: \code{numeric(1)} representing the relative m/z +error for the clustering/grouping (in parts per million). +For \code{PeakDensityParam}: \code{numeric(1)} to define m/z-dependent, increasing +m/z bin sizes. If \code{ppm = 0} (the default) m/z bins are defined by the +sequence of values from the smallest to the largest m/z value with a +constant bin size of \code{binSize}. For \code{ppm} > 0 the size of each bin is +increased in addition by the \code{ppm} of the (upper) m/z boundary of the +bin. The maximal bin size (used for the largest m/z values) would then +be \code{binSize} plus \code{ppm} parts-per-million of the largest m/z value of +all peaks in the data set.} } \value{ A \code{data.frame}, each row representing a (mz-rt) feature (i.e. a peak group) @@ -77,7 +89,12 @@ multiple peaks from the same sample could be assigned to a feature. \description{ The \code{do_groupChromPeaks_density} function performs chromatographic peak grouping based on the density (distribution) of peaks, found in different -samples, along the retention time axis in slices of overlapping mz ranges. +samples, along the retention time axis in slices of overlapping m/z ranges. +By default (with parameter \code{ppm = 0}) these m/z ranges have all the same +(constant) size (depending on parameter \code{binSize}). For values of \code{ppm} +larger than 0 the m/z bins (ranges or slices) will have increasing sizes +depending on the m/z value. This better models the m/z-dependent +measurement error/precision seen on some MS instruments.
} \details{ For overlapping slices along the mz dimension, the function diff --git a/man/do_groupChromPeaks_nearest.Rd b/man/do_groupChromPeaks_nearest.Rd index 4bcc222d4..085f1bc11 100644 --- a/man/do_groupChromPeaks_nearest.Rd +++ b/man/do_groupChromPeaks_nearest.Rd @@ -16,8 +16,8 @@ do_groupChromPeaks_nearest( } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and -retention times of the identified chromatographic peaks in all samples of an -experiment. Required columns are \code{"mz"}, \code{"rt"} and +retention times of the identified chromatographic peaks in all samples +of an experiment. Required columns are \code{"mz"}, \code{"rt"} and \code{"sample"}. The latter should contain \code{numeric} values representing the index of the sample in which the peak was found.} diff --git a/man/do_groupPeaks_mzClust.Rd b/man/do_groupPeaks_mzClust.Rd index cfe3f5ae5..dba42f79c 100644 --- a/man/do_groupPeaks_mzClust.Rd +++ b/man/do_groupPeaks_mzClust.Rd @@ -15,8 +15,8 @@ do_groupPeaks_mzClust( } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and -retention times of the identified chromatographic peaks in all samples of an -experiment. Required columns are \code{"mz"}, \code{"rt"} and +retention times of the identified chromatographic peaks in all samples +of an experiment. Required columns are \code{"mz"}, \code{"rt"} and \code{"sample"}. The latter should contain \code{numeric} values representing the index of the sample in which the peak was found.} @@ -29,7 +29,15 @@ experiment (in which case all samples should be assigned to the same group).} \item{ppm}{For \code{MzClustParam}: \code{numeric(1)} representing the relative m/z -error for the clustering/grouping (in parts per million).} +error for the clustering/grouping (in parts per million). +For \code{PeakDensityParam}: \code{numeric(1)} to define m/z-dependent, increasing +m/z bin sizes. 
If \code{ppm = 0} (the default) m/z bins are defined by the +sequence of values from the smallest to the largest m/z value with a +constant bin size of \code{binSize}. For \code{ppm} > 0 the size of each bin is +increased in addition by the \code{ppm} of the (upper) m/z boundary of the +bin. The maximal bin size (used for the largest m/z values) would then +be \code{binSize} plus \code{ppm} parts-per-million of the largest m/z value of +all peaks in the data set.} \item{absMz}{For \code{NearestPeaksParam} and \code{MzClustParam}: \code{numeric(1)} maximum tolerated distance for m/z values.} diff --git a/man/featureChromatograms.Rd b/man/featureChromatograms.Rd index e484efc0a..8d194e81b 100644 --- a/man/featureChromatograms.Rd +++ b/man/featureChromatograms.Rd @@ -17,6 +17,10 @@ featureChromatograms(object, ...) features = character(), return.type = "XChromatograms", chunkSize = 2L, + mzmin = min, + mzmax = max, + rtmin = min, + rtmax = max, ..., progressbar = TRUE, BPPARAM = bpparam() @@ -71,6 +75,26 @@ object.} defining the number of files from which the data should be loaded at a time into memory. Defaults to \code{chunkSize = 2L}.} +\item{mzmin}{\code{function} defining how the lower boundary of the m/z region +from which the EIC is integrated should be defined. Defaults to +\code{mzmin = min} thus the smallest \code{"mzmin"} value for all chromatographic +peaks of a feature will be used.} + +\item{mzmax}{\code{function} defining how the upper boundary of the m/z region +from which the EIC is integrated should be defined. Defaults to +\code{mzmax = max} thus the largest \code{"mzmax"} value for all chromatographic +peaks of a feature will be used.} + +\item{rtmin}{\code{function} defining how the lower boundary of the rt region +from which the EIC is integrated should be defined.
Defaults to +\code{rtmin = min} thus the smallest \code{"rtmin"} value for all chromatographic +peaks of a feature will be used.} + +\item{rtmax}{\code{function} defining how the upper boundary of the rt region +from which the EIC is integrated should be defined. Defaults to +\code{rtmax = max} thus the largest \code{"rtmax"} value for all chromatographic +peaks of a feature will be used.} + \item{progressbar}{\code{logical(1)} defining whether a progress bar is shown.} \item{BPPARAM}{For \code{object} being an \code{XcmsExperiment}: parallel processing @@ -105,18 +129,29 @@ Extract ion chromatograms for features in an \link{XcmsExperiment} or \linkS4class{XCMSnExp} object. The function returns for each feature the extracted ion chromatograms (along with all associated chromatographic peaks) in each sample. The chromatogram is extracted from the m/z - rt -region including all chromatographic peaks of that features (i.e. based on -the ranges of \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"}, \code{"rtmax"} of all -chromatographic peaks of the feature). +region that includes \strong{all} chromatographic peaks of a feature. By default, +this region is defined using the range of the chromatographic peaks' m/z +and retention times (with \code{mzmin = min}, \code{mzmax = max}, \code{rtmin = min} and +\code{rtmax = max}). For some features, and depending on the data, the m/z and +rt range can thus be relatively large. The boundaries of the m/z - rt +region can also be restricted by changing parameters \code{mzmin}, \code{mzmax}, +\code{rtmin} and \code{rtmax} to different functions, such as \code{median}. By default only chromatographic peaks associated with a feature are -included.
For \code{object} being a \code{XCMSnExp} object parameter \code{include} -allows also to return all chromatographic peaks with their apex -position within the selected region (\code{include = "apex_within"}) or any -chromatographic peak overlapping the m/z and retention time range -(\code{include = "any"}). +included in the returned \link{XChromatograms} object. For \code{object} being an +\code{XCMSnExp} object parameter \code{include} allows also to return all +chromatographic peaks with their apex position within the selected +region (\code{include = "apex_within"}) or any chromatographic peak overlapping +the m/z and retention time range (\code{include = "any"}). } \note{ +The EIC data of a feature is extracted from every sample using the same +m/z - rt area. The EIC in a sample does thus not exactly represent the +signal of the actually identified chromatographic peak in that sample. +The \code{\link[=chromPeakChromatograms]{chromPeakChromatograms()}} function would allow to extract the actual +EIC of the chromatographic peak in a specific sample. See also examples +below. + Parameters \code{include}, \code{filled}, \code{n} and \code{value} are only supported for \code{object} being an \code{XCMSnExp}. @@ -133,10 +168,8 @@ faahko_sub <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example register(SerialParam()) -## Subset the object to a smaller retention time range -xdata <- filterRt(faahko_sub, c(2500, 3500)) - -xdata <- groupChromPeaks(xdata, +## Perform correspondence analysis +xdata <- groupChromPeaks(faahko_sub, param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3))) ## Get the feature definitions @@ -147,8 +180,28 @@ featureDefinitions(xdata) chrs <- featureChromatograms(xdata, features = rownames(featureDefinitions)[1:3]) -## Plot the XIC for the first feature using different colors for each file +## Plot the EIC for the first feature using different colors for each file. 
plot(chrs[1, ], col = c("red", "green", "blue")) + +## The EICs for all 3 samples use the same m/z and retention time range, +## which was defined using the `featureArea` function: +featureArea(xdata, features = rownames(featureDefinitions(xdata))[1:3], + mzmin = min, mzmax = max, rtmin = min, rtmax = max) + +## To extract the actual (exact) EICs for each chromatographic peak of +## a feature in each sample, the `chromPeakChromatograms` function would +## need to be used instead. Below we extract the EICs for all +## chromatographic peaks of the first feature. We need to first get the +## IDs of all chromatographic peaks assigned to the first feature: +peak_ids <- rownames(chromPeaks(xdata))[featureDefinitions(xdata)$peakidx[[1L]]] + +## We can now pass these to the `chromPeakChromatograms` function with +## parameter `peaks`: +eic_1 <- chromPeakChromatograms(xdata, peaks = peak_ids) + +## To plot these into a single plot we need to use the +## `plotChromatogramsOverlay` function: +plotChromatogramsOverlay(eic_1) } \seealso{ \code{\link[=filterColumnsKeepTop]{filterColumnsKeepTop()}} to filter the extracted EICs keeping only diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd index f142caa53..a57e33e05 100644 --- a/man/fillChromPeaks.Rd +++ b/man/fillChromPeaks.Rd @@ -50,10 +50,10 @@ fixedRt(object) fixedMz(object) ChromPeakAreaParam( - mzmin = function(z) quantile(z, probs = 0.25), - mzmax = function(z) quantile(z, probs = 0.75), - rtmin = function(z) quantile(z, probs = 0.25), - rtmax = function(z) quantile(z, probs = 0.75) + mzmin = function(z) quantile(z, probs = 0.25, names = FALSE), + mzmax = function(z) quantile(z, probs = 0.75, names = FALSE), + rtmin = function(z) quantile(z, probs = 0.25, names = FALSE), + rtmax = function(z) quantile(z, probs = 0.75, names = FALSE) ) \S4method{expandMz}{FillChromPeaksParam}(object) @@ -196,8 +196,8 @@ area is defined analogously. 
Alternatively, by setting \code{mzmin = median}, \code{ChromPeakAreaParam}, the median \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and \code{"rtmax"} values from all detected chromatographic peaks of a feature would be used instead. -In contrast to the \code{FillChromPeaksParam} approach this method uses the -actual identified chromatographic peaks of a feature to define the area +In contrast to the \code{FillChromPeaksParam} approach this method uses (all) +identified chromatographic peaks of a feature to define the area from which the signal should be integrated. } diff --git a/man/findChromPeaks-centWave.Rd b/man/findChromPeaks-centWave.Rd index 87cdf2832..f7f2ac351 100644 --- a/man/findChromPeaks-centWave.Rd +++ b/man/findChromPeaks-centWave.Rd @@ -219,7 +219,7 @@ quality via comparison to an idealized bell curve. Adds \code{beta_cor} and \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson correlation coefficient to a bell curve with several degrees of skew as well as an estimate of signal-to-noise using the residuals from the best-fitting -bell curve. See https://github.com/sneumann/xcms/pull/685 and +bell curve. See https://github.com/sneumann/xcms/pull/685 and https://doi.org/10.1186/s12859-023-05533-4 for more information.} \item{object}{For \code{findChromPeaks}: an diff --git a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd index e1aae532b..575e8e451 100644 --- a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd +++ b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd @@ -160,7 +160,7 @@ quality via comparison to an idealized bell curve. Adds \code{beta_cor} and \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson correlation coefficient to a bell curve with several degrees of skew as well as an estimate of signal-to-noise using the residuals from the best-fitting -bell curve. See https://github.com/sneumann/xcms/pull/685 and +bell curve. 
See https://github.com/sneumann/xcms/pull/685 and https://doi.org/10.1186/s12859-023-05533-4 for more information.} \item{snthreshIsoROIs}{\code{numeric(1)} defining the signal to noise ratio diff --git a/man/groupChromPeaks.Rd b/man/groupChromPeaks.Rd index b3690fc70..fe6dbdaff 100644 --- a/man/groupChromPeaks.Rd +++ b/man/groupChromPeaks.Rd @@ -32,6 +32,7 @@ \alias{maxFeatures} \alias{maxFeatures<-,PeakDensityParam-method} \alias{maxFeatures<-} +\alias{ppm,PeakDensityParam-method} \alias{sampleGroups,MzClustParam-method} \alias{sampleGroups<-,MzClustParam-method} \alias{ppm,MzClustParam-method} @@ -75,6 +76,7 @@ PeakDensityParam( minFraction = 0.5, minSamples = 1, binSize = 0.25, + ppm = 0, maxFeatures = 50 ) @@ -118,6 +120,8 @@ NearestPeaksParam( \S4method{maxFeatures}{PeakDensityParam}(object) <- value +\S4method{ppm}{PeakDensityParam}(object) + \S4method{sampleGroups}{MzClustParam}(object) \S4method{sampleGroups}{MzClustParam}(object) <- value @@ -204,12 +208,20 @@ to be detected to be considered a peak group (feature).} \item{binSize}{For \code{PeakDensityParam}: \code{numeric(1)} defining the size of the overlapping slices in m/z dimension.} +\item{ppm}{For \code{MzClustParam}: \code{numeric(1)} representing the relative m/z +error for the clustering/grouping (in parts per million). +For \code{PeakDensityParam}: \code{numeric(1)} to define m/z-dependent, increasing +m/z bin sizes. If \code{ppm = 0} (the default) m/z bins are defined by the +sequence of values from the smallest to the largest m/z value with a +constant bin size of \code{binSize}. For \code{ppm} > 0 the size of each bin is +increased in addition by the \code{ppm} of the (upper) m/z boundary of the +bin.
The maximal bin size (used for the largest m/z values) would then +be \code{binSize} plus \code{ppm} parts-per-million of the largest m/z value of +all peaks in the data set.} + \item{maxFeatures}{For \code{PeakDensityParam}: \code{numeric(1)} with the maximum number of peak groups to be identified in a single mz slice.} -\item{ppm}{For \code{MzClustParam}: \code{numeric(1)} representing the relative m/z -error for the clustering/grouping (in parts per million).} - \item{absMz}{For \code{NearestPeaksParam} and \code{MzClustParam}: \code{numeric(1)} maximum tolerated distance for m/z values.} @@ -248,10 +260,14 @@ Supported \code{param} objects are: \itemize{ \item \code{PeakDensityParam}: correspondence using the \emph{peak density} method (Smith 2006) that groups chromatographic peaks along the retention time -axis within slices of (partially overlapping) m/z ranges. All peaks (from -the same or from different samples) with their apex position being close -on the retention time axis are grouped into a LC-MS feature. See in -addition \code{\link[=do_groupChromPeaks_density]{do_groupChromPeaks_density()}} for the core API function. +axis within slices of (partially overlapping) m/z ranges. By default, +these m/z ranges (bins) have a constant size. By setting \code{ppm} to a value +larger than 0, m/z dependent bin sizes can be used instead (better +representing the m/z dependent measurement error of some MS instruments). +All peaks (from the same or from different samples) with their apex +position being close on the retention time axis are grouped into a LC-MS +feature. See in addition \code{\link[=do_groupChromPeaks_density]{do_groupChromPeaks_density()}} for the core API +function. \item \code{NearestPeaksParam}: performs peak grouping based on the proximity of chromatographic peaks from different samples in the m/z - rt space similar to the correspondence method of \emph{mzMine} (Katajamaa 2006). 
The method diff --git a/man/groupFeatures-eic-similarity.Rd b/man/groupFeatures-eic-similarity.Rd index c5aa7d033..544b2a6ac 100644 --- a/man/groupFeatures-eic-similarity.Rd +++ b/man/groupFeatures-eic-similarity.Rd @@ -115,6 +115,12 @@ Features with a value of \code{NA} in \code{featureDefinitions(object)$feature_g will be skipped/not considered for feature grouping. } \note{ +At present the \code{\link[=featureChromatograms]{featureChromatograms()}} function is used to extract the +EICs for each feature, which does however use one m/z and rt range for +each feature and the EICs do thus not exactly represent the identified +chromatographic peaks of each sample (i.e. their specific m/z and +retention time ranges). + While being possible to be performed on the full data set without prior feature grouping, this is not suggested for the following reasons: I) the selection of the top \code{n} samples with the highest signal for the diff --git a/man/refineChromPeaks.Rd b/man/refineChromPeaks.Rd index abca8301c..d23bdeccb 100644 --- a/man/refineChromPeaks.Rd +++ b/man/refineChromPeaks.Rd @@ -108,7 +108,8 @@ value by which the m/z range of each chromatographic peak is expanded \item{ppm}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} defining a m/z relative value (in parts per million) by which the m/z range of each -chromatographic peak is expanded to check for overlapping peaks.} +chromatographic peak is expanded (on each side) to check for overlapping +peaks.} \item{minProp}{For \code{MergeNeighboringPeaksParam}: \code{numeric(1)} between \code{0} and \code{1} representing the proporion of intensity required for peaks to be diff --git a/src/fastMatch.c b/src/fastMatch.c index cec75b2fa..667583ecc 100644 --- a/src/fastMatch.c +++ b/src/fastMatch.c @@ -29,7 +29,7 @@ SEXP fastMatch(SEXP x, SEXP y, SEXP xidx, SEXP yidx, SEXP xolength, SEXP tol) { struct idxStruct * pidxS = (struct idxStruct *) calloc(nx, sizeof(struct idxStruct)); if (pidxS == NULL) - 
error("fastMatch/calloc: memory could not be allocated ! (%llu bytes)\n", nx * sizeof(struct idxStruct) ); + error("fastMatch/calloc: memory could not be allocated ! (%lu bytes)\n", nx * sizeof(struct idxStruct) ); for (xi=0;xi < nx;xi++) pidxS[xi].from = ny+1; diff --git a/src/mzROI.c b/src/mzROI.c index a308ae142..19333b469 100644 --- a/src/mzROI.c +++ b/src/mzROI.c @@ -76,7 +76,7 @@ struct mzROIStruct * checkmzROIBufSize(struct mzROIStruct *mzROI, const unsigned mzROI = (struct mzROIStruct *) realloc(mzROI, newLength * sizeof(struct mzROIStruct)); if (mzROI == NULL) - error("findmzROI/realloc: buffer memory could not be allocated ! (%llu bytes)\n", newLength * sizeof(struct mzROIStruct) ); + error("findmzROI/realloc: buffer memory could not be allocated ! (%lu bytes)\n", newLength * sizeof(struct mzROIStruct) ); mzLength->mzROITotal = newLength; } @@ -99,7 +99,7 @@ struct mzROIStruct * checkmzvalBufSize(struct mzROIStruct *mzval, const unsigned mzval = (struct mzROIStruct *) realloc(mzval, newLength * sizeof(struct mzROIStruct)); if (mzval == NULL) - error("findmzROI/realloc: buffer memory could not be allocated ! (%llu bytes)\n", newLength * sizeof(struct mzROIStruct)); + error("findmzROI/realloc: buffer memory could not be allocated ! (%lu bytes)\n", newLength * sizeof(struct mzROIStruct)); mzLength->mzvalTotal = newLength; } @@ -328,7 +328,7 @@ int i,p,del=0; p=0; struct mzROIStruct * tmp = (struct mzROIStruct *) calloc(mzLength->mzval - del, sizeof(struct mzROIStruct)); if (tmp == NULL) - error("findmzROI/cleanup: buffer memory could not be allocated ! (%llu bytes)\n", (mzLength->mzval - del) * sizeof(struct mzROIStruct)); + error("findmzROI/cleanup: buffer memory could not be allocated ! 
(%lu bytes)\n", (mzLength->mzval - del) * sizeof(struct mzROIStruct)); for (i=0; i < mzLength->mzval; i++) { if (mzval[i].deleteMe == FALSE) { tmp[p].mz = mzval[i].mz; @@ -626,11 +626,11 @@ SEXP findmzROI(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, struct mzROIStruct * mzROI = (struct mzROIStruct *) calloc(ROI_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzROI == NULL) - error("findmzROI/calloc: buffer memory could not be allocated ! (%llu bytes)\n",ROI_INIT_LENGTH * sizeof(struct mzROIStruct) ); + error("findmzROI/calloc: buffer memory could not be allocated ! (%lu bytes)\n",ROI_INIT_LENGTH * sizeof(struct mzROIStruct) ); struct mzROIStruct * mzval = (struct mzROIStruct *) calloc(MZVAL_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzval == NULL) - error("findmzROI/calloc: buffer memory could not be allocated ! (%llu bytes)\n",MZVAL_INIT_LENGTH * sizeof(struct mzROIStruct) ); + error("findmzROI/calloc: buffer memory could not be allocated ! (%lu bytes)\n",MZVAL_INIT_LENGTH * sizeof(struct mzROIStruct) ); mzLength.mzvalTotal = MZVAL_INIT_LENGTH; mzLength.mzROITotal = ROI_INIT_LENGTH; diff --git a/tests/testthat/test_MsExperiment-functions.R b/tests/testthat/test_MsExperiment-functions.R index 5cca0ea6e..a8cd065de 100644 --- a/tests/testthat/test_MsExperiment-functions.R +++ b/tests/testthat/test_MsExperiment-functions.R @@ -141,42 +141,6 @@ test_that(".mse_find_chrom_peaks_chunk works", { expect_true(is.null(res[[2L]])) }) -test_that(".mse_filter_spectra works", { - ## Create a custom, small test object. 
- fls <- normalizePath(faahko_3_files) - df <- data.frame(mzML_file = basename(fls), - dataOrigin = fls, - sample = c("ko15", "ko16", "ko18")) - a <- Spectra::Spectra(fls[1]) - b <- Spectra::Spectra(fls[2]) - c <- Spectra::Spectra(fls[3]) - - ## Select first 10 spectra from a and c and last 10 from b and combine them - sps <- c(a[1:10], b[(length(b)-9):length(b)], c[1:10]) - - tst <- MsExperiment() - spectra(tst) <- sps - sampleData(tst) <- DataFrame(df) - ## Link samples to spectra. - tst <- linkSampleData(tst, with = "sampleData.dataOrigin = spectra.dataOrigin") - - res <- .mse_filter_spectra(tst, filterRt, rt = c(2502, 2505)) - expect_true(length(spectra(res)) == 4L) - expect_equal(res@sampleDataLinks[["spectra"]], - cbind(c(1L, 1L, 3L, 3L), 1:4)) - - ## Some artificial sample assignment. - tst@sampleDataLinks[["spectra"]] <- cbind( - c(1, 1, 1, 2, 2, 2, 3, 3, 3), - c(2, 3, 4, 2, 3, 4, 2, 3, 4)) - res <- .mse_filter_spectra(tst, filterRt, rt = c(2502, 2505)) - ## Filtering will filter based on rt - expect_true(length(spectra(res)) == 4L) - ## Sample assignment should map the first 2 spectra to all 3 samples - expect_equal(res@sampleDataLinks[["spectra"]], - cbind(c(1L, 1L, 2L, 2L, 3L, 3L), c(1L, 2L, 1L, 2L, 1L, 2L))) -}) - test_that(".mse_check_spectra_sample_mapping works", { expect_true(length(.mse_check_spectra_sample_mapping(mse)) == 0) diff --git a/tests/testthat/test_XcmsExperiment-functions.R b/tests/testthat/test_XcmsExperiment-functions.R index bd48d8f3d..3f1610745 100644 --- a/tests/testthat/test_XcmsExperiment-functions.R +++ b/tests/testthat/test_XcmsExperiment-functions.R @@ -184,4 +184,20 @@ test_that(".chromPeakData works", { expect_equal(res, xmse@chromPeakData) res <- .chromPeakData(xmse, msLevel = 2L) expect_equal(res, xmse@chromPeakData[integer(), ]) -}) \ No newline at end of file +}) + +test_that(".features_ms_region works", { + res <- .features_ms_region( + xod_xgrg, features = rownames(featureDefinitions(xod_xgrg))) + 
expect_equal(nrow(res), nrow(featureDefinitions(xod_xgrg))) + expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax")) + expect_true(all(res[, "mzmin"] <= res[, "mzmax"])) + expect_true(all(res[, "rtmin"] < res[, "rtmax"])) + + expect_error(.features_ms_region(xod_xgrg, + features = c("a", "b")), "out of") + + res <- .features_ms_region( + xmseg, features = rownames(featureDefinitions(xmseg))) + expect_equal(rownames(res), rownames(featureDefinitions(xmseg))) +}) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index daa0afd2c..4aa26f58e 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -1419,5 +1419,5 @@ test_that("fillChromPeaks,XcmsExperiment works with verboseBetaColumns", { res <- fillChromPeaks(res, ChromPeakAreaParam()) pks_det <- chromPeaks(res)[!chromPeakData(res)$is_filled, ] pks_fil <- chromPeaks(res)[chromPeakData(res)$is_filled, ] - expect_true(!any(is.na(pks_fil[, "beta_cor"]))) + expect_true(sum(is.na(pks_fil[, "beta_cor"])) < 4) }) diff --git a/tests/testthat/test_do_groupChromPeaks-functions.R b/tests/testthat/test_do_groupChromPeaks-functions.R index b65fdb9ea..f01f3617a 100644 --- a/tests/testthat/test_do_groupChromPeaks-functions.R +++ b/tests/testthat/test_do_groupChromPeaks-functions.R @@ -7,6 +7,9 @@ test_that("do_groupChromPeaks_density works", { res_2 <- do_groupChromPeaks_density(fts, sampleGroups = grps, minFraction = 0.9) expect_true(nrow(res) > nrow(res_2)) + + res_3 <- do_groupChromPeaks_density(fts, sampleGroups = grps, ppm = 20) + expect_equal(nrow(res), nrow(res_3)) }) test_that("do_groupPeaks_mzClust works", { diff --git a/tests/testthat/test_functions-XCMSnExp.R b/tests/testthat/test_functions-XCMSnExp.R index 69883a81e..a5e53d1cf 100644 --- a/tests/testthat/test_functions-XCMSnExp.R +++ b/tests/testthat/test_functions-XCMSnExp.R @@ -565,19 +565,6 @@ test_that(".XCMSnExp2SummarizedExperiment works", { featureValues(xod_xgrg, value = 
"intb")) }) -test_that(".features_ms_region works", { - skip_on_os(os = "windows", arch = "i386") - - res <- .features_ms_region(xod_xgrg, msLevel = 1L) - expect_equal(nrow(res), nrow(featureDefinitions(xod_xgrg))) - expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax")) - expect_true(all(res[, "mzmin"] <= res[, "mzmax"])) - expect_true(all(res[, "rtmin"] < res[, "rtmax"])) - - expect_error(.features_ms_region(xod_xgrg, msLevel = 1L, - features = c("a", "b")), "out of") -}) - test_that(".which_peaks_above_threshold works", { skip_on_os(os = "windows", arch = "i386") diff --git a/tests/testthat/test_methods-XCMSnExp.R b/tests/testthat/test_methods-XCMSnExp.R index 7e7d65b9f..ed348905d 100644 --- a/tests/testthat/test_methods-XCMSnExp.R +++ b/tests/testthat/test_methods-XCMSnExp.R @@ -2575,3 +2575,16 @@ test_that("reconstructChromPeakSpectra works", { expect_error(reconstructChromPeakSpectra(pest_swth, peakId = c("a", "b")), "None of the provided") }) + +test_that("fillChromPeaks,XcmsExperiment works with verboseBetaColumns", { + p <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000), + verboseBetaColumns = TRUE) + res <- findChromPeaks(od_x, param = p) + expect_true(all(c("beta_cor", "beta_snr") %in% colnames(chromPeaks(res)))) + p <- PeakDensityParam(sampleGroups = rep(1, 3)) + res <- groupChromPeaks(res, param = p) + res <- fillChromPeaks(res, ChromPeakAreaParam()) + pks_det <- chromPeaks(res)[!chromPeakData(res)$is_filled, ] + pks_fil <- chromPeaks(res)[chromPeakData(res)$is_filled, ] + expect_true(sum(is.na(pks_fil[, "beta_cor"])) < 4) +}) diff --git a/tests/testthat/test_methods-group-features.R b/tests/testthat/test_methods-group-features.R index 34079a7fd..d6dcca67e 100644 --- a/tests/testthat/test_methods-group-features.R +++ b/tests/testthat/test_methods-group-features.R @@ -359,8 +359,6 @@ test_that("EicSimilarityParam works", { }) test_that("groupFeatures,XCMSnExp,EicSimilarityParam works", { - skip_on_os(os = "windows", arch 
= "i386") - ## n outside number of samples expect_error(groupFeatures(xodg, param = EicSimilarityParam(n = 10)), "smaller than or") @@ -375,6 +373,7 @@ test_that("groupFeatures,XCMSnExp,EicSimilarityParam works", { res_all <- groupFeatures(tmp, param = EicSimilarityParam()) expect_true(is.character(featureGroups(res_all))) + #' FG.009, FG.001, FG.001, FG.002, FG.003, FG.003 idx <- c(3, 12, 13, 34, 39, 40) tmp <- xodg featureDefinitions(tmp)$feature_group <- NA @@ -416,7 +415,8 @@ test_that("groupFeatures,XcmsExperiment,EicSimilarityParam works", { res_all <- groupFeatures(tmp, param = EicSimilarityParam()) expect_true(is.character(featureGroups(res_all))) - idx <- c(1, 2, 3, 9, 10, 14) + #' FG.014, FG.007, FG.007, FG.006, FG.006, FG.006 + idx <- c(1, 2, 3, 10, 13, 14) featureDefinitions(tmp)$feature_group <- NA featureDefinitions(tmp)$feature_group[idx] <- "FG" res <- groupFeatures(tmp, param = EicSimilarityParam()) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 188d65a5e..0848eb2b5 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -60,7 +60,9 @@ This document describes data import, exploration and pre-processing of a simple test LC-MS data set with the *xcms* package version >= 4. The same functions can be applied to the older *MSnbase*-based workflows (xcms version 3). Additional documents and tutorials covering also other topics of untargeted metabolomics -analysis are listed at the end of this document. +analysis are listed at the end of this document. There is also a [xcms +tutorial](https://jorainer.github.io/xcmsTutorials) available with more examples +and details. # Pre-processing of LC-MS data @@ -325,7 +327,7 @@ internal standard of known compound. It is suggested to inspect the ranges of m/z values for several compounds (either internal standards or compounds known to be present in the sample) and define the `ppm` parameter for *centWave* according to these. 
See also this -[tutorial](https://jorainer.github.io/metabolomics2018) for additional +[tutorial](https://jorainer.github.io/xcmsTutorials) for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -856,17 +858,62 @@ correspondence settings on manually defined m/z slices before applying them to the full data set. For the tested m/z slice the settings seemed to be OK and we are thus applying them to the full data set below. Especially the parameter `bw` will be very data set dependent (or more specifically LC-dependent) and should -be adapted to each data set. See the [Metabolomics pre-processing with -`xcms`](https://jorainer.github.io/metabolomics2018) tutorial for examples and -more details. +be adapted to each data set. + +Another important parameter is `binSize` that defines the size of the m/z slices +(bins) within which peaks are being grouped. This parameter thus defines the +required similarity in m/z values for the chromatographic peaks that are then +assumed to represent signal from the same (type of ion of a) compound and hence +evaluated for grouping. By default, a constant m/z bin size is used, but by +changing parameter `ppm` to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value hence +better representing the measurement error/precision of some MS instruments). The +bin sizes (and subsequently the m/z width of the defined features) would then +reach a maximal value of `binSize` plus `ppm` parts-per-million of the largest +m/z value of any chromatographic peak in the data set. + +See also the [xcms +tutorial](https://jorainer.github.io/xcmsTutorials) for more examples and +details. ```{r correspondence, message = FALSE } -## Perform the correspondence +## Perform the correspondence using fixed m/z bin sizes. 
pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) ``` +As an alternative, we perform the correspondence using m/z-relative bin sizes. + +```{r} +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +``` + +The results will be *mostly* similar, except for the higher m/z range (in which +larger m/z bins will be used). Below we plot the m/z range for features against +their median m/z. For the present data set (acquired with a triple quad +instrument) no clear difference can be seen for the two approaches, hence we +proceed with the analysis using the fixed bin size setting. A stronger relationship +would be expected for example for data measured on TOF instruments. + +```{r, fig.cap = "Relationship between a feature's m/z and the m/z width (max - min m/z) of the feature. Red points represent the results with the fixed m/z bin size, blue with the m/z-relative bin size."} +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + +``` + Results from the correspondence analysis can be accessed with the `featureDefinitions` and `featureValues` function. The former returns a data frame with general information on each of the defined features, with each row