Skip to content

Commit

Permalink
Merge branch 'devel' into phili
Browse files Browse the repository at this point in the history
  • Loading branch information
philouail committed Jan 26, 2024
2 parents a025fc1 + 6f4f371 commit 36a95ad
Show file tree
Hide file tree
Showing 39 changed files with 462 additions and 254 deletions.
1 change: 1 addition & 0 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ jobs:
BiocManager::install("MsBackendMgf")
BiocManager::install("MetaboCoreUtils")
BiocManager::install("magick")
BiocManager::install("RforMassSpectrometry/MsExperiment")
## For running the checks
message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****'))
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ Imports:
robustbase,
IRanges,
SummarizedExperiment,
MsCoreUtils (>= 1.11.5),
MsCoreUtils (>= 1.15.3),
MsFeatures,
MsExperiment (>= 1.1.2),
MsExperiment (>= 1.5.4),
Spectra (>= 1.13.2),
progress,
multtest,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ importFrom("SummarizedExperiment", "rowData")
importFrom("SummarizedExperiment", "rowData<-")
importFrom("SummarizedExperiment", "assay")
importFrom("MsCoreUtils", "rbindFill", "closest", "i2index", "sumi", "between",
"maxi")
"maxi", "breaks_ppm")

## Additional imports proposed by R CMD check:
importFrom("graphics", "abline", "barplot", "close.screen", "hist",
Expand Down Expand Up @@ -575,6 +575,7 @@ importFrom("Spectra", "MsBackendMemory")
## MsExperiment things
importClassesFrom("MsExperiment", "MsExperiment")
importMethodsFrom("MsExperiment", "spectra")
importMethodsFrom("MsExperiment", "filterSpectra")
importMethodsFrom("BiocGenerics", "do.call")
importMethodsFrom("BiocGenerics", "rbind")
importFrom("MsExperiment", "MsExperiment")
Expand Down
102 changes: 82 additions & 20 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -569,19 +569,30 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
#' [XCMSnExp-class] object. The function returns for each feature the
#' extracted ion chromatograms (along with all associated chromatographic
#' peaks) in each sample. The chromatogram is extracted from the m/z - rt
#' region including all chromatographic peaks of that features (i.e. based on
#' the ranges of `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"` of all
#' chromatographic peaks of the feature).
#' region that includes **all** chromatographic peaks of a feature. By default,
#' this region is defined using the range of the chromatographic peaks' m/z
#' and retention times (with `mzmin = min`, `mzmax = max`, `rtmin = min` and
#' `rtmax = max`). For some features, and depending on the data, the m/z and
#' rt range can thus be relatively large. The boundaries of the m/z - rt
#' region can also be restricted by changing parameters `mzmin`, `mzmax`,
#' `rtmin` and `rtmax` to a different functions, such as `median`.
#'
#' By default only chromatographic peaks associated with a feature are
#' included. For `object` being a `XCMSnExp` object parameter `include`
#' allows also to return all chromatographic peaks with their apex
#' position within the selected region (`include = "apex_within"`) or any
#' chromatographic peak overlapping the m/z and retention time range
#' (`include = "any"`).
#' included in the returned [XChromatograms] object. For `object` being an
#' `XCMSnExp` object parameter `include` allows also to return all
#' chromatographic peaks with their apex position within the selected
#' region (`include = "apex_within"`) or any chromatographic peak overlapping
#' the m/z and retention time range (`include = "any"`).
#'
#' @note
#'
#' The EIC data of a feature is extracted from every sample using the same
#' m/z - rt area. The EIC in a sample does thus not exactly represent the
#' signal of the actually identified chromatographic peak in that sample.
#' The [chromPeakChromatograms()] function would allow to extract the actual
#' EIC of the chromatographic peak in a specific sample. See also examples
#' below.
#'
#' Parameters `include`, `filled`, `n` and `value` are only supported
#' for `object` being an `XCMSnExp`.
#'
Expand Down Expand Up @@ -628,6 +639,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
#' Defaults to `"feature_only"`; See description above for options and
#' details.
#'
#' @param mzmax `function` defining how the upper boundary of the m/z region
#' from which the EIC is integrated should be defined. Defaults to
#' `mzmax = max` thus the largest `"mzmax"` value for all chromatographic
#' peaks of a feature will be used.
#'
#' @param mzmin `function` defining how the lower boundary of the m/z region
#' from which the EIC is integrated should be defined. Defaults to
#' `mzmin = min` thus the smallest `"mzmin"` value for all chromatographic
#' peaks of a feature will be used.
#'
#' @param n Only for `object` being an `XCMSnExp`: `integer(1)` to optionally
#' specify the number of *top n* samples from which the EIC should be
#' extracted.
Expand All @@ -642,6 +663,16 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
#' supported and the results are thus returned as an [XChromatograms()]
#' object.
#'
#' @param rtmax `function` defining how the upper boundary of the rt region
#' from which the EIC is integrated should be defined. Defaults to
#' `rtmax = max` thus the largest `"rtmax"` value for all chromatographic
#' peaks of a feature will be used.
#'
#' @param rtmin `function` defining how the lower boundary of the rt region
#' from which the EIC is integrated should be defined. Defaults to
#' `rtmin = min` thus the smallest `"rtmin"` value for all chromatographic
#' peaks of a feature will be used.
#'
#' @param value Only for `object` being an `XCMSnExp`: `character(1)`
#' specifying the column to be used to sort the samples. Can be either
#' `"maxo"` (the default) or `"into"` to use the maximal peak intensity
Expand Down Expand Up @@ -672,10 +703,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
#' ## Disable parallel processing for this example
#' register(SerialParam())
#'
#' ## Subset the object to a smaller retention time range
#' xdata <- filterRt(faahko_sub, c(2500, 3500))
#'
#' xdata <- groupChromPeaks(xdata,
#' ## Perform correspondence analysis
#' xdata <- groupChromPeaks(faahko_sub,
#' param = PeakDensityParam(minFraction = 0.8, sampleGroups = rep(1, 3)))
#'
#' ## Get the feature definitions
Expand All @@ -686,8 +715,28 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
#' chrs <- featureChromatograms(xdata,
#' features = rownames(featureDefinitions)[1:3])
#'
#' ## Plot the XIC for the first feature using different colors for each file
#' ## Plot the EIC for the first feature using different colors for each file.
#' plot(chrs[1, ], col = c("red", "green", "blue"))
#'
#' ## The EICs for all 3 samples use the same m/z and retention time range,
#' ## which was defined using the `featureArea` function:
#' featureArea(xdata, features = rownames(featureDefinitions(xdata))[1:3],
#' mzmin = min, mzmax = max, rtmin = min, rtmax = max)
#'
#' ## To extract the actual (exact) EICs for each chromatographic peak of
#' ## a feature in each sample, the `chromPeakChromatograms` function would
#' ## need to be used instead. Below we extract the EICs for all
#' ## chromatographic peaks of the first feature. We need to first get the
#' ## IDs of all chromatographic peaks assigned to the first feature:
#' peak_ids <- rownames(chromPeaks(xdata))[featureDefinitions(xdata)$peakidx[[1L]]]
#'
#' ## We can now pass these to the `chromPeakChromatograms` function with
#' ## parameter `peaks`:
#' eic_1 <- chromPeakChromatograms(xdata, peaks = peak_ids)
#'
#' ## To plot these into a single plot we need to use the
#' ## `plotChromatogramsOverlay` function:
#' plotChromatogramsOverlay(eic_1)
setGeneric("featureChromatograms", function(object, ...)
standardGeneric("featureChromatograms"))

Expand Down Expand Up @@ -800,8 +849,8 @@ setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-")
#' `ChromPeakAreaParam`, the median `"mzmin"`, `"mzmax"`, `"rtmin"` and
#' `"rtmax"` values from all detected chromatographic peaks of a feature
#' would be used instead.
#' In contrast to the `FillChromPeaksParam` approach this method uses the
#' actual identified chromatographic peaks of a feature to define the area
#' In contrast to the `FillChromPeaksParam` approach this method uses (all)
#' identified chromatographic peaks of a feature to define the area
#' from which the signal should be integrated.
#'
#' @details
Expand Down Expand Up @@ -1235,10 +1284,14 @@ setGeneric("group", function(object, ...) standardGeneric("group"))
#'
#' - `PeakDensityParam`: correspondence using the *peak density* method
#' (Smith 2006) that groups chromatographic peaks along the retention time
#' axis within slices of (partially overlapping) m/z ranges. All peaks (from
#' the same or from different samples) with their apex position being close
#' on the retention time axis are grouped into a LC-MS feature. See in
#' addition [do_groupChromPeaks_density()] for the core API function.
#' axis within slices of (partially overlapping) m/z ranges. By default,
#' these m/z ranges (bins) have a constant size. By setting `ppm` to a value
#' larger than 0, m/z dependent bin sizes can be used instead (better
#' representing the m/z dependent measurement error of some MS instruments).
#' All peaks (from the same or from different samples) with their apex
#' position being close on the retention time axis are grouped into a LC-MS
#' feature. See in addition [do_groupChromPeaks_density()] for the core API
#' function.
#'
#' - `NearestPeaksParam`: performs peak grouping based on the proximity of
#' chromatographic peaks from different samples in the m/z - rt space similar
Expand Down Expand Up @@ -1304,6 +1357,14 @@ setGeneric("group", function(object, ...) standardGeneric("group"))
#'
#' @param ppm For `MzClustParam`: `numeric(1)` representing the relative m/z
#' error for the clustering/grouping (in parts per million).
#' For `PeakDensityParam`: `numeric(1)` to define m/z-dependent, increasing
#' m/z bin sizes. If `ppm = 0` (the default) m/z bins are defined by the
#' sequence of values from the smallest to the larges m/z value with a
#' constant bin size of `binSize`. For `ppm` > 0 the size of each bin is
#' increased in addition by the `ppm` of the (upper) m/z boundary of the
#' bin. The maximal bin size (used for the largest m/z values) would then
#' be `binSize` plus `ppm` parts-per-million of the largest m/z value of
#' all peaks in the data set.
#'
#' @param param The parameter object selecting and configuring the algorithm.
#'
Expand Down Expand Up @@ -1808,7 +1869,8 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...)
#'
#' @param ppm For `MergeNeighboringPeaksParam`: `numeric(1)` defining a m/z
#' relative value (in parts per million) by which the m/z range of each
#' chromatographic peak is expanded to check for overlapping peaks.
#' chromatographic peak is expanded (on each side) to check for overlapping
#' peaks.
#'
#' @param threshold For `FilterIntensityParam`: `numeric(1)` defining the
#' threshold below which peaks are removed.
Expand Down
8 changes: 5 additions & 3 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -443,13 +443,13 @@ setClass("XProcessHistory",
#' method to extend the EIC to a integer base-2 length prior to being passed to
#' \code{convolve} rather than the default "reflect" method. See
#' https://github.com/sneumann/xcms/issues/445 for more information.
#'
#'
#' @param verboseBetaColumns Option to calculate two additional metrics of peak
#' quality via comparison to an idealized bell curve. Adds \code{beta_cor} and
#' \code{beta_snr} to the \code{chromPeaks} output, corresponding to a Pearson
#' correlation coefficient to a bell curve with several degrees of skew as well
#' as an estimate of signal-to-noise using the residuals from the best-fitting
#' bell curve. See https://github.com/sneumann/xcms/pull/685 and
#' bell curve. See https://github.com/sneumann/xcms/pull/685 and
#' https://doi.org/10.1186/s12859-023-05533-4 for more information.
#'
#' @details
Expand Down Expand Up @@ -1316,14 +1316,16 @@ setClass("PeakDensityParam",
minFraction = "numeric",
minSamples = "numeric",
binSize = "numeric",
maxFeatures = "numeric"),
maxFeatures = "numeric",
ppm = "numeric"),
contains = "Param",
prototype = prototype(
sampleGroups = numeric(),
bw = 30,
minFraction = 0.5,
minSamples = 1,
binSize = 0.25,
ppm = 0,
maxFeatures = 50),
validity = function(object) {
msg <- character()
Expand Down
31 changes: 0 additions & 31 deletions R/MsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,37 +247,6 @@
USE.NAMES = FALSE, BPPARAM = BPPARAM)
}

#' generic method to apply a filtering to the spectra data. The function will
#' apply the filtering and (most importantly) keep/update the link between
#' spectra and samples.
#'
#' @importMethodsFrom Spectra selectSpectraVariables
#'
#' @param x `MsExperiment`.
#'
#' @param FUN filter function.
#'
#' @param ... parameters for `FUN`.
#'
#' @author Johannes Rainer
#'
#' @noRd
.mse_filter_spectra <- function(x, FUN, ...) {
ls <- length(spectra(x))
have_links <- length(x@sampleDataLinks[["spectra"]]) > 0
if (have_links)
x@spectra$._SPECTRA_IDX <- seq_len(ls)
x@spectra <- FUN(x@spectra, ...)
if (have_links) {
if (ls != length(spectra(x)))
x <- .update_sample_data_links_spectra(x)
svs <- unique(c(spectraVariables(spectra(x)), "mz", "intensity"))
x@spectra <- selectSpectraVariables(
x@spectra, svs[svs != "._SPECTRA_IDX"])
}
x
}

#' Ensure that each spectrum is assigned to a sample and that we only have 1:1
#' mappings. That is important for most code involving splitting of samples
#' etc.
Expand Down
8 changes: 3 additions & 5 deletions R/MsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
setMethod("filterRt", "MsExperiment",
function(object, rt = numeric(), ...) {
message("Filter spectra")
object <- .mse_filter_spectra(object, filterRt, rt = rt, ...)
object
filterSpectra(object, filterRt, rt = rt, ...)
})

#' @rdname XcmsExperiment
Expand All @@ -24,8 +23,7 @@ setMethod("filterMz", "MsExperiment",
setMethod("filterMsLevel", "MsExperiment",
function(object, msLevel. = uniqueMsLevels(object)) {
message("Filter spectra")
.mse_filter_spectra(object, filterMsLevel,
msLevel. = msLevel.)
filterSpectra(object, filterMsLevel, msLevel. = msLevel.)
})

#' @rdname XcmsExperiment
Expand Down Expand Up @@ -93,7 +91,7 @@ setMethod("polarity", "MsExperiment", function(object) {
#' @rdname XcmsExperiment
setMethod(
"filterIsolationWindow", "MsExperiment", function(object, mz = numeric()) {
.mse_filter_spectra(object, filterIsolationWindow, mz = mz)
filterSpectra(object, filterIsolationWindow, mz = mz)
})

#' @rdname XcmsExperiment
Expand Down
48 changes: 42 additions & 6 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@
cp, sampleGroups = sampleGroups(param), bw = bw(param),
minFraction = minFraction(param),
minSamples = minSamples(param), binSize = binSize(param),
maxFeatures = maxFeatures(param), index = index)
maxFeatures = maxFeatures(param), ppm = ppm(param),
index = index)
},
MzClustParam = {
tmp <- do_groupPeaks_mzClust(
Expand Down Expand Up @@ -933,15 +934,50 @@

#' @rdname XcmsExperiment
featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min,
rtmax = max, msLevel = integer(),
features = character()) {
rtmax = max, features = character()) {
if (!hasFeatures(object))
stop("No correspondence results available. Please run ",
"'groupChromPeaks' first.")
if (!length(msLevel))
msLevel <- seq_len(10)
if (!length(features))
features <- rownames(featureDefinitions(object))
.features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin,
rtmax = rtmax, msLevel = msLevel, features = features)
rtmax = rtmax, features = features)
}

#' @title Define MS regions for features
#'
#' @param x `XcmsExperiment` or `XCMSnExp`.
#'
#' @param mzmin, mzmax, rtmin, rtmax `function` to be applied to the values
#' (rtmin, ...) of the chrom peaks. Defaults to `median` but would also
#' work with `mean` etc.
#'
#' @param features `character` with the IDs of the features. Mandatory!
#'
#' @return `matrix` with columns `"mzmin"`, `"mzmax"`, `"rtmin"`, `"rtmax"`
#' defining the range of
#'
#' @author Johannes Rainer
#'
#' @noRd
.features_ms_region <- function(x, mzmin = median, mzmax = median,
rtmin = median, rtmax = median,
features = character()) {
features <- .i2index(features, ids = rownames(featureDefinitions(x)),
"features")
pks <- .chromPeaks(x)[, c("mzmin", "mzmax", "rtmin", "rtmax")]
res <- do.call(
rbind, lapply(featureDefinitions(x)$peakidx[features],
function(i) {
## maybe consider/drop gap-filled peaks?
c(mzmin(pks[i, "mzmin"]),
mzmax(pks[i, "mzmax"]),
rtmin(pks[i, "rtmin"]),
rtmax(pks[i, "rtmax"]))
}))
rownames(res) <- rownames(featureDefinitions(x))[features]
colnames(res) <- c("mzmin", "mzmax", "rtmin", "rtmax")
res
}

#' *Reconstruct* MS2 spectra for DIA data:
Expand Down
Loading

0 comments on commit 36a95ad

Please sign in to comment.