Skip to content

Commit

Permalink
Merge pull request #732 from sneumann/lama
Browse files Browse the repository at this point in the history
`LamaParama` implementation for `AdjustRtime()`: correcting alignment based on external dataset.
  • Loading branch information
sneumann committed Mar 21, 2024
2 parents c37baa7 + b28278f commit d6cc33d
Show file tree
Hide file tree
Showing 16 changed files with 1,092 additions and 43 deletions.
13 changes: 9 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: xcms
Version: 4.1.11
Version: 4.1.12
Title: LC-MS and GC-MS Data Analysis
Description: Framework for processing and visualization of chromatographically
separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF,
Expand Down Expand Up @@ -39,7 +39,11 @@ Authors@R: c(
person(given = "Pablo", family = "Vangeenderhuysen",
email = "pablo.vangeenderhuysen@ugent.be",
role = "ctb",
comment = c(ORCID = "0000-0002-5492-6904"))
comment = c(ORCID = "0000-0002-5492-6904")),
person(given = "Carl", family = "Brunius",
email = "carl.brunius@chalmers.se",
role = "ctb",
comment = c(ORCID = 0000-0003-3957-870X))
)
Depends:
R (>= 4.0.0),
Expand All @@ -56,7 +60,7 @@ Imports:
S4Vectors,
IRanges,
SummarizedExperiment,
MsCoreUtils (>= 1.15.3),
MsCoreUtils (>= 1.15.5),
MsFeatures,
MsExperiment (>= 1.5.4),
Spectra (>= 1.13.7),
Expand All @@ -79,7 +83,8 @@ Suggests:
RANN,
multtest,
MsBackendMgf,
signal
signal,
mgcv
Enhances:
Rgraphviz,
rgl
Expand Down
13 changes: 9 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ importFrom("SummarizedExperiment", "rowData")
importFrom("SummarizedExperiment", "rowData<-")
importFrom("SummarizedExperiment", "assay")
importFrom("MsCoreUtils", "rbindFill", "closest", "i2index", "sumi", "between",
"maxi", "breaks_ppm")
"maxi", "breaks_ppm", "force_sorted")
importFrom("RColorBrewer", "brewer.pal")

importFrom("graphics", "image", "boxplot", "matplot", "rect", "axis",
Expand All @@ -49,7 +49,7 @@ importFrom("stats", "aov", "approx", "convolve", "cor", "deriv3",
"dist", "fft", "fitted", "lm", "loess", "lsfit", "median",
"na.omit", "nextn", "nls", "predict", "pt", "quantile",
"runmed", "sd", "stepfun", "weighted.mean", "density", "approxfun",
"rnorm", "runif", "dbeta")
"rnorm", "runif", "dbeta", "resid")
importFrom("utils", "flush.console", "head", "object.size",
"packageVersion", "read.csv", "tail", "write.csv",
"write.table", "capture.output", "data")
Expand Down Expand Up @@ -251,7 +251,10 @@ export(
"groupOverlaps",
"estimatePrecursorIntensity",
"featureArea",
"loadXcmsData"
"loadXcmsData",
"matchLamasChromPeaks",
"summarizeLamaMatch",
"matchedRtimes"
)

## New analysis methods
Expand All @@ -275,6 +278,7 @@ exportClasses(
"MzClustParam",
"NearestPeaksParam",
"PeakGroupsParam",
"LamaParama",
"ObiwarpParam",
"GenericParam",
"FillChromPeaksParam",
Expand Down Expand Up @@ -444,6 +448,7 @@ export("CentWaveParam",
"MzClustParam",
"NearestPeaksParam",
"PeakGroupsParam",
"LamaParama",
"ObiwarpParam",
"GenericParam",
"FillChromPeaksParam",
Expand Down Expand Up @@ -593,7 +598,7 @@ exportMethods("storeResults")

## filtering features things
importFrom("MetaboCoreUtils", "rowRsd", "rowDratio", "rowPercentMissing",
"rowBlank")
"rowBlank", "mclosest")
export("RsdFilter")
export("DratioFilter")
export("PercentMissingFilter")
Expand Down
40 changes: 25 additions & 15 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ setGeneric("addProcessHistory", function(object, ...)
#' @description
#'
#' The `adjustRtime` method(s) perform retention time correction (alignment)
#' between chromatograms of different samples. Alignment is performed by defaul
#' on MS level 1 data. Retention times of spectra from other MS levels, if
#' present, are subsequently adjusted based on the adjusted retention times
#' of the MS1 spectra. Note that calling `adjustRtime` on a *xcms* result object
#' will remove any eventually present previous alignment results as well as
#' any correspondence analysis results. To run a second round of alignment,
#' raw retention times need to be replaced with adjusted ones using the
#' [applyAdjustedRtime()] function.
#' between chromatograms of different samples/dataset. Alignment is performed
#' by default on MS level 1 data. Retention times of spectra from other MS
#' levels, if present, are subsequently adjusted based on the adjusted
#' retention times of the MS1 spectra. Note that calling `adjustRtime` on a
#' *xcms* result object will remove any eventually present previous alignment
#' results as well as any correspondence analysis results. To run a second
#' round of alignment, raw retention times need to be replaced with adjusted
#' ones using the [applyAdjustedRtime()] function.
#'
#' The alignment method can be specified (and configured) using a dedicated
#' `param` argument.
Expand All @@ -40,7 +40,7 @@ setGeneric("addProcessHistory", function(object, ...)
#' The alignment is performed directly on the [profile-matrix] and can hence
#' be performed independently of the peak detection or peak grouping.
#'
#' - `PeakGroupsParam`: performs retention time correctoin based on the
#' - `PeakGroupsParam`: performs retention time correction based on the
#' alignment of features defined in all/most samples (corresponding to
#' *house keeping compounds* or marker compounds) (Smith 2006). First the
#' retention time deviation of these features is described by fitting either a
Expand All @@ -60,6 +60,15 @@ setGeneric("addProcessHistory", function(object, ...)
#' in `param`. See also [do_adjustRtime_peakGroups()] for the core API
#' function.
#'
#' - `LamaParama`: This function performs retention time correction by aligning
#' chromatographic data to an external reference dataset (concept and initial
#' implementation by Carl Brunius). The process involves identifying and
#' aligning peaks within the experimental chromatographic data, represented
#' as an `XcmsExperiment` object, to a predefined set of landmark features
#' called "lamas". These landmark features are characterized by their
#' mass-to-charge ratio (m/z) and retention time. see [LamaParama()] for more
#' information on the method.
#'
#' @section Subset-based alignment:
#'
#' All alignment methods allow to perform the retention time correction on a
Expand Down Expand Up @@ -189,9 +198,9 @@ setGeneric("addProcessHistory", function(object, ...)
#' be used to interpolate corrected retention times for all peak groups.
#' Can be either `"loess"` or `"linear"`.
#'
#' @param span For `PeakGroupsParam`: `numeric(1)` defining the degree of
#' smoothing (if `smooth = "loess"`). This parameter is passed to the
#' internal call to [loess()].
#' @param span For `PeakGroupsParam`: `numeric(1)` defining
#' the degree of smoothing (if `smooth = "loess"`). This parameter is
#' passed to the internal call to [loess()].
#'
#' @param subset For `ObiwarpParam` and `PeakGroupsParam`: `integer` with the
#' indices of samples within the experiment on which the alignment models
Expand All @@ -206,7 +215,7 @@ setGeneric("addProcessHistory", function(object, ...)
#'
#' @param value For all assignment methods: the value to set/replace.
#'
#' @param x An `ObiwarpParam` or `PeakGroupsParam` object.
#' @param x An `ObiwarpParam`, `PeakGroupsParam` or `LamaParama` object.
#'
#' @param ... ignored.
#'
Expand All @@ -219,7 +228,8 @@ setGeneric("addProcessHistory", function(object, ...)
#' `XcmsExperiment` with the adjusted retention times stored in an new
#' *spectra variable* `rtime_adjusted` in the object's `spectra`.
#'
#' `ObiwarpParam` and `PeakGroupsParam` return the respective parameter object.
#' `ObiwarpParam`, `PeakGroupsParam` and `LamaParama` return the respective
#' parameter object.
#'
#' `adjustRtimeGroups` returns a `matrix` with the retention times of *marker*
#' features in each sample (each row one feature, each row one sample).
Expand All @@ -230,7 +240,7 @@ setGeneric("addProcessHistory", function(object, ...)
#'
#' @seealso [plotAdjustedRtime()] for visualization of alignment results.
#'
#' @author Colin Smith, Johannes Rainer
#' @author Colin Smith, Johannes Rainer, Philippine Louail, Carl Brunius
#'
#' @references
#'
Expand Down
39 changes: 39 additions & 0 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,45 @@ setClass("PeakGroupsParam",
else TRUE
})

setClass("LamaParama",
slots = c(lamas = "matrix",
method = "character",
span = "numeric",
outlierTolerance = "numeric",
zeroWeight = "numeric",
ppm = "numeric",
tolerance = "numeric",
toleranceRt = "numeric",
bs = "character",
rtMap = "list",
nChromPeaks = "numeric"),
contains = "Param",
prototype = prototype(
lamas = matrix(ncol = 2, nrow = 0),
method = "loess",
span = 0.5,
outlierTolerance = 3,
zeroWeight = 10,
ppm = 20,
tolerance = 0,
toleranceRt = 20,
bs = "tp",
rtMap = list(),
nChromPeaks = numeric()),
validity = function(object) {
msg <- NULL
if (!nrow(object@lamas))
msg <- c(msg, paste0("'lamas' cannot be empty"))
else {
}
if (length(object@method) > 1 |
!all(object@method %in% c("gam", "loess")))
msg <- c(msg, paste0("'method' has to be either \"",
"gam\" or \"loess\"!"))
msg
})


setClass("ObiwarpParam",
slots = c(binSize = "numeric",
centerSample = "integer",
Expand Down
60 changes: 60 additions & 0 deletions R/XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,66 @@ setMethod(
object
})

#'@rdname LamaParama
setMethod(
"adjustRtime", signature(object = "XcmsExperiment", param = "LamaParama"),
function(object, param, BPPARAM = bpparam(), ...) {
if (!hasChromPeaks(object))
stop("'object' needs to have detected chromPeaks. ",
"Run 'findChromPeaks()' first")
if (hasAdjustedRtime(object))
stop("Alignment results already present. Please either remove ",
"them with 'dropAdjustedRtime' in order to perform an ",
"alternative, new, alignment, or use 'applyAdjustedRtime'",
" prior 'adjustRtime' to perform a second round of ",
"alignment.")
fidx <- as.factor(fromFile(object))
rt_raw <- split(rtime(object), fidx)
idx <- seq_along(object)

# Check if user as ran matching lama vs chrompeaks beforehand
if (length(param@rtMap) == 0)
param <- matchLamasChromPeaks(object, param)
rtMap <- param@rtMap
if (length(rtMap) != length(object))
stop("Mismatch between the number of files matched to lamas: ",
length(rtMap), " and files in the object: ", length(object))

# Make model and adjust retention for each file
rt_adj <- bpmapply(rtMap, rt_raw, idx, FUN = function(x, y, i, param) {
if (nrow(x) >= 10) { # too strict ? Gam always throws error when less than that and loess does not work that well either.
.adjust_rt_model(y, method = param@method,
rt_map = x, span = param@span,
resid_ratio = param@outlierTolerance,
zero_weight = param@zeroWeight,
bs = param@bs)
} else {
warning("Too few chrompeaks could be assigned to external",
" reference peaks (lamas) for sample ", i,
". Skipping alignment for this sample.")
y
}
}, SIMPLIFY = FALSE, BPPARAM = BPPARAM, MoreArgs = list(param = param))

# post processing housekeeping steps
pt <- vapply(object@processHistory, processType, character(1))
idx_pg <- .match_last(.PROCSTEP.PEAK.GROUPING, pt,
nomatch = -1L)
if (idx_pg > 0)
ph <- object@processHistory[idx_pg]
else ph <- list()
object <- dropFeatureDefinitions(object)
object@spectra$rtime_adjusted <- unlist(rt_adj, use.names = FALSE)
object@chromPeaks <-.applyRtAdjToChromPeaks(
.chromPeaks(object), rtraw = rt_raw, rtadj = rt_adj)
xph <- XProcessHistory(
param = param, type. = .PROCSTEP.RTIME.CORRECTION,
fileIndex = seq_along(object))
object@processHistory <- c(object@processHistory, ph, list(xph))
validObject(object)
object
})

#' @rdname XcmsExperiment
setMethod("dropAdjustedRtime", "XcmsExperiment", function(object) {
if (!hasAdjustedRtime(object))
Expand Down
Loading

0 comments on commit d6cc33d

Please sign in to comment.