Skip to content

Commit

Permalink
Merge pull request #703 from sneumann/jomain
Browse files Browse the repository at this point in the history
Fix issue with subset-based peakGroups alignment and other updates
  • Loading branch information
sneumann committed Dec 6, 2023
2 parents 3d79444 + 6aea748 commit 604341d
Show file tree
Hide file tree
Showing 30 changed files with 750 additions and 579 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ jobs:
matrix:
config:
- { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: macOS-latest, r: 'next', bioc: '3.18'}
- { os: windows-latest, r: 'next', bioc: '3.18'}
- { os: macOS-latest, r: 'devel', bioc: '3.19'}
- { os: windows-latest, r: 'devel', bioc: '3.19'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: xcms
Version: 4.1.3
Version: 4.1.4
Title: LC-MS and GC-MS Data Analysis
Description: Framework for processing and visualization of chromatographically
separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF,
Expand Down Expand Up @@ -59,7 +59,7 @@ Imports:
MsCoreUtils (>= 1.11.5),
MsFeatures,
MsExperiment (>= 1.1.2),
Spectra (>= 1.11.10),
Spectra (>= 1.13.2),
progress,
multtest,
jsonlite
Expand Down
8 changes: 5 additions & 3 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ setGeneric("addProcessHistory", function(object, ...)
#' present, are subsequently adjusted based on the adjusted retention times
#' of the MS1 spectra. Note that calling `adjustRtime` on a *xcms* result object
#' will remove any eventually present previous alignment results as well as
#' any correspondence analysis results.
#' any correspondence analysis results. To run a second round of alignment,
#' raw retention times need to be replaced with adjusted ones using the
#' [applyAdjustedRtime()] function.
#'
#' The alignment method can be specified (and configured) using a dedicated
#' `param` argument.
Expand Down Expand Up @@ -153,14 +155,14 @@ setGeneric("addProcessHistory", function(object, ...)
#' alignment should be performed instead of the default global alignment.
#'
#' @param minFraction For `PeakGroupsParam`: `numeric(1)` between 0 and 1
#' defining the minimum required fraction of samples in which peaks for
#' defining the minimum required proportion of samples in which peaks for
#' the peak group were identified. Peak groups passing this criteria will
#' be aligned across samples and retention times of individual spectra will
#' be adjusted based on this alignment. For `minFraction = 1` the peak
#' group has to contain peaks in all samples of the experiment. Note that if
#' `subset` is provided, the specified fraction is relative to the
#' defined subset of samples and not to the total number of samples within
#' the experiment (i.e. a peak has to be present in the specified
#' the experiment (i.e., a peak has to be present in the specified
#' proportion of subset samples).
#'
#' @param msLevel For `adjustRtime`: `integer(1)` defining the MS level on
Expand Down
17 changes: 11 additions & 6 deletions R/MsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
.mse_find_chrom_peaks_sample <- function(x, msLevel = 1L, param, ...) {
x <- filterMsLevel(x, msLevel)
pkd <- Spectra::peaksData(x, columns = c("mz", "intensity"),
BPPARAM = SerialParam())
f = factor(), BPPARAM = SerialParam())
vals_per_spect <- vapply(pkd, nrow, integer(1), USE.NAMES = FALSE)
## Open questions:
## - What to do with empty spectra? Remove them? MatchFilter does not like
Expand Down Expand Up @@ -229,7 +229,7 @@
}
}
bpmapply(
split(peaksData(x, columns = c("mz", "intensity"),
split(peaksData(x, columns = c("mz", "intensity"), f = factor(),
BPPARAM = SerialParam()), f),
split(rtime(x), f),
FUN = function(p, rt, prm, msl) {
Expand Down Expand Up @@ -311,6 +311,7 @@
else f <- factor(integer(), levels = sidx)
bplapply(
split(Spectra::peaksData(x, columns = c("mz", "intensity"),
f = factor(),
BPPARAM = SerialParam()), f),
FUN = .peaksdata_profmat, method = method, step = step,
baselevel = baselevel, basespace = basespace, mzrange. = mzrange.,
Expand Down Expand Up @@ -374,8 +375,9 @@
if (!(ref_idx %in% seq_along(x)))
stop("'centerSample' needs to be an integer between 1 and ", length(x))
ref_sps <- filterMsLevel(spectra(x[ref_idx]), msLevel = msLevel)
ref_pm <- .peaksdata_profmat(peaksData(ref_sps), method = "bin",
step = binSize(param), returnBreaks = TRUE)
ref_pm <- .peaksdata_profmat(peaksData(ref_sps, f = factor()),
method = "bin", step = binSize(param),
returnBreaks = TRUE)
res <- unlist(.mse_spectrapply_chunks(
x, FUN = function(z, ref, ref_pm, param, msLevel, BPPARAM) {
z <- setBackend(
Expand Down Expand Up @@ -416,10 +418,12 @@
if (!(length(ref) & length(other)))
stop("No spectra with MS level ", msLevel, " present")
if (!length(ref_pm))
ref_pm <- .peaksdata_profmat(peaksData(ref), method = "bin",
ref_pm <- .peaksdata_profmat(peaksData(ref, f = factor()),
method = "bin",
step = binSize(param),
returnBreaks = TRUE)
other_pm <- .peaksdata_profmat(peaksData(other), method = "bin",
other_pm <- .peaksdata_profmat(peaksData(other, f = factor()),
method = "bin",
step = binSize(param),
returnBreaks = TRUE)
adj <- .obiwarp_bare(rtime(ref), rtime(other), ref_pr = ref_pm,
Expand Down Expand Up @@ -506,6 +510,7 @@
else f <- factor(integer(), levels = sidx)
bpmapply(
split(Spectra::peaksData(z, columns = c("mz", "intensity"),
f = factor(),
BPPARAM = SerialParam()), f),
split(rtime(z), f),
split(msLevel(z), f),
Expand Down
8 changes: 8 additions & 0 deletions R/MsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ setMethod("filterMz", "MsExperiment",
filterMzRange(object, mz, msLevel.)
})

#' @rdname XcmsExperiment
setMethod("filterMsLevel", "MsExperiment",
function(object, msLevel. = uniqueMsLevels(object)) {
message("Filter spectra")
.mse_filter_spectra(object, filterMsLevel,
msLevel. = msLevel.)
})

#' @rdname XcmsExperiment
setMethod("uniqueMsLevels", "MsExperiment", function(object) {
uniqueMsLevels(spectra(object))
Expand Down
15 changes: 11 additions & 4 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,8 @@
levels = levels(f))
res <- bpmapply(
.merge_neighboring_peaks2,
split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel)), f),
split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel),
f = factor()), f),
split.data.frame(chromPeaks(x, msLevel = msLevel), f = f_peaks),
split.data.frame(chromPeakData(
x, msLevel = msLevel, return.type = "data.frame"), f = f_peaks),
Expand Down Expand Up @@ -397,7 +398,8 @@
}
}, logical(1))
},
split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel)), f),
split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel),
f = factor()), f),
split(rt, f),
split.data.frame(chromPeaks(x), f = f_peaks),
split(chromPeakData(x)$ms_level, f = f_peaks),
Expand Down Expand Up @@ -467,7 +469,8 @@
if (hasAdjustedRtime(x)) rt <- spectra(x)$rtime_adjusted[keep]
else rt <- rtime(spectra(x))[keep]
cn <- colnames(chromPeaks(x))
res <- bpmapply(split(peaksData(filterMsLevel(spectra(x), msLevel)), f),
res <- bpmapply(split(peaksData(filterMsLevel(spectra(x), msLevel),
f = factor()), f),
split(rt, f),
pal,
as.integer(names(pal)),
Expand Down Expand Up @@ -507,6 +510,9 @@
if (length(keep)) {
xsub <- lapply(x[keep], .pmat_filter_mz,
mzr = peakArea[i, c("mzmin", "mzmax")])
## length of xsub is the number of spectra, the number of peaks can
## however be 0 if no peak was found. Maybe we should/need to
## consider adding 0 or NA intensity for those.
mat <- do.call(rbind, xsub)
if (nrow(mat)) {
## can have 0, 1 or x values per rt; repeat rt accordingly
Expand Down Expand Up @@ -958,7 +964,8 @@ featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min,
## Get EICs for all chrom peaks (all MS levels)
object <- filterRt(object, rt = range(pks[, c("rtmin", "rtmax")]))
chrs <- .chromatograms_for_peaks(
peaksData(object@spectra), rt = rtime(object@spectra),
peaksData(object@spectra, f = factor()),
rt = rtime(object@spectra),
msl = msLevel(object@spectra), file_idx = fromFile,
tmz = isolationWindowTargetMz(object@spectra), pks = pks,
pks_msl = chromPeakData(object)$ms_level[ord],
Expand Down
76 changes: 45 additions & 31 deletions R/XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
#' `"isolationWindowUpperMz"` (columns in `chromPeakData`) do not contain
#' the user provided `mz`.
#'
#' - `filterMsLevel`: filter the data of the `XcmsExperiment` or `MsExperiment`
#' to keep only data of the MS level(s) specified with parameter `msLevel.`.
#'
#' - `filterMz`, `filterMzRange`: filter the spectra within an
#' `XcmsExperiment` or `MsExperiment` to the specified m/z range (parameter
#' `mz`). For `XcmsExperiment` also identified chromatographic peaks and
Expand Down Expand Up @@ -482,6 +485,8 @@
#' length equal to the numer of rows of the parameters `mz` and `rt`
#' defining the m/z and rt regions from which the chromatograms should
#' be created. Defaults to `msLevel = 1L`.
#' for `filterMsLevel`: `integer` defining the MS level(s) to which the
#' data should be subset.
#'
#' @param mz For `chromPeaks` and `featureDefinitions`: `numeric(2)` optionally
#' defining the m/z range for which chromatographic peaks or feature
Expand Down Expand Up @@ -816,6 +821,20 @@ setMethod(
callNextMethod(object = object, mz = mz, msLevel. = msLevel.)
})

#' @rdname XcmsExperiment
setMethod(
"filterMsLevel", "XcmsExperiment",
function(object, msLevel. = uniqueMsLevels(object)) {
if (!length(msLevel.))
return(object)
if (hasChromPeaks(object)) {
keep <- chromPeakData(object)$ms_level %in% msLevel.
object <- .filter_chrom_peaks(object, idx = base::which(keep))
}
callNextMethod(object = object, msLevel. = msLevel.)
})


################################################################################
## chromatographic peaks
################################################################################
Expand All @@ -839,7 +858,7 @@ setMethod(
chunkSize = chunkSize, BPPARAM = BPPARAM)
}
## Assign/define peak IDs.
pkd <- data.frame(ms_level = rep(msLevel, nrow(res)),
pkd <- data.frame(ms_level = rep(as.integer(msLevel), nrow(res)),
is_filled = rep(FALSE, nrow(res)))
ph <- XProcessHistory(param = param,
type. = .PROCSTEP.PEAK.DETECTION,
Expand Down Expand Up @@ -1205,7 +1224,7 @@ setMethod(
maxi <- max(
0, as.integer(sub("CP", "", rownames(chromPeaks(object)))))
rownames(res) <- .featureIDs(nr, "CP", maxi + 1)
pkd <- data.frame(ms_level = rep(msLevel, nr),
pkd <- data.frame(ms_level = rep(as.integer(msLevel), nr),
is_filled = rep(FALSE, nr))
rownames(pkd) <- rownames(res)
pb$tick()
Expand Down Expand Up @@ -1332,35 +1351,30 @@ setMethod(
"adjustRtime", signature(object = "MsExperiment",
param = "PeakGroupsParam"),
function(object, param, msLevel = 1L, ...) {
if (hasAdjustedRtime(object)) {
message("Removing previous alignment results")
object <- dropAdjustedRtime(object)
}
if (!inherits(object, "XcmsExperiment"))
object <- as(object, "XcmsExperiment")
if (hasAdjustedRtime(object))
stop("Alignment results already present. Please either remove ",
"them with 'dropAdjustedRtime' in order to perform an ",
"alternative, new, alignment, or use 'applyAdjustedRtime'",
" prior 'adjustRtime' to perform a second round of ",
"alignment.")
if (any(msLevel != 1L))
stop("Alignment is currently only supported for MS level 1")
if (!hasFeatures(object))
stop("No feature definitions present in 'object'. Please perform ",
"first a correspondence analysis using 'groupChromPeaks'")
if (!nrow(peakGroupsMatrix(param)))
if (!nrow(peakGroupsMatrix(param))) {
if (!hasFeatures(object))
stop("No feature definitions present in 'object'. Please ",
"perform first a correspondence analysis using ",
"'groupChromPeaks'")
peakGroupsMatrix(param) <- adjustRtimePeakGroups(
object, param = param)
}
fidx <- as.factor(fromFile(object))
rt_raw <- split(rtime(object), fidx)
rt_adj <- do_adjustRtime_peakGroups(
chromPeaks(object, msLevel = msLevel),
peakIndex = .update_feature_definitions(
featureDefinitions(object), rownames(chromPeaks(object)),
rownames(chromPeaks(object, msLevel = msLevel)))$peakidx,
rtime = rt_raw,
minFraction = minFraction(param),
extraPeaks = extraPeaks(param),
smooth = smooth(param),
span = span(param),
family = family(param),
peakGroupsMatrix = peakGroupsMatrix(param),
subset = subset(param),
subsetAdjust = subsetAdjust(param)
)
rt_adj <- .adjustRtime_peakGroupsMatrix(
rt_raw, peakGroupsMatrix(param), smooth = smooth(param),
span = span(param), family = family(param),
subset = subset(param), subsetAdjust = subsetAdjust(param))
pt <- vapply(object@processHistory, processType, character(1))
idx_pg <- .match_last(.PROCSTEP.PEAK.GROUPING, pt, nomatch = -1L)
if (idx_pg > 0)
Expand Down Expand Up @@ -1398,8 +1412,11 @@ setMethod("dropAdjustedRtime", "XcmsExperiment", function(object) {
object@spectra, svs[svs != "rtime_adjusted"])
object@processHistory <- dropProcessHistoriesList(
object@processHistory, type = .PROCSTEP.RTIME.CORRECTION, num = 1L)
if (hasFeatures(object) && idx_co > idx_al)
if (hasFeatures(object) && idx_co > idx_al) {
warning("Had to remove feature definitions along with the adjusted ",
"retention times because of the dependency between them.")
object <- dropFeatureDefinitions(object)
}
object
})

Expand Down Expand Up @@ -1535,7 +1552,7 @@ setMethod(
stop("No chromatographic peaks present. ",
"Please run 'findChromPeaks' first.")
res <- .manual_feature_definitions(chromPeaks(object), peakIdx)
res$ms_level <- msLevel
res$ms_level <- as.integer(msLevel)
if (hasFeatures(object)) {
maxi <- max(as.integer(
sub("FT", "", rownames(featureDefinitions(object)))))
Expand Down Expand Up @@ -1821,7 +1838,7 @@ setMethod(
nr <- nrow(res)
maxi <- max(as.integer(sub("CP", "", rownames(chromPeaks(object)))))
rownames(res) <- .featureIDs(nr, "CP", maxi + 1)
cpd <- data.frame(ms_level = rep(msLevel, nr),
cpd <- data.frame(ms_level = rep(as.integer(msLevel), nr),
is_filled = rep(TRUE, nr))
rownames(cpd) <- rownames(res)
object@chromPeaks <- rbind(object@chromPeaks, res)
Expand Down Expand Up @@ -1957,6 +1974,3 @@ setMethod(
object[i = sort(unique(file)), keepAdjustedRtime = keepAdjustedRtime,
keepFeatures = keepFeatures, ...]
})

## TODO filterMsLevel? Function not yet needed. In case, needs also an
## implementation for MsExperiment: update the spectra-sample-mapping.
Loading

0 comments on commit 604341d

Please sign in to comment.