diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 068d2e7ef..131fac9e8 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -53,8 +53,8 @@ jobs: matrix: config: - { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - - { os: macOS-latest, r: 'next', bioc: '3.18'} - - { os: windows-latest, r: 'next', bioc: '3.18'} + - { os: macOS-latest, r: 'devel', bioc: '3.19'} + - { os: windows-latest, r: 'devel', bioc: '3.19'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} diff --git a/DESCRIPTION b/DESCRIPTION index 13d036769..bf02a7978 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.1.3 +Version: 4.1.4 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, @@ -59,7 +59,7 @@ Imports: MsCoreUtils (>= 1.11.5), MsFeatures, MsExperiment (>= 1.1.2), - Spectra (>= 1.11.10), + Spectra (>= 1.13.2), progress, multtest, jsonlite diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c433bc15f..c97e6b32b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -24,7 +24,9 @@ setGeneric("addProcessHistory", function(object, ...) #' present, are subsequently adjusted based on the adjusted retention times #' of the MS1 spectra. Note that calling `adjustRtime` on a *xcms* result object #' will remove any eventually present previous alignment results as well as -#' any correspondence analysis results. +#' any correspondence analysis results. To run a second round of alignment, +#' raw retention times need to be replaced with adjusted ones using the +#' [applyAdjustedRtime()] function. #' #' The alignment method can be specified (and configured) using a dedicated #' `param` argument. @@ -153,14 +155,14 @@ setGeneric("addProcessHistory", function(object, ...) #' alignment should be performed instead of the default global alignment. #' #' @param minFraction For `PeakGroupsParam`: `numeric(1)` between 0 and 1 -#' defining the minimum required fraction of samples in which peaks for +#' defining the minimum required proportion of samples in which peaks for #' the peak group were identified. Peak groups passing this criteria will #' be aligned across samples and retention times of individual spectra will #' be adjusted based on this alignment. For `minFraction = 1` the peak #' group has to contain peaks in all samples of the experiment. Note that if #' `subset` is provided, the specified fraction is relative to the #' defined subset of samples and not to the total number of samples within -#' the experiment (i.e. a peak has to be present in the specified +#' the experiment (i.e., a peak has to be present in the specified #' proportion of subset samples). #' #' @param msLevel For `adjustRtime`: `integer(1)` defining the MS level on diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R index e110ae6be..f17df8fd6 100644 --- a/R/MsExperiment-functions.R +++ b/R/MsExperiment-functions.R @@ -63,7 +63,7 @@ .mse_find_chrom_peaks_sample <- function(x, msLevel = 1L, param, ...) { x <- filterMsLevel(x, msLevel) pkd <- Spectra::peaksData(x, columns = c("mz", "intensity"), - BPPARAM = SerialParam()) + f = factor(), BPPARAM = SerialParam()) vals_per_spect <- vapply(pkd, nrow, integer(1), USE.NAMES = FALSE) ## Open questions: ## - What to do with empty spectra? Remove them? MatchFilter does not like @@ -229,7 +229,7 @@ } } bpmapply( - split(peaksData(x, columns = c("mz", "intensity"), + split(peaksData(x, columns = c("mz", "intensity"), f = factor(), BPPARAM = SerialParam()), f), split(rtime(x), f), FUN = function(p, rt, prm, msl) { @@ -311,6 +311,7 @@ else f <- factor(integer(), levels = sidx) bplapply( split(Spectra::peaksData(x, columns = c("mz", "intensity"), + f = factor(), BPPARAM = SerialParam()), f), FUN = .peaksdata_profmat, method = method, step = step, baselevel = baselevel, basespace = basespace, mzrange. = mzrange., @@ -374,8 +375,9 @@ if (!(ref_idx %in% seq_along(x))) stop("'centerSample' needs to be an integer between 1 and ", length(x)) ref_sps <- filterMsLevel(spectra(x[ref_idx]), msLevel = msLevel) - ref_pm <- .peaksdata_profmat(peaksData(ref_sps), method = "bin", - step = binSize(param), returnBreaks = TRUE) + ref_pm <- .peaksdata_profmat(peaksData(ref_sps, f = factor()), + method = "bin", step = binSize(param), + returnBreaks = TRUE) res <- unlist(.mse_spectrapply_chunks( x, FUN = function(z, ref, ref_pm, param, msLevel, BPPARAM) { z <- setBackend( @@ -416,10 +418,12 @@ if (!(length(ref) & length(other))) stop("No spectra with MS level ", msLevel, " present") if (!length(ref_pm)) - ref_pm <- .peaksdata_profmat(peaksData(ref), method = "bin", + ref_pm <- .peaksdata_profmat(peaksData(ref, f = factor()), + method = "bin", step = binSize(param), returnBreaks = TRUE) - other_pm <- .peaksdata_profmat(peaksData(other), method = "bin", + other_pm <- .peaksdata_profmat(peaksData(other, f = factor()), + method = "bin", step = binSize(param), returnBreaks = TRUE) adj <- .obiwarp_bare(rtime(ref), rtime(other), ref_pr = ref_pm, @@ -506,6 +510,7 @@ else f <- factor(integer(), levels = sidx) bpmapply( split(Spectra::peaksData(z, columns = c("mz", "intensity"), + f = factor(), BPPARAM = SerialParam()), f), split(rtime(z), f), split(msLevel(z), f), diff --git a/R/MsExperiment.R b/R/MsExperiment.R index bb318fda7..2ec042029 100644 --- a/R/MsExperiment.R +++ b/R/MsExperiment.R @@ -20,6 +20,14 @@ setMethod("filterMz", "MsExperiment", filterMzRange(object, mz, msLevel.) }) +#' @rdname XcmsExperiment +setMethod("filterMsLevel", "MsExperiment", + function(object, msLevel. = uniqueMsLevels(object)) { + message("Filter spectra") + .mse_filter_spectra(object, filterMsLevel, + msLevel. = msLevel.) + }) + #' @rdname XcmsExperiment setMethod("uniqueMsLevels", "MsExperiment", function(object) { uniqueMsLevels(spectra(object)) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 1131ab31b..4f403b81a 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -355,7 +355,8 @@ levels = levels(f)) res <- bpmapply( .merge_neighboring_peaks2, - split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel)), f), + split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel), + f = factor()), f), split.data.frame(chromPeaks(x, msLevel = msLevel), f = f_peaks), split.data.frame(chromPeakData( x, msLevel = msLevel, return.type = "data.frame"), f = f_peaks), @@ -397,7 +398,8 @@ } }, logical(1)) }, - split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel)), f), + split(peaksData(filterMsLevel(spectra(x), msLevel = msLevel), + f = factor()), f), split(rt, f), split.data.frame(chromPeaks(x), f = f_peaks), split(chromPeakData(x)$ms_level, f = f_peaks), @@ -467,7 +469,8 @@ if (hasAdjustedRtime(x)) rt <- spectra(x)$rtime_adjusted[keep] else rt <- rtime(spectra(x))[keep] cn <- colnames(chromPeaks(x)) - res <- bpmapply(split(peaksData(filterMsLevel(spectra(x), msLevel)), f), + res <- bpmapply(split(peaksData(filterMsLevel(spectra(x), msLevel), + f = factor()), f), split(rt, f), pal, as.integer(names(pal)), @@ -507,6 +510,9 @@ if (length(keep)) { xsub <- lapply(x[keep], .pmat_filter_mz, mzr = peakArea[i, c("mzmin", "mzmax")]) + ## length of xsub is the number of spectra, the number of peaks can + ## however be 0 if no peak was found. Maybe we should/need to + ## consider adding 0 or NA intensity for those. mat <- do.call(rbind, xsub) if (nrow(mat)) { ## can have 0, 1 or x values per rt; repeat rt accordingly @@ -958,7 +964,8 @@ featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min, ## Get EICs for all chrom peaks (all MS levels) object <- filterRt(object, rt = range(pks[, c("rtmin", "rtmax")])) chrs <- .chromatograms_for_peaks( - peaksData(object@spectra), rt = rtime(object@spectra), + peaksData(object@spectra, f = factor()), + rt = rtime(object@spectra), msl = msLevel(object@spectra), file_idx = fromFile, tmz = isolationWindowTargetMz(object@spectra), pks = pks, pks_msl = chromPeakData(object)$ms_level[ord], diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 9fdc90f7c..aef1c0be8 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -83,6 +83,9 @@ #' `"isolationWindowUpperMz"` (columns in `chromPeakData`) do not contain #' the user provided `mz`. #' +#' - `filterMsLevel`: filter the data of the `XcmsExperiment` or `MsExperiment` +#' to keep only data of the MS level(s) specified with parameter `msLevel.`. +#' #' - `filterMz`, `filterMzRange`: filter the spectra within an #' `XcmsExperiment` or `MsExperiment` to the specified m/z range (parameter #' `mz`). For `XcmsExperiment` also identified chromatographic peaks and @@ -482,6 +485,8 @@ #' length equal to the numer of rows of the parameters `mz` and `rt` #' defining the m/z and rt regions from which the chromatograms should #' be created. Defaults to `msLevel = 1L`. +#' for `filterMsLevel`: `integer` defining the MS level(s) to which the +#' data should be subset. #' #' @param mz For `chromPeaks` and `featureDefinitions`: `numeric(2)` optionally #' defining the m/z range for which chromatographic peaks or feature @@ -816,6 +821,20 @@ setMethod( callNextMethod(object = object, mz = mz, msLevel. = msLevel.) }) +#' @rdname XcmsExperiment +setMethod( + "filterMsLevel", "XcmsExperiment", + function(object, msLevel. = uniqueMsLevels(object)) { + if (!length(msLevel.)) + return(object) + if (hasChromPeaks(object)) { + keep <- chromPeakData(object)$ms_level %in% msLevel. + object <- .filter_chrom_peaks(object, idx = base::which(keep)) + } + callNextMethod(object = object, msLevel. = msLevel.) + }) + + ################################################################################ ## chromatographic peaks ################################################################################ @@ -839,7 +858,7 @@ setMethod( chunkSize = chunkSize, BPPARAM = BPPARAM) } ## Assign/define peak IDs. - pkd <- data.frame(ms_level = rep(msLevel, nrow(res)), + pkd <- data.frame(ms_level = rep(as.integer(msLevel), nrow(res)), is_filled = rep(FALSE, nrow(res))) ph <- XProcessHistory(param = param, type. = .PROCSTEP.PEAK.DETECTION, @@ -1205,7 +1224,7 @@ setMethod( maxi <- max( 0, as.integer(sub("CP", "", rownames(chromPeaks(object))))) rownames(res) <- .featureIDs(nr, "CP", maxi + 1) - pkd <- data.frame(ms_level = rep(msLevel, nr), + pkd <- data.frame(ms_level = rep(as.integer(msLevel), nr), is_filled = rep(FALSE, nr)) rownames(pkd) <- rownames(res) pb$tick() @@ -1332,35 +1351,30 @@ setMethod( "adjustRtime", signature(object = "MsExperiment", param = "PeakGroupsParam"), function(object, param, msLevel = 1L, ...) { - if (hasAdjustedRtime(object)) { - message("Removing previous alignment results") - object <- dropAdjustedRtime(object) - } + if (!inherits(object, "XcmsExperiment")) + object <- as(object, "XcmsExperiment") + if (hasAdjustedRtime(object)) + stop("Alignment results already present. Please either remove ", + "them with 'dropAdjustedRtime' in order to perform an ", + "alternative, new, alignment, or use 'applyAdjustedRtime'", + " prior 'adjustRtime' to perform a second round of ", + "alignment.") if (any(msLevel != 1L)) stop("Alignment is currently only supported for MS level 1") - if (!hasFeatures(object)) - stop("No feature definitions present in 'object'. Please perform ", - "first a correspondence analysis using 'groupChromPeaks'") - if (!nrow(peakGroupsMatrix(param))) + if (!nrow(peakGroupsMatrix(param))) { + if (!hasFeatures(object)) + stop("No feature definitions present in 'object'. Please ", + "perform first a correspondence analysis using ", + "'groupChromPeaks'") peakGroupsMatrix(param) <- adjustRtimePeakGroups( object, param = param) + } fidx <- as.factor(fromFile(object)) rt_raw <- split(rtime(object), fidx) - rt_adj <- do_adjustRtime_peakGroups( - chromPeaks(object, msLevel = msLevel), - peakIndex = .update_feature_definitions( - featureDefinitions(object), rownames(chromPeaks(object)), - rownames(chromPeaks(object, msLevel = msLevel)))$peakidx, - rtime = rt_raw, - minFraction = minFraction(param), - extraPeaks = extraPeaks(param), - smooth = smooth(param), - span = span(param), - family = family(param), - peakGroupsMatrix = peakGroupsMatrix(param), - subset = subset(param), - subsetAdjust = subsetAdjust(param) - ) + rt_adj <- .adjustRtime_peakGroupsMatrix( + rt_raw, peakGroupsMatrix(param), smooth = smooth(param), + span = span(param), family = family(param), + subset = subset(param), subsetAdjust = subsetAdjust(param)) pt <- vapply(object@processHistory, processType, character(1)) idx_pg <- .match_last(.PROCSTEP.PEAK.GROUPING, pt, nomatch = -1L) if (idx_pg > 0) @@ -1398,8 +1412,11 @@ setMethod("dropAdjustedRtime", "XcmsExperiment", function(object) { object@spectra, svs[svs != "rtime_adjusted"]) object@processHistory <- dropProcessHistoriesList( object@processHistory, type = .PROCSTEP.RTIME.CORRECTION, num = 1L) - if (hasFeatures(object) && idx_co > idx_al) + if (hasFeatures(object) && idx_co > idx_al) { + warning("Had to remove feature definitions along with the adjusted ", + "retention times because of the dependency between them.") object <- dropFeatureDefinitions(object) + } object }) @@ -1535,7 +1552,7 @@ setMethod( stop("No chromatographic peaks present. ", "Please run 'findChromPeaks' first.") res <- .manual_feature_definitions(chromPeaks(object), peakIdx) - res$ms_level <- msLevel + res$ms_level <- as.integer(msLevel) if (hasFeatures(object)) { maxi <- max(as.integer( sub("FT", "", rownames(featureDefinitions(object))))) @@ -1821,7 +1838,7 @@ setMethod( nr <- nrow(res) maxi <- max(as.integer(sub("CP", "", rownames(chromPeaks(object))))) rownames(res) <- .featureIDs(nr, "CP", maxi + 1) - cpd <- data.frame(ms_level = rep(msLevel, nr), + cpd <- data.frame(ms_level = rep(as.integer(msLevel), nr), is_filled = rep(TRUE, nr)) rownames(cpd) <- rownames(res) object@chromPeaks <- rbind(object@chromPeaks, res) @@ -1957,6 +1974,3 @@ setMethod( object[i = sort(unique(file)), keepAdjustedRtime = keepAdjustedRtime, keepFeatures = keepFeatures, ...] }) - -## TODO filterMsLevel? Function not yet needed. In case, needs also an -## implementation for MsExperiment: update the spectra-sample-mapping. diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index a3443b27f..80aaf825d 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -66,40 +66,46 @@ #' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" #' \emph{Anal. Chem.} 2006, 78:779-787. do_adjustRtime_peakGroups <- - function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1, + function(peaks, peakIndex, rtime = list(), minFraction = 0.9, + extraPeaks = 1, smooth = c("loess", "linear"), span = 0.2, family = c("gaussian", "symmetric"), peakGroupsMatrix = matrix(ncol = 0, nrow = 0), - subset = integer(), subsetAdjust = c("average", "previous")) -{ - subsetAdjust <- match.arg(subsetAdjust) + subset = integer(), subsetAdjust = c("average", "previous")) { + subsetAdjust <- match.arg(subsetAdjust) + smooth <- match.arg(smooth) + family <- match.arg(family) + if (!nrow(peakGroupsMatrix)) + peakGroupsMatrix <- .define_peak_groups( + peaks = peaks, peakIndex = peakIndex, subset = subset, + minFraction = minFraction, extraPeaks = extraPeaks, + total_samples = length(rtime)) + .adjustRtime_peakGroupsMatrix( + rtime, peakGroupsMatrix, smooth = smooth, span = span, + family = family, subset = subset, subsetAdjust = subsetAdjust) + } + +#' Performs all input parameter checks and then gets the peak group matrix +#' +#' @noRd +.define_peak_groups <- function(peaks, peakIndex, subset = integer(), + minFraction = 0.9, extraPeaks = 1, + total_samples = integer()) { ## Check input. - if (missing(peaks) | missing(peakIndex) | missing(rtime)) - stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!") - smooth <- match.arg(smooth) - family <- match.arg(family) + if (missing(peaks) | missing(peakIndex)) + stop("Arguments 'peaks' and 'peakIndex' are required if ", + "'peakGroupsMatrix' is not provided!") ## minFraction if (any(minFraction > 1) | any(minFraction < 0)) stop("'minFraction' has to be between 0 and 1!") ## Check peaks: - OK <- xcms:::.validChromPeaksMatrix(peaks) + OK <- .validChromPeaksMatrix(peaks) if (is.character(OK)) stop(OK) ## Check peakIndex: if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks))))) stop("Some indices listed in 'peakIndex' are outside of ", "1:nrow(peaks)!") - ## Check rtime: - if (!is.list(rtime)) - stop("'rtime' should be a list of numeric vectors with the retention ", - "times of the spectra per sample!") - if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE))) - stop("'rtime' should be a list of numeric vectors with the retention ", - "times of the spectra per sample!") - if (length(rtime) != max(peaks[, "sample"])) - stop("The length of 'rtime' does not match with the total number of ", - "samples according to the 'peaks' matrix!") - total_samples <- length(rtime) if (length(subset)) { if (!is.numeric(subset)) stop("If provided, 'subset' is expected to be an integer") @@ -115,282 +121,184 @@ do_adjustRtime_peakGroups <- ## Remove peaks not present in "subset" from the peakIndex peaks_in_subset <- which(peaks[, "sample"] %in% subset) peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset]) - ## Check if we've got a valid peakGroupsMatrix - ## o Same number of samples. - ## o range of rt values is within the rtime. - if (nrow(peakGroupsMatrix)) { + .getPeakGroupsRtMatrix(peaks, peakIndex, subset, + missingSample, extraPeaks) +} + +#' @param peakGroupsMatrix needs to be a matrix with retention times with +#' number of columns being equal to the length of `subset` or number of +#' samples (if `subset` is not defined). +#' +#' @noRd +.adjustRtime_peakGroupsMatrix <- + function(rtime, peakGroupsMatrix = matrix(ncol = 0, nrow = 0), + smooth = c("loess", "linear"), span = 0.2, + family = c("gaussian", "symmetric"), + subset = integer(), subsetAdjust = c("average", "previous")) { + if (!nrow(peakGroupsMatrix)) + stop("Parameter 'peakGroupsMatrix' is empty") + if (missing(rtime)) + stop("Parameter 'rtime' is empty") + ## Check rtime: + if (!is.list(rtime)) + stop("'rtime' should be a list of numeric vectors with the ", + "retention times of the spectra per sample!") + if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE))) + stop("'rtime' should be a list of numeric vectors with the ", + "retention times of the spectra per sample!") + total_samples <- length(rtime) + if (length(subset)) { + if (!is.numeric(subset)) + stop("If provided, 'subset' is expected to be an integer") + if (!all(subset %in% seq_len(total_samples))) + stop("One or more indices in 'subset' are out of range.") + if (length(subset) < 2) + stop("Length of 'subset' too small: minimum required ", + "samples for alignment is 2.") + } else subset <- seq_len(total_samples) + nSamples <- length(subset) + ## Check if we've got a valid peakGroupsMatrix + ## o Same number of samples. + ## o range of rt values is within the rtime. if (ncol(peakGroupsMatrix) != nSamples) - stop("'peakGroupsMatrix' has to have the same number of columns ", - "as there are samples!") + stop("Number of columns of 'peakGroupsMatrix' is ", + ncol(peakGroupsMatrix), " while ", nSamples, + " columns are expected") pg_range <- range(peakGroupsMatrix, na.rm = TRUE) rt_range <- range(rtime) if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2])) stop("The retention times in 'peakGroupsMatrix' have to be within", " the retention time range of the experiment!") rt <- peakGroupsMatrix - } else - rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset, - missingSample, extraPeaks) - ## Fix for issue #175 - if (length(rt) == 0) - stop("No peak groups found in the data for the provided settings") - if (ncol(rt) != length(subset)) - stop("Length of 'subset' and number of columns of the peak group ", - "matrix do not match.") - message("Performing retention time correction using ", nrow(rt), - " peak groups.") - - ## Calculate the deviation of each peak group in each sample from its - ## median - rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) - - if (smooth == "loess") { - mingroups <- min(colSums(!is.na(rt))) - if (mingroups < 4) { - smooth <- "linear" - warning("Too few peak groups for 'loess', reverting to linear", - " method") - } else if (mingroups * span < 4) { - span <- 4 / mingroups - warning("Span too small for 'loess' and the available number of ", - "peak groups, resetting to ", round(span, 2)) - } - } + message("Performing retention time correction using ", nrow(rt), + " peak groups.") - rtdevsmo <- vector("list", nSamples) + ## Calculate the deviation of each peak group in each sample from its + ## median + rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) - ## Code for checking to see if retention time correction is overcorrecting - rtdevrange <- range(rtdev, na.rm = TRUE) - warn.overcorrect <- FALSE - warn.tweak.rt <- FALSE - - pb <- progress_bar$new(format = paste0("[:bar] :current/:", - "total (:percent) in ", - ":elapsed"), - total = length(subset), - clear = FALSE) - rtime_adj <- rtime - ## Adjust samples in subset. - for (i in seq_along(subset)) { - i_all <- subset[i] # Index of sample in whole dataset. - pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) - - ## order the data.frame such that rt and rtdev are increasingly ordered. - pk_idx <- order(pts$rt, pts$rtdev) - pts <- pts[pk_idx, ] if (smooth == "loess") { - lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, - degree = 1, family = family)) - - rtdevsmo[[i]] <- xcms:::na.flatfill( - predict(lo, data.frame(rt = rtime[[i_all]]))) - ## Remove singularities from the loess function - rtdevsmo[[i]][abs(rtdevsmo[[i]]) > - quantile(abs(rtdevsmo[[i]]), 0.9, - na.rm = TRUE) * 2] <- NA - if (length(naidx <- which(is.na(rtdevsmo[[i]])))) - rtdevsmo[[i]][naidx] <- suppressWarnings( - approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])), - xout = rtime[[i_all]][naidx], rule = 2)$y - ) + mingroups <- min(colSums(!is.na(rt))) + if (mingroups < 4) { + smooth <- "linear" + warning("Too few peak groups for 'loess', reverting to linear", + " method") + } else if (mingroups * span < 4) { + span <- 4 / mingroups + warning("Span too small for 'loess' and the available number ", + "of peak groups, resetting to ", round(span, 2)) + } + } - ## Check if there are adjusted retention times that are not ordered - ## increasingly. If there are, search for each first unordered rt - ## the next rt that is larger and linearly interpolate the values - ## in between (see issue #146 for an illustration). - while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) { - warn.tweak.rt <- TRUE ## Warn that we had to tweak the rts. - rtadj <- rtime[[i_all]] - rtdevsmo[[i]] - rtadj_start <- rtadj[decidx[1]] ## start interpolating from here - ## Define the - next_larger <- which(rtadj > rtadj[decidx[1]]) - if (length(next_larger) == 0) { - ## Fix if there is no larger adjusted rt up to the end. - next_larger <- length(rtadj) + 1 - rtadj_end <- rtadj_start - } else { - next_larger <- min(next_larger) - rtadj_end <- rtadj[next_larger] + rtdevsmo <- vector("list", nSamples) + + ## Code for checking to see if retention time correction is + ## overcorrecting + rtdevrange <- range(rtdev, na.rm = TRUE) + warn.overcorrect <- FALSE + warn.tweak.rt <- FALSE + + pb <- progress_bar$new(format = paste0("[:bar] :current/:", + "total (:percent) in ", + ":elapsed"), + total = length(subset), + clear = FALSE) + rtime_adj <- rtime + ## Adjust samples in subset. + for (i in seq_along(subset)) { + i_all <- subset[i] # Index of sample in whole dataset. + pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) + + ## order the data.frame such that rt and rtdev are increasingly + ## ordered. + pk_idx <- order(pts$rt, pts$rtdev) + pts <- pts[pk_idx, ] + if (smooth == "loess") { + lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, + degree = 1, family = family)) + + rtdevsmo[[i]] <- na.flatfill( + predict(lo, data.frame(rt = rtime[[i_all]]))) + ## Remove singularities from the loess function + rtdevsmo[[i]][abs(rtdevsmo[[i]]) > + quantile(abs(rtdevsmo[[i]]), 0.9, + na.rm = TRUE) * 2] <- NA + if (length(naidx <- which(is.na(rtdevsmo[[i]])))) + rtdevsmo[[i]][naidx] <- suppressWarnings( + approx(na.omit(data.frame(rtime[[i_all]], + rtdevsmo[[i]])), + xout = rtime[[i_all]][naidx], rule = 2)$y + ) + + ## Check if there are adjusted retention times that are not + ## ordered increasingly. If there are, search for each first + ## unordered rt the next rt that is larger and linearly + ## interpolate the values in between (see issue #146 for an + ## illustration). + while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) { + warn.tweak.rt <- TRUE ## Warn that we had to tweak the rts + rtadj <- rtime[[i_all]] - rtdevsmo[[i]] + rtadj_start <- rtadj[decidx[1]] ## start interpolating from here + next_larger <- which(rtadj > rtadj[decidx[1]]) + if (length(next_larger) == 0) { + ## Fix if there is no larger adjusted rt up to the end. + next_larger <- length(rtadj) + 1 + rtadj_end <- rtadj_start + } else { + next_larger <- min(next_larger) + rtadj_end <- rtadj[next_larger] + } + ## linearly interpolate the values in between. + adj_idxs <- (decidx[1] + 1):(next_larger - 1) + incr <- (rtadj_end - rtadj_start) / length(adj_idxs) + rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] - + (rtadj_start + (1:length(adj_idxs)) * incr) } - ## linearly interpolate the values in between. - adj_idxs <- (decidx[1] + 1):(next_larger - 1) - incr <- (rtadj_end - rtadj_start) / length(adj_idxs) - rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] - - (rtadj_start + (1:length(adj_idxs)) * incr) - } - rtdevsmorange <- range(rtdevsmo[[i]]) - if (any(rtdevsmorange / rtdevrange > 2)) - warn.overcorrect <- TRUE - } else { - if (nrow(pts) < 2) { - stop("Not enough peak groups even for linear smoothing ", - "available!") + rtdevsmorange <- range(rtdevsmo[[i]]) + if (any(rtdevsmorange / rtdevrange > 2)) + warn.overcorrect <- TRUE + } else { + if (nrow(pts) < 2) { + stop("Not enough peak groups even for linear smoothing ", + "available!") + } + ## Use lm instead? + fit <- lsfit(pts$rt, pts$rtdev) + rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1] + ptsrange <- range(pts$rt) + minidx <- rtime[[i_all]] < ptsrange[1] + maxidx <- rtime[[i_all]] > ptsrange[2] + rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)] + rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)] } - ## Use lm instead? - fit <- lsfit(pts$rt, pts$rtdev) - rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1] - ptsrange <- range(pts$rt) - minidx <- rtime[[i_all]] < ptsrange[1] - maxidx <- rtime[[i_all]] > ptsrange[2] - rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)] - rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)] + ## Finally applying the correction + rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]] + pb$tick() } - ## Finally applying the correction - rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]] - pb$tick() - } - ## Adjust the remaining samples. - rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset, - method = subsetAdjust) - if (warn.overcorrect) { - warning("Fitted retention time deviation curves exceed points by more", - " than 2x. This is dangerous and the algorithm is probably ", - "overcorrecting your data. Consider increasing the span ", - "parameter or switching to the linear smoothing method.") - } - - if (warn.tweak.rt) { - warning(call. = FALSE, "Adjusted retention times had to be ", - "re-adjusted for some files to ensure them being in the same", - " order than the raw retention times. A call to ", - "'dropAdjustedRtime' might thus fail to restore retention ", - "times of chromatographic peaks to their original values. ", - "Eventually consider to increase the value of the 'span' ", - "parameter.") - } - rtime_adj -} -## That's the original code that fails to fix unsorted adjusted retention times -## (see issue #146). -do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, - minFraction = 0.9, extraPeaks = 1, - smooth = c("loess", "linear"), span = 0.2, - family = c("gaussian", "symmetric")) { - ## Check input. - if (missing(peaks) | missing(peakIndex) | missing(rtime)) - stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!") - smooth <- match.arg(smooth) - family <- match.arg(family) - ## minFraction - if (any(minFraction > 1) | any(minFraction < 0)) - stop("'minFraction' has to be between 0 and 1!") - - ## Check peaks: - OK <- .validChromPeaksMatrix(peaks) - if (is.character(OK)) - stop(OK) - ## Check peakIndex: - if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks))))) - stop("Some indices listed in 'peakIndex' are outside of ", - "1:nrow(peaks)!") - ## Check rtime: in line with the total number of samples we've got in - ## peaks? - if (!is.list(rtime)) - stop("'rtime' should be a list of numeric vectors with the retention ", - "times of the spectra per sample!") - if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE))) - stop("'rtime' should be a list of numeric vectors with the retention ", - "times of the spectra per sample!") - if (length(rtime) != max(peaks[, "sample"])) - stop("The length of 'rtime' does not match with the total number of ", - "samples according to the 'peaks' matrix!") - - nSamples <- length(rtime) - ## Translate minFraction to number of allowed missing samples. - missingSample <- nSamples - (nSamples * minFraction) - - rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, seq_len(nSamples), - missingSample, extraPeaks) - - message("Performing retention time correction using ", nrow(rt), - " peak groups.") - - ## Calculate the deviation of each peak group in each sample from its - ## median - rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) - - if (smooth == "loess") { - mingroups <- min(colSums(!is.na(rt))) - if (mingroups < 4) { - smooth <- "linear" - warning("Too few peak groups for 'loess', reverting to linear", - " method") - } else if (mingroups * span < 4) { - span <- 4 / mingroups - warning("Span too small for 'loess' and the available number of ", - "peak groups, resetting to ", round(span, 2)) + ## Adjust the remaining samples. + rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset, + method = subsetAdjust) + if (warn.overcorrect) { + warning("Fitted retention time deviation curves exceed points ", + "by more than 2x. This is dangerous and the algorithm ", + "is probably overcorrecting your data. Consider ", + "increasing the span parameter or switching to the ", + "linear smoothing method.") } - } - - rtdevsmo <- vector("list", nSamples) - - ## Code for checking to see if retention time correction is overcorrecting - rtdevrange <- range(rtdev, na.rm = TRUE) - warn.overcorrect <- FALSE - for (i in 1:nSamples) { - pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) - - if (smooth == "loess") { - lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, - degree = 1, family = family)) - - rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) - ## Remove singularities from the loess function - rtdevsmo[[i]][abs(rtdevsmo[[i]]) > - quantile(abs(rtdevsmo[[i]]), 0.9) * 2] <- NA - if (length(naidx <- which(is.na(rtdevsmo[[i]])))) - rtdevsmo[[i]][naidx] <- suppressWarnings( - approx(na.omit(data.frame(rtime[[i]], rtdevsmo[[i]])), - xout = rtime[[i]][naidx], rule = 2)$y - ) - ## That's to ensure that the adjusted retention times are in the - ## same order than the raw retention times - I guess... - ## Check if adjustment changes the order of the adjusted retention - ## times. If yes, the difference between consecutive retention times - ## will be negative. - ## What does this code do: - ## o move the last adjusted retention time to the left by its - ## difference to the next one. - while (length(decidx <- which(diff(rtime[[i]] - rtdevsmo[[i]]) < 0))) { - d <- diff(rtime[[i]] - rtdevsmo[[i]])[tail(decidx, 1)] - rtdevsmo[[i]][tail(decidx, 1)] <- rtdevsmo[[i]][tail(decidx, 1)] - d - if (abs(d) <= 1e-06) - break - } - - rtdevsmorange <- range(rtdevsmo[[i]]) - if (any(rtdevsmorange / rtdevrange > 2)) - warn.overcorrect <- TRUE - } else { - if (nrow(pts) < 2) { - stop("Not enough peak groups even for linear smoothing ", - "available!") - } - ## Use lm instead? - fit <- lsfit(pts$rt, pts$rtdev) - rtdevsmo[[i]] <- rtime[[i]] * fit$coef[2] + fit$coef[1] - ptsrange <- range(pts$rt) - minidx <- rtime[[i]] < ptsrange[1] - maxidx <- rtime[[i]] > ptsrange[2] - rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)] - rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)] + if (warn.tweak.rt) { + warning(call. = FALSE, "Adjusted retention times had to be ", + "re-adjusted for some files to ensure them being in ", + "the same order than the raw retention times. A call to ", + "'dropAdjustedRtime' might thus fail to restore retention ", + "times of chromatographic peaks to their original values. ", + "Eventually consider to increase the value of the 'span' ", + "parameter.") } - ## Finally applying the correction - rtime[[i]] <- rtime[[i]] - rtdevsmo[[i]] - } - - if (warn.overcorrect) { - warning("Fitted retention time deviation curves exceed points by more", - " than 2x. This is dangerous and the algorithm is probably ", - "overcorrecting your data. Consider increasing the span ", - "parameter or switching to the linear smoothing method.") + rtime_adj } - return(rtime) -} - #' This function adjusts retentin times in the vector/matrix `x` given the #' provided `numeric` vectors `rtraw` and `rtadj`. #' @@ -485,6 +393,10 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, #' @details This function is called internally by the #' do_adjustRtime_peakGroups function and the retcor.peakgroups method. #' +#' Update for version 4.1.4: correctly consider the `sampleIndex` and +#' `missingSample` to select only peaks present in samples of an eventual +#' sample subset (for subset-based alignment): fixes issue #702 +#' #' @noRd .getPeakGroupsRtMatrix <- function(peaks, peakIndex, sampleIndex, missingSample, extraPeaks) { @@ -493,19 +405,19 @@ do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, ## o skip peak groups if they are not assigned a peak in at least a ## minimum number of samples OR if have too many peaks from the same ## sample assigned to it. - nSamples <- length(sampleIndex) + req_samples <- length(sampleIndex) - missingSample rt <- lapply(peakIndex, function(z) { cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE] - ## Return NULL if we've got less samples that required or is the total + ## Return NULL if we've got less samples that required or the total ## number of peaks is larger than a certain threshold. - ## Note that the original implementation is not completely correct! - ## nsamp > nsamp + extraPeaks might be correct. - nsamp <- length(unique(cur_fts[, "sample"])) - if (nsamp < (nSamples - missingSample) | - nrow(cur_fts) > (nsamp + extraPeaks)) + smps <- cur_fts[, 3L] + smps <- smps[smps %in% sampleIndex] + nsamp <- length(unique(smps)) + if ((nsamp < req_samples) | (length(smps) > (nsamp + extraPeaks))) return(NULL) - cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), , drop = FALSE] - cur_fts[match(sampleIndex, cur_fts[, 3]), 1] + cur_fts[] <- cur_fts[order(cur_fts[, 2L], decreasing = TRUE), , + drop = FALSE] + cur_fts[match(sampleIndex, cur_fts[, 3L]), 1L] }) rt <- do.call(rbind, rt) ## Order them by median retention time. NOTE: this is different from the @@ -648,13 +560,13 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset, message("Aligning sample number ", i, " against subset ... ", appendLF = FALSE) if (method == "previous") { - i_adj <- xcms:::.get_closest_index(i, subset, method = "previous") + i_adj <- .get_closest_index(i, subset, method = "previous") rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]], rtadj[[i_adj]]) } if (method == "average") { - i_ref <- c(xcms:::.get_closest_index(i, subset, method = "previous"), - xcms:::.get_closest_index(i, subset, method = "next")) + i_ref <- c(.get_closest_index(i, subset, method = "previous"), + .get_closest_index(i, subset, method = "next")) trim_idx <- .match_trim_vector_index(rtraw[i_ref]) rt_raw_ref <- do.call( cbind, .match_trim_vectors(rtraw[i_ref], idxs = trim_idx)) diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index 2d44aa4f9..a808d4137 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -591,6 +591,7 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam(), if (!length(subs)) subs <- seq_along(fileNames(object)) nSamples <- length(subs) + missingSample <- nSamples - (nSamples * minFraction(param)) pkGrp <- .getPeakGroupsRtMatrix( peaks = chromPeaks(object, msLevel = msLevel), peakIndex = .peakIndex( @@ -598,7 +599,7 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam(), featureDefinitions(object), rownames(chromPeaks(object)), rownames(chromPeaks(object, msLevel = msLevel)))), sampleIndex = subs, - missingSample = nSamples - (nSamples * minFraction(param)), + missingSample = missingSample, extraPeaks = extraPeaks(param) ) colnames(pkGrp) <- basename(fileNames(object))[subs] diff --git a/R/functions-utils.R b/R/functions-utils.R index 7868f0da2..8df509b18 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -765,7 +765,10 @@ groupOverlaps <- function(xmin, xmax) { } .match_last <- function(x, table, nomatch = NA_integer_) { - length(table) - match(x, rev(table), nomatch = nomatch) + 1 + mtch <- match(x, rev(table), nomatch = NA_integer_) + mtch <- length(table) - mtch + 1 + mtch[is.na(mtch)] <- nomatch + mtch } #' @description @@ -816,6 +819,7 @@ groupOverlaps <- function(xmin, xmax) { pks_tmz = rep(1L, nrow(pks)), aggregationFun = "sum") { nr <- nrow(pks) + pks_msl <- as.integer(pks_msl) FUN <- switch(aggregationFun, "sum" = getFunction("sumi"), "max" = getFunction("maxi"), @@ -850,3 +854,74 @@ groupOverlaps <- function(xmin, xmax) { } res } + +## @jo TODO LLL replace that with an implementation in C. +## Note: this function silently drops retention times for which no intensity-mz +## pair was measured. +.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), + rtrange = numeric(), scanrange = numeric(), + log = FALSE) { + if (length(rtrange) >= 2) { + rtrange <- range(rtrange) + ## Fix for issue #267. rtrange outside scanrange causes scanrange + ## being c(Inf, -Inf) + scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2])) + if (!length(scns)) + return(matrix( + nrow = 0, ncol = 3, + dimnames = list(character(), c("time", "mz", "intensity")))) + scanrange <- range(scns) + } + if (length(scanrange) < 2) + scanrange <- c(1, length(valsPerSpect)) + else scanrange <- range(scanrange) + if (!all(is.finite(scanrange))) + stop("'scanrange' does not contain finite values") + if (!all(is.finite(mzrange))) + stop("'mzrange' does not contain finite values") + if (!all(is.finite(rtrange))) + stop("'rtrange' does not contain finite values") + if (scanrange[1] == 1) + startidx <- 1 + else + startidx <- sum(valsPerSpect[1:(scanrange[1] - 1)]) + 1 + endidx <- sum(valsPerSpect[1:scanrange[2]]) + scans <- rep(scanrange[1]:scanrange[2], + valsPerSpect[scanrange[1]:scanrange[2]]) + masses <- mz[startidx:endidx] + massidx <- 1:length(masses) + if (length(mzrange) >= 2) { + mzrange <- range(mzrange) + massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] + } + int <- int[startidx:endidx][massidx] + if (log && (length(int) > 0)) + int <- log(int + max(1 - min(int), 0)) + cbind(time = scantime[scans[massidx]], + mz = masses[massidx], + intensity = int) +} + +#' Helper function to use the internal getEIC C call to extract (TIC) EIC +#' data. +#' +#' @noRd +.getEIC <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), + rtrange = numeric(), log = FALSE) { + rtrange <- range(rtrange) + scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2])) + if (!length(scns)) + return(matrix( + nrow = 0, ncol = 3, + dimnames = list(character(), c("time", "mz", "intensity")))) + if (!all(is.finite(mzrange))) + stop("'mzrange' does not contain finite values") + if (!all(is.finite(rtrange))) + stop("'rtrange' does not contain finite values") + scanindex <- valueCount2ScanIndex(valsPerSpect) + res <- .Call("getEIC", mz, int, scanindex, mzrange, + as.integer(range(scns) - 1L), as.integer(length(scanindex)), + PACKAGE = "xcms") + cbind(rtime = scantime[scns], + intensity = res$intensity) +} diff --git a/R/functions-xcmsSwath.R b/R/functions-xcmsSwath.R index 28004f9e1..27c29d274 100644 --- a/R/functions-xcmsSwath.R +++ b/R/functions-xcmsSwath.R @@ -96,7 +96,8 @@ object <- filterRt(object, rt = range(pks[, c("rtmin", "rtmax")]))) if (inherits(object, "MsExperiment")) chrs <- .chromatograms_for_peaks( - peaksData(object@spectra), rt = rtime(object@spectra), + peaksData(object@spectra, f = factor()), + rt = rtime(object@spectra), msl = msLevel(object@spectra), file_idx = fromFile, tmz = isolationWindowTargetMz(object@spectra), pks = pks, pks_msl = object@chromPeakData$ms_level[ord], diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index d95c49389..f46cc4cfc 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -1680,38 +1680,27 @@ setMethod("adjustRtime", } if (any(msLevel != 1)) stop("Alignment is currently only supported for MS level 1") - if (!hasChromPeaks(object)) - stop("No chromatographic peak detection results in 'object'!", - " Please perform first a peak detection using the ", - "'findChromPeaks' method.") - if (!hasFeatures(object)) - stop("No feature definitions found in 'object'! Please ", - "perform first a peak grouping using the ", - "'groupChromPeak' method.") - if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) - object <- updateObject(object) - startDate <- date() - ## If param does contain a peakGroupsMatrix extract that one, - ## otherwise generate it. if (nrow(peakGroupsMatrix(param))) pkGrpMat <- peakGroupsMatrix(param) - else + else { + if (!hasChromPeaks(object)) + stop("No chromatographic peak detection results in ", + "'object'! Please perform first a peak detection ", + "using the 'findChromPeaks' method.") + if (!hasFeatures(object)) + stop("No feature definitions found in 'object'! Please ", + "perform first a peak grouping using the ", + "'groupChromPeak' method.") pkGrpMat <- adjustRtimePeakGroups(object, param = param) - res <- do_adjustRtime_peakGroups( - chromPeaks(object, msLevel = msLevel), - peakIndex = .update_feature_definitions( - featureDefinitions(object), rownames(chromPeaks(object)), - rownames(chromPeaks(object, msLevel = msLevel)))$peakidx, - rtime = rtime(object, bySample = TRUE), - minFraction = minFraction(param), - extraPeaks = extraPeaks(param), - smooth = smooth(param), - span = span(param), - family = family(param), - peakGroupsMatrix = pkGrpMat, - subset = subset(param), - subsetAdjust = subsetAdjust(param) - ) + } + if (hasChromPeaks(object) & !.has_chrom_peak_data(object)) + object <- updateObject(object) + startDate <- date() + res <- .adjustRtime_peakGroupsMatrix( + rtime(object, bySample = TRUE), pkGrpMat, + smooth = smooth(param), span = span(param), + family = family(param), subset = subset(param), + subsetAdjust = subsetAdjust(param)) ## Add the pkGrpMat that's being used to the param object. peakGroupsMatrix(param) <- pkGrpMat ## Dropping the peak groups but don't remove its process history diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index 67d353d0e..747b37d9f 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -809,85 +809,6 @@ setMethod("rawMat", "xcmsRaw", function(object, mzrange = mzrange, rtrange = rtrange, scanrange = scanrange, log = log) }) -## @jo TODO LLL replace that with an implementation in C. -## Note: this function silently drops retention times for which no intensity-mz -## pair was measured. -.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), - rtrange = numeric(), scanrange = numeric(), - log = FALSE) { - if (length(rtrange) >= 2) { - rtrange <- range(rtrange) - ## Fix for issue #267. rtrange outside scanrange causes scanrange - ## being c(Inf, -Inf) - scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2])) - if (!length(scns)) - return(matrix( - nrow = 0, ncol = 3, - dimnames = list(character(), c("time", "mz", "intensity")))) - scanrange <- range(scns) - } - if (length(scanrange) < 2) - scanrange <- c(1, length(valsPerSpect)) - else scanrange <- range(scanrange) - if (!all(is.finite(scanrange))) - stop("'scanrange' does not contain finite values") - if (!all(is.finite(mzrange))) - stop("'mzrange' does not contain finite values") - if (!all(is.finite(rtrange))) - stop("'rtrange' does not contain finite values") - if (scanrange[1] == 1) - startidx <- 1 - else - startidx <- sum(valsPerSpect[1:(scanrange[1] - 1)]) + 1 - endidx <- sum(valsPerSpect[1:scanrange[2]]) - scans <- rep(scanrange[1]:scanrange[2], - valsPerSpect[scanrange[1]:scanrange[2]]) - masses <- mz[startidx:endidx] - massidx <- 1:length(masses) - if (length(mzrange) >= 2) { - mzrange <- range(mzrange) - massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] - } - int <- int[startidx:endidx][massidx] - if (log && (length(int) > 0)) - int <- log(int + max(1 - min(int), 0)) - cbind(time = scantime[scans[massidx]], - mz = masses[massidx], - intensity = int) -} - -## .rawMat2 <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), -## rtrange = numeric(), scanrange = numeric, -## log = FALSE) { -## if (length(rtrange) >= 2) { -## rtrange <- range(rtrange) -## scanrange <- range(which((scantime >= rtrange[1]) & -## (scantime <= rtrange[2]))) -## } -## if (length(scanrange) < 2) -## scanrange <- c(1, length(valsPerSpect)) -## else scanrange <- range(scanrange) -## if (scanrange[1] == 1) -## startidx <- 1 -## else -## startidx <- sum(valsPerSpect[1:(scanrange[1]-1)]) + 1 -## endidx <- sum(valsPerSpect[1:scanrange[2]]) -## scans <- rep(scanrange[1]:scanrange[2], -## valsPerSpect[scanrange[1]:scanrange[2]]) -## masses <- mz[startidx:endidx] -## massidx <- 1:length(masses) -## if (length(mzrange) >= 2) { -## mzrange <- range(mzrange) -## massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] -## } -## int <- int[startidx:endidx][massidx] -## if (log && (length(int) > 0)) -## int <- log(int + max(1 - min(int), 0)) -## cbind(time = scantime[scans[massidx]], -## mz = masses[massidx], -## intensity = int) -## } - ############################################################ ## plotRaw diff --git a/data/faahko_sub2.RData b/data/faahko_sub2.RData index bbd8dfa5e..c58a21c5d 100644 Binary files a/data/faahko_sub2.RData and b/data/faahko_sub2.RData differ diff --git a/data/xmse.RData b/data/xmse.RData index cdfd3125a..1e9a8b600 100644 Binary files a/data/xmse.RData and b/data/xmse.RData differ diff --git a/inst/NEWS b/inst/NEWS index 5203e9f9b..5e31d4adc 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,19 @@ +Changes in version 4.1.4 +---------------------- + +- Rename variable `data` in the vignette to `faahko`. +- Fix issue in adjustRtime resulting in corrupt processHistory. +- Add support to perform peakGroups alignment using pre-defined anchor peak + matrix (i.e., the numeric matrix with retention times of anchor peaks in + the samples that can be used to align these samples). +- Fix errors related to invalid `Chromatogram` objects extracted from xcms + results: ensure MS level in `chromPeaksMatrix` is `integer`. +- Fix definition of anchor peaks for peakGroups alignment with subset + (issue #702). +- Add `filterMsLevel` method for `MsExperiment` and `XcmsExperiment`. +- Ensure chunk-wise processing of Spectra (introduced with version 1.13.2) is + disabled when xcms is using its own chunk-wise processing. + Changes in version 4.1.3 ---------------------- diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index fe1d1ecb3..cd70426a6 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -7,6 +7,7 @@ \alias{filterRt,MsExperiment-method} \alias{filterMzRange,MsExperiment-method} \alias{filterMz,MsExperiment-method} +\alias{filterMsLevel,MsExperiment-method} \alias{uniqueMsLevels,MsExperiment-method} \alias{filterFile,MsExperiment-method} \alias{rtime,MsExperiment-method} @@ -25,6 +26,7 @@ \alias{filterIsolationWindow,XcmsExperiment-method} \alias{filterRt,XcmsExperiment-method} \alias{filterMzRange,XcmsExperiment-method} +\alias{filterMsLevel,XcmsExperiment-method} \alias{hasChromPeaks,XcmsExperiment-method} \alias{dropChromPeaks,XcmsExperiment-method} \alias{chromPeaks<-,XcmsExperiment-method} @@ -58,6 +60,8 @@ filterFeatureDefinitions(object, ...) \S4method{filterMz}{MsExperiment}(object, mz = numeric(), msLevel. = uniqueMsLevels(object)) +\S4method{filterMsLevel}{MsExperiment}(object, msLevel. = uniqueMsLevels(object)) + \S4method{uniqueMsLevels}{MsExperiment}(object) \S4method{filterFile}{MsExperiment}(object, file = integer(), ...) @@ -104,6 +108,8 @@ featureArea( \S4method{filterMzRange}{XcmsExperiment}(object, mz = numeric(), msLevel. = uniqueMsLevels(object)) +\S4method{filterMsLevel}{XcmsExperiment}(object, msLevel. = uniqueMsLevels(object)) + \S4method{hasChromPeaks}{XcmsExperiment}(object, msLevel = integer()) \S4method{dropChromPeaks}{XcmsExperiment}(object, keepAdjustedRtime = FALSE) @@ -229,7 +235,9 @@ For \code{chromatogram}: \code{integer} with the MS level from which the chromatogram(s) should be extracted. Has to be either of length 1 or length equal to the numer of rows of the parameters \code{mz} and \code{rt} defining the m/z and rt regions from which the chromatograms should -be created. Defaults to \code{msLevel = 1L}.} +be created. Defaults to \code{msLevel = 1L}. +for \code{filterMsLevel}: \code{integer} defining the MS level(s) to which the +data should be subset.} \item{file}{For \code{filterFile}: \code{integer} with the indices of the samples (files) to which the data should be subsetted.} @@ -462,6 +470,8 @@ all chromatographic peaks (and subsequently also features) are removed for which the range of their \code{"isolationWindowLowerMz"} and \code{"isolationWindowUpperMz"} (columns in \code{chromPeakData}) do not contain the user provided \code{mz}. +\item \code{filterMsLevel}: filter the data of the \code{XcmsExperiment} or \code{MsExperiment} +to keep only data of the MS level(s) specified with parameter \code{msLevel.}. \item \code{filterMz}, \code{filterMzRange}: filter the spectra within an \code{XcmsExperiment} or \code{MsExperiment} to the specified m/z range (parameter \code{mz}). For \code{XcmsExperiment} also identified chromatographic peaks and diff --git a/man/adjustRtime.Rd b/man/adjustRtime.Rd index 2f0fb9e32..70b8182a0 100644 --- a/man/adjustRtime.Rd +++ b/man/adjustRtime.Rd @@ -237,14 +237,14 @@ See \code{\link[=bpparam]{bpparam()}} for details.} which the alignment should be performed.} \item{minFraction}{For \code{PeakGroupsParam}: \code{numeric(1)} between 0 and 1 -defining the minimum required fraction of samples in which peaks for +defining the minimum required proportion of samples in which peaks for the peak group were identified. Peak groups passing this criteria will be aligned across samples and retention times of individual spectra will be adjusted based on this alignment. For \code{minFraction = 1} the peak group has to contain peaks in all samples of the experiment. Note that if \code{subset} is provided, the specified fraction is relative to the defined subset of samples and not to the total number of samples within -the experiment (i.e. a peak has to be present in the specified +the experiment (i.e., a peak has to be present in the specified proportion of subset samples).} \item{extraPeaks}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the maximal @@ -353,7 +353,9 @@ on MS level 1 data. Retention times of spectra from other MS levels, if present, are subsequently adjusted based on the adjusted retention times of the MS1 spectra. Note that calling \code{adjustRtime} on a \emph{xcms} result object will remove any eventually present previous alignment results as well as -any correspondence analysis results. +any correspondence analysis results. To run a second round of alignment, +raw retention times need to be replaced with adjusted ones using the +\code{\link[=applyAdjustedRtime]{applyAdjustedRtime()}} function. The alignment method can be specified (and configured) using a dedicated \code{param} argument. diff --git a/man/do_adjustRtime_peakGroups.Rd b/man/do_adjustRtime_peakGroups.Rd index aeddb102b..28e4297c2 100644 --- a/man/do_adjustRtime_peakGroups.Rd +++ b/man/do_adjustRtime_peakGroups.Rd @@ -8,7 +8,7 @@ found in most samples} do_adjustRtime_peakGroups( peaks, peakIndex, - rtime, + rtime = list(), minFraction = 0.9, extraPeaks = 1, smooth = c("loess", "linear"), @@ -30,14 +30,14 @@ information of the chromatographic peaks (across and within samples).} times per file/sample.} \item{minFraction}{For \code{PeakGroupsParam}: \code{numeric(1)} between 0 and 1 -defining the minimum required fraction of samples in which peaks for +defining the minimum required proportion of samples in which peaks for the peak group were identified. Peak groups passing this criteria will be aligned across samples and retention times of individual spectra will be adjusted based on this alignment. For \code{minFraction = 1} the peak group has to contain peaks in all samples of the experiment. Note that if \code{subset} is provided, the specified fraction is relative to the defined subset of samples and not to the total number of samples within -the experiment (i.e. a peak has to be present in the specified +the experiment (i.e., a peak has to be present in the specified proportion of subset samples).} \item{extraPeaks}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the maximal diff --git a/src/binners.c b/src/binners.c index cfa194d86..f97136310 100644 --- a/src/binners.c +++ b/src/binners.c @@ -204,7 +204,7 @@ SEXP binYonX(SEXP x, SEXP y, SEXP breaks, SEXP nBins, SEXP binSize, SET_VECTOR_ELT(ans_list, current_idx, index); } setAttrib(ans_list, R_NamesSymbol, names); - + UNPROTECT(count_protect); return ans_list; } @@ -880,7 +880,7 @@ SEXP test_integer(SEXP x) { SEXP test_real(SEXP x) { int x_val = asReal(x); - Rprintf("input asReal(x): %f\n", x_val); + Rprintf("input asReal(x): %d\n", x_val); // double *p_ans; @@ -892,5 +892,3 @@ SEXP test_real(SEXP x) { p_ans[0] = x_val; return ans; } - - diff --git a/src/fastMatch.c b/src/fastMatch.c index e58e2ff81..cec75b2fa 100644 --- a/src/fastMatch.c +++ b/src/fastMatch.c @@ -29,7 +29,7 @@ SEXP fastMatch(SEXP x, SEXP y, SEXP xidx, SEXP yidx, SEXP xolength, SEXP tol) { struct idxStruct * pidxS = (struct idxStruct *) calloc(nx, sizeof(struct idxStruct)); if (pidxS == NULL) - error("fastMatch/calloc: memory could not be allocated ! (%d bytes)\n", nx * sizeof(struct idxStruct) ); + error("fastMatch/calloc: memory could not be allocated ! (%llu bytes)\n", nx * sizeof(struct idxStruct) ); for (xi=0;xi < nx;xi++) pidxS[xi].from = ny+1; diff --git a/src/mzROI.c b/src/mzROI.c index 045bc0c94..a308ae142 100644 --- a/src/mzROI.c +++ b/src/mzROI.c @@ -76,7 +76,7 @@ struct mzROIStruct * checkmzROIBufSize(struct mzROIStruct *mzROI, const unsigned mzROI = (struct mzROIStruct *) realloc(mzROI, newLength * sizeof(struct mzROIStruct)); if (mzROI == NULL) - error("findmzROI/realloc: buffer memory could not be allocated ! (%d bytes)\n", newLength * sizeof(struct mzROIStruct) ); + error("findmzROI/realloc: buffer memory could not be allocated ! (%llu bytes)\n", newLength * sizeof(struct mzROIStruct) ); mzLength->mzROITotal = newLength; } @@ -99,7 +99,7 @@ struct mzROIStruct * checkmzvalBufSize(struct mzROIStruct *mzval, const unsigned mzval = (struct mzROIStruct *) realloc(mzval, newLength * sizeof(struct mzROIStruct)); if (mzval == NULL) - error("findmzROI/realloc: buffer memory could not be allocated ! (%d bytes)\n", newLength * sizeof(struct mzROIStruct)); + error("findmzROI/realloc: buffer memory could not be allocated ! (%llu bytes)\n", newLength * sizeof(struct mzROIStruct)); mzLength->mzvalTotal = newLength; } @@ -328,7 +328,7 @@ int i,p,del=0; p=0; struct mzROIStruct * tmp = (struct mzROIStruct *) calloc(mzLength->mzval - del, sizeof(struct mzROIStruct)); if (tmp == NULL) - error("findmzROI/cleanup: buffer memory could not be allocated ! (%d bytes)\n", (mzLength->mzval - del) * sizeof(struct mzROIStruct)); + error("findmzROI/cleanup: buffer memory could not be allocated ! (%llu bytes)\n", (mzLength->mzval - del) * sizeof(struct mzROIStruct)); for (i=0; i < mzLength->mzval; i++) { if (mzval[i].deleteMe == FALSE) { tmp[p].mz = mzval[i].mz; @@ -379,7 +379,7 @@ struct scanBuf * getScan(int scan, double *pmz, double *pintensity, int *pscanin scanbuf->thisScan= (struct scanStruct *) calloc(N, sizeof(struct scanStruct)); // scanbuf->thisScan= (struct scanStruct *) malloc(N * sizeof(struct scanStruct)); if (scanbuf->thisScan == NULL) - error("findmzROI/getThisScan: Memory could not be allocated (%d * %d) !\n",N , sizeof(struct scanStruct)); + error("findmzROI/getThisScan: Memory could not be allocated!\n"); scanbuf->thisScanLength=N; @@ -411,7 +411,7 @@ struct scanBuf * getScan(int scan, double *pmz, double *pintensity, int *pscanin if (N > 0) { scanbuf->nextScan= (double *) calloc(N, sizeof(double)); if (scanbuf->nextScan == NULL) - error("findmzROI/getNextScan: Memory could not be allocated (%d * %d) !\n",N , sizeof(struct scanStruct)); + error("findmzROI/getNextScan: Memory could not be allocated!\n"); scanbuf->nextScanLength=N; for (idx=idx1;idx <= idx2; idx++) @@ -626,11 +626,11 @@ SEXP findmzROI(SEXP mz, SEXP intensity, SEXP scanindex, SEXP mzrange, struct mzROIStruct * mzROI = (struct mzROIStruct *) calloc(ROI_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzROI == NULL) - error("findmzROI/calloc: buffer memory could not be allocated ! (%d bytes)\n",ROI_INIT_LENGTH * sizeof(struct mzROIStruct) ); + error("findmzROI/calloc: buffer memory could not be allocated ! (%llu bytes)\n",ROI_INIT_LENGTH * sizeof(struct mzROIStruct) ); struct mzROIStruct * mzval = (struct mzROIStruct *) calloc(MZVAL_INIT_LENGTH, sizeof(struct mzROIStruct)); if (mzval == NULL) - error("findmzROI/calloc: buffer memory could not be allocated ! (%d bytes)\n",MZVAL_INIT_LENGTH * sizeof(struct mzROIStruct) ); + error("findmzROI/calloc: buffer memory could not be allocated ! (%llu bytes)\n",MZVAL_INIT_LENGTH * sizeof(struct mzROIStruct) ); mzLength.mzvalTotal = MZVAL_INIT_LENGTH; mzLength.mzROITotal = ROI_INIT_LENGTH; diff --git a/tests/testthat.R b/tests/testthat.R index 976c81946..f09b7c9c0 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -16,10 +16,9 @@ faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), system.file('cdf/KO/ko16.CDF', package = "faahKO"), system.file('cdf/KO/ko18.CDF', package = "faahKO")) +cwp <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000)) faahko_od <- readMSData(faahko_3_files, mode = "onDisk") -faahko_xod <- findChromPeaks( - faahko_od, param = CentWaveParam(noise = 10000, snthresh = 40, - prefilter = c(3, 10000))) +faahko_xod <- findChromPeaks(faahko_od, param = cwp) od_x <- faahko_od mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) od_chrs <- chromatogram(od_x, mz = mzr) @@ -50,14 +49,14 @@ fticr_xod <- findChromPeaks(fticr, MSWParam(scales = c(1, 7), ## Pesticide data fl <- system.file("TripleTOF-SWATH", "PestMix1_SWATH.mzML", package = "msdata") pest_swth <- readMSData(fl, mode = "onDisk") -cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, - peakwidth = c(3, 20), prefilter = c(3, 1000)) -pest_swth <- findChromPeaks(pest_swth, param = cwp) -pest_swth <- findChromPeaksIsolationWindow(pest_swth, param = cwp) +cwp2 <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, + peakwidth = c(3, 20), prefilter = c(3, 1000)) +pest_swth <- findChromPeaks(pest_swth, param = cwp2) +pest_swth <- findChromPeaksIsolationWindow(pest_swth, param = cwp2) fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", package = "msdata") pest_dda <- readMSData(fl, mode = "onDisk") -pest_dda <- findChromPeaks(pest_dda, param = cwp) +pest_dda <- findChromPeaks(pest_dda, param = cwp2) ## Sciex test data. ## fl <- dir(system.file("sciex", package = "msdata"), full.names = TRUE) @@ -70,11 +69,11 @@ df <- data.frame(mzML_file = basename(fls), dataOrigin = fls, sample = c("ko15", "ko16", "ko18")) mse <- readMsExperiment(spectraFiles = fls, sampleData = df) -p <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000)) -xmse <- findChromPeaks(mse, param = p) +xmse <- findChromPeaks(mse, param = cwp) +expect_true(length(processHistory(xmse)) == 1L) pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) xmseg <- groupChromPeaks(xmse, param = pdp, add = FALSE) - +expect_true(length(processHistory(xmseg)) == 2L) test_check("xcms") bpstop(prm) diff --git a/tests/testthat/test_Chromatogram.R b/tests/testthat/test_Chromatogram.R index 3f81a68fd..be177a511 100644 --- a/tests/testthat/test_Chromatogram.R +++ b/tests/testthat/test_Chromatogram.R @@ -1,11 +1,3 @@ -## test_that("extractChromatograms is deprecated", { - skip_on_os(os = "windows", arch = "i386") - -## expect_warning(chrs <- extractChromatograms( -## filterRt(filterFile(od_x, file = 2), c(2500, 2600)))) -## expect_warning(plotChromatogram(chrs)) -## }) - test_that("chromatogram works", { skip_on_os(os = "windows", arch = "i386") diff --git a/tests/testthat/test_MsExperiment.R b/tests/testthat/test_MsExperiment.R index 4e122b4d1..4bef62211 100644 --- a/tests/testthat/test_MsExperiment.R +++ b/tests/testthat/test_MsExperiment.R @@ -16,6 +16,18 @@ test_that("filterRt,MsExperiment works", { expect_equal(rtime(spectra(res)), rtime(spectra(mse))) }) +test_that("filterMsLevel,MsExperiment works", { + res <- filterMsLevel(mse, msLevel = 1) + expect_equal(rtime(res), rtime(mse)) + + res <- filterMsLevel(mse, msLevel = integer()) + expect_equal(rtime(res), rtime(mse)) + + res <- filterMsLevel(mse, msLevel = 2L) + expect_equal(rtime(res), numeric()) + expect_equal(length(res), 3L) +}) + test_that("filterFile,MsExperiment works", { res <- filterFile(mse) expect_s4_class(res, "MsExperiment") diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 077a361e4..09569ba84 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -30,16 +30,17 @@ test_that("XcmsExperiment validation works", { ## - chromPeaks ## - chromPeakData test_that("findChromPeaks,MsExperiment et al works", { - expect_error(findChromPeaks(MsExperiment(), param = p), "No spectra") + expect_error(findChromPeaks(MsExperiment(), param = cwp), "No spectra") a <- MsExperiment() spectra(a) <- spectra(mse) - expect_error(findChromPeaks(a, param = p), "No link") + expect_error(findChromPeaks(a, param = cwp), "No link") res <- xmse expect_equal(res@chromPeaks, chromPeaks(faahko_xod)) expect_equal(res@chromPeakData, as.data.frame(chromPeakData(faahko_xod))) expect_true(hasChromPeaks(res)) + expect_true(length(processHistory(res)) == 1L) ## chromPeaks expect_equal(chromPeaks(res), res@chromPeaks) @@ -58,6 +59,7 @@ test_that("findChromPeaks,MsExperiment et al works", { expect_true(is.data.frame(chromPeakData(xmse, return.type = "data.frame"))) expect_s4_class(chromPeakData(xmse), "DataFrame") expect_true(nrow(chromPeakData(xmse, 2:3)) == 0) + expect_true(is.integer(chromPeakData(res)$ms_level)) ## dropChromPeaks rres <- dropChromPeaks(res) @@ -65,21 +67,24 @@ test_that("findChromPeaks,MsExperiment et al works", { expect_true(nrow(rres@chromPeaks) == 0) expect_false(hasChromPeaks(rres)) - res2 <- findChromPeaks(mse, param = p, msLevel = 2L) + res2 <- findChromPeaks(mse, param = cwp, msLevel = 2L) expect_true(nrow(res2@chromPeaks) == 0) expect_false(hasChromPeaks(res2)) - res2 <- findChromPeaks(res, param = p, msLevel = 2L, add = TRUE) + res2 <- findChromPeaks(res, param = cwp, msLevel = 2L, add = TRUE) expect_equal(res@chromPeaks, res2@chromPeaks) expect_equal(res@chromPeakData, res2@chromPeakData) expect_true(length(res2@processHistory) == 2) + expect_true(is.integer(chromPeakData(res2)$ms_level)) - res2 <- findChromPeaks(res, param = p, msLevel = 2L, add = FALSE) + res2 <- findChromPeaks(res, param = cwp, msLevel = 2L, add = FALSE) expect_equal(nrow(res2@chromPeaks), 0) expect_true(length(res2@processHistory) == 1) + expect_true(is.integer(chromPeakData(res2)$ms_level)) - res2 <- findChromPeaks(mse, param = p, chunkSize = -1) + res2 <- findChromPeaks(mse, param = cwp, chunkSize = -1) expect_equal(res@chromPeaks, res2@chromPeaks) + expect_true(is.integer(chromPeakData(res2)$ms_level)) expect_true(hasChromPeaks(res)) expect_true(hasChromPeaks(res, msLevel = 1L)) @@ -138,6 +143,28 @@ test_that("subsetting,XcmsExperiment works", { expect_true(hasChromPeaks(res)) expect_true(hasFeatures(res)) expect_equal(featureValues(res), featureValues(xmseg)[, c(3, 1)]) + + ## Duplicating + expect_error(xmse[c(2, 1, 2)], "Duplicated") + + ## there and back again + sampleData(xmse)$original_index <- seq_along(xmse) + res <- xmse[c(3, 1, 2)] + res <- res[order(sampleData(res)$original_index)] + expect_equal(sampleData(res)$original_index, + sampleData(xmse)$original_index) + expect_equal(sampleData(res), sampleData(xmse)) + expect_equal(chromPeaks(res), chromPeaks(xmse)) + expect_equal(res@spectra, xmse@spectra) + + sampleData(xmseg)$original_index <- seq_along(xmseg) + res <- xmseg[c(3, 1, 2), keepFeatures = TRUE] + res <- res[order(sampleData(res)$original_index), keepFeatures = TRUE] + expect_equal(sampleData(res)$original_index, + sampleData(xmseg)$original_index) + expect_equal(sampleData(res), sampleData(xmseg)) + expect_equal(chromPeaks(res), chromPeaks(xmseg)) + expect_equal(featureDefinitions(res), featureDefinitions(xmseg)) }) test_that("filterRt,XcmsExperiment works", { @@ -172,6 +199,16 @@ test_that("filterRt,XcmsExperiment works", { seq_len(nrow(chromPeaks(res)))) }) +test_that("filterMsLevel,XcmsExperiment works", { + res <- filterMsLevel(xmse, c(1L, 2L)) + expect_equal(rtime(res), rtime(xmse)) + expect_equal(chromPeaks(res), chromPeaks(xmse)) + + res <- filterMsLevel(xmse, msLevel = 2L) + expect_equal(rtime(res), numeric()) + expect_true(nrow(chromPeaks(res)) == 0L) +}) + test_that("filterFile,XcmsExperiment works", { res <- filterFile(xmse) expect_s4_class(res, "XcmsExperiment") @@ -263,7 +300,7 @@ test_that("adjustRtime,MsExperiment,XcmsExperiment,ObiwarpParam works", { expect_equal(rtime(res3, adjusted = TRUE), rtime(res2, adjusted = TRUE)) ## Order: alignment, peak detection. - res3 <- findChromPeaks(res, param = p) + res3 <- findChromPeaks(res, param = cwp) expect_true(hasChromPeaks(res3)) expect_true(hasAdjustedRtime(res3)) expect_true(length(res3@processHistory) == 2L) @@ -311,9 +348,9 @@ test_that(".empty_feature_definitions works", { ## That's from XcmsExperiment-functions.R test_that(".xmse_group_cpeaks works", { - expect_error(.xmse_group_cpeaks(chromPeaks(xmse), p), "No correspondence") - ## Just for PeakDensityParam. pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) + expect_error(.xmse_group_cpeaks(chromPeaks(xmse), cwp), "No correspondence") + ## Just for PeakDensityParam. cp <- chromPeaks(xmse, msLevel = 1L) res <- .xmse_group_cpeaks(cp, pdp) expect_true(is.data.frame(res)) @@ -415,6 +452,7 @@ test_that("groupChromPeaks,XcmsExperiment and related things work", { test_that("adjustRtime,MsExperiment,PeakGroupsParam works", { a <- groupChromPeaks(xmse, param = PeakDensityParam( sampleGroups = c(1, 1, 1))) + expect_true(length(processHistory(a)) == 2L) pgp <- PeakGroupsParam(span = 0.4) expect_false(hasAdjustedRtime(a)) @@ -425,6 +463,41 @@ test_that("adjustRtime,MsExperiment,PeakGroupsParam works", { expect_false(hasFeatures(res)) expect_equal(unname(rtime(xod_xgr)), unname(rtime(res))) expect_true(length(res@processHistory) == 3L) + expect_true(sum(rtime(res) != rtime(a)) > 1000) + expect_error(adjustRtime(res, param = pgp), "applyAdjustedRtime") + + ## Run with pre-defined anchor peak data + p <- res@processHistory[[3]]@param + res_2 <- adjustRtime(xmse, param = p) + expect_true(length(processHistory(res_2)) == + (length(processHistory(xmse)) + 1)) + expect_true(hasAdjustedRtime(res_2)) + expect_equal(rtime(res), rtime(res_2)) + res_2 <- adjustRtime(mse, param = p) + expect_true(hasAdjustedRtime(res_2)) + expect_equal(rtime(res), rtime(res_2)) + expect_true(length(processHistory(res_2)) == 1L) + + ## Subset-based + p <- PeakGroupsParam(span = 0.4, subset = c(1, 3)) + res_2 <- adjustRtime(a, p) + expect_true(hasAdjustedRtime(res_2)) + expect_false(hasFeatures(res_2)) + expect_true(length(res@processHistory) == 3L) + expect_true(sum(rtime(res_2) != rtime(a)) > 1000) + expect_true(sum(rtime(res_2) != rtime(res)) > 1000) + + ## Run with pre-defined anchor peak data + p <- res_2@processHistory[[3]]@param + res_3 <- adjustRtime(xmse, param = p) + expect_true(hasAdjustedRtime(res_3)) + expect_equal(rtime(res_2), rtime(res_3)) + expect_true(length(processHistory(res_3)) == + (length(processHistory(xmse)) + 1L)) + res_3 <- adjustRtime(mse, param = p) + expect_true(hasAdjustedRtime(res_3)) + expect_equal(rtime(res_2), rtime(res_3)) + expect_true(length(processHistory(res_3)) == 1L) }) test_that("findChromPeaks,XcmsExperiment,MatchedFilterParam works", { @@ -731,6 +804,9 @@ test_that(".chrom_peak_intensity_centWave works", { ## expect_equal(res[, "rt"], unname(pks[, "rt"])) # that is different. expect_equal(unname(res[, "into"]), unname(pks[, "into"])) expect_equal(unname(res[, "maxo"]), unname(pks[, "maxo"])) + + ## One example with missing values within the range: + ## pks[11, ]. }) ## That's from XcmsExperiment-functions.R diff --git a/tests/testthat/test_do_adjustRtime-functions.R b/tests/testthat/test_do_adjustRtime-functions.R index bf24cad14..e8a3e6b02 100644 --- a/tests/testthat/test_do_adjustRtime-functions.R +++ b/tests/testthat/test_do_adjustRtime-functions.R @@ -37,16 +37,6 @@ test_that("do_adjustRtime_peakGroups works", { rtime = rtime(xsg, bySample = TRUE), minFraction = minFr, subset = c(1, 2, 5, 14)), "out of range") - res <- do_adjustRtime_peakGroups( - peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, - rtime = rtime(xsg, bySample = TRUE), minFraction = minFr) - res_orig <- do_adjustRtime_peakGroups_orig( - peaks = chromPeaks(xsg), - peakIndex = featureDefinitions(xsg)$peakidx, - rtime = rtime(xsg, bySample = TRUE), - minFraction = minFr) - expect_equal(res, res_orig) - expect_true(sum(unlist(res) != rtime(xsg)) > 3000) ## Use only a subset. res_sub <- do_adjustRtime_peakGroups( peaks = chromPeaks(xsg), peakIndex = featureDefinitions(xsg)$peakidx, @@ -183,3 +173,95 @@ test_that("adjustRtimeSubset works", { points(res[[2]], b, type = "l", col = "#00ff0060", lty = 1) points(res[[3]], c, type = "l", col = "#0000ff40", lty = 2) }) + +test_that(".adjustRtime_peakGroupsMatrix works", { + ## Expect the same results by running just this function with a pre-defined + ## peakGroupsMatrix. + ph <- processHistory(xod_xgr) + pgm <- peakGroupsMatrix(ph[[length(ph)]]@param) + rts <- split(rtime(xod_xg), fromFile(xod_xg)) + + ## Errors + expect_error(.adjustRtime_peakGroupsMatrix(rts), "peakGroupsMatrix") + expect_error(.adjustRtime_peakGroupsMatrix(peakGroupsMatrix = pgm), "rtime") + expect_error(.adjustRtime_peakGroupsMatrix(rts[[1L]], pgm), "list of") + expect_error(.adjustRtime_peakGroupsMatrix(list(1:3, "a"), pgm), "numeric") + expect_error(.adjustRtime_peakGroupsMatrix(rts, pgm, subset = "a"), + "integer") + expect_error(.adjustRtime_peakGroupsMatrix(rts, pgm, subset = 1), "is 2") + expect_error(.adjustRtime_peakGroupsMatrix(rts, pgm, subset = c(1, 10)), + "out of range") + expect_error(.adjustRtime_peakGroupsMatrix(rts[1:2], pgm), "columns of") + + ## Results are identical + res <- .adjustRtime_peakGroupsMatrix(rts, pgm, span = 0.4, smooth = "loess") + expect_equal(res, split(rtime(xod_xgr), fromFile(xod_xgr))) + + ## Works with subset. + tmp <- adjustRtime(xod_xg, PeakGroupsParam(subset = c(1, 3), span = 0.4)) + ph <- processHistory(tmp) + pgm <- peakGroupsMatrix(ph[[length(ph)]]@param) + + res <- .adjustRtime_peakGroupsMatrix(rts, pgm, span = 0.4, + smooth = "loess", subset = c(1, 3)) + expect_equal(res, split(rtime(tmp), fromFile(tmp))) + +}) + +test_that(".define_peak_groups works", { + expect_error(.define_peak_groups(), "peaks") + expect_error(.define_peak_groups( + chromPeaks(xod_xgrg)[1:10, ], featureDefinitions(xod_xgrg)$peakidx), + "outside") + expect_error(.define_peak_groups( + chromPeaks(xod_xgrg), featureDefinitions(xod_xgrg)$peakidx, + subset = "a"), "integer") + expect_error(.define_peak_groups( + chromPeaks(xod_xgrg), featureDefinitions(xod_xgrg)$peakidx, + subset = c(1, 10), total_samples = 3), "out of range") + expect_error(.define_peak_groups( + chromPeaks(xod_xgrg), featureDefinitions(xod_xgrg)$peakidx, + subset = c(1), total_samples = 3), "is 2") + res <- .define_peak_groups(chromPeaks( + xod_xg), featureDefinitions(xod_xg)$peakidx, subset = integer(), + total_samples = 3) + expect_true(is.matrix(res)) + expect_true(ncol(res) == 3) + expect_true(is.numeric(res)) + + ## Compare with results from full run. + ph <- processHistory(xod_xgr) + pgm <- peakGroupsMatrix(ph[[length(ph)]]@param) + expect_equal(unname(res), unname(pgm)) + + ## Run with subset. + res <- .define_peak_groups(chromPeaks( + xod_xg), featureDefinitions(xod_xg)$peakidx, subset = c(1, 3), + total_samples = 3, minFraction = 0.9, extraPeaks = 1) + expect_true(is.matrix(res)) + expect_true(ncol(res) == 2) + expect_true(is.numeric(res)) + expect_true(!any(is.na(res))) + + pgm <- adjustRtimePeakGroups( + xod_xg, param = PeakGroupsParam(span = 0.4, subset = c(1, 3), + minFraction = 0.9, extraPeaks = 1)) + expect_equal(unname(res), unname(pgm)) + + res <- xcms:::.define_peak_groups(chromPeaks( + xod_xg), featureDefinitions(xod_xg)$peakidx, subset = c(1, 3), + total_samples = 3, minFraction = 0.5, extraPeaks = 1) + expect_true(is.matrix(res)) + expect_true(ncol(res) == 2) + expect_true(is.numeric(res)) + expect_true(any(is.na(res))) +}) + +test_that(".getPeakGroupsRtMatrix works with subsets", { + res <- .getPeakGroupsRtMatrix(chromPeaks(xod_xg), + featureDefinitions(xod_xg)$peakidx, + sampleIndex = c(1, 3), + missingSample = 0, + extraPeaks = 1L) + expect_true(!any(is.na(res))) +}) diff --git a/tests/testthat/test_functions-utils.R b/tests/testthat/test_functions-utils.R index b19111260..d0c18dc64 100644 --- a/tests/testthat/test_functions-utils.R +++ b/tests/testthat/test_functions-utils.R @@ -456,3 +456,52 @@ test_that(".chromatograms_for_peaks works", { expect_equal(lapply(ref, intensity), lapply(res[idx], intensity)) expect_equal(lapply(ref, rtime), lapply(res[idx], rtime)) }) + +test_that(".rawMat, .getEIC etc", { + mzr <- c(532.2000, 532.20003) + rtr <- c(2855.057, 2883.226) + + ## Testing rawMat + s <- spectra(xmse[2L]) + mz <- unlist(mz(s)) + int <- unlist(intensity(s)) + scantime <- rtime(s) + valsPerSpect <- lengths(s) + mzrange <- mzr + rtrange <- rtr + + res <- .rawMat(mz = mz, int, scantime, valsPerSpect, mzrange, + rtrange) + expect_true(is.matrix(res)) + expect_true(ncol(res) == 3) + ## That is actually an issue. .rawMat skips scans/retention times if + ## no peak was recorded. Thus is introduces gaps in the data. + expect_true(!any(res[, "intensity"] == 0)) + + ## Compare to getEIC + scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2])) + scanindex <- valueCount2ScanIndex(valsPerSpect) + sr <- range(scns) - 1 + res2 <- .Call("getEIC", mz, int, scanindex, mzrange, as.integer(sr), + as.integer(length(scanindex)), PACKAGE = "xcms") + expect_true(is.list(res2)) + expect_true(any(res2$intensity == 0)) + + res3 <- .getEIC(mz, int, scantime, valsPerSpect, mzrange = mzrange, + rtrange = rtr) + expect_true(is.matrix(res3)) + expect_true(ncol(res3) == 2) + expect_true(any(res3[, "intensity"] == 0)) +}) + +test_that(".match_last works", { + a <- c("a", "b", "c", "a", "b") + res <- .match_last("a", a) + expect_equal(res, 4) + + res <- .match_last("d", a) + expect_equal(res, NA_integer_) + + res <- .match_last(c("c", "a", "d"), a) + expect_equal(res, c(3L, 4L, NA_integer_)) +}) diff --git a/tests/testthat/test_methods-XCMSnExp.R b/tests/testthat/test_methods-XCMSnExp.R index 30b2e021b..7e7d65b9f 100644 --- a/tests/testthat/test_methods-XCMSnExp.R +++ b/tests/testthat/test_methods-XCMSnExp.R @@ -1622,8 +1622,8 @@ test_that("adjustRtime,peakGroups works", { skip_on_os(os = "windows", arch = "i386") xod <- faahko_xod - xodg <- groupChromPeaks(xod, - param = PeakDensityParam(sampleGroups = rep(1, 3))) + xodg <- groupChromPeaks( + xod, param = PeakDensityParam(sampleGroups = rep(1, 3))) pks <- chromPeaks(xodg) expect_true(length(processHistory(xodg, type = .PROCSTEP.PEAK.DETECTION)) == 1) @@ -1709,7 +1709,7 @@ test_that("adjustRtime,peakGroups works", { rtime(xodg, bySample = TRUE)[[2]])) expect_true(all(rtime(res_sub, bySample = TRUE)[[3]] != rtime(xodg, bySample = TRUE)[[3]])) - expect_true(all(rtime(res_sub, bySample = TRUE)[[1]] != + expect_true(any(rtime(res_sub, bySample = TRUE)[[1]] != rtime(res_sub, bySample = TRUE)[[2]])) tmp <- adjustRtime(xodg, param = PeakGroupsParam()) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index b85e832e8..6cb66d025 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -113,12 +113,12 @@ We next load our data using the `readMsExperiment` function from the `r Biocpkg("MsExperiment")` package. ```{r} -data <- readMsExperiment(spectraFiles = cdfs, sampleData = pd) -data +faahko <- readMsExperiment(spectraFiles = cdfs, sampleData = pd) +faahko ``` The MS spectra data from our experiment is now available as a `Spectra` object -within `data`. Note that this `MsExperiment` container could in addition to +within `faahko`. Note that this `MsExperiment` container could in addition to spectra data also contain other types of data or also references to other files. See the vignette from the `r Biocpkg("MsExperiment")` for more details. Also, when loading data from mzML, mzXML or CDF files, by default only @@ -129,7 +129,7 @@ files when needed (this is similar to the *MSnbase* *on-disk* mode described in hence allowing to analyze also large experiments without the need of high performance computing environments. Note that also different alternative *backends* (and hence data representations) could be used for the `Spectra` -object within `data` with eventually even lower memory footprint, or higher +object within `faahko` with eventually even lower memory footprint, or higher performance. See the package vignette from the `r Biocpkg("Spectra")` package or the [SpectraTutorials](https://jorainer.github.io/SpectraTutorials) tutorial for more details on `Spectra` backends and how to change between them. @@ -142,7 +142,7 @@ experiments. The *raw* MS data is stored as a `Spectra` object that can be accessed through the `spectra` function. ```{r} -spectra(data) +spectra(faahko) ``` All spectra are organized *sequentially* (i.e., not by file) but the `fromFile` @@ -150,23 +150,23 @@ function can be used to get for each spectrum the information to which of the data files it belongs. Below we simply count the number of spectra per file. ```{r} -table(fromFile(data)) +table(fromFile(faahko)) ``` Information on samples can be retrieved through the `sampleData` function. ```{r} -sampleData(data) +sampleData(faahko) ``` Each row in this `DataFrame` represents one sample (input file). Using `[` it is possible to subset a `MsExperiment` object **by sample**. Below we subset the -`data` to the 3rd sample (file) and access its spectra and sample data. +`faahko` to the 3rd sample (file) and access its spectra and sample data. ```{r} -data_3 <- data[3] -spectra(data_3) -sampleData(data_3) +faahko_3 <- faahko[3] +spectra(faahko_3) +sampleData(faahko_3) ``` As a first evaluation of the data we below plot the base peak chromatogram (BPC) @@ -177,13 +177,13 @@ we could set `aggregationFun` to `"sum"`. ```{r data-inspection-bpc, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6, fig.cap = "Base peak chromatogram."} ## Get the base peak chromatograms. This reads data from the files. -bpis <- chromatogram(data, aggregationFun = "max") +bpis <- chromatogram(faahko, aggregationFun = "max") ## Define colors for the two groups group_colors <- paste0(brewer.pal(3, "Set1")[1:2], "60") names(group_colors) <- c("KO", "WT") ## Plot all chromatograms. -plot(bpis, col = group_colors[sampleData(data)$sample_group]) +plot(bpis, col = group_colors[sampleData(faahko)$sample_group]) ``` The `chromatogram` method returned a `MChromatograms` object that organizes @@ -205,22 +205,22 @@ will only subset the spectra within the `MsExperiment`. Subsequently we re-create also the BPC. ```{r} -data <- filterRt(data, rt = c(2550, 4250)) +faahko <- filterRt(faahko, rt = c(2550, 4250)) ## creating the BPC on the subsetted data -bpis <- chromatogram(data, aggregationFun = "max") +bpis <- chromatogram(faahko, aggregationFun = "max") ``` We next create boxplots representing the distribution of the total ion currents per data file. Such plots can be very useful to spot potentially problematic MS runs. To extract this information, we use the `tic` function on the `Spectra` -object within `data` and split the values by file using `fromFile`. +object within `faahko` and split the values by file using `fromFile`. ```{r data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file." } ## Get the total ion current by file -tc <- spectra(data) |> +tc <- spectra(faahko) |> tic() |> - split(f = fromFile(data)) -boxplot(tc, col = group_colors[sampleData(data)$sample_group], + split(f = fromFile(faahko)) +boxplot(tc, col = group_colors[sampleData(faahko)$sample_group], ylab = "intensity", main = "Total ion current") ``` @@ -275,7 +275,7 @@ the signal for that compound. rtr <- c(2700, 2900) mzr <- c(334.9, 335.1) ## extract the chromatogram -chr_raw <- chromatogram(data, mz = mzr, rt = rtr) +chr_raw <- chromatogram(faahko, mz = mzr, rt = rtr) plot(chr_raw, col = group_colors[chr_raw$sample_group]) ``` @@ -302,7 +302,7 @@ raw object by retention time, then by m/z and finally plot the object with `type illustrate the corresponding workflow. ```{r peak-detection-plot-ms-data, message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity." } -data |> +faahko |> filterRt(rt = rtr) |> filterMz(mz = mzr) |> plot(type = "XIC") @@ -390,7 +390,7 @@ for the *centWave* chromatographic peak detection. ```{r peak-detection-centwave, message = FALSE, results = "hide" } cwp <- CentWaveParam(peakwidth = c(20, 80), noise = 5000, prefilter = c(6, 5000)) -xdata <- findChromPeaks(data, param = cwp) +faahko <- findChromPeaks(faahko, param = cwp) ``` The results of `findChromPeaks` on a `MsExperiment` object are returned as an @@ -406,7 +406,7 @@ chromatographic peak detection. Below we show the first 6 identified chromatographic peaks. ```{r peak-detection-chromPeaks, message = FALSE } -chromPeaks(xdata) |> head() +chromPeaks(faahko) |> head() ``` Columns of this `chromPeaks` matrix might differ depending on the used peak @@ -425,7 +425,7 @@ arbitrary annotations for each detected peak (that don't necessarily need to be numeric). ```{r peak-detection-chromPeakData} -chromPeakData(xdata) +chromPeakData(faahko) ``` Peak detection will not always work perfectly for all types of peak shapes @@ -451,15 +451,15 @@ help page for a detailed description of the settings and the approach. ```{r peak-postprocessing, message = FALSE} mpp <- MergeNeighboringPeaksParam(expandRt = 4) -xdata_pp <- refineChromPeaks(xdata, mpp) +faahko_pp <- refineChromPeaks(faahko, mpp) ``` An example for a merged peak is given below. ```{r peak-postprocessing-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The splitted peak was merged into one."} mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(xdata[1], mz = mzr_1) -chr_2 <- chromatogram(xdata_pp[1], mz = mzr_1) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -473,8 +473,8 @@ with a lower intensity between them, were however not merged (see below). ```{r peak-postprocessing-not-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The peaks were not merged."} mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(xdata[1], mz = mzr_1) -chr_2 <- chromatogram(xdata_pp[1], mz = mzr_1) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -492,11 +492,11 @@ chromPeaks(res) plot(res) ``` -Before proceeding we next replace the `xdata` object with the results from the +Before proceeding we next replace the `faahko` object with the results from the peak refinement step. ```{r} -xdata <- xdata_pp +faahko <- faahko_pp ``` Below we use the data from the `chromPeaks` matrix to calculate per-file @@ -507,11 +507,11 @@ well as the distribution of the retention time widths. summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) -T <- chromPeaks(xdata) |> - split.data.frame(f = chromPeaks(xdata)[, "sample"]) |> +T <- chromPeaks(faahko) |> + split.data.frame(f = chromPeaks(faahko)[, "sample"]) |> lapply(FUN = summary_fun) |> do.call(what = rbind) -rownames(T) <- basename(fileNames(xdata)) +rownames(T) <- basename(fileNames(faahko)) pandoc.table( T, caption = paste0("Summary statistics on identified chromatographic", @@ -525,7 +525,7 @@ in a result object it is also possible to extract only chromatographic peaks for a specified m/z and/or rt range: ```{r peak-detection-chrom-peak-table-selected, message = FALSE} -chromPeaks(xdata, mz = c(334.9, 335.1), rt = c(2700, 2900)) +chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ``` We can also plot the location of the identified chromatographic peaks in the @@ -533,7 +533,7 @@ m/z - retention time space for one file using the `plotChromPeaks` function. Below we plot this information for the third sample. ```{r peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample." } -plotChromPeaks(xdata, file = 3) +plotChromPeaks(faahko, file = 3) ``` As a general overview of the peak detection results we can in addition visualize @@ -543,7 +543,7 @@ in which peaks should be counted. This number of chromatographic peaks within each bin is then shown color-coded in the resulting plot. ```{r peak-detection-chrom-peak-image, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file." } -plotChromPeakImage(xdata, binSize = 10) +plotChromPeakImage(faahko, binSize = 10) ``` Note that extracting ion chromatorams from an *xcms* result object will in @@ -555,7 +555,7 @@ rt region above and access the detected peaks in that region using the `chromPeaks` function. ```{r peak-detection-eic-example-peak, message = FALSE} -chr_ex <- chromatogram(xdata, mz = mzr, rt = rtr) +chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ``` @@ -587,8 +587,8 @@ are present. ```{r peak-detection-chrom-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample." } ## Extract a list of per-sample peak intensities (in log2 scale) -ints <- split(log2(chromPeaks(xdata)[, "into"]), - f = chromPeaks(xdata)[, "sample"]) +ints <- split(log2(chromPeaks(faahko)[, "into"]), + f = chromPeaks(faahko)[, "sample"]) boxplot(ints, varwidth = TRUE, col = sample_colors, ylab = expression(log[2]~intensity), main = "Peak intensities") grid(nx = NA, ny = NULL) @@ -625,7 +625,7 @@ samples. We use a `binSize = 0.6` which creates warping functions in m/z bins of experiment. ```{r alignment-obiwarp, message = FALSE, results = "hide" } -xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6)) +faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) ``` Note that `adjustRtime`, besides calculating adjusted retention times for each @@ -636,13 +636,13 @@ function or using `rtime` with parameter `adjusted = TRUE` (the default): ```{r alignment-rtime, message = FALSE } ## Extract adjusted retention times -adjustedRtime(xdata) |> head() +adjustedRtime(faahko) |> head() ## Or simply use the rtime method -rtime(xdata) |> head() +rtime(faahko) |> head() ## Get raw (unadjusted) retention times -rtime(xdata, adjusted = FALSE) |> head() +rtime(faahko, adjusted = FALSE) |> head() ``` To evaluate the impact of the alignment we plot the BPC on the adjusted data. In @@ -654,14 +654,14 @@ automatic extraction of all identified chromatographic peaks by the ```{r alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 8, fig.cap = "Obiwarp aligned data. Base peak chromatogram before (top) and after alignment (middle) and difference between adjusted and raw retention times along the retention time axis (bottom)." } ## Get the base peak chromatograms. -bpis_adj <- chromatogram(xdata, aggregationFun = "max", chromPeaks = "none") +bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) plot(bpis, col = sample_colors) grid() plot(bpis_adj, col = sample_colors) grid() ## Plot also the difference of adjusted to raw retention time. -plotAdjustedRtime(xdata, col = sample_colors) +plotAdjustedRtime(faahko, col = sample_colors) grid() ``` @@ -676,7 +676,7 @@ par(mfrow = c(2, 1)) plot(chr_raw, col = sample_colors) grid() ## Extract the chromatogram from the adjusted object -chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr) +chr_adj <- chromatogram(faahko, rt = rtr, mz = mzr) plot(chr_adj, col = sample_colors, peakType = "none") grid() ``` @@ -749,11 +749,11 @@ spectra, this function will also *restore* the original retention times for identified chromatographic peaks. ```{r subset-define, message = FALSE, warning = FALSE} -xdata <- dropAdjustedRtime(xdata) +faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout -sampleData(xdata)$sample_type <- "study" -sampleData(xdata)$sample_type[c(1, 4, 7)] <- "QC" +sampleData(faahko)$sample_type <- "study" +sampleData(faahko)$sample_type[c(1, 4, 7)] <- "QC" ``` For an alignment with the *peak groups* method an initial peak grouping @@ -766,20 +766,20 @@ section). The definition of the sample groups (i.e. assignment of individual samples to the sample groups in the experiment) is mandatory for the `PeakDensityParam`. If there are no sample groups in the experiment, `sampleGroups` should be set to a single value for each file (e.g. `rep(1, -length(fileNames(xdata))`). +length(fileNames(faahko))`). ```{r alignment-subset, message = FALSE, warning = FALSE} ## Initial peak grouping. Use sample_type as grouping variable -pdp_subs <- PeakDensityParam(sampleGroups = sampleData(xdata)$sample_type, +pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) -xdata <- groupChromPeaks(xdata, param = pdp_subs) +faahko <- groupChromPeaks(faahko, param = pdp_subs) ## Define subset-alignment options and perform the alignment pgp_subs <- PeakGroupsParam( minFraction = 0.85, - subset = which(sampleData(xdata)$sample_type == "QC"), + subset = which(sampleData(faahko)$sample_type == "QC"), subsetAdjust = "average", span = 0.4) -xdata <- adjustRtime(xdata, param = pgp_subs) +faahko <- adjustRtime(faahko, param = pgp_subs) ``` Below we plot the results of the alignment highlighting the subset samples in @@ -792,12 +792,12 @@ second subset sample (4). ```{r alignment-subset-plot-2, message = FALSE, warning = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Subset-alignment results with option average. Difference between adjusted and raw retention times along the retention time axis. Samples on which the alignment models were estimated are shown in green, study samples in grey." } clrs <- rep("#00000040", 8) -clrs[sampleData(xdata)$sample_type == "QC"] <- c("#00ce0080") +clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) -plot(chromatogram(xdata, aggregationFun = "max", chromPeaks = "none"), +plot(chromatogram(faahko, aggregationFun = "max", chromPeaks = "none"), col = clrs) grid() -plotAdjustedRtime(xdata, col = clrs, peakGroupsPch = 1, +plotAdjustedRtime(faahko, col = clrs, peakGroupsPch = 1, peakGroupsCol = "#00ce0040") grid() ``` @@ -832,9 +832,9 @@ each sample belongs. mzr <- c(305.05, 305.15) ## Extract and plot the chromatograms -chr_mzr <- chromatogram(xdata, mz = mzr) +chr_mzr <- chromatogram(faahko, mz = mzr) ## Define the parameters for the peak density method -pdp <- PeakDensityParam(sampleGroups = sampleData(xdata)$sample_group, +pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) plotChromPeakDensity(chr_mzr, col = sample_colors, param = pdp, peakBg = sample_colors[chromPeaks(chr_mzr)[, "sample"]], @@ -862,9 +862,9 @@ more details. ```{r correspondence, message = FALSE } ## Perform the correspondence -pdp <- PeakDensityParam(sampleGroups = sampleData(xdata)$sample_group, +pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) -xdata <- groupChromPeaks(xdata, param = pdp) +faahko <- groupChromPeaks(faahko, param = pdp) ``` Results from the correspondence analysis can be accessed with the @@ -876,7 +876,7 @@ the feature in column `"peakidx"`. Below we show the information on the first 6 features. ```{r} -featureDefinitions(xdata) |> head() +featureDefinitions(faahko) |> head() ``` The `featureValues` function returns a `matrix` with rows being features and @@ -888,7 +888,7 @@ as the intensity matrix for downstream analysis. Below we extract the intensities for the first 6 features. ```{r correspondence-feature-values, message = FALSE } -featureValues(xdata, value = "into") |> head() +featureValues(faahko, value = "into") |> head() ``` As we can see we have several missing values in this feature matrix. Missing @@ -915,7 +915,7 @@ considerable amount of time. Below we extract the chromatograms for the first 4 features. ```{r featureChromatograms, message = FALSE } -feature_chroms <- featureChromatograms(xdata, features = 1:4) +feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms ``` @@ -962,13 +962,13 @@ extract the feature values for the first 6 features after gap filling. An `NA` is reported if no signal is measured at all for a specific sample. ```{r fill-chrom-peaks, message = FALSE } -xdata <- fillChromPeaks(xdata, param = ChromPeakAreaParam()) +faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) -featureValues(xdata, value = "into") |> head() +featureValues(faahko, value = "into") |> head() ``` ```{r export-result, eval = FALSE, echo = FALSE} -save(xdata, file = "xdata.RData") +save(faahko, file = "faahko.RData") ``` ## Final result @@ -991,7 +991,7 @@ peaks in each sample). ```{r} library(SummarizedExperiment) -res <- quantify(xdata, value = "into", method = "sum") +res <- quantify(faahko, value = "into", method = "sum") res ``` @@ -1033,7 +1033,7 @@ the feature value matrix **without** filled-in values (i.e. feature intensities that were added by the gap filling step). ```{r} -assays(res)$raw_nofill <- featureValues(xdata, filled = FALSE, method = "sum") +assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") ``` With that we have now two assays in our result object. @@ -1060,14 +1060,14 @@ the `processHistory` function. Below we extract the information for the first processing step. ```{r} -processHistory(xdata)[[1]] +processHistory(faahko)[[1]] ``` These processing steps contain also the individual parameter objects used for the analysis, hence allowing to exactly reproduce the analysis. ```{r} -processHistory(xdata)[[1]] |> processParam() +processHistory(faahko)[[1]] |> processParam() ``` At last we perform also a principal component analysis to evaluate the grouping