diff --git a/DESCRIPTION b/DESCRIPTION index 312f10eb9..a6cab916e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.1.10 +Version: 4.1.12 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, @@ -39,34 +39,34 @@ Authors@R: c( person(given = "Pablo", family = "Vangeenderhuysen", email = "pablo.vangeenderhuysen@ugent.be", role = "ctb", - comment = c(ORCID = "0000-0002-5492-6904")) + comment = c(ORCID = "0000-0002-5492-6904")), + person(given = "Carl", family = "Brunius", + email = "carl.brunius@chalmers.se", + role = "ctb", + comment = c(ORCID = "0000-0003-3957-870X")) ) Depends: R (>= 4.0.0), - BiocParallel (>= 1.8.0), - MSnbase (>= 2.23.1) + BiocParallel (>= 1.8.0) Imports: + MSnbase (>= 2.29.3), mzR (>= 2.25.3), methods, Biobase, BiocGenerics, - ProtGenerics (>= 1.35.2), + ProtGenerics (>= 1.35.4), lattice, - RColorBrewer, - plyr, - RANN, MassSpecWavelet (>= 1.66.0), S4Vectors, - robustbase, IRanges, SummarizedExperiment, - MsCoreUtils (>= 1.15.3), + MsCoreUtils (>= 1.15.5), MsFeatures, MsExperiment (>= 1.5.4), - Spectra (>= 1.13.2), + Spectra (>= 1.13.7), progress, - multtest, jsonlite, + RColorBrewer, MetaboCoreUtils (>= 1.11.2) Suggests: BiocStyle, @@ -80,18 +80,21 @@ Suggests: rmarkdown, MALDIquant, pheatmap, + RANN, + multtest, MsBackendMgf, - signal + signal, + mgcv Enhances: Rgraphviz, - rgl, - XML + rgl License: GPL (>= 2) + file LICENSE URL: https://github.com/sneumann/xcms BugReports: https://github.com/sneumann/xcms/issues/new VignetteBuilder: knitr biocViews: ImmunoOncology, MassSpectrometry, Metabolomics RoxygenNote: 7.3.1 +Encoding: UTF-8 Collate: 'AllGenerics.R' 'functions-XChromatograms.R' diff --git a/NAMESPACE b/NAMESPACE index 33a6c6389..8e10ffd86 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,10 @@ -useDynLib(xcms) +useDynLib(xcms, .registration = TRUE) 
-importFrom("utils", "capture.output", "data") import("methods") importMethodsFrom("ProtGenerics", "peaks", "chromatogram", "writeMSData", "polarity<-", "centroided", "isCentroided", "peaks<-", "isolationWindowTargetMz", "quantify", "bin", "spectrapply", - "filterFeatures", "filterMzRange") + "filterFeatures", "filterMzRange", "filterRt", "filterMz", "filterMsLevel") importClassesFrom("ProtGenerics", "Param") importFrom("BiocGenerics", "updateObject", "fileName", "subset", "dirname", "dirname<-") @@ -18,18 +17,12 @@ importMethodsFrom("Biobase", "phenoData", importFrom("IRanges", "CharacterList", "NumericList") importClassesFrom("IRanges", "CharacterList", "NumericList") -importFrom("graphics", "image", "boxplot", "matplot", "rect", "axis", - "grid", "mtext", "polygon", "box", "plot.xy") importFrom("mzR", "peaks", "close", "openMSfile", "header") importFrom("lattice", "levelplot", "panel.rect", "panel.levelplot", "level.colors", "do.breaks") -importFrom("plyr", "rbind.fill") -importFrom("robustbase", "lmrob", "lmrob.control") -import("RColorBrewer") -import("BiocParallel") -## importMethodsFrom("stats4", "plot") +importFrom("BiocParallel", "bpparam", "SerialParam") +importMethodsFrom("BiocParallel", "bplapply", "bpmapply") -## import("S4Vectors") importClassesFrom("S4Vectors", "Rle", "DataFrame", "Hits") importFrom("S4Vectors", "split", "Rle", "DataFrame", "SimpleList", "List", "as.matrix") @@ -40,13 +33,14 @@ importFrom("SummarizedExperiment", "rowData") importFrom("SummarizedExperiment", "rowData<-") importFrom("SummarizedExperiment", "assay") importFrom("MsCoreUtils", "rbindFill", "closest", "i2index", "sumi", "between", - "maxi", "breaks_ppm") + "maxi", "breaks_ppm", "force_sorted") +importFrom("RColorBrewer", "brewer.pal") -## Additional imports proposed by R CMD check: -importFrom("graphics", "abline", "barplot", "close.screen", "hist", - "identify", "layout", "legend", "lines", "par", "plot.new", - "plot.window", "points", "screen", "split.screen", - 
"strwidth", "text", "title") +importFrom("graphics", "image", "boxplot", "matplot", "rect", "axis", + "grid", "mtext", "polygon", "box", "plot.xy", "abline", + "barplot", "close.screen", "hist", "identify", "layout", + "legend", "lines", "par", "plot.new", "plot.window", "points", + "screen", "split.screen", "strwidth", "text", "title") importFrom("grDevices", "col2rgb", "colorRampPalette", "dev.cur", "dev.list", "dev.off", "dev.set", "palette", "pdf", "png", "rainbow", "rgb", "terrain.colors", "n2mfrow", "dev.flush", @@ -55,14 +49,11 @@ importFrom("stats", "aov", "approx", "convolve", "cor", "deriv3", "dist", "fft", "fitted", "lm", "loess", "lsfit", "median", "na.omit", "nextn", "nls", "predict", "pt", "quantile", "runmed", "sd", "stepfun", "weighted.mean", "density", "approxfun", - "rnorm", "runif", "dbeta") + "rnorm", "runif", "dbeta", "resid") importFrom("utils", "flush.console", "head", "object.size", "packageVersion", "read.csv", "tail", "write.csv", - "write.table") + "write.table", "capture.output", "data") -## New imports from packages moved from Suggests to Imports: -importFrom("multtest", "mt.teststat") -importFrom("RANN", "nn2") importFrom("MassSpecWavelet", "peakDetectionCWT", "tuneInPeakInfo") ## MSnbase: @@ -260,7 +251,10 @@ export( "groupOverlaps", "estimatePrecursorIntensity", "featureArea", - "loadXcmsData" + "loadXcmsData", + "matchLamasChromPeaks", + "summarizeLamaMatch", + "matchedRtimes" ) ## New analysis methods @@ -284,6 +278,7 @@ exportClasses( "MzClustParam", "NearestPeaksParam", "PeakGroupsParam", + "LamaParama", "ObiwarpParam", "GenericParam", "FillChromPeaksParam", @@ -453,6 +448,7 @@ export("CentWaveParam", "MzClustParam", "NearestPeaksParam", "PeakGroupsParam", + "LamaParama", "ObiwarpParam", "GenericParam", "FillChromPeaksParam", @@ -590,6 +586,8 @@ importFrom("progress", "progress_bar") exportClasses("XcmsExperiment") exportMethods("uniqueMsLevels") exportMethods("filterMzRange") +exportMethods("fromFile") 
+exportMethods("fileNames") ## saving xcms objects things importFrom("jsonlite", "serializeJSON", "write_json", "unserializeJSON", @@ -601,7 +599,7 @@ exportMethods("storeResults") ## filtering features things importFrom("MetaboCoreUtils", "rowRsd", "rowDratio", "rowPercentMissing", - "rowBlank") + "rowBlank", "mclosest") export("RsdFilter") export("DratioFilter") export("PercentMissingFilter") diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 000000000..6578300b6 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,1263 @@ +# xcms 4.1 + +## Changes in version 4.1.11 + +- Clean up of required and suggested packages and namespace imports. +- Re-creation of bundled data objects. + +## Changes in version 4.1.10 + +- Ensure backward compatibility for parameter objects that gained additional + slots. + +## Changes in version 4.1.9 + +- Fix bug in `filterFeatures,PercentMissingFilter`. + +## Changes in version 4.1.8 + +- Fixing issue #716: edit of `.empty_chrom_peaks` function so an `sn` column is + returned. Fixes extracting and plotting of peaks after using + `manualChromPeaks`. + +## Changes in version 4.1.7 + +- Implementation of `filterFeatures` function with `filter` parameters: + `RsdFilter`, `DratioFilter`, `PercentMissingFilter`, `BlankFlag`. They can be + used to filter features from `XcmsResult` and `SummarizedExperiment` objects. +- Addition of a section in the main xcms vignette to describe how to use it. + +## Changes in version 4.1.6 + +- Import `filterSpectra` from `MsExperiment`. +- Import `breaks_ppm` from `MsCoreUtils`. +- Update `featureArea` function to consider all chromatographic peaks per + feature, not only the one with the highest intensity. As a consequence, + returned m/z and rt ranges might be higher which has an influence in + `featureChromatograms`, EIC-based feature grouping and, to a lesser extent + also in gap-filling. Related documentation was updated. 
+- Improve performance of the `featureArea` function (and related of the + `PeakAreaParam`-based gap filling). +- Add parameter `ppm` to `PeakDensityParam` to enable peak-density-based + correspondence through m/z-dependent bins along the m/z. + +## Changes in version 4.1.5 + +- Improve performance of the `chromatogram` call for `XcmsExperiment` objects. +- Remove internal (not exported) normalization functions. These have been + transferred to the MetaboCoreUtils package. +- Support subsetting of `XcmsExperiment` with negative indices. + +## Changes in version 4.1.4 + +- Rename variable `data` in the vignette to `faahko`. +- Fix issue in adjustRtime resulting in corrupt processHistory. +- Add support to perform peakGroups alignment using pre-defined anchor peak + matrix (i.e., the numeric matrix with retention times of anchor peaks in + the samples that can be used to align these samples). +- Fix errors related to invalid `Chromatogram` objects extracted from xcms + results: ensure MS level in `chromPeaksMatrix` is `integer`. +- Fix definition of anchor peaks for peakGroups alignment with subset + (issue #702). +- Add `filterMsLevel` method for `MsExperiment` and `XcmsExperiment`. +- Ensure chunk-wise processing of Spectra (introduced with version 1.13.2) is + disabled when xcms is using its own chunk-wise processing. + +## Changes in version 4.1.3 + +- Add parameter `verboseBetaColumns` to `CentWaveParam` to enable calculation + of additional peak quality metrics comparing the EIC to an idealized bell + curve. + +## Changes in version 4.1.2 + +- Add a `param =` to generic function `storeResults`: `PlainTextParam` to save + an `XcmsExperiment` or `MsExperiment` object as a collection of plain text + files. + +## Changes in version 4.1.1 + +- Add method `storeResults` and one of its `param =`: `RDataParam` to save an + `XcmsExperiment` object as an .RData file. 
+ + +# xcms 3.99 + +## Changes in version 3.99.6 + +- Add method to coerce a `XcmsExperiment` to a `xcmsSet` (issue #696). +- Support providing only `mz` or `rt` also for `chromatogram,MsExperiment`. + +## Changes in version 3.99.5 + +- Only `mz` or `rt` need to be provided for `chromatogram`. + +## Changes in version 3.99.4 + +- Add `chromPeakChromatograms` function to extract (EIC) chromatograms for + chromatographic peaks. + +## Changes in version 3.99.3 + +- Small fixes in the *direct injection* vignette. +- Add parameter `isolationWindowTargetMz` to the `chromatogram` function for + `MsExperiment` and `XcmsExperiment` to ensure MS2 chromatographic data is + extracted from the MS2 spectra containing fragments of the compound of + interest. + +## Changes in version 3.99.2 + +- Add the `xmse` data set representing an `XcmsExperiment` object. +- Update the *compounding* vignette to use the new objects. +- Add `loadXcmsData` to load test data objects (and fix/update paths). +- Add `groupFeatures` methods for `XcmsExperiment`. +- Fix issue in `featureArea` for `XcmsExperiment`. +- Update main vignette to use and describe the new data objects. +- Add `findChromPeaksIsolationWindow` method for `MsExperiment` and + `XcmsExperiment`. +- Make `reconstructChromPeakSpectra` a method. +- Add `reconstructChromPeakSpectra` implementation for `XcmsExperiment`. +- Add `filterIsolationWindow` for `MsExperiment` and `XcmsExperiment` to filter + spectra (and eventually chromatographic peaks) based on the isolation window. +- Update the LC-MS/MS vignette adding also an example how to deisotope SWATH + MS2 spectra. + +## Changes in version 3.99.1 + +- `featureSummary` and `overlappingFeatures` gain support for `XcmsExperiment`. +- Fix in `featureChromatograms` to ensure a valid object is returned. + +## Changes in version 3.99.0 + +- Add `XcmsExperiment` and support for `MsExperiment`/`Spectra`: add all + functionality for a full xcms processing on a `MsExperiment` object. 
+- Fix issue in `refineChromPeaks` with `MergeNeighboringPeaksParam` where a + wrong apex position was considered in the evaluation whether candidate peaks + should be merged (would only happen for merging of > 2 candidate peaks). +- Re-write the `reconstructChromPeakSpectra` for DIA data analysis to fix an + issue with chromatographic peaks in overlapping SWATH isolation windows and + generally to improve performance. + + +# xcms 3.21 + +## Changes in version 3.21.5 + +- Fix issue in `chromatogram` after filtering a result object (issue #511). + +## Changes in version 3.21.4 + +- Move multtest from Suggests to Imports in dependencies + +## Changes in version 3.21.3 + +- Only fixes in the long running tests + +## Changes in version 3.21.1 + +- Fix error with `fillChromPeaks` on sparse data (many empty spectra) and peak + detection performed with `MatchedFilterParam` (issue #653). +- Update to newer function names in the `rgl` package (issue #654). + + +# xcms 3.19 + +## Changes in version 3.19.2 + +- Update/expand documentation for the `firstBaselineCheck` parameter of + centWave. + +## Changes in version 3.19.1 + +- Update documentation to reference updates in MassSpecWavelet package. + + +# xcms 3.17 + +## Changes in version 3.17.6 + +- Rewrite code to subset features and chromatographic peaks. This results in a + performance improvement for `filterFile` and similar functions. +- Add parameter `expandMz` to `featureChromatograms` + https://github.com/sneumann/xcms/issues/612. + +## Changes in version 3.17.5 + +- Change the way the m/z value for a chromatographic peak is determined by + centWave: if a ROI contains more than one peak for one scan (spectrum) an + intensity-weighted m/z is reported for that scan. The m/z of the + chromatographic peak is then calculated based on these reported m/z values for + each scan (spectrum). In the original version the mean m/z for a scan was + reported instead. 
 As a result, m/z values of chromatographic peaks are now + slightly different but are expected to be more accurate. See + https://github.com/sneumann/xcms/issues/590 for more details. + +## Changes in version 3.17.4 + +- Add `transformIntensity` method. +- Fix issue when calling `chromPeakSpectra` or `featureSpectra` on an object + that contains also files with only MS1 spectra + (https://github.com/sneumann/xcms/issues/603). + +## Changes in version 3.17.2 + +- Use mzML instead of mzData files in testing and vignettes, + since mzR dropped mzData reading and the msdata package will drop mzData files as well. + +## Changes in version 3.17.1 + +- Fix bug in feature grouping by EIC correlation that would return a + non-symmetric similarity matrix. +- Fix error message from issue [584](https://github.com/sneumann/xcms/issues/584). + + +# xcms 3.15 + +## Changes in version 3.15.5 + +- Disable testing on windows i386, providing some speedup +- Disable parallel processing on Windows, causing an issue in testthat on BioC build check + +## Changes in version 3.15.4 + +- Fix in `plot` with `type = "XIC"` to plot an empty plot if no data is present. +- Skip re-indexing of peaks to features if not necessary. This results in + performance improvements for MS1 only data. + +## Changes in version 3.15.3 + +- Add `manualFeatures` allowing to manually define and add features to an + `XCMSnExp` object. +- Add `plotChromatogramsOverlay` function to support plotting of multiple EICs + from the same sample into the same plot (eventually stacked). +- Add feature grouping by EIC similarity: `EicSimilarityParam`. +- Import `compareChromatograms` from `MSnbase`. +- Add feature grouping by similar retention time: `SimilarRtimeParam`. +- Add feature grouping by similarity of feature abundances across samples: + `AbundanceSimilarityParam`. +- Add feature grouping methodology based on `MsFeatures`. + +## Changes in version 3.15.2 + +- Fix LC-MS/MS vignette. 
+ +## Changes in version 3.15.1 + +- Compatibility fix for nls() in R >= 4.1, contributed by Rick Helmus. + + +# xcms 3.13 + +## Changes in version 3.13.8 + +- Fix plotQC() for XCMSnExp objects + +## Changes in version 3.13.7 + +- Add `featureArea` function to extract the m/z-rt region for features. +- Fix `featureSpectra` function. +- Re-add the LC-MS/MS vignette. +- Feature: plotQC() supports XCMSnExp objects now + +## Changes in version 3.13.6 + +- Fix issue #545: skip second centWave run with CentWavePredIsoParam in regions + of interest with undefined peak boundaries/scan ranges. +- Temporarily remove the LC-MS/MS vignette (until MsBackendMgf is added to + Bioconductor). + +## Changes in version 3.13.5 + +- Add `filterChromPeaks` method to filter chromatographic peaks in a + `XChromatogram` or `XChromatograms` object. +- Add `filterChromPeaks` method for `XCMSnExp` (issue #541). +- Support return of `Spectra` objects by `chromPeakSpectra`, `featureSpectra` + and `reconstructChromPeakSpectra`. +- Support extraction of MS1 spectra with `chromPeakSpectra`. +- Support extraction of the spectrum with the largest total signal or largest + base peak signal in `chromPeakSpectra`. +- Add support for extraction of spectra for selected/individual peaks/features + using the `peaks` and `features` parameter in `chromPeakSpectra` and + `featureSpectra`, respectively. + +## Changes in version 3.13.4 + +- Import `Param` object from `ProtGenerics`. +- Import `filterIntensity`, `normalize` and `alignRt` for `Chromatogram` and + `MChromatograms` from `MSnbase`. + +## Changes in version 3.13.3 + +- `align,Chromatogram` gains new method `"none"` which will only keep values + with identical retention times. For `method = "matchRtime"` the (much faster) + matching function `closest` from the `MsCoreUtils` package is used. 
+- Method `correlate,Chromatogram` gains parameter `useIntensitiesAbove` to + perform the correlation only with values larger than this threshold + (avoiding thus high correlation because of many 0-values). +- Add method `filterIntensity,Chromatogram` that allows to filter a chromatogram + object keeping only data points with an intensity above a user provided + threshold. + +## Changes in version 3.13.2 + +- Add new function `manualChromPeaks` allowing to manually add and integrate + chromatographic peaks. + +## Changes in version 3.13.1 + +- Support subsetting of `XChromatograms` with `drop = FALSE`. + + +# xcms 3.11 + +## Changes in version 3.11.8 + +- Disable parallel processing in vignettes. + +## Changes in version 3.11.7 + +- More efficient splitting data per file especially for larger data sets. +- Disable parallel processing in examples. + +## Changes in version 3.11.6 + +- Add `FilterIntensityParam` to filter chromatographic peaks on intensity + (issue #502). +- Add `estimatePrecursorIntensity` function to determine the precursor intensity + for MS2 spectra from the neighboring MS1 spectra. + +## Changes in version 3.11.4 + +- Change from `Spectra` and `Chromatograms` to `MSpectra` and `MChromatograms` + from MSnbase version >= 2.15.3. + +## Changes in version 3.11.3 + +- `reconstructChromPeakSpectra`: report also polarity and `precursorIntensity`. +- `reconstructChromPeakSpectra`: ensure a retention time is reported for + reconstructed MS2 spectra (issue #485). +- Change default for `expandRt` to `0` in `reconstructChromPeakSpectra`. +- Fix error in `refineChromPeaks,MergeNeighboringPeaksParam` if no peaks found + to be merged. + +## Changes in version 3.11.2 + +- Add `fillChromPeaks,ChromPeakAreaParam` to base the area from which missing + peak data should be filled-in on the actually detected chromatographic peaks + of a feature. +- Potential fix for issue #481: function should no longer throw an error because + retention times are of length 0. 
+- More efficient splitting of processing which should increase the speed of + the findChromPeaks, refineChromPeaks, reconstructChromPeakSpectra and + chromPeakSpectra calls. + +## Changes in version 3.11.1 + +- Fix issue #471: conversion from `XCMSnExp` to `xcmsSet` loses phenodata + (thanks to Andris Jankevics for reporting and providing a solution). +- Add `normalize` method for `Chromatogram` and `Chromatograms` objects. +- `featureChromatograms` gets new parameter `n` and `value` to extract EICs + only from the top n samples with highest intensities. +- `filterFile` gets new parameter `keepFeatures` to support retaining + correspondence results even if a data set is filtered by file. +- Export the virtual `Param` class. +- Add filterColumnsIntensityAbove method for Chromatograms object that allows + to select columns (samples) of a Chromatograms object for which intensities + of its chromatographic data are higher than a threshold. +- Add removeIntensity method for Chromatogram, Chromatograms, XChromatogram + and XChromatograms objects allowing to *remove* intensities based on different + criteria. +- Add correlate method for Chromatograms allowing to correlate multiple + chromatograms with each other. + + +# xcms 3.9 + +## Changes in version 3.9.4 + +- Fix issue in centWave which skips peak detection depending on minimum + peakwidth (issue #445): add parameter `extendLengthMSW` in `CentWaveParam`. + Thanks to William Kumler for contributing the fix. +- Tentatively reduce memory requirements in `fillChromPeaks`. +- Fix issue #467 for fillPeaks() of an xcmsSet converted from an XCMSnSet + +## Changes in version 3.9.3 + +- Move multtest from Imports to Suggests to avoid duplicated method definition + for plot (issue #459). +- Add support for peak filling from MS level > 1 to fillChromPeaks. +- featureValues gains parameter msLevel to extract feature values for features + of all, or from a specific MS level. +- refineChromPeaks supports different MS levels. 
+- Added support to perform correspondence analysis on MS level > 1 and add the + respective results to already present feature definitions. +- hasChromPeaks and hasFeatures gain parameter msLevel to check for presence of + chromatographic peaks or features from a specific MS level. + +## Changes in version 3.9.2 + +- Fix featureChromatograms and chromatograms on a XCMSnExp object with features: + features can be duplicated across rows (EICs). +- findChromPeaks: add parameter `add` to allow several rounds of peak detections + on the same object. +- Small performance enhancement in fillChromPeaks. +- Better support for MS > 1 data in fillChromPeaks: skip MS level 2 spectra for + filling in. +- Add refineChromPeaks for XChromatogram and XChromatograms objects. +- Add groupOverlaps function to group arbitrary ranges. +- Add quantify,XCMSnExp object to quantify an XCMSnExp into a + SummarizedExperiment. +- Fine-tune MergeNeighboringPeaks peak refinement method: the average of the + 3 data points between candidate peaks is used to evaluate whether the peaks + should be merged making the approach more robust against outliers. In + addition, an ion chromatogram for candidate peaks is extracted with an m/z + range expanded depending on the expandMz and ppm setting ensuring that low + intensity data points between candidate peaks are not missed out (because + their m/z might be slightly shifted on ToF instruments). The mzmin and mzmax + of the merged peak represents also the minimum and maximum m/z of all data + points in that extracted ion chromatogram. + +## Changes in version 3.9.1 + +- Fix problem of not shown/plotted peak positions in plotChromPeakSpectra + for experiments in which peaks were not detected in the first sample(s). +- Add method *from_to* to missing value imputation method `imputeRowMinRand`. +- Show warning in findChromPeaks if empty spectra are detected. 
+- Add refineChromPeaks method and CleanPeaksParam class to allow removal of + chromatographic peaks exceeding a user-definable maximal peak width. +- Add MergeNeighboringPeaksParam for refineChromPeaks to allow merging of + chromatographic peaks close in m/z and retention time with a signal between + them higher than a certain threshold (issue #414). +- Fix misspelled parameter `mzd` in LC-MS/MS vignette. + + +# xcms 3.7 + +## Changes in version 3.7.5 + +- Remove xcmsMSn vignette (based on old xcms). + +## Changes in version 3.7.4 + +- mzClust correspondence analysis: check and fix missing values in column mz of + the peaks matrix (issue #416). + +## Changes in version 3.7.3 + +- plot type = "XIC" on an XCMSnExp object will draw rectangles indicating the + identified chromatographic peaks. +- Add a vignette describing LC-MS/MS data analysis with xcms. + +## Changes in version 3.7.2 + +- Fix documentation (issue #401). +- Add support for SWATH data analysis. + +## Changes in version 3.7.1 + +- Add correlate method for Chromatogram objects. +- Add parameter lwd to plotAdjustedRtime. +- Add align method for Chromatogram objects. +- Add findChromPeaksIsolationWindow to enable chromatographic peak detection + in isolation windows. +- Fix issue in chromPeakSpectra with method = "signal". +- chromPeakSpectra and featureSpectra return now MS2 spectra with a precursor + m/z >= mzmin, <= mzmax and retention time >= rtmin, <= rtmax. +- Improve performance of chromPeakSpectra and featureSpectra. + + +# xcms 3.5 + +## Changes in version 3.5.5 + +- Add dirname and dirname<- methods for OnDiskMSnExp to change the path to the + raw data files. +- Add section "Subset-based alignment" to the xcms vignette to describe the + alignment possibility to perform alignments based on a subset of samples + (e.g. QC samples). + +## Changes in version 3.5.4 + +- Fix problem in featureChromatograms with include = "feature_only" that could + return a non-valid object. 
+- Ensure that XCMSnExp objects are updated if necessary in all analysis methods. + +## Changes in version 3.5.3 + +- Fix unit tests. + +## Changes in version 3.5.2 + +- Small changes in fillChromPeaks,XCMSnExp to reduce memory demand. +- Fix issue #359. +- Fix issue #360: rawEIC skipped last scan/spectrum if rtrange was provided. +- filterMsLevel keeps now chromatographic peaks and feature definitions from the + specified MS levels (issue #362). +- Fix bug in `xcmsRaw` that leads to a netCDF error message (issue #363). +- Add parameter msLevel to chromPeaks for XCMSnExp objects. +- Add chromPeakData to allow adding arbitrary annotation to chromatographic + peaks. +- Change default of parameter value in featureValues from value = "index" to + value = "into". +- Add parameter isFilledColumn to chromPeaks allowing the old behaviour to + include the is_filled column in the chromatographic peak matrix. + +## Changes in version 3.5.1 + +- Fix issue #349. +- Add updateObject function for XCMSnExp objects (issue #347). +- Add dropFilledChromPeaks methods for XChromatogram and XChromatograms objects. +- Add parameter filled = FALSE to chromatogram and featureChromatograms + functions. +- Fix matchedFilter peak detection problems with empty spectra (issue #325). +- featureChromatograms extracts by default only chromatographic peaks associated + with a feature. +- chromatogram,XCMSnExp extracts an XChromatogram containing also + chromatographic peaks and feature definitions. +- Add featureValues method for XChromatograms objects (issue #336). +- Add correspondence analysis (peak grouping) for chromatographic data (for now + only with PeakDensity method; issue #336). +- Add featureDefinitions slot to XChromatograms object and related accessor + methods. +- Add subset alignment option subsetAdjust = "average" to adjust left-out + samples (blanks or simply non-subset samples) based on an interpolation from + the results of the previous and subsequent subset sample. 
+- Add parameter subsetAdjust to PeakGroupsParam allowing to switch between + different methods to adjust samples left out in the alignment process. +- Alignment based on a sample subset for the peak groups method (issue #335): + sample subset can be defined with the subset parameter, samples not included + in the subset will be aligned based on the adjusted retention times of the + closest sample in the subset. +- Add findChromPeaks,XChromatograms (issue #332). +- Add processHistory,XChromatograms. +- Add plot,XChromatograms method with automatic peak highlighting (issue #334). +- Add hasChromPeaks,XChromatograms method. +- Add XChromatograms class with constructor function and coercing method. +- Add hasChromPeaks,XChromatogram method. +- Add filterRt,XChromatogram, filterMz,XChromatogram. +- Add plot,XChromatogram function supporting of highlighting/drawing identified + chromatographic peaks. +- findChromPeaks,Chromatogram returns an XChromatogram object (issue #329). +- Add chromPeaks,XChromatogram (issue #329). +- Add XChromatogram object (issue #329). +- Fix higlightChromPeaks with type = "polygon": peak filling represents now the + full detected peak and is no longer cut by the provided rt. +- Add argument peakIds to highlightChromPeaks allowing to specify the IDs of + peaks to be highlighted. +- Add example on clustering of base peak chromatograms to the vignette (issue + #328). +- Small fix in the vignette (issue #327). +- Add parameter groupval to exportMetaboAnalyst (issue #296). +- Fix bug in show,XCMSnExp that would throw an error if no process history is + present. + + +# xcms 3.3 + +## Changes in version 3.3.6 + +- Add type = "polygon" to highlightChromPeaks allowing to fill the actual + signal area of identified chromatographic peaks. + +## Changes in version 3.3.5 + +- Performance enhancement of the chromPeakSpectra and featureSpectra functions. 
+ +## Changes in version 3.3.4 + +- Add featureChromatograms to extract ion chromatograms for each feature. +- Add hasFilledChromPeaks function. +- Add argument skipFilled to the featureSummary function. + +## Changes in version 3.3.3 + +- Add chromPeakSpectra and featureSpectra functions to extract MS2 spectra + for chromatographic peaks and features, respectively (issue #321). +- Fix profMat to handle also data files with empty spectra (issue #312). +- Add argument ylim to plotAdjustedRtime (issue #314). +- Add imputeRowMin and imputeRowMinRand, two simple missing value imputation + helper functions. +- Fix additional problem mentioned in issue #301 with obiwarp retention time + correction if some spectra have m/z values of `NA`. +- Fix issue #300 avoiding chromatographic peaks with rtmin > rtmax. +- Fixes for issues #291, #296. +- Add parameter 'missing' to diffreport allowing to replace NA with arbitrary + numbers. +- Add exportMetaboAnalyst function to export the feature matrix in MetaboAnalyst + format. +- Add parameter missing to featureValues allowing to specify how to handle/ + report missing values. +- The chromPeaks matrix has now rownames to uniquely identify chromatographic + peaks in an experiment. Chromatographic peak IDs start with "CP" followed by + a number. + +## Changes in version 3.3.2 + +- Add writeMSData method for XCMSnExp allowing to write mzML/mzXML files with + adjusted retention times (issue #294). +- Fix profEIC call for single-scan-peak (pull request #287 from @trljcl). +- Fix centWave avoiding that the same peak is reported multiple times if + fitgauss = TRUE is used (issue #284). +- featureSummary reports also RSD (relative standard deviations) of features + across samples (issue #286). +- Add parameters fixedMz and fixedRt to FillChromPeaksParam that allow to + increase the features' m/z and rt widths by a constant factor. 
+- Add option "sum" to featureValues' method parameter allowing to sum the + intensities of peaks that are assigned to the same feature in a file/sample. + +## Changes in version 3.3.1 + +- Add overlappingFeatures function to identify overlapping or close features. +- Add support for type = "apex_within" for featureDefinitions. +- Fix a bug in fillChromPeaks that would return the integrated signal being Inf. +- Fix for issue #267: error in fillChromPeaks when the retention time of the + peaks are outside of the retention time range of certain files. +- New featureSummary function to calculate basic feature summaries (number of + samples in which peaks were found etc). +- Parameter 'type' added to plotChromPeakDensity and 'whichPeaks' to + highlightChromPeaks. Both parameters are passed to the 'type' argument + of chromPeaks. +- Parameter 'type' in chromPeaks gets additional option "apex_within" to return + chromatographic peaks that have their apex within the defined rt and/or m/z + range. +- Add functions rla and rowRla to calculate RLA (relative log abundances). +- Add peaksWithMatchedFilter to perform peak detection in chromatographic + (MRM/SRM) data (issues #277 and #278). +- Add peaksWithCentWave to perform centWave peak detection in chromatographic + (MRM/SRM) data (issue #279). +- Add findChromPeaks,Chromatogram methods for CentWaveParam and + MatchedFilterParam (issue #280). + + +# xcms 3.1 + +## Changes in version 3.1.3 + +- Fix misplaced parenthesis in the check for multiple spectra in + findChromPeaks,OnDiskMSnExp,MSWParam. Thanks to @RonanDaly (PR #276). +- Update link to correct metlin page in diffreport result (issue #204). + +## Changes in version 3.1.2 + +- Add filterFeatureDefinitions function. +- Fix #273: better error message in case not a single feature could be defined + by groupChromPeaks. + +## Changes in version 3.1.1 + +- Reading raw files using xcmsSet or xcmsRaw uses now the automatic file type + detection feature from mzR. 
+- c function to concatenate XCMSnExp objects. +- groupnames method for XCMSnExp objects (issue #250). +- Fix #237: findPeaks.MSW was not throwing an error if applied to multi-spectrum + MS file. +- Fix #249: quantile call in adjustRtime PeakGroups without na.rm = TRUE. +- Fix #259 + + +# xcms 2.99 + +## Changes in version 2.99.10 + +- Fix #230: Failing vignettes on Windows. + +## Changes in version 2.99.9 + +- Chromatographic peak detection uses adjusted retention times on an aligned + XCMSnExp object (issue #213, #208). +- New parameter msLevel for processHistory,XCMSnExp. +- New parameter keepAdjustedRtime for filterMsLevel,XCMSnExp, dropChromPeaks, + XCMSnExp and dropFeatureDefinitions,XCMSnExp. +- Add parameter msLevel to chromatogram,XCMSnExp method (issue #205). +- Obiwarp alignment is now performed on one MS level and adjustment is applied + to all MS levels (issue #214). +- Add function plotMsData to plot intensity against retention time and m/z + against retention time for a MS slice in one sample. +- Add argument msLevel = 1L to extractMsData method (issue #223). +- New applyAdjustedRtime function to consolidate the alignment results, i.e. + replace the raw retention times in the XCMSnExp with the adjusted retention + times. +- [,XCMSnExp method gains argument keepAdjustedRtime to allow keeping adjusted + retention times in the sub-setting. +- Implement spectrapply,XCMSnExp to ensure returned results use adjusted + retention times (if present). +- [[,XCMSnExp method returns a Spectrum object with adjusted retention time, if + the XCMSnExp contains adjusted retention times. +- Argument 'sampleGroups' is mandatory for 'PeakDensityParam' (issue #228). +- Fix #191: Excessive memory use in fillPeaks. +- Fix #220: peaks matrix is missing column "sample" if no peaks were found in + the first sample. +- Fix #222: findChromPeaks does not return an XCMSnExp object filtered to a + single MS level despite peak detection is performed on a single level. 
+- Fix problem in plotMsData causing wrong colors to be used to label the data + points. + +## Changes in version 2.99.8 + +- Replace xcmsMSn Rnw with Rmd vignette to fix Windows build errors. + +## Changes in version 2.99.7 + +- Fix #201: Warnings: 'readMSData2' is deprecated, thanks to L. Gatto. +- Merge with BioC git after transition + +## Changes in version 2.99.6 + +- calibrate,XCMSnExp method that allows to calibrate chromatographic peaks. +- Export phenoDataFromPaths function (issue #195). +- Add arguments mz and rt to featureDefinitions method allowing to extract + features within the specified ranges. +- Increase n for the density function call in group density-based correspondence + by 2. +- Replace xcmsDirect.Rnw with rmarkdown-based vignette using the new user + interface. +- issue #196: removed the unnecessary requirement for same-dimension profile + matrices in adjustRtime,XCMSnExp,ObiwarpParam. +- issue #194: fixes in retcor.obiwarp: 1) subset raw data if scanrange != NULL. + 2) if the mz range of the two files to be aligned differ, expand them + correctly. Depending on the profStep and the mz values/ranges the matrices + were not expanded correctly. +- Potential problems in the plotChromPeakDensity function. + +## Changes in version 2.99.5 + +- Re-enable sleep parameter in findPeaks.centWave and findPeaks.matchedFilter. + +## Changes in version 2.99.4 + +- Add plotChromPeaks function to plot the definition (rt and mz range) of + detected chromatographic peaks of one file into the mz-rt plane. +- Add plotChromPeakImage function to plot the number of detected peaks along + the retention time axis per file as an image plot. +- Move Chromatogram class and functionality to the MSnbase package +- Add argument msLevel to the findChromPeaks method to allow (chromatographic) + peak detection also on MS level > 1. +- Polarity information was not read from mzXML files (issue #192).
+ +## Changes in version 2.99.3 + +- issue #188: determine file type from file content if file ending not known. + +## Changes in version 2.99.2 + +- issue #181: problem when isCentroided,Spectrum method returns NA because of + too few peaks in a spectrum. Fixed by checking in such cases all spectra in + the file. +- issue #184: add parameter sleep to do_groupChromPeaks_density function to be + backwards compatible with the old group.density code. + +## Changes in version 2.99.1 + +- extractMsData to extract raw MS data as a data.frame (issue #120). +- issue #175: an error is now thrown if no peak group was identified for peak + group retention time correction. +- issue #178: scanrange was collapsed when the adjusted range was reported + (pull request by Jan Stanstrup). +- issue #180: error when both parameters method and smooth are provided in the + retcor method. + +## Changes in version 2.99.0 + +- plotChromatogram and highlightChromPeaks functions. +- plotChromPeakDensity function. +- clean method for Chromatogram classes. +- Change default for ppm parameter in chromPeaks method to 0. +- extractChromatograms supports extraction of multiple rt and mz ranges. +- New parameter missing for extractChromatograms allowing to specify the + intensity value to be used for rts for which no signal is available within + the mz range. +- extractChromatograms returns Chromatograms of length equal to the number of + scans within the specified rt range, even if no signals are measured + (intensity values are NA). + + +# xcms 1.53 + +## Changes in version 1.53.1 + +- Increase parameter n for the density call in the peak density correspondence + method. This enables to separate neighboring peaks using small n (issue #161). + Thanks to Jan Stanstrup. + +# xcms 1.51 + +## Changes in version 1.51.11 + +- Parameter "filled" for featureValues (issue #157). 
+- Parameters "rt" and "mz" in chromPeaks method allowing to extract + chromatographic peaks from the specified ranges (issue #156). +- Fixed possible memory problem in obiwarp (issue #159). +- Update getPeaks to use non-deprecated API (issue #163). + +## Changes in version 1.51.10 + +- filterRt for Chromatogram class (issue #142). +- adjustRtimePeakGroups function (issue #147). +- adjustRtime,XCMSnExp,PeakGroupsParam and do_adjustRtime_peakGroups support + use of pre-defined matrix to perform alignment (issue #153). +- plotAdjustedRtime to visualize alignment results (issue #141). +- featureDefinitions and featureValues return DataFrame and matrix with rownames + corresponding to arbitrary feature IDs (issue #148). +- New peakGroupsMatrix slot for PeakGroupsParam class (issue #153). +- Issue #146: ensure adjusted retention times returned by the peakGroups method + to be in the same order than the raw retention times. + +## Changes in version 1.51.9 + +- fillChromPeaks, dropFilledChromPeaks methods and FillChromPeaksParam class. +- featureValues method. +- Extended new_functionality vignette. +- Change default backend for reading mzML files to pwiz. +- Issue #135: fix peak signal integration for centWave. +- Issue #139: problem with expand.mz and expand.rt in fillPeaks.chrom. +- Issue #137: Error in findChromPeaks if no peaks are found. + +## Changes in version 1.51.8 + +- Add Chromatogram class and extractChromatograms method. +- Issue #118: failing unit test on Windows build machine. +- Issue #133: error with c() and xcmsSet without peaks. +- Issue #134: xcmsSet constructor endless loop. + +## Changes in version 1.51.7 + +- Major renaming of methods and classes to follow the naming convention: + - chromatographic peak (chromPeak): the peaks identified in rt dimension. + - feature: mz-rt feature, being the grouped chromatographic peaks within and + across samples. +- Issue #127: failing unit test on Windows build machine. 
+ +## Changes in version 1.51.6 + +- groupFeatures and adjustRtime methods for XCMSnExp objects. +- New Param classes for groupFeatures and adjustRtime analysis methods: + FeatureDensityParam, MzClustParam, NearestFeaturesParam, FeatureGroupsParam + and ObiwarpParam. +- Issue #124 (filterRt,XCMSnExp returned empty object). + +## Changes in version 1.51.5 + +- MsFeatureData and XCMSnExp objects. +- features, features<-, adjustedRtime, adjustedRtime<-, featureGroups, + featureGroups<-, hasAlignedFeatures, hasAdjustedRtime and hasDetectedFeatures + methods. +- dropFeatures, dropFeatureGroups and dropAdjustedRtime methods. +- filterMz, filterRt, filterFile etc implemented. +- mz, intensity and rtime methods for XCMSnExp allowing to return values grouped + by sample. +- Issue #99 (rtrange outside of retention time range in getEIC,xcmsSet). +- Issue #101 (xcmsRaw function returns NULL if mslevel = 1 is specified). +- Issue #102 (centWave returns empty matrix if scales not OK). Thanks to + J. Stanstrup. +- Issue #91 (warning instead of error if no peaks in ROI). Thanks to J. Stanstrup. + +## Changes in version 1.51.4 + +- added deepCopy to avoid corrupting the original object, thanks to + J. Stanstrup, closes #93 + +## Changes in version 1.51.3 + +- binYonX binning function. +- imputeLinInterpol function providing linear interpolation of missing values. +- breaks_on_binSize and breaks_on_nBins functions to calculate breaks defining + bins. +- New vignette "new_functionality.Rmd" describing new and modified functionality + in xcms. +- Add do_detectFeatures_matchedFilter function. +- Add do_detectFeatures_centWave function. +- Add do_detectFeatures_centWaveWithPredIsoROIs function and unit test. +- Implement a new data import function. +- Add do_detectFeatures_MSW function and unit test. +- Argument stopOnError in xcmsSet function that allows to perform feature + detection on all files without stopping on errors. 
+- Method showError for xcmsSet objects that list all errors during feature + detection (if stopOnError = FALSE in the xcmsSet function). +- [ method to subset xcmsRaw objects by scans. +- profMat method to extract/create the profile matrix from/for an xcmsRaw. +- Add new detectFeatures methods for MSnExp and OnDiskMSnExp objects from the + MSnbase package. +- Add new CentWaveParam, MatchedFilterParam, MassifquantParam, MSWParam and + CentWavePredIsoParam parameter class to perform method dispatch in the + detectFeatures method. +- retcor.obiwarp uses the new binning methods for profile matrix generation. +- scanrange,xcmsRaw reports always a scanrange of 1 and length(object@scantime). +- scanrange,xcmsSet reports the scanrange eventually specified by the user in + the xcmsSet function. +- Fixed bug in rawMat (issue #58). +- Fix issue #60: findPeaks.massifquant always returns a xcmsPeaks object. + +## Changes in version 1.51.2 + +- As suggested by Jan Stanstrup, do not error if a centWave ROI + contains no data, closes #90 + +## Changes in version 1.51.1 + +- Fix incorrect indexing in getEIC function reported by Will Edmands, closes #92 + + +# xcms 1.49 + +## Changes in version 1.49.7 + +- Fix documentation warnings. + +## Changes in version 1.49.6 + +- Peak Picking function findPeaks.centWaveWithPredictedIsotopeROIs() and + findPeaks.addPredictedIsotopeFeatures(), which allow more sensitive detection + of isotope features. + +## Changes in version 1.49.5 + +- Some documentation updates. +- Preparation for a new binning function + +## Changes in version 1.49.4 + +- Fix getXcmsRaw that would prevent retention time correction to be applied + (issue #44 reported by Aleksandr). + +## Changes in version 1.49.3 + +- updateObject method for xcmsSet. +- xcms uses now BiocParallel for parallel processing. All other parallel + processing functions have been deprecated. +- Added missing package imports.
+- Fix bug in fillPeaksChromPar referencing non-existing variables i and + object. +- Fix bug in group.nearest: variable scoreList was mis-spelled (coreList). +- Remove all DUP = FALSE from the .C calls as they are ignored anyways. +- Re-organization of class, function and method definitions in R-files. +- Use roxygen2 to manage the DESCRIPTION's collate field. + +## Changes in version 1.49.2 + +- Initial support for exporting mzTab format. Since Changes are + still to be expected, xcms:::writeMzTab() is not yet exported. + +## Changes in version 1.49.1 + +- The raw CDF/mzXML/mzData/mzML is assumed to have scans sorted by m/z. + Instead of throwing an "m/z sort assumption violated !" error, + the data is re-read and on-demand sorted by m/z. + + +# xcms 1.47 + +## Changes in version 1.47.3 + +- Disable parallel processing in unit tests causing a timeout + on BioC build machines + +## Changes in version 1.47.2 + +- Fix problem in getEIC on xcmsSet objects reported by Alan Smith in issue #7 and + add a RUnit test case to test for this (test.issue7 in runit.getEIC.R). +- Changed some unnecessary warnings into messages. + +## Changes in version 1.47.2 + +- Disabled parallel processing in unit tests +- migrate dependencies from ncdf -> ncdf4 + + +# xcms 1.45 + +## Changes in version 1.45.7 + +- Disabled Rmpi support and usage on Windows + +## Changes in version 1.45.6 + +- J. Rainer implemented a [ method that allows to subset an xcmsSet. +- Fixed a problem in split.xcmsSet that did not split the phenoData properly. + Added some details to the documentation of xcmsSet-class. + +## Changes in version 1.45.5 + +- The sampclass method for xcmsSet will now return the content of the + column "class" from the data.frame in the phenoData slot, or if not + present, the interaction of all factors (columns) of that data.frame. +- The sampclass<- method replaces the content of the "class" column in + the phenoData data.frame.
If a data.frame is submitted, the interaction + of its columns is calculated and stored into the "class" column. +- Fixed a bug that resulted in a cryptic error message + when no input files are available to the xcmsSet function. + +## Changes in version 1.45.4 + +- Fixed a bug in the levelplot method for xcmsSet. + +## Changes in version 1.45.3 + +- xcmsSet now allows phenoData to be an AnnotatedDataFrame. +- new slots for xcmsRaw: + - mslevel: store the mslevel parameter submitted to xcmsRaw. + - scanrange: store the scanrange parameter submitted to xcmsRaw. +- new slots for xcmsSet: + - mslevel: stores the mslevel argument from the xcmsSet method. + - scanrange: to keep track of the scanrange argument of the xcmsSet method. +- new methods for xcmsRaw: + - levelplot: similar to the image method, plots m/z vs RT with color coded + intensities. + - mslevel: returns the value for the .mslevel slot. For downstream + compatibility, this method returns NULL if the object does not have the same + named slot. + - profinfo: same functionality as the profinfo method for xcmsSet. + - scanrange: returns the value for the scanrange slot. For downstream + compatibility, this method returns NULL if the object does not have the same + named slot. +- new methods for xcmsSet: + - getXcmsRaw: returns a xcmsRaw object for one or more files in the xcmsSet, + eventually applying retention time correction etc. + - levelplot: similar to the image method, plots m/z vs RT with color coded + intensities. Allows in addition to highlight identified peaks. + - mslevel: returns the value for the mslevel slot. For downstream + compatibility, this method returns NULL if the object does not have the same + named slot. + - profMethod: same functionality as the profMethod method of xcmsRaw. + - profStep: same functionality as the profStep method of xcmsRaw. + - scanrange: returns the value for the scanrange slot. 
For downstream + compatibility, this method returns NULL if the object does not have the same + named slot. +- show method for xcmsSet updated to display also informations about the mslevel + and scanrange. +- Elaborated some documentation entries. +- rtrange and mzrange for xcmsRaw method plotEIC use by default the full RT and + m/z range. +- Added arguments "lty" and "add" to plotEIC method for xcmsRaw. +- getEIC without specifying mzrange returns the ion chromatogram for the full + m/z range (i.e. the base peak chromatogram). +- Checking if phenoData is a data.frame or AnnotatedDataFrame and throw an error + otherwise. +- xcmsSet getEIC method for water Lock mass corrected files for a subset of + files did not evaluate whether the specified files were corrected. + +## Changes in version 1.45.2 + +- The xcms split() function now accepts factors that are shorter than the number + of samples in the xcmsSet, following more closely the standard split() + behaviour + +## Changes in version 1.45.1 + +- plotrt now allows col to be a vector of color definition, + same as the plots for retcor methods. +- Added $ method to access phenoData columns in a eSet/ExpressionSet like + manner. +- Allow to use the "parallel" package for parallel processing of the functions + xcmsSet and fillPeaks.chrom. +- Thanks to J. Rainer! + + +# xcms 1.43 + +## Changes in version 1.43.3 + +- Give a more verbose error message when file not found + +## Changes in version 1.43.2 + +- Use ProtGenerics, adapted xcms peaks() + +## Changes in version 1.43.1 + +- function plotQC() for plotting various QC plots on RT and m/z + + +# xcms 1.41 + +## Changes in version 1.41.1 + +- fix sampclass generation from phenoData if some combinations of factors don't + exist +- disable parallel code in manpages to avoid issues on BioC windows build farm + machines + + +# xcms 1.39 + +## Changes in version 1.39.6 + +- Massifquant reports the maximum intensity for each isotope trace (peak). 
This + is useful for interactive parameter optimization. +- Major memory reduction in parallel fillPeaks() thanks to Jan Stanstrup. Now + using an environment to mirror gvals to each list item in the very large + argList. + +## Changes in version 1.39.4 + +- Fixed write.cdf(), which had an intensity offset of +1, added a unit test + +## Changes in version 1.39.3 + +- New R-devel check unload better. Lingering ramp code removed, import from + mzR. Cleaned up other errors in package check. + +## Changes in version 1.39.1 + +- Updated doubleMatrix c code to allow for larger profile matrixes +- Moved inst/doc to vignettes + + +# xcms 1.37 + +## Changes in version 1.37.6 + +- Introducing write.mzQuantML(xcmsSet) to export the peak list and grouped + matrix to the PSI format mzQuantML (see http://www.psidev.info/mzquantml) +- Add Brigham Young University to LICENSE file for copyright purposes. +- Add copyright information display when running findPeaks.massifquant() + within xcmsRaw.R +- Clean and update documentation for findPeaks.massifquant-methods.Rd +- Remove unused parameters in findKalmanROIs() within xcmsRaw.R + +## Changes in version 1.37.5 + +- fixed bug in retcor.obiwarp where the scanrange of the first sample would be + checked instead of the center sample + +## Changes in version 1.37.4 + +- Skip t-test in diffreport() if one class has less than 2 samples. + +## Changes in version 1.37.3 + +- fixed bug in patternVsRowScore (group.nearest) that was introduced by the + modifications in rev 65169 and caused features to be aligned that were far + outside the given m/z and retention time windows. + +## Changes in version 1.37.1 + +- fixed fillPeaks, which 1) dropped non-standard columns and 2) failed if + nothing to do, based on patches by Tony Larson. 
+ +## Changes in version 1.37.1 + +- Introducing msn2xcmsRaw, to allow findPeaks() on MS2 and MSn data + + +# xcms 1.35 + +## Changes in version 1.35.7 + +- fixed indexing bug in group.nearest, which under certain circumstances caused + all peaks in the first sample to be ignored (reported by Tony Larson) + +## Changes in version 1.35.6 + +- Obiwarp retention time alignment error-ed if scanrange was used as a parameter + setting during xcmsSet/peak detection. The method now tries to automatically + find the set scanrange and uses this range for alignment. + +## Changes in version 1.35.4 + +- Introducing parallel fillPeaks +- Replace snow requirement with minimum R version 2.14.0 + +## Changes in version 1.35.3 + +- if group.density was used with very low minfrac settings (< 0.5) it did not + return all feature groups, but only those that include features from at least + 50% of samples in a group. This limitation was removed. + +## Changes in version 1.35.2 + +- Behind the scenes xcms now uses the xcmsSource class to read raw data. This + allows e.g. to write a class that pulls raw data from e.g. a database +- massifquant: simplified logic structure of Tracker::claimDataIdx resolved + failure on new test case. +- massifquant: reporting features data structure compatible with multiple sample + comparison within XCMS. + +## Changes in version 1.35.1 + +- The mzData export is now much faster and uses less memory + + +# xcms 1.33 + +## Changes in version 1.33.16 + +- diffreport and plotEIC have a new parameter mzdec, which is the number of + decimal places of the m/z values in the EIC plot title + +## Changes in version 1.33.16 + +- Lock mass gap filler now works with netCDF lock mass function file to find the + exact times of the scans and works with the newer Waters MS instruments.
+ +## Changes in version 1.33.15 + +- scanrange is now honoured in xcmsSet, also when in parallel mode + +## Changes in version 1.33.14 + +- scanrange is now honoured in xcmsRaw, and consequently also in + xcmsSet(matchedFilter), where previously it was ignored. + +## Changes in version 1.33.13 + +- write.cdf() has been fixed to write files AMDIS can read + +## Changes in version 1.33.12 + +- write.mzData adds Polarity to the file if available + +## Changes in version 1.33.11 + +- centWave uses a new method to estimate local noise which improves detection of + closely spaced peaks +- group.mzClust was failing when result had one peak + +# xcms 1.32 and before + +For more details and all changes before May 2012 please see the (now +discontinued) CHANGELOG in the source package (inst/ folder). + +## CHANGED BEHAVIOUR since Version 1.32: + +Other Changes since Version 1.32: +- improved mzData writing, now includes MSn spectra and less verbose. +- improved netCDF writing, but not yet good enough for AMDIS + +## CHANGED BEHAVIOUR since Version 1.14: + +- centWave may report a smaller set of peaks, due to a small bug + in the ROI algorithm some features with mass deviation > ppm were retained. + +Other Changes since Version 1.14: + +- New method for grouping: an algorithm inspired by mzMine + group(method="nearest") has been implemented. It is slower + than group(method="density"). It can individually group + close-eluting peaks of very similar mass + +- New method for retention time correction: + The retcor(method="obiwarp") algorithm operates on the raw data, + and thus allows to correct runs without well-behaving + peak groups, or without peak picking at all. + +- fillPeaks(method="MSW") is now also available + for direct infusion spectra. The findPeaks(method="MSW") + now returns several intensities, and correctly reports + mzmin and mzmax for peaks.
+ +- centWave now uses dynamic memory allocation, needs much less memory, + and these BUF related errors should be a thing of the past. + +- centWave gains an optional argument "noise", + which is useful for data that was centroided without any intensity threshold, + centroids with intensity < "noise" are omitted from ROI detection + +- the fillPeaks() methods now remember which was + an observed, and which was a "filled" peak. + +- For direct infusion spectra diffreport() now shows + the raw peak shapes, and also indicated "real" and "filled" peaks. + +- xcmsRaw can now filter for positive/negative spectra, + if the file includes both polarities. xcmsSet() can pass + the polarity to contain positive/negative peaks only. diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 3cea0d758..b688023c4 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -19,14 +19,14 @@ setGeneric("addProcessHistory", function(object, ...) #' @description #' #' The `adjustRtime` method(s) perform retention time correction (alignment) -#' between chromatograms of different samples. Alignment is performed by defaul -#' on MS level 1 data. Retention times of spectra from other MS levels, if -#' present, are subsequently adjusted based on the adjusted retention times -#' of the MS1 spectra. Note that calling `adjustRtime` on a *xcms* result object -#' will remove any eventually present previous alignment results as well as -#' any correspondence analysis results. To run a second round of alignment, -#' raw retention times need to be replaced with adjusted ones using the -#' [applyAdjustedRtime()] function. +#' between chromatograms of different samples/dataset. Alignment is performed +#' by default on MS level 1 data. Retention times of spectra from other MS +#' levels, if present, are subsequently adjusted based on the adjusted +#' retention times of the MS1 spectra. 
Note that calling `adjustRtime` on a +#' *xcms* result object will remove any eventually present previous alignment +#' results as well as any correspondence analysis results. To run a second +#' round of alignment, raw retention times need to be replaced with adjusted +#' ones using the [applyAdjustedRtime()] function. #' #' The alignment method can be specified (and configured) using a dedicated #' `param` argument. @@ -40,7 +40,7 @@ setGeneric("addProcessHistory", function(object, ...) #' The alignment is performed directly on the [profile-matrix] and can hence #' be performed independently of the peak detection or peak grouping. #' -#' - `PeakGroupsParam`: performs retention time correctoin based on the +#' - `PeakGroupsParam`: performs retention time correction based on the #' alignment of features defined in all/most samples (corresponding to #' *house keeping compounds* or marker compounds) (Smith 2006). First the #' retention time deviation of these features is described by fitting either a @@ -60,6 +60,15 @@ setGeneric("addProcessHistory", function(object, ...) #' in `param`. See also [do_adjustRtime_peakGroups()] for the core API #' function. #' +#' - `LamaParama`: This function performs retention time correction by aligning +#' chromatographic data to an external reference dataset (concept and initial +#' implementation by Carl Brunius). The process involves identifying and +#' aligning peaks within the experimental chromatographic data, represented +#' as an `XcmsExperiment` object, to a predefined set of landmark features +#' called "lamas". These landmark features are characterized by their +#' mass-to-charge ratio (m/z) and retention time. See [LamaParama()] for more +#' information on the method. +#' #' @section Subset-based alignment: #' #' All alignment methods allow to perform the retention time correction on a @@ -189,9 +198,9 @@ setGeneric("addProcessHistory", function(object, ...)
#' be used to interpolate corrected retention times for all peak groups. #' Can be either `"loess"` or `"linear"`. #' -#' @param span For `PeakGroupsParam`: `numeric(1)` defining the degree of -#' smoothing (if `smooth = "loess"`). This parameter is passed to the -#' internal call to [loess()]. +#' @param span For `PeakGroupsParam`: `numeric(1)` defining +#' the degree of smoothing (if `smooth = "loess"`). This parameter is +#' passed to the internal call to [loess()]. #' #' @param subset For `ObiwarpParam` and `PeakGroupsParam`: `integer` with the #' indices of samples within the experiment on which the alignment models @@ -206,7 +215,7 @@ setGeneric("addProcessHistory", function(object, ...) #' #' @param value For all assignment methods: the value to set/replace. #' -#' @param x An `ObiwarpParam` or `PeakGroupsParam` object. +#' @param x An `ObiwarpParam`, `PeakGroupsParam` or `LamaParama` object. #' #' @param ... ignored. #' @@ -219,7 +228,8 @@ setGeneric("addProcessHistory", function(object, ...) #' `XcmsExperiment` with the adjusted retention times stored in an new #' *spectra variable* `rtime_adjusted` in the object's `spectra`. #' -#' `ObiwarpParam` and `PeakGroupsParam` return the respective parameter object. +#' `ObiwarpParam`, `PeakGroupsParam` and `LamaParama` return the respective +#' parameter object. #' #' `adjustRtimeGroups` returns a `matrix` with the retention times of *marker* #' features in each sample (each row one feature, each row one sample). @@ -230,7 +240,7 @@ setGeneric("addProcessHistory", function(object, ...) #' #' @seealso [plotAdjustedRtime()] for visualization of alignment results. 
#' -#' @author Colin Smith, Johannes Rainer +#' @author Colin Smith, Johannes Rainer, Philippine Louail, Carl Brunius #' #' @references #' @@ -321,6 +331,9 @@ setGeneric("checkBack<-", function(object, value) standardGeneric("checkBack<-") #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) +#' library(xcms) +#' library(MsExperiment) #' faahko_sub <- loadXcmsData("faahko_sub2") #' #' ## Get EICs for every detected chromatographic peak @@ -698,6 +711,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-")) #' @examples #' #' ## Load a test data set with detected peaks +#' library(xcms) +#' library(MsExperiment) #' faahko_sub <- loadXcmsData("faahko_sub2") #' #' ## Disable parallel processing for this example @@ -999,6 +1014,8 @@ setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-") #' @examples #' #' ## Load a test data set with identified chromatographic peaks +#' library(xcms) +#' library(MsExperiment) #' res <- loadXcmsData("faahko_sub2") #' #' ## Disable parallel processing for this example @@ -1006,7 +1023,7 @@ setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-") #' #' ## Perform the correspondence. We assign all samples to the same group. #' res <- groupChromPeaks(res, -#' param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(res))))) +#' param = PeakDensityParam(sampleGroups = rep(1, length(res)))) #' #' ## For how many features do we lack an integrated peak signal? #' sum(is.na(featureValues(res))) @@ -1378,6 +1395,8 @@ setGeneric("group", function(object, ...) standardGeneric("group")) #' #' @param value Replacement value for `<-` methods. #' +#' @param x The parameter object. +#' #' @param ... Optional parameters. #' #' @return For `groupChromPeaks`: either an [XcmsExperiment()] or [XCMSnExp()] @@ -1895,6 +1914,8 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...) 
#' @examples #' #' ## Load a test data set with detected peaks +#' library(xcms) +#' library(MsExperiment) #' faahko_sub <- loadXcmsData("faahko_sub2") #' #' ## Disable parallel processing for this example @@ -2046,6 +2067,8 @@ setGeneric("stitch.netCDF.new", function(object, lockMass) standardGeneric("stit #' @examples #' #' ## Load a test data set with detected peaks +#' library(xcms) +#' library(MsExperiment) #' faahko_sub <- loadXcmsData("faahko_sub2") #' #' ## Set up parameter to save as .RData file diff --git a/R/DataClasses.R b/R/DataClasses.R index a48dfcb7a..fab0cc414 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -452,6 +452,8 @@ setClass("XProcessHistory", #' bell curve. See https://github.com/sneumann/xcms/pull/685 and #' https://doi.org/10.1186/s12859-023-05533-4 for more information. #' +#' @param x The parameter object. +#' #' @details #' #' The centWave algorithm is most suitable for high resolution @@ -487,7 +489,7 @@ setClass("XProcessHistory", #' detection in purely chromatographic data. #' #' @references -#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +#' Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly #' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} #' 2008, 9:504 #' @@ -517,13 +519,14 @@ NULL #' cwp #' #' ## Perform the peak detection using centWave on some of the files from the -#' ## faahKO package. Files are read using the readMSData from the MSnbase -#' ## package +#' ## faahKO package. Files are read using the `readMsExperiment` function +#' ## from the MsExperiment package #' library(faahKO) #' library(xcms) +#' library(MsExperiment) #' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, #' full.names = TRUE) -#' raw_data <- readMSData(fls[1], mode = "onDisk") +#' raw_data <- readMsExperiment(fls[1]) #' #' ## Perform the peak detection using the settings defined above. 
#' res <- findChromPeaks(raw_data, param = cwp) @@ -892,7 +895,7 @@ setClass("MatchedFilterParam", #' centWave algorithm, which includes wavelet estimation. #' #' @details This algorithm's performance has been tested rigorously -#' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. +#' on high resolution LC/(OrbiTrap, TOF)-MS data in centroid mode. #' Simultaneous kalman filters identify chromatographic peaks and calculate #' their area under the curve. The default parameters are set to operate on #' a complex LC-MS Orbitrap sample. Users will find it useful to do some @@ -1108,6 +1111,7 @@ NULL #' #' @examples #' +#' library(MSnbase) #' ## Create a MSWParam object #' mp <- MSWParam() #' ## Change snthresh parameter @@ -1467,6 +1471,45 @@ setClass("PeakGroupsParam", else TRUE }) +setClass("LamaParama", + slots = c(lamas = "matrix", + method = "character", + span = "numeric", + outlierTolerance = "numeric", + zeroWeight = "numeric", + ppm = "numeric", + tolerance = "numeric", + toleranceRt = "numeric", + bs = "character", + rtMap = "list", + nChromPeaks = "numeric"), + contains = "Param", + prototype = prototype( + lamas = matrix(ncol = 2, nrow = 0), + method = "loess", + span = 0.5, + outlierTolerance = 3, + zeroWeight = 10, + ppm = 20, + tolerance = 0, + toleranceRt = 20, + bs = "tp", + rtMap = list(), + nChromPeaks = numeric()), + validity = function(object) { + msg <- NULL + if (!nrow(object@lamas)) + msg <- c(msg, paste0("'lamas' cannot be empty")) + else { + } + if (length(object@method) > 1 | + !all(object@method %in% c("gam", "loess"))) + msg <- c(msg, paste0("'method' has to be either \"", + "gam\" or \"loess\"!")) + msg + }) + + setClass("ObiwarpParam", slots = c(binSize = "numeric", centerSample = "integer", @@ -1797,6 +1840,7 @@ setClass("MsFeatureData", contains = c("environment"), #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system 
#' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 7b3672046..1c86f0496 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1357,6 +1357,66 @@ setMethod( object }) +#'@rdname LamaParama +setMethod( + "adjustRtime", signature(object = "XcmsExperiment", param = "LamaParama"), + function(object, param, BPPARAM = bpparam(), ...) { + if (!hasChromPeaks(object)) + stop("'object' needs to have detected chromPeaks. ", + "Run 'findChromPeaks()' first") + if (hasAdjustedRtime(object)) + stop("Alignment results already present. Please either remove ", + "them with 'dropAdjustedRtime' in order to perform an ", + "alternative, new, alignment, or use 'applyAdjustedRtime'", + " prior 'adjustRtime' to perform a second round of ", + "alignment.") + fidx <- as.factor(fromFile(object)) + rt_raw <- split(rtime(object), fidx) + idx <- seq_along(object) + + # Check if user as ran matching lama vs chrompeaks beforehand + if (length(param@rtMap) == 0) + param <- matchLamasChromPeaks(object, param) + rtMap <- param@rtMap + if (length(rtMap) != length(object)) + stop("Mismatch between the number of files matched to lamas: ", + length(rtMap), " and files in the object: ", length(object)) + + # Make model and adjust retention for each file + rt_adj <- bpmapply(rtMap, rt_raw, idx, FUN = function(x, y, i, param) { + if (nrow(x) >= 10) { # too strict ? Gam always throws error when less than that and loess does not work that well either. + .adjust_rt_model(y, method = param@method, + rt_map = x, span = param@span, + resid_ratio = param@outlierTolerance, + zero_weight = param@zeroWeight, + bs = param@bs) + } else { + warning("Too few chrompeaks could be assigned to external", + " reference peaks (lamas) for sample ", i, + ". 
Skipping alignment for this sample.") + y + } + }, SIMPLIFY = FALSE, BPPARAM = BPPARAM, MoreArgs = list(param = param)) + + # post processing housekeeping steps + pt <- vapply(object@processHistory, processType, character(1)) + idx_pg <- .match_last(.PROCSTEP.PEAK.GROUPING, pt, + nomatch = -1L) + if (idx_pg > 0) + ph <- object@processHistory[idx_pg] + else ph <- list() + object <- dropFeatureDefinitions(object) + object@spectra$rtime_adjusted <- unlist(rt_adj, use.names = FALSE) + object@chromPeaks <-.applyRtAdjToChromPeaks( + .chromPeaks(object), rtraw = rt_raw, rtadj = rt_adj) + xph <- XProcessHistory( + param = param, type. = .PROCSTEP.RTIME.CORRECTION, + fileIndex = seq_along(object)) + object@processHistory <- c(object@processHistory, ph, list(xph)) + validObject(object) + object + }) + #' @rdname XcmsExperiment setMethod("dropAdjustedRtime", "XcmsExperiment", function(object) { if (!hasAdjustedRtime(object)) diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index 80aaf825d..c45b0940a 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -582,3 +582,382 @@ adjustRtimeSubset <- function(rtraw, rtadj, subset, } rtadj } + +########################################################### +###### LamaParama + +#' @title Landmark-based alignment: aligning a dataset against an external +#' reference +#' +#' @aliases LamaParama-class +#' +#' @description +#' Alignment is achieved using the ['adjustRtime()'] method with a `param` of +#' class `LamaParama`. This method corrects retention time by aligning +#' chromatographic data with an external reference dataset. +#' +#' Chromatographic peaks in the experimental data are first matched to +#' predefined (external) landmark features based on their mass-to-charge ratio +#' and retention time and subsequently the data is aligned by minimizing the +#' differences in retention times between the matched chromatographic peaks and +#' lamas. This adjustment is performed file by file. 
+#' +#' Adjustable parameters such as `ppm`, `tolerance`, and `toleranceRt` define +#' acceptable deviations during the matching process. It's crucial to note that +#' only lamas and chromatographic peaks exhibiting a one-to-one mapping are +#' considered when estimating retention time shifts. If a file has fewer +#' than 10 peaks matching with lamas, no adjustment will be performed, and the +#' retention times will be returned as-is. Users can evaluate this matching, for example, +#' by checking the number of matches and ranges of the matching peaks, by first +#' running [matchLamasChromPeaks()]. +#' +#' Different warping methods are available; users can choose to fit a *loess* +#' (`method = "loess"`, the default) or a *gam* (`method = "gam"`) between the +#' reference data points and observed matching ChromPeaks. Additional +#' parameters such as `span`, `outlierTolerance`, `zeroWeight`, +#' and `bs` are specific to these models. These parameters offer flexibility +#' in fine-tuning how the matching chromatographic peaks are fitted to the +#' lamas, thereby generating a model to align the overall retention time for +#' a single file. +#' +#' Other functions related to this method: +#' +#' - `LamaParama()`: return the respective parameter object for alignment +#' using the `adjustRtime()` function. It is also the input for the functions +#' listed below. +#' +#' - `matchLamasChromPeaks()`: quickly matches each file's ChromPeaks +#' to Lamas, allowing the user to evaluate the matches for each file. +#' +#' - `summarizeLamaMatch()`: generates a summary of the `LamaParama` method. +#' See below for the details of the return object. +#' +#' - `matchedRtimes()`: Access the list of `data.frame` saved in the +#' `LamaParama` object, generated by the `matchLamasChromPeaks()` function. +#' +#' - `plot()`: plot the chromatographic peaks versus the reference lamas as +#' well as the fitting line for the chosen model type.
The user can decide +#' what file to inspect by specifying the file index with the parameter +#' `index`. +#' +#' +#' @param BPPARAM For `matchLamasChromPeaks()`: parallel processing setup. +#' Defaults to `BPPARAM = bpparam()`. See [bpparam()] for more information. +#' +#' @param bs For `LamaParama()`: `character(1)` defining the GAM smoothing method. +#' (defaults to thin plate, `bs = "tp"`) +#' +#' @param colPoints For `plot()`: color of the plotted data points. +#' +#' @param colFit For `plot()`: color of the fitting line. +#' +#' @param index For `plot()`: `numeric(1)` index of the file that should be +#' plotted. +#' +#' @param lamas For `LamaParama`: `matrix` or `data.frame` with the m/z and +#' retention times values of features (as first and second column) from the +#' external dataset on which the alignment will be based. +#' +#' +#' @param method For `LamaParama`: `character(1)` with the type of warping. +#' Either `method = "gam"` or `method = "loess"` (default). +#' +#' @param object An object of class `XcmsExperiment` with defined ChromPeaks. +#' +#' @param outlierTolerance For `LamaParama`: `numeric(1)` defining the settings +#' for outlier removal during the fitting. By default +#' (with `outlierTolerance = 3`), all data points with squared residuals +#' of at least 3 times the mean squared residual of all data points from +#' the first, initial fit, are removed from the final model fit. +#' +#' @param param An object of class `LamaParama` that will later be used for +#' adjustment using the [adjustRtime()] function. +#' +#' @param ppm For `LamaParama`: `numeric(1)` defining the m/z-relative maximal +#' allowed difference in m/z between `lamas` and chromatographic peaks. Used +#' for the mapping of identified chromatographic peaks and lamas. +#' +#' @param span For `LamaParama`: `numeric(1)` defining +#' the degree of smoothing (`method = "loess"`). This parameter is passed +#' to the internal call to [loess()].
+#' +#' @param tolerance For `LamaParama`: `numeric(1)` defining the absolute +#' acceptable difference in m/z between lamas and chromatographic peaks. +#' Used for the mapping of identified chromatographic peaks and `lamas`. +#' +#' @param toleranceRt For `LamaParama`: `numeric(1)` defining the absolute +#' acceptable difference in retention time between lamas and +#' chromatographic peaks. Used for the mapping of identified chromatographic +#' peaks and `lamas`. +#' +#' @param x For `plot()`: object of class `LamaParama` to be plotted. +#' +#' @param xlab,ylab For `plot()`: x- and y-axis labels. +#' +#' @param zeroWeight For `LamaParama`: `numeric(1)` defining the weight of the +#' artificial (0, 0) data point that is added before the first +#' lama-chromatographic peak pair when fitting the model. Values larger than 1 reduce warping problems in the early RT +#' range. +#' +#' @param ... For `plot()`: extra parameters to be passed to the function. +#' +#' @return +#' For `matchLamasChromPeaks()`: A `LamaParama` object with new slot `rtMap` +#' composed of a list of `data.frame`s representing the 1:1 matches between Lamas +#' (ref) and ChromPeaks (obs). To access this, `matchedRtimes()` can be used. +#' +#' For `matchedRtimes()`: A list of `data.frame` representing matches +#' between chromPeaks and `lamas` for each file. +#' +#' For `summarizeLamaMatch()`: A `data.frame` with: +#' +#' - "Total_peaks": total number of chromatographic peaks in the file. +#' +#' - "Matched_peaks": The number of matched peaks to Lamas. +#' +#' - "Total_lamas": Total number of Lamas. +#' +#' - "Model_summary": `summary.loess` or `summary.gam` object for each file.
+#' +#' @examples +#' ## load test and reference datasets +#' ref <- loadXcmsData("xmse") +#' tst <- loadXcmsData("faahko_sub2") +#' +#' ## create lamas input from the reference dataset +#' library(MsExperiment) +#' f <- sampleData(ref)$sample_type +#' f[f == "QC"] <- NA +#' ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f)) +#' ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")] +#' +#' ## Set up the LamaParama object +#' param <- LamaParama(lamas = ref_mz_rt, method = "loess", span = 0.5, +#' outlierTolerance = 3, zeroWeight = 10, ppm = 20, +#' tolerance = 0, toleranceRt = 20, bs = "tp") +#' +#' ## input into `adjustRtime()` +#' tst_adjusted <- adjustRtime(tst, param = param) +#' +#' ## run diagnostic functions to pre-evaluate alignment +#' param <- matchLamasChromPeaks(tst, param = param) +#' mtch <- matchedRtimes(param) +#' +#' ## Access summary of matches and model information +#' summary <- summarizeLamaMatch(param) +#' +#' ## coverage for each file +#' summary$Matched_peaks / summary$Total_peaks * 100 +#' +#' ## Access the information on the model for the first file +#' summary$Model_summary[[1]] +#' +#' @note +#' If fewer than 10 matches are found for a file when using `matchLamasChromPeaks()`, the +#' retention times of that file will not be adjusted when calling [adjustRtime()] with the same +#' `LamaParama` and `XcmsExperiment` object. +#' +#' To see examples on how to utilize this method and its functionality, +#' see the vignette. +#' +#' @author Carl Brunius, Philippine Louail +#' +#' @name LamaParama +NULL + +#' @description +#' +#' Match anchor (reference) peaks to chrompeaks based on rt and m/z. Peaks with +#' multiple matches are excluded. +#' +#' @param obs_peaks `matrix` of 2 columns with the m/z and retention times of +#' chrompeaks of one data file. +#' +#' @param ref_anchors `matrix` of (external) reference anchor peaks with +#' columns representing the anchor peaks' m/z and retention times (in that +#' order!).
Possibly generated by the `.getAnchorePeaks()` function. +#' +#' @param ppm maximal acceptable (m/z relative) difference in m/z values for +#' peaks to be considered matching. +#' +#' @param tolerance maximal absolute difference in m/z values for peaks to be +#' considered matching. +#' +#' @param toleranceRt maximal absolute difference in retention times for peaks +#' to be considered matching. +#' +#' @return a `data.frame` with columns `"ref"` and `"obs"` with the retention +#' times of the pairs of matched peaks. This `data.frame` can be used +#' in `.adjust_rt_model`'s parameter `rt_map`. +#' +#' @author Johannes Rainer, Philippine Louail +#' +#' @importFrom MetaboCoreUtils mclosest +#' +#' @noRd +.match_reference_anchors <- function(obs_peaks, ref_anchors, ppm = 20, + tolerance = 0, toleranceRt = 5) { + idx <- mclosest(obs_peaks, ref_anchors, + ppm = c(ppm, 0), tolerance = c(tolerance, toleranceRt)) + nna <- !is.na(idx) + idx <- cbind(obs = which(nna), ref = idx[nna]) + dups <- idx[duplicated(idx[, 2L]), 2L] + idx <- idx[!idx[, 2L] %in% dups, , drop = FALSE] + data.frame(ref = ref_anchors[idx[, 2L], 2L], + obs = obs_peaks[idx[, 1L], 2L]) +} + +#' @description +#' +#' Compute a model representing the relationship between observed and reference +#' retention times (`rt_map`) and adjust the raw retention times (`rt_raw`) +#' based on this. +#' +#' @param rt_map `data.frame` with *reference* retention times of LaMas and +#' *observed* retention times of matching peaks in the same sample from +#' which the retention times in `rt_raw` are.
+#' +#' @author Carl Brunius, Philippine Louail +#' +#' @importFrom stats predict +#' +#' @importFrom MsCoreUtils force_sorted +#' +#' @noRd +.adjust_rt_model <- function(rt_raw, + method = c("loess", "gam"), + rt_map, + span = 0.5, + resid_ratio = 3, + zero_weight = 10, + bs = "tp") { + model <- .rt_model(method = method, + rt_map, span = span, + resid_ratio = resid_ratio, + zero_weight = zero_weight, + bs = bs) + adj <- predict(model, newdata = data.frame(obs = rt_raw)) + if (is.unsorted(adj, na.rm = TRUE)){ + warning("Adjusted retention times are not sorted, linear ", + "interpolation will be performed for the unsorted data points") + adj <- force_sorted(adj) + } + idx <- which(rt_raw < min(rt_map$obs)) + lidx <- length(idx) + if (lidx) + adj[idx] <- rt_raw[idx] - (rt_raw[lidx + 1L] - adj[lidx + 1L]) + idx <- which(rt_raw > max(rt_map$obs)) + if (length(idx)) + adj[idx] <- rt_raw[idx] - (rt_raw[idx[1L] - 1L] - adj[idx[1L] - 1L]) + adj +} + +#' @description +#' +#' Get a model representing the differences between observed and reference +#' retention times (parameter `rt_map`). After an initial fit, the model is +#' re-fitted excluding potential outliers. +#' +#' @param rt_map `data.frame` with the observed (column `"obs"`) and reference +#' (column `"ref"`) retention time pairs. +#' +#' @importFrom stats loess resid +#' +#' @author Carl Brunius, Philippine Louail +#' +#' @noRd +.rt_model <- function(method = c("loess", "gam"), + rt_map, span = 0.5, + resid_ratio = 3, + zero_weight = 10, + bs = "tp"){ + rt_map <- rt_map[order(rt_map$obs), ] + # add first row of c(0,0) to set a fix timepoint. 
+ rt_map <- rbind(c(0,0), rt_map) + weights <- rep(1, nrow(rt_map)) + weights[1L] <- zero_weight + + if (method == "gam") { + .check_gam_library() + model <- mgcv::gam(ref ~ s(obs, bs = bs), weights = weights, + data = rt_map) + } else + model <- loess(ref ~ obs, data = rt_map, span = span, + weights = weights) + ## compute outliers + SSq <- resid(model)^2 + meanSSq <- mean(SSq) + not_outlier <- (SSq / meanSSq) < resid_ratio + + ## re-run only if there is outliers and keep the zero. + if (any(!not_outlier)){ + not_outlier[1] <- TRUE + rt_map <- rt_map[not_outlier, , drop = FALSE] + weights <- weights[not_outlier] + if (method == "gam") { + model <- mgcv::gam(ref ~ s(obs, bs = "tp"), weights = weights, + data = rt_map) + } else { + model <- loess(ref ~ obs, data = rt_map, span = span, + weights = weights) + } + } + model +} + +#' Simple helper to ensure gam package is installed - if needed. +#' +#' @noRd +.check_gam_library <- function() { + if (!requireNamespace("mgcv", quietly = TRUE)) + stop("'method = \"gam\"' requires the package 'mgcv'. Please ", + "install with 'BiocInstaller::install(\"mgcv\")'") +} + +#' @export +#' @rdname LamaParama +matchLamasChromPeaks <- function(object, param, BPPARAM = bpparam()){ + if (!hasChromPeaks(object)) + stop("'object' needs to have detected ChromPeaks. 
", + "Run 'findChromPeaks()' first.") + f <- factor(chromPeaks(object)[, "sample"], levels = seq_along(object)) + cp_raw <- split.data.frame(chromPeaks(object)[, c("mz", "rt")], f) + param@nChromPeaks <- vapply(cp_raw, nrow, numeric(1)) + param@rtMap <- bplapply(cp_raw, FUN = function(x) { + .match_reference_anchors(obs_peaks = x, ref_anchors = param@lamas, + ppm = param@ppm, tolerance = param@tolerance, + toleranceRt = param@toleranceRt)}, + BPPARAM = BPPARAM) + param +} + +#' @export +#' @rdname LamaParama +summarizeLamaMatch <- function(param){ + if (!inherits(param, "LamaParama")) + stop("The input needs to be of class 'LamaParama'") + if (length(param@nChromPeaks) == 0 || length(param@rtMap) == 0) + stop("Summary inputs are missing. Please run `matchLamasChromPeaks` ", + "first.") + res <- data.frame(Total_peaks = param@nChromPeaks, + Matched_peaks = vapply(param@rtMap, nrow, numeric(1)), + Total_lamas = nrow(param@lamas)) + res_model <- lapply(param@rtMap, function(x){ + s <- summary(.rt_model(method = param@method, + rt_map= x, span = param@span, + resid_ratio = param@outlierTolerance, + zero_weight = param@zeroWeight, + bs = param@bs)) + }) + res$Model_summary <- res_model + res +} + +#' @export +#' @rdname LamaParama +matchedRtimes <- function(param){ + if(!inherits(param, "LamaParama")) + stop("The inputs need to be of class 'LamaParama'") + rtMap <- param@rtMap + rtMap +} diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index cb89383e2..9767c8f06 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -68,7 +68,8 @@ #' @family core peak detection functions #' #' @references -#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +#' +#' Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly #' sensitive feature detection for high resolution LC/MS" #' \emph{BMC Bioinformatics} 2008, 9:504 #' @@ -163,7 +164,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, 
valsPerSpect, verboseColumns = verboseColumns, roiList = roiList, firstBaselineCheck = firstBaselineCheck, roiScales = roiScales, sleep = sleep, - extendLengthMSW = extendLengthMSW, + extendLengthMSW = extendLengthMSW, verboseBetaColumns = verboseBetaColumns) } else { ## message("DEBUG: using modified centWave.") @@ -579,7 +580,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, lm <- .narrow_rt_boundaries(lm, d) lm_seq <- lm[1]:lm[2] pd <- d[lm_seq] - + # Implement a fit of a skewed gaussian (beta distribution) # for peak shape and within-peak signal-to-noise ratio # See https://doi.org/10.1186/s12859-023-05533-4 and @@ -1265,7 +1266,7 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, #' by specifying \code{withWave = TRUE}. #' #' @details This algorithm's performance has been tested rigorously -#' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. +#' on high resolution LC/(OrbiTrap, TOF)-MS data in centroid mode. #' Simultaneous kalman filters identify peaks and calculate their #' area under the curve. The default parameters are set to operate on #' a complex LC-MS Orbitrap sample. 
Users will find it useful to do some @@ -2673,7 +2674,7 @@ do_findChromPeaks_centWaveWithPredIsoROIs <- verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL, snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, - polarity = "unknown", extendLengthMSW = FALSE, + polarity = "unknown", extendLengthMSW = FALSE, verboseBetaColumns = FALSE) { ## Input argument checking: most of it will be done in ## do_findChromPeaks_centWave @@ -3271,9 +3272,9 @@ peaksWithMatchedFilter <- function(int, rt, fwhm = 30, sigma = fwhm / 2.3548, #' @examples #' #' ## Reading a file +#' library(MsExperiment) #' library(xcms) -#' od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' mode = "onDisk") +#' od <- readMsExperiment(system.file("cdf/KO/ko15.CDF", package = "faahKO")) #' #' ## Extract chromatographic data for a small m/z range #' mzr <- c(272.1, 272.2) @@ -3633,9 +3634,9 @@ peaksWithCentWave <- function(int, rt, #' #' @examples #' +#' library(MsExperiment) #' library(xcms) -#' od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -#' mode = "onDisk") +#' od <- readMsExperiment(system.file("cdf/KO/ko15.CDF", package = "faahKO")) #' #' ## Extract chromatographic data for a small m/z range #' chr <- chromatogram(od, mz = c(272.1, 272.3))[1, 1] @@ -3731,7 +3732,7 @@ peaksWithCentWave <- function(int, rt, #' @author William Kumler #' #' @noRd -.get_beta_values <- function(intensity, rtime = seq_along(intensity), +.get_beta_values <- function(intensity, rtime = seq_along(intensity), skews=c(3, 3.5, 4, 4.5, 5), zero.rm = TRUE){ if (zero.rm) { ## remove 0 or NA intensities @@ -3744,7 +3745,7 @@ peaksWithCentWave <- function(int, rt, beta_snr <- NA } else { beta_sequence <- rep(.scale_zero_one(rtime), each=length(skews)) - beta_vals <- t(matrix(dbeta(beta_sequence, shape1 = skews, shape2 = 5), + beta_vals <- t(matrix(dbeta(beta_sequence, shape1 = skews, shape2 = 5), nrow = length(skews))) # 
matplot(beta_vals) beta_cors <- cor(intensity, beta_vals) diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R index 9bf3668bc..1170ef44d 100644 --- a/R/do_groupChromPeaks-functions.R +++ b/R/do_groupChromPeaks-functions.R @@ -75,6 +75,8 @@ #' #' @examples #' ## Load the test file +#' library(xcms) +#' library(MsExperiment) #' faahko_sub <- loadXcmsData("faahko_sub2") #' #' ## Disable parallel processing for this example diff --git a/R/functions-Chromatogram.R b/R/functions-Chromatogram.R index 0cede6580..6e90531de 100644 --- a/R/functions-Chromatogram.R +++ b/R/functions-Chromatogram.R @@ -62,8 +62,8 @@ #' #' @examples #' -#' xd <- readMSData(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -#' mode = "onDisk") +#' library(MsExperiment) +#' xd <- readMsExperiment(system.file('cdf/KO/ko15.CDF', package = "faahKO")) #' chr <- chromatogram(xd, mz = c(-0.5, 0.5) + 453.2) #' xchr <- findChromPeaks(chr, param = CentWaveParam(snthresh = 0)) #' plot(xchr) diff --git a/R/functions-OnDiskMSnExp.R b/R/functions-OnDiskMSnExp.R index d0c16d5e4..b774b4000 100644 --- a/R/functions-OnDiskMSnExp.R +++ b/R/functions-OnDiskMSnExp.R @@ -703,6 +703,7 @@ setReplaceMethod("dirname", "OnDiskMSnExp", function(path, value) { #' #' @examples #' +#' library(MSnbase) #' fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", package = "msdata") #' pest_dda <- readMSData(fl, mode = "onDisk") #' res <- .estimate_prec_intensity(pest_dda) diff --git a/R/functions-Params.R b/R/functions-Params.R index 71c05f89d..104702ecd 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -57,6 +57,8 @@ return("nearest peaks") if (is(x, "PeakGroupsParam")) return("peak groups") + if (is(x, "LamaParama")) + return("lama") if (is(x, "ObiwarpParam")) return("obiwarp") return("unknown") @@ -272,6 +274,37 @@ PeakGroupsParam <- function(minFraction = 0.9, extraPeaks = 1, subset = as.integer(subset), subsetAdjust = subsetAdjust) } +#' @rdname LamaParama +LamaParama <- 
function(lamas = matrix(ncol = 2, nrow = 0, + dimnames = list(NULL, c("mz", "rt"))), + method = c("loess", "gam"), + span = 0.5, + outlierTolerance = 3, + zeroWeight = 10, + ppm = 20, + tolerance = 0, + toleranceRt = 5, + bs = "tp") { + method <- match.arg(method) + if (method == "gam") + .check_gam_library() + if (is.data.frame(lamas)) + lamas <- as.matrix(lamas) + if (ncol(lamas) != 2) + stop("the 'lamas' matrix needs to have two columns, composed of m/z, ", + "and retention time of the peaks from the reference dataset, in this ", + "order") + new("LamaParama", lamas = lamas, + method = method, + span = span, + outlierTolerance = outlierTolerance, + zeroWeight = zeroWeight, + ppm = ppm, + tolerance = tolerance, + toleranceRt = toleranceRt, + bs = bs) +} + #' @rdname adjustRtime ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, distFun = "cor_opt", gapInit = numeric(), diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index fdbf738c7..2bd445721 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -796,6 +796,7 @@ adjustRtimePeakGroups <- function(object, param = PeakGroupsParam(), #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -964,6 +965,7 @@ isCalibrated <- function(object) { #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -1202,6 +1204,7 @@ featureSummary <- function(x, group, perSampleCounts = FALSE, #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -1854,6 +1857,7 @@ 
setMethod("hasFilledChromPeaks", "XCMSnExp", function(object) { #' #' @examples #' +#' library(MSnbase) #' xd <- readMSData(system.file('cdf/KO/ko15.CDF', package = "faahKO"), #' mode = "onDisk") #' xd <- findChromPeaks(xd, param = CentWaveParam()) diff --git a/R/functions-XChromatogram.R b/R/functions-XChromatogram.R index bfdd1f47c..b17f591dd 100644 --- a/R/functions-XChromatogram.R +++ b/R/functions-XChromatogram.R @@ -141,6 +141,7 @@ #' #' @examples #' +#' library(MSnbase) #' ## Create a XChromatogram object #' pks <- matrix(nrow = 1, ncol = 6) #' colnames(pks) <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn") diff --git a/R/functions-XChromatograms.R b/R/functions-XChromatograms.R index 3f41fa408..119a29a71 100644 --- a/R/functions-XChromatograms.R +++ b/R/functions-XChromatograms.R @@ -43,6 +43,7 @@ #' ## ---- Creation of XChromatograms ---- #' ## #' ## Create a XChromatograms from Chromatogram objects +#' library(MSnbase) #' dta <- list(Chromatogram(rtime = 1:7, c(3, 4, 6, 12, 8, 3, 2)), #' Chromatogram(1:10, c(4, 6, 3, 4, 7, 13, 43, 34, 23, 9))) #' diff --git a/R/functions-imputation.R b/R/functions-imputation.R index 9ae458329..3475db216 100644 --- a/R/functions-imputation.R +++ b/R/functions-imputation.R @@ -22,6 +22,7 @@ #' #' @examples #' +#' library(MSnbase) #' library(faahKO) #' data("faahko") #' @@ -111,6 +112,7 @@ imputeRowMin <- function(x, min_fraction = 1/2) { #' @examples #' #' library(faahKO) +#' library(MSnbase) #' data("faahko") #' #' xset <- group(faahko) diff --git a/R/functions-utils.R b/R/functions-utils.R index 8df509b18..77d236ebf 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -307,8 +307,9 @@ weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { #' #' @description #' -#' **UPDATE**: please use `plot(x, type = "XIC")` from the `MSnbase` package -#' instead. See examples below. +#' **UPDATE**: please use `plot()` from the `MsExperiment` or +#' `plot(x, type = "XIC")` from the `MSnbase` package instead. 
See examples +#' in the vignette for more information. #' #' The `plotMsData` creates a plot that combines an (base peak ) #' extracted ion chromatogram on top (rt against intensity) and a plot of @@ -338,19 +339,6 @@ weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { #' #' @md #' -#' @examples -#' -#' ## Read two files from the faahKO package -#' library(faahKO) -#' cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE, -#' recursive = TRUE)[1:2] -#' raw_data <- readMSData(cdfs, mode = "onDisk") -#' -#' ## Subset the object to a rt and mz range and plot the data. -#' raw_data |> -#' filterRt(rt = c(2700, 2900)) |> -#' filterMz(mz = c(334.9, 335.1)) |> -#' plot(type = "XIC") plotMsData <- function(x, main = "", cex = 1, mfrow = c(2, 1), grid.color = "lightgrey", colramp = colorRampPalette( diff --git a/R/functions-xcmsSet.R b/R/functions-xcmsSet.R index 8eea47700..373ee20f0 100644 --- a/R/functions-xcmsSet.R +++ b/R/functions-xcmsSet.R @@ -418,12 +418,15 @@ phenoDataFromPaths <- function(paths) { ## patternVsRowScore patternVsRowScore <- function(currPeak, parameters, mplenv) { + if (!requireNamespace("RANN", quietly = TRUE)) + stop("The use of 'patternVsRowScore' requires package 'RANN'. 
Please ", + "install with 'BiocInstaller::install(\"RANN\")'") mplistmeanCurr <- mplenv$mplistmean[, c("mz", "rt")] mplistmeanCurr[, "mz"] <- mplistmeanCurr[, "mz"] * parameters$mzVsRTBalance peakmatCurr <- mplenv$peakmat[currPeak, c("mz", "rt"), drop = FALSE] peakmatCurr[, "mz"] <- peakmatCurr[, "mz"] * parameters$mzVsRTBalance - nnDist <- nn2(mplistmeanCurr, peakmatCurr[, c("mz", "rt"), drop = FALSE], + nnDist <- RANN::nn2(mplistmeanCurr, peakmatCurr[, c("mz", "rt"), drop = FALSE], k = min(length(mplistmeanCurr[, 1]), parameters$knn)) scoreListcurr <- data.frame(score = numeric(0), diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R index f2b7463fa..cdff7f10a 100644 --- a/R/methods-Chromatogram.R +++ b/R/methods-Chromatogram.R @@ -45,6 +45,7 @@ #' #' @examples #' +#' library(MSnbase) #' ## Loading a test data set with identified chromatographic peaks #' faahko_sub <- loadXcmsData("faahko_sub2") #' faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) @@ -192,6 +193,7 @@ setMethod("findChromPeaks", signature(object = "Chromatogram", #' #' @examples #' +#' library(MSnbase) #' chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), #' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) #' chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), @@ -261,6 +263,7 @@ setMethod("correlate", signature = c(x = "Chromatogram", y = "Chromatogram"), #' #' @examples #' +#' library(MSnbase) #' chr <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), #' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) #' diff --git a/R/methods-MChromatograms.R b/R/methods-MChromatograms.R index 0b7f13e5c..78bd598ff 100644 --- a/R/methods-MChromatograms.R +++ b/R/methods-MChromatograms.R @@ -4,11 +4,14 @@ #' #' @examples #' +#' library(MsExperiment) +#' library(xcms) #' ## Perform peak detection on an MChromatograms object -#' od3 <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), +#' +#' fls <- c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), #' 
system.file("cdf/KO/ko16.CDF", package = "faahKO"), -#' system.file("cdf/KO/ko18.CDF", package = "faahKO")), -#' mode = "onDisk") +#' system.file("cdf/KO/ko18.CDF", package = "faahKO")) +#' od3 <- readMsExperiment(fls) #' #' ## Disable parallel processing for this example #' register(SerialParam()) @@ -190,6 +193,7 @@ setMethod("removeIntensity", "MChromatograms", #' #' @examples #' +#' library(MSnbase) #' chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), #' intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) #' chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), @@ -365,6 +369,7 @@ setMethod("filterColumnsKeepTop", "MChromatograms", #' #' ## Load preprocessed data and extract EICs for some features. #' library(xcms) +#' library(MSnbase) #' xdata <- loadXcmsData() #' data(xdata) #' ## Update the path to the files for the local system diff --git a/R/methods-Params.R b/R/methods-Params.R index d29016e79..e08ffda42 100644 --- a/R/methods-Params.R +++ b/R/methods-Params.R @@ -1249,6 +1249,26 @@ setReplaceMethod("subsetAdjust", "PeakGroupsParam", function(object, value) { return(object) }) +############################################################ +## LamaParama + +#' @rdname LamaParama +setMethod("plot", signature(x = "LamaParama"), + function(x, index = 1L, + colPoints = "#00000060", + colFit = "#00000080", + xlab = "Matched Chromatographic peaks", + ylab = "Lamas",...){ + model <- .rt_model(method = x@method, + rt_map= x@rtMap[[index]], span = x@span, + resid_ratio = x@outlierTolerance, + zero_weight = x@zeroWeight, + bs = x@bs) + datap <- x@rtMap[[index]] + plot(datap, type = "p", xlab = xlab, ylab = ylab, col = colPoints, ...) 
+ points(model, type = "l", col = colFit) +}) + ############################################################ ## ObiwarpParam @@ -1478,3 +1498,15 @@ setReplaceMethod("ppm", "FillChromPeaksParam", function(object, value) { if (validObject(object)) return(object) }) + +#' @rdname findChromPeaks-centWave +setMethod("as.list", "CentWaveParam", function(x, ...) { + x <- updateObject(x) + callNextMethod(x) +}) + +#' @rdname groupChromPeaks +setMethod("as.list", "PeakDensityParam", function(x, ...) { + x <- updateObject(x) + callNextMethod(x) +}) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 449a5f4a2..516e8e3f4 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -947,6 +947,7 @@ setMethod("dropAdjustedRtime", "XCMSnExp", function(object) { #' @examples #' #' ## Loading a test data set with identified chromatographic peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -2085,6 +2086,7 @@ setMethod("featureValues", "XCMSnExp", function(object, method = c("medret", #' @examples #' #' ## Load a test data set with identified chromatographic peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -2799,6 +2801,7 @@ setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") @@ -3169,6 +3172,7 @@ setMethod("writeMSData", signature(object = "XCMSnExp", file = "character"), #' @examples #' #' ## Load a test data set with detected peaks +#' library(MSnbase) #' data(faahko_sub) #' ## Update the path to the files for the local system #' dirname(faahko_sub) <- system.file("cdf/KO", package = 
"faahKO") diff --git a/R/methods-group-features.R b/R/methods-group-features.R index d27b15b69..eb7482821 100644 --- a/R/methods-group-features.R +++ b/R/methods-group-features.R @@ -126,6 +126,7 @@ setReplaceMethod("featureGroups", "XcmsResult", function(object, value) { #' @examples #' #' library(MsFeatures) +#' library(MsExperiment) #' ## Load a test data set with detected peaks #' faahko_sub <- loadXcmsData("faahko_sub2") #' @@ -253,6 +254,7 @@ setMethod( #' @examples #' #' library(MsFeatures) +#' library(MsExperiment) #' ## Load a test data set with detected peaks #' faahko_sub <- loadXcmsData("faahko_sub2") #' @@ -788,6 +790,7 @@ plotFeatureGroups <- function(x, xlim = numeric(), ylim = numeric(), #' @examples #' #' library(MsFeatures) +#' library(MsExperiment) #' ## Load a test data set with detected peaks #' faahko_sub <- loadXcmsData("faahko_sub2") #' diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 91b5a3c51..f2cb04913 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -1556,6 +1556,9 @@ setMethod("diffreport", "xcmsSet", function(object, h = 480, w = 640, mzdec=2, missing = numeric(), ...) { + if (!requireNamespace("multtest", quietly = TRUE)) + stop("The use of 'diffreport' requires package 'multtest'. Please ", + "install with 'BiocInstaller::install(\"multtest\")'") if ( nrow(object@groups)<1 || length(object@groupidx) <1) { stop("No group information. 
Use group().") } diff --git a/R/writemztab.R b/R/writemztab.R index 873ba10dd..8aa294b9d 100644 --- a/R/writemztab.R +++ b/R/writemztab.R @@ -10,15 +10,15 @@ ## utility function, combining different length objects into a dataframe ## padding short columns with NA rbind.ragged <- function(x, y) { - x <- as.data.frame(x) - y <- as.data.frame(y) + x <- as.data.frame(x) + y <- as.data.frame(y) colnames(x) <- seq(1:ncol(x)) colnames(y) <- seq(1:ncol(y)) - rbind.fill(x,y) + suppressWarnings(rbindFill(x,y)) } cvTerm <- function(CV, accession, name, value) { - paste("[", paste(CV, accession, name, value, sep=", "), "]", sep="") + paste("[", paste(CV, accession, name, value, sep=", "), "]", sep="") } #cvTerm("MS", "MS:1000443", "Mass Analyzer Type", "Orbitrap") @@ -52,9 +52,9 @@ mzTabHeader <- function(mztab, version, mode, type, description, xset) { samples <- paste("sample[", 1:length(runs), "]", sep="") names(samples) <- paste("assay[", 1:length(runs), "]-sample_ref", sep="") - + sampleDesc <- sampnames(xset) - names(sampleDesc) <- paste("sample[", 1:length(runs), "]-description", sep="") + names(sampleDesc) <- paste("sample[", 1:length(runs), "]-description", sep="") filetypes <- mzFileType(runs) names(runs) <- paste("ms_run[", 1:length(filetypes), "]-format", sep="") @@ -64,10 +64,10 @@ mzTabHeader <- function(mztab, version, mode, type, description, xset) { variableAssays <- unlist(tapply(seq(along=sampclass(xset)), sampclass(xset), function(x) paste(paste("assay[",x,"]", sep=""), collapse=","))) names(variableAssays) <- paste("study_variable[", seq(along=variableAssays), "]-assay_refs", sep="") - + variableDescriptions <- unique(as.character(sampclass(xset))) names(variableDescriptions) <- paste("study_variable[", seq(along=variableDescriptions), "]-description", sep="") - + mztab <- rbind.ragged(mztab, mzTabAddComment("Meta data section")) mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", c("mzTab-version"=version, @@ -78,7 +78,7 @@ mzTabHeader <- function(mztab, 
version, mode, type, description, xset) { mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", samples)) mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", sampleDesc)) - + mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", assays)) mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", variableAssays)) mztab <- rbind.ragged(mztab, mzTabAddTagValue("MTD", variableDescriptions)) @@ -95,7 +95,7 @@ mzTabAddTagValue <- function(section, values) { mzTabAddValues <- function(mztab, headers, section, values) { h <- cbind.data.frame(headers, t(names(values)), stringsAsFactors=FALSE) v <- cbind.data.frame(section, values, stringsAsFactors=FALSE) - + mztab <- rbind.ragged(mztab, h) mztab <- rbind.ragged(mztab, v) } @@ -103,14 +103,14 @@ mzTabAddValues <- function(mztab, headers, section, values) { mzTabAddSME <- function(mztab, xset) { runs <- seq(along=sampnames(xset)) variables <- seq(along=levels(sampclass(xset))) - + idHeaders <- c("identifier", "description", "chemical_formula", "smiles", "inchi_key", "database", "database_version") searchHeaders1 <- c("search_engine", "best_search_engine_score") - + searchHeaders2 <- paste("search_engine_score_ms_run[", runs, "]", sep="") - + searchHeaders3 <- c("reliability", "modifications") featureHeaders <- c("charge", "adduct_ion", "exp_mass_to_charge", @@ -118,8 +118,8 @@ mzTabAddSME <- function(mztab, xset) { "retention_time_window", "uri", "spectra_ref") abundanceAssayHeaders <- paste("smallmolecule_abundance_assay[", runs, "]", sep="") - - + + abundanceVariableHeaders <- unlist(lapply(variables, FUN=function(v) c(paste("smallmolecule_abundance_study_variable[", v,"]", sep=""), paste("smallmolecule_abundance_stddev_study_variable[", v,"]", sep=""), paste("smallmolecule_abundance_std_error_study_variable[", v,"]", sep="")))) @@ -132,7 +132,7 @@ mzTabAddSME <- function(mztab, xset) { g <- groups(xset) v <- groupval(xset, value="into") - + result <- as.data.frame(matrix(character(0), ncol=length(headers), nrow=nrow(g))) 
colnames(result) <- headers @@ -140,14 +140,14 @@ mzTabAddSME <- function(mztab, xset) { #variableAssays <- unlist(tapply(seq(along=sampclass(xset)), sampclass(xset), function(x) # paste(paste("assay[",x,"]", sep=""), collapse=","))) #names(variableAssays) <- paste("study_variable[", seq(along=variableAssays), "]-assay_refs", sep="") - - + + result[,"retention_time"] <- g[,"rtmed"] result[,"exp_mass_to_charge"] <- g[,"mzmed"] result[, grepl("smallmolecule_abundance_assay", colnames(result))] <- v - + mztab <- mzTabAddValues(mztab, "SEH", "SME", result) - + } writeMzTab <- function(object, filename) { @@ -177,7 +177,7 @@ if (FALSE) { xset=xs) mzt <- xcms:::mzTabAddSME(mzt, xs) ##mzt - + xcms:::writeMzTab(mzt, "faahKO.mzTab") } @@ -186,7 +186,3 @@ if (FALSE) { library(MSnbase) m <- readMzTabData("faahKO.mzTab") } - - - - diff --git a/data/faahko_sub.RData b/data/faahko_sub.RData index f62256251..54490a1b0 100644 Binary files a/data/faahko_sub.RData and b/data/faahko_sub.RData differ diff --git a/data/faahko_sub2.RData b/data/faahko_sub2.RData index c58a21c5d..d7a9ffd61 100644 Binary files a/data/faahko_sub2.RData and b/data/faahko_sub2.RData differ diff --git a/data/xdata.RData b/data/xdata.RData index 4ba55713f..2cfa0b849 100644 Binary files a/data/xdata.RData and b/data/xdata.RData differ diff --git a/data/xmse.RData b/data/xmse.RData index 1e9a8b600..edbd17057 100644 Binary files a/data/xmse.RData and b/data/xmse.RData differ diff --git a/inst/NEWS b/inst/NEWS index 3db6b92f8..716c2a218 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,9 +1,30 @@ -Changes in version 4.1.10 +Changes in version 4.1.13 ---------------------- - Add a `param =` to generic function `storeResults`: `MzTabParam` to save an `XcmsExperiment` object as .mztabm file type. +Changes in version 4.1.12 +---------------------- + +- Implementation of the `LamaParama` class and method for the `adjustRtime()` + function. 
Allowing alignment of a dataset based on landmarks (lamas) from an + external reference dataset. +- Implementation of related user-level functions `matchLamasChromPeaks()`, + `summarizeLamaMatch()` and `plot(LamaParama)` which allow for evaluation of + matching between lamas and chromPeaks. + +Changes in version 4.1.11 +---------------------- + +- Clean up of required and suggested packages and namespace imports. + +Changes in version 4.1.10 +---------------------- + +- Ensure backward compatibility for parameter objects that gained additional + slots. + Changes in version 4.1.9 ---------------------- diff --git a/man/LamaParama.Rd b/man/LamaParama.Rd new file mode 100644 index 000000000..1d5e7904e --- /dev/null +++ b/man/LamaParama.Rd @@ -0,0 +1,216 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/XcmsExperiment.R, R/do_adjustRtime-functions.R, +% R/functions-Params.R, R/methods-Params.R +\name{adjustRtime,XcmsExperiment,LamaParama-method} +\alias{adjustRtime,XcmsExperiment,LamaParama-method} +\alias{LamaParama} +\alias{LamaParama-class} +\alias{matchLamasChromPeaks} +\alias{summarizeLamaMatch} +\alias{matchedRtimes} +\alias{plot,LamaParama,ANY-method} +\title{Landmark-based alignment: aligning a dataset against an external +reference} +\usage{ +\S4method{adjustRtime}{XcmsExperiment,LamaParama}(object, param, BPPARAM = bpparam(), ...) + +matchLamasChromPeaks(object, param, BPPARAM = bpparam()) + +summarizeLamaMatch(param) + +matchedRtimes(param) + +LamaParama( + lamas = matrix(ncol = 2, nrow = 0, dimnames = list(NULL, c("mz", "rt"))), + method = c("loess", "gam"), + span = 0.5, + outlierTolerance = 3, + zeroWeight = 10, + ppm = 20, + tolerance = 0, + toleranceRt = 5, + bs = "tp" +) + +\S4method{plot}{LamaParama,ANY}( + x, + index = 1L, + colPoints = "#00000060", + colFit = "#00000080", + xlab = "Matched Chromatographic peaks", + ylab = "Lamas", + ...
+) +} +\arguments{ +\item{object}{An object of class `XcmsExperiment` with defined ChromPeaks.} + +\item{param}{An object of class `LamaParama` that will later be used for +adjustment using the `[adjustRtime()]` function.} + +\item{BPPARAM}{For `matchLamasChromPeaks()`: parallel processing setup. +Defaults to `BPPARAM = bpparam()`. See [bpparam()] for more information.} + +\item{...}{For `plot()`: extra parameters to be passed to the function.} + +\item{lamas}{For `LamaParama`: `matrix` or `data.frame` with the m/z and +retention times values of features (as first and second column) from the +external dataset on which the alignment will be based on.} + +\item{method}{For `LamaParama`:`character(1)` with the type of warping. +Either `method = "gam"` or `method = "loess"` (default).} + +\item{span}{For `LamaParama`: `numeric(1)` defining +the degree of smoothing (`method = "loess"`). This parameter is passed +to the internal call to [loess()].} + +\item{outlierTolerance}{For `LamaParama`: `numeric(1)` defining the settings +for outlier removal during the fitting. By default +(with `outlierTolerance = 3`), all data points with absolute residuals +larger than 3 times the mean absolute residual of all data points from +the first, initial fit, are removed from the final model fit.} + +\item{zeroWeight}{For `LamaParama`: `numeric(1)`: defines the weight of the +first data point (i.e. retention times of the first lama-chromatographic +peak pair). Values larger than 1 reduce warping problems in the early RT +range.} + +\item{ppm}{For `LamaParama`: `numeric(1)` defining the m/z-relative maximal +allowed difference in m/z between `lamas` and chromatographic peaks. Used +for the mapping of identified chromatographic peaks and lamas.} + +\item{tolerance}{For `LamaParama`: `numeric(1)` defining the absolute +acceptable difference in m/z between lamas and chromatographic peaks. 
+Used for the mapping of identified chromatographic peaks and `lamas`.} + +\item{toleranceRt}{For `LamaParama`: `numeric(1)` defining the absolute +acceptable difference in retention time between lamas and +chromatographic peaks. Used for the mapping of identified chromatographic +peaks and `lamas`.} + +\item{bs}{For `LamaParama()`: `character(1)` defining the GAM smoothing method. +(defaults to thin plate, `bs = "tp"`)} + +\item{x}{For `plot()`: object of class `LamaParama` to be plotted.} + +\item{index}{For `plot()`: `numeric(1)` index of the file that should be +plotted.} + +\item{colPoints}{For `plot()`: color for the plotting of the datapoint.} + +\item{colFit}{For `plot()`: color of the fitting line.} + +\item{xlab, ylab}{For `plot()`: x- and y-axis labels.} +} +\value{ +For `matchLamasChromPeaks()`: A `LamaParama` object with new slot `rtMap` +composed of a list of matrices representing the 1:1 matches between Lamas +(ref) and ChromPeaks (obs). To access this, `matchedRtimes()` can be used. + +For `matchedRtimes()`: A list of `data.frame` representing matches +between chromPeaks and `lamas` for each file. + +For `summarizeLamaMatch()`: A `data.frame` with: + +- "Total_peaks": total number of chromatographic peaks in the file. + +- "Matched_peak": The number of matched peaks to Lamas. + +- "Total_Lamas": Total number of Lamas. + +- "Model_summary": `summary.loess` or `summary.gam` object for each file. +} +\description{ +Alignment is achieved using the [adjustRtime()] method with a `param` of +class `LamaParama`. This method corrects retention time by aligning +chromatographic data with an external reference dataset. + +Chromatographic peaks in the experimental data are first matched to +predefined (external) landmark features based on their mass-to-charge ratio +and retention time and subsequently the data is aligned by minimizing the +differences in retention times between the matched chromatographic peaks and +lamas.
This adjustment is performed file by file. + +Adjustable parameters such as `ppm`, `tolerance`, and `toleranceRt` define +acceptable deviations during the matching process. It's crucial to note that +only lamas and chromatographic peaks exhibiting a one-to-one mapping are +considered when estimating retention time shifts. If a file has no peaks +matching with lamas, no adjustment will be performed, and the retention +times will be returned as-is. Users can evaluate this matching, for example, +by checking the number of matches and ranges of the matching peaks, by first +running `[matchLamasChromPeaks()]`. + +Different warping methods are available; users can choose to fit a *loess* +(`method = "loess"`, the default) or a *gam* (`method = "gam"`) between the +reference data points and observed matching ChromPeaks. Additional +parameters such as `span`, `weight`, `outlierTolerance`, `zeroWeight`, +and `bs` are specific to these models. These parameters offer flexibility +in fine-tuning how the matching chromatographic peaks are fitted to the +lamas, thereby generating a model to align the overall retention time for +a single file. + +Other functions related to this method: + + - `LamaParama()`: return the respective parameter object for alignment + using `adjustRtime()` function. It is also the input for the functions + listed below. + + - `matchLamasChromPeaks()`: quickly matches each file's ChromPeaks + to Lamas, allowing the user to evaluate the matches for each file. + + - `summarizeLamaMatch()`: generates a summary of the `LamaParama` method. + See below for the details of the return object. + + - `matchedRtimes()`: Access the list of `data.frame` saved in the + `LamaParama` object, generated by the `matchLamasChromPeaks()` function. + + - `plot()`: plot the chromatographic peaks versus the reference lamas as + well as the fitting line for the chosen model type.
The user can decide + what file to inspect by specifying the file index with the parameter + `index` +} +\note{ +If there are no matches when using `matchLamasChromPeaks()`, the file +retention will not be adjusted when calling [adjustRtime()] with the same +`LamaParama` and `XcmsExperiment` object. + +To see examples on how to utilize this method and its functionality, +see the vignette. +} +\examples{ +## load test and reference datasets +ref <- loadXcmsData("xmse") +tst <- loadXcmsData("faahko_sub2") + +## create lamas input from the reference dataset +library(MsExperiment) +f <- sampleData(ref)$sample_type +f[f == "QC"] <- NA +ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f)) +ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")] + +## Set up the LamaParama object +param <- LamaParama(lamas = ref_mz_rt, method = "loess", span = 0.5, + outlierTolerance = 3, zeroWeight = 10, ppm = 20, + tolerance = 0, toleranceRt = 20, bs = "tp") + +## input into `adjustRtime()` +tst_adjusted <- adjustRtime(tst, param = param) + +## run diagnostic functions to pre-evaluate alignment +param <- matchLamasChromPeaks(tst, param = param) +mtch <- matchedRtimes(param) + +## Access summary of matches and model information +summary <- summarizeLamaMatch(param) + +## coverage for each file +summary$Matched_peaks / summary$Total_peaks * 100 + +## Access the information on the model for the first file +summary$model_summary[[1]] + +} +\author{ +Carl Brunius, Philippine Louail +} diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 1753ee985..4dc7303ce 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -685,6 +685,7 @@ with feature definitions and phenodata information, into a \examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/XCMSnExp-filter-methods.Rd
b/man/XCMSnExp-filter-methods.Rd index 98e14def3..ad854af25 100644 --- a/man/XCMSnExp-filter-methods.Rd +++ b/man/XCMSnExp-filter-methods.Rd @@ -191,6 +191,7 @@ the raw retention times with the adjusted ones. \examples{ ## Loading a test data set with identified chromatographic peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/XChromatogram.Rd b/man/XChromatogram.Rd index 72703f9fe..8b2330810 100644 --- a/man/XChromatogram.Rd +++ b/man/XChromatogram.Rd @@ -600,6 +600,7 @@ being samples and rows features. ## ---- Creation of XChromatograms ---- ## ## Create a XChromatograms from Chromatogram objects +library(MSnbase) dta <- list(Chromatogram(rtime = 1:7, c(3, 4, 6, 12, 8, 3, 2)), Chromatogram(1:10, c(4, 6, 3, 4, 7, 13, 43, 34, 23, 9))) @@ -689,6 +690,7 @@ plotChromPeakDensity(res, param = prm) res <- dropFeatureDefinitions(res) hasFeatures(res) +library(MSnbase) ## Create a XChromatogram object pks <- matrix(nrow = 1, ncol = 6) colnames(pks) <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn") diff --git a/man/adjustRtime.Rd b/man/adjustRtime.Rd index 70b8182a0..884a3778a 100644 --- a/man/adjustRtime.Rd +++ b/man/adjustRtime.Rd @@ -259,9 +259,9 @@ sample that are assigned to the group.} be used to interpolate corrected retention times for all peak groups. Can be either \code{"loess"} or \code{"linear"}.} -\item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the degree of -smoothing (if \code{smooth = "loess"}). This parameter is passed to the -internal call to \code{\link[=loess]{loess()}}.} +\item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining +the degree of smoothing (if \code{smooth = "loess"}). This parameter is +passed to the internal call to \code{\link[=loess]{loess()}}.} \item{family}{For \code{PeakGroupsParam}: \code{character(1)} defining the method for loess smoothing. 
Allowed values are \code{"gaussian"} and \code{"symmetric"}. See @@ -331,7 +331,7 @@ initiating an alignment (for local alignment only).} \item{value}{The value for the slot.} -\item{x}{An \code{ObiwarpParam} or \code{PeakGroupsParam} object.} +\item{x}{An \code{ObiwarpParam}, \code{PeakGroupsParam} or \code{LamaParama} object.} } \value{ \code{adjustRtime} on an \code{OnDiskMSnExp} or \code{XCMSnExp} object will return an @@ -341,21 +341,22 @@ initiating an alignment (for local alignment only).} \code{XcmsExperiment} with the adjusted retention times stored in an new \emph{spectra variable} \code{rtime_adjusted} in the object's \code{spectra}. -\code{ObiwarpParam} and \code{PeakGroupsParam} return the respective parameter object. +\code{ObiwarpParam}, \code{PeakGroupsParam} and \code{LamaParama} return the respective +parameter object. \code{adjustRtimeGroups} returns a \code{matrix} with the retention times of \emph{marker} features in each sample (each row one feature, each row one sample). } \description{ The \code{adjustRtime} method(s) perform retention time correction (alignment) -between chromatograms of different samples. Alignment is performed by defaul -on MS level 1 data. Retention times of spectra from other MS levels, if -present, are subsequently adjusted based on the adjusted retention times -of the MS1 spectra. Note that calling \code{adjustRtime} on a \emph{xcms} result object -will remove any eventually present previous alignment results as well as -any correspondence analysis results. To run a second round of alignment, -raw retention times need to be replaced with adjusted ones using the -\code{\link[=applyAdjustedRtime]{applyAdjustedRtime()}} function. +between chromatograms of different samples/dataset. Alignment is performed +by default on MS level 1 data. Retention times of spectra from other MS +levels, if present, are subsequently adjusted based on the adjusted +retention times of the MS1 spectra. 
Note that calling \code{adjustRtime} on a +\emph{xcms} result object will remove any eventually present previous alignment +results as well as any correspondence analysis results. To run a second +round of alignment, raw retention times need to be replaced with adjusted +ones using the \code{\link[=applyAdjustedRtime]{applyAdjustedRtime()}} function. The alignment method can be specified (and configured) using a dedicated \code{param} argument. @@ -368,7 +369,7 @@ rt data using the \emph{obiwarp} method (Prince (2006)). It is based on the alignment of multiple samples by aligning each against a \emph{center} sample. The alignment is performed directly on the \link{profile-matrix} and can hence be performed independently of the peak detection or peak grouping. -\item \code{PeakGroupsParam}: performs retention time correctoin based on the +\item \code{PeakGroupsParam}: performs retention time correction based on the alignment of features defined in all/most samples (corresponding to \emph{house keeping compounds} or marker compounds) (Smith 2006). First the retention time deviation of these features is described by fitting either a @@ -387,6 +388,14 @@ function is used to define this \code{matrix}. This function identifies peak groups (features) for alignment in \code{object} based on the parameters defined in \code{param}. See also \code{\link[=do_adjustRtime_peakGroups]{do_adjustRtime_peakGroups()}} for the core API function. +\item \code{LamaParama}: This function performs retention time correction by aligning +chromatographic data to an external reference dataset (concept and initial +implementation by Carl Brunius). The process involves identifying and +aligning peaks within the experimental chromatographic data, represented +as an \code{XcmsExperiment} object, to a predefined set of landmark features +called "lamas". These landmark features are characterized by their +mass-to-charge ratio (m/z) and retention time. 
see \code{\link[=LamaParama]{LamaParama()}} for more +information on the method. } } \section{Subset-based alignment}{ @@ -429,6 +438,6 @@ Nonlinear Peak Alignment, Matching, and Identification" \emph{Anal. Chem.} \code{\link[=plotAdjustedRtime]{plotAdjustedRtime()}} for visualization of alignment results. } \author{ -Colin Smith, Johannes Rainer +Colin Smith, Johannes Rainer, Philippine Louail, Carl Brunius } \concept{retention time correction methods} diff --git a/man/applyAdjustedRtime.Rd b/man/applyAdjustedRtime.Rd index 1006c14ce..9b96133fa 100644 --- a/man/applyAdjustedRtime.Rd +++ b/man/applyAdjustedRtime.Rd @@ -34,6 +34,7 @@ history is preserved. \examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/chromPeakChromatograms.Rd b/man/chromPeakChromatograms.Rd index 276d96025..7fb8663dc 100644 --- a/man/chromPeakChromatograms.Rd +++ b/man/chromPeakChromatograms.Rd @@ -59,6 +59,9 @@ of length equal to the number of chromatographic peaks (and one column). 
\examples{ ## Load a test data set with detected peaks +library(MSnbase) +library(xcms) +library(MsExperiment) faahko_sub <- loadXcmsData("faahko_sub2") ## Get EICs for every detected chromatographic peak diff --git a/man/chromatogram-method.Rd b/man/chromatogram-method.Rd index 6522024c8..af488d48a 100644 --- a/man/chromatogram-method.Rd +++ b/man/chromatogram-method.Rd @@ -109,6 +109,7 @@ and use these (for the subsetting based on the provided parameter \examples{ ## Load a test data set with identified chromatographic peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/correlate-Chromatogram.Rd b/man/correlate-Chromatogram.Rd index 0bd641072..d5eb143a2 100644 --- a/man/correlate-Chromatogram.Rd +++ b/man/correlate-Chromatogram.Rd @@ -81,6 +81,7 @@ correlation matrix might thus be different. } \examples{ +library(MSnbase) chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), diff --git a/man/do_adjustRtime_peakGroups.Rd b/man/do_adjustRtime_peakGroups.Rd index 28e4297c2..4d0d898ea 100644 --- a/man/do_adjustRtime_peakGroups.Rd +++ b/man/do_adjustRtime_peakGroups.Rd @@ -52,9 +52,9 @@ sample that are assigned to the group.} be used to interpolate corrected retention times for all peak groups. Can be either \code{"loess"} or \code{"linear"}.} -\item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining the degree of -smoothing (if \code{smooth = "loess"}). This parameter is passed to the -internal call to \code{\link[=loess]{loess()}}.} +\item{span}{For \code{PeakGroupsParam}: \code{numeric(1)} defining +the degree of smoothing (if \code{smooth = "loess"}). 
This parameter is +passed to the internal call to \code{\link[=loess]{loess()}}.} \item{family}{For \code{PeakGroupsParam}: \code{character(1)} defining the method for loess smoothing. Allowed values are \code{"gaussian"} and \code{"symmetric"}. See diff --git a/man/do_findChromPeaks_centWave.Rd b/man/do_findChromPeaks_centWave.Rd index 9a91e2df9..4270e893d 100644 --- a/man/do_findChromPeaks_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -219,7 +219,7 @@ res <- do_findChromPeaks_centWave(mz = unlist(mzs), int = unlist(ints), head(res) } \references{ -Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} 2008, 9:504 } diff --git a/man/do_findChromPeaks_massifquant.Rd b/man/do_findChromPeaks_massifquant.Rd index 4c6e81951..d8f1cca69 100644 --- a/man/do_findChromPeaks_massifquant.Rd +++ b/man/do_findChromPeaks_massifquant.Rd @@ -155,7 +155,7 @@ Massifquant is a Kalman filter (KF)-based chromatographic peak } \details{ This algorithm's performance has been tested rigorously - on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. + on high resolution LC/(OrbiTrap, TOF)-MS data in centroid mode. Simultaneous kalman filters identify peaks and calculate their area under the curve. The default parameters are set to operate on a complex LC-MS Orbitrap sample. Users will find it useful to do some diff --git a/man/do_groupChromPeaks_density.Rd b/man/do_groupChromPeaks_density.Rd index 4b27d8a4e..739e915cb 100644 --- a/man/do_groupChromPeaks_density.Rd +++ b/man/do_groupChromPeaks_density.Rd @@ -109,6 +109,8 @@ accordingly. 
} \examples{ ## Load the test file +library(xcms) +library(MsExperiment) faahko_sub <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example diff --git a/man/extractMsData-method.Rd b/man/extractMsData-method.Rd index a413ae3d7..7a1105412 100644 --- a/man/extractMsData-method.Rd +++ b/man/extractMsData-method.Rd @@ -55,6 +55,7 @@ data range if \code{rt} and \code{mz} are not defined). \examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/featureChromatograms.Rd b/man/featureChromatograms.Rd index 8d194e81b..725865883 100644 --- a/man/featureChromatograms.Rd +++ b/man/featureChromatograms.Rd @@ -163,6 +163,8 @@ is smaller if \code{x} contains also filled-in peaks (with \code{fillChromPeaks} \examples{ ## Load a test data set with detected peaks +library(xcms) +library(MsExperiment) faahko_sub <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd index a57e33e05..25d671608 100644 --- a/man/fillChromPeaks.Rd +++ b/man/fillChromPeaks.Rd @@ -260,6 +260,8 @@ in the matrix returned by the \code{\link[=featureValues]{featureValues()}} meth \examples{ ## Load a test data set with identified chromatographic peaks +library(xcms) +library(MsExperiment) res <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example @@ -267,7 +269,7 @@ register(SerialParam()) ## Perform the correspondence. We assign all samples to the same group. res <- groupChromPeaks(res, - param = PeakDensityParam(sampleGroups = rep(1, length(fileNames(res))))) + param = PeakDensityParam(sampleGroups = rep(1, length(res)))) ## For how many features do we lack an integrated peak signal? 
sum(is.na(featureValues(res))) diff --git a/man/filter-MChromatograms.Rd b/man/filter-MChromatograms.Rd index ca96dfa68..2a8fea2d3 100644 --- a/man/filter-MChromatograms.Rd +++ b/man/filter-MChromatograms.Rd @@ -112,6 +112,7 @@ columns by column \code{sortBy = "maxo"} or \code{sortBy = "into"} of the } \examples{ +library(MSnbase) chr1 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) chr2 <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), diff --git a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd index d7d5c2be7..9f342440b 100644 --- a/man/findChromPeaks-Chromatogram-CentWaveParam.Rd +++ b/man/findChromPeaks-Chromatogram-CentWaveParam.Rd @@ -47,6 +47,7 @@ detection artifacts. } \examples{ +library(MSnbase) ## Loading a test data set with identified chromatographic peaks faahko_sub <- loadXcmsData("faahko_sub2") faahko_sub <- filterRt(faahko_sub, c(2500, 3700)) @@ -64,11 +65,14 @@ xchr ## Plot data and identified peaks. 
plot(xchr) +library(MsExperiment) +library(xcms) ## Perform peak detection on an MChromatograms object -od3 <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), + +fls <- c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), system.file("cdf/KO/ko16.CDF", package = "faahKO"), - system.file("cdf/KO/ko18.CDF", package = "faahKO")), - mode = "onDisk") + system.file("cdf/KO/ko18.CDF", package = "faahKO")) +od3 <- readMsExperiment(fls) ## Disable parallel processing for this example register(SerialParam()) diff --git a/man/findChromPeaks-centWave.Rd b/man/findChromPeaks-centWave.Rd index f7f2ac351..ec5ac694f 100644 --- a/man/findChromPeaks-centWave.Rd +++ b/man/findChromPeaks-centWave.Rd @@ -59,6 +59,7 @@ \alias{roiScales} \alias{roiScales<-,CentWaveParam-method} \alias{roiScales<-} +\alias{as.list,CentWaveParam-method} \title{Chromatographic peak detection using the centWave method} \usage{ CentWaveParam( @@ -139,6 +140,8 @@ CentWaveParam( \S4method{roiScales}{CentWaveParam}(object) \S4method{roiScales}{CentWaveParam}(object) <- value + +\S4method{as.list}{CentWaveParam}(x, ...) } \arguments{ \item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in @@ -249,6 +252,8 @@ detection should be performed. Defaults to \code{msLevel = 1}.} \item{value}{The value for the slot.} \item{f}{For \code{integrate}: a \code{CentWaveParam} object.} + +\item{x}{The parameter object.} } \value{ The \code{CentWaveParam} function returns a \code{CentWaveParam} @@ -366,20 +371,21 @@ snthresh(cwp) <- 25 cwp ## Perform the peak detection using centWave on some of the files from the -## faahKO package. Files are read using the readMSData from the MSnbase -## package +## faahKO package. 
Files are read using the `readMsExperiment` function +## from the MsExperiment package library(faahKO) library(xcms) +library(MsExperiment) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) -raw_data <- readMSData(fls[1], mode = "onDisk") +raw_data <- readMsExperiment(fls[1]) ## Perform the peak detection using the settings defined above. res <- findChromPeaks(raw_data, param = cwp) head(chromPeaks(res)) } \references{ -Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} 2008, 9:504 } diff --git a/man/findChromPeaks-massifquant.Rd b/man/findChromPeaks-massifquant.Rd index 06fb4ec13..385101e3f 100644 --- a/man/findChromPeaks-massifquant.Rd +++ b/man/findChromPeaks-massifquant.Rd @@ -338,7 +338,7 @@ The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} } \details{ This algorithm's performance has been tested rigorously - on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. + on high resolution LC/(OrbiTrap, TOF)-MS data in centroid mode. Simultaneous kalman filters identify chromatographic peaks and calculate their area under the curve. The default parameters are set to operate on a complex LC-MS Orbitrap sample. 
Users will find it useful to do some diff --git a/man/findPeaks-MSW.Rd b/man/findPeaks-MSW.Rd index d09f6d041..9569fdd00 100644 --- a/man/findPeaks-MSW.Rd +++ b/man/findPeaks-MSW.Rd @@ -273,6 +273,7 @@ These methods and classes are part of the updated and modernized } \examples{ +library(MSnbase) ## Create a MSWParam object mp <- MSWParam() ## Change snthresh parameter diff --git a/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd b/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd index cb03ebefc..5ea4877bc 100644 --- a/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd +++ b/man/findPeaks.addPredictedIsotopeFeatures-methods.Rd @@ -108,7 +108,7 @@ \author{Ralf Tautenhahn} \encoding{UTF-8} \references{ - Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann + Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" BMC Bioinformatics 2008, 9:504\\ Hendrik Treutler and Steffen Neumann. diff --git a/man/findPeaks.centWave-methods.Rd b/man/findPeaks.centWave-methods.Rd index d1734d2bd..811b4beec 100644 --- a/man/findPeaks.centWave-methods.Rd +++ b/man/findPeaks.centWave-methods.Rd @@ -120,7 +120,7 @@ \author{Ralf Tautenhahn} \encoding{UTF-8} \references{ - Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann + Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" BMC Bioinformatics 2008, 9:504 } diff --git a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd index ed8cd0cf0..f2baba7ea 100644 --- a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd +++ b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd @@ -125,7 +125,7 @@ \author{Ralf Tautenhahn} \encoding{UTF-8} \references{ - Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann + Ralf Tautenhahn, Christoph Böttcher, and Steffen Neumann "Highly sensitive feature detection for 
high resolution LC/MS" BMC Bioinformatics 2008, 9:504\\ Hendrik Treutler and Steffen Neumann. diff --git a/man/groupChromPeaks.Rd b/man/groupChromPeaks.Rd index fe6dbdaff..5a4049523 100644 --- a/man/groupChromPeaks.Rd +++ b/man/groupChromPeaks.Rd @@ -61,6 +61,7 @@ \alias{kNN} \alias{kNN<-,NearestPeaksParam-method} \alias{kNN<-} +\alias{as.list,PeakDensityParam-method} \alias{groupChromPeaks,XCMSnExp,PeakDensityParam-method} \alias{groupChromPeaks,XCMSnExp,MzClustParam-method} \alias{groupChromPeaks,XCMSnExp,NearestPeaksParam-method} @@ -162,6 +163,8 @@ NearestPeaksParam( \S4method{kNN}{NearestPeaksParam}(object) <- value +\S4method{as.list}{PeakDensityParam}(x, ...) + \S4method{groupChromPeaks}{XCMSnExp,PeakDensityParam}(object, param, msLevel = 1L, add = FALSE) \S4method{groupChromPeaks}{XCMSnExp,MzClustParam}(object, param, msLevel = 1L) @@ -236,6 +239,8 @@ distance for retention times.} nearest neighbors to check.} \item{value}{The value for the slot.} + +\item{x}{The parameter object.} } \value{ For \code{groupChromPeaks}: either an \code{\link[=XcmsExperiment]{XcmsExperiment()}} or \code{\link[=XCMSnExp]{XCMSnExp()}} diff --git a/man/groupFeatures-abundance-correlation.Rd b/man/groupFeatures-abundance-correlation.Rd index 27f139ba4..84477fa4a 100644 --- a/man/groupFeatures-abundance-correlation.Rd +++ b/man/groupFeatures-abundance-correlation.Rd @@ -62,6 +62,7 @@ parameters for the similarity calculation is available in the \examples{ library(MsFeatures) +library(MsExperiment) ## Load a test data set with detected peaks faahko_sub <- loadXcmsData("faahko_sub2") diff --git a/man/groupFeatures-eic-similarity.Rd b/man/groupFeatures-eic-similarity.Rd index 544b2a6ac..c3ceb206d 100644 --- a/man/groupFeatures-eic-similarity.Rd +++ b/man/groupFeatures-eic-similarity.Rd @@ -144,6 +144,7 @@ is excluded from the correlation. 
\examples{ library(MsFeatures) +library(MsExperiment) ## Load a test data set with detected peaks faahko_sub <- loadXcmsData("faahko_sub2") diff --git a/man/groupFeatures-similar-rtime.Rd b/man/groupFeatures-similar-rtime.Rd index 938f09ceb..0d56170ea 100644 --- a/man/groupFeatures-similar-rtime.Rd +++ b/man/groupFeatures-similar-rtime.Rd @@ -36,6 +36,7 @@ See \code{\link[MsFeatures:groupFeatures-similar-rtime]{MsFeatures::SimilarRtime \examples{ library(MsFeatures) +library(MsExperiment) ## Load a test data set with detected peaks faahko_sub <- loadXcmsData("faahko_sub2") diff --git a/man/highlightChromPeaks.Rd b/man/highlightChromPeaks.Rd index 14f83995e..e5e46fb90 100644 --- a/man/highlightChromPeaks.Rd +++ b/man/highlightChromPeaks.Rd @@ -73,6 +73,7 @@ peak definitions to an existing plot, such as one created by the \examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/imputeRowMin.Rd b/man/imputeRowMin.Rd index ec9effea9..b3999e3e6 100644 --- a/man/imputeRowMin.Rd +++ b/man/imputeRowMin.Rd @@ -20,6 +20,7 @@ with a proportion of the minimal value for that row (i.e. } \examples{ +library(MSnbase) library(faahKO) data("faahko") diff --git a/man/imputeRowMinRand.Rd b/man/imputeRowMinRand.Rd index f2d7368b5..147aebea4 100644 --- a/man/imputeRowMinRand.Rd +++ b/man/imputeRowMinRand.Rd @@ -66,6 +66,7 @@ fractions of the row minimum. \examples{ library(faahKO) +library(MSnbase) data("faahko") xset <- group(faahko) diff --git a/man/overlappingFeatures.Rd b/man/overlappingFeatures.Rd index ae10651c2..8bb937510 100644 --- a/man/overlappingFeatures.Rd +++ b/man/overlappingFeatures.Rd @@ -35,6 +35,7 @@ the m/z - rt space. 
\examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/peaksWithCentWave.Rd b/man/peaksWithCentWave.Rd index fc78b8dba..189b6c22c 100644 --- a/man/peaksWithCentWave.Rd +++ b/man/peaksWithCentWave.Rd @@ -107,9 +107,9 @@ resulting in different signal to noise ratios. \examples{ ## Reading a file +library(MsExperiment) library(xcms) -od <- readMSData(system.file("cdf/KO/ko15.CDF", package = "faahKO"), - mode = "onDisk") +od <- readMsExperiment(system.file("cdf/KO/ko15.CDF", package = "faahKO")) ## Extract chromatographic data for a small m/z range mzr <- c(272.1, 272.2) diff --git a/man/plotChromPeakDensity.Rd b/man/plotChromPeakDensity.Rd index b4726caf6..db48c5fdf 100644 --- a/man/plotChromPeakDensity.Rd +++ b/man/plotChromPeakDensity.Rd @@ -98,6 +98,7 @@ algorithm and its supported settings. \examples{ ## Load a test data set with detected peaks +library(MSnbase) data(faahko_sub) ## Update the path to the files for the local system dirname(faahko_sub) <- system.file("cdf/KO", package = "faahKO") diff --git a/man/plotChromatogramsOverlay.Rd b/man/plotChromatogramsOverlay.Rd index f558d4cb2..1030ad61d 100644 --- a/man/plotChromatogramsOverlay.Rd +++ b/man/plotChromatogramsOverlay.Rd @@ -116,6 +116,7 @@ the \code{plot} function for \code{XChromatogram()} object for details). ## Load preprocessed data and extract EICs for some features. library(xcms) +library(MSnbase) xdata <- loadXcmsData() data(xdata) ## Update the path to the files for the local system diff --git a/man/plotMsData.Rd b/man/plotMsData.Rd index 075398458..ad6f1a55b 100644 --- a/man/plotMsData.Rd +++ b/man/plotMsData.Rd @@ -35,27 +35,14 @@ based on their intensity. 
See argument \code{col.regions} in \link[lattice:level.colors]{lattice::level.colors} documentation.} } \description{ -\strong{UPDATE}: please use \code{plot(x, type = "XIC")} from the \code{MSnbase} package -instead. See examples below. +\strong{UPDATE}: please use \code{plot()} from the \code{MsExperiment} or +\code{plot(x, type = "XIC")} from the \code{MSnbase} package instead. See examples +in the vignette for more information. The \code{plotMsData} creates a plot that combines an (base peak ) extracted ion chromatogram on top (rt against intensity) and a plot of rt against m/z values at the bottom. } -\examples{ - -## Read two files from the faahKO package -library(faahKO) -cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE, - recursive = TRUE)[1:2] -raw_data <- readMSData(cdfs, mode = "onDisk") - -## Subset the object to a rt and mz range and plot the data. -raw_data |> - filterRt(rt = c(2700, 2900)) |> - filterMz(mz = c(334.9, 335.1)) |> - plot(type = "XIC") -} \author{ Johannes Rainer } diff --git a/man/refineChromPeaks.Rd b/man/refineChromPeaks.Rd index d23bdeccb..b1472702b 100644 --- a/man/refineChromPeaks.Rd +++ b/man/refineChromPeaks.Rd @@ -203,6 +203,8 @@ calculation of the peak signal (\code{"into"}). \examples{ ## Load a test data set with detected peaks +library(xcms) +library(MsExperiment) faahko_sub <- loadXcmsData("faahko_sub2") ## Disable parallel processing for this example diff --git a/man/removeIntensity-Chromatogram.Rd b/man/removeIntensity-Chromatogram.Rd index 18cc89a16..5343689a6 100644 --- a/man/removeIntensity-Chromatogram.Rd +++ b/man/removeIntensity-Chromatogram.Rd @@ -52,6 +52,7 @@ chromatographic data. } \examples{ +library(MSnbase) chr <- Chromatogram(rtime = 1:10 + rnorm(n = 10, sd = 0.3), intensity = c(5, 29, 50, NA, 100, 12, 3, 4, 1, 3)) diff --git a/man/storeResults.Rd b/man/storeResults.Rd index c4da3169d..3d60b9140 100644 --- a/man/storeResults.Rd +++ b/man/storeResults.Rd @@ -37,6 +37,8 @@ listed above. 
\examples{ ## Load a test data set with detected peaks +library(xcms) +library(MsExperiment) faahko_sub <- loadXcmsData("faahko_sub2") ## Set up parameter to save as .RData file diff --git a/tests/testthat.R b/tests/testthat.R index 39c69e3d7..a3f2a3018 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,6 +1,7 @@ library(testthat) library(xcms) library(faahKO) +library(MSnbase) library(msdata) if (.Platform$OS.type == "unix") { @@ -75,6 +76,14 @@ pdp <- PeakDensityParam(sampleGroups = rep(1, 3)) xmseg <- groupChromPeaks(xmse, param = pdp, add = FALSE) expect_true(length(processHistory(xmseg)) == 2L) +## Data for LamaParama checks +ref <- loadXcmsData("xmse") +f <- sampleData(ref)$sample_type +f[f == "QC"] <- NA +ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f)) +ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")] +tst <- loadXcmsData("faahko_sub2") + test_check("xcms") bpstop(prm) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index f2b0e6303..d538a7623 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -506,6 +506,16 @@ test_that("adjustRtime,MsExperiment,PeakGroupsParam works", { expect_true(length(processHistory(res_3)) == 1L) }) + +test_that("LamaParama works", { + expect_no_error(LamaParama(lamas = ref_mz_rt)) + expect_error(LamaParama(), "cannot be empty") + param <- LamaParama(lamas = ref_mz_rt) + expect_equal(ncol(param@lamas), 2) + expect_true(inherits(param@lamas, "matrix")) + expect_equal(length(param@method), 1) +}) + test_that("findChromPeaks,XcmsExperiment,MatchedFilterParam works", { mfp <- MatchedFilterParam(binSize = 20, impute = "lin") ref <- findChromPeaks(faahko_od, param = mfp) @@ -1001,7 +1011,7 @@ test_that("manualChromPeaks,XcmsExperiment works", { res2 <- manualChromPeaks(tmp, pks, samples = 2) expect_equal(unname(chromPeaks(res2)), unname(pks_2)) - + }) test_that("filterChromPeaks,XcmsExperiment works", { diff --git 
a/tests/testthat/test_do_adjustRtime-functions.R b/tests/testthat/test_do_adjustRtime-functions.R index e8a3e6b02..1f90ec845 100644 --- a/tests/testthat/test_do_adjustRtime-functions.R +++ b/tests/testthat/test_do_adjustRtime-functions.R @@ -265,3 +265,117 @@ test_that(".getPeakGroupsRtMatrix works with subsets", { extraPeaks = 1L) expect_true(!any(is.na(res))) }) + +test_that(".rt_model works", { + obs <- ref_mz_rt + rt_m <- data.frame(ref = obs[, "rtmed"], + obs = obs[, "rtmed"] + 2 + rnorm(nrow(obs), 0, 0.03)) + res <- .rt_model(rt_map = rt_m, method = "loess") + expect_true(is(res, "loess")) + expect_true(length(res$residuals) < nrow(obs)) + ## skip outlier removal + res <- .rt_model(rt_map = rt_m, method = "loess", resid_ratio = 100) + expect_true(is(res, "loess")) + expect_equal(length(res$residuals), nrow(obs) + 1) # for the c(0,0) extra row + + ## gam + res <- .rt_model(rt_map = rt_m, method = "gam") + expect_true(is(res, "gam")) + expect_true(length(res$residuals) < nrow(obs)) + ## skip outlier removal + res <- .rt_model(rt_map = rt_m, method = "gam", resid_ratio = 100) + expect_true(is(res, "gam")) + expect_equal(length(res$residuals), nrow(obs) + 1) +}) + +test_that(".match_reference_anchors works", { + a <- cbind(mz = c(200.1, 200.1, 200.1, 232.1, 233.1, 233.2), + rt = c(100, 150.1, 190, 190, 190, 192)) + b <- cbind(mz = c(200.2, 232, 233.1, 234), + rt = c(150, 190.4, 193, 240)) + + res <- .match_reference_anchors(a, b) + expect_true(is.data.frame(res)) + expect_equal(colnames(res), c("ref", "obs")) + expect_true(nrow(res) == 1L) + expect_equal(res$ref, 193.0) + expect_equal(res$obs, 190.0) + + ## no matches: + res <- .match_reference_anchors(a, b, tolerance = 0, toleranceRt = 0) + expect_true(is.data.frame(res)) + expect_equal(colnames(res), c("ref", "obs")) + expect_true(nrow(res) == 0L) + + ## skip multiple matches: rows 1 and 2 from `a` match row 1 from `b` and + ## rows 5 and 6 from `a` match row 3 from `b` + res <- .match_reference_anchors(a, b, 
tolerance = 0.1, toleranceRt = 52) + expect_true(is.data.frame(res)) + expect_equal(colnames(res), c("ref", "obs")) + expect_equal(res$ref, 190.4) + expect_equal(res$obs, 190.0) + + ## little relaxed matching: should match row 2 in `a` with row 1 in `b` and + ## row 4 in `a` with row 2 in `b`. rows 5 and 6 in `a` match both row 3 in + ## `b` and should thus not be reported. + res <- .match_reference_anchors(a, b, tolerance = 0.1, toleranceRt = 5) + expect_true(is.data.frame(res)) + expect_equal(colnames(res), c("ref", "obs")) + expect_equal(res$ref, c(150, 190.4)) + expect_equal(res$obs, c(150.1, 190.0)) + + ## Same but reducing toleranceRt to have also a match between row 6 in `a` + ## with row 3 in `b`. + res <- .match_reference_anchors(a, b, tolerance = 0.1, toleranceRt = 2) + expect_true(is.data.frame(res)) + expect_equal(colnames(res), c("ref", "obs")) + expect_equal(res$ref, c(150, 190.4, 193.0)) + expect_equal(res$obs, c(150.1, 190.0, 192.0)) +}) + +test_that(".adjust_rt_model works", { + ref_anchors <- ref_mz_rt + ## Filter the test data to the same rt range than the reference data + tst <- filterRt(tst, rt = c(2550, 4250)) + obs <- chromPeaks(tst[2]) + + rt_map <- .match_reference_anchors(obs[, c("mz", "rt")], ref_anchors, + tolerance = 0.01, + toleranceRt = 5) + + rt_raw <- rtime(tst[2]) + rt_adj <- .adjust_rt_model(rt_raw, method = "loess", rt_map = rt_map) + expect_equal(length(rt_raw), length(rt_adj)) + expect_true(!any(is.na(rt_adj))) + ## Adjusted retention times should be closer to the ones in the preprocessed + ## data set + rt_ref <- rtime(ref[2, keepAdjustedRtime = TRUE]) + expect_true(mean(abs(rt_adj - rt_ref)) < mean(abs(rt_raw - rt_ref))) +}) + +test_that("matchLamasChromPeaks works", { + param <- LamaParama(lamas = ref_mz_rt) + expect_equal(param@rtMap, list()) + param <- matchLamasChromPeaks(tst, param) + expect_true(inherits(param, "LamaParama")) + expect_equal(length(param@rtMap), length(tst)) + expect_equal(length(param@nChromPeaks), 
length(tst)) +}) + +test_that("summarizeLamaMatch works", { + param <- LamaParama(lamas = ref_mz_rt, toleranceRt = 10) + expect_error(summarizeLamaMatch(param), "missing") + param <- matchLamasChromPeaks(tst, param) + res <- summarizeLamaMatch(param) + expect_equal(nrow(res), length(tst)) + expect_equal(ncol(res), 4) + expect_true(inherits(res$Model_summary[[1]], "summary.loess")) +}) + +test_that("Accessing rtMap from LamaParama object works", { + param <- LamaParama(lamas = ref_mz_rt, toleranceRt = 10) + param <- matchLamasChromPeaks(tst, param) + expect_error(matchedRtimes(ObiwarpParam()), "class") + mtch <- matchedRtimes(param) + expect_equal(length(mtch), length(param@rtMap)) +}) diff --git a/vignettes/LC-MS-feature-grouping.Rmd b/vignettes/LC-MS-feature-grouping.Rmd index bac3e990d..5269f33c4 100644 --- a/vignettes/LC-MS-feature-grouping.Rmd +++ b/vignettes/LC-MS-feature-grouping.Rmd @@ -67,6 +67,7 @@ result from this pre-processing updating also the location of the respective raw data files on the current machine. ```{r load-data} +library(MSnbase) library(xcms) library(faahKO) library(MsFeatures) diff --git a/vignettes/xcms-direct-injection.Rmd b/vignettes/xcms-direct-injection.Rmd index 3a5868944..ed496adce 100644 --- a/vignettes/xcms-direct-injection.Rmd +++ b/vignettes/xcms-direct-injection.Rmd @@ -12,7 +12,7 @@ vignette: > %\VignetteEngine{knitr::rmarkdown} %\VignetteKeywords{Mass Spectrometry, MS, Metabolomics, Bioinformatics} %\VignetteEncoding{UTF-8} - %\VignetteDepends{xcms,msdata,MassSpecWavelet,BiocStyle,signal} + %\VignetteDepends{xcms,msdata,MassSpecWavelet,BiocStyle,signal,MSnbase} --- ```{r style, echo = FALSE, results = 'asis'} @@ -41,6 +41,7 @@ Below we load the required packages. For information on the parallel processing setup please see the *BiocParallel* vignette. 
```{r load-libs, message = FALSE, results = "hide"} +library(MSnbase) library(xcms) library(MassSpecWavelet) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 1e1fa101a..924e1b870 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -136,7 +136,6 @@ performance. See the package vignette from the `r Biocpkg("Spectra")` package or the [SpectraTutorials](https://jorainer.github.io/SpectraTutorials) tutorial for more details on `Spectra` backends and how to change between them. - ## Initial data inspection The `MsExperiment` object is a simple and flexible container for MS @@ -1285,6 +1284,138 @@ identification of e.g. features with significant different intensities/abundances it is suggested to use functionality provided in other R packages, such as Bioconductor's excellent `limma` package. +## Alignment to an external reference dataset + +In certain experiments, aligning two different datasets is necessary. This +can involve comparing runs of the same samples conducted across different +laboratories or runs with MS2 recorded after the initial MS1 run. Across +laboratories and over time, the same samples may result in variation in +retention time, especially because the LC system can be quite unstable. In these +cases, an alignment step using the `adjustRtime()` function with the +`LamaParama` parameter can allow the user to perform this type of alignment. +We will go through this step by step below. + +Let's load an already analyzed dataset `ref` and our previous dataset before +alignment, which will be `tst`. We will first restrict their retention time +range to be the same for both datasets. + +```{r} +ref <- loadXcmsData("xmse") +tst <- loadXcmsData("faahko_sub2") +``` + +Now, we will attempt to align these two samples with the previous dataset. The +first step is to extract landmark features (referred to as `lamas`). To achieve +this, we will identify the features present in every QC sample of the `ref` +dataset.
To do so, we will categorize (using `factor()`) our data by +`sample_type` and only retain the QC samples. This variable will be utilized to +filter the features using the `PercentMissingFilter()` parameter within the +`filterFeatures()` function (see section above for more information on this +method) + +```{r} +f <- sampleData(ref)$sample_type +f[f != "QC"] <- NA +ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f)) +ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")] +ref_mz_rt +``` + +This is what the `lamas` input should look like for alignment. In terms of +how this method works, the alignment algorithm matches chromatographic peaks +from the experimental data to the lamas, fitting a model based on this +match to adjust their retention times and minimize differences between +the two datasets. + +Now we can define our `param` object `LamaParama` to prepare for the +alignment. Parameters such as `tolerance`, `toleranceRt`, and `ppm` relate +to the matching between chromatographic peaks and lamas. Other parameters +are related to the type of fitting generated between these data points. +More details on each parameter and the overall method can be found by +searching `?adjustRtime`. Below is an example using default parameters. 
+ +```{r} +param <- LamaParama(lamas = ref_mz_rt, method = "loess", span = 0.5, + outlierTolerance = 3, zeroWeight = 10, ppm = 20, + tolerance = 0, toleranceRt = 20, bs = "tp") + +#' input into `adjustRtime()` +tst_adjusted <- adjustRtime(tst, param = param) +tst_adjusted <- applyAdjustedRtime(tst_adjusted) +``` + +We extract the base peak chromatogram (BPC) to visualize and evaluate the +alignment: + +```{r fig.height=12, fig.width=5} +#' evaluate the results with BPC +bpc <- chromatogram(ref, chromPeaks = "none") +bpc_tst_raw <- chromatogram(tst, chromPeaks = "none") +bpc_tst_adj <- chromatogram(tst_adjusted, chromPeaks = "none") +``` + +We generate plots to visually compare the alignment to the reference +dataset (black) both before (red) and after (blue) adjustment: + +```{r fig.height=4, fig.width=10} +#' BPC of a sample +par(mfrow = c(1, 2), mar = c(4, 2.5, 1, 0.5)) +plot(bpc[1, 1], col = "#00000080", main = "Before Alignment") +points(rtime(bpc_tst_raw[1, 1]), intensity(bpc_tst_raw[1, 1]), type = "l", + col = "#ff000080") +grid() + +plot(bpc[1, 1], col = "#00000080", main = "After Alignment") +points(rtime(bpc_tst_adj[1, 1]), intensity(bpc_tst_adj[1, 1]), type = "l", + col = "#0000ff80") +grid() +``` + +It appears that certain time intervals (2500 to 3000 and 3500 to 4500 seconds) +exhibit better alignment than others. This variance can be elucidated by +examining the distribution of matched peaks, as illustrated below. The +`matchLamasChromPeaks()` function facilitates the assessment of how well the +`lamas` correspond with the chromatographic peaks in each file. This analysis +can be conducted prior to any adjustments.
+ +```{r} +param <- matchLamasChromPeaks(tst, param = param) +mtch <- matchedRtimes(param) + +#' BPC of the first sample with matches to lamas overlay +par(mfrow = c(1, 1)) +plot(bpc[1, 1], col = "#00000080", main = "Distribution CP matched to Lamas") +points(rtime(bpc_tst_adj[1, 1]), intensity(bpc_tst_adj[1, 1]), type = "l", + col = "#0000ff80") +grid() +abline(v = mtch[[1]]$obs) +``` + +The overlay of BPC above provides insight into the correlation between accurate +alignment and the presence of peaks matching with `lamas`. Furthermore, a more +detailed examination of the matching and the model used for fitting each file +is possible. Numerical information can be obtained using the +`summarizeLamaMatch()` function. From this, the percentage of chromatographic +peaks utilized for alignment can be computed relative to the total number of +peaks in the file. Additionally, it is feasible to directly `plot()` the +`param` object for the file of interest, showcasing the distribution of these +chromatographic peaks along with the fitted model line. + +```{r} +#access summary of matches and model information +summary <- summarizeLamaMatch(param) +summary + +# coverage for each file +summary$Matched_peaks / summary$Total_peaks * 100 + +#access the information on the model for the first file +summary$Model_summary[[1]] + +# Plot obs vs. ref with fitting line +plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file", + colPoint = "red") +``` # Additional details and notes diff --git a/xcms.Rproj b/xcms.Rproj deleted file mode 100644 index 21a4da087..000000000 --- a/xcms.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source