-
Notifications
You must be signed in to change notification settings - Fork 81
/
DataClasses.R
2407 lines (2357 loc) · 105 KB
/
DataClasses.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## All class definitions should go in here.
#' @include AllGenerics.R
############################################################
## Class unions
setClassUnion("characterOrNULL", c("character", "NULL"))
setClassUnion("logicalOrNumeric", c("logical", "numeric"))
##setClassUnion("ANYorNULL", c("ANY", "NULL"))
############################################################
## xcmsSet
##
setClass("xcmsSet",
representation = representation(peaks = "matrix",
groups = "matrix",
groupidx = "list",
filled="numeric",
phenoData = "data.frame",
rt = "list",
filepaths = "character",
profinfo = "list",
dataCorrection="numeric",
polarity = "character",
progressInfo = "list",
progressCallback="function",
mslevel = "numeric",
scanrange = "numeric",
.processHistory = "list"),
prototype = prototype(peaks = matrix(nrow = 0, ncol = 0),
groups = matrix(nrow = 0, ncol = 0),
groupidx = list(),
filled = integer(0),
phenoData = data.frame(),
rt = list(),
filepaths = character(0),
profinfo = vector("list"),
dataCorrection=integer(0),
polarity = character(0),
progressInfo = list(),
mslevel = numeric(0),
scanrange= numeric(0),
progressCallback = function(progress) NULL,
.processHistory = list()),
validity = function(object) {
msg <- character()
## Check if all slots are present.
slNames <- slotNames(object)
missingSlots <- character()
for (i in 1:length(slNames)) {
if (!.hasSlot(object, slNames[i]))
missingSlots <- c(missingSlots, slNames[i])
}
if (length(missingSlots) > 0)
msg <- c(msg, paste0("This xcmsSet lacks slot(s): ",
paste(missingSlots, collapse = ","),
". Please update the object using",
" the 'updateObject' method."))
## Check the .processHistory slot.
if (!any(missingSlots == ".processHistory")) {
inh <- unlist(lapply(object@.processHistory,
FUN = function(z) {
return(inherits(z, "ProcessHistory"))
}))
if (!all(inh))
msg <- c(msg,
paste0("Slot '.processHistory' should",
" only contain 'ProcessHistory'",
" objects!"))
}
if (length(msg))
return(msg)
return(TRUE)
})
############################################################
## xcmsEIC
setClass("xcmsEIC",
representation(eic = "list",
mzrange = "matrix",
rtrange = "matrix",
rt = "character",
groupnames = "character"),
prototype(eic = list(),
mzrange = matrix(nrow = 0, ncol = 0),
rtrange = matrix(nrow = 0, ncol = 0),
rt = character(0),
groupnames = character(0)))
############################################################
## xcmsFragments
setClass("xcmsFragments",
representation(peaks = "matrix",
MS2spec = "list",
specinfo = "matrix"
##, pipeline = "xcmsRawPipeline"
),
prototype(peaks = matrix(nrow = 0, ncol = 6),
MS2spec=NULL,
specinfo=NULL
##, pipeline = new("xcmsRawPipeline")
))
############################################################
## xcmsSource
setClass("xcmsSource", representation("VIRTUAL"))
## If given an xcmsSource object, simply return it unchanged
setMethod("xcmsSource", "xcmsSource", function(object) object)
############################################################
## xcmsFileSource
setClass("xcmsFileSource",
representation("character"),
contains="xcmsSource",
validity=function(object) {
if (file.exists(object)) TRUE
else paste("File not found:", object)
})
############################################################
## xcmsRaw
setClass("xcmsRaw", representation(env = "environment",
tic = "numeric",
scantime = "numeric",
scanindex = "integer",
polarity = "factor",
acquisitionNum = "integer",
profmethod = "character",
profparam = "list",
mzrange = "numeric",
gradient = "matrix",
msnScanindex = "integer",
msnAcquisitionNum = "integer",
msnPrecursorScan = "integer",
msnLevel = "integer",
msnRt = "numeric",
msnPrecursorMz = "numeric",
msnPrecursorIntensity = "numeric",
msnPrecursorCharge = "numeric",
msnCollisionEnergy = "numeric",
filepath = "xcmsSource",
scanrange = "numeric",
mslevel = "numeric"),
prototype(env = new.env(parent=.GlobalEnv),
tic = numeric(0),
scantime = numeric(0),
scanindex = integer(0),
polarity = factor(integer(0)),
acquisitionNum = integer(0),
profmethod = "bin",
profparam = list(),
mzrange = numeric(0),
gradient = matrix(nrow=0, ncol=0),
msnScanindex = integer(0),
msnAcquisitionNum = integer(0),
msnLevel = integer(0),
msnRt = numeric(0),
msnPrecursorScan = integer(0),
msnPrecursorMz = numeric(0),
msnPrecursorIntensity = numeric(0),
msnPrecursorCharge = numeric(0),
msnCollisionEnergy = numeric(0),
scanrange = NULL,
mslevel = 1
))
############################################################
## netCdfSource
setClass("netCdfSource", contains="xcmsFileSource")
############################################################
## rampSource
setClass("rampSource", contains="xcmsFileSource")
############################################################
## pwizSource
setClass("pwizSource", contains="xcmsFileSource")
############################################################
## xcmsPeaks
setClass("xcmsPeaks", contains = "matrix")
############################################################
## Processing history type statics
.PROCSTEP.UNKNOWN <- "Unknown"
.PROCSTEP.PEAK.DETECTION <- "Peak detection"
.PROCSTEP.PEAK.GROUPING <- "Peak grouping"
.PROCSTEP.RTIME.CORRECTION <- "Retention time correction"
.PROCSTEP.PEAK.FILLING <- "Missing peak filling"
.PROCSTEPS <- c(
.PROCSTEP.UNKNOWN,
.PROCSTEP.PEAK.DETECTION,
.PROCSTEP.PEAK.GROUPING,
.PROCSTEP.RTIME.CORRECTION,
.PROCSTEP.PEAK.FILLING
)
############################################################
## ProcessHistory
#' @aliases ProcessHistory
#'
#' @title Tracking data processing
#'
#' @description Objects of the type \code{ProcessHistory} allow to keep track
#' of any data processing step in an metabolomics experiment. They are
#' created by the data processing methods, such as
#' \code{\link{findChromPeaks}} and added to the corresponding results
#' objects. Thus, usually, users don't need to create them.
#'
#' @slot type character(1): string defining the type of the processing step.
#' This string has to match predefined values. Use
#' \code{\link{processHistoryTypes}} to list them.
#'
#' @slot date character(1): date time stamp when the processing step was started.
#'
#' @slot info character(1): optional additional information.
#'
#' @slot fileIndex integer of length 1 or > 1 to specify on which
#' samples of the object the processing was performed.
#'
#' @slot error (ANY): used to store eventual calculation errors.
#'
#' @rdname ProcessHistory-class
setClass("ProcessHistory",
slots = c(
type = "character",
date = "character",
info = "character",
fileIndex = "integer",
error = "ANY"
),
contains = "Versioned",
prototype = prototype(
type = .PROCSTEP.UNKNOWN,
date = character(),
info = character(),
fileIndex = integer(), ## This can be of length 1 or > 1.
error = NULL
),
validity = function(object) {
msg <- character()
## check type:
if (!any(object@type == .PROCSTEPS))
msg <- c(msg, paste0("Got invalid type '", object@type,
"'! Allowd are: ",
paste0("\"", .PROCSTEPS, "\"",
collapse = ", ")))
if (length(object@type) > 1)
msg <- c(msg, paste0("length of 'type' should not be ",
"larger than 1!"))
if (length(object@date) > 1)
msg <- c(msg, paste0("length of 'date' should not be ",
"larger than 1!"))
if (length(object@info) > 1)
msg <- c(msg, paste0("length of 'info' should not be ",
"larger than 1!"))
if (length(msg))
msg
else
TRUE
}
)
## BasicParam class
## CentWaveParam
setClass("Param",
representation = representation("VIRTUAL"),
contains = c("Versioned"))
setClassUnion("ParamOrNULL", c("Param", "NULL"))
#' @aliases GenericParam
#'
#' @title Generic parameter class
#'
#' @description The \code{GenericParam} class allows to store generic parameter
#' information such as the name of the function that was/has to be called
#' (slot \code{fun}) and its arguments (slot \code{args}). This object is
#' used to track the process history of the data processings of an
#' \code{\link{XCMSnExp}} object. This is in contrast to e.g. the
#' \code{\link{CentWaveParam}} object that is passed to the actual
#' processing method.
#'
#' @seealso \code{\link{processHistory}} for how to access the process history
#' of an \code{\link{XCMSnExp}} object.
#'
#' @slot fun \code{character} specifying the function name.
#'
#' @slot args \code{list} (ideally named) with the arguments to the
#' function.
#'
#' @slot .__classVersion__ the version of the class.
#'
#' @author Johannes Rainer
#'
#' @rdname GenericParam
#'
#' @examples
#' prm <- GenericParam(fun = "mean")
#'
#' prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE))
setClass("GenericParam",
slots = c(fun = "character",
args = "list"),
contains = "Param",
prototype = prototype(
fun = character(),
args = list()
),
validity = function(object) {
msg <- character()
if (length(object@args) > 0)
if (!length(object@fun) > 0)
msg <- c(msg, paste0("No function name specified in '@fun'",
" but got '@args'"))
if (length(object@fun) > 1)
msg <- c(msg, paste0("'@fun' has to be of length 1"))
if (length(msg)) msg
else TRUE
}
)
#' @aliases XProcessHistory
#'
#' @title Tracking data processing
#'
#' @description The \code{XProcessHistory} extends the \code{ProcessHistory} by
#' adding a slot \code{param} that allows to store the actual parameter
#' class of the processing step.
#'
#' @slot param (Param): an object of type \code{Param} (e.g.
#' \code{\link{CentWaveParam}}) specifying the settings of the processing
#' step.
#'
#' @rdname ProcessHistory-class
setClass("XProcessHistory",
slots = c(
param = "ParamOrNULL"
),
contains = "ProcessHistory",
prototype = prototype(
param = NULL
),
validity = function(object) {
msg <- character()
if (length(object@param) > 0)
if(!is(object@param, "Param"))
msg <- c(msg,
paste0("Only objects from type 'Param' ",
"allowed in slot '@param'! I got ",
class(object@param)))
if (length(msg)) msg
else TRUE
})
#' @aliases findChromPeaks
#'
#' @title Chromatographic peak detection methods.
#'
#' @description The \code{findChromPeaks} methods perform the chromatographic
#' peak detection on LC/GC-MS data and are part of the modernized
#' \code{xcms} user interface.
#'
#' The implemented peak detection methods in chromatographic space are:
#' \describe{
#' \item{centWave}{chromatographic peak detection using the \emph{centWave}
#' method. See \code{\link{centWave}} for more details.}
#'
#' \item{centWave with predicted isotopes}{peak detection using a two-step
#' centWave-based approach considering also feature isotopes. See
#' \code{\link{centWaveWithPredIsoROIs}} for more details.}
#'
#' \item{matchedFilter}{peak detection in chromatographic space. See
#' \code{\link{matchedFilter}} for more details.}
#'
#' \item{massifquant}{peak detection using the Kalman filter-based
#' method. See \code{\link{massifquant}} for more details.}
#'
#' \item{MSW}{single-spectrum non-chromatography MS data peak detection.
#' See \code{\link{MSW}} for more details.}
#'
#' }
#'
#' @name chromatographic-peak-detection
#'
#' @family peak detection methods
#'
#' @seealso \code{\link{findPeaks}} for the \emph{old} peak detection
#' methods.
#'
#' \code{\link{plotChromPeaks}} to plot identified chromatographic peaks
#' for one file.
#'
#' \code{\link{highlightChromPeaks}} to highlight identified chromatographic
#' peaks in an extracted ion chromatogram plot.
#'
#' @author Johannes Rainer
NULL
#> NULL
## Main centWave documentation.
#' @title Chromatographic peak detection using the centWave method
#'
#' @aliases centWave
#'
#' @description The centWave algorithm perform peak density and wavelet based
#' chromatographic peak detection for high resolution LC/MS data in centroid
#' mode [Tautenhahn 2008].
#'
#' @param ppm \code{numeric(1)} defining the maximal tolerated m/z deviation in
#' consecutive scans in parts per million (ppm) for the initial ROI
#' definition.
#'
#' @param peakwidth \code{numeric(2)} with the expected approximate
#' peak width in chromatographic space. Given as a range (min, max)
#' in seconds.
#'
#' @param snthresh \code{numeric(1)} defining the signal to noise ratio cutoff.
#'
#' @param prefilter \code{numeric(2)}: \code{c(k, I)} specifying the prefilter
#' step for the first analysis step (ROI detection). Mass traces are only
#' retained if they contain at least \code{k} peaks with intensity
#' \code{>= I}.
#'
#' @param mzCenterFun Name of the function to calculate the m/z center of the
#' chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted
#' mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z
#' values, \code{"apex"}: use the m/z value at the peak apex,
#' \code{"wMeanApex3"}: intensity weighted mean of the m/z value at the
#' peak apex and the m/z values left and right of it and \code{"meanApex3"}:
#' mean of the m/z value of the peak apex and the m/z values left and right
#' of it.
#'
#' @param integrate Integration method. For \code{integrate = 1} peak limits
#' are found through descent on the mexican hat filtered data, for
#' \code{integrate = 2} the descent is done on the real data. The latter
#' method is more accurate but prone to noise, while the former is more
#' robust, but less exact.
#'
#' @param mzdiff \code{numeric(1)} representing the minimum difference in m/z
#' dimension for peaks with overlapping retention times; can be negatove to
#' allow overlap.
#'
#' @param fitgauss \code{logical(1)} whether or not a Gaussian should be fitted
#' to each peak.
#'
#' @param noise \code{numeric(1)} allowing to set a minimum intensity required
#' for centroids to be considered in the first analysis step (centroids with
#' intensity \code{< noise} are omitted from ROI detection).
#'
#' @param verboseColumns \code{logical(1)} whether additional peak meta data
#' columns should be returned.
#'
#' @param roiList An optional list of regions-of-interest (ROI) representing
#' detected mass traces. If ROIs are submitted the first analysis step is
#' omitted and chromatographic peak detection is performed on the submitted
#' ROIs. Each ROI is expected to have the following elements specified:
#' \code{scmin} (start scan index), \code{scmax} (end scan index),
#' \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length}
#' (number of scans), \code{intensity} (summed intensity). Each ROI should
#' be represented by a \code{list} of elements or a single row
#' \code{data.frame}.
#'
#' @param firstBaselineCheck \code{logical(1)}. If \code{TRUE} continuous
#' data within regions of interest is checked to be above the first baseline.
#'
#' @param roiScales Optional numeric vector with length equal to \code{roiList}
#' defining the scale for each region of interest in \code{roiList} that
#' should be used for the centWave-wavelets.
#'
#' @details The centWave algorithm is most suitable for high resolution
#' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase
#' the method identifies \emph{regions of interest} (ROIs) representing
#' mass traces that are characterized as regions with less than \code{ppm}
#' m/z deviation in consecutive scans in the LC/MS map. These ROIs are
#' then subsequently analyzed using continuous wavelet transform (CWT)
#' to locate chromatographic peaks on different scales. The first analysis
#' step is skipped, if regions of interest are passed \emph{via} the
#' \code{param} parameter.
#'
#' @note These methods and classes are part of the updated and modernized
#' \code{xcms} user interface which will eventually replace the
#' \code{\link{findPeaks}} methods. It supports peak detection on
#' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}}
#' objects (both defined in the \code{MSnbase} package). All of the settings
#' to the centWave algorithm can be passed with a \code{CentWaveParam}
#' object.
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findChromPeaks_centWave}} core API function and
#' \code{\link{findPeaks.centWave}} for the old user interface.
#'
#' @references
#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly
#' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics}
#' 2008, 9:504
#'
#' @name findChromPeaks-centWave
#'
#' @author Ralf Tautenhahn, Johannes Rainer
NULL
#> NULL
#' @description The \code{CentWaveParam} class allows to specify all settings
#' for a chromatographic peak detection using the centWave method. Instances
#' should be created with the \code{CentWaveParam} constructor.
#'
#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-centWave
#'
#' @examples
#'
#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to
#' ## speed up the execution of the example - in a real use case the default
#' ## value should be used, or it should be set to a reasonable value.
#' cwp <- CentWaveParam(ppm = 20, noise = 10000)
#' ## Change snthresh parameter
#' snthresh(cwp) <- 25
#' cwp
#'
#' ## Perform the peak detection using centWave on some of the files from the
#' ## faahKO package. Files are read using the readMSData2 from the MSnbase
#' ## package
#' library(faahKO)
#' library(xcms)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#' full.names = TRUE)
#' raw_data <- readMSData2(fls[1:2])
#'
#' ## Perform the peak detection using the settings defined above.
#' res <- findChromPeaks(raw_data, param = cwp)
#' head(chromPeaks(res))
setClass("CentWaveParam",
slots = c(
ppm = "numeric",
peakwidth = "numeric",
snthresh = "numeric",
prefilter = "numeric",
mzCenterFun = "character",
integrate = "integer",
mzdiff = "numeric",
fitgauss = "logical",
noise = "numeric",
verboseColumns = "logical",
roiList = "list",
firstBaselineCheck = "logical",
roiScales = "numeric"
),
contains = c("Param"),
prototype = prototype(
ppm = 25,
peakwidth = c(20, 50),
snthresh = 10,
prefilter = c(3, 100),
mzCenterFun = "wMean",
integrate = 1L,
mzdiff = -0.001,
fitgauss = FALSE,
noise = 0,
verboseColumns = FALSE,
roiList = list(),
firstBaselineCheck = TRUE,
roiScales = numeric()
),
validity = function(object) {
msg <- character()
if (length(object@ppm) != 1 | any(object@ppm < 0))
msg <- c(msg, paste0("'ppm' has to be positive numeric",
" of length 1."))
if (length(object@peakwidth) != 2 | any(object@peakwidth < 0))
msg <- c(msg, paste0("'peakwidth' has to be a numeric",
" of length 2 with only positive",
" values."))
if (length(object@snthresh) != 1 | any(object@snthresh < 0))
msg <- c(msg, paste0("'snthresh' has to be a positive",
" numeric of length 1."))
if (length(object@prefilter) != 2)
msg <- c(msg, paste0("'prefilter' has to be a numeric",
" of length 2."))
allowed_vals <- c("wMean", "mean", "apex", "wMeanApex3",
"meanApex3")
if (!(object@mzCenterFun) %in% allowed_vals)
msg <- c(msg, paste0("'mzCenterFun' has to be one of ",
paste0("'", allowed_vals, "'",
collapse = ", "), "."))
if (!(object@integrate %in% c(1L, 2L)))
msg <- c(msg, paste0("'integrate' has to be either 1",
" or 2."))
if (length(object@mzdiff) != 1)
msg <- c(msg, paste0("'mzdiff' has to be a numeric of",
" length 1."))
if (length(object@noise) != 1)
msg <- c(msg, paste0("'noise' has to be a numeric of",
" length 1."))
if (length(object@fitgauss) != 1)
msg <- c(msg, paste0("'fitgauss' has to be a numeric of",
" length 1."))
if (length(object@verboseColumns) != 1)
msg <- c(msg, paste0("'verboseColumns' has to be a ",
"numeric of length 1."))
if (length(object@firstBaselineCheck) != 1)
msg <- c(msg, paste0("'firstBaselineCheck' has to be a",
" numeric of length 1."))
if (length(object@roiList) > 0) {
doHaveExpectedEls <- function(z) {
need <- c("scmax", "scmin", "mzmin", "mzmax", "length",
"intensity")
if (is.null(nrow(z))) {
OK <- all(need %in% names(z))
} else {
OK <- all(need %in% colnames(z))
}
return(OK)
}
OKs <- unlist(lapply(object@roiList, doHaveExpectedEls))
if (any(!OKs))
msg <- c(msg, paste0("'roiList' does not provide ",
"all required fields!"))
}
if (length(object@roiScales) > 0) {
if (length(object@roiList) != length(object@roiScales))
msg <- c(msg, paste0("'roiScales' has to have the same",
" length than 'roiList'."))
}
if (length(msg))
msg
else
TRUE
})
## Main matchedFilter documentation.
#' @title Peak detection in the chromatographic time domain
#'
#' @aliases matchedFilter
#'
#' @description The \emph{matchedFilter} algorithm identifies peaks in the
#' chromatographic time domain as described in [Smith 2006]. The intensity
#' values are binned by cutting The LC/MS data into slices (bins) of a mass
#' unit (\code{binSize} m/z) wide. Within each bin the maximal intensity is
#' selected. The chromatographic peak detection is then performed in each
#' bin by extending it based on the \code{steps} parameter to generate
#' slices comprising bins \code{current_bin - steps +1} to
#' \code{current_bin + steps - 1}. Each of these slices is then filtered
#' with matched filtration using a second-derative Gaussian as the model
#' peak shape. After filtration peaks are detected using a signal-to-ratio
#' cut-off. For more details and illustrations see [Smith 2006].
#'
#' @param binSize \code{numeric(1)} specifying the width of the
#' bins/slices in m/z dimension.
#'
#' @param impute Character string specifying the method to be used for missing
#' value imputation. Allowed values are \code{"none"} (no linear
#' interpolation), \code{"lin"} (linear interpolation), \code{"linbase"}
#' (linear interpolation within a certain bin-neighborhood) and
#' \code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details.
#'
#' @param fwhm \code{numeric(1)} specifying the full width at half maximum
#' of matched filtration gaussian model peak. Only used to calculate the
#' actual sigma, see below.
#'
#' @param sigma \code{numeric(1)} specifying the standard deviation (width)
#' of the matched filtration model peak.
#'
#' @param max \code{numeric(1)} representing the maximum number of peaks
#' that are expected/will be identified per slice.
#'
#' @param snthresh \code{numeric(1)} defining the signal to noise cutoff
#' to be used in the chromatographic peak detection step.
#'
#' @param steps \code{numeric(1)} defining the number of bins to be
#' merged before filtration (i.e. the number of neighboring bins that will
#' be joined to the slice in which filtration and peak detection will be
#' performed).
#'
#' @param mzdiff \code{numeric(1)} defining the minimum difference
#' in m/z for peaks with overlapping retention times
#'
#' @param index \code{logical(1)} specifying whether indicies should be
#' returned instead of values for m/z and retention times.
#'
#' @details The intensities are binned by the provided m/z values within each
#' spectrum (scan). Binning is performed such that the bins are centered
#' around the m/z values (i.e. the first bin includes all m/z values between
#' \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}).
#'
#' For more details on binning and missing value imputation see
#' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods.
#'
#' @note These methods and classes are part of the updated and modernized
#' \code{xcms} user interface which will eventually replace the
#' \code{\link{findPeaks}} methods. It supports chromatographic peak
#' detection on \code{\link[MSnbase]{MSnExp}} and
#' \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the
#' \code{MSnbase} package). All of the settings to the matchedFilter
#' algorithm can be passed with a \code{MatchedFilterParam} object.
#'
#' @inheritParams imputeLinInterpol
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findChromPeaks_matchedFilter}} core API function
#' and \code{\link{findPeaks.matchedFilter}} for the old user interface.
#'
#' @references
#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
#' \emph{Anal. Chem.} 2006, 78:779-787.
#'
#' @author Colin A Smith, Johannes Rainer
#'
#' @name findChromPeaks-matchedFilter
NULL
#> NULL
#' @description The \code{MatchedFilterParam} class allows to specify all
#' settings for a chromatographic peak detection using the matchedFilter
#' method. Instances should be created with the \code{MatchedFilterParam}
#' constructor.
#'
#' @slot .__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-matchedFilter
#'
#' @examples
#'
#' ## Create a MatchedFilterParam object. Note that we use a unnecessarily large
#' ## binSize parameter to reduce the run-time of the example.
#' mfp <- MatchedFilterParam(binSize = 5)
#' ## Change snthresh parameter
#' snthresh(mfp) <- 15
#' mfp
#'
#' ## Perform the peak detection using matchecFilter on the files from the
#' ## faahKO package. Files are read using the readMSData2 from the MSnbase
#' ## package
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#' full.names = TRUE)
#' raw_data <- readMSData2(fls[1:2])
#' ## Perform the chromatographic peak detection using the settings defined
#' ## above. Note that we are also disabling parallel processing in this
#' ## example by registering a "SerialParam"
#' register(SerialParam())
#' res <- findChromPeaks(raw_data, param = mfp)
#' head(chromPeaks(res))
setClass("MatchedFilterParam",
slots = c(
binSize = "numeric",
impute = "character",
baseValue = "numeric",
distance = "numeric",
fwhm = "numeric",
sigma = "numeric",
max = "numeric",
snthresh = "numeric",
steps = "numeric",
mzdiff = "numeric",
index = "logical"
),
contains = c("Param"),
prototype = prototype(
binSize = 0.1,
impute = "none",
baseValue = numeric(),
distance = numeric(),
fwhm = 30,
sigma = 12.73994,
max = 5,
snthresh = 10,
steps = 2,
mzdiff = 0.6,
index = FALSE
),
validity = function(object) {
msg <- character()
if (length(object@binSize) != 1 | any(object@binSize < 0))
msg <- c(msg, paste0("'binSize' has to be positive",
" numeric of length 1."))
if (!any(c("none", "lin", "linbase") == object@impute))
msg <- c(msg,
paste0("Only values 'none', 'lin' and ",
"'linbase' are allowed for'impute'"))
if (length(object@baseValue) > 1)
msg <- c(msg, paste0("'baseValue' has to be a",
" numeric of length 1."))
if (length(object@distance) > 1)
msg <- c(msg, paste0("'distance' has to be a numeric",
" of length 1."))
if (length(object@fwhm) != 1)
msg <- c(msg, paste0("'fwhm' has to be a numeric",
" of length 1."))
if (length(object@sigma) != 1)
msg <- c(msg, paste0("'sigma' has to be a numeric",
" of length 1."))
if (length(object@max) != 1)
msg <- c(msg, paste0("'max' has to be a numeric",
" of length 1."))
if (length(object@snthresh) != 1)
msg <- c(msg, paste0("'snthresh' has to be a numeric",
" of length 1."))
if (length(object@steps) != 1)
msg <- c(msg, paste0("'steps' has to be a numeric",
" of length 1."))
if (length(object@mzdiff) != 1)
msg <- c(msg, paste0("'mzdiff' has to be a numeric",
" of length 1."))
if (length(object@index) != 1)
msg <- c(msg, paste0("'index' has to be a logical",
" of length 1."))
if (length(msg))
msg
else
TRUE
})
## Main massifquant documentation.
#' @title Chromatographic peak detection using the massifquant method
#'
#' @aliases massifquant
#'
#' @description Massifquant is a Kalman filter (KF)-based chromatographic peak
#' detection for XC-MS data in centroid mode. The identified peaks
#' can be further refined with the \emph{centWave} method (see
#' \code{\link{findChromPeaks-centWave}} for details on centWave)
#' by specifying \code{withWave = TRUE}.
#'
#' @param peakwidth \code{numeric(2)}. Only the first element is used by
#' massifquant, which specifices the minimum peak length in time scans.
#' For \code{withWave = TRUE} the second argument represents the maximum
#' peak length subject to being greater than the mininum peak length
#' (see also documentation of \code{\link{do_findChromPeaks_centWave}}).
#'
#' @param prefilter \code{numeric(2)}. The first argument is only used
#' if (\code{withWave = TRUE}); see \code{\link{findChromPeaks-centWave}}
#' for details. The second argument specifies the minimum threshold for the
#' maximum intensity of a chromatographic peak that must be met.
#'
#' @param criticalValue \code{numeric(1)}. Suggested values:
#' (\code{0.1-3.0}). This setting helps determine the the Kalman Filter
#' prediciton margin of error. A real centroid belonging to a bonafide
#' peak must fall within the KF prediction margin of error. Much like
#' in the construction of a confidence interval, \code{criticalVal} loosely
#' translates to be a multiplier of the standard error of the prediction
#' reported by the Kalman Filter. If the peak in the XC-MS sample have
#' a small mass deviance in ppm error, a smaller critical value might be
#' better and vice versa.
#'
#' @param consecMissedLimit \code{integer(1)} Suggested values: (\code{1,2,3}).
#' While a peak is in the proces of being detected by a Kalman Filter, the
#' Kalman Filter may not find a predicted centroid in every scan. After 1
#' or more consecutive failed predictions, this setting informs Massifquant
#' when to stop a Kalman Filter from following a candidate peak.
#'
#' @param unions \code{integer(1)} set to \code{1} if apply t-test union on
#' segmentation; set to \code{0} if no t-test to be applied on
#' chromatographically continous peaks sharing same m/z range.
#' Explanation: With very few data points, sometimes a Kalman Filter stops
#' tracking a peak prematurely. Another Kalman Filter is instantiated
#' and begins following the rest of the signal. Because tracking is done
#' backwards to forwards, this algorithmic defect leaves a real peak
#' divided into two segments or more. With this option turned on, the
#' program identifies segmented peaks and combines them (merges them)
#' into one with a two sample t-test. The potential danger of this option
#' is that some truly distinct peaks may be merged.
#'
#' @param checkBack \code{integer(1)} set to \code{1} if turned on; set to
#' \code{0} if turned off. The convergence of a Kalman Filter to a peak's
#' precise m/z mapping is very fast, but sometimes it incorporates erroneous
#' centroids as part of a peak (especially early on). The \code{scanBack}
#' option is an attempt to remove the occasional outlier that lies beyond
#' the converged bounds of the Kalman Filter. The option does not directly
#' affect identification of a peak because it is a postprocessing measure;
#' it has not shown to be a extremely useful thus far and the default is set
#' to being turned off.
#'
#' @param withWave \code{logical(1)} if \code{TRUE}, the peaks identified first
#' with Massifquant are subsequently filtered with the second step of the
#' centWave algorithm, which includes wavelet estimation.
#'
#' @details This algorithm's performance has been tested rigorously
#' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode.
#' Simultaneous kalman filters identify chromatographic peaks and calculate
#' their area under the curve. The default parameters are set to operate on
#' a complex LC-MS Orbitrap sample. Users will find it useful to do some
#' simple exploratory data analysis to find out where to set a minimum
#' intensity, and identify how many scans an average peak spans. The
#' \code{consecMissedLimit} parameter has yielded good performance on
#' Orbitrap data when set to (\code{2}) and on TOF data it was found best
#' to be at (\code{1}). This may change as the algorithm has yet to be
#' tested on many samples. The \code{criticalValue} parameter is perhaps
#' most dificult to dial in appropriately and visual inspection of peak
#' identification is the best suggested tool for quick optimization.
#' The \code{ppm} and \code{checkBack} parameters have shown less influence
#' than the other parameters and exist to give users flexibility and
#' better accuracy.
#'
#' @note These methods and classes are part of the updated and modernized
#' \code{xcms} user interface which will eventually replace the
#' \code{\link{findPeaks}} methods. It supports chromatographic peak
#' detection on \code{\link[MSnbase]{MSnExp}} and
#' \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the
#' \code{MSnbase} package). All of the settings to the massifquant and
#' centWave algorithm can be passed with a \code{MassifquantParam} object.
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findChromPeaks_massifquant}} core API function
#' and \code{\link{findPeaks.massifquant}} for the old user interface.
#'
#' @references
#' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT
#' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature
#' detection" \emph{Bioinformatics} 2014, 30(18):2636-43.
#'
#' @author Christopher Conley, Johannes Rainer
#'
#' @name findChromPeaks-massifquant
NULL
#> NULL
#' @description The \code{MassifquantParam} class allows to specify all
#' settings for a chromatographic peak detection using the massifquant
#' method eventually in combination with the centWave algorithm. Instances
#' should be created with the \code{MassifquantParam} constructor.
#'
#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-massifquant
#'
#' @examples
#'
#' ## Create a MassifquantParam object.
#' mqp <- MassifquantParam()
#' ## Change snthresh parameter
#' snthresh(mqp) <- 30
#' mqp
#'
#' ## Perform the peak detection using massifquant on the files from the
#' ## faahKO package. Files are read using the readMSData2 from the MSnbase
#' ## package
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#' full.names = TRUE)
#' raw_data <- readMSData2(fls[1:2])
#' ## Perform the peak detection using the settings defined above.
#' res <- findChromPeaks(raw_data, param = mqp)
#' head(chromPeaks(res))
setClass("MassifquantParam",
slots = c(
ppm = "numeric",
peakwidth = "numeric",
snthresh = "numeric",
prefilter = "numeric",
mzCenterFun = "character",
integrate = "integer",
mzdiff = "numeric",
fitgauss = "logical",
noise = "numeric",
verboseColumns = "logical",
criticalValue = "numeric",
consecMissedLimit = "integer",
unions = "integer",
checkBack = "integer",
withWave = "logical"
),
contains = c("Param"),
prototype = prototype(
ppm = 25,
peakwidth = c(20, 50),
snthresh = 10,
prefilter = c(3, 100),
mzCenterFun = "wMean",
integrate = 1L,
mzdiff = -0.001,
fitgauss = FALSE,
noise = 0,
verboseColumns = FALSE,
criticalValue = 1.125,
consecMissedLimit = 2L,
unions = 1L,
checkBack = 0L,
withWave = FALSE
),
validity = function(object) {
msg <- character()