-
Notifications
You must be signed in to change notification settings - Fork 1
/
integrateFIR.R
121 lines (115 loc) · 6.04 KB
/
integrateFIR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#' @title Integrate fallback integration regions
#'
#' @description Integrate region defined in FIR if a feature is not found
#'
#' @param rawSpec an \code{\link[MSnbase]{OnDiskMSnExp-class}}
#' @param FIR (data.frame) Fallback Integration Regions (FIR) to integrate when
#' a feature is not found. Compounds as row are identical to the targeted
#' features, columns are \code{rtMin} (float in seconds), \code{rtMax} (float in
#' seconds), \code{mzMin} (float), \code{mzMax} (float)
#' @param foundPeakTable a \code{data.frame} as generated by
#' \link{findTargetFeatures}, with features as rows and peak properties as
#' columns. The following columns are mandatory: \code{found}, \code{is_filled},
#' \code{mz}, \code{mzMin}, \code{mzMax}, \code{rt}, \code{rtMin}, \code{rtMax},
#' \code{peakArea}, \code{peakAreaRaw}, \code{maxIntMeasured},
#' \code{maxIntPredicted}.
#' @param verbose (bool) if TRUE message progress
#'
#' @return an updated foundPeakTable with FIR integration values
integrateFIR <- function(rawSpec, FIR, foundPeakTable, verbose = TRUE) {
# Check input
if (dim(FIR)[1] != dim(foundPeakTable)[1]) {
stop("Check input, FIR must have the same number of rows as ",
"foundPeakTable") }
# init
stime <- Sys.time()
needsFilling <- !(foundPeakTable$found)
needsFilling_idx <- seq(dim(foundPeakTable)[1])[needsFilling]
outTable <- foundPeakTable
# only run where replacement is needed
if (sum(needsFilling) != 0) { if (verbose) {
message(sum(needsFilling), " features to integrate with FIR") }
# store results
tmpResult <- data.frame(matrix(vector(), sum(needsFilling), 10,
dimnames = list(c(),
c("mzMin", "mz", "mzMax",
"rtMin", "rt", "rtMax",
"peakArea", "peakAreaRaw",
"maxIntMeasured",
"maxIntPredicted"))))
# extract data for all fallback windows from raw (list of windows)
all_peakData <- extractSignalRawData(rawSpec,
mz = data.frame(mzMin = FIR$mzMin[needsFilling_idx],
mzMax = FIR$mzMax[needsFilling_idx]),
rt = data.frame(rtMin = FIR$rtMin[needsFilling_idx],
rtMax = FIR$rtMax[needsFilling_idx]),
verbose = verbose)
# iterate over features to integrate
tmpResult <- integrateFIR_features(needsFilling_idx, all_peakData, FIR,
tmpResult, verbose)
# Replace results with FIR integration
outTable[needsFilling_idx,
c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea",
"peakAreaRaw", "maxIntMeasured", "maxIntPredicted")] <-
tmpResult[needsFilling_idx, c("mzMin", "mz", "mzMax", "rtMin", "rt",
"rtMax", "peakArea", "peakAreaRaw", "maxIntMeasured",
"maxIntPredicted")]
outTable$is_filled[needsFilling_idx] <- TRUE
outTable$found[needsFilling_idx] <- TRUE }
# Output
etime <- Sys.time()
if (verbose) {
message("FIR integrated in: ",round(as.double(difftime(etime,stime)),2),
" ", units(difftime(etime, stime)))
}
return(outTable)
}
# -----------------------------------------------------------------------------
# integrateFIR helper functions
# iterate over features to integrate
integrateFIR_features <- function(needsFilling_idx, all_peakData, FIR,
tmpResult, verbose){
for (cnt in seq_len(length(needsFilling_idx))) {
peakData <- all_peakData[[cnt]]
i <- needsFilling_idx[cnt]
if (dim(peakData)[1] != 0){ #Only continue if a scan is found in the box
rtRange <- unique(peakData$rt)
# mzMin, mzMax, rtMin, rtMax (values taken from input)
tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
c("mzMin", "mzMax", "rtMin", "rtMax")]
# rt (rt of max intensity)
tmpResult[i, "rt"] <- peakData$rt[which.max(peakData$i)]
# mz (weighted average of total intensity across all rt for each mz)
# total intensity across rt for each mz
mzRange <- unique(peakData$mz)
mzTotalIntensity <- vapply(mzRange, function(x) {
sum(peakData$i[peakData$mz == x])}, FUN.VALUE = numeric(1))
# mz (is weighted average)
tmpResult[i, "mz"] <- stats::weighted.mean(mzRange,mzTotalIntensity)
# maxIntMeasured (max intensity)
tmpResult[i, "maxIntMeasured"] <- max(peakData$i)
# maxIntPredicted is NA (we don't have a fit)
tmpResult[i, "maxIntPredicted"] <- as.numeric(NA)
# into max intensity across mz for each rt
rtMaxIntensity <- vapply(rtRange, function(x) {
max(peakData$i[peakData$rt == x])}, FUN.VALUE = numeric(1))
# peakArea/peakAreaRaw calculated trapezoid rule
peakArea <- pracma::trapz(x=rtRange, y=rtMaxIntensity)
tmpResult[i, "peakArea"] <- peakArea
tmpResult[i, "peakAreaRaw"] <- peakArea
} else { # If no scan found in that region, return default values
if (verbose) { message('No scan present in the FIR # ', i,
': rt and mz are set as the middle of the FIR box;',
' peakArea, maxIntMeasured and maxIntPredicted are',
' set to 0') }
tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
c("mzMin", "mzMax", "rtMin", "rtMax")]
tmpResult[i, "rt"] <- mean(c(FIR$rtMin[i], FIR$rtMax[i]))
tmpResult[i, "mz"] <- mean(c(FIR$mzMin[i], FIR$mzMax[i]))
tmpResult[i, "peakArea"] <- 0
tmpResult[i, "peakAreaRaw"] <- 0
tmpResult[i, "maxIntMeasured"] <- 0
tmpResult[i, "maxIntPredicted"] <- 0 }
}
return(tmpResult)
}