/
plotQC.R
159 lines (144 loc) · 6.34 KB
/
plotQC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#' Plot m/z and RT deviations for QC purposes without external reference data
#'
#' Use "democracy" to determine the average m/z and RT deviations
#' for a grouped xcmsSet, and dependency on sample or absolute m/z
#'
#' plotQC() is a warpper to create a set of diagnostic plots.
#' For the m/z deviations, the median of all m/z withon one group are assumed.
#'
#' @usage plotQC(object, sampNames, sampColors, sampOrder, what)
#'
#' @aliases plotQC
#'
#' @param object A grouped \code{\link{xcmsSet}}
#' @param sampNames Override sample names (e.g. with simplified names)
#' @param sampColors Provide a set of colors (default: monochrome ?)
#' @param sampOrder Override the order of samples, e.g. to bring them in order
#' of measurement to detect time drift
#' @param what A vector of which QC plots to generate.
#' "mzdevhist": histogram of mz deviations. Should be gaussian shaped. If it is multimodal, then some peaks seem to have a systematically higher m/z deviation
#' "rtdevhist": histogram of RT deviations. Should be gaussian shaped. If it is multimodal, then some peaks seem to have a systematically higher RT deviation
#' "mzdevmass": Shows whether m/z deviations are absolute m/z dependent, could indicate miscalibration
#' "mzdevtime": Shows whether m/z deviations are RT dependent, could indicate instrument drift
#' "mzdevsample": median mz deviation for each sample, indicates outliers
#' "rtdevsample": median RT deviation for each sample, indicates outliers
#'
#'
#' @return List with four matrices, each of dimension features * samples:
#' "mz": median mz deviation for each sample
#' "mzdev": median mz deviation for each sample
#' "rt": median RT deviation for each sample
#' "rtdev": median RT deviation for each sample
#'
#' @examples
#' library(faahKO)
#' xsg <- group(faahko)
#'
#' plotQC(xsg, what="mzdevhist")
#' plotQC(xsg, what="rtdevhist")
#' plotQC(xsg, what="mzdevmass")
#' plotQC(xsg, what="mzdevtime")
#' plotQC(xsg, what="mzdevsample")
#' plotQC(xsg, what="rtdevsample")
#'
#' @author Michael Wenk, Michael Wenk <michael.wenk@@student.uni-halle.de>
plotQC <- function(object,
sampNames = NULL,
sampColors = NULL,
sampOrder=NULL,
what=c("mzdevhist",
"rtdevhist",
"mzdevmass",
"mzdevtime",
"mzdevsample",
"rtdevsample")) {
if (inherits(object, "xcmsSet")) {
deviations <- list(mzs=groupval(object, value = "mz"),
rts=groupval(object, value = "rt"))
deviations$mzdev <- deviations$mzs - groups(object)[,"mzmed"]
deviations$rtdev <- deviations$rts - groups(object)[,"rtmed"]
} else if (inherits(object, "XCMSnExp")) {
deviations <- list(mzs=featureValues(object, value = "mz"),
rts=featureValues(object, value = "rt"))
deviations$mzdev <- deviations$mzs - featureDefinitions(object)[,"mzmed"]
deviations$rtdev <- deviations$rts - featureDefinitions(object)[,"rtmed"]
} else {
stop("object not xcmsSet nor XCMSnExp")
}
if (missing(sampNames) || is.null(sampNames)) {
if (class(object)=="xcmsSet") {
sampNames <- sampnames(object)
} else if (class(object)=="XCMSnExp") {
sampNames <- sampleNames(object)
} else {
stop("object not xcmsSet nor XCMSnExp")
}
}
n <- ncol(deviations$mzs)
if (missing(sampColors) || is.null(sampColors)) {
sampColors <- rainbow(n)
}
if (missing(sampOrder) || is.null(sampOrder)) {
sampOrder <- 1:n
}
## Plot histograms of deviation
if ("mzdevhist" %in% what) {
hist(deviations$mzdev, breaks=100,
ylab = "Number of Peaks",
xlab = "m/z Deviation",
main = "m/z Deviation")
}
if ("rtdevhist" %in% what) {
hist(deviations$rtdev, breaks=100, ylab = "Number of Peaks",
xlab = "Retention Time Deviation",
main = "Retention Time Deviation")
}
if ("mzdevmass" %in% what) {
## Plot mass deviation depending on absolute mass
# Add extra space to right of plot area; change clipping to figure
# par(mar=c(6, 8, 8.1, 10), xpd=TRUE)
# par(oma = c(0,0,0,10))
plot(x = as.vector(deviations$mzs),
y = deviations$mzdev,
col=sampColors, pch = ".", type = "p",
main = "m/z Deviation vs. m/z",
xlab = "m/z", ylab = "m/z deviation")
for(i in 1:n) {
data <- na.omit(data.frame(mzs = deviations$mzs[,i], mzdev = deviations$mzdev[,i]))
lo <- loess(formula = mzdev ~ mzs, data = data)
currSampleValues <- deviations$mzs[!is.na(deviations$mzs[,i]),i]
currSampleValues <- currSampleValues[order(currSampleValues)]
lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 2)
}
legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples")
}
if ("mzdevtime" %in% what) {
## Plot mass deviation depending on retention time
## Add extra space to right of plot area; change clipping to figure
##par(mar=c(5.1, 5.1, 8.1, 8.1), xpd=TRUE)
plot(x = as.vector(deviations$rts), y = deviations$mzdev, pch = ".", type = "p",
col=sampColors, main = "m/z deviation vs. retention time",
xlab = "Retention Time", ylab = "m/z deviation")
for(i in 1:n){
data <- na.omit(data.frame(rts = deviations$rts[,i], mzdev = deviations$mzdev[,i]))
lo <- loess(formula = mzdev ~ rts, data = data)
currSampleValues <- deviations$rts[!is.na(deviations$rts[,i]),i]
currSampleValues <- currSampleValues[order(currSampleValues)]
lines(currSampleValues, predict(lo), col=sampColors[i], lwd = 1)
}
legend("topright", legend=sampNames, col=sampColors, pch = c(1), cex=0.35, ncol=4, title = "samples")
}
## still to come: median deviations per sample,
## to detect corrupt samples
if ("mzdevsample" %in% what) {
barplot(apply(deviations$mzdev[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)),
col = sampColors, xlab = "", ylab = "m/z Deviation",
names.arg = sampNames, las = 2)
}
if ("rtdevsample" %in% what) {
barplot(apply(deviations$rtdev[,sampOrder], MARGIN=2, FUN=function(x) median(x, na.rm=TRUE)),
col = sampColors, xlab = "", ylab = "Retention Time Deviation",
names.arg = sampNames, las = 2)
}
invisible(deviations)
}