-
Notifications
You must be signed in to change notification settings - Fork 12
/
preprocess-spc.R
261 lines (249 loc) · 11.8 KB
/
preprocess-spc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#' @title Preprocess spectra
#' @description Preprocesses spectra in tibble column by sample_id after
#' averaging spectra by \code{simplerspec::average_spc()}.
#' @param spc_tbl Tibble that contains spectra to be preprocessed within
#' a list-column.
#' @param select Character vector of predefined preprocessing options to be
#' applied to the spectra list-column specified in \code{column_in}.
#' Common prefined values are stated as abbreviated preprocessing methods and
#' options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
#' Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
#' for a window size of 21 points.
#' @param column_in Character vector of single list-column in \code{spc_tbl} that
#' contain list of spectra (1 row matrix) to be processed by function supplied
#' in \code{select}.
#' @param custom_function A character string of a custom processing function
#' that is later parsed (produces expression in a list) and evaluated within
#' the function \code{preprocess_spc}.
#' The character vector argument of \code{custom_function}
#' needs to contain \code{"spc_raw"}, which is the single data table of spectra
#' that results from binding a list of data.tables (spectra to preprocess)
#' from the spectra list-column specified in \code{column_in}.
#' An example for a value is
#' \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
#' Optional argument. Default is \code{NULL}.
#' @export
preprocess_spc <- function(spc_tbl, select, column_in = "spc_mean",
custom_function = NULL) {
# Convert list of spectral data.tables to one data.table
spc_raw <- data.table::rbindlist(spc_tbl[column_in][[column_in]])
## Perform preprocessing =====================================================
# Use custom function when supplying option ----------------------------------
if (!is.null(custom_function) & select == "custom") {
# Create full character string for parsing
custom_fct <- paste0("custom <- ", custom_function)
# parse converts the character string into an expression
# eval evaluates the expression; as a result, custom object is computed
# and saved within the current workspace
eval(parse(text = custom_fct))
## x <- spc_raw
## custom <- eval(substitute(custom_function), envir = parent.frame())
# -> Error in is.data.frame(X) : object 'x' not found
}
# -> returns error:
# custom_function = prospectr::savitzkyGolay(X = x, m = 0, p = 3, w = 9)
# Error in is.data.frame(X) : object 'x' not found
# -> Maybe solution: http://stackoverflow.com/questions/30563745/non-standard-evaluation-from-another-function-in-r
# Savitzky-Golay preprocessing
# use different derivatives and window sizes ---------------------------------
# Zero order Savitzky-Golay (no derivative) -> only smoothing
if (select == "sg_0_w9") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)}
# First derivative Savitzky-Golay
if (select == "sg_1_w5") {
sg_1_w5 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 5)}
if (select == "sg_1_w9") {
sg_1_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 9)}
if (select == "sg_1_w11") {
sg_1_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 11)}
if (select == "sg_1_w13") {
sg_1_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 13)}
if (select == "sg_1_p2_w13") {
sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 2, w = 13)}
if (select == "sg_1_w15") {
sg_1_w15 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 15)}
if (select == "sg_1_w17") {
sg_1_w17 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 17)}
if (select == "sg_1_w19") {
sg_1_w19 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 19)}
# Implement window size of 21, corresponds to ICRAF standard;
# see e.g. Terhoeven-Urselmans et al. (2010)
if (select == "sg_1_w21") {
sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 21)}
if (select == "sg_1_w23") {
sg_1_w23 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 23)}
if (select == "sg_1_w25") {
sg_1_w25 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 25)}
if (select == "sg_1_w27") {
sg_1_w27 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 27)}
if (select == "sg_1_w35") {
sg_1_w35 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 35)}
if (select == "sg_1_w41") {
sg_1_w41 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 41)}
if (select == "sg_1_w51") {
sg_1_w51 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 51)}
# Second derivative Savitzky-Golay
if (select == "sg_2_w11") {
sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 11)}
if (select == "sg_2_w21") {
sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 21)}
# Savitzky-Golay (order 0) smoothing and derivative with a window size of
# 21 points
if (select == "sg_0_1_w21") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_1_w21 <- prospectr::savitzkyGolay(X = sg_0_w9,
m = 1, p = 3, w = 21)}
# Savitzky-Golay second derivative
if (select == "sg_2_w5") {
sg_2_w5 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 5)}
if (select == "sg_2_w11") {
sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 11)}
# Standard normal variate (SNV) ----------------------------------------------
# Calculate standard normal variate (SNV) after Savitzky-Golay smoothing
if (select == "sg_0_snv") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)}
if (select == "sg_1_snv") {
sg_1_snv <- prospectr::standardNormalVariate(sg_1_w5)}
if (select == "sg_1_p2_w13_snv") {
sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 2, w = 13)
sg_1_p2_w13_snv <- prospectr::standardNormalVariate(sg_1_p2_w13)}
# Standard normal variate (SNV) and first gap-segment derivative
if (select == "snv_gsd_m1_w11_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w11_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 11, s = 1)}
if (select == "snv_gsd_m1_w21_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w21_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 21, s = 5)}
if (select == "snv_gsd_m1_w31_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
if (select == "snv_gsd_m1_w31_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
# Standard normal variate (SNV) and second gap-segment derivative
if (select == "snv_gsd_m2_w5_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w5_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 5, s = 1)}
if (select == "snv_gsd_m2_w21_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w21_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 21, s = 1)}
if (select == "snv_gsd_m2_w31_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 5)}
if (select == "snv_gsd_m2_w31_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 1)}
if (select == "snv_gsd_m2_w51_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w51_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 1)}
if (select == "snv_gsd_m2_w51_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w51_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 5)}
# 1rst Gap-segement derivative
if (select == "gsd_m1_w5_s4") {
gsd_m1_w5_s4 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 4)}
if (select == "gsd_m1_w11_s5") {
gsd_m1_w11_s5 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 5)}
if (select == "gsd_m1_w11_s21") {
gsd_m1_w11_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 21)}
if (select == "gsd_m1_w21_s1") {
gsd_m1_w21_s1 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 1)}
if (select == "gsd_m1_w21_s21") {
gsd_m1_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 21)}
if (select == "gsd_m1_w35_s21") {
gsd_m1_w35_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 35, s = 21)}
if (select == "gsd_m1_w5_s21") {
gsd_m1_w5_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 21)}
# 2nd Gap-segment derivative
if (select == "gsd_m2_w21_s21") {
gsd_m2_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 2, w = 21, s = 21)}
# 4th Gap-segment derivative
if (select == "gsd_m4_w21_s21") {
gsd_m4_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 4, w = 21, s = 21)}
# Savitzky-Golay combined with multiple scatter correction (MSC --------------
# Savitzky-Golay with 3rd order polynomial, a window size of 21
# and first derivative + MSC
if (select == "sg_1_w21_msc") {
sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 21)
# Use msc function from the pls package; use column means of X as reference
# spectrum
sg_1_w21_msc <- pls::msc(X = sg_1_w21, reference = NULL)
}
# Savitzky-Golay combined with multiple scatter correction (MSC --------------
# Savitzky-Golay with 4th order polynomial, a window size of 21
# and second derivative + MSC
if (select == "sg_2_w21_msc") {
sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 4, w = 21)
# Use msc function from the pls package; use column means of X as reference
# spectrum
sg_2_w21_msc <- pls::msc(X = sg_2_w21, reference = NULL)
}
# Continuum-removal ----------------------------------------------------------
if (select == "cr") {
cr <- prospectr::continuumRemoval(X = spc_raw,
wav = as.numeric(colnames(spc_raw)), type = "A")}
if (select == "cr_refl") {
cr_refl <- prospectr::continuumRemoval(X = spc_raw,
wav = as.numeric(colnames(spc_raw)), type = "R")}
# Select final preprocessing based on selection argument and
# save matrix in data.table
pre <- select
spc_pre <- data.table::as.data.table(get(pre))
# Convert preprocessed spectra in data.table to list of data.table spectra
# https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
spc_pre_list <- map(purrr::transpose(spc_pre), data.table::as.data.table)
# Convert x-values of preprocessed spectra in list of vectors
# prospectr only hands over new xunits in matrix colnames of type character
xvalues_pre_list <- lapply(spc_pre_list,
function(x) as.numeric(colnames(x)))
# Add list of preprocessed spectra and correspoding wavenumbers to tibble
spc_tbl_out <- tibble::add_column(spc_tbl,
spc_pre = spc_pre_list, xvalues_pre = xvalues_pre_list)
return(spc_tbl_out)
}