-
Notifications
You must be signed in to change notification settings - Fork 0
/
drift.R
174 lines (152 loc) · 7.97 KB
/
drift.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#' Evaluate drift
#'
#' This function evaluates drift and can correct for it (use parameter \code{correct = TRUE}). Note that it corrects drift based on the 45/44 and 46/44 ratio, although one could certainly debate the merits of doing the O17 correction first. Although designed to be specific for d45 and d46, it can be used on any two columns really, only note that the new columns are currently always called d45.drift and d46.drift
#'
#' @param data the data frame, must be from a single run because this should really be evaluated
#' for individual runs
#' @param d45 the name of the d45 column (the direct name, not in parantheses)
#' @param d46 the name of the d46 column (the direct name, not in parantheses)
#' @param group expression for what to group by in order to normalize properly
#' this is also used for color coding the data points in the generated plot (if \code{plot = TRUE})
#' @param correct whether to correct for drift or not (default is FALSE)
#' @param correct_with expression for what all to include in the correction
#' @param method which method to use for drift correction, common approaches
#' are linear models (lm) and local polynomial regression fitting (loess)
#' @param plot whether to output the drift plots [default is FALSE]
#' @param span degree of smoothing for the loess (if this method is used)
#' @param ... additional parameters for the fitting method
#' @export
#' @return introduces the new columns d45.drift and d46.drift + parameter column p.drift
evaluate_drift <- function(data, d45, d46, group = name, correct = FALSE,
correct_with = category %in% c("USGS-34", "IAEA-NO3", "N2O"),
plot = TRUE, quiet = FALSE, method = "lm", span = 0.75, ...) {
if (is.null(data$run_number)) stop("need to know the run_number, please parse_file_names first")
if (missing(d45)) stop("please specify the column that holds the d45 data")
if (missing(d46)) stop("please specify the columm that holds the d46 data")
# prepare data frame for correction model
d45_expr <- rlang::enexpr(d45)
d46_expr <- rlang::enexpr(d46)
correct_with_expr <- rlang::enexpr(correct_with)
group_expr <- rlang::enexpr(group)
data <- data %>%
mutate(
x = run_number,
.d45_orig = !!d45_expr,
.d46_orig = !!d46_expr,
.d45 = isoreader::iso_strip_units(.d45_orig),
.d46 = isoreader::iso_strip_units(.d46_orig),
.included = !!correct_with_expr,
..group = !!group_expr
)
# run using do in case there is a grouping
df <-
data %>% do({
# included
mdf <-
filter(., .included) %>%
group_by(..group) %>%
mutate(d45 = .d45 - mean(.d45), d46 = .d46 - mean(.d46)) %>%
ungroup()
# group info
grps_text <- ""
if (length(groups(data)) > 0) {
grps <- groups(data) %>% as.character() %>% sapply(function(i) mdf[[i]][1])
grps_text <- paste0(names(grps), " = ", grps) %>% paste(collapse = ", ")
}
# regressions
corr_cats <- mdf$category %>% unique() %>% paste(collapse = ", ") # for drift into
args <- list(...)
reg_notes <- method
if (method == "loess") reg_notes <- paste0(reg_notes, " (span: ", span, ")")
if (method == "loess") args <- c(args, list(span = span, control = loess.control(surface = "direct")))
# "direct" is to allow extrapolation
m45 <- do.call(method, c(list(quote(d45 ~ x), data = quote(mdf)), args))
m46 <- do.call(method, c(list(quote(d46 ~ x), data = quote(mdf)), args))
# correction
run_numbers <- .$run_number %>% unique()
data.drift <-
left_join(., ggplot2:::predictdf(m45, run_numbers, se = F) %>%
rename(.d45.adjust = y, run_number = x), by = "run_number") %>%
left_join(ggplot2:::predictdf(m46, run_numbers, se = F) %>%
rename(.d46.adjust = y, run_number = x), by = "run_number") %>%
mutate(p.drift = paste0(reg_notes, ": ", corr_cats),
d45.drift = .d45 - .d45.adjust,
d46.drift = .d46 - .d46.adjust) %>%
select(-.d45.adjust, -.d46.adjust)
# fitting overview plot
if (plot) {
# plotting data
mdf$d45.drift <- NULL # just in case already defined
mdf$d46.drift <- NULL # just in case already defined
mdf <-
left_join(mdf, data.drift[c("run_number", "d45.drift", "d46.drift")],
by = "run_number") %>%
group_by(..group) %>%
mutate(
`d45 corrected` = d45.drift - mean(d45.drift),
`d46 corrected` = d46.drift - mean(d46.drift)
)
# plot label
label <- paste0(
"Correction ", if (correct) "applied" else "NOT applied",
"\n(method: ", method, ")\n", if (grps_text != "") grps_text)
# construct plot
method_args <- list(...)
(mdf %>% gather(panel, y, d45, d46, `d45 corrected`, `d46 corrected`) %>%
ggplot() + aes(x, y) +
stat_smooth(aes(color = ..group, fill = ..group), method = method, formula = y ~ x, se = F) +
stat_smooth(method = method, span = span, method.args=method_args, formula = y ~ x, se = T, size = 1.5, color = "black") +
geom_point(aes(color = ..group), size = 3) +
facet_wrap(~panel, ncol = 2, scales = "free_y") +
theme_bw() + theme(legend.position = "left") +
labs(x = "Run #", color = "", fill = "", y = "deviation from mean in each grouping [permil]")
) -> p1
(rbind(data.frame(residuals = m45$residuals, fitted = m45$fitted, panel = "d45 residual vs. fitted"),
data.frame(residuals = m46$residuals, fitted = m46$fitted, panel = "d46 residual vs. fitted")) %>%
ggplot() +
aes(fitted, residuals) + geom_point() +
stat_smooth(method = "loess", formula = y ~ x, color = "red", se = F) +
facet_wrap(~panel, ncol = 1, scales = "free") + theme_bw()
) -> p2
print(
cowplot::plot_grid(p1, p2, align = "h", rel_widths = c(5,2),
labels=c(label, ''), hjust = 0, vjust = 1)
#+cowplot::draw_label(paste("Drift correction with fitting method", method), y = 1, vjust = 1, size = 18)
)
}
# assign data frame
if (correct) {
out <- data.drift
d45_units <- isoreader::iso_get_units(out$.d45_orig)
d46_units <- isoreader::iso_get_units(out$.d46_orig)
if (!is.na(d45_units)) out$d45.drift <- iso_double_with_units(out$d45.drift, d45_units)
if (!is.na(d46_units)) out$d46.drift <- iso_double_with_units(out$d46.drift, d46_units)
} else {
out <- mutate(., p.drift = "none", d45.drift = .d45_orig, d46.drift = .d46_orig)
}
# info messages
if (!quiet & correct) {
grp_sum <- filter(out, .included) %>% group_by(..group) %>%
summarize(d45.sd = sd(.d45), d46.sd = sd(.d46), d45.drift.sd = sd(d45.drift), d46.drift.sd = sd(d46.drift)) %>%
mutate(before = paste0(round(d45.sd, 2), "/", round(d46.sd, 2), " (", ..group, ")"),
after = paste0(round(d45.drift.sd, 2), "/", round(d46.drift.sd, 2), " (", ..group, ")"))
if (grps_text != "")
grps_text <- paste0("\n group: ", grps_text)
sprintf(paste(
"INFO: %s N2O analyses drift corrected (new data columns 'd45.drift' & 'd46.drift', and parameter 'p.drift' added)%s",
"\n Used the '%s' method with included categories '%s'.",
"\n Residual sum of squares: %.3f (d45), %.3f (d46)",
"\n Standard deviations in d45/d46 before and after drift correction, by groupings:",
"\n Before: %s",
"\n After: %s"),
nrow(.), grps_text, method, corr_cats,
sqrt(sum(m45$residuals^2)), sqrt(sum(m46$residuals^2)),
grp_sum$before %>% paste(collapse = ", "), grp_sum$after %>% paste(collapse = ", ")) %>% message()
} else if (!quiet & !correct) {
message("INFO: no drift correction is being applied to this run (parameter column 'p.drift' added)")
if (grps_text != "") message(" group: ", grps_text)
}
out
})
df %>% select(-x, -.d45, -.d46, -.d45_orig, -.d46_orig, -.included, -..group) %>% return()
}