-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate_pheno.R
137 lines (126 loc) · 4.71 KB
/
aggregate_pheno.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#' @title Temporally aggregate time series values
#' @description Aggregate values of time series over phenological
#' time windows.
#' @param data List of fitted time series as generated by function `fit_curve()`
#' or time series in `s2ts` format (generated using `fill_s2ts()`).
#' @param pheno (optional) Output of `extract_pheno()` or `cut_cycles()`.
#' If missing, the whole windows are used (in this case, `data` can only be
#' a list of fitted time series as generated by function `fit_curve()`).
#' @param metrics (optional) Two-length character: name of metrics to be used
#' as beginning and ending dates of the windows. Object `pheno` must contain
#' two fields with the corresponding names.
#' Default is `c("begin", "end")` (but they could be e.g. `c("sos", "eos")`).
#' This parameter is skipped if `pheno` is missing.
#' @param fun (optional) A vector of aggregation function names
#' (default is `"median"`).
#' @param reshape (optional) Logical: should outputs be wide-to-log reshaped?
#' If TRUE (default), the output is returned in the "long" format (as described)
#' here below); if FALSE, n columns named as `fun` argument are returned
#' instead than columns `fun` and `value`.
#' @param skip_fun (optional) Logical: return also the aggregation function name
#' among outputs (default is FALSE)?
#' This parameter is used only if `fun` is 1-length (otherwise it is coerced
#' to TRUE) and if `reshape = TRUE`.
#' @param include_pheno (optional) Logical: return also the input information
#' provided in argument `pheno` (default is FALSE)?
#' @param ... Additional arguments passed to `fun`.
#' @return A data table with the following fields:
#' - `id`: the time series ID (see `s2ts`);
#' - `year`: the year (integer);
#' - `cycle`: the cycle ID (integer);
#' - `fun`: the aggregation function (if `skip_fun = TRUE` and `fun` is a
#' 1-length argument value, this is skipped);
#' - `value`: output aggregated value.
#' @author Luigi Ranghetti, PhD (2021) \email{luigi@@ranghetti.info}
#' @import data.table
#' @export
#' @examples
#' # Load input data
#' data("ts_filled")
#' data("dt_cycles")
#' data("dt_pheno")
#'
#' # Aggregate time series over detected cycles (computing the median, as default)
#' dt_aggr_0 <- aggregate_pheno(ts_filled, dt_cycles)
#' dt_aggr_0
#'
#' # Aggregate time series over phenological metrics using 95% percentile
#' dt_aggr <- aggregate_pheno(
#' ts_filled, dt_pheno,
#' metrics = c("sos", "eos"),
#' fun = "quantile", probs = 0.95, na.rm = TRUE
#' )
#' dt_aggr
aggregate_pheno <- function(
data,
pheno,
metrics = c("begin", "end"),
fun = "median",
reshape = TRUE,
skip_fun = TRUE,
include_pheno = FALSE,
...
) {
# Avoid check notes for data.table related variables
id <- cycle <- NULL
## Check arguments
# TODO
# Check for empty pheno
if (nrow(pheno) == 0) {
return(data.table(
id = character(), year = character(), cycle = character(), value = numeric()
))
}
# Check for duplicates in pheno
pheno_uids <- table(table(pheno[,paste(id, year, cycle)]))
if (length(pheno_uids) > 1 || names(pheno_uids) != "1") {
print_message(
type = "error",
"Duplicated detected in argument `pheno`."
)
}
# Convert data if it is a s2ts object
if (inherits(data, "s2ts")) {
data <- fit_curve(data, pheno, fit = "no")
}
# Extract metrics
out_dt_l <- sapply(names(data), function(sel_id) {
sapply(names(data[[sel_id]]), function(sel_year) {
sapply(names(data[[sel_id]][[sel_year]]), function(sel_cycle) {
window_metrics <- pheno[id == sel_id & year == sel_year & cycle == sel_cycle,]
sel_data <- data[[sel_id]][[sel_year]][[sel_cycle]]$ts[
date >= window_metrics[,get(metrics[1])] &
date < window_metrics[,get(metrics[2])]
]
as.list(unlist( # syntax to avoid multi-length outputs (e.g. quantile())
sapply(fun, do.call, list(sel_data$value, ...), simplify = FALSE, USE.NAMES = TRUE)
))
}, simplify = FALSE, USE.NAMES = TRUE)
}, simplify = FALSE, USE.NAMES = TRUE)
}, simplify = FALSE, USE.NAMES = TRUE)
out_dt <- rbindlist(
lapply(
lapply(
out_dt_l,
lapply, rbindlist, idcol = "cycle"
),
rbindlist, idcol = "year"
),
idcol = "id"
)
# Post-processing operations
if (reshape == TRUE) {
out_dt <- melt(
out_dt,
id.vars = c("id", "year", "cycle"),
variable.name = "fun", value.name = "value"
)
}
if (all(reshape == TRUE, length(fun) == 1, skip_fun == TRUE)) {
out_dt <- out_dt[,names(out_dt) != "fun", with = FALSE]
}
if (all(!missing(pheno), include_pheno == TRUE)) {
out_dt <- merge(pheno, out_dt, by = c("id", "year", "cycle"))
}
out_dt
}