R/aggregate_pheno.R

#' @title Temporally aggregate time series values
#' @description Aggregate values of time series over phenological 
#'  time windows.
#' @param data List of fitted time series as generated by function `fit_curve()`
#'  or time series in `s2ts` format (generated using `fill_s2ts()`).
#' @param pheno (optional) Output of `extract_pheno()` or `cut_cycles()`.
#'  If missing, the whole windows are used (in this case, `data` can only be 
#'  a list of fitted time series as generated by function `fit_curve()`).
#' @param metrics (optional) Two-length character: name of metrics to be used 
#'  as beginning and ending dates of the windows. Object `pheno` must contain
#'  two fields with the corresponding names. 
#'  Default is `c("begin", "end")` (but they could be e.g. `c("sos", "eos")`).
#'  This parameter is skipped if `pheno` is missing.
#' @param fun (optional) A vector of aggregation function names
#'  (default is `"median"`).
#' @param reshape (optional) Logical: should outputs be wide-to-log reshaped?
#'  If TRUE (default), the output is returned in the "long" format (as described)
#'  here below); if FALSE, n columns named as `fun` argument are returned
#'  instead than columns `fun` and `value`.
#' @param skip_fun (optional) Logical: return also the aggregation function name 
#'  among outputs (default is FALSE)? 
#'  This parameter is used only if `fun` is 1-length (otherwise it is coerced
#'  to TRUE) and if `reshape = TRUE`.
#' @param include_pheno (optional) Logical: return also the input information 
#'  provided in  argument `pheno` (default is FALSE)? 
#' @param ... Additional arguments passed to `fun`.
#' @return A data table with the following fields:
#'  - `id`: the time series ID (see `s2ts`);
#'  - `year`: the year (integer);
#'  - `cycle`: the cycle ID (integer);
#'  - `fun`: the aggregation function (if `skip_fun = TRUE` and `fun` is a 
#'      1-length argument value, this is skipped);
#'  - `value`: output aggregated value.
#' @author Luigi Ranghetti, PhD (2021) \email{luigi@@ranghetti.info}
#' @import data.table
#' @export
#' @examples
#' # Load input data
#' data("ts_filled")
#' data("dt_cycles")
#' data("dt_pheno")
#' 
#' # Aggregate time series over detected cycles (computing the median, as default)
#' dt_aggr_0 <- aggregate_pheno(ts_filled, dt_cycles)
#' dt_aggr_0
#' 
#' # Aggregate time series over phenological metrics using 95% percentile
#' dt_aggr <- aggregate_pheno(
#'   ts_filled, dt_pheno, 
#'   metrics = c("sos", "eos"), 
#'   fun = "quantile", probs = 0.95, na.rm = TRUE
#' )
#' dt_aggr


aggregate_pheno <- function(
  data, 
  pheno,
  metrics = c("begin", "end"),
  fun = "median",
  reshape = TRUE,
  skip_fun = TRUE,
  include_pheno = FALSE,
  ...
) {
  
  # Avoid check notes for data.table related variables
  id <- cycle <- NULL
  
  ## Check arguments
  # TODO
  
  # Check for empty pheno
  if (nrow(pheno) == 0) {
    return(data.table(
      id = character(), year = character(), cycle = character(), value = numeric()
    ))
  }

  # Check for duplicates in pheno
  pheno_uids <- table(table(pheno[,paste(id, year, cycle)]))
  if (length(pheno_uids) > 1 || names(pheno_uids) != "1") {
    print_message(
      type = "error",
      "Duplicated detected in argument `pheno`."
    )
  }

  # Convert data if it is a s2ts object
  if (inherits(data, "s2ts")) {
    data <- fit_curve(data, pheno, fit = "no")
  }

  # Extract metrics
  out_dt_l <- sapply(names(data), function(sel_id) {
    sapply(names(data[[sel_id]]), function(sel_year) {
      sapply(names(data[[sel_id]][[sel_year]]), function(sel_cycle) {
        window_metrics <- pheno[id == sel_id & year == sel_year & cycle == sel_cycle,]
        sel_data <- data[[sel_id]][[sel_year]][[sel_cycle]]$ts[
          date >= window_metrics[,get(metrics[1])] &
            date < window_metrics[,get(metrics[2])]
        ]
        as.list(unlist( # syntax to avoid multi-length outputs (e.g. quantile())
          sapply(fun, do.call, list(sel_data$value, ...), simplify = FALSE, USE.NAMES = TRUE)
        ))
      }, simplify = FALSE, USE.NAMES = TRUE)
    }, simplify = FALSE, USE.NAMES = TRUE)
  }, simplify = FALSE, USE.NAMES = TRUE)
  out_dt <- rbindlist(
    lapply(
      lapply(
        out_dt_l, 
        lapply, rbindlist, idcol = "cycle"
      ), 
      rbindlist, idcol = "year"
    ), 
    idcol = "id"
  )
  
  # Post-processing operations
  if (reshape == TRUE) {
    out_dt <- melt(
      out_dt, 
      id.vars = c("id", "year", "cycle"), 
      variable.name = "fun", value.name = "value"
    )
  }
  if (all(reshape == TRUE, length(fun) == 1, skip_fun == TRUE)) {
    out_dt <- out_dt[,names(out_dt) != "fun", with = FALSE]
  }
  if (all(!missing(pheno), include_pheno == TRUE)) {
    out_dt <- merge(pheno, out_dt, by = c("id", "year", "cycle"))
  }
  
  out_dt
  
}