-
Notifications
You must be signed in to change notification settings - Fork 0
/
generalize_wetland_mode.R
69 lines (67 loc) · 2.89 KB
/
generalize_wetland_mode.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#' Generalize monthly management modes
#'
#' Identify typical water management schedule across multiple water years of
#' data
#'
#' @details For each unit, identifies and returns the most frequent management
#'   mode in each month and calculates `weight` as the proportion of water years
#'   in which that mode was detected. Low values for weight reflect months with
#'   more annual variability in the monthly management schedules.
#'
#'   Note that ties are not returned, and in the case of ties, the function
#'   favors returning modes in the following order: F, D, I, M, H, N (see
#'   [estimate_wetland_mode()] for details). It is therefore possible for the
#'   function to return multiple months with values for F and D. Setting
#'   `clean = TRUE` will reclassify duplicate values of F and D to `fullmode`,
#'   but will also remove `weight` estimates since they may no longer be valid.
#'
#'   When `clean = TRUE`, neighbouring months are compared separately within
#'   each unit (rows sharing `WETLAND`, `unit`, `CLASS`, `AREA_HA`, `AREA_AC`
#'   and `AREA_AC_WETTED`), so the last month of one unit never influences the
#'   first month of the next. Months are compared in the row order of the
#'   generalized result, which is sorted by `month_name` and then `month`; this
#'   assumes `month_name` sorts chronologically (e.g. it is a factor with
#'   levels in water-year order).
#'
#' @param df Input tibble from [estimate_wetland_mode()]
#' @param fullmode One of `M` or `H`; only necessary if `clean = TRUE`, in
#'   which case it must be a single character string; see Details
#' @param clean Logical; determines whether to correct for repeat values of F
#'   and D
#'
#' @return Input tibble with only one entry for each month for each unit, the
#'   most frequent value for "mode", and an additional field `weight` giving the
#'   proportion of years observed containing that management mode in that month.
#'   If `clean = TRUE`, `mode` is returned as a character vector and `weight`
#'   is dropped.
#'
#' @export
#' @importFrom rlang .data
#'
#' @examples
#' df = format_watertracker(sampledat) |> estimate_flood_extent() |>
#'   estimate_flood_delta() |> estimate_wetland_mode()
#' generalize_wetland_mode(df)
#'
generalize_wetland_mode = function(df, fullmode = NULL, clean = FALSE) {
  # fail early with a clear message instead of an obscure case_when() error
  if (clean &&
      !(is.character(fullmode) && length(fullmode) == 1L && !is.na(fullmode))) {
    stop("`fullmode` must be a single character string (one of 'M' or 'H') ",
         "when `clean = TRUE`", call. = FALSE)
  }

  df = df |>
    # set order of importance; used below to break ties
    dplyr::mutate(
      mode = factor(.data$mode, levels = c('F', 'D', 'I', 'M', 'H', 'N'))
    ) |>
    dplyr::group_by(
      .data$WETLAND, .data$unit, .data$CLASS, .data$AREA_HA,
      .data$AREA_AC, .data$AREA_AC_WETTED,
      .data$month_name, .data$month
    ) |>
    # frequency of each mode in each unit-month (one row per mode observed)
    dplyr::count(.data$mode) |>
    # proportion of observations in this unit-month with each mode
    dplyr::mutate(weight = .data$n / sum(.data$n)) |>
    # sort by factor level (= order of importance) within each unit-month
    dplyr::arrange(.data$mode, .by_group = TRUE) |>
    # most frequent: with_ties = FALSE means if ties, will return first one
    dplyr::slice_max(order_by = .data$n, n = 1, with_ties = FALSE) |>
    dplyr::ungroup() |>
    dplyr::select(-"n")

  if (clean) {
    df = df |>
      # compare neighbouring months within a unit only, so lead()/lag() never
      # look across the boundary between two units
      dplyr::group_by(
        .data$WETLAND, .data$unit, .data$CLASS, .data$AREA_HA,
        .data$AREA_AC, .data$AREA_AC_WETTED
      ) |>
      # handle repeat F and D, or lack of F and D
      dplyr::mutate(
        # character, so every case_when() outcome has the same type
        mode = as.character(.data$mode),
        mode = dplyr::case_when(
          # D followed by another D: keep only the last D as the drawdown
          .data$mode == 'D' & dplyr::lead(.data$mode) == 'D' ~ fullmode,
          # F preceded by another F: keep only the first F as the flood-up
          .data$mode == 'F' & dplyr::lag(.data$mode) == 'F' ~ fullmode,
          # fullmode directly after N: relabel as F
          .data$mode == fullmode & dplyr::lag(.data$mode) == 'N' ~ 'F',
          # fullmode directly before N: relabel as D
          .data$mode == fullmode & dplyr::lead(.data$mode) == 'N' ~ 'D',
          TRUE ~ .data$mode
        )
      ) |>
      dplyr::ungroup() |>
      # weights describe the pre-cleaning mode, so they no longer apply
      dplyr::select(-"weight")
  }

  df
}