-
Notifications
You must be signed in to change notification settings - Fork 0
/
pellets.R
260 lines (230 loc) · 10.4 KB
/
pellets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#' @title Filter Pellets
#' @description This function filters the pellet events
#' @export
filter_pellets <- function(df){
df %>% dplyr::filter(Event == "Pellet")
}
#' @title Recalculate Pellets
#' @description This function recalculates pellets if given a FED_data `data.frame` that contains an identifier column. The main reason behind it is that one animal can receive more than one FED device. This might happen due to the experiment design or because a FED needed to be replaced during the experiment. Alternatively, it could be used to analyze several datasets coming from different animals independent of `Device_Number`.
#' @param df A data frame containing FED data. If your `data.frame` is grouped, `group_var` will be disregarded.
#' @param group_var A string specifying the column to group by. If NULL (default), no grouping is performed.
#' @return A data frame identical to `df` but with recalculated pellet counts and arranged by `datetime`.
#' @export
recalculate_pellets <- function(df, group_var = NULL) {
filtered_df <- df %>%
filter_pellets()
if (nrow(filtered_df) == 0) {
usethis::ui_warn("No pellet events found in the input data frame.")
return(filtered_df)
}
if (dplyr::is_grouped_df(filtered_df)) {
if (!rlang::is_null(rlang::enexpr(group_var))) {
warning("Ignoring 'group_var' argument as the input dataframe is already grouped.")
}
groups <- dplyr::group_vars(filtered_df)
# Check for empty groups within the filtered data frame
empty_groups <- filtered_df %>%
dplyr::group_by(dplyr::across(groups)) %>%
dplyr::filter(n() == 0) %>%
dplyr::group_vars()
if (length(empty_groups) > 0) {
usethis::ui_warn("The following groups have no pellet events:", paste(empty_groups, collapse = ", "))
}
output <- filtered_df %>%
dplyr::arrange(dplyr::across(groups), datetime) %>%
dplyr::mutate(pellets = 1:n())
} else {
# Case: Not grouped but group_var provided
if (!rlang::is_null(rlang::enexpr(group_var))) {
group_var_enquo <- rlang::enquo(group_var)
group_var_name <- rlang::quo_name(group_var_enquo)
if (group_var_name %in% names(filtered_df)) {
output <- filtered_df %>%
dplyr::arrange(!!group_var_enquo, datetime) %>%
dplyr::group_by(!!group_var_enquo) %>%
dplyr::mutate(pellets = 1:n())
# Check for empty groups within the filtered data frame
empty_groups <- output %>%
dplyr::group_by(!!group_var_enquo) %>%
dplyr::filter(n() == 0) %>%
dplyr::pull(!!group_var_enquo)
if (length(empty_groups) > 0) {
usethis::ui_warn("The following groups have no pellet events:", paste(empty_groups, collapse = ", "))
}
}
} else {
# Case: Not grouped, no group_var provided
output <- filtered_df %>%
dplyr::arrange(datetime) %>%
dplyr::mutate(pellets = 1:n())
}
}
return(output)
}
#' @title Bin Pellets
#'
#' @description Bins the pellets based on the specified time interval in `bin` and the data range.
#'
#' @param data A data frame containing the pellet data.
#' @param time_col The `datetime` column to use as
#' @param bin A character string specifying the time interval for binning (e.g., "1 hour", "30 minunte").
#' @param label_first_break Logical indicating whether to label the first break as the start time (default is TRUE).
#'
#' @return A data frame with binned pellet counts and corresponding bin timestamps.
#' @seealso [recalculate_pellets()], [clock::time_point_round()]
#' @export
bin_pellets <- function(data, time_col, bin, label_first_break = TRUE) {
# check if we have 100% pellet events
if (fed3:::check_pellets(data)) {
stop("Data contains Events other than Pellets.")
}
# get the proper column
time_column <- dplyr::pull(data, {{time_col}})
# split bin into n and precision
bin_components <- fed3:::parse_bin(bin)
n <- bin_components$n
precision <- bin_components$precision
# get grouping variables
groups <- dplyr::group_vars(data)
# generate the breaks using clock package
breaks <- seq(from = clock::date_floor(min(time_column), n = n, precision = precision),
to = clock::date_ceiling(max(time_column), n = n, precision = precision),
by = paste(n, fed3:::standardize_unit(precision)))
# We still want to keep the labels as a POSIXct object for later merging
if (label_first_break) {
labels <- breaks[-length(breaks)]
} else {
labels <- breaks[-1]
}
# bin the time column using clock::date_group
data <- data %>%
dplyr::mutate(bin = clock::date_group({{time_col}}, n = n, precision = precision))
# nest to perform calculation
data_nested <- data %>%
dplyr::group_by(dplyr::across(dplyr::all_of(groups)), bin) %>%
tidyr::nest()
data_nested <- data_nested %>%
dplyr::mutate(data = purrr::map(data, ~dplyr::summarise(.x, pellet_rate = n()))) %>%
tidyr::unnest(data) %>%
dplyr::ungroup()
# create a data frame that includes all possible combinations of bins and groups
unique_groups <- data %>%
dplyr::select(all_of(groups)) %>%
dplyr::distinct()
# Generate possible combinations of bin and groups without duplicates
complete_data <- tidyr::crossing(bin = as.POSIXct(labels, tz = "UTC"), unique_groups)
# join the computed data with the complete data
return(
complete_data %>%
dplyr::left_join(data_nested, by = c(groups, "bin")) %>%
tidyr::replace_na(list(pellet_rate = 0)) %>%
dplyr::arrange(dplyr::across(dplyr::all_of(groups)), bin)
)
}
#' @title Bin Pellet Events According to Light Cycle
#'
#' @description This function assigns each event to a light or dark period, then counts the number of events in each period.
#' It requires a data frame that contains a datetime column and only pellet events.
#'
#' Because dates and light cycles are not aligned (dark phase often spans more than one date), it is recommended that users call `add_zt()` first and bin by `zt` instead of `datetime`.
#'
#' @param data A data frame that contains a datetime column and only pellet events.
#' @param time_col The datetime column in your data frame. You can use a bare column name.
#' It is recommended to use the "zt" column created by `add_zt()` to ensure that the light and dark periods align correctly.
#' @param lights_on_hour The hour (0-23) when the light period starts. Default is 7.
#' @param lights_off_hour The hour (0-23) when the light period ends. Default is 19.
#'
#' @return A `data.frame` grouped by the original grouping variables, date and light cycle period,
#' with an additional column `pellets` indicating the number of events in each period.
#'
#' @seealso [fed3::bin_pellets()], [fed3::add_zt()], [fed3::filter_pellets()], [fed3::recalculate_pellets()]
#' @examples
#'
#' \dontrun{
#' # will have light cycle split by date
#' read_fed(path) %>% recalculate_pellets() %>% add_zt(datetime) %>% bin_pellets_lightcycle(datetime)
#' read_fed(path) %>% recalculate_pellets() %>% add_zt(datetime) %>% bin_pellets_lightcycle(datetime)
#' # zt date will only contain full light/dark periods
#' read_fed(path) %>% recalculate_pellets() %>% add_zt(datetime) %>% bin_pellets_lightcycle(zt)
#' }
#'
#' @export
bin_pellets_lightcycle <- function(data, time_col, lights_on_hour = 7, lights_off_hour = 19) {
# Check if we have 100% pellet events
if (fed3:::check_pellets(data)) {
stop("Data contains Events other than Pellets.\nUse filter_pellets() or recalculate_pellets() as needed.")
}
# intercept wrong use of zt
time_col_name <- rlang::as_name(rlang::enquo(time_col))
if (time_col_name == "zt") {
usethis::ui_warn("Using `zt`, adjusting `lights_on_hours`=0 and `lights_off_hour`=12")
lights_on_hour <- 0
lights_off_hour <- 12
}
# Get grouping variables
groups <- dplyr::group_vars(data)
# Create a new column to indicate whether it's light or dark
data <- data %>%
dplyr::mutate(light_cycle =
purrr::map_chr(.x = {{time_col}},
.f = ~ {
if (lights_off_hour > lights_on_hour) {
if (lubridate::hour(.x) >= lights_on_hour &
lubridate::hour(.x) < lights_off_hour) {"light"}
else {"dark"}
} else {
if (lubridate::hour(.x) >= lights_on_hour |
lubridate::hour(.x) < lights_off_hour) {"light"}
else {"dark"}
}
}))
# light_cycle should be a factor with levels c("light", "dark")
data <- dplyr::mutate(data, light_cycle = factor(light_cycle, levels = c("light", "dark")))
# Count the events
data <- data %>%
dplyr::group_by(dplyr::across(dplyr::all_of(groups)),
date = lubridate::date({{time_col}}),
light_cycle) %>%
dplyr::count(name = "pellets")
# rename if needed
if (time_col_name == "zt") {
data <- dplyr::rename(data, zt_date = date)
}
return(data)
}
check_pellets <- function(data) {
return(any(data$Event != "Pellet"))
}
# This function comes handy to use the units from the clock package with seq()
standardize_unit <- function(unit) {
switch(unit,
minute = "min",
hour = "hour",
day = "day",
week = "week",
month = "month",
year = "year",
stop("Invalid unit.")
)
}
# This is a nice helper to parse a bin written as "1 hour" into n and precision
# the idea is to feed this
parse_bin <- function(bin) {
# Split bin argument by spaces
bin_components <- strsplit(bin, " ")[[1]]
# Error handling
if (length(bin_components) != 2) {
stop("bin argument must be a string in the format of 'n unit', where n is a number and unit is a time unit (e.g., '1 hour').")
}
n <- bin_components[1]
if (!is.numeric(as.numeric(n)) || as.numeric(n) <= 0) {
stop("The first part of the `bin` argument must be a positive number.")
}
n <- as.integer(n)
precision <- bin_components[2]
valid_units <- c("second", "minute", "hour", "day", "week", "month", "year")
if (!(precision %in% valid_units)) {
stop(paste("The second part of the `bin` argument must be a valid time unit.\nChoose from:", paste(valid_units, collapse = ", "), "."))
}
return(list(n = n, precision = precision))
}