-
Notifications
You must be signed in to change notification settings - Fork 0
/
dilp_functions.R
515 lines (461 loc) · 24.3 KB
/
dilp_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
#' Process raw leaf physiognomic data
#'
#' @description
#' `dilp_processing()` will typically only be called internally by `dilp()`.
#' However, it can be used on its own to generate and view a processed DiLP
#' dataset that includes raw and derived physiognomic values useful for DiLP and
#' other physiognomic analyses. Returns a data frame.
#'
#' @param specimen_data A data frame containing specimen level leaf physiognomic
#' data. A good reference for how to put together the data: \code{\link{McAbeeExample}}
#' @return A data frame containing cleaned and processed specimen level leaf
#' physiognomic data. New variables calculated are:
#' * Leaf area
#' * Feret diameter
#' * Feret diameter ratio (FDR)
#' * Raw blade perimeter corrected (Raw blade perimeter - length of cut perimeter)
#' * Internal raw blade perimeter corrected (Internal raw blade perimeter - length of cut perimeter)
#' * Total tooth count
#' * Total tooth count : internal perimeter (TC:IP)
#' * Perimeter ratio
#' * Petiole metric
#' * Aspect ratio
#' * Shape factor
#' * Compactness
#' * Tooth area
#' * Tooth area : perimeter (TA:P)
#' * Tooth area: internal perimeter (TA:IP)
#' * Tooth area : blade area (TA:BA)
#' * Average primary tooth area (Avg TA)
#' * Tooth count : blade area (TC:BA)
#' * Tooth count : perimeter (TC:P)
#' @export
#'
#' @examples
#' dilp_dataset <- dilp_processing(McAbeeExample)
#' dilp_dataset
dilp_processing <- function(specimen_data) {
colnames(specimen_data) <- colnameClean(specimen_data)
required_columns <- c(
"site", "specimen_number", "morphotype", "margin", "feret", "blade_area",
"raw_blade_perimeter", "internal_raw_blade_perimeter", "length_of_cut_perimeter",
"no_of_primary_teeth", "no_of_subsidiary_teeth"
)
recommended_columns <- c(
"petiole_width", "petiole_area", "blade_perimeter",
"minimum_feret", "raw_blade_area", "internal_raw_blade_area"
)
missing_columns <- required_columns[!required_columns %in% colnames(specimen_data)]
if (length(missing_columns) > 0) {
stop(paste("specimen_data is missing required columns:", stringr::str_flatten(missing_columns, collapse = ", ")))
} else {
other_columns <- recommended_columns[!recommended_columns %in% colnames(specimen_data)]
if (length(other_columns) > 0) {
warning(paste("specimen_data is missing recommended columns and has filled them with NAs:", stringr::str_flatten(other_columns, collapse = ", ")), call. = FALSE)
for (column in other_columns) {
specimen_data[[column]] <- NA
}
}
# Identify 0.5 margin species
mixed_margins <- specimen_data %>%
dplyr::group_by(.data$site, .data$morphotype) %>%
dplyr::filter(1 %in% .data$margin & 0 %in% .data$margin) %>%
dplyr::filter(.data$margin == 1) %>%
dplyr::select("site", "morphotype", "specimen_number")
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$length_of_cut_perimeter <- 0
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$no_of_primary_teeth <- 0
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$no_of_subsidiary_teeth <- 0
}
}
## For specimens where blade area was measured, but there is an NA for petiole area, convert NA to 0 to permit the subsequent addition
temp3 <- specimen_data$petiole_area
specimen_data$petiole_area <- ifelse(specimen_data$blade_area > 0 & is.na(specimen_data$petiole_area), 0, specimen_data$petiole_area)
# Sum petiole area and blade area
specimen_data$leaf_area <- specimen_data$blade_area + specimen_data$petiole_area # if petiole area was measured but there is a NA for blade area, this addition will appropriately return a NA for leaf area
# Feret Diameter Ratio
specimen_data$feret_diameter <- 2 * sqrt(specimen_data$leaf_area / pi)
specimen_data$fdr <- specimen_data$feret_diameter / specimen_data$feret
# change NA's to zeros for length.of.cut.perimeter so that derived variables can be calculated
temp1 <- specimen_data$length_of_cut_perimeter
specimen_data$length_of_cut_perimeter <- ifelse(is.na(specimen_data$length_of_cut_perimeter), 0, specimen_data$length_of_cut_perimeter)
# Corrected perimeters
specimen_data$raw_blade_perimeter <- ifelse(is.na(specimen_data$raw_blade_perimeter) & !is.na(specimen_data$blade_perimeter) & specimen_data$margin == 0 & specimen_data$length_of_cut_perimeter == 0,
specimen_data$blade_perimeter, specimen_data$raw_blade_perimeter
)
specimen_data$raw_blade_perimeter_corrected <- specimen_data$raw_blade_perimeter - specimen_data$length_of_cut_perimeter
specimen_data$internal_raw_blade_perimeter_corrected <- specimen_data$internal_raw_blade_perimeter - specimen_data$length_of_cut_perimeter
# Corrected raw blade area if there is no cut perimeter on a toothed leaf.
specimen_data$raw_blade_area <- ifelse(is.na(specimen_data$raw_blade_area) & specimen_data$margin == 0 & specimen_data$length_of_cut_perimeter == 0,
specimen_data$blade_area, specimen_data$raw_blade_area
)
## Toothed variables
# Total tooth count
temp2 <- specimen_data$no_of_subsidiary_teeth
specimen_data$no_of_subsidiary_teeth[is.na(specimen_data$no_of_subsidiary_teeth)] <- 0 # if it is left as NA then total tooth count will also return NA
specimen_data$total_tooth_count <- ifelse(specimen_data$margin == 1, NA, specimen_data$no_of_primary_teeth + specimen_data$no_of_subsidiary_teeth)
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$total_tooth_count <- 0
}
}
# Total tooth count : internal perimeter
specimen_data$tc_ip <- specimen_data$total_tooth_count / specimen_data$internal_raw_blade_perimeter_corrected
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$tc_ip <- 0
}
}
# Perimeter ratio
specimen_data$perimeter_ratio <- specimen_data$raw_blade_perimeter_corrected / specimen_data$internal_raw_blade_perimeter_corrected
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$perimeter_ratio <- 1
}
}
#### Apply required natural logs to appropriate variables
specimen_data$ln_leaf_area <- log(100 * specimen_data$leaf_area) # Leaf area is expressed in mm2 in DiLP MLR and SLR models, so here also converting from cm2 to mm2
specimen_data$ln_pr <- log(specimen_data$perimeter_ratio)
specimen_data$ln_tc_ip <- log(specimen_data$tc_ip)
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$ln_tc_ip <- NA
}
}
# Petiole metric for reconstructing leaf mass per area
specimen_data$petiole_metric <- (specimen_data$petiole_width^2) / specimen_data$leaf_area
# Aspect ratio
specimen_data$aspect_ratio <- specimen_data$feret / specimen_data$minimum_feret
# Shape factor
specimen_data$shape_factor <- 4 * pi * (specimen_data$blade_area / (specimen_data$blade_perimeter^2))
# Compactness
specimen_data$compactness <- (specimen_data$blade_perimeter^2) / specimen_data$blade_area
# Tooth area
specimen_data$tooth_area <- specimen_data$raw_blade_area - specimen_data$internal_raw_blade_area
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$tooth_area <- 0
}
}
# Tooth area : perimeter
specimen_data$ta_p <- specimen_data$tooth_area / specimen_data$raw_blade_perimeter_corrected
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$ta_p <- 0
}
}
# Tooth area : internal perimeter
specimen_data$ta_ip <- specimen_data$tooth_area / specimen_data$internal_raw_blade_perimeter_corrected
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$ta_ip <- 0
}
}
# Tooth area : blade area
specimen_data$ta_ba <- specimen_data$tooth_area / specimen_data$raw_blade_area
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$ta_ba <- 0
}
}
# Average primary tooth area
specimen_data$avg_ta <- specimen_data$tooth_area / specimen_data$no_of_primary_teeth # double check
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$avg_ta <- 0
}
}
# Tooth count : blade area
specimen_data$tc_ba <- specimen_data$total_tooth_count / specimen_data$raw_blade_area
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$tc_ba <- 0
}
}
# tooth count : perimeter
specimen_data$tc_p <- specimen_data$total_tooth_count / specimen_data$raw_blade_perimeter_corrected
if (length(mixed_margins$specimen_number) > 0) {
for (i in 1:length(mixed_margins$specimen_number)) {
specimen_data[which(specimen_data$specimen_number == mixed_margins$specimen_number[i]), ]$tc_p <- 0
}
}
# Return length of cut perimeter and no subsidiary teeth back to original values to keep averages intact
specimen_data$length_of_cut_perimeter <- temp1
specimen_data$no_of_subsidiary_teeth <- temp2
specimen_data$petiole_area <- temp3
return(specimen_data)
}
}
#' Check for common errors in DiLP measurements
#'
#' @description
#' `dilp_errors()` will typically only be called internally by [dilp()].
#' However, it can be used on its own to evaluate errors that commonly occur
#' during the data collection and processing steps. A `dilp_errors()` call
#' will nearly always follow a [dilp_processing()] call. Returns a data frame.
#'
#' @param specimen_data Processed specimen level leaf physiognomic data. The
#' structure should match the structure of the output from [dilp_processing()]
#'
#' @return A 7 by X data frame. Each row shows a common error, and which specimens
#' from the input dataset are tripping it.
#' @export
#'
#' @examples
#' # Check for errors in the provided McAbeeExample dataset.
#' dilp_dataset <- dilp_processing(McAbeeExample)
#' dilp_errors <- dilp_errors(dilp_dataset)
#' dilp_errors
dilp_errors <- function(specimen_data) {
### This data frame will be filled with the results of the following error checks
errors <- data.frame()
mixed_margins <- specimen_data %>%
dplyr::group_by(.data$site, .data$morphotype) %>%
dplyr::filter(1 %in% .data$margin & 0 %in% .data$margin) %>%
dplyr::filter(.data$margin == 1) %>%
dplyr::select("site", "morphotype", "specimen_number")
dilp.check.1 <- specimen_data %>%
dplyr::filter(.data$margin == 1)
error <- specimen_data[which(dilp.check.1$total_tooth_count > -1 & !(dilp.check.1$specimen_number %in% mixed_margins$specimen_number)), 2]
temp1 <- data.frame(Check = "Entire tooth count not NA", t(error))
error <- specimen_data[which(dilp.check.1$tc_ip > -1 & !(dilp.check.1$specimen_number %in% mixed_margins$specimen_number)), 2]
temp2 <- data.frame(Check = "Entire tooth count : IP not NA", t(error))
error <- specimen_data[which(dilp.check.1$perimeter_ratio > -1 & !(dilp.check.1$specimen_number %in% mixed_margins$specimen_number)), 2]
temp3 <- data.frame(Check = "Entire perimeter ratio not NA", t(error))
dilp.check.2 <- tidyr::drop_na(specimen_data, "fdr")
error <- specimen_data[which(dilp.check.2$fdr < 0 | dilp.check.2$fdr > 1), 2]
temp4 <- data.frame(Check = "FDR not between 0-1", t(error))
error <- specimen_data[which(specimen_data$internal_raw_blade_perimeter_corrected > specimen_data$raw_blade_perimeter_corrected), 2]
temp5 <- data.frame(Check = "External perimeter not larger than internal perimeter", t(error))
error <- specimen_data[which(specimen_data$minimum_feret > specimen_data$feret), 2]
temp6 <- data.frame(Check = "Feret is not larger than minimum Feret", t(error))
error <- as.data.frame(specimen_data[which(specimen_data$perimeter_ratio <= 1 & !(specimen_data$specimen_number %in% mixed_margins$specimen_number)), 2])
temp7 <- data.frame(Check = "Perimeter ratio not greater than 1", t(error))
errors <- dplyr::bind_rows(temp1, temp2, temp3, temp4, temp5, temp6, temp7)
if (length(errors) == 1) {
errors$Specimen1 <- "none"
}
names(errors) <- gsub(x = names(errors), pattern = "X", replacement = "Specimen")
rownames(errors) <- NULL
return(errors)
}
#' Identify outlier specimens
#'
#' @description
#' `dilp_outliers()` will typically only be called internally by [dilp()].
#' However, it can be used on its own to locate specimens that may have been
#' misreported or measured incorrectly. `dilp_outliers()` returns a data frame
#' listing specimens that have unusually high or low values for the four key
#' parameters used in DiLP analyses. If flagged, it may be worth taking a look at the
#' raw measurements and evaluating if the specimen should be used.
#'
#'
#' @param specimen_data Processed specimen level leaf physiognomic data. The
#' structure should match the structure of the output from [dilp_processing()]
#'
#' @return A 4 by X data frame. Each row represents one of the DiLP parameters,
#' and the specimens that are outliers for that parameter.
#' @export
#'
#' @examples
#' # Check for outliers in the provided McAbeeExample dataset. Each
#' # of these outliers has been manually re-examined and was found acceptable.
#' dilp_dataset <- dilp_processing(McAbeeExample)
#' dilp_outliers <- dilp_outliers(dilp_dataset)
#' dilp_outliers
dilp_outliers <- function(specimen_data) {
vars <- c("fdr", "tc_ip", "leaf_area", "perimeter_ratio") # DiLP variables
outliers <- data.frame()
index <- which(colnames(specimen_data) == "specimen_number")
for (i in 1:length(vars)) {
temp <- specimen_data
colnames(temp)[colnames(temp) == vars[i]] <- "trait" # rename variable of focus
temp.outliers <- grDevices::boxplot.stats(temp$trait)$out # check it for outliers
temp.specimen <- temp[which(temp$trait %in% c(temp.outliers)), index] # determine specimen numbers for any outliers
temp.specimen <- as.data.frame(t(temp.specimen))
temp.output <- (trait <- vars[i])
temp.output <- cbind(temp.output, temp.specimen) # create output table with variable name and any potential rows
outliers <- dplyr::bind_rows(outliers, temp.output) # bind to summary table
}
# Rename column headers
if (length(outliers) == 1) {
outliers$Outlier1 <- "none"
}
names(outliers) <- gsub(x = names(outliers), pattern = "V", replacement = "Outlier")
names(outliers) <- gsub(x = names(outliers), pattern = "temp.output", replacement = "Variable")
rownames(outliers) <- NULL
return(outliers)
}
#' Generate DiLP results
#'
#' @description
#' `dilp()` processes raw leaf physiognomic data, checks for common
#' errors/outliers, and returns the processed data, keys to finding potential
#' errors or outliers, and paleoclimate reconstructions.
#'
#' @param specimen_data A data frame containing specimen level leaf physiognomic
#' data. See Lowe et al. 2024 for more information on how to collect this data.
#' A good reference for how to put together the data: \code{\link{McAbeeExample}}
#'
#' Required columns:
#' * site
#' * specimen_number
#' * morphotype
#' * margin
#' * feret
#' * blade_area
#' * raw_blade_perimeter
#' * internal_raw_blade_perimeter
#' * length_of_cut_perimeter
#' * no_of_primary_teeth
#' * no_of_subsidiary_teeth
#'
#' Recommended columns:
#' * petiole_width
#' * petiole_area
#' * blade_perimeter
#' * minimum_feret
#' * raw_blade_area
#' * internal_raw_blade_area
#'
#' @param params Either a string referring to one of two preloaded parameter sets
#' of a list of custom parameters (same format as the list below).
#'
#' Preloaded parameter sets are "PeppeGlobal" and "PeppeNH" which are calibrated based on
#' global and northern hemisphere data respectively. Allen et al. (2020) illustrates a situation
#' in which the northern hemisphere parameters may be preferable. The "PeppeNH" parameters
#' only estimate MAT. Use "PeppeGlobal" for all MAP estimates. Defaults to "PeppeGlobal" as follows (Peppe et al. 2011):
#'
#' * MAT.MLR.M = 0.21,
#' * MAT.MLR.FDR = 42.296,
#' * MAT.MLR.TC.IP = -2.609,
#' * MAT.MLR.constant = -16.004,
#' * MAT.MLR.error = 4,
#' * MAT.SLR.M = 0.204,
#' * MAT.SLR.constant = 4.6,
#' * MAT.SLR.error = 4.9,
#' * MAP.MLR.LA = 0.298,
#' * MAP.MLR.TC.IP = 0.279,
#' * MAP.MLR.PR = -2.717,
#' * MAP.MLR.constant = 3.033,
#' * MAP.MLR.SE = 0.6,
#' * MAP.SLR.LA = 0.283,
#' * MAP.SLR.constant = 2.92,
#' * MAP.SLR.SE = 0.61
#'
#' @param subsite_cols A vector or list of columns present in `specimen_data` to calculate
#' paleoclimate estimates for. A completely optional parameter - allows different groupings of
#' specimens to be tested, or comparisons of paleoclimate estimates at different levels of grouping.
#' Adds additional estimates to $results.
#'
#' @return A list of tables that includes all pertinent DiLP
#' information:
#'
#' * processed_leaf_data: the full set of cleaned and newly calculated leaf
#' physiognomic data that is necessary for DiLP analysis. See [dilp_processing()]
#' for more information.
#' * processed_morphotype_data: morphospecies-site pair means for all leaf
#' physiognomic data.
#' * processed_site_data: site means for all leaf physiognomic data.
#' * errors: lists any specimens that may be causing common errors in DiLP
#' calculations. See [dilp_errors()] for more information.
#' * outliers: flags outliers in variables used for DiLP analysis that may
#' represent incorrect data. See [dilp_outliers()] for more information.
#' * results: climate reconstructions of MAT and MAP using single and multi-linear
#' regressions.
#'
#' @references
#' * Allen, S. E., Lowe, A. J., Peppe, D. J., & Meyer, H. W. (2020). Paleoclimate and paleoecology of the latest Eocene Florissant flora of central Colorado, USA. Palaeogeography, Palaeoclimatology, Palaeoecology, 551, 109678.
#' * Peppe, D.J., Royer, D.L., Cariglino, B., Oliver, S.Y., Newman, S., Leight, E., Enikolopov, G., Fernandez-Burgos, M., Herrera, F., Adams, J.M., Correa, E., Currano, E.D., Erickson, J.M., Hinojosa, L.F., Hoganson, J.W., Iglesias, A., Jaramillo, C.A., Johnson, K.R., Jordan, G.J., Kraft, N.J.B., Lovelock, E.C., Lusk, C.H., Niinemets, Ü., Peñuelas, J., Rapson, G., Wing, S.L. and Wright, I.J. (2011), Sensitivity of leaf size and shape to climate: global patterns and paleoclimatic applications. New Phytologist, 190: 724-739. https://doi.org/10.1111/j.1469-8137.2010.03615.x
#' * Lowe. A.J., Flynn, A.G., Butrim, M.J., Baumgartner, A., Peppe, D.J., and Royer, D.L. (2024), Reconstructing terrestrial paleoclimate and paleoecology with fossil leaves using Digital Leaf Physiognomy and leaf mass per area. JoVE.
#' @export
#'
#' @examples
#' dilp_results <- dilp(McAbeeExample)
#' dilp_results$processed_leaf_data
#' dilp_results$processed_morphotype_data
#' dilp_results$processed_site_data
#' dilp_results$errors
#' dilp_results$outliers
#' dilp_results$results
dilp <- function(specimen_data, params = "PeppeGlobal", subsite_cols = NULL) {
subsite_cols <- subsite_cols %>%
stringr::str_trim() %>%
stringr::str_to_lower() %>%
stringr::str_replace_all("[.]", " ") %>%
stringr::str_replace_all("[ ]", "_") %>%
stringr::str_replace_all("__", "_")
colnames(specimen_data) <- colnameClean(specimen_data)
processed_specimen_data <- dilp_processing(specimen_data)
errors <- dilp_errors(processed_specimen_data)
outliers <- dilp_outliers(processed_specimen_data)
if (is.list(params)) {
} else {
params <- grab_regression(params, "dilp")
}
####### Morphotype average by site
dilp_morphotype <- processed_specimen_data %>%
dplyr::select(-"specimen_number") %>%
dplyr::summarize(.by = c(.data$site, .data$morphotype), dplyr::across(dplyr::where(~ !is.character(.)), \(x) mean(x, na.rm = TRUE)))
##### Morphotypes that have variable leaf margin states require a margin state of 0.5
# is just 1 and 0 listed? If so, the following finds margin state values between 0 and 1 and replaces them with 0.5
if (length(unique(stats::na.omit(dilp_morphotype$margin))) > 2) {
dilp_morphotype$margin[dilp_morphotype$margin > 0 & dilp_morphotype$margin < 1] <- 0.5
if (length(unique(dilp_morphotype$margin)) > 2) {
warning("Margin states outside the bounds of [0 - 1] present. Non 0 and 1 values converted to 0.5")
}
}
####### Site average
dilp_site <- dilp_morphotype %>%
dplyr::group_by(.data$site) %>%
dplyr::select(-"morphotype") %>%
dplyr::summarize(dplyr::across(dplyr::where(~ !is.character(.)), \(x) mean(x, na.rm = TRUE)))
### Convert site margin from proportion to percentage
dilp_site$margin <- dilp_site$margin * 100
### Conditionally set site tc_ip to 0 and perimeter_ratio to 1 if all morphotypes are untoothed
dilp_site$tc_ip <- ifelse(dilp_site$margin == 100, 0, dilp_site$tc_ip)
dilp_site$perimeter_ratio <- ifelse(dilp_site$margin == 100, 1, dilp_site$perimeter_ratio)
# dilp_site$ln_tc_ip <- ifelse(dilp_site$margin == 100, -20.7, dilp_site$ln_tc_ip)
# dilp_site$ln_pr <- ifelse(dilp_site$margin == 100, 0, dilp_site$ln_pr)
## A loop is constructed to handle spreadsheets that contain either multiple sites or a single site. For the latter, the loop will run only once.
sites <- c(unique(dilp_site$site))
Results <- data.frame()
for (i in 1:length(sites)) {
temp <- dilp_site[dilp_site$site == sites[i], ] # isolate site
#### MAT
# MLR
MAT.MLR <- (temp$margin * params$MAT.MLR.M) + (temp$fdr * params$MAT.MLR.FDR) + (temp$tc_ip * params$MAT.MLR.TC.IP) + params$MAT.MLR.constant
# SLR
MAT.SLR <- (temp$margin * params$MAT.SLR.M) + params$MAT.SLR.constant
#### MAP
# MLR
MAP.MLR.exp <- (temp$ln_leaf_area * params$MAP.MLR.LA) + (temp$ln_tc_ip * params$MAP.MLR.TC.IP) + (temp$ln_pr * params$MAP.MLR.PR) + params$MAP.MLR.constant
MAP.MLR <- exp(MAP.MLR.exp)
MAP.MLR.error.plus <- (exp(MAP.MLR.exp + params$MAP.MLR.SE)) - MAP.MLR
MAP.MLR.error.minus <- MAP.MLR - (exp(MAP.MLR.exp - params$MAP.MLR.SE))
# SLR
MAP.SLR.exp <- (temp$ln_leaf_area * params$MAP.SLR.LA) + params$MAP.SLR.constant
MAP.SLR <- exp(MAP.SLR.exp)
MAP.SLR.error.plus <- (exp(MAP.SLR.exp + params$MAP.SLR.SE)) - MAP.SLR
MAP.SLR.error.minus <- MAP.SLR - (exp(MAP.SLR.exp - params$MAP.SLR.SE))
# Results table
temp_output <- data.frame(
site = temp$site, margin = temp$margin, fdr = temp$fdr, tc_ip = temp$tc_ip, ln_leaf_area = temp$ln_leaf_area, ln_tc_ip = temp$ln_tc_ip, ln_pr = temp$ln_pr, MAT.MLR, MAT.MLR.error = params$MAT.MLR.error, MAT.SLR, MAT.SLR.error = params$MAT.SLR.error, MAP.MLR,
MAP.MLR.error.plus, MAP.MLR.error.minus, MAP.SLR, MAP.SLR.error.plus, MAP.SLR.error.minus
)
Results <- rbind(Results, temp_output)
}
for (subsite in subsite_cols) {
if (is.null(specimen_data[[subsite]])) {
stop(paste("Subsite column:", subsite, "not found in specimen_data."))
} else {
subsite_data <- specimen_data %>%
dplyr::select(-.data$site) %>%
dplyr::mutate(site = .data[[subsite]])
temp <- dilp(subsite_data)
Results <- rbind(Results, temp$results)
}
}
return(list(processed_leaf_data = processed_specimen_data, processed_morphotype_data = dilp_morphotype, processed_site_data = dilp_site, errors = errors, outliers = outliers, results = Results))
}