/
sdm_eval.R
248 lines (227 loc) · 8.81 KB
/
sdm_eval.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#' Calculate different model performance metrics
#'
#' @description This function calculates threshold dependent and independent model performance
#' metrics.
#'
#' @param p numeric. Predicted suitability for presences
#' @param a numeric. Predicted suitability for absences
#' @param thr character. Threshold criterion used to get binary suitability values (i.e. 0,1).
#' Used for threshold-dependent performance metrics.
#' It is possible to use more than one threshold type.
#' A vector must be provided for this argument. The following threshold criteria are available:
#' \itemize{
#' \item lpt: The highest threshold at which there is no omission.
#' \item equal_sens_spec: Threshold at which the Sensitivity and Specificity are equal.
#' \item max_sens_spec: Threshold at which the sum of the Sensitivity and Specificity
#' is the highest (aka threshold that maximizes the TSS).
#' \item max_jaccard: The threshold at which the Jaccard index is the highest.
#' \item max_sorensen: The threshold at which the Sorensen index is the highest.
#' \item max_fpb: The threshold at which FPB (F-measure on presence-background data) is the highest.
#' \item sensitivity: Threshold based on a specified Sensitivity value.
#' Usage thr = c('sensitivity', sens='0.6') or thr = c('sensitivity'). 'sens' refers
#' to Sensitivity value. If a sensitivity value is not specified, the
#' default value is 0.9
#' }
#' If more than one threshold type is used, concatenate threshold types,
#' e.g., thr=c('lpt', 'max_sens_spec', 'max_jaccard'), or thr=c('lpt', 'max_sens_spec',
#' 'sensitivity', sens='0.8'), or thr=c('lpt', 'max_sens_spec', 'sensitivity').
#' Function will use all thresholds if no threshold type is specified
#' @param bg numeric. Predicted suitability for background points, used for BOYCE metric.
#' It bg is set as NULL, BOYCE metric will be calculated with presences and absences suitabilities
#' values
#'
#' @details This function is used for evaluating different models approaches base on the combination
#' of presence-absences or presence-pseudo-absences and background point data and
#' suitability predicted by any model or flexsdm modeling function families (fit_, esm_, and tune_.)
#'
#' It calculates the next performance metric:
#'
#' | Performance metric | Threshold dependent | Values ranges |
#' | :------------- |:-------------:| -----:|
#' | TPR (True Positive Rate, also called Sensitivity) | yes | 0 - 1 |
#' | TNR (True Negative Rate, also called Specificity) | yes | 0 - 1 |
#' | SORENSEN | yes | 0 - 1 |
#' | JACCARD | yes | 0 - 1 |
#' | FPB (F-measure on presence-background) | yes | 0 - 2 |
#' | OR (Omission Rate) | yes | 0 - 1 |
#' | TSS (True Skill Statistic) | yes | -1 - 1 |
#' | KAPPA | yes | 0 - 1 |
#' | AUC (Area Under Curve) | no | 0 - 1 |
#' | BOYCE (continuous Boyce index)* | no | -1 - 1 |
#' | IMAE (Inverse Mean Absolute Error)** | no | 0 - 1 |
#'
#' \* BOYCE is calculated based on presences and background points, in case that background points
#' is not provided it is calculated using presences and absences. The codes for calculating
#' this metric is and adaptation of enmSdm package (https://github.com/adamlilith/enmSdm)
#'
#' \** IMAE is calculated as 1-(Mean Absolute Error) in order to be consistent with the other
#' metrics where the higher the value of a given performance metric, the greater the model's
#' accuracy
#'
#' @md
#'
#' @return a tibble with next columns
#' \itemize{
#' \item threshold: threshold names
#' \item thr_value: threshold values
#' \item n_presences: number of presences
#' \item n_absences: number of absences
#' \item from TPR to IMAE: performance metrics
#' }
#'
#'
#' @export
#'
#' @importFrom dplyr %>% tibble mutate filter pull bind_cols left_join all_of
#' @importFrom stats quantile
#'
#' @examples
#' \dontrun{
#' require(dplyr)
#'
#' set.seed(0)
#' p <- rnorm(50, mean = 0.7, sd = 0.3) %>% abs()
#' p[p > 1] <- 1
#' p[p < 0] <- 0
#'
#' set.seed(0)
#' a <- rnorm(50, mean = 0.3, sd = 0.2) %>% abs()
#' a[a > 1] <- 1
#' a[a < 0] <- 0
#'
#' set.seed(0)
#' backg <- rnorm(1000, mean = 0.4, sd = 0.4) %>% abs()
#' backg[backg > 1] <- 1
#' backg[backg < 0] <- 0
#'
#' # Function use without threshold specification
#' e <- sdm_eval(p, a)
#' e
#'
#' # Function use with threshold specification
#' sdm_eval(p, a, thr = "max_sorensen")
#' sdm_eval(p, a, thr = c("lpt", "max_sens_spec", "max_jaccard"))
#' sdm_eval(p, a, thr = c("lpt", "max_sens_spec", "sensitivity"))
#' sdm_eval(p, a, thr = c("lpt", "max_sens_spec", "sensitivity", sens = "0.95"))
#'
#' # Use of bg argument (it will only be used for calculating BOYCE index)
#' sdm_eval(p, a, thr = "max_sens_spec")
#' sdm_eval(p, a, thr = c("max_sens_spec"), bg = backg)
#'
#' # If background will be used to calculate all other metrics
#' # background values can be used in "a" argument
#' sdm_eval(p, backg, thr = "max_sens_spec")
#' }
#'
sdm_eval <- function(p, a, bg = NULL, thr = NULL) {
TPR <- TNR <- JACCARD <- SORENSEN <- threshold <- FPB <- TSS <- NULL
if (any(
!(thr[is.na(suppressWarnings(as.numeric(thr)))]) %in% c(
"lpt",
"max_sens_spec",
"equal_sens_spec",
"sensitivity",
"max_jaccard",
"max_sorensen",
"max_fpb"
)
)) {
stop("'thr' Argument is not valid!")
}
if (is.null(thr)) {
thr <- c(
"lpt",
"max_sens_spec",
"max_kappa",
"equal_sens_spec",
"sensitivity",
"max_jaccard",
"max_sorensen",
"max_fpb"
)
}
if (any(thr %in% "sensitivity") &&
!any(names(thr) %in% "sens")) {
thr <- c(thr, sens = 0.9)
}
np <- length(p)
na <- length(a)
if (na == 0 | np == 0) {
stop("cannot evaluate a model without absence and presence data")
}
# Threshold breaks:
if (length(p) > 1000) {
tr <- as.vector(stats::quantile(p, 0:1000 / 1000))
} else {
tr <- p
}
if (length(a) > 1000) {
tr <- c(tr, as.vector(stats::quantile(a, 0:1000 / 1000)))
} else {
tr <- c(tr, a)
}
tr <- sort(unique(round(tr, 8)))
res <- matrix(ncol = 4, nrow = length(tr))
colnames(res) <- c("tp", "fp", "fn", "tn")
# Confusion Matrix
for (i in 1:length(tr)) {
res[i, 1] <- length(p[p >= tr[i]]) # a true positives
res[i, 2] <- length(a[a >= tr[i]]) # b false positives
res[i, 3] <- length(p[p < tr[i]]) # c false negatives
res[i, 4] <- length(a[a < tr[i]]) # d true negatives
}
res <- data.frame(res)
# Performance metrics
performance <- dplyr::tibble(
threshold = tr,
n_presences = np,
n_absences = na,
TPR = res$tp / (res$tp + res$fn),
TNR = res$tn / (res$tn + res$fp),
SORENSEN = 2 * res$tp / (res$fn + (2 * res$tp) + res$fp),
JACCARD = res$tp / (res$fn + res$tp + res$fp),
FPB = 2 * JACCARD,
OR = (1 - TPR),
TSS = (TPR + TNR) - 1
)
R <- sum(rank(c(p, a))[1:np]) - (np * (np + 1) / 2)
performance <- performance %>% dplyr::mutate(AUC = R / (as.numeric(na) * as.numeric(np)))
if (is.null(bg)) {
performance <- performance %>% dplyr::mutate(BOYCE = boyce(pres = p, contrast = c(p, a)))
} else {
performance <- performance %>% dplyr::mutate(BOYCE = boyce(pres = p, contrast = c(p, bg)))
}
real <- c(rep(1, length(p)), rep(0, length(a)))
pred <- c(p, a)
performance <- performance %>% dplyr::mutate(IMAE = 1 - (sum(abs(real - pred)) / length(pred)))
# Thresholds
thresholds <- list()
thresholds$max_sorensen <-
max(performance %>% dplyr::filter(SORENSEN == max(SORENSEN)) %>% dplyr::pull(threshold))
thresholds$max_jaccard <-
max(performance %>% dplyr::filter(SORENSEN == max(SORENSEN)) %>% dplyr::pull(threshold))
thresholds$max_fpb <-
max(performance %>% dplyr::filter(FPB == max(FPB)) %>% dplyr::pull(threshold))
thresholds$max_sens_spec <-
max(performance %>% dplyr::filter(TSS == max(TSS)) %>% dplyr::pull(threshold))
thresholds$equal_sens_spec <-
performance$threshold[which(abs(
performance$TPR - performance$TNR
) ==
min(abs(performance$TPR - performance$TNR)))] %>% max()
suppressWarnings(thresholds$lpt <- max(performance$threshold[performance$TPR == 1]))
if (any(thr == "sensitivity")) {
thresholds$sensitivity <- performance$threshold[which.min(performance$TPR >
as.numeric(thr["sens"]))]
}
thresholds <- dplyr::bind_cols(thresholds)
thr_table <- dplyr::tibble(threshold = names(thresholds), thr_value = unlist(thresholds))
thr_table <- dplyr::left_join(thr_table, performance, by = c("thr_value" = "threshold"))
# Return final result
result <- thr_table
if (!is.null(thr)) {
result <- result %>%
dplyr::filter(threshold %in% thr)
}
return(result)
}