-
Notifications
You must be signed in to change notification settings - Fork 25
/
estimateEvenness.R
259 lines (231 loc) · 8.53 KB
/
estimateEvenness.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#' Estimate Evenness measures
#'
#' This function calculates community evenness indices.
#' These include the \sQuote{Camargo}, \sQuote{Pielou}, \sQuote{Simpson},
#' \sQuote{Evar} and \sQuote{Bulla} evenness measures.
#' See details for more information and references.
#'
#' @param x a \code{\link{SummarizedExperiment}} object
#'
#' @param assay.type A single character value for selecting the
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} used for
#' calculation of the sample-wise estimates.
#'
#' @param assay_name a single \code{character} value for specifying which
#' assay to use for calculation.
#' (Please use \code{assay.type} instead. At some point \code{assay_name}
#' will be disabled.)
#'
#' @param index a \code{character} vector, specifying the evenness measures to be
#' calculated.
#'
#' @param name a name for the column(s) of the colData the results should be
#' stored in.
#'
#' @param BPPARAM A
#' \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}
#' object specifying whether calculation of estimates should be parallelized.
#'
#' @param ... optional arguments:
#' \itemize{
#' \item threshold: a numeric threshold. assay values below or equal
#' to this threshold will be set to zero.
#' }
#'
#' @return \code{x} with additional \code{\link{colData}} named \code{*name*}
#'
#' @details
#' Evenness is a standard index in community ecology, and it quantifies how evenly the abundances
#' of different species are distributed. The following evenness indices are provided:
#'
#' By default, this function returns all indices.
#'
#' The available evenness indices include the following (all in lowercase):
#' \itemize{
#' \item 'camargo': Camargo's evenness (Camargo 1992)
#' \item 'simpson_evenness': Simpson’s evenness is calculated as inverse Simpson diversity (1/lambda) divided by
#' observed species richness S: (1/lambda)/S.
#' \item 'pielou': Pielou's evenness (Pielou, 1966), also known as Shannon or Shannon-Weaver/Wiener/Weiner
#' evenness; H/ln(S). The Shannon-Weaver is the preferred term; see Spellerberg and Fedor (2003).
#' \item 'evar': Smith and Wilson’s Evar index (Smith & Wilson 1996).
#' \item 'bulla': Bulla’s index (O) (Bulla 1994).
#' }
#'
#' Desirable statistical evenness metrics avoid strong bias towards very
#' large or very small abundances; are independent of richness; and range
#' within the unit interval with increasing evenness (Smith & Wilson 1996).
#' Evenness metrics that fulfill these criteria include at least camargo,
#' simpson, smith-wilson, and bulla. Also see Magurran & McGill (2011)
#' and Beisel et al. (2003) for further details.
#'
#' @references
#'
#' Beisel J-N. et al. (2003)
#' A Comparative Analysis of Evenness Index Sensitivity.
#' _Internal Rev. Hydrobiol._ 88(1):3-15.
#' URL: \url{https://portais.ufg.br/up/202/o/2003-comparative_evennes_index.pdf}
#'
#' Bulla L. (1994)
#' An index of evenness and its associated diversity measure.
#' _Oikos_ 70:167--171.
#'
#' Camargo, JA. (1992)
#' New diversity index for assessing structural alterations in aquatic communities.
#' _Bull. Environ. Contam. Toxicol._ 48:428--434.
#'
#' Locey KJ and Lennon JT. (2016)
#' Scaling laws predict global microbial diversity.
#' _PNAS_ 113(21):5970-5975; doi:10.1073/pnas.1521291113.
#'
#' Magurran AE, McGill BJ, eds (2011)
#' Biological Diversity: Frontiers in Measurement and Assessment
#' (Oxford Univ Press, Oxford), Vol 12.
#'
#' Pielou, EC. (1966)
#' The measurement of diversity in different types of
#' biological collections. _J Theoretical Biology_ 13:131--144.
#'
#' Smith B and Wilson JB. (1996)
#' A Consumer's Guide to Evenness Indices.
#' _Oikos_ 76(1):70-82.
#'
#' Spellerberg and Fedor (2003).
#' A tribute to Claude Shannon (1916 –2001) and a plea for more rigorous use of species richness,
#' species diversity and the ‘Shannon–Wiener’ Index.
#' _Alpha Ecology & Biogeography_ 12, 177–197.
#'
#' @seealso
#' \code{\link[scater:plotColData]{plotColData}}
#' \itemize{
#' \item{\code{\link[mia:estimateRichness]{estimateRichness}}}
#' \item{\code{\link[mia:estimateDominance]{estimateDominance}}}
#' \item{\code{\link[mia:estimateDiversity]{estimateDiversity}}}
#' }
#'
#' @name estimateEvenness
#'
#' @examples
#' data(esophagus)
#' tse <- esophagus
#'
#' # Specify index and their output names
#' index <- c("pielou", "camargo", "simpson_evenness", "evar", "bulla")
#' name <- c("Pielou", "Camargo", "SimpsonEvenness", "Evar", "Bulla")
#'
#' # Estimate evenness and give polished names to be used in the output
#' tse <- estimateEvenness(tse, index = index, name = name)
#'
#' # Check the output
#' head(colData(tse))
#'
NULL
#' @rdname estimateEvenness
#' @export
setGeneric("estimateEvenness",signature = c("x"),
function(x, assay.type = assay_name, assay_name = "counts",
index = c("pielou", "camargo", "simpson_evenness", "evar",
"bulla"),
name = index, ...)
standardGeneric("estimateEvenness"))
#' @rdname estimateEvenness
#' @export
setMethod("estimateEvenness", signature = c(x = "SummarizedExperiment"),
function(x, assay.type = assay_name, assay_name = "counts",
index = c("camargo", "pielou", "simpson_evenness", "evar", "bulla"),
name = index, ..., BPPARAM = SerialParam()){
# input check
index <- match.arg(index, several.ok = TRUE)
if(!.is_non_empty_character(name) || length(name) != length(index)){
stop("'name' must be a non-empty character value and have the ",
"same length than 'index'.",
call. = FALSE)
}
.check_assay_present(assay.type, x)
#
vnss <- BiocParallel::bplapply(index,
.get_evenness_values,
mat = assay(x, assay.type),
BPPARAM = BPPARAM, ...)
.add_values_to_colData(x, vnss, name)
}
)
.calc_bulla_evenness <- function(mat) {
# Species richness (number of species)
S <- colSums2(mat > 0, na.rm = TRUE)
# Relative abundances
p <- t(mat)/colSums2(mat, na.rm = TRUE)
i <- seq_len(nrow(p))
O <- vapply(i,function(i){sum(pmin(p[i,], 1/S[i]))},numeric(1))
# Bulla's Evenness
(O - 1/S)/(1 - 1/S)
}
# Camargo's eveness x: species counts zeroes: include zeros Inspired
# by code from Pepijn de Vries and Zhou Xiang at
# researchgate.net/post/How_can_we_calculate_the_Camargo_evenness_index_in_R
# but rewritten here
.calc_camargo_evenness <- function(mat) {
N <- colSums2(mat > 0, na.rm = TRUE)
seq <- IntegerList(lapply(N - 1,seq_len))
x <- mapply(
function(i, n, s){
xx <- 0
for (j in s) {
xx <- xx + sum(abs(mat[(j + 1):n,i] - mat[j,i]))
}
xx
},
seq_along(N),
N,
seq)
# Return
1 - x/(colSums2(mat, na.rm = TRUE) * N)
}
# x: Species count vector
.calc_simpson_evenness <- function(mat) {
# Species richness (number of detected species)
S <- colSums2(mat > 0, na.rm = TRUE)
# Simpson evenness (Simpson diversity per richness)
.calc_inverse_simpson(mat)/S
}
# x: Species count vector
.calc_pielou_evenness <- function(mat) {
# Remove zeroes
mat[mat == 0] <- NA
# Species richness (number of detected species)
S <- colSums2(mat > 0, na.rm = TRUE)
# Relative abundances
p <- t(mat)/colSums2(mat, na.rm = TRUE)
# Shannon index
H <- (-rowSums2(p * log(p), na.rm = TRUE))
# Simpson evenness
H/log(S)
}
# Smith and Wilson’s Evar index
.calc_evar_evenness <- function(mat) {
N <- colSums2(mat, na.rm = TRUE)
# Log abundance
a <- log(mat)
a[is.na(a) | is.infinite(a)] <- 0
# Richness
S <- colSums2(mat > 0, na.rm = TRUE)
c <- colSums2(a, na.rm = TRUE)/S
d <- t((t(a) - c)^2/S)
d[mat == 0] <- 0
f <- colSums2(d, na.rm = TRUE)
(1 - 2/pi * atan(f))
}
.get_evenness_values <- function(index, mat, threshold = 0, ...){
if(!is.numeric(threshold) || length(threshold) != 1L){
stop("'threshold' must be a single numeric value.", call. = FALSE)
}
if(threshold > 0){
mat[mat <= threshold] <- 0
}
FUN <- switch(index,
camargo = .calc_camargo_evenness,
pielou = .calc_pielou_evenness,
simpson_evenness = .calc_simpson_evenness,
evar = .calc_evar_evenness,
bulla = .calc_bulla_evenness)
FUN(mat = mat, ...)
}