-
Notifications
You must be signed in to change notification settings - Fork 23
/
amp_alphadiv.R
214 lines (201 loc) · 7.47 KB
/
amp_alphadiv.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#' Alpha-diversity analysis
#'
#' Calculate alpha-diversity indices for each sample and combines with the metadata.
#'
#' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}.
#' @param measure Alpha-diversity measure(s) to be included if not all. A vector of one or more of:
#' \itemize{
#' \item \code{"uniqueotus"}
#' \item \code{"shannon"}
#' \item \code{"simpson"}
#' \item \code{"invsimpson"}
#' }
#' @param richness (\emph{logical}) Also calculate sample richness estimates (Chao1 and ACE) as calculated by \code{\link[vegan]{estimateR}}. (\emph{default:} \code{FALSE})
#' @param rarefy Rarefy species richness to this value before calculating alpha diversity and/or richness. Passed directly as the \code{sample} argument to \code{\link[vegan]{rrarefy}}. (\emph{default:} \code{NULL})
#' @param plot (\emph{logical}) Produce a boxplot instead of a table. (\emph{default:} \code{FALSE})
#' @param plot_scatter (\emph{logical}) If \code{TRUE} produce a scatter plot instead of a boxplot. (\emph{default:} \code{FALSE})
#' @param plot_group_by Group the samples by a categorical variable in the metadata. If \code{NULL} then all samples are shown in a single group.
#' @export
#' @importFrom dplyr arrange select
#' @importFrom vegan diversity estimateR
#' @importFrom data.table setDT melt
#'
#' @details The alpha-diversity indices are calculated per sample using the vegan function \code{\link[vegan]{diversity}}, where the read abundances are first rarefied using \code{\link[vegan]{rrarefy}} by the size of the \code{rarefy} argument. Refer to the vegan documentation for details about the different indices and how they are calculated. If no measure(s) are chosen, all diversity indices will be returned.
#'
#' @references
#' McMurdie, P.J. & Holmes, S. (2014). Waste not, want not: Why
#' rarefying microbiome data is inadmissible. \emph{PLoS Comput Biol}
#' \strong{10(4):} e1003531. DOI:\code{10.1371/journal.pcbi.1003531}
#'
#' @seealso
#' \code{\link{amp_load}}
#'
#' @examples
#' # Load example data
#' data("AalborgWWTPs")
#'
#' # Subsample/rarefy to 20000 reads and then calculate
#' # Shannon and Simpson alpha-diversity indices
#' alphadiversityresult <- amp_alphadiv(
#' AalborgWWTPs,
#' measure = c("shannon", "simpson"),
#' rarefy = 20000
#' )
#'
#' # Explore the results in the data frame
#' # View(alphadiversityresult)
#'
#' # Generate a plot instead
#' amp_alphadiv(
#' AalborgWWTPs,
#' measure = c("shannon", "simpson"),
#' rarefy = 20000,
#' plot = TRUE,
#' plot_group_by = "Plant"
#' )
#' @return A data frame or a ggplot if \code{plot} is set to TRUE
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
#' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
amp_alphadiv <- function(data,
measure = NULL,
richness = FALSE,
rarefy = NULL,
plot = FALSE,
plot_scatter = FALSE,
plot_group_by = NULL) {
### Data must be in ampvis2 format
is_ampvis2(data)
# what's the name of the first column (sample IDs)?
sampleIDvarname <- colnames(data$metadata)[1]
# check measures
validMeasures <- c("uniqueotus", "shannon", "simpson", "invsimpson")
if (is.null(measure)) {
measure <- validMeasures
} else if (!is.null(measure) & any(!measure %in% validMeasures)) {
measure <- measure %>% tolower()
warning("Some or none of the provided measures were not recognised, calculating all. Valid options are:\n", paste0(validMeasures, collapse = ", "), call. = FALSE)
measure <- validMeasures
}
results <- data$metadata
names <- results[[1]] # for making sure the ordering of the values in the vectors calculated later match the order of the metadata samples
# nreads before rarefying
RawReads <- colSums(data$abund)
RawReads <- RawReads[names]
results$RawReads <- RawReads
if (!is.null(rarefy)) {
data <- amp_rarefy(data, rarefy)
}
# Add Reads column
Reads <- colSums(data$abund)
Reads <- Reads[names]
results$Reads <- Reads
# warning from phyloseq::estimate_richness
if (!any(data$abund == 1)) {
warning("The data you have provided does not have\n",
"any singletons. This is highly suspicious. Results of richness\n",
"estimates (for example) are probably unreliable, or wrong, if you have already\n",
"trimmed low-abundance taxa from the data.\n", "\n",
"We recommend that you find the untrimmed data and retry.",
call. = FALSE
)
}
tabund <- t(data$abund)
if (any("uniqueotus" %in% measure) | is.null(measure)) {
uniqueOTUs <- colSums(data$abund > 0) # not transposed
results$uniqueOTUs <- uniqueOTUs[names]
}
if (any("shannon" %in% measure)) {
Shannon <- vegan::diversity(tabund, index = "shannon")
# For some reason vegan throws away names if only 1 sample
if (length(Shannon) == 1) {
results$Shannon <- Shannon
} else if (length(Shannon) > 1) {
results$Shannon <- Shannon[names]
}
}
if (any("simpson" %in% measure)) {
Simpson <- vegan::diversity(tabund, index = "simpson")
# For some reason vegan throws away names if only 1 sample
if (length(Simpson) == 1) {
results$Simpson <- Simpson
} else if (length(Simpson) > 1) {
results$Simpson <- Simpson[names]
}
}
if (any("invsimpson" %in% measure)) {
invSimpson <- vegan::diversity(tabund, index = "invsimpson")
# For some reason vegan throws away names if only 1 sample
if (length(invSimpson) == 1) {
results$invSimpson <- invSimpson
} else if (length(invSimpson) > 1) {
results$invSimpson <- invSimpson[names]
}
}
if (isTRUE(richness)) {
richness <- t(vegan::estimateR(tabund)) %>% as.data.frame()
richness <- richness[names, , drop = FALSE]
results$Chao1 <- richness[, "S.chao1"]
results$ACE <- richness[, "S.ACE"]
}
if (isTRUE(plot)) {
setDT(results)
plot_data <- melt(
results,
id.vars = c(sampleIDvarname, plot_group_by),
measure.vars = colnames(results)[tolower(colnames(results)) %in% tolower(measure)],
variable.name = "measure",
value.name = "value"
)
ggplot(
plot_data,
aes(
x = if(is.null(plot_group_by)) {
sampleIDvarname
} else {
eval(parse(text = plot_group_by))
},
y = value
)
) +
{
if (!isTRUE(plot_scatter)) {
geom_boxplot()
} else if (isTRUE(plot_scatter)) {
geom_point()
}
} +
scale_fill_viridis_d(
option = "turbo"
) +
facet_wrap(~measure, nrow = 1, scales = "free_y") +
theme_classic() +
theme(
strip.background = element_rect(
colour = "gray8", fill = "gray98"
),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.grid.major.y = element_line(color = "gray70"),
strip.text = element_text(size = 16),
axis.ticks.x = element_blank(),
axis.text.x = if(is.null(plot_group_by)) {
element_blank()
} else {
element_text(angle = 45, size = 14, hjust = 1, vjust = 1)
},
axis.text.y = element_text(size = 14),
axis.title = element_blank()
)
} else {
# arrange by RawReads no matter if rarefied or not, remove the column if rarefy is not set
results <- results %>%
dplyr::arrange(RawReads) %>%
{
if (is.null(rarefy)) dplyr::select(., -RawReads) else .
}
return(results)
}
}
#' @rdname amp_alphadiv
#' @export
amp_alpha_diversity <- amp_alphadiv