/
intra_physig.R
205 lines (191 loc) · 7.44 KB
/
intra_physig.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#' Intraspecific variability - Phylogenetic signal
#'
#' Performs Phylogenetic signal estimates evaluating
#' trait intraspecific variability
#'
#' @param trait.col The name of a column in the provided data frame with trait
#' to be analyzed (e.g. "Body_mass").
#' @param data Data frame containing species traits with row names matching tips
#' in \code{phy}.
#' @param phy A phylogeny (class 'phylo', see ?\code{ape}).
#' @param V Name of the column containing the standard deviation or the standard error of the trait
#' variable. When information is not available for one taxon, the value can be 0 or \code{NA}.
#' @param n.intra Number of times to repeat the analysis generating a random trait value.
#' If NULL, \code{n.intra} = 30
#' @param distrib A character string indicating which distribution to use to generate a random value for the response
#' and/or predictor variables. Default is normal distribution: "normal" (function \code{\link{rnorm}}).
#' Uniform distribution: "uniform" (\code{\link{runif}})
#' Warning: we recommend to use normal distribution with Vx or Vy = standard deviation of the mean.
#' @param method Method to compute signal: can be "K" or "lambda".
#' @param track Print a report tracking function progress (default = TRUE)
#' @details
#' This function estimates phylogenetic signal using \code{\link[phytools]{phylosig}}.
#' The analysis is repeated \code{n.intra} times. At each iteration the function generates a random value
#' for each row in the dataset using the standard deviation or errors supplied and assuming a normal or uniform distribution.
#' To calculate means and se for your raw data, you can use the \code{summarySE} function from the
#' package \code{Rmisc}.
#'
#' Output can be visualised using \code{sensi_plot}.
#'
#' @note The argument "se" from \code{\link[phytools]{phylosig}} is not available in this function. Use the
#' argument "V" instead with \code{\link{intra_physig}} to indicate the name of the column containing the standard
#' deviation or the standard error of the trait variable instead.
#'
#' @return The function \code{intra_physig} returns a list with the following
#' components:
#' @return \code{Trait}: Column name of the trait analysed
#' @return \code{data}: Original full dataset
#' @return \code{intra.physig.estimates}: Run number, phylogenetic signal estimate
#' (lambda or K) and the p-value for each run with a different simulated datset.
#' @return \code{N.obs}: Size of the dataset after matching it with tree tips and removing NA's.
#' @return \code{stats}: Main statistics for signal estimate\code{CI_low} and \code{CI_high} are the lower
#' and upper limits of the 95% confidence interval.
#' @author Caterina Penone & Pablo Ariel Martinez & Gustavo Paterno
#' @seealso \code{\link[phytools]{phylosig}}, \code{\link{sensi_plot}}
#' @references
#'
#' Paterno, G. B., Penone, C. Werner, G. D. A.
#' \href{http://doi.wiley.com/10.1111/2041-210X.12990}{sensiPhy:
#' An r-package for sensitivity analysis in phylogenetic
#' comparative methods.} Methods in Ecology and Evolution
#' 2018, 9(6):1461-1467
#'
#' Martinez, P. a., Zurano, J.P., Amado, T.F., Penone, C., Betancur-R, R.,
#' Bidau, C.J. & Jacobina, U.P. (2015). Chromosomal diversity in tropical reef
#' fishes is related to body size and depth range. Molecular Phylogenetics and
#' Evolution, 93, 1-4
#'
#' Blomberg, S. P., T. Garland Jr., A. R. Ives (2003)
#' Testing for phylogenetic signal in comparative data:
#' Behavioral traits are more labile. Evolution, 57, 717-745.
#'
#' Pagel, M. (1999) Inferring the historical patterns of biological evolution.
#' Nature, 401, 877-884.
#'
#' Kamilar, J. M., & Cooper, N. (2013). Phylogenetic signal in primate behaviour,
#' ecology and life history. Philosophical Transactions of the Royal Society
#' B: Biological Sciences, 368: 20120341.
#'
#' @examples
#' \dontrun{
#'data(alien)
#'alien.data<-alien$data
#'alien.phy<-alien$phy
#'# Run sensitivity analysis:
#'intra <- intra_physig(trait.col = "gestaLen", V = "SD_gesta" ,
#' data = alien.data, phy = alien.phy[[1]])
#'summary(intra)
#'sensi_plot(intra)
#'sensi_plot(intra, graphs = 1)
#'sensi_plot(intra, graphs = 2)
#'}
#'
#'\dontshow{
#'data(alien)
#'# Run sensitivity analysis:
#'intra <- intra_physig(trait.col = "gestaLen", V = "SD_gesta" ,
#' data = alien.data, n.intra = 5,
#' phy = alien.phy[[1]])
#'summary(intra)
#'}
#' @export
intra_physig <- function(trait.col,
data,
phy,
V = NULL,
n.intra = 100,
distrib = "normal",
method = "K",
track = TRUE) {
#Error check
if (is.null(V))
stop("V must be defined")
if (!inherits(data, "data.frame"))
stop("data must be class 'data.frame'")
if (distrib == "normal")
message (
paste(
"distrib = normal: make sure that standard deviation",
"is provided for V",
sep = " "
)
)
if (!inherits(phy, "phylo"))
stop("tree should be an object of class \"phylo\".")
if (!inherits(trait.col, "character"))
stop("trait.col should be an object of class \"character\".")
# Check match between data and phy
datphy <- match_dataphy(get(trait.col) ~ 1, data, phy)
full.data <- datphy$data
phy <- datphy$phy
trait <- full.data[[trait.col]]
names(trait) <- phy$tip.label
N <- nrow(full.data)
if (!is.null(V) && sum(is.na(full.data[, V])) != 0) {
full.data[is.na(full.data[, V]), V] <- 0
}
#Function to pick a random value in the interval
if (distrib == "normal")
funr <- function(a, b) {
stats::rnorm(1, a, b)
}
else
funr <- function(a, b) {
stats::runif(1, a - b, a + b)
}
#Create the results data.frame
intra.physig.estimates <-
data.frame("n.intra" = numeric(),
"estimate" = numeric(),
"pval" = numeric())
counter = 1
if (track == TRUE)
pb <- utils::txtProgressBar(min = 0, max = n.intra, style = 3)
for (i in 1:n.intra) {
#choose a random value in [mean-se,mean+se] if Vx is provided
predV <-
apply(full.data[, c(trait.col, V)], 1, function(x)
funr(x[1], x[2]))
#model
mod.s <-
phytools::phylosig(
tree = phy,
x = predV,
method = method,
test = TRUE
)
estimate <- mod.s[[1]]
pval <- mod.s$P
if (track == TRUE)
(utils::setTxtProgressBar(pb, i))
#write in a table
estim.simu <- data.frame(i, estimate, pval)
intra.physig.estimates[counter,] <- estim.simu
counter = counter + 1
}
if (track == TRUE)
on.exit(close(pb))
statresults <-
data.frame(
min = apply(intra.physig.estimates, 2, min),
max = apply(intra.physig.estimates, 2, max),
mean = apply(intra.physig.estimates, 2, mean),
sd_intra = apply(intra.physig.estimates, 2, stats::sd)
)[-1,]
statresults$CI_low <-
statresults$mean - stats::qt(0.975, df = n.intra - 1) * statresults$sd_intra / sqrt(n.intra)
statresults$CI_high <-
statresults$mean + stats::qt(0.975, df = n.intra - 1) * statresults$sd_intra / sqrt(n.intra)
stats <- round(statresults[c(1:2), c(3, 5, 6, 1, 2)], digits = 5)
cl <- match.call()
res <- list(
call = cl,
Trait = trait.col,
intra.physig.estimates = intra.physig.estimates,
N.obs = N,
stats = stats,
data = full.data
)
class(res) <- "intra.physig"
return(res)
}