/
influ_physig.R
189 lines (172 loc) · 7.21 KB
/
influ_physig.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#' Influential species detection - Phylogenetic signal
#'
#' Performs leave-one-out deletion analysis for phylogenetic signal estimates,
#' and detects influential species for K or lambda.
#' @param trait.col The name of a column in the provided data frame with trait
#' to be analyzed (e.g. "Body_mass").
#' @param data Data frame containing species traits with row names matching tips
#' in \code{phy}.
#' @param phy A phylogeny (class 'phylo') matching \code{data}.
#' @param method Method to compute signal: can be "K" or "lambda".
#' @param cutoff The cutoff value used to identify for influential species
#' (see Details)
#' @param track Print a report tracking function progress (default = TRUE)
#' @param ... Further arguments to be passed to \code{\link[phytools]{phylosig}}
#'
#' @details
#' This function sequentially removes one species at a time, ans estimates phylogenetic
#' signal (K or lambda) using \code{\link[phytools]{phylosig}}, stores the
#' results and detects the most influential species.
#'
#' \code{influ_physig} detects influential species based on the standardised
#' difference in signal estimate (K or lambda) when removing a given species compared
#' to the full data estimate (with all species). Species with a standardised difference
#' above the value of \code{cutoff} are identified as influential. The default
#' value for the cutoff is 2 standardised differences in signal estimate.
#'
#' Output can be visualised using \code{sensi_plot}.
#'
#' @note The argument "se" from \code{\link[phytools]{phylosig}} is not available in this function. Use the
#' argument "V" instead with \code{\link{intra_physig}} to indicate the name of the column containing the standard
#' deviation or the standard error of the trait variable instead.
#'
#'
#' @return The function \code{influ_physig} returns a list with the following
#' components:
#' @return \code{cutoff}: The value selected for \code{cutoff}
#' @return \code{trait.col}: Column name of the trait analysed
#' @return \code{full.data.estimates}: Phylogenetic signal estimate (K or lambda)
#' and the P value (for the full data).
#' @return \code{influential_species}: List of influential species,
#' based on standardised difference in K or lambda.
#' Species are ordered from most influential to less influential and
#' only include species with a standardised difference > \code{cutoff}.
#' @return \code{influ.physig.estimates}: A data frame with all simulation
#' estimates. Each row represents a deleted species
#' Columns report the calculated signal estimate (\code{k}) or (\code{lambda}),
#' difference between signal estimation of the reduced and full data
#' (\code{DF}), the percentage of change in signal compared
#' to the full data signal (\code{perc}) and p-value for the phylogenetic signal
#' test (\code{pval})
#' @return \code{data}: Original full dataset.
#' @author Gustavo Paterno
#' @seealso \code{\link[phytools]{phylosig}},
#' \code{\link{influ_phylm}},\code{\link{sensi_plot}}
#' @references
#'
#' Paterno, G. B., Penone, C. Werner, G. D. A.
#' \href{http://doi.wiley.com/10.1111/2041-210X.12990}{sensiPhy:
#' An r-package for sensitivity analysis in phylogenetic
#' comparative methods.} Methods in Ecology and Evolution
#' 2018, 9(6):1461-1467
#'
#' Blomberg, S. P., T. Garland Jr., A. R. Ives (2003)
#' Testing for phylogenetic signal in comparative data:
#' Behavioral traits are more labile. Evolution, 57, 717-745.
#'
#' Pagel, M. (1999) Inferring the historical patterns of biological evolution.
#' Nature, 401, 877-884.
#'
#' Kamilar, J. M., & Cooper, N. (2013). Phylogenetic signal in primate behaviour,
#' ecology and life history. Philosophical Transactions of the Royal Society
#' B: Biological Sciences, 368: 20120341.
#'
#' @importFrom phytools phylosig
#' @examples
#' \dontshow{
#'# Load data:
#'data(alien)
#'alien.data<-alien$data
#'alien.phy<-alien$phy
#'# Logtransform data
#'alien.data$logMass <- log(alien.data$adultMass)
#'# Run sensitivity analysis:
#'influ <- influ_physig("logMass", data = alien.data[1:20,],
#' phy = alien.phy[[1]])
#'# To check summary results:
#'summary(influ)
#' }
#' \dontrun{
#'# Load data:
#'data(alien)
#'# Logtransform data
#'alien.data$logMass <- log(alien.data$adultMass)
#'# Run sensitivity analysis:
#'influ <- influ_physig("logMass", data = alien.data, phy = alien.phy[[1]])
#'# To check summary results:
#'summary(influ)
#'# Most influential speciesL
#'influ$influential.species
#'# Visual diagnostics
#'sensi_plot(influ)
#'# You can specify which graph to print:
#'sensi_plot(influ, graphs = 1)
#'sensi_plot(influ, graphs = 2)
#'}
#' @export
influ_physig <- function(trait.col, data, phy, method = "K", cutoff = 2, track = TRUE, ...){
### Basic checking
if(!inherits(phy,"phylo"))
stop("tree should be an object of class \"phylo\".")
if(!inherits(trait.col,"character"))
stop("trait.col should be an object of class \"character\".")
#Matching tree and phylogeny using utils.R
datphy <- match_dataphy(get(trait.col) ~ 1, data, phy)
full.data <- datphy$data
phy <- datphy$phy
trait <- full.data[[trait.col]]
names(trait) <- phy$tip.label
N <- nrow(full.data)
### Fit full data model
mod.0 <- phytools::phylosig(x = trait, tree = phy, method = method, test = T, ...)
#Creates empty data frame to store model outputs
influ.physig.estimates<-
data.frame("species" =numeric(), "estimate" = numeric(),
"DF"=numeric(),"perc"=numeric(),
"pval"=numeric())
### Start leave-one-out analysis (between all species)
counter <- 1
if(track==TRUE) pb <- utils::txtProgressBar(min = 0, max = N, style = 3)
for (i in 1:N){
# Crop data: remove one species from data and phy:
crop.data <- trait[-i]
crop.phy <- ape::drop.tip(phy,phy$tip.label[i])
# Fit reduced data model
mod.s <- phytools::phylosig(tree = crop.phy, crop.data, method = method, test = T, ...)
estimate <- mod.s[[1]]
sp <- phy$tip.label[i]
DF <- mod.s[[1]] - mod.0[[1]]
perc <- round((abs(DF/mod.0[[1]]))*100,digits=1)
pval <- mod.s$P
# Stores values for each simulation
estim.simu <- data.frame(sp, estimate, DF, perc, pval,
stringsAsFactors = F)
influ.physig.estimates[counter, ] <- estim.simu
counter=counter+1
if(track==TRUE) (utils::setTxtProgressBar(pb, i))
}
if(track==TRUE) on.exit(close(pb))
#Calculates Standardized DFbeta and DFintercept
sDF <- influ.physig.estimates$DF/
stats::sd(influ.physig.estimates$DF)
influ.physig.estimates$sDF <- sDF
#Creates a list with full dataset estimates:
param0 <- list(estimate = mod.0[[1]],
pval = mod.0$P)
#Identifies influencital species (sDF > cutoff) and orders by influence
reorder <- influ.physig.estimates[order(abs(
influ.physig.estimates$sDF),decreasing=T),c("species","sDF")]
influ.sp <- as.character(reorder$species[abs(reorder$sDF)>cutoff])
#Generates output:
res <- list(
call = match.call(),
cutoff=cutoff,
trait.col=trait.col,
full.data.estimates = param0,
influential.species = influ.sp,
influ.physig.estimates = influ.physig.estimates,
data = full.data,
phy = phy)
class(res) <- "influ.physig"
return(res)
}