Permalink
Browse files

new models for ratebytree

  • Loading branch information...
liamrevell committed Sep 4, 2017
1 parent 12815ca commit 5a769e5bb011d7c341f2eec1d78eaee8b6f05710
Showing with 147 additions and 44 deletions.
  1. +4 −4 DESCRIPTION
  2. +140 −37 R/ratebytree.R
  3. +3 −3 man/ratebytree.Rd
View
@@ -1,6 +1,6 @@
Package: phytools
Version: 0.6-23
Date: 2017-08-28
Version: 0.6-24
Date: 2017-09-04
Title: Phylogenetic Tools for Comparative Biology (and Other Things)
Author: Liam J. Revell
Maintainer: Liam J. Revell <liam.revell@umb.edu>
@@ -56,6 +56,6 @@ Description: Package contains various functions for phylogenetic analysis.
research.
License: GPL (>= 2)
URL: http://github.com/liamrevell/phytools
Packaged: 2017-08-28 12:00:00 EST
Packaged: 2017-09-04 12:00:00 EST
Repository:
Date/Publication: 2017-08-28 12:00:00 EST
Date/Publication: 2017-09-04 12:00:00 EST
View
@@ -8,11 +8,17 @@ ratebytree<-function(trees,x,...){
if(hasArg(trace)) trace<-list(...)$trace
else trace<-FALSE
if(hasArg(digits)) digits<-list(...)$digits
else digits<-6
else digits<-4
if(hasArg(test)) test<-list(...)$test
else test<-"chisq"
if(hasArg(quiet)) quiet<-list(...)$quiet
else quiet<-FALSE
if(hasArg(model)) model<-list(...)$model
else model<-"BM"
if(!(model%in%c("BM","OU","EB"))){
cat(paste("model =",model,"not recognized. using model = \"BM\"\n"))
model<-"BM"
}
## check trees & x
if(!inherits(trees,"multiPhylo"))
stop("trees should be object of class \"multiPhylo\".")
@@ -27,24 +33,33 @@ ratebytree<-function(trees,x,...){
x<-mapply(function(x,t) x<-x[t$tip.label],x=x,t=trees,SIMPLIFY=FALSE)
se<-mapply(function(x,t) x<-x[t$tip.label],x=se,t=trees,SIMPLIFY=FALSE)
## first, fit multi-rate model
lik.multi<-function(theta,trees,x,se,trace=FALSE){
lik.multi<-function(theta,trees,x,se,model,trace=FALSE){
n<-sapply(trees,Ntip)
N<-length(trees)
sig<-theta[1:N]
a<-theta[(N+1):(2*N)]
C<-lapply(trees,vcv)
a<-theta[1:N+N]
if(model=="OU") alpha<-theta[1:N+2*N]
if(model=="EB") r<-theta[1:N+2*N]
if(model=="BM") C<-lapply(trees,vcv)
else if(model=="OU") C<-mapply(vcvPhylo,tree=trees,alpha=alpha,MoreArgs=list(model="OU",
anc.nodes=FALSE),SIMPLIFY=FALSE)
else if(model=="EB") C<-mapply(vcvPhylo,tree=trees,r=r,MoreArgs=list(model="EB",
anc.nodes=FALSE),SIMPLIFY=FALSE)
E<-lapply(se,diag)
V<-mapply("+",mapply("*",C,sig,SIMPLIFY=FALSE),E,SIMPLIFY=FALSE)
logL<-0
for(i in 1:N)
logL<-logL-t(x[[i]]-a[i])%*%solve(V[[i]])%*%(x[[i]]-
a[i])/2-n[i]*log(2*pi)/2-determinant(V[[i]])$modulus[1]/2
if(trace){
cat(paste(paste(round(sig,digits),collapse="\t"),round(logL,digits),"\n",sep="\t"))
cat(paste(paste(round(sig,digits),collapse="\t"),
if(model=="OU") paste(round(alpha,digits),collapse="\t"),
if(model=="EB") paste(round(r,digits),collapse="\t"),
round(logL,digits),"\n",sep="\t"))
flush.console()
}
-logL
}
}
f1<-function(tree,x){
pvcv<-phyl.vcv(as.matrix(x),vcv(tree),1)
c(pvcv$R[1,1],pvcv$a[1,1])
@@ -54,47 +69,90 @@ ratebytree<-function(trees,x,...){
init<-list(...)$init
if(!is.null(init$sigm)) PP[1,]<-init$sigm
if(!is.null(init$am)) PP[2,]<-init$am
if(model=="OU") if(!is.null(init$alpham)) PP<-rbind(PP,init$alpham)
if(model=="EB") if(!is.null(init$rm)) PP<-rbind(PP,init$rm)
}
if((model%in%c("OU","EB"))&&nrow(PP)==2) PP<-rbind(PP,rep(0,ncol(PP)))
p<-as.vector(t(PP))
if(trace){
cat("\nOptimizing multi-rate model....\n")
cat(paste(paste("sig[",1:N,"] ",sep="",collapse="\t"),"logL\n",sep="\t"))
if(model=="BM"){
cat("\nOptimizing multi-rate model....\n")
cat(paste(paste("sig[",1:N,"]",sep="",collapse="\t"),"logL\n",sep="\t"))
} else if(model=="OU"){
cat("\nOptimizing multi-regime model....\n")
cat(paste(paste("sig[",1:N,"]",sep="",collapse="\t"),
paste("alpha[",1:N,"]",sep="",collapse="\t"),"logL\n",sep="\t"))
} else if(model=="EB"){
cat("\nOptimizing multi-regime model....\n")
cat(paste(paste("sig[",1:N,"]",sep="",collapse="\t"),
paste("r[",1:N,"]",sep="",collapse="\t"),"logL\n",sep="\t"))
}
}
fit.multi<-optim(p,lik.multi,trees=trees,x=x,se=se,trace=trace,method="L-BFGS-B",
lower=c(rep(tol,N),rep(-Inf,N)),upper=c(rep(Inf,N),rep(Inf,N)))
fit.multi<-optim(p,lik.multi,trees=trees,x=x,se=se,model=model,trace=trace,
method="L-BFGS-B",lower=c(rep(tol,N),rep(-Inf,N),
if(model%in%c("OU","EB")) rep(-Inf,N)),upper=c(rep(Inf,N),rep(Inf,N),
if(model%in%c("OU","EB")) rep(Inf,N)))
## now fit single-rate model
lik.onerate<-function(theta,trees,x,se,trace=FALSE){
lik.onerate<-function(theta,trees,x,se,model,trace=FALSE){
n<-sapply(trees,Ntip)
N<-length(trees)
sig<-theta[1]
a<-theta[1:N+1]
C<-lapply(trees,vcv)
if(model=="OU") alpha<-theta[N+2]
if(model=="EB") r<-theta[N+2]
if(model=="BM") C<-lapply(trees,vcv)
else if(model=="OU") C<-lapply(trees,vcvPhylo,model="OU",alpha=alpha,
anc.nodes=FALSE)
else if(model=="EB") C<-lapply(trees,vcvPhylo,model="EB",r=r,
anc.nodes=FALSE)
E<-lapply(se,diag)
V<-mapply("+",lapply(C,"*",sig),E,SIMPLIFY=FALSE)
logL<-0
for(i in 1:N)
logL<-logL-t(x[[i]]-a[i])%*%solve(V[[i]])%*%(x[[i]]-
a[i])/2-n[i]*log(2*pi)/2-determinant(V[[i]])$modulus[1]/2
if(trace){
cat(paste(round(sig,digits),round(logL,digits),"\n",sep="\t"))
cat(paste(round(sig,digits),
if(model=="OU") round(alpha,digits),
if(model=="EB") round(r,digits),
round(logL,digits),"\n",sep="\t"))
flush.console()
}
-logL
}
p<-c(mean(fit.multi$par[1:N]),fit.multi$par[(N+1):(2*N)])
p<-c(mean(fit.multi$par[1:N]),fit.multi$par[1:N+1])
if(model%in%c("OU","EB")) p<-c(p,mean(fit.multi$par[N+2]))
if(hasArg(init)){
if(!is.null(init$sigc)) p[1]<-init$sigc
if(!is.null(init$ac)) p[1:N+1]<-init$ac
if(model=="OU") if(!is.null(init$alphac)) p[N+2]<-init$alphac
if(model=="EB") if(!is.null(init$rc)) p[N+2]<-init$rc
}
if(trace){
cat("\nOptimizing common-rate model....\n")
cat(paste("sig ","logL\n",sep="\t"))
if(model=="BM"){
cat("\nOptimizing common-rate model....\n")
cat(paste("sig ","logL\n",sep="\t"))
} else if(model=="OU"){
cat("\nOptimizing common-regime model....\n")
cat(paste("sig ","alpha ","logL\n",sep="\t"))
} else if(model=="EB"){
cat("\nOptimizing common-regime mode.....\n")
cat(paste("sig ","r ","logL\n",sep="\t"))
}
}
fit.onerate<-optim(p,lik.onerate,trees=trees,x=x,se=se,trace=trace,method="L-BFGS-B",
lower=c(tol,rep(-Inf,N)),upper=c(Inf,rep(Inf,N)))
fit.onerate<-optim(p,lik.onerate,trees=trees,x=x,se=se,model=model,
trace=trace,method="L-BFGS-B",
lower=c(tol,rep(-Inf,N),if(model=="OU") tol else if(model=="EB") -Inf),
upper=c(Inf,rep(Inf,N),if(model%in%c("OU","EB")) Inf))
## compare models:
LR<-2*(-fit.multi$value+fit.onerate$value)
if(test=="chisq") P.chisq<-pchisq(LR,df=N-1,lower.tail=FALSE)
km<-2*N+if(model=="BM") 0 else if(model%in%c("OU","EB")) N
k1<-N+if(model=="BM") 1 else if(model%in%c("OU","EB")) 2
if(test=="simulation"&&model%in%c("OU","EB")){
cat("Simulation test not yet available for chosen model. Using chi-square test.\n")
test<-"chisq"
}
if(test=="chisq") P.chisq<-pchisq(LR,df=km-k1,lower.tail=FALSE)
else if(test=="simulation"){
if(!quiet) cat("Generating null distribution via simulation -> |")
flush.console()
@@ -122,17 +180,22 @@ ratebytree<-function(trees,x,...){
}
obj<-list(
multi.rate.model=list(sig2=fit.multi$par[1:N],
a=fit.multi$par[(N+1):(2*N)],
k=2*N,
a=fit.multi$par[1:N+N],
alpha=if(model=="OU") fit.multi$par[1:N+2*N] else NULL,
r=if(model=="EB") fit.multi$par[1:N+2*N] else NULL,
k=km,
logL=-fit.multi$value,
counts=fit.multi$counts,convergence=fit.multi$convergence,
message=fit.multi$message),
common.rate.model=list(sig2=fit.onerate$par[1],
a=fit.onerate$par[1:N+1],
k=N+1,
alpha=if(model=="OU") fit.onerate$par[N+2] else NULL,
r=if(model=="EB") fit.onerate$par[N+2] else NULL,
k=k1,
logL=-fit.onerate$value,
counts=fit.onerate$counts,convergence=fit.onerate$convergence,
message=fit.onerate$message),
model=model,
N=N,likelihood.ratio=LR,
P.chisq=if(test=="chisq") P.chisq else NULL,
P.sim=if(test=="simulation") P.sim else NULL)
@@ -145,21 +208,61 @@ print.ratebytree<-function(x,...){
if(hasArg(digits)) digits<-list(...)$digits
else digits<-4
N<-x$N
cat("ML common-rate model:\n")
cat(paste("\ts^2\t",paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",round(x$common.rate.model$sig2,digits),
paste(round(x$common.rate.model$a,digits),collapse="\t"),
x$common.rate.model$k,round(x$common.rate.model$logL,digits),
"\n\n",sep="\t"))
cat("ML multi-rate model:\n")
cat(paste("\t",paste(paste("s^2[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",paste(round(x$multi.rate.model$sig2,digits),collapse="\t"),
paste(round(x$multi.rate.model$a,digits),collapse="\t"),
x$multi.rate.model$k,round(x$multi.rate.model$logL,digits),
"\n\n",sep="\t"))
if(x$model=="BM"){
cat("ML common-rate model:\n")
cat(paste("\ts^2\t",paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",round(x$common.rate.model$sig2,digits),
paste(round(x$common.rate.model$a,digits),collapse="\t"),
x$common.rate.model$k,round(x$common.rate.model$logL,digits),
"\n\n",sep="\t"))
cat("ML multi-rate model:\n")
cat(paste("\t",paste(paste("s^2[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",paste(round(x$multi.rate.model$sig2,digits),collapse="\t"),
paste(round(x$multi.rate.model$a,digits),collapse="\t"),
x$multi.rate.model$k,round(x$multi.rate.model$logL,digits),
"\n\n",sep="\t"))
} else if(x$model=="OU"){
cat("ML common-regime OU model:\n")
cat(paste("\ts^2\t",paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\talpha\tk\tlogL\n")
cat(paste("value",round(x$common.rate.model$sig2,digits),
paste(round(x$common.rate.model$a,digits),collapse="\t"),
round(x$common.rate.model$alpha,digits),
x$common.rate.model$k,round(x$common.rate.model$logL,digits),
"\n\n",sep="\t"))
cat("ML multi-regime OU model:\n")
cat(paste("\t",paste(paste("s^2[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("a[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("alp[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",paste(round(x$multi.rate.model$sig2,digits),collapse="\t"),
paste(round(x$multi.rate.model$a,digits),collapse="\t"),
paste(round(x$multi.rate.model$alpha,digits),collapse="\t"),
x$multi.rate.model$k,round(x$multi.rate.model$logL,digits),
"\n\n",sep="\t"))
} else if(x$model=="EB"){
cat("ML common-regime EB model:\n")
cat(paste("\ts^2\t",paste(paste("a[",1:N,"]",sep=""),collapse="\t")),
"\tr\tk\tlogL\n")
cat(paste("value",round(x$common.rate.model$sig2,digits),
paste(round(x$common.rate.model$a,digits),collapse="\t"),
round(x$common.rate.model$r,digits),
x$common.rate.model$k,round(x$common.rate.model$logL,digits),
"\n\n",sep="\t"))
cat("ML multi-regime EB model:\n")
cat(paste("\t",paste(paste("s^2[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("a[",1:N,"]",sep=""),collapse="\t"),"\t",
paste(paste("r[",1:N,"]",sep=""),collapse="\t")),
"\tk\tlogL\n")
cat(paste("value",paste(round(x$multi.rate.model$sig2,digits),collapse="\t"),
paste(round(x$multi.rate.model$a,digits),collapse="\t"),
paste(round(x$multi.rate.model$r,digits),collapse="\t"),
x$multi.rate.model$k,round(x$multi.rate.model$logL,digits),
"\n\n",sep="\t"))
}
cat(paste("Likelihood ratio:",round(x$likelihood.ratio,digits),"\n"))
if(!is.null(x$P.chisq))
cat(paste("P-value (based on X^2):",round(x$P.chisq,digits),"\n\n"))
View
@@ -7,12 +7,12 @@ ratebytree(trees, x, ...)
\arguments{
\item{trees}{an object of class \code{"multiPhylo"}.}
\item{x}{a list of trait vectors for a continuous trait in which the names of each vectors correspond to the tip labels of \code{trees}.}
\item{...}{optional arguments. Presently, these include the following: \code{tol}, used as a minimum value for the fitting rates, to prevent problems in optimization; \code{trace}, a logical value indicating whether or not to report progress in the optimization; \code{test}, the method for hypothesis testing (options are \code{"chisq"} and \code{"simulation"}); \code{quiet}, a logical value indicating whether or not to run perfectly quietly; and \code{se}, a list of vectors containing the standard errors for each value of \code{x}.}
\item{...}{optional arguments. Presently, these include the following: \code{model}, the model of continuous trait evolution (options are \code{"BM"}, the default, \code{"OU"}, and \code{"EB"}); \code{tol}, used as a minimum value for the fitting rates, to prevent problems in optimization; \code{trace}, a logical value indicating whether or not to report progress in the optimization; \code{test}, the method for hypothesis testing (options are \code{"chisq"} and \code{"simulation"}); \code{quiet}, a logical value indicating whether or not to run perfectly quietly; and \code{se}, a list of vectors containing the standard errors for each value of \code{x}.}
}
\description{
This function takes a object of class \code{"multiPhylo"} containing two or more phylogenies (\code{trees}), and a list of trait vectors (\code{x}), and fits two models: one in which the rate of trait evolution is equal among all trees; and a second in which the rates can differ between trees.
This function takes a object of class \code{"multiPhylo"} containing two or more phylogenies (\code{trees}), and a list of trait vectors (\code{x}), and fits two models: one in which the rate (or regime, for models \code{"OU"} and \code{"EB"}) of trait evolution is equal among all trees; and a second in which the rates or regimes can differ between trees.
This model corresponds to the \emph{censored} approach of O'Meara et al. (2006; \emph{Evolution}) and should also be related to the method of Adams (2012; \emph{Systematic Biology}) for comparing rates among traits. See \code{\link{brownie.lite}} for a different implementation of the \emph{noncensored} approach of O'Meara et al. (2006).
This model corresponds to an extension the \emph{censored} approach of O'Meara et al. (2006; \emph{Evolution}) and should also be related to the method of Adams (2012; \emph{Systematic Biology}) for comparing rates among traits. See \code{\link{brownie.lite}} for a different implementation of the \emph{noncensored} approach of O'Meara et al. (2006).
}
\details{
The function also conducts a likelihood-ratio test to compare the two models.

0 comments on commit 5a769e5

Please sign in to comment.