From e6837dae3ab410b965d37b6445018523b697fdcf Mon Sep 17 00:00:00 2001 From: Alex Washburne Date: Sat, 7 Jul 2018 10:28:08 -0600 Subject: [PATCH] phylofactorization --- DESCRIPTION | 1 - R/PhyloFactor.R | 53 ++++++++++++------------------------------ R/PhyloRegression.R | 21 +++++------------ R/findWinner.R | 12 ++++------ R/pglm.R | 7 +----- man/PhyloFactor.Rd | 29 +++++++++-------------- man/PhyloRegression.Rd | 8 ++----- man/findWinner.Rd | 5 +--- man/getObjective.Rd | 23 +++++++++--------- man/gpf.Rd | 4 ++-- man/pf.tree.Rd | 7 +++--- man/pglm.Rd | 2 -- man/twoSampleFactor.Rd | 6 ++--- 13 files changed, 60 insertions(+), 118 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d875e4..8f16677 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ Imports: grDevices(>= 3.2.2), caper (>= 0.5.2), Biostrings (>= 2.38.4), - biglm (>= 0.9.1), mgcv (>= 1.8-16), ggplot2 (>= 2.2.1), ggtree (>= 1.8.2), diff --git a/R/PhyloFactor.R b/R/PhyloFactor.R index 8c26253..170e60b 100644 --- a/R/PhyloFactor.R +++ b/R/PhyloFactor.R @@ -9,8 +9,6 @@ #' @param contrast.fcn Contrast function. Default is an efficient version of \code{BalanceContrast}. Another built-in option is \code{\link{amalgamate}} - for amalgamation-based analyses of compositional data, set \code{transform.fcn=I} and \code{contrast.fcn=amalgamate}. #' @param method Which default objective function to use either "glm", "max.var" or "gam". #' @param nfactors Number of clades or factors to produce in phylofactorization. Default, NULL, will iterate phylofactorization until either dim(Data)[1]-1 factors, or until stop.fcn returns T -#' @param quiet Logical, default is \code{FALSE}, indicating whether or not to display standard warnings. -#' @param trust.me Logical, default \code{FALSE}, indicating whether or not to trust the input Data to be compositional with no zeros. #' @param small.output Logical, indicating whether or not to trim output. If \code{TRUE}, output may not work with downstream summary and plotting wrappers. #' @param stop.fcn Currently, accepts input of 'KS'. Coming soon: input your own function of the environment in phylofactor to determine when to stop. #' @param stop.early Logical indicating if stop.fcn should be evaluated before (stop.early=T) or after (stop.early=F) choosing an edge maximizing the objective function. @@ -19,7 +17,6 @@ #' @param ncores Number of cores for built-in parallelization of phylofactorization. Parallelizes the extraction of groups, amalgamation of data based on groups, regression, and calculation of objective function. Be warned - this can lead to R taking over a system's memory. #' @param tolerance Tolerance for deviation of column sums of data from 1. if abs(colSums(Data)-1)>tolerance, a warning message will be displayed. #' @param delta Numerical value for replacement of zeros. Default is 0.65, so zeros will be replaced with 0.65*min(Data[Data>0]) -#' @param smallglm Logical allowing use of \code{bigglm} when \code{ncores} is not \code{NULL}. If \code{TRUE}, will use regular \code{glm()} at base of regression. If \code{FALSE}, will use slower but memory-efficient \code{bigglm}. Default is false. #' @param choice.fcn Function for customized choice function. Must take as input the numeric vector of ilr coefficients \code{y}, the input meta-data/independent-variable \code{X}, and a logical \code{PF.output}. If \code{PF.output==F}, the output of \code{choice.fcn} must be a two-member list containing numerics \code{output$objective} and \code{output$stopStatistic}. Phylofactor will choose the edge which maximizes \code{output$objective} and a customzed input \code{stop.fcn} can be used with the \code{output$stopStatistics} to stop phylofactor internally. #' @param cluster.depends Character parsed and evaluated by cluster to load all dependencies for custom choice.fcn. e.g. \code{cluster.depends <- 'library(bayesm)'} #' @param ... optional input arguments for \code{\link{glm}} or, if \code{method=='gam'}, input for \code{nlme::gam} @@ -315,7 +312,7 @@ #' #The optimal environment for this simulated organism is mu=-1 #' c('sigma'=sigma,'sigma.hat'=sigma.hat) #The standard deviation is ~0.9. -PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.fcn=log,contrast.fcn=NULL,method='glm',nfactors=NULL,quiet=T,trust.me=F,small.output=F,stop.fcn=NULL,stop.early=NULL,KS.Pthreshold=0.01,alternative='greater',ncores=NULL,tolerance=1e-10,delta=0.65,smallglm=T,choice.fcn=NULL,cluster.depends='',...){ +PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.fcn=log,contrast.fcn=NULL,method='glm',nfactors=NULL,small.output=F,stop.fcn=NULL,stop.early=NULL,KS.Pthreshold=0.01,alternative='greater',ncores=NULL,tolerance=1e-10,delta=0.65,choice.fcn=NULL,cluster.depends='',...){ ######################################################## Housekeeping ################################################################################# @@ -332,11 +329,9 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f } } } - if (all(rownames(Data) %in% tree$tip.label)==F){stop('some rownames of Data are not found in tree')} - if (all(tree$tip.label %in% rownames(Data))==F){ - if(!quiet){ - warning('some tips in tree are not found in dataset - output PF$tree will contain a trimmed tree') - } + if (!all(rownames(Data) %in% tree$tip.label)){stop('some rownames of Data are not found in tree')} + if (!all(tree$tip.label %in% rownames(Data))){ + warning('some tips in tree are not found in dataset - output PF$tree will contain a trimmed tree') tree <- ape::drop.tip(tree,setdiff(tree$tip.label,rownames(Data)))} if (!all(rownames(Data)==tree$tip.label)){ warning('rows of data are in different order of tree tip-labels - use output$data for downstream analysis, or set Data <- Data[output$tree$tip.label,]') @@ -385,37 +380,19 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f ###################### Default treatment of Data ################################# - if (all.equal(transform.fcn,log)==TRUE){ + if (all.equal(transform.fcn,log)==T){ if (any(c(Data)<0)){ stop('For log-transformed data analysis, all entries of Data must be greater than or equal to 0') } - if (!trust.me){ - if (any(Data==0)){ - if (delta==0.65){ - if (!quiet){ - warning('Data has zeros and will receive default modification of zeros. Zeros will be replaced with delta*min(Data[Data>0]), default delta=0.65') - } - } - rplc <- function(x,delta){ - x[x==0]=min(x[x>0])*delta - return(x) - } - - Data <- apply(Data,MARGIN=2,FUN=rplc,delta=delta) - + if (any(Data==0)){ + if (delta==0.65){ + warning('Data has zeros and will receive default modification of zeros. Zeros will be replaced column wise with delta*min(x[x>0]), default delta=0.65') + } + rplc <- function(x,delta){ + x[x==0]=min(x[x>0])*delta + return(x) } - # if (any(abs(colSums(Data)-1)>tolerance)){ - # if (!quiet){ - # warning('Column Sums of Data are not sufficiently close to 1 - Data will be re-normalized by column sums') - # } - # Data <- t(clo(t(Data))) - # - # if (any(abs(colSums(Data)-1)>tolerance)){ - # if (!quiet){ - # warning('Attempt to divide Data by column sums did not bring column sums within "tolerance" of 1 - will proceed with factorization, but such numerical instability may affect the accuracy of the results') - # } - # } - # } + Data <- apply(Data,MARGIN=2,FUN=rplc,delta=delta) } } ##################################################################################### @@ -569,8 +546,8 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f } ############# Perform Regression on all of Groups, and implement choice function ############## - # PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,quiet,nms,smallglm,choice.fcn) - PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,quiet,nms,smallglm,choice.fcn=choice.fcn,method,...) + # PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,nms,choice.fcn) + PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,nms,choice.fcn=choice.fcn,method,...) ############################## EARLY STOP ##################################### ############################################################################### diff --git a/R/PhyloRegression.R b/R/PhyloRegression.R index 660a49d..7933197 100644 --- a/R/PhyloRegression.R +++ b/R/PhyloRegression.R @@ -13,13 +13,11 @@ #' @param treetips Number of tips in treeList for quickly identifying whether nodes correspond to a root #' @param grpsizes Number of nodes in each tree of treeList #' @param tree_map Cumulative number of nodes in trees of treeList - allows rapid mapping of nodes in ix_cl to appropriate tree in treeList. -#' @param quiet Logical to supress warnings #' @param nms rownames of TransformedData, allowing reliable mapping of rows of data to tree. -#' @param smallglm Logical. See \code{\link{PhyloFactor}} #' @param choice.fcn optional customized choice function to choose 'best' edge; see \code{\link{PhyloFactor}} #' @param method See \code{\link{PhyloFactor}} #' @param ... optional input arguments for \code{\link{glm}} -PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,choice,treeList=NULL,cl,totalvar=NULL,ix_cl,treetips=NULL,grpsizes=NULL,tree_map=NULL,quiet=T,nms=NULL,smallglm=F,choice.fcn,method='glm',...){ +PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,choice,treeList=NULL,cl,totalvar=NULL,ix_cl,treetips=NULL,grpsizes=NULL,tree_map=NULL,nms=NULL,choice.fcn,method='glm',...){ #cl - optional phyloCluster input for parallelization of regression across multiple groups. D <- dim(TransformedData)[1] xx=X @@ -36,9 +34,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL, } if (choice != 'custom'){ ################ DEFAULT choice.fcn ######################### - # GLMs <- apply(Y,1,FUN = phylofactor::pglm,xx=X,frmla=frmla,smallglm=T) if (method != 'max.var'){ - GLMs <- apply(Y,1,FUN = pglm,xx=X,frmla=frmla,smallglm=T,...) + GLMs <- apply(Y,1,FUN = pglm,xx=X,frmla=frmla,...) stats <- matrix(sapply(GLMs,FUN=phylofactor::getStats),ncol=3,byrow=T) #contains Pvalues, F statistics, and explained var rownames(stats) <- 1:ngrps @@ -80,12 +77,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL, winner <- sample(winner,1) } } else { ##### PARALLEL ##### - Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,method,smallglm,frmla,xx,choice.fcn,...) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) - # Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,smallglm,frmla,xx,choice.fcn) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) - - # Winners=lapply(ix_cl,FUN=function(x,tree_map,treeList,treetips,contrast.fcn,choice,smallglm,frmla,xx,choice.fcn) findWinner(nset=x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) ,tree_map=tree_map,treeList=treeList,treetips=treetips,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) - # Recall: output from findWinner is $grp and then our objective function output: $objective, $Fstat, or $ExplainedVar, corresponding to choice='custom','F', and 'var', respectivley. - + Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,method,frmla,xx,choice.fcn,...) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) + grps <- lapply(Winners,getElement,'grp') Y <- lapply(grps,BalanceContrast,TransformedData=TransformedData) @@ -93,7 +86,7 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL, ####################################### DEFAULT REGRESSIONS ##################### if (choice != 'custom'){ if (method != 'max.var'){ - gg <- lapply(Y,FUN = pglm,xx=X,frmla=frmla,smallglm=T,...) + gg <- lapply(Y,FUN = pglm,xx=X,frmla=frmla,...) stats <- lapply(gg,getStats) if (choice=='var'){ objective <- sapply(stats,function(x) x['ExplainedVar']) @@ -113,10 +106,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL, winner=which(objective==max(objective)) if (length(winner)>1){ - if (!quiet){ - warning(paste('Objective function produced',toString(length(winner)), + warning(paste('Objective function produced',toString(length(winner)), 'identical groups. Will choose group at random.',sep=' ')) - } winner <- sample(winner,1) } } diff --git a/R/findWinner.R b/R/findWinner.R index 6d35923..75fe819 100644 --- a/R/findWinner.R +++ b/R/findWinner.R @@ -7,12 +7,11 @@ #' @param contrast.fcn See \code{\link{PhyloFactor}} or example functions \code{\link{BalanceContrast}}, \code{\link{amalgamate}} #' @param choice string indicating how we choose the winner. Must be either \code{'var'}, \code{'F'}, or \code{'phyca'} #' @param method See \code{\link{PhyloFactor}} -#' @param smallglm Logical - whether or not to use regular \code{glm}. if smallglm=F, will use \code{\link{bigglm}} from the \code{\link{biglm}} package. #' @param frmla Formula for \code{\link{glm}}. See \code{\link{PhyloFactor}} for more details. #' @param xx data frame containing non-ILR (\code{Data}) variables used in \code{frmla} #' @param choice.fcn See \code{\link{PhyloFactor}} #' @param ... optional input arguments to \code{\link{glm}} -findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,method='glm',smallglm=F,frmla=NULL,xx=NULL,choice.fcn=NULL,...){ +findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,method='glm',frmla=NULL,xx=NULL,choice.fcn=NULL,...){ ########### set-up and prime variables ############# @@ -100,11 +99,8 @@ findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice, ######################################################### args <- list('data'=dataset,'formula'=frmla,...) ############ Performing Regression ###################### - if(smallglm){ - gg=do.call(stats::glm,args) - } else { - gg=do.call(biglm::bigglm,args) - } + gg=do.call(stats::glm,args) + ######################################################### ############# Update output if objective is larger ####### @@ -149,7 +145,7 @@ findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice, ################## modify output glm for default choices ################# - if (choice %in% c('var','F') & !smallglm & method!='max.var'){ #convert bigglm to glm + if (choice %in% c('var','F') & method!='max.var'){ #convert bigglm to glm if (is.null(contrast.fcn)){ Y <- BalanceContrast(output$grp,TransformedData) } else { diff --git a/R/pglm.R b/R/pglm.R index 7a9eb9b..8450439 100644 --- a/R/pglm.R +++ b/R/pglm.R @@ -3,7 +3,6 @@ #' @param y response variable #' @param xx independent variable #' @param frmla Formula for dependence of y on x -#' @param smallglm Logical. See \code{\link{PhyloFactor}} #' @param ... optional input arguments to \code{\link{glm}} #' @return glm object #' @examples @@ -18,9 +17,5 @@ pglm <- function(y,xx,frmla,smallglm=T,...){ names(dataset) <- c('Data',names(xx)) # dataset <- stats::model.frame(frmla,data = dataset) args <- list('data'=dataset,'formula'=frmla,...) - if(smallglm){ - return(do.call(stats::glm,args)) - } else { - return(do.call(biglm::bigglm,args)) - } + return(do.call(stats::glm,args)) } diff --git a/man/PhyloFactor.Rd b/man/PhyloFactor.Rd index e076927..cb59a90 100644 --- a/man/PhyloFactor.Rd +++ b/man/PhyloFactor.Rd @@ -6,11 +6,10 @@ \usage{ PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var", transform.fcn = log, contrast.fcn = NULL, method = "glm", - nfactors = NULL, quiet = T, trust.me = F, small.output = F, - stop.fcn = NULL, stop.early = NULL, KS.Pthreshold = 0.01, - alternative = "greater", ncores = NULL, tolerance = 1e-10, - delta = 0.65, smallglm = T, choice.fcn = NULL, - choice.fcn.dependencies = function() { NULL }, ...) + nfactors = NULL, small.output = F, stop.fcn = NULL, stop.early = NULL, + KS.Pthreshold = 0.01, alternative = "greater", ncores = NULL, + tolerance = 1e-10, delta = 0.65, choice.fcn = NULL, + cluster.depends = "", ...) } \arguments{ \item{Data}{Data matrix whose rows are tip labels of the tree, columns are samples of the same length as X, and whose columns sum to 1} @@ -31,10 +30,6 @@ PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var", \item{nfactors}{Number of clades or factors to produce in phylofactorization. Default, NULL, will iterate phylofactorization until either dim(Data)[1]-1 factors, or until stop.fcn returns T} -\item{quiet}{Logical, default is \code{FALSE}, indicating whether or not to display standard warnings.} - -\item{trust.me}{Logical, default \code{FALSE}, indicating whether or not to trust the input Data to be compositional with no zeros.} - \item{small.output}{Logical, indicating whether or not to trim output. If \code{TRUE}, output may not work with downstream summary and plotting wrappers.} \item{stop.fcn}{Currently, accepts input of 'KS'. Coming soon: input your own function of the environment in phylofactor to determine when to stop.} @@ -51,11 +46,9 @@ PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var", \item{delta}{Numerical value for replacement of zeros. Default is 0.65, so zeros will be replaced with 0.65*min(Data[Data>0])} -\item{smallglm}{Logical allowing use of \code{bigglm} when \code{ncores} is not \code{NULL}. If \code{TRUE}, will use regular \code{glm()} at base of regression. If \code{FALSE}, will use slower but memory-efficient \code{bigglm}. Default is false.} - \item{choice.fcn}{Function for customized choice function. Must take as input the numeric vector of ilr coefficients \code{y}, the input meta-data/independent-variable \code{X}, and a logical \code{PF.output}. If \code{PF.output==F}, the output of \code{choice.fcn} must be a two-member list containing numerics \code{output$objective} and \code{output$stopStatistic}. Phylofactor will choose the edge which maximizes \code{output$objective} and a customzed input \code{stop.fcn} can be used with the \code{output$stopStatistics} to stop phylofactor internally.} -\item{choice.fcn.dependencies}{Function called by cluster to load all dependencies for custom choice.fcn. e.g. \code{choice.fcn.dependencies <- function(){library(bayesm)}}} +\item{cluster.depends}{Character parsed and evaluated by cluster to load all dependencies for custom choice.fcn. e.g. \code{cluster.depends <- 'library(bayesm)'}} \item{...}{optional input arguments for \code{\link{glm}} or, if \code{method=='gam'}, input for \code{nlme::gam}} } @@ -210,7 +203,7 @@ all.equal(PF.M$factors,PF.M.par$factors) ############################# CUSTOMIZED CHOICE FUNCTIONS ################################ #PhyloFactor can also be used for generalized additive models by inputting choice.fcn -#and choice.fcn.dependencies to load required packages onto the cluster +#and cluster.depends to load required packages onto the cluster ### Let's work with some newly simulated data #### set.seed(1.1) @@ -260,14 +253,14 @@ my_gam <- function(y,X,PF.output=FALSE,...){ } } -load.mgcv <- function(){library(mgcv)} -######### For parallelization of customized choice function, we also need to define a function, -######### choice.fcn,dependencies, which loads all dependencies to cluster. -######### The exact call will be clusterEvalQ(cl,choice.fcn.dependencies()) +load.mgcv <- 'library(mgcv)' +######### For parallelization of customized choice function, we may also need to input +######### cluster.depends which loads all dependencies to cluster. +######### The exact call will be clusterEvalQ(cl,eval(parse(text=cluster.depends))) PF.G.par <- PhyloFactor(Data,tree,X,choice.fcn=my_gam,sp=c(1,1), - choice.fcn.dependencies = load.mgcv,nfactors=2,ncores=2) + cluster.depends = load.mgcv,nfactors=2,ncores=2) ######### Or we can use the built-in method='gam' and input e.g. smoothing penalty sp PF.G.par2 <- PhyloFactor(Data,tree,X,method='gam', frmla=Data~s(a)+s(b),sp=c(1,1),nfactors=2,ncores=2) diff --git a/man/PhyloRegression.Rd b/man/PhyloRegression.Rd index 36b99a2..50a88f5 100644 --- a/man/PhyloRegression.Rd +++ b/man/PhyloRegression.Rd @@ -6,8 +6,8 @@ \usage{ PhyloRegression(TransformedData, X, frmla, Grps = NULL, contrast.fcn = NULL, choice, treeList = NULL, cl, totalvar = NULL, ix_cl, treetips = NULL, - grpsizes = NULL, tree_map = NULL, quiet = T, nms = NULL, - smallglm = F, choice.fcn, method = "glm", ...) + grpsizes = NULL, tree_map = NULL, nms = NULL, choice.fcn, + method = "glm", ...) } \arguments{ \item{TransformedData}{Transformed data matrix whose rows are species and columns are samples.} @@ -36,12 +36,8 @@ PhyloRegression(TransformedData, X, frmla, Grps = NULL, contrast.fcn = NULL, \item{tree_map}{Cumulative number of nodes in trees of treeList - allows rapid mapping of nodes in ix_cl to appropriate tree in treeList.} -\item{quiet}{Logical to supress warnings} - \item{nms}{rownames of TransformedData, allowing reliable mapping of rows of data to tree.} -\item{smallglm}{Logical. See \code{\link{PhyloFactor}}} - \item{choice.fcn}{optional customized choice function to choose 'best' edge; see \code{\link{PhyloFactor}}} \item{method}{See \code{\link{PhyloFactor}}} diff --git a/man/findWinner.Rd b/man/findWinner.Rd index ced1552..6fb36de 100644 --- a/man/findWinner.Rd +++ b/man/findWinner.Rd @@ -5,8 +5,7 @@ \title{Internal PhyloRegression function for finding the winning edge.} \usage{ findWinner(nset, tree_map, treeList, treetips, contrast.fcn = NULL, choice, - method = "glm", smallglm = F, frmla = NULL, xx = NULL, - choice.fcn = NULL, ...) + method = "glm", frmla = NULL, xx = NULL, choice.fcn = NULL, ...) } \arguments{ \item{nset}{set of nodes} @@ -23,8 +22,6 @@ findWinner(nset, tree_map, treeList, treetips, contrast.fcn = NULL, choice, \item{method}{See \code{\link{PhyloFactor}}} -\item{smallglm}{Logical - whether or not to use regular \code{glm}. if smallglm=F, will use \code{\link{bigglm}} from the \code{\link{biglm}} package.} - \item{frmla}{Formula for \code{\link{glm}}. See \code{\link{PhyloFactor}} for more details.} \item{xx}{data frame containing non-ILR (\code{Data}) variables used in \code{frmla}} diff --git a/man/getObjective.Rd b/man/getObjective.Rd index 62edc00..ca0fec3 100644 --- a/man/getObjective.Rd +++ b/man/getObjective.Rd @@ -4,33 +4,32 @@ \alias{getObjective} \title{objective function for \code{\link{gpf}}} \usage{ -getObjective(grp, frmla, PartitioningVariables = "", mStableAgg, - expfamily = "gaussian", tree. = tree, Data. = Data, - MetaData. = MetaData, model.fcn. = model.fcn, - objective.fcn. = objective.fcn, ...) +getObjective(grp, tree, Data, frmla, MetaData = NULL, + PartitioningVariables = "", mStableAgg, expfamily = "gaussian", + model.fcn = stats::glm, objective.fcn = pvDeviance, ...) } \arguments{ \item{grp}{list containing two disjoint lists of species, such as thouse output from \code{\link{getGroups}}} +\item{tree}{phylo class object} + +\item{Data}{If \code{mStableAgg==TRUE}, a matrix whose rows are species and columns are samples. Otherwise, a data table whose columns include "Species" and "Sample" and whose key is "Species".} + \item{frmla}{formula for \code{model.fcn}} +\item{MetaData}{meta-data containing variables in formula and the column "Sample". If \code{mStableAgg==F}, this input is not used - all variables must be contained in \code{Data}} + \item{PartitioningVariables}{character vector containing of interest for phylofactorization partitioning.} \item{mStableAgg}{logical. See \code{\link{gpf}}} \item{expfamily}{character string indicating manner of m-stable aggregation for \code{\link{mAggregation}}. Only "binomial" is meaningfully different.} -\item{...}{additional arguments for \code{model.fcn}} - -\item{tree}{phylo class object} - -\item{Data}{If \code{mStableAgg==TRUE}, a matrix whose rows are species and columns are samples. Otherwise, a data table whose columns include "Species" and "Sample" and whose key is "Species".} - -\item{MetaData}{meta-data containing variables in formula and the column "Sample". If \code{mStableAgg==F}, this input is not used - all variables must be contained in \code{Data}} - \item{model.fcn}{model function, such as \code{\link{glm}} or \code{gam}.} \item{objective.fcn}{Objective function taking output from \code{model.fcn} as input. See \code{\link{gpf}}.} + +\item{...}{additional arguments for \code{model.fcn}} } \description{ objective function for \code{\link{gpf}} diff --git a/man/gpf.Rd b/man/gpf.Rd index eb999c1..e0657f4 100644 --- a/man/gpf.Rd +++ b/man/gpf.Rd @@ -7,7 +7,7 @@ gpf(Data, tree, frmla.phylo = NULL, frmla = NULL, PartitioningVariables = NULL, MetaData = NULL, nfactors = NULL, ncores = NULL, model.fcn = stats::glm, objective.fcn = pvDeviance, - algorithm = "mix", alpha = 0.2, cluster.depends = { }, ...) + algorithm = "mix", alpha = 0.2, cluster.depends = "", ...) } \arguments{ \item{Data}{data table containing columns of "Species", and terms in the \code{frmla}. If \code{algorithm=="mStable"}, \code{Data} must also include a column of "Sample" or, alternatively, \code{Data} can be a matrix whose rows are species and columns are samples and \code{MetaData} a data frame of meta-data with rows corresponding to columns of \code{Data} and the terms in \code{frmla} or non-phylo terms in \code{frmla.phylo}.} @@ -34,7 +34,7 @@ gpf(Data, tree, frmla.phylo = NULL, frmla = NULL, \item{alpha}{Numeric between 0 and 1 (strictly greater than 0), indicating the top fraction of edges to use when \code{algorithm=='mix'}. Default is alpha=0.2 selecting top 20 percent of edges.} -\item{cluster.depends}{Expression. Will be evaluated in clusters to prime them - useful for customized \code{model.fcn} and \code{objective.fcn}} +\item{cluster.depends}{Character expression input to \code{eval(parse(text=cluster.depends))}. Evaluated in clusters to prime local environment - useful for customized \code{model.fcn} and \code{objective.fcn}} \item{...}{Additional arguments for \code{model.fcn}, e.g. for default \code{\link{glm}}, can use \code{family=binomial}, \code{weights}, etc. For \code{algorithm!='mStable'}, \code{subset} is not a valid optional argument due to \code{gpf} recursively subsetting based on phylogenetic factors. For \code{algorithm='mStable'}, \code{subset} indexes correspond to the Samples in order of \code{unique(Data$Sample)}} } diff --git a/man/pf.tree.Rd b/man/pf.tree.Rd index e8c0790..7742fcf 100644 --- a/man/pf.tree.Rd +++ b/man/pf.tree.Rd @@ -5,9 +5,10 @@ \title{ggtree-based plotting of phylofactor bins or factors} \usage{ pf.tree(pf, tree = NULL, method = "factors", factors = NULL, - groups = NULL, colors = NULL, GroupList = NULL, bg.color = NA, - bg.alpha = 0.1, alphas = NULL, layout = "circular", rootnode = FALSE, - top.layer = F, top.alpha = 0.1, color.fcn = viridis::viridis, ...) + ignore.tips = TRUE, groups = NULL, colors = NULL, GroupList = NULL, + bg.color = NA, bg.alpha = 0.1, alphas = NULL, layout = "circular", + rootnode = FALSE, top.layer = F, top.alpha = 0.1, + color.fcn = viridis::viridis, ...) } \arguments{ \item{pf}{phylofactor class object} diff --git a/man/pglm.Rd b/man/pglm.Rd index 3510dcd..71f84b8 100644 --- a/man/pglm.Rd +++ b/man/pglm.Rd @@ -13,8 +13,6 @@ pglm(y, xx, frmla, smallglm = T, ...) \item{frmla}{Formula for dependence of y on x} -\item{smallglm}{Logical. See \code{\link{PhyloFactor}}} - \item{...}{optional input arguments to \code{\link{glm}}} } \value{ diff --git a/man/twoSampleFactor.Rd b/man/twoSampleFactor.Rd index 0098625..8e48d66 100644 --- a/man/twoSampleFactor.Rd +++ b/man/twoSampleFactor.Rd @@ -5,7 +5,7 @@ \title{Phylofactorization of vector data using two-sample tests} \usage{ twoSampleFactor(Z, tree, nfactors, method = "contrast", TestFunction = NULL, - ncores = NULL, stopFcn = NULL, cluster.depends = NULL, Metropolis = F, + ncores = NULL, stop.fcn = NULL, cluster.depends = "", Metropolis = F, sampleFcn = NULL, lambda = 1, ...) } \arguments{ @@ -17,11 +17,11 @@ twoSampleFactor(Z, tree, nfactors, method = "contrast", TestFunction = NULL, \item{method}{string indicating two-sample test two use. Can take values of "contrast" (default), "Fisher", "Wilcox", 't.test', or "custom", indicating the two-sample test to be used.} -\item{TestFunction}{optional input customized test function, taking input \code{{grps,tree,Z,p.value,..}} and output objective omega. \code{grps} is a two-element list containing indexes for each group; see \code{\link{getPhyloGroups}}. p.value is a logical: the output from p.value=T should be a P-value and can be input into the \code{stopFcn}.} +\item{TestFunction}{optional input customized test function, taking input \code{{grps,tree,Z,PF.output,..}} and output objective omega. \code{grps} is a two-element list containing indexes for each group; see \code{\link{getPhyloGroups}}. PF.output is a logical: the output from PF.output=T should be a P-value and can be input into the \code{stop.fcn}.} \item{ncores}{number of cores to use for parallelization} -\item{stopFcn}{stop function taking as input the output from \code{TestFunction} when \code{p.value=T} and returning logical where an output of \code{TRUE} will stop phylofactorization. Inputting character string "KS", will use KS-test on the P-values output from \code{TestFunction}.} +\item{stop.fcn}{stop function taking as input the output from \code{TestFunction} when \code{PF.output=T} and returning logical where an output of \code{TRUE} will stop phylofactorization. Inputting character string "KS", will use KS-test on the P-values output from \code{TestFunction}.} \item{cluster.depends}{expression loading dependencies for \code{TestFunction} onto cluster.}