From e6837dae3ab410b965d37b6445018523b697fdcf Mon Sep 17 00:00:00 2001
From: Alex Washburne <bigalculus@gmail.com>
Date: Sat, 7 Jul 2018 10:28:08 -0600
Subject: [PATCH] phylofactorization

---
 DESCRIPTION            |  1 -
 R/PhyloFactor.R        | 53 ++++++++++++------------------------------
 R/PhyloRegression.R    | 21 +++++------------
 R/findWinner.R         | 12 ++++------
 R/pglm.R               |  7 +-----
 man/PhyloFactor.Rd     | 29 +++++++++--------------
 man/PhyloRegression.Rd |  8 ++-----
 man/findWinner.Rd      |  5 +---
 man/getObjective.Rd    | 23 +++++++++---------
 man/gpf.Rd             |  4 ++--
 man/pf.tree.Rd         |  7 +++---
 man/pglm.Rd            |  2 --
 man/twoSampleFactor.Rd |  6 ++---
 13 files changed, 60 insertions(+), 118 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2d875e4..8f16677 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,6 @@ Imports:
   grDevices(>= 3.2.2),
   caper (>= 0.5.2),
   Biostrings (>= 2.38.4),
-  biglm (>= 0.9.1),
   mgcv (>= 1.8-16),
   ggplot2 (>= 2.2.1),
   ggtree (>= 1.8.2),
diff --git a/R/PhyloFactor.R b/R/PhyloFactor.R
index 8c26253..170e60b 100644
--- a/R/PhyloFactor.R
+++ b/R/PhyloFactor.R
@@ -9,8 +9,6 @@
 #' @param contrast.fcn Contrast function. Default is an efficient version of \code{BalanceContrast}. Another built-in option is \code{\link{amalgamate}} - for amalgamation-based analyses of compositional data, set \code{transform.fcn=I} and \code{contrast.fcn=amalgamate}.
 #' @param method Which default objective function to use either "glm", "max.var" or "gam".
 #' @param nfactors Number of clades or factors to produce in phylofactorization. Default, NULL, will iterate phylofactorization until either dim(Data)[1]-1 factors, or until stop.fcn returns T
-#' @param quiet Logical, default is \code{FALSE}, indicating whether or not to display standard warnings. 
-#' @param trust.me Logical, default \code{FALSE}, indicating whether or not to trust the input Data to be compositional with no zeros.
 #' @param small.output Logical, indicating whether or not to trim output. If \code{TRUE}, output may not work with downstream summary and plotting wrappers.
 #' @param stop.fcn Currently, accepts input of 'KS'. Coming soon: input your own function of the environment in phylofactor to determine when to stop.
 #' @param stop.early Logical indicating if stop.fcn should be evaluated before (stop.early=T) or after (stop.early=F) choosing an edge maximizing the objective function.
@@ -19,7 +17,6 @@
 #' @param ncores Number of cores for built-in parallelization of phylofactorization. Parallelizes the extraction of groups, amalgamation of data based on groups, regression, and calculation of objective function. Be warned - this can lead to R taking over a system's memory.
 #' @param tolerance Tolerance for deviation of column sums of data from 1. if abs(colSums(Data)-1)>tolerance, a warning message will be displayed.
 #' @param delta Numerical value for replacement of zeros. Default is 0.65, so zeros will be replaced with 0.65*min(Data[Data>0])
-#' @param smallglm Logical allowing use of \code{bigglm} when \code{ncores} is not \code{NULL}. If \code{TRUE}, will use regular \code{glm()} at base of regression. If \code{FALSE}, will use slower but memory-efficient \code{bigglm}. Default is false. 
 #' @param choice.fcn Function for customized choice function. Must take as input the numeric vector of ilr coefficients \code{y}, the input meta-data/independent-variable \code{X}, and a logical \code{PF.output}. If \code{PF.output==F}, the output of \code{choice.fcn} must be a two-member list containing numerics \code{output$objective} and \code{output$stopStatistic}. Phylofactor will choose the edge which maximizes \code{output$objective} and a customzed input \code{stop.fcn} can be used with the \code{output$stopStatistics} to stop phylofactor internally. 
 #' @param cluster.depends Character parsed and evaluated by cluster to load all dependencies for custom choice.fcn. e.g. \code{cluster.depends <- 'library(bayesm)'}
 #' @param ... optional input arguments for \code{\link{glm}} or, if \code{method=='gam'}, input for \code{nlme::gam}
@@ -315,7 +312,7 @@
 #' #The optimal environment for this simulated organism is mu=-1
 #' c('sigma'=sigma,'sigma.hat'=sigma.hat) #The standard deviation is ~0.9. 
 
-PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.fcn=log,contrast.fcn=NULL,method='glm',nfactors=NULL,quiet=T,trust.me=F,small.output=F,stop.fcn=NULL,stop.early=NULL,KS.Pthreshold=0.01,alternative='greater',ncores=NULL,tolerance=1e-10,delta=0.65,smallglm=T,choice.fcn=NULL,cluster.depends='',...){
+PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.fcn=log,contrast.fcn=NULL,method='glm',nfactors=NULL,small.output=F,stop.fcn=NULL,stop.early=NULL,KS.Pthreshold=0.01,alternative='greater',ncores=NULL,tolerance=1e-10,delta=0.65,choice.fcn=NULL,cluster.depends='',...){
   
   
   ######################################################## Housekeeping #################################################################################
@@ -332,11 +329,9 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f
       }
     }
   }
-  if (all(rownames(Data) %in% tree$tip.label)==F){stop('some rownames of Data are not found in tree')}
-  if (all(tree$tip.label %in% rownames(Data))==F){
-    if(!quiet){
-      warning('some tips in tree are not found in dataset - output PF$tree will contain a trimmed tree')
-    }
+  if (!all(rownames(Data) %in% tree$tip.label)){stop('some rownames of Data are not found in tree')}
+  if (!all(tree$tip.label %in% rownames(Data))){
+    warning('some tips in tree are not found in dataset - output PF$tree will contain a trimmed tree')
     tree <- ape::drop.tip(tree,setdiff(tree$tip.label,rownames(Data)))}
   if (!all(rownames(Data)==tree$tip.label)){
     warning('rows of data are in different order of tree tip-labels - use output$data for downstream analysis, or set Data <- Data[output$tree$tip.label,]')
@@ -385,37 +380,19 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f
   
   ###################### Default treatment of Data #################################
   
-  if (all.equal(transform.fcn,log)==TRUE){
+  if (all.equal(transform.fcn,log)==T){
     if (any(c(Data)<0)){
       stop('For log-transformed data analysis, all entries of Data must be greater than or equal to 0')
     }
-    if (!trust.me){
-      if (any(Data==0)){
-        if (delta==0.65){
-          if (!quiet){
-            warning('Data has zeros and will receive default modification of zeros. Zeros will be replaced with delta*min(Data[Data>0]), default delta=0.65')
-          }
-        }
-        rplc <- function(x,delta){
-          x[x==0]=min(x[x>0])*delta
-          return(x)
-        }
-        
-        Data <- apply(Data,MARGIN=2,FUN=rplc,delta=delta)
-        
+    if (any(Data==0)){
+      if (delta==0.65){
+        warning('Data has zeros and will receive default modification of zeros. Zeros will be replaced column wise with delta*min(x[x>0]), default delta=0.65')
+      }
+      rplc <- function(x,delta){
+        x[x==0]=min(x[x>0])*delta
+        return(x)
       }
-      # if (any(abs(colSums(Data)-1)>tolerance)){
-      #   if (!quiet){
-      #     warning('Column Sums of Data are not sufficiently close to 1 - Data will be re-normalized by column sums')
-      #   }
-      #   Data <- t(clo(t(Data)))
-      #   
-      #   if (any(abs(colSums(Data)-1)>tolerance)){
-      #     if (!quiet){
-      #       warning('Attempt to divide Data by column sums did not bring column sums within "tolerance" of 1 - will proceed with factorization, but such numerical instability may affect the accuracy of the results')
-      #     }
-      #   }
-      # }
+      Data <- apply(Data,MARGIN=2,FUN=rplc,delta=delta)
     }
   }
   #####################################################################################
@@ -569,8 +546,8 @@ PhyloFactor <- function(Data,tree,X=NULL,frmla = Data~X,choice='var',transform.f
     }
     
     ############# Perform Regression on all of Groups, and implement choice function ##############
-    # PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,quiet,nms,smallglm,choice.fcn)
-    PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,quiet,nms,smallglm,choice.fcn=choice.fcn,method,...)
+    # PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,nms,choice.fcn)
+    PhyloReg <- PhyloRegression(TransformedData,X,frmla,Grps,contrast.fcn,choice,treeList,cl,totalvar,ix_cl,treetips,grpsizes,tree_map,nms,choice.fcn=choice.fcn,method,...)
     ############################## EARLY STOP #####################################
     ###############################################################################
     
diff --git a/R/PhyloRegression.R b/R/PhyloRegression.R
index 660a49d..7933197 100644
--- a/R/PhyloRegression.R
+++ b/R/PhyloRegression.R
@@ -13,13 +13,11 @@
 #' @param treetips Number of tips in treeList for quickly identifying whether nodes correspond to a root
 #' @param grpsizes Number of nodes in each tree of treeList
 #' @param tree_map Cumulative number of nodes in trees of treeList - allows rapid mapping of nodes in ix_cl to appropriate tree in treeList.
-#' @param quiet Logical to supress warnings
 #' @param nms rownames of TransformedData, allowing reliable mapping of rows of data to tree.
-#' @param smallglm Logical. See \code{\link{PhyloFactor}}
 #' @param choice.fcn optional customized choice function to choose 'best' edge; see \code{\link{PhyloFactor}}
 #' @param method See \code{\link{PhyloFactor}}
 #' @param ... optional input arguments for \code{\link{glm}}
-PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,choice,treeList=NULL,cl,totalvar=NULL,ix_cl,treetips=NULL,grpsizes=NULL,tree_map=NULL,quiet=T,nms=NULL,smallglm=F,choice.fcn,method='glm',...){
+PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,choice,treeList=NULL,cl,totalvar=NULL,ix_cl,treetips=NULL,grpsizes=NULL,tree_map=NULL,nms=NULL,choice.fcn,method='glm',...){
    #cl - optional phyloCluster input for parallelization of regression across multiple groups.
   D <- dim(TransformedData)[1]
   xx=X
@@ -36,9 +34,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,
     }
     if (choice != 'custom'){
     ################ DEFAULT choice.fcn #########################
-      # GLMs <- apply(Y,1,FUN = phylofactor::pglm,xx=X,frmla=frmla,smallglm=T)
       if (method != 'max.var'){
-        GLMs <- apply(Y,1,FUN = pglm,xx=X,frmla=frmla,smallglm=T,...)
+        GLMs <- apply(Y,1,FUN = pglm,xx=X,frmla=frmla,...)
         stats <- matrix(sapply(GLMs,FUN=phylofactor::getStats),ncol=3,byrow=T) 
         #contains Pvalues, F statistics, and explained var
         rownames(stats) <- 1:ngrps
@@ -80,12 +77,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,
       winner <- sample(winner,1)
     }
   } else {  ##### PARALLEL #####
-    Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,method,smallglm,frmla,xx,choice.fcn,...) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...)
-    # Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,smallglm,frmla,xx,choice.fcn) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn)
-    
-    # Winners=lapply(ix_cl,FUN=function(x,tree_map,treeList,treetips,contrast.fcn,choice,smallglm,frmla,xx,choice.fcn) findWinner(nset=x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn) ,tree_map=tree_map,treeList=treeList,treetips=treetips,choice=choice,smallglm=smallglm,frmla=frmla,xx=xx,choice.fcn=choice.fcn)
-    # Recall: output from findWinner is $grp and then our objective function output: $objective, $Fstat, or $ExplainedVar, corresponding to choice='custom','F', and 'var', respectivley.
-    
+    Winners=parallel::clusterApply(cl,x=ix_cl,fun= function(x,tree_map,treeList,treetips,contrast.fcn,choice,method,frmla,xx,choice.fcn,...) findWinner(x,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...) ,tree_map=tree_map,treeList=treeList,treetips=treetips,contrast.fcn=contrast.fcn,choice=choice,method=method,frmla=frmla,xx=xx,choice.fcn=choice.fcn,...)
+
     grps <- lapply(Winners,getElement,'grp')
     Y <- lapply(grps,BalanceContrast,TransformedData=TransformedData)
     
@@ -93,7 +86,7 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,
     ####################################### DEFAULT REGRESSIONS #####################
     if (choice != 'custom'){
       if (method != 'max.var'){
-        gg <- lapply(Y,FUN = pglm,xx=X,frmla=frmla,smallglm=T,...)
+        gg <- lapply(Y,FUN = pglm,xx=X,frmla=frmla,...)
         stats <- lapply(gg,getStats)
         if (choice=='var'){
           objective <- sapply(stats,function(x) x['ExplainedVar'])
@@ -113,10 +106,8 @@ PhyloRegression <- function(TransformedData,X,frmla,Grps=NULL,contrast.fcn=NULL,
     
     winner=which(objective==max(objective))
     if (length(winner)>1){
-      if (!quiet){
-        warning(paste('Objective function produced',toString(length(winner)),
+      warning(paste('Objective function produced',toString(length(winner)),
                       'identical groups. Will choose group at random.',sep=' '))
-      }
       winner <- sample(winner,1)
     }
   }
diff --git a/R/findWinner.R b/R/findWinner.R
index 6d35923..75fe819 100644
--- a/R/findWinner.R
+++ b/R/findWinner.R
@@ -7,12 +7,11 @@
 #' @param contrast.fcn See \code{\link{PhyloFactor}} or example functions \code{\link{BalanceContrast}}, \code{\link{amalgamate}}
 #' @param choice string indicating how we choose the winner. Must be either \code{'var'}, \code{'F'}, or \code{'phyca'}
 #' @param method See \code{\link{PhyloFactor}}
-#' @param smallglm Logical - whether or not to use regular \code{glm}. if smallglm=F, will use \code{\link{bigglm}} from the \code{\link{biglm}} package.
 #' @param frmla Formula for \code{\link{glm}}. See \code{\link{PhyloFactor}} for more details.
 #' @param xx data frame containing non-ILR (\code{Data}) variables used in \code{frmla}
 #' @param choice.fcn See \code{\link{PhyloFactor}}
 #' @param ... optional input arguments to \code{\link{glm}}
-findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,method='glm',smallglm=F,frmla=NULL,xx=NULL,choice.fcn=NULL,...){
+findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,method='glm',frmla=NULL,xx=NULL,choice.fcn=NULL,...){
   
   
   ########### set-up and prime variables #############
@@ -100,11 +99,8 @@ findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,
         #########################################################
         args <- list('data'=dataset,'formula'=frmla,...)
         ############ Performing Regression ######################
-        if(smallglm){
-          gg=do.call(stats::glm,args)
-        } else {
-          gg=do.call(biglm::bigglm,args)
-        }
+        gg=do.call(stats::glm,args)
+       
         #########################################################
         
         ############# Update output if objective is larger #######
@@ -149,7 +145,7 @@ findWinner <- function(nset,tree_map,treeList,treetips,contrast.fcn=NULL,choice,
   
   
   ################## modify output glm for default choices #################
-  if (choice %in% c('var','F') & !smallglm & method!='max.var'){ #convert bigglm to glm
+  if (choice %in% c('var','F') & method!='max.var'){ #convert bigglm to glm
     if (is.null(contrast.fcn)){
       Y <- BalanceContrast(output$grp,TransformedData)
     } else {
diff --git a/R/pglm.R b/R/pglm.R
index 7a9eb9b..8450439 100644
--- a/R/pglm.R
+++ b/R/pglm.R
@@ -3,7 +3,6 @@
 #' @param y response variable
 #' @param xx independent variable
 #' @param frmla Formula for dependence of y on x
-#' @param smallglm Logical. See \code{\link{PhyloFactor}}
 #' @param ... optional input arguments to \code{\link{glm}}
 #' @return glm object
 #' @examples
@@ -18,9 +17,5 @@ pglm <- function(y,xx,frmla,smallglm=T,...){
   names(dataset) <- c('Data',names(xx))
   # dataset <- stats::model.frame(frmla,data = dataset)
   args <- list('data'=dataset,'formula'=frmla,...)
-  if(smallglm){
-    return(do.call(stats::glm,args))
-  } else {
-    return(do.call(biglm::bigglm,args))
-  }
+  return(do.call(stats::glm,args))
 }
diff --git a/man/PhyloFactor.Rd b/man/PhyloFactor.Rd
index e076927..cb59a90 100644
--- a/man/PhyloFactor.Rd
+++ b/man/PhyloFactor.Rd
@@ -6,11 +6,10 @@
 \usage{
 PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var",
   transform.fcn = log, contrast.fcn = NULL, method = "glm",
-  nfactors = NULL, quiet = T, trust.me = F, small.output = F,
-  stop.fcn = NULL, stop.early = NULL, KS.Pthreshold = 0.01,
-  alternative = "greater", ncores = NULL, tolerance = 1e-10,
-  delta = 0.65, smallglm = T, choice.fcn = NULL,
-  choice.fcn.dependencies = function() {     NULL }, ...)
+  nfactors = NULL, small.output = F, stop.fcn = NULL, stop.early = NULL,
+  KS.Pthreshold = 0.01, alternative = "greater", ncores = NULL,
+  tolerance = 1e-10, delta = 0.65, choice.fcn = NULL,
+  cluster.depends = "", ...)
 }
 \arguments{
 \item{Data}{Data matrix whose rows are tip labels of the tree, columns are samples of the same length as X, and whose columns sum to 1}
@@ -31,10 +30,6 @@ PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var",
 
 \item{nfactors}{Number of clades or factors to produce in phylofactorization. Default, NULL, will iterate phylofactorization until either dim(Data)[1]-1 factors, or until stop.fcn returns T}
 
-\item{quiet}{Logical, default is \code{FALSE}, indicating whether or not to display standard warnings.}
-
-\item{trust.me}{Logical, default \code{FALSE}, indicating whether or not to trust the input Data to be compositional with no zeros.}
-
 \item{small.output}{Logical, indicating whether or not to trim output. If \code{TRUE}, output may not work with downstream summary and plotting wrappers.}
 
 \item{stop.fcn}{Currently, accepts input of 'KS'. Coming soon: input your own function of the environment in phylofactor to determine when to stop.}
@@ -51,11 +46,9 @@ PhyloFactor(Data, tree, X = NULL, frmla = Data ~ X, choice = "var",
 
 \item{delta}{Numerical value for replacement of zeros. Default is 0.65, so zeros will be replaced with 0.65*min(Data[Data>0])}
 
-\item{smallglm}{Logical allowing use of \code{bigglm} when \code{ncores} is not \code{NULL}. If \code{TRUE}, will use regular \code{glm()} at base of regression. If \code{FALSE}, will use slower but memory-efficient \code{bigglm}. Default is false.}
-
 \item{choice.fcn}{Function for customized choice function. Must take as input the numeric vector of ilr coefficients \code{y}, the input meta-data/independent-variable \code{X}, and a logical \code{PF.output}. If \code{PF.output==F}, the output of \code{choice.fcn} must be a two-member list containing numerics \code{output$objective} and \code{output$stopStatistic}. Phylofactor will choose the edge which maximizes \code{output$objective} and a customzed input \code{stop.fcn} can be used with the \code{output$stopStatistics} to stop phylofactor internally.}
 
-\item{choice.fcn.dependencies}{Function called by cluster to load all dependencies for custom choice.fcn. e.g. \code{choice.fcn.dependencies <- function(){library(bayesm)}}}
+\item{cluster.depends}{Character parsed and evaluated by cluster to load all dependencies for custom choice.fcn. e.g. \code{cluster.depends <- 'library(bayesm)'}}
 
 \item{...}{optional input arguments for \code{\link{glm}} or, if \code{method=='gam'}, input for \code{nlme::gam}}
 }
@@ -210,7 +203,7 @@ all.equal(PF.M$factors,PF.M.par$factors)
 
 ############################# CUSTOMIZED CHOICE FUNCTIONS ################################
 #PhyloFactor can also be used for generalized additive models by inputting choice.fcn 
-#and choice.fcn.dependencies to load required packages onto the cluster
+#and cluster.depends to load required packages onto the cluster
 
 ### Let's work with some newly simulated data ####
 set.seed(1.1)
@@ -260,14 +253,14 @@ my_gam <- function(y,X,PF.output=FALSE,...){
   }
 }
 
-load.mgcv <- function(){library(mgcv)}
-######### For parallelization of customized choice function, we also need to define a function, 
-######### choice.fcn,dependencies, which loads all dependencies to cluster.
-######### The exact call will be clusterEvalQ(cl,choice.fcn.dependencies())
+load.mgcv <- 'library(mgcv)'
+######### For parallelization of a customized choice function, we may also need to input
+######### cluster.depends, a character string which loads all dependencies onto the cluster.
+######### The exact call will be clusterEvalQ(cl,eval(parse(text=cluster.depends)))
 
 
 PF.G.par <- PhyloFactor(Data,tree,X,choice.fcn=my_gam,sp=c(1,1),
-           choice.fcn.dependencies = load.mgcv,nfactors=2,ncores=2)
+           cluster.depends = load.mgcv,nfactors=2,ncores=2)
 ######### Or we can use the built-in method='gam' and input e.g. smoothing penalty sp
 PF.G.par2 <- PhyloFactor(Data,tree,X,method='gam',
               frmla=Data~s(a)+s(b),sp=c(1,1),nfactors=2,ncores=2)
diff --git a/man/PhyloRegression.Rd b/man/PhyloRegression.Rd
index 36b99a2..50a88f5 100644
--- a/man/PhyloRegression.Rd
+++ b/man/PhyloRegression.Rd
@@ -6,8 +6,8 @@
 \usage{
 PhyloRegression(TransformedData, X, frmla, Grps = NULL, contrast.fcn = NULL,
   choice, treeList = NULL, cl, totalvar = NULL, ix_cl, treetips = NULL,
-  grpsizes = NULL, tree_map = NULL, quiet = T, nms = NULL,
-  smallglm = F, choice.fcn, method = "glm", ...)
+  grpsizes = NULL, tree_map = NULL, nms = NULL, choice.fcn,
+  method = "glm", ...)
 }
 \arguments{
 \item{TransformedData}{Transformed data matrix whose rows are species and columns are samples.}
@@ -36,12 +36,8 @@ PhyloRegression(TransformedData, X, frmla, Grps = NULL, contrast.fcn = NULL,
 
 \item{tree_map}{Cumulative number of nodes in trees of treeList - allows rapid mapping of nodes in ix_cl to appropriate tree in treeList.}
 
-\item{quiet}{Logical to supress warnings}
-
 \item{nms}{rownames of TransformedData, allowing reliable mapping of rows of data to tree.}
 
-\item{smallglm}{Logical. See \code{\link{PhyloFactor}}}
-
 \item{choice.fcn}{optional customized choice function to choose 'best' edge; see \code{\link{PhyloFactor}}}
 
 \item{method}{See \code{\link{PhyloFactor}}}
diff --git a/man/findWinner.Rd b/man/findWinner.Rd
index ced1552..6fb36de 100644
--- a/man/findWinner.Rd
+++ b/man/findWinner.Rd
@@ -5,8 +5,7 @@
 \title{Internal PhyloRegression function for finding the winning edge.}
 \usage{
 findWinner(nset, tree_map, treeList, treetips, contrast.fcn = NULL, choice,
-  method = "glm", smallglm = F, frmla = NULL, xx = NULL,
-  choice.fcn = NULL, ...)
+  method = "glm", frmla = NULL, xx = NULL, choice.fcn = NULL, ...)
 }
 \arguments{
 \item{nset}{set of nodes}
@@ -23,8 +22,6 @@ findWinner(nset, tree_map, treeList, treetips, contrast.fcn = NULL, choice,
 
 \item{method}{See \code{\link{PhyloFactor}}}
 
-\item{smallglm}{Logical - whether or not to use regular \code{glm}. if smallglm=F, will use \code{\link{bigglm}} from the \code{\link{biglm}} package.}
-
 \item{frmla}{Formula for \code{\link{glm}}. See \code{\link{PhyloFactor}} for more details.}
 
 \item{xx}{data frame containing non-ILR (\code{Data}) variables used in \code{frmla}}
diff --git a/man/getObjective.Rd b/man/getObjective.Rd
index 62edc00..ca0fec3 100644
--- a/man/getObjective.Rd
+++ b/man/getObjective.Rd
@@ -4,33 +4,32 @@
 \alias{getObjective}
 \title{objective function for \code{\link{gpf}}}
 \usage{
-getObjective(grp, frmla, PartitioningVariables = "", mStableAgg,
-  expfamily = "gaussian", tree. = tree, Data. = Data,
-  MetaData. = MetaData, model.fcn. = model.fcn,
-  objective.fcn. = objective.fcn, ...)
+getObjective(grp, tree, Data, frmla, MetaData = NULL,
+  PartitioningVariables = "", mStableAgg, expfamily = "gaussian",
+  model.fcn = stats::glm, objective.fcn = pvDeviance, ...)
 }
 \arguments{
 \item{grp}{list containing two disjoint lists of species, such as thouse output from \code{\link{getGroups}}}
 
+\item{tree}{phylo class object}
+
+\item{Data}{If \code{mStableAgg==TRUE}, a matrix whose rows are species and columns are samples. Otherwise, a data table whose columns include "Species" and "Sample" and whose key is "Species".}
+
 \item{frmla}{formula for \code{model.fcn}}
 
+\item{MetaData}{meta-data containing variables in formula and the column "Sample". If \code{mStableAgg==F}, this input is not used - all variables must be contained in \code{Data}}
+
 \item{PartitioningVariables}{character vector containing of interest for phylofactorization partitioning.}
 
 \item{mStableAgg}{logical. See \code{\link{gpf}}}
 
 \item{expfamily}{character string indicating manner of m-stable aggregation for \code{\link{mAggregation}}. Only "binomial" is meaningfully different.}
 
-\item{...}{additional arguments for \code{model.fcn}}
-
-\item{tree}{phylo class object}
-
-\item{Data}{If \code{mStableAgg==TRUE}, a matrix whose rows are species and columns are samples. Otherwise, a data table whose columns include "Species" and "Sample" and whose key is "Species".}
-
-\item{MetaData}{meta-data containing variables in formula and the column "Sample". If \code{mStableAgg==F}, this input is not used - all variables must be contained in \code{Data}}
-
 \item{model.fcn}{model function, such as \code{\link{glm}} or \code{gam}.}
 
 \item{objective.fcn}{Objective function taking output from \code{model.fcn} as input. See \code{\link{gpf}}.}
+
+\item{...}{additional arguments for \code{model.fcn}}
 }
 \description{
 objective function for \code{\link{gpf}}
diff --git a/man/gpf.Rd b/man/gpf.Rd
index eb999c1..e0657f4 100644
--- a/man/gpf.Rd
+++ b/man/gpf.Rd
@@ -7,7 +7,7 @@
 gpf(Data, tree, frmla.phylo = NULL, frmla = NULL,
   PartitioningVariables = NULL, MetaData = NULL, nfactors = NULL,
   ncores = NULL, model.fcn = stats::glm, objective.fcn = pvDeviance,
-  algorithm = "mix", alpha = 0.2, cluster.depends = { }, ...)
+  algorithm = "mix", alpha = 0.2, cluster.depends = "", ...)
 }
 \arguments{
 \item{Data}{data table containing columns of "Species", and terms in the \code{frmla}. If \code{algorithm=="mStable"}, \code{Data} must also include a column of "Sample" or, alternatively, \code{Data} can be a matrix whose rows are species and columns are samples and \code{MetaData} a data frame of meta-data with rows corresponding to columns of \code{Data} and the terms in \code{frmla} or non-phylo terms in \code{frmla.phylo}.}
@@ -34,7 +34,7 @@ gpf(Data, tree, frmla.phylo = NULL, frmla = NULL,
 
 \item{alpha}{Numeric between 0 and 1 (strictly greater than 0), indicating the top fraction of edges to use when \code{algorithm=='mix'}. Default is alpha=0.2 selecting top 20 percent of edges.}
 
-\item{cluster.depends}{Expression. Will be evaluated in clusters to prime them - useful for customized \code{model.fcn} and \code{objective.fcn}}
+\item{cluster.depends}{Character string of R code passed to \code{eval(parse(text=cluster.depends))}. Evaluated in clusters to prime local environment - useful for customized \code{model.fcn} and \code{objective.fcn}}
 
 \item{...}{Additional arguments for \code{model.fcn}, e.g. for default \code{\link{glm}}, can use \code{family=binomial}, \code{weights}, etc. For \code{algorithm!='mStable'}, \code{subset} is not a valid optional argument due to \code{gpf} recursively subsetting based on phylogenetic factors. For \code{algorithm='mStable'}, \code{subset} indexes correspond to the Samples in order of \code{unique(Data$Sample)}}
 }
diff --git a/man/pf.tree.Rd b/man/pf.tree.Rd
index e8c0790..7742fcf 100644
--- a/man/pf.tree.Rd
+++ b/man/pf.tree.Rd
@@ -5,9 +5,10 @@
 \title{ggtree-based plotting of phylofactor bins or factors}
 \usage{
 pf.tree(pf, tree = NULL, method = "factors", factors = NULL,
-  groups = NULL, colors = NULL, GroupList = NULL, bg.color = NA,
-  bg.alpha = 0.1, alphas = NULL, layout = "circular", rootnode = FALSE,
-  top.layer = F, top.alpha = 0.1, color.fcn = viridis::viridis, ...)
+  ignore.tips = TRUE, groups = NULL, colors = NULL, GroupList = NULL,
+  bg.color = NA, bg.alpha = 0.1, alphas = NULL, layout = "circular",
+  rootnode = FALSE, top.layer = F, top.alpha = 0.1,
+  color.fcn = viridis::viridis, ...)
 }
 \arguments{
 \item{pf}{phylofactor class object}
diff --git a/man/pglm.Rd b/man/pglm.Rd
index 3510dcd..71f84b8 100644
--- a/man/pglm.Rd
+++ b/man/pglm.Rd
@@ -13,8 +13,6 @@ pglm(y, xx, frmla, smallglm = T, ...)
 
 \item{frmla}{Formula for dependence of y on x}
 
-\item{smallglm}{Logical. See \code{\link{PhyloFactor}}}
-
 \item{...}{optional input arguments to \code{\link{glm}}}
 }
 \value{
diff --git a/man/twoSampleFactor.Rd b/man/twoSampleFactor.Rd
index 0098625..8e48d66 100644
--- a/man/twoSampleFactor.Rd
+++ b/man/twoSampleFactor.Rd
@@ -5,7 +5,7 @@
 \title{Phylofactorization of vector data using two-sample tests}
 \usage{
 twoSampleFactor(Z, tree, nfactors, method = "contrast", TestFunction = NULL,
-  ncores = NULL, stopFcn = NULL, cluster.depends = NULL, Metropolis = F,
+  ncores = NULL, stop.fcn = NULL, cluster.depends = "", Metropolis = F,
   sampleFcn = NULL, lambda = 1, ...)
 }
 \arguments{
@@ -17,11 +17,11 @@ twoSampleFactor(Z, tree, nfactors, method = "contrast", TestFunction = NULL,
 
 \item{method}{string indicating two-sample test two use. Can take values of "contrast" (default), "Fisher", "Wilcox", 't.test', or "custom", indicating the two-sample test to be used.}
 
-\item{TestFunction}{optional input customized test function, taking input \code{{grps,tree,Z,p.value,..}} and output objective omega. \code{grps} is a two-element list containing indexes for each group; see \code{\link{getPhyloGroups}}. p.value is a logical: the output from p.value=T should be a P-value and can be input into the \code{stopFcn}.}
+\item{TestFunction}{optional customized test function, taking input \code{{grps,tree,Z,PF.output,...}} and returning the objective omega. \code{grps} is a two-element list containing indexes for each group; see \code{\link{getPhyloGroups}}. \code{PF.output} is a logical: the output when \code{PF.output=T} should be a P-value that can be input into the \code{stop.fcn}.}
 
 \item{ncores}{number of cores to use for parallelization}
 
-\item{stopFcn}{stop function taking as input the output from \code{TestFunction} when \code{p.value=T} and returning logical where an output of \code{TRUE} will stop phylofactorization. Inputting character string "KS", will use KS-test on the P-values output from \code{TestFunction}.}
+\item{stop.fcn}{stop function taking as input the output from \code{TestFunction} when \code{PF.output=T} and returning a logical, where an output of \code{TRUE} will stop phylofactorization. Inputting the character string "KS" will use a KS-test on the P-values output from \code{TestFunction}.}
 
 \item{cluster.depends}{expression loading dependencies for \code{TestFunction} onto cluster.}