Parameterize size of clades to estimate by diffusion frequency likelihood


Exposes a previously hardcoded parameter for the diffusion tree frequency
estimator that determines how many tips a clade must have to be considered for
frequency estimation by the diffusion likelihood calculation. Clades that are
smaller than the specified threshold inherit their parent's frequencies
proportionally to their contribution to the parent clade.

This commit exposes the `min_clades` parameter through a keyword argument to the
diffusion frequencies class and also through the `augur frequencies` command
with the new `--minimal-clade-size-to-estimate` argument. This commit modifies
the description of the existing `--minimal-clade-size` argument to clarify its
use for filtering output in contrast to the new argument's use for controlling
frequency estimation itself. The previously hardcoded default value of 10 is now
the default for both the frequencies class and the command line argument.
huddlej committed Oct 17, 2019
1 parent abe930b commit ef48dea37c610a66fe72d0134a3371f53ff85fe9
10 additions and 5 deletions.
  augur/
  augur/
@@ -32,8 +32,6 @@ def register_arguments(parser):
help="tree to estimate clade frequencies for")
parser.add_argument("--include-internal-nodes", action="store_true",
help="calculate frequencies for internal nodes as well as tips")
parser.add_argument('--minimal-clade-size', type=int, default=0,
help="minimal size of a clade to have frequencies estimated")

# Alignment-specific arguments
parser.add_argument('--alignments', type=str, nargs='+',
@@ -54,6 +52,12 @@ def register_arguments(parser):
parser.add_argument("--censored", action="store_true", help="calculate censored frequencies at each pivot")

# Diffusion frequency specific arguments
parser.add_argument('--minimal-clade-size', type=int, default=0,
help="minimal number of tips a clade must have for its diffusion frequencies to be reported")
parser.add_argument('--minimal-clade-size-to-estimate', type=int, default=10,
help="""minimal number of tips a clade must have for its diffusion frequencies to be estimated
by the diffusion likelihood; all smaller clades will inherit frequencies from their
parser.add_argument("--stiffness", type=float, default=10.0, help="parameter penalizing curvature of the frequency trajectory")
parser.add_argument("--inertia", type=float, default=0.0, help="determines how frequencies continue "
"in absense of data (inertia=0 -> go flat, inertia=1.0 -> continue current trend)")
@@ -116,7 +120,8 @@ def run(args):
tree_freqs = tree_frequencies(tree, pivots, method='SLSQP',
node_filter = node_filter_func,
ws = max(2, tree.count_terminals()//10),
stiffness = stiffness, inertia=inertia)
stiffness = stiffness, inertia=inertia,


@@ -436,7 +436,7 @@ class that estimates frequencies for nodes in the tree. each internal node is as
will be numbered in preorder. Each node is assumed to have an attribute `attr` with a
key "num_date".
def __init__(self, tree, pivots, node_filter=None, min_clades = 20, verbose=0, pc=1e-4, **kwargs):
def __init__(self, tree, pivots, node_filter=None, min_clades=10, verbose=0, pc=1e-4, **kwargs):
set up the internal tree, the pivots and cutoffs
@@ -458,7 +458,7 @@ def __init__(self, tree, pivots, node_filter=None, min_clades = 20, verbose=0, p
self.tree = tree
self.min_clades = 10 #min_clades
self.min_clades = min_clades
self.pivots = pivots
self.kwargs = kwargs
self.verbose = verbose

