Skip to content

Commit

Permalink
Merge pull request #443 from nextstrain/mugration_weights
Browse files Browse the repository at this point in the history
Mugration weights
  • Loading branch information
trvrb committed Feb 13, 2020
2 parents 7d4a10d + db75000 commit 12c1720
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 6 deletions.
28 changes: 25 additions & 3 deletions augur/traits.py
Expand Up @@ -10,7 +10,7 @@
TINY = 1e-12

def mugration_inference(tree=None, seq_meta=None, field='country', confidence=True,
missing='?', sampling_bias_correction=None):
missing='?', sampling_bias_correction=None, weights=None):
"""
Infer likely ancestral states of a discrete character assuming a time reversible model.
Expand All @@ -26,6 +26,10 @@ def mugration_inference(tree=None, seq_meta=None, field='country', confidence=Tr
calculate confidence values for inferences
missing : str, optional
character that is to be interpreted as missing data, default='?'
sampling_bias_correction : None, optional
factor by which the transition rate is scaled up to counter sampling bias
weights : None, optional
vector of equilibrium frequencies that one expects the far ancestor to be sampled from
Returns
-------
Expand Down Expand Up @@ -63,7 +67,7 @@ def mugration_inference(tree=None, seq_meta=None, field='country', confidence=Tr
elif len(unique_states)<180:
tt, letter_to_state, reverse_alphabet = \
reconstruct_discrete_traits(T, traits, missing_data=missing,
sampling_bias_correction=sampling_bias_correction)
sampling_bias_correction=sampling_bias_correction, weights=weights)
else:
print("ERROR: 180 or more distinct discrete states found. TreeTime is currently not set up to handle that many states.")
sys.exit(1)
Expand Down Expand Up @@ -102,6 +106,7 @@ def register_arguments(parser):
"""
parser.add_argument('--tree', '-t', required=True, help="tree to perform trait reconstruction on")
parser.add_argument('--metadata', required=True, help="tsv/csv table with meta data")
parser.add_argument('--weights', required=False, help="tsv/csv table with equilibrium probabilities of discrete states")
parser.add_argument('--columns', required=True, nargs='+',
help='metadata fields to perform discrete reconstruction on')
parser.add_argument('--confidence',action="store_true",
Expand Down Expand Up @@ -138,13 +143,30 @@ def run(args):
print("*** augur refine --tree %s --output-tree <filename>.nwk"%(tree_fname) )
print("*** And use <filename>.nwk as the tree when running 'ancestral', 'translate', and 'traits'")

if args.weights:
weight_dict = {c:{} for c in args.columns}
sep = ',' if args.weights.endswith('csv') else '\t'
with open(args.weights, 'r') as fh:
for line in fh:
if line[0]=='#':
continue
name, trait, value = line.strip().split(sep)
if name in weight_dict:
weight_dict[name][trait] = float(value)
for c in weight_dict:
if len(weight_dict[c])==0:
weight_dict[c]=None
else:
weight_dict = {c:None for c in args.columns}

mugration_states = defaultdict(dict)
models = defaultdict(dict)
out_prefix = '.'.join(args.tree.split('.')[:-1])
for column in args.columns:
T, gtr, alphabet = mugration_inference(tree=tree_fname, seq_meta=traits,
field=column, confidence=args.confidence,
sampling_bias_correction=args.sampling_bias_correction)
sampling_bias_correction=args.sampling_bias_correction,
weights=weight_dict[column])
if T is None: # something went wrong
continue

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -38,7 +38,7 @@
"jsonschema >=3.0.0, ==3.*",
"packaging >=19.2",
"pandas >=0.20.0, ==0.*",
"phylo-treetime ==0.7.*",
"phylo-treetime >=0.7.2, ==0.7.*",
"snakemake >=5.4.0, ==5.*"
],
extras_require = {
Expand Down
7 changes: 5 additions & 2 deletions tests/builds/zika/Snakefile
Expand Up @@ -135,6 +135,7 @@ rule ancestral:
augur ancestral \
--tree {input.tree} \
--alignment {input.alignment} \
--infer-ambiguous \
--output {output.node_data} \
--inference {params.inference}
"""
Expand Down Expand Up @@ -164,12 +165,14 @@ rule traits:
output:
node_data = "results/traits.json",
params:
columns = "region country",
sampling_bias_correction = 3
columns = "country region",
sampling_bias_correction = 3,
weights = "config/trait_weights.csv"
shell:
"""
augur traits \
--tree {input.tree} \
--weights {params.weights} \
--metadata {input.metadata} \
--output {output.node_data} \
--columns {params.columns} \
Expand Down
22 changes: 22 additions & 0 deletions tests/builds/zika/config/trait_weights.csv
@@ -0,0 +1,22 @@
#name,trait,weight
American Samoa,country, 55
Brazil,country, 2000000
China,country, 10000
Colombia,country, 50000
Cuba,country, 12000
Dominican Republic,country, 10000
Ecuador,country, 16000
French guiana,country, 280
French polynesia,country, 276
Guadeloupe,country, 400
Guatemala,country, 17000
Haiti,country, 10000
Honduras,country, 9000
Martinique,country, 385
Mexico,country, 124000
Panama,country, 400000
Puerto rico,country, 3400
Suriname,country, 558
North America,region,0.5
South America,region,5.0
Oceania,region,0.1

0 comments on commit 12c1720

Please sign in to comment.