Skip to content

Commit

Permalink
Merge pull request #150 from nextstrain/translate
Browse files Browse the repository at this point in the history
Reduce/Clarify arguments in translate
  • Loading branch information
rneher committed Jul 5, 2018
2 parents dcff2b8 + 9fe28f8 commit a908e85
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 29 deletions.
38 changes: 22 additions & 16 deletions augur/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,16 @@ def run(args):

## check file format and read in sequences
is_vcf = False
if args.vcf_input:
if any([args.ancestral_sequences.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]):
if not args.vcf_reference:
print("ERROR: a reference Fasta is required with VCF-format input")
return -1
compress_seq = read_vcf(args.vcf_input, args.vcf_reference)
compress_seq = read_vcf(args.ancestral_sequences, args.vcf_reference)
sequences = compress_seq['sequences']
ref = compress_seq['reference']
is_vcf = True
else:
node_data = read_node_data(args.node_data, args.tree)
node_data = read_node_data(args.ancestral_sequences, args.tree)
if node_data is None:
print("ERROR: could not read node data (incl sequences)")
return -1
Expand Down Expand Up @@ -300,16 +300,22 @@ def run(args):
print("amino acid mutations written to",args.output, file=sys.stdout)

## write alignments to file is requested
if args.alignment_output and '%GENE' in args.alignment_output:
for fname, seqs in translations.items():
SeqIO.write([SeqRecord.SeqRecord(seq=Seq.Seq(s), id=sname, name=sname, description='')
for sname, s in seqs.items()],
args.alignment_output.replace('%GENE', fname), 'fasta')

## write VCF-style output if requested
if args.vcf_output:
fileEndings = -1
if args.vcf_output.lower().endswith('.gz'):
fileEndings = -2
vcf_out_ref = '.'.join(args.vcf_output.split('.')[:fileEndings]) + '_reference.fasta'
write_VCF_translation(translations, args.vcf_output, vcf_out_ref)
if args.alignment_output:
if is_vcf:
## write VCF-style output if requested
fileEndings = -1
if args.alignment_output.lower().endswith('.gz'):
fileEndings = -2
vcf_out_ref = '.'.join(args.alignment_output.split('.')[:fileEndings]) + '_reference.fasta'
write_VCF_translation(translations, args.alignment_output, vcf_out_ref)
else:
## write fasta-style output if requested
if '%GENE' in args.alignment_output:
for fname, seqs in translations.items():
SeqIO.write([SeqRecord.SeqRecord(seq=Seq.Seq(s), id=sname, name=sname, description='')
for sname, s in seqs.items()],
args.alignment_output.replace('%GENE', fname), 'fasta')
else:
print("ERROR: alignment output file does not contain '%GENE', so will not be written.")


11 changes: 4 additions & 7 deletions bin/augur
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,15 @@ if __name__=="__main__":
## TRANSLATE.PY
translate_parser = subparsers.add_parser('translate', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
translate_parser.add_argument('--tree', help="prebuilt Newick -- no tree will be built if provided")
translate_parser.add_argument('--node-data', type=str, help='file with additional node data including ancestral sequences')
translate_parser.add_argument('--ancestral-sequences', type=str, help='JSON (fasta input) or VCF (VCF input) containing ancestral and tip sequences')
translate_parser.add_argument('--reference-sequence', required=True,
help='GenBank or GFF file containing the annotation')
translate_parser.add_argument('--genes', nargs='+', help="genes to translate (list or file containing list)")
translate_parser.add_argument('--output', type=str, help="name of JSON files for aa mutations")
translate_parser.add_argument('--alignment-output', type=str, help="write the alignment of each gene to file,"
"specify the file name like so: 'my_alignment_GENE.fasta' where 'GENE'"
"will be replaced by the name of the gene (fasta-input only)")
translate_parser.add_argument('--vcf-input', type=str, help='VCF file with original and ancestral sequences')
translate_parser.add_argument('--alignment-output', type=str, help="write out translated gene alignments. "
"If a VCF-input, a .vcf or .vcf.gz will be output here (depending on file ending). If fasta-input, specify the file name "
"like so: 'my_alignment_%GENE.fasta', where '%GENE' will be replaced by the name of the gene")
translate_parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the VCF was mapped to')
translate_parser.add_argument('--vcf-output', type=str, help='write a faux-VCF alignment of the translations to a file.'
'a corresponding fasta reference will be generated')
translate_parser.set_defaults(func=translate.run)

## TRAITS.PY
Expand Down
10 changes: 6 additions & 4 deletions builds/tb/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ rule refine:
"""
augur refine --tree {input.tree} --alignment {input.aln} --metadata {input.metadata} \
--output-tree {output.tree} --output-node-data {output.node_data} --vcf-reference {input.ref} \
--timetree --root {params.root} --coalescent 0.0
--timetree --root {params.root} --coalescent 0.0001
"""

rule ancestral:
Expand Down Expand Up @@ -105,9 +105,11 @@ rule translate:
aa_data = "results/aa_muts.json",
vcf_out = "results/translations.vcf"
shell:
'augur translate --tree {input.tree} --genes {input.genes} --vcf-reference {input.ref}'
' --vcf-input {input.vcf} --output {output.aa_data} --reference-sequence {input.gene_ref}'
' --vcf-output {output.vcf_out}'
"""
augur translate --tree {input.tree} --genes {input.genes} --vcf-reference {input.ref} \
--ancestral-sequences {input.vcf} --output {output.aa_data} --reference-sequence {input.gene_ref} \
--alignment-output {output.vcf_out}
"""

rule traits:
input:
Expand Down
4 changes: 2 additions & 2 deletions builds/zika/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ rule translate:
node_data = "results/aa_muts.json"
shell:
"""
augur translate --tree {input.tree} --node-data {input.node_data} \
--output {output.node_data} --reference-sequence {input.reference}
augur translate --tree {input.tree} --ancestral-sequences {input.node_data} \
--output {output.node_data} --reference-sequence {input.reference}
"""

rule traits:
Expand Down

0 comments on commit a908e85

Please sign in to comment.