Skip to content

Commit

Permalink
ENH: add support for addfragments flag
Browse files Browse the repository at this point in the history
  • Loading branch information
misialq committed Oct 27, 2020
1 parent 1ac4660 commit 5c12aeb
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
12 changes: 7 additions & 5 deletions q2_alignment/_mafft.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def run_command(cmd, output_fp, verbose=True):
subprocess.run(cmd, stdout=output_f, check=True)


def _mafft(sequences_fp, alignment_fp, n_threads, parttree):
def _mafft(sequences_fp, alignment_fp, n_threads, parttree, fragments):
# Save original sequence IDs since long ids (~250 chars) can be truncated
# by mafft. We'll replace the IDs in the aligned sequences file output by
# mafft with the originals.
Expand Down Expand Up @@ -92,7 +92,8 @@ def _mafft(sequences_fp, alignment_fp, n_threads, parttree):
cmd += ['--parttree']

if alignment_fp is not None:
cmd += ['--add', sequences_fp, alignment_fp]
add_flag = '--addfragments' if fragments else '--add'
cmd += [add_flag, sequences_fp, alignment_fp]
else:
cmd += [sequences_fp]

Expand Down Expand Up @@ -124,13 +125,14 @@ def mafft(sequences: DNAFASTAFormat,
n_threads: int = 1,
parttree: bool = False) -> AlignedDNAFASTAFormat:
sequences_fp = str(sequences)
return _mafft(sequences_fp, None, n_threads, parttree)
return _mafft(sequences_fp, None, n_threads, parttree, False)


def mafft_add(alignment: AlignedDNAFASTAFormat,
sequences: DNAFASTAFormat,
n_threads: int = 1,
parttree: bool = False) -> AlignedDNAFASTAFormat:
parttree: bool = False,
fragments: bool = False) -> AlignedDNAFASTAFormat:
alignment_fp = str(alignment)
sequences_fp = str(sequences)
return _mafft(sequences_fp, alignment_fp, n_threads, parttree)
return _mafft(sequences_fp, alignment_fp, n_threads, parttree, fragments)
7 changes: 5 additions & 2 deletions q2_alignment/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
inputs={'alignment': FeatureData[AlignedSequence],
'sequences': FeatureData[Sequence]},
parameters={'n_threads': Int % Range(1, None) | Str % Choices(['auto']),
'parttree': Bool},
'parttree': Bool,
'fragments': Bool},
outputs=[('expanded_alignment', FeatureData[AlignedSequence])],
input_descriptions={'alignment': 'The alignment to which '
'sequences should be added.',
Expand All @@ -55,7 +56,9 @@
'n_threads': 'The number of threads. (Use `auto` to automatically use '
'all available cores)',
'parttree': 'This flag is required if the number of sequences being '
'aligned are larger than 1000000. Disabled by default'},
'aligned are larger than 1000000. Disabled by default',
'fragments': 'This flag indicates that alignment optimized for '
'addition of fragmentary sequences should be used.'},
output_descriptions={
'expanded_alignment': 'Alignment containing the provided aligned and '
'unaligned sequences.'},
Expand Down
29 changes: 26 additions & 3 deletions q2_alignment/tests/test_mafft.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------
import os
import unittest
from unittest.mock import patch, ANY
import subprocess

import skbio
Expand All @@ -19,7 +20,6 @@


class MafftTests(TestPluginBase):

package = 'q2_alignment.tests'

def _prepare_sequence_data(self):
Expand Down Expand Up @@ -84,7 +84,6 @@ def test_mafft_parttree_exception(self):


class MafftAddTests(TestPluginBase):

package = 'q2_alignment.tests'

def _prepare_sequence_data(self):
Expand Down Expand Up @@ -114,6 +113,31 @@ def test_mafft_add(self):
constructor=skbio.DNA)
self.assertEqual(obs, exp)

def test_mafft_add_fragments(self):
    """mafft_add with fragments=True yields the expected alignment.

    The expected alignment is the one supplied by
    ``_prepare_sequence_data``; stderr is sent to ``os.devnull`` while
    the aligner runs.
    """
    alignment, sequences, expected_msa = self._prepare_sequence_data()

    with redirected_stdio(stderr=os.devnull):
        aligned = mafft_add(alignment, sequences, fragments=True)

    # Parse the produced alignment file back into a TabularMSA so it can
    # be compared against the expected object.
    observed_msa = skbio.io.read(
        str(aligned), into=skbio.TabularMSA, constructor=skbio.DNA)
    self.assertEqual(observed_msa, expected_msa)

def test_mafft_add_flags(self):
    """The `fragments` option selects which add-flag is passed to mafft.

    ``run_command`` and ``skbio.TabularMSA.read`` are patched, so only the
    command list handed to ``run_command`` is inspected here.
    """
    alignment, sequences, exp = self._prepare_sequence_data()

    # Arguments expected ahead of the add-flag in both invocations.
    common_args = ["mafft", "--preservecase", "--inputorder", "--thread",
                   "1"]

    with patch('q2_alignment._mafft.run_command') as run_cmd_mock, \
            patch('q2_alignment._mafft.skbio.TabularMSA.read',
                  return_value=exp):
        # Default call: expects the plain --add flag.
        mafft_add(alignment, sequences)
        run_cmd_mock.assert_called_with(
            common_args + ["--add", ANY, ANY], ANY)

        # fragments=True: expects --addfragments in the same position.
        mafft_add(alignment, sequences, fragments=True)
        run_cmd_mock.assert_called_with(
            common_args + ["--addfragments", ANY, ANY], ANY)

def test_duplicate_input_ids_in_unaligned(self):
input_fp = self.get_data_path('unaligned-duplicate-ids.fasta')
sequences = DNAFASTAFormat(input_fp, mode='r')
Expand Down Expand Up @@ -181,7 +205,6 @@ def test_long_ids_are_not_truncated_aligned(self):


class RunCommandTests(TestPluginBase):

package = 'q2_alignment.tests'

def test_failed_run(self):
Expand Down

0 comments on commit 5c12aeb

Please sign in to comment.