Skip to content

Commit

Permalink
IMP: additional aligned sequence transformers (#248)
Browse files Browse the repository at this point in the history
  • Loading branch information
gregcaporaso committed Jun 17, 2020
1 parent 3836562 commit 4490758
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 4 deletions.
30 changes: 26 additions & 4 deletions q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,13 @@ def _dnafastaformats_to_metadata(ff):
return qiime2.Metadata(df)


def _series_to_fasta_format(ff, data):
    """Write every entry of ``data`` into ``ff`` as a FASTA record.

    Parameters
    ----------
    ff
        Format object whose ``open()`` returns a writable file handle usable
        as a context manager (callers in this file pass ``DNAFASTAFormat``
        and ``AlignedDNAFASTAFormat`` instances).
    data : pd.Series
        Index labels are used as the record ids; values are the sequences
        handed to ``skbio.DNA``.

    Returns
    -------
    None
        ``ff`` is written in place.
    """
    with ff.open() as f:
        # ``Series.iteritems()`` was deprecated in pandas 1.5 and removed in
        # pandas 2.0; ``Series.items()`` yields the same (label, value) pairs
        # on both old and new pandas releases.
        for id_, seq in data.items():
            sequence = skbio.DNA(seq, metadata={'id': id_})
            skbio.io.write(sequence, format='fasta', into=f)


@plugin.register_transformer
def _9(ff: DNAFASTAFormat) -> DNAIterator:
generator = _read_dna_fasta(str(ff))
Expand Down Expand Up @@ -339,10 +346,7 @@ def _31(ff: DNAFASTAFormat) -> qiime2.Metadata:
@plugin.register_transformer
def _16(data: pd.Series) -> DNAFASTAFormat:
    """Transform a ``pd.Series`` of sequences into a ``DNAFASTAFormat``.

    The write itself is delegated to ``_series_to_fasta_format`` so this
    transformer and the aligned variant share one implementation; the
    sequences are written exactly once.
    """
    ff = DNAFASTAFormat()
    _series_to_fasta_format(ff, data)
    return ff


Expand All @@ -364,6 +368,24 @@ def _33(ff: AlignedDNAFASTAFormat) -> qiime2.Metadata:
return _dnafastaformats_to_metadata(ff)


@plugin.register_transformer
def _34(ff: AlignedDNAFASTAFormat) -> pd.Series:
    """Transform an ``AlignedDNAFASTAFormat`` into a ``pd.Series``.

    The conversion is performed entirely by ``_dnafastaformats_to_series``;
    its result is returned unchanged.
    """
    series = _dnafastaformats_to_series(ff)
    return series


@plugin.register_transformer
def _35(data: pd.Series) -> AlignedDNAFASTAFormat:
    """Transform a ``pd.Series`` into an ``AlignedDNAFASTAFormat``.

    A fresh format object is created, filled by
    ``_series_to_fasta_format``, and returned.
    """
    aligned = AlignedDNAFASTAFormat()
    _series_to_fasta_format(aligned, data)
    return aligned


@plugin.register_transformer
def _36(fmt: AlignedDNAFASTAFormat) -> DNAIterator:
    """Transform an ``AlignedDNAFASTAFormat`` into a ``DNAIterator``.

    ``str(fmt)`` is handed to ``_read_dna_fasta`` and the result is wrapped
    in a ``DNAIterator``.
    """
    # NOTE(review): str(fmt) is presumably the on-disk path of the format
    # file -- confirm against the format base class.
    return DNAIterator(_read_dna_fasta(str(fmt)))


# differential types
@plugin.register_transformer
def _222(ff: DifferentialFormat) -> pd.DataFrame:
Expand Down
49 changes: 49 additions & 0 deletions q2_types/feature_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,55 @@ def test_aligned_dnafasta_format_to_metadata(self):

self.assertEqual(exp, obs)

def test_aligned_dnafasta_format_to_series(self):
    """AlignedDNAFASTAFormat -> pd.Series yields the fixture's records."""
    # Expected contents of 'aligned-dna-sequences.fasta'; the first record
    # starts with a run of gap characters.
    first_seq = ('------------------------ACGTACGTACGTACGTACGTAC'
                 'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT')
    second_seq = ('ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC'
                  'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT')
    expected = pd.Series([first_seq, second_seq],
                         index=pd.Index(['SEQUENCE1', 'SEQUENCE2']),
                         dtype=object)

    _, observed = self.transform_format(
        AlignedDNAFASTAFormat, pd.Series, 'aligned-dna-sequences.fasta')
    # Cast the elements to str so they compare against the plain-string
    # expectation above.
    observed = observed.astype(str)

    assert_series_equal(expected, observed)

def test_series_to_aligned_dnafasta_format(self):
    """pd.Series -> AlignedDNAFASTAFormat writes one FASTA record per row.

    Checks the returned object's type and the first four lines of the file
    it wrote (header and sequence for each of the two input rows).
    """
    transformer = self.get_transformer(pd.Series, AlignedDNAFASTAFormat)

    index = pd.Index(['SEQUENCE1', 'SEQUENCE2'])
    # Named ``series`` rather than ``input`` to avoid shadowing the builtin.
    series = pd.Series(['------------------------ACGTACGTACGTACGTACGTAC'
                        'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT',
                        'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC'
                        'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT'],
                       index=index, dtype=object)

    obs = transformer(series)

    self.assertIsInstance(obs, AlignedDNAFASTAFormat)

    # Read through a context manager so the handle is closed
    # deterministically instead of leaking until garbage collection.
    with open(str(obs)) as fh:
        obs_lines = fh.readlines()

    self.assertEqual(obs_lines[0], '>SEQUENCE1\n')
    self.assertEqual(obs_lines[1],
                     '------------------------ACGTACGTACGTACGTACGTAC'
                     'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n')
    self.assertEqual(obs_lines[2], '>SEQUENCE2\n')
    self.assertEqual(obs_lines[3],
                     'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC'
                     'GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n')

def test_aligned_dna_fasta_format_to_dna_iterator(self):
    """AlignedDNAFASTAFormat -> DNAIterator yields the same records as
    reading the fixture directly with ``skbio.read``.
    """
    # Named ``input_fmt`` rather than ``input`` to avoid shadowing the
    # builtin.
    input_fmt, obs = self.transform_format(
        AlignedDNAFASTAFormat, DNAIterator,
        filename='aligned-dna-sequences.fasta')

    exp = skbio.read(str(input_fmt), format='fasta',
                     constructor=skbio.DNA)

    # Materialize both sides first: a bare ``zip`` stops at the shorter
    # iterable, so an empty or truncated result would otherwise pass
    # without a single comparison being made.
    obs_seqs = list(obs)
    exp_seqs = list(exp)

    # The fixture holds records, so an empty expectation signals a broken
    # test setup rather than a valid comparison.
    self.assertGreater(len(exp_seqs), 0)
    self.assertEqual(len(obs_seqs), len(exp_seqs))

    for observed, expected in zip(obs_seqs, exp_seqs):
        self.assertEqual(observed, expected)


class TestDifferentialTransformer(TestPluginBase):
package = 'q2_types.feature_data.tests'
Expand Down

0 comments on commit 4490758

Please sign in to comment.