Skip to content

Commit

Permalink
Merge 5cae317 into c9ad329
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Rod committed Aug 6, 2019
2 parents c9ad329 + 5cae317 commit ffa9685
Show file tree
Hide file tree
Showing 11 changed files with 80 additions and 45 deletions.
6 changes: 0 additions & 6 deletions q2_types/feature_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ def sniff(self):
elif line.lstrip(' ') == '\n':
# Blank line
continue
elif line.startswith('#'):
# Comment line
continue
else:
cells = line.split('\t')
if len(cells) < 2:
Expand Down Expand Up @@ -113,9 +110,6 @@ def sniff(self):
elif line.lstrip(' ') == '\n':
# Blank line
continue
elif line.startswith('#'):
# Comment line
continue

cells = line.rstrip('\n').split('\t')
if header is None:
Expand Down
4 changes: 3 additions & 1 deletion q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _taxonomy_formats_to_dataframe(filepath, has_header=None):
"""
# Using `dtype=object` and `set_index()` to avoid type casting/inference of
# any columns or the index.
df = pd.read_csv(filepath, sep='\t', comment='#', skip_blank_lines=True,
df = pd.read_csv(filepath, sep='\t', skip_blank_lines=True,
header=None, dtype=object)

if len(df.columns) < 2:
Expand Down Expand Up @@ -168,6 +168,7 @@ def _6(ff: TaxonomyFormat) -> pd.Series:
@plugin.register_transformer
def _28(ff: TaxonomyFormat) -> qiime2.Metadata:
    """Transform a ``TaxonomyFormat`` file into ``qiime2.Metadata``.

    The file is parsed with ``_taxonomy_formats_to_dataframe`` (called with
    ``has_header=None``). Leading and trailing whitespace is then stripped
    from every value in the ``Taxon`` column before the frame is wrapped as
    Metadata.
    """
    taxonomy = _taxonomy_formats_to_dataframe(str(ff), has_header=None)
    # Trim surrounding whitespace from each taxon string.
    stripped_taxa = taxonomy['Taxon'].str.strip()
    taxonomy['Taxon'] = stripped_taxa
    return qiime2.Metadata(taxonomy)


Expand All @@ -191,6 +192,7 @@ def _23(ff: TSVTaxonomyFormat) -> pd.Series:
@plugin.register_transformer
def _29(ff: TSVTaxonomyFormat) -> qiime2.Metadata:
    """Transform a ``TSVTaxonomyFormat`` file into ``qiime2.Metadata``.

    The file is parsed with ``_taxonomy_formats_to_dataframe`` (called with
    ``has_header=True``). Leading and trailing whitespace is then stripped
    from every value in the ``Taxon`` column before the frame is wrapped as
    Metadata.
    """
    taxonomy = _taxonomy_formats_to_dataframe(str(ff), has_header=True)
    # Trim surrounding whitespace from each taxon string.
    stripped_taxa = taxonomy['Taxon'].str.strip()
    taxonomy['Taxon'] = stripped_taxa
    return qiime2.Metadata(taxonomy)


Expand Down
17 changes: 17 additions & 0 deletions q2_types/feature_data/tests/data/taxonomy/blanks
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

















17 changes: 0 additions & 17 deletions q2_types/feature_data/tests/data/taxonomy/blanks-and-comments

This file was deleted.

6 changes: 0 additions & 6 deletions q2_types/feature_data/tests/data/taxonomy/header-only.tsv
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
# This file

# only has a
# header!


Feature ID Taxon
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Feature ID Taxon Confidence
seq1 k__Foo; p__Bar -1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Feature ID Taxon Confidence
seq1 k__Foo; p__Bar -1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Feature ID Taxon Confidence
seq1 k__Foo; p__Bar -1.0
20 changes: 10 additions & 10 deletions q2_types/feature_data/tests/data/taxonomy/valid-but-messy.tsv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@


# There's some important whitespace in this file for testing, take care not to
# remove :)

# hello

# world #







Expand All @@ -15,20 +15,20 @@



# comment


Feature ID Taxon Extra Column

# hello, peanut
#

#





SEQUENCE1 k__Bar; p__Baz foo

# GWAR

seq2 some; taxonomy; for; ya bar baz


# FOOTER

6 changes: 3 additions & 3 deletions q2_types/feature_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_taxonomy_format_validate_positive(self):
format.validate()

def test_taxonomy_format_validate_negative(self):
filenames = ['empty', 'blanks-and-comments', '1-column.tsv']
filenames = ['empty', 'blanks', '1-column.tsv']
filepaths = [self.get_data_path(os.path.join('taxonomy', filename))
for filename in filenames]

Expand Down Expand Up @@ -78,7 +78,7 @@ def test_headerless_tsv_taxonomy_format_validate_positive(self):
format.validate()

def test_headerless_tsv_taxonomy_format_validate_negative(self):
filenames = ['empty', 'blanks-and-comments', '1-column.tsv']
filenames = ['empty', 'blanks', '1-column.tsv']
filepaths = [self.get_data_path(os.path.join('taxonomy', filename))
for filename in filenames]

Expand Down Expand Up @@ -113,7 +113,7 @@ def test_tsv_taxonomy_format_validate_positive(self):
format.validate()

def test_tsv_taxonomy_format_validate_negative(self):
filenames = ['empty', 'blanks-and-comments', '1-column.tsv',
filenames = ['empty', 'blanks', '1-column.tsv',
'headerless.tsv', 'header-only.tsv', 'jagged.tsv']
filepaths = [self.get_data_path(os.path.join('taxonomy', filename))
for filename in filenames]
Expand Down
43 changes: 41 additions & 2 deletions q2_types/feature_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,45 @@ def test_tsv_taxonomy_format_to_metadata(self):

self.assertEqual(exp, obs)

def test_tsv_taxonomy_to_metadata_trailing_whitespace_taxon(self):
    """Check the Metadata produced from ``trailing_space_taxon.tsv``.

    The transformed result must equal Metadata whose single row ``seq1``
    has Taxon ``'k__Foo; p__Bar'`` (no surrounding whitespace) and
    Confidence ``'-1.0'``.
    """
    fixture = os.path.join('taxonomy', 'trailing_space_taxon.tsv')
    _, obs = self.transform_format(TSVTaxonomyFormat, qiime2.Metadata,
                                   fixture)

    # Expected frame: object dtype throughout, indexed by 'Feature ID'.
    exp = qiime2.Metadata(pd.DataFrame(
        [['k__Foo; p__Bar', '-1.0']],
        index=pd.Index(['seq1'], name='Feature ID', dtype=object),
        columns=['Taxon', 'Confidence'],
        dtype=object))

    self.assertEqual(exp, obs)

def test_tsv_taxonomy_to_metadata_leading_whitespace_taxon(self):
    """Check the Metadata produced from ``leading_space_taxon.tsv``.

    The transformed result must equal Metadata whose single row ``seq1``
    has Taxon ``'k__Foo; p__Bar'`` (no surrounding whitespace) and
    Confidence ``'-1.0'``.
    """
    fixture = os.path.join('taxonomy', 'leading_space_taxon.tsv')
    _, obs = self.transform_format(TSVTaxonomyFormat, qiime2.Metadata,
                                   fixture)

    # Expected frame: object dtype throughout, indexed by 'Feature ID'.
    exp = qiime2.Metadata(pd.DataFrame(
        [['k__Foo; p__Bar', '-1.0']],
        index=pd.Index(['seq1'], name='Feature ID', dtype=object),
        columns=['Taxon', 'Confidence'],
        dtype=object))

    self.assertEqual(exp, obs)

def test_tsv_taxonomy_to_metadata_trailing_leading_whitespace_taxon(self):
    """Check the Metadata produced from ``start_end_space_taxon.tsv``.

    The transformed result must equal Metadata whose single row ``seq1``
    has Taxon ``'k__Foo; p__Bar'`` (no surrounding whitespace) and
    Confidence ``'-1.0'``.
    """
    fixture = os.path.join('taxonomy', 'start_end_space_taxon.tsv')
    _, obs = self.transform_format(TSVTaxonomyFormat, qiime2.Metadata,
                                   fixture)

    # Expected frame: object dtype throughout, indexed by 'Feature ID'.
    exp = qiime2.Metadata(pd.DataFrame(
        [['k__Foo; p__Bar', '-1.0']],
        index=pd.Index(['seq1'], name='Feature ID', dtype=object),
        columns=['Taxon', 'Confidence'],
        dtype=object))

    self.assertEqual(exp, obs)


# In-depth testing of the `_taxonomy_formats_to_dataframe` helper function,
# which does the heavy lifting for the transformers.
Expand All @@ -275,11 +314,11 @@ def test_one_column(self):
_taxonomy_formats_to_dataframe(
self.get_data_path(os.path.join('taxonomy', '1-column.tsv')))

def test_blanks_and_comments(self):
def test_blanks(self):
with self.assertRaises(pandas.io.common.EmptyDataError):
_taxonomy_formats_to_dataframe(
self.get_data_path(os.path.join('taxonomy',
'blanks-and-comments')))
'blanks')))

def test_empty(self):
with self.assertRaises(pandas.io.common.EmptyDataError):
Expand Down

0 comments on commit ffa9685

Please sign in to comment.