Skip to content

Commit

Permalink
Merge 51cbb8f into cf45830
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Rod committed Aug 8, 2019
2 parents cf45830 + 51cbb8f commit e25ad2c
Showing 1 changed file with 29 additions and 10 deletions.
39 changes: 29 additions & 10 deletions q2_types/feature_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,38 +90,57 @@ class TSVTaxonomyFormat(model.TextFileFormat):
Optionally followed by other arbitrary columns.
This format supports comment lines starting with #, and blank lines. The
expected header must be the first non-comment, non-blank line. In addition
to the header, there must be at least one line of data.
This format supports blank lines. The expected header must be the first
non-blank line. In addition to the header, there must be at least one line
of data.
"""
HEADER = ['Feature ID', 'Taxon']

def sniff(self):
def _check_n_records(self, n=None):
with self.open() as fh:
data_lines = 0
header = None
while data_lines < 10:
line = fh.readline()

file_ = enumerate(fh) if n is None else zip(range(n), fh)

for i, line in file_:
# Checks rows in the file, excludes header row
i = i + 1
if line == '':
# EOF
break
elif line.lstrip(' ') == '\n':
# Blank line
continue

cells = line.rstrip('\n').split('\t')
cells = line.strip('\n').split('\t')

if header is None:
if cells[:2] != self.HEADER:
return False
raise ValidationError("['Feature ID' and 'Taxon'] "
"must be the first two header "
"values to be valid.\n\n The "
"first two header values "
"provided are: {}."
.format(cells[:2]))
header = cells
else:
if len(cells) != len(header):
return False
raise ValidationError('Number of columns are not the '
'same as number of headers in '
'the file. \nHeader values: '
'{} \nColumn values: {} '
.format(header, cells[:], i))

data_lines += 1

return header is not None and data_lines > 0
if data_lines == 0:
raise ValidationError("No feature records found, only blank "
"lines and/or a header row.")

def _validate_(self, level):
self._check_n_records(n={'min': 10, 'max': None}[level])


TSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(
Expand Down

0 comments on commit e25ad2c

Please sign in to comment.