From 97f19a7f4375b5d95fbf5a98d4a46cf5a6504b7b Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 18 Jul 2019 10:31:20 -0700
Subject: [PATCH 01/10] MAINT: Initial commit that attempts to implement
 validate API.

---
 q2_types/feature_data/_format.py  | 24 ++++++++++++++++++++----
 q2_types/feature_table/_format.py | 12 ++++++++++--
 2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 64cc5c62..52ca2878 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -38,7 +38,8 @@ class TaxonomyFormat(model.TextFileFormat):
 
     """
 
-    def sniff(self):
+    # Was formerly a sniff method that I renamed
+    def _check_file_format(self, root, n=None):
         with self.open() as fh:
             count = 0
             while count < 10:
@@ -61,6 +62,9 @@ def sniff(self):
 
             return False if count == 0 else True
 
+    def _validate_(self, level):
+        self._check_file_format(self, n={'min': 10, 'max': None}[level])
+
 
 TaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(
     'TaxonomyDirectoryFormat', 'taxonomy.tsv', TaxonomyFormat)
@@ -96,7 +100,7 @@ class TSVTaxonomyFormat(model.TextFileFormat):
     """
     HEADER = ['Feature ID', 'Taxon']
 
-    def sniff(self):
+    def _check_tsv_tax_format(self, root, n=None):
         with self.open() as fh:
             data_lines = 0
             header = None
@@ -125,13 +129,17 @@ def sniff(self):
 
             return header is not None and data_lines > 0
 
+    def _validate_(self, level):
+        self._check_tsv_tax_format(root=str(self.path.parent),
+                                   n={'min': 10, 'max': None}[level])
+
 
 TSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(
     'TSVTaxonomyDirectoryFormat', 'taxonomy.tsv', TSVTaxonomyFormat)
 
 
 class DNAFASTAFormat(model.TextFileFormat):
-    def sniff(self):
+    def _check_dna_fasta_format(self, root, n=None):
         filepath = str(self)
         sniffer = skbio.io.io_registry.get_sniffer('fasta')
         if sniffer(filepath)[0]:
@@ -149,6 +157,10 @@ def sniff(self):
         empty_sniffer = skbio.io.io_registry.get_sniffer('<emptyfile>')
         return empty_sniffer(filepath)[0]
 
+    def _validate_(self, level):
+        self._check_dna_fasta_format(root=str(self.path.parent),
+                                     n={'min': 10, 'max': None}[level])
+
 
 DNASequencesDirectoryFormat = model.SingleFileDirectoryFormat(
     'DNASequencesDirectoryFormat', 'dna-sequences.fasta', DNAFASTAFormat)
@@ -162,7 +174,7 @@ class PairedDNASequencesDirectoryFormat(model.DirectoryFormat):
 
 
 class AlignedDNAFASTAFormat(model.TextFileFormat):
-    def sniff(self):
+    def _check_aligned_dna_fasta_format(self, root, n=None):
         filepath = str(self)
         sniffer = skbio.io.io_registry.get_sniffer('fasta')
         if sniffer(filepath)[0]:
@@ -179,6 +191,10 @@ def sniff(self):
                 pass
         return False
 
+    def _validate_(self, level):
+        self._check_aligned_dna_fasta_format(root=str(self.path.parent),
+                                             n={'min': 10, 'max': None}[level])
+
 
 AlignedDNASequencesDirectoryFormat = model.SingleFileDirectoryFormat(
     'AlignedDNASequencesDirectoryFormat', 'aligned-dna-sequences.fasta',
diff --git a/q2_types/feature_table/_format.py b/q2_types/feature_table/_format.py
index 0fa76a7f..9a1779d3 100644
--- a/q2_types/feature_table/_format.py
+++ b/q2_types/feature_table/_format.py
@@ -21,7 +21,7 @@ class BIOMV100Format(model.TextFileFormat):
         'shape', 'data', 'comment'
     }
 
-    def sniff(self):
+    def _check_biomv100_format(self, root, n=None):
         with self.open() as fh:
             try:
                 parser = ijson.parse(fh)
@@ -36,6 +36,10 @@ def sniff(self):
                 pass
             return False
 
+    def _validate_(self, level):
+        self._check_biomv100_format(root=str(self.path.parent),
+                                    n={'min': 10, 'max': None}[level])
+
 
 class BIOMV210Format(model.BinaryFileFormat):
     # minimum requirements as described by
@@ -68,7 +72,7 @@ class BIOMV210Format(model.BinaryFileFormat):
     def open(self):
         return h5py.File(str(self), mode=self._mode)
 
-    def sniff(self):
+    def _check_biomv210_format(self, root, n=None):
         try:
             with self.open() as fh:
                 for grp in self.groups:
@@ -84,6 +88,10 @@ def sniff(self):
         except Exception:
             return False
 
+    def _validate_(self, level):
+        self._check_biomv210_format(root=str(self.path.parent),
+                                    n={'min': 10, 'max': None}[level])
+
 
 BIOMV100DirFmt = model.SingleFileDirectoryFormat('BIOMV100DirFmt',
                                                  'feature-table.biom',

From 65a69e83cc896272ec510ed2a4a74afc50eeadad Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Fri, 19 Jul 2019 16:20:52 -0700
Subject: [PATCH 02/10] Added validation API to TSVTaxonomyFormat class only.
 Removed validation from all other classes in repository.

---
 q2_types/feature_data/_format.py  | 45 +++++++++++++++++--------------
 q2_types/feature_table/_format.py | 12 ++-------
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index e5cf65d6..1bbbd895 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -42,8 +42,7 @@ class TaxonomyFormat(model.TextFileFormat):
 
     """
 
-    # Was formerly a sniff method that I renamed
-    def _check_file_format(self, root, n=None):
+    def sniff(self):
         with self.open() as fh:
             count = 0
             while count < 10:
@@ -66,9 +65,6 @@ def _check_file_format(self, root, n=None):
 
             return False if count == 0 else True
 
-    def _validate_(self, level):
-        self._check_file_format(self, n={'min': 10, 'max': None}[level])
-
 
 TaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(
     'TaxonomyDirectoryFormat', 'taxonomy.tsv', TaxonomyFormat)
@@ -108,34 +104,47 @@ def _check_tsv_tax_format(self, root, n=None):
         with self.open() as fh:
             data_lines = 0
             header = None
-            while data_lines < 10:
-                line = fh.readline()
+
+            file_ = enumerate(fh) if n is None else zip(range(n), fh)
+
+            for iter, line in file_:
+                iter = iter + 1
 
                 if line == '':
                     # EOF
                     break
-                elif line.lstrip(' ') == '\n':
-                    # Blank line
+                elif line.strip(' ') == '\n':
                     continue
                 elif line.startswith('#'):
                     # Comment line
                     continue
 
-                cells = line.rstrip('\n').split('\t')
+                cells = line.strip('\n').split('\t')
+
                 if header is None:
                     if cells[:2] != self.HEADER:
-                        return False
+                        raise ValidationError("Anthony TSVTaxonomy")
                     header = cells
                 else:
                     if len(cells) != len(header):
-                        return False
+                        raise ValidationError("Number of headers are not the "
+                                              "same as number of colums in "
+                                              "the file.")
                     data_lines += 1
 
-            return header is not None and data_lines > 0
+            if header is None:
+                raise ValidationError("This file must contain 'Feature ID' "
+                                      "and 'Taxon' as header values to "
+                                      "meet formatting requirements.")
+
+            if data_lines == 0:
+                raise ValidationError("No sample records found in manifest, "
+                                      "only observed comments, blank lines, "
+                                      "and/or a header row.")
 
     def _validate_(self, level):
-        self._check_tsv_tax_format(root=str(self.path.parent),
-                                   n={'min': 10, 'max': None}[level])
+        self._check_tsv_tax_format(root=str(self.path.parent), n={'min': 1,
+                                   'max': None}[level])
 
 
 TSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(
@@ -198,7 +207,7 @@ class PairedDNASequencesDirectoryFormat(model.DirectoryFormat):
 
 
 class AlignedDNAFASTAFormat(model.TextFileFormat):
-    def _check_aligned_dna_fasta_format(self, root, n=None):
+    def sniff(self):
         filepath = str(self)
         sniffer = skbio.io.io_registry.get_sniffer('fasta')
         if sniffer(filepath)[0]:
@@ -215,10 +224,6 @@ def _check_aligned_dna_fasta_format(self, root, n=None):
                 pass
         return False
 
-    def _validate_(self, level):
-        self._check_aligned_dna_fasta_format(root=str(self.path.parent),
-                                             n={'min': 10, 'max': None}[level])
-
 
 AlignedDNASequencesDirectoryFormat = model.SingleFileDirectoryFormat(
     'AlignedDNASequencesDirectoryFormat', 'aligned-dna-sequences.fasta',
diff --git a/q2_types/feature_table/_format.py b/q2_types/feature_table/_format.py
index 9a1779d3..0fa76a7f 100644
--- a/q2_types/feature_table/_format.py
+++ b/q2_types/feature_table/_format.py
@@ -21,7 +21,7 @@ class BIOMV100Format(model.TextFileFormat):
         'shape', 'data', 'comment'
     }
 
-    def _check_biomv100_format(self, root, n=None):
+    def sniff(self):
         with self.open() as fh:
             try:
                 parser = ijson.parse(fh)
@@ -36,10 +36,6 @@ def _check_biomv100_format(self, root, n=None):
                 pass
             return False
 
-    def _validate_(self, level):
-        self._check_biomv100_format(root=str(self.path.parent),
-                                    n={'min': 10, 'max': None}[level])
-
 
 class BIOMV210Format(model.BinaryFileFormat):
     # minimum requirements as described by
@@ -72,7 +68,7 @@ class BIOMV210Format(model.BinaryFileFormat):
     def open(self):
         return h5py.File(str(self), mode=self._mode)
 
-    def _check_biomv210_format(self, root, n=None):
+    def sniff(self):
         try:
             with self.open() as fh:
                 for grp in self.groups:
@@ -88,10 +84,6 @@ def _check_biomv210_format(self, root, n=None):
         except Exception:
             return False
 
-    def _validate_(self, level):
-        self._check_biomv210_format(root=str(self.path.parent),
-                                    n={'min': 10, 'max': None}[level])
-
 
 BIOMV100DirFmt = model.SingleFileDirectoryFormat('BIOMV100DirFmt',
                                                  'feature-table.biom',

From 747d0aec59e5a023440a48d329607c9185414827 Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 25 Jul 2019 10:14:25 -0700
Subject: [PATCH 03/10] MAINT: Changed minimum number of lines to validate
 file. Improved validation error messages for users.

---
 q2_types/feature_data/_format.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 1bbbd895..170a9789 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -100,15 +100,15 @@ class TSVTaxonomyFormat(model.TextFileFormat):
     """
     HEADER = ['Feature ID', 'Taxon']
 
-    def _check_tsv_tax_format(self, root, n=None):
+    def _check_tsv_tax_format(self, n=None):
         with self.open() as fh:
             data_lines = 0
             header = None
 
             file_ = enumerate(fh) if n is None else zip(range(n), fh)
 
-            for iter, line in file_:
-                iter = iter + 1
+            for i, line in file_:
+                i = i + 1
 
                 if line == '':
                     # EOF
@@ -123,19 +123,22 @@ def _check_tsv_tax_format(self, root, n=None):
 
                 if header is None:
                     if cells[:2] != self.HEADER:
-                        raise ValidationError("Anthony TSVTaxonomy")
+                        raise ValidationError("'Feature ID' and 'Taxon' must"
+                                              " be included as headers to be"
+                                              " a valid TSV file. Please check"
+                                              " header values in your file.")
                     header = cells
                 else:
                     if len(cells) != len(header):
-                        raise ValidationError("Number of headers are not the "
-                                              "same as number of colums in "
-                                              "the file.")
-                    data_lines += 1
+                        raise ValidationError('Number of headers are not the '
+                                              'same as number of columns in '
+                                              'the file. \nNumber of headers: '
+                                              '{} \nNumber of columns: {} '
+                                              '\nIssue on line: {}'
+                                              .format(len(header), len(cells),
+                                                      i))
 
-            if header is None:
-                raise ValidationError("This file must contain 'Feature ID' "
-                                      "and 'Taxon' as header values to "
-                                      "meet formatting requirements.")
+                    data_lines += 1
 
             if data_lines == 0:
                 raise ValidationError("No sample records found in manifest, "
@@ -143,8 +146,7 @@ def _check_tsv_tax_format(self, root, n=None):
                                       "and/or a header row.")
 
     def _validate_(self, level):
-        self._check_tsv_tax_format(root=str(self.path.parent), n={'min': 1,
-                                   'max': None}[level])
+        self._check_tsv_tax_format(n={'min': 10, 'max': None}[level])
 
 
 TSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(

From 58899781d68fbbde033ec193448f59c356f68c6d Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 25 Jul 2019 13:28:15 -0700
Subject: [PATCH 04/10] MAINT: Substituted index values for counts pertaining
 to header and column arrays in error message.

---
 q2_types/feature_data/_format.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 170a9789..0ef46a68 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -7,6 +7,7 @@
 # ----------------------------------------------------------------------------
 
 import re
+import textwrap
 import skbio.io
 
 import qiime2.plugin.model as model
@@ -132,11 +133,10 @@ def _check_tsv_tax_format(self, n=None):
                     if len(cells) != len(header):
                         raise ValidationError('Number of headers are not the '
                                               'same as number of columns in '
-                                              'the file. \nNumber of headers: '
-                                              '{} \nNumber of columns: {} '
+                                              'the file. \nHeader values: '
+                                              '{} \nColumn values: {} '
                                               '\nIssue on line: {}'
-                                              .format(len(header), len(cells),
-                                                      i))
+                                              .format(header, cells[:], i))
 
                     data_lines += 1
 

From 8ef5a2ccc2278ac1334a0d0ebc2949257a32ed13 Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 25 Jul 2019 13:31:35 -0700
Subject: [PATCH 05/10] Removed import of textwrap

---
 q2_types/feature_data/_format.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 0ef46a68..fdd7fd60 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -7,7 +7,6 @@
 # ----------------------------------------------------------------------------
 
 import re
-import textwrap
 import skbio.io
 
 import qiime2.plugin.model as model

From b60f0c2ff4a9fda93bd0df6850a1f2694def3628 Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Mon, 29 Jul 2019 14:58:00 -0700
Subject: [PATCH 06/10] MAINT: Changed method name which validates taxonomy
 files. Updated raised error messages.

---
 q2_types/feature_data/_format.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index fdd7fd60..7035e55c 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -100,7 +100,7 @@ class TSVTaxonomyFormat(model.TextFileFormat):
     """
     HEADER = ['Feature ID', 'Taxon']
 
-    def _check_tsv_tax_format(self, n=None):
+    def _check_n_records(self, n=None):
         with self.open() as fh:
             data_lines = 0
             header = None
@@ -123,10 +123,12 @@ def _check_tsv_tax_format(self, n=None):
 
                 if header is None:
                     if cells[:2] != self.HEADER:
-                        raise ValidationError("'Feature ID' and 'Taxon' must"
-                                              " be included as headers to be"
-                                              " a valid TSV file. Please check"
-                                              " header values in your file.")
+                        raise ValidationError("['Feature ID' and 'Taxon'] "
+                                              "must be the first two header "
+                                              "values to be a valid axonomy "
+                                              "file.\n\nThe first two header "
+                                              "values provided are: {}."
+                                              .format(cells[:2]))
                     header = cells
                 else:
                     if len(cells) != len(header):
@@ -134,18 +136,17 @@ def _check_tsv_tax_format(self, n=None):
                                               'same as number of columns in '
                                               'the file. \nHeader values: '
                                               '{} \nColumn values: {} '
-                                              '\nIssue on line: {}'
                                               .format(header, cells[:], i))
 
                     data_lines += 1
 
             if data_lines == 0:
-                raise ValidationError("No sample records found in manifest, "
+                raise ValidationError("No feature records found in manifest, "
                                       "only observed comments, blank lines, "
                                       "and/or a header row.")
 
     def _validate_(self, level):
-        self._check_tsv_tax_format(n={'min': 10, 'max': None}[level])
+        self._check_n_records(n={'min': 10, 'max': None}[level])
 
 
 TSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat(

From eae936705c810fd78f2cf5cb70596ac042ddecdb Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Mon, 5 Aug 2019 12:15:29 -0700
Subject: [PATCH 07/10] SQUASH: Corrected misspelling of taxonomy

---
 q2_types/feature_data/_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 7035e55c..78fcfebf 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -125,7 +125,7 @@ def _check_n_records(self, n=None):
                     if cells[:2] != self.HEADER:
                         raise ValidationError("['Feature ID' and 'Taxon'] "
                                               "must be the first two header "
-                                              "values to be a valid axonomy "
+                                              "values to be a valid taxonomy "
                                               "file.\n\nThe first two header "
                                               "values provided are: {}."
                                               .format(cells[:2]))

From 0206b83211de38db15a0ecd88d2202601746c086 Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 8 Aug 2019 13:14:22 -0700
Subject: [PATCH 08/10] SQUASH: Edited error messages, added comments, updated
 class description

---
 q2_types/feature_data/_format.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 78fcfebf..1885272c 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -93,9 +93,9 @@ class TSVTaxonomyFormat(model.TextFileFormat):
 
     Optionally followed by other arbitrary columns.
 
-    This format supports comment lines starting with #, and blank lines. The
-    expected header must be the first non-comment, non-blank line. In addition
-    to the header, there must be at least one line of data.
+    This format supports blank lines. The expected header must be the first
+    non-blank line. In addition to the header, there must be at least one line
+    of data.
 
     """
     HEADER = ['Feature ID', 'Taxon']
@@ -109,11 +109,12 @@ def _check_n_records(self, n=None):
 
             for i, line in file_:
                 i = i + 1
-
+                # Checks rows in the file, excludes header row
                 if line == '':
                     # EOF
                     break
-                elif line.strip(' ') == '\n':
+                elif line.lstrip(' ') == '\n':
+                    # Blank line
                     continue
                 elif line.startswith('#'):
                     # Comment line
@@ -125,15 +126,15 @@ def _check_n_records(self, n=None):
                     if cells[:2] != self.HEADER:
                         raise ValidationError("['Feature ID' and 'Taxon'] "
                                               "must be the first two header "
-                                              "values to be a valid taxonomy "
-                                              "file.\n\nThe first two header "
-                                              "values provided are: {}."
+                                              "values to be valid.\n\n The "
+                                              "first two header values "
+                                              "provided are: {}."
                                               .format(cells[:2]))
                     header = cells
                 else:
                     if len(cells) != len(header):
-                        raise ValidationError('Number of headers are not the '
-                                              'same as number of columns in '
+                        raise ValidationError('Number of columns are not the '
+                                              'same as number of headers in '
                                               'the file. \nHeader values: '
                                               '{} \nColumn values: {} '
                                               .format(header, cells[:], i))
@@ -141,9 +142,8 @@ def _check_n_records(self, n=None):
                     data_lines += 1
 
             if data_lines == 0:
-                raise ValidationError("No feature records found in manifest, "
-                                      "only observed comments, blank lines, "
-                                      "and/or a header row.")
+                raise ValidationError("No feature records found, only blank "
+                                      "lines and/or a header row.")
 
     def _validate_(self, level):
         self._check_n_records(n={'min': 10, 'max': None}[level])

From 51cbb8fedf7873c6a5f675188d0f0c121ce0c0e2 Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Thu, 8 Aug 2019 13:18:37 -0700
Subject: [PATCH 09/10] SQUASH: Merged feature branch with upstream

---
 q2_types/feature_data/_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index 4237adb4..e4d04dcc 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -105,8 +105,8 @@ def _check_n_records(self, n=None):
             file_ = enumerate(fh) if n is None else zip(range(n), fh)
 
             for i, line in file_:
-                i = i + 1
                 # Checks rows in the file, excludes header row
+                i = i + 1
                 if line == '':
                     # EOF
                     break

From 608dfc6788610f1845a4438c44b3972432aee04a Mon Sep 17 00:00:00 2001
From: David Rodriguez <davidrod805@gmail.com>
Date: Fri, 9 Aug 2019 11:16:44 -0700
Subject: [PATCH 10/10] SQUASH: Added test .tsv files and a test method

---
 q2_types/feature_data/_format.py                 | 16 +++++++++-------
 .../data/taxonomy/greater-column-length.tsv      |  3 +++
 .../data/taxonomy/greater-header-length.tsv      |  3 +++
 q2_types/feature_data/tests/test_format.py       | 12 ++++++++++++
 4 files changed, 27 insertions(+), 7 deletions(-)
 create mode 100644 q2_types/feature_data/tests/data/taxonomy/greater-column-length.tsv
 create mode 100644 q2_types/feature_data/tests/data/taxonomy/greater-header-length.tsv

diff --git a/q2_types/feature_data/_format.py b/q2_types/feature_data/_format.py
index e4d04dcc..5012388e 100644
--- a/q2_types/feature_data/_format.py
+++ b/q2_types/feature_data/_format.py
@@ -105,7 +105,7 @@ def _check_n_records(self, n=None):
             file_ = enumerate(fh) if n is None else zip(range(n), fh)
 
             for i, line in file_:
-                # Checks rows in the file, excludes header row
+                # Tracks line count for error reporting
                 i = i + 1
                 if line == '':
                     # EOF
@@ -122,15 +122,17 @@ def _check_n_records(self, n=None):
                                               "must be the first two header "
                                               "values to be valid.\n\n The "
                                               "first two header values "
-                                              "provided are: {}."
-                                              .format(cells[:2]))
+                                              "provided are: {}.\nIssue on "
+                                              "line {}"
+                                              .format(cells[:2], i))
                     header = cells
                 else:
                     if len(cells) != len(header):
-                        raise ValidationError('Number of columns are not the '
-                                              'same as number of headers in '
-                                              'the file. \nHeader values: '
-                                              '{} \nColumn values: {} '
+                        raise ValidationError("Number of columns are not the "
+                                              "same as number of headers in "
+                                              "the file. \nHeader values: "
+                                              "{} \nColumn values: {}\nIssue "
+                                              "on line: {}"
                                               .format(header, cells[:], i))
 
                     data_lines += 1
diff --git a/q2_types/feature_data/tests/data/taxonomy/greater-column-length.tsv b/q2_types/feature_data/tests/data/taxonomy/greater-column-length.tsv
new file mode 100644
index 00000000..00040321
--- /dev/null
+++ b/q2_types/feature_data/tests/data/taxonomy/greater-column-length.tsv
@@ -0,0 +1,3 @@
+Feature ID	Taxon
+seq1	k__Bacteria; p__Proteobacteria	-1.0
+seq2	k__Bacteria	1.0
diff --git a/q2_types/feature_data/tests/data/taxonomy/greater-header-length.tsv b/q2_types/feature_data/tests/data/taxonomy/greater-header-length.tsv
new file mode 100644
index 00000000..f53aa340
--- /dev/null
+++ b/q2_types/feature_data/tests/data/taxonomy/greater-header-length.tsv
@@ -0,0 +1,3 @@
+Feature ID	Taxon	Confidence	Random
+seq1	k__Foo; p__Bar	-1.0
+seq2	k__Foo; p__Baz	-42.0
diff --git a/q2_types/feature_data/tests/test_format.py b/q2_types/feature_data/tests/test_format.py
index 4329bd38..9a4b514d 100644
--- a/q2_types/feature_data/tests/test_format.py
+++ b/q2_types/feature_data/tests/test_format.py
@@ -134,6 +134,18 @@ def test_tsv_taxonomy_directory_format(self):
 
         format.validate()
 
+    def test_tsv_taxonomy_format_column_header_lengths(self):
+        filenames = ['greater-column-length.tsv', 'greater-header-length.tsv']
+
+        filepaths = [self.get_data_path(os.path.join('taxonomy', filename))
+                     for filename in filenames]
+
+        for filepath in filepaths:
+            format = TSVTaxonomyFormat(filepath, mode='r')
+
+            with self.assertRaisesRegex(ValidationError, 'Number of columns'):
+                format.validate()
+
 
 class TestDNAFASTAFormats(TestPluginBase):
     package = 'q2_types.feature_data.tests'