Add suggestions: raise QiitaDBError naming the row and header of non-UTF-8 cells, and add a test

squirrelo · squirrelo · commit c415e74aef08 · 2015-05-20T11:50:31.000-07:00
diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py
@@ -13,7 +13,8 @@
 import pandas as pd
 from pandas.util.testing import assert_frame_equal
 
-from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
+from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
+                                 QiitaDBError)
 from qiita_db.metadata_template.util import (
     get_datatypes, as_python_types, prefix_sample_names_with_id,
     load_template_to_dataframe, get_invalid_sample_names)
@@ -153,6 +154,11 @@ def test_load_template_to_dataframe_lowercase(self):
         exp.rename(columns={"str_column": "str_CoLumn"}, inplace=True)
         assert_frame_equal(obs, exp)
 
+    def test_load_template_to_dataframe_non_utf8(self):
+        bad = EXP_SAMPLE_TEMPLATE.replace('Test Sample 2', 'Test Sample\x962')
+        with self.assertRaises(QiitaDBError):
+            load_template_to_dataframe(StringIO(bad))
+
     def test_load_template_to_dataframe_typechecking(self):
         obs = load_template_to_dataframe(
             StringIO(EXP_SAMPLE_TEMPLATE_LAT_ALL_INT))
diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
@@ -15,7 +15,8 @@
 import warnings
 from skbio.io.util import open_file
 
-from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
+from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
+                                 QiitaDBError)
 from .constants import CONTROLLED_COLS
 
 if PY3:
@@ -147,6 +148,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
         to the needed type.
     QiitaDBWarning
         When columns are dropped because they have no content for any sample.
+    QiitaDBError
+        When non UTF-8 characters are found in the file.
 
     Notes
     -----
@@ -233,15 +236,16 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
                                    'center_projct_name': str})
     except UnicodeDecodeError:
         # Find row number and col number for utf-8 encoding errors
+        headers = holdfile[0].strip().split('\t')
         errors = []
-        for row, line in enumerate(holdfile):
+        for row, line in enumerate(holdfile, 1):
             for col, cell in enumerate(line.split('\t')):
                 try:
                     cell.encode('utf-8')
                 except UnicodeError:
-                    errors.append('Non-unicode value in cell at '
-                                  'row %d col %d' % (row+1, col+1))
-        raise ValueError(', '.join(errors))
+                    errors.append('row %d, header %s' % (row, headers[col]))
+        raise QiitaDBError('Non UTF-8 characters found at ' +
+                           '; '.join(errors))
 
     # let pandas infer the dtypes of these columns, if the inference is
     # not correct, then we have to raise an error