|
15 | 15 | import warnings |
16 | 16 | from skbio.io.util import open_file |
17 | 17 |
|
18 | | -from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning |
| 18 | +from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning, |
| 19 | + QiitaDBError) |
19 | 20 | from .constants import CONTROLLED_COLS |
20 | 21 |
|
21 | 22 | if PY3: |
@@ -147,6 +148,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True): |
147 | 148 | to the needed type. |
148 | 149 | QiitaDBWarning |
149 | 150 | When columns are dropped because they have no content for any sample. |
| 151 | + QiitaDBError |
| 152 | + When non-UTF-8 characters are found in the file. |
150 | 153 |
|
151 | 154 | Notes |
152 | 155 | ----- |
@@ -233,15 +236,16 @@ def load_template_to_dataframe(fn, strip_whitespace=True): |
233 | 236 | 'center_projct_name': str}) |
234 | 237 | except UnicodeDecodeError: |
235 | 238 | # Find row number and col number for utf-8 encoding errors |
| 239 | + headers = holdfile[0].strip().split('\t') |
236 | 240 | errors = [] |
237 | | - for row, line in enumerate(holdfile): |
| 241 | + for row, line in enumerate(holdfile, 1): |
238 | 242 | for col, cell in enumerate(line.split('\t')): |
239 | 243 | try: |
240 | 244 | cell.encode('utf-8') |
241 | 245 | except UnicodeError: |
242 | | - errors.append('Non-unicode value in cell at ' |
243 | | - 'row %d col %d' % (row+1, col+1)) |
244 | | - raise ValueError(', '.join(errors)) |
| 246 | + errors.append('row %d, header %s' % (row, headers[col])) |
| 247 | + raise QiitaDBError('Non UTF-8 characters found at ' + |
| 248 | + '; '.join(errors)) |
245 | 249 |
|
246 | 250 | # Let pandas infer the dtypes of these columns; if the inference is |
247 | 251 | # not correct, then we have to raise an error. |
|
0 commit comments