Skip to content

Commit c415e74

Browse files
committed
add suggestions
1 parent 4c91cae commit c415e74

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

qiita_db/metadata_template/test/test_util.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@
1313
import pandas as pd
1414
from pandas.util.testing import assert_frame_equal
1515

16-
from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
16+
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
17+
QiitaDBError)
1718
from qiita_db.metadata_template.util import (
1819
get_datatypes, as_python_types, prefix_sample_names_with_id,
1920
load_template_to_dataframe, get_invalid_sample_names)
@@ -153,6 +154,11 @@ def test_load_template_to_dataframe_lowercase(self):
153154
exp.rename(columns={"str_column": "str_CoLumn"}, inplace=True)
154155
assert_frame_equal(obs, exp)
155156

157+
def test_load_template_to_dataframe_non_utf8(self):
158+
bad = EXP_SAMPLE_TEMPLATE.replace('Test Sample 2', 'Test Sample\x962')
159+
with self.assertRaises(QiitaDBError):
160+
load_template_to_dataframe(StringIO(bad))
161+
156162
def test_load_template_to_dataframe_typechecking(self):
157163
obs = load_template_to_dataframe(
158164
StringIO(EXP_SAMPLE_TEMPLATE_LAT_ALL_INT))

qiita_db/metadata_template/util.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
import warnings
1616
from skbio.io.util import open_file
1717

18-
from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
18+
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
19+
QiitaDBError)
1920
from .constants import CONTROLLED_COLS
2021

2122
if PY3:
@@ -147,6 +148,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
147148
to the needed type.
148149
QiitaDBWarning
149150
When columns are dropped because they have no content for any sample.
151+
QiitaDBError
152+
When non UTF-8 characters are found in the file.
150153
151154
Notes
152155
-----
@@ -233,15 +236,16 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
233236
'center_projct_name': str})
234237
except UnicodeDecodeError:
235238
# Find row number and col number for utf-8 encoding errors
239+
headers = holdfile[0].strip().split('\t')
236240
errors = []
237-
for row, line in enumerate(holdfile):
241+
for row, line in enumerate(holdfile, 1):
238242
for col, cell in enumerate(line.split('\t')):
239243
try:
240244
cell.encode('utf-8')
241245
except UnicodeError:
242-
errors.append('Non-unicode value in cell at '
243-
'row %d col %d' % (row+1, col+1))
244-
raise ValueError(', '.join(errors))
246+
errors.append('row %d, header %s' % (row, headers[col]))
247+
raise QiitaDBError('Non UTF-8 characters found at ' +
248+
'; '.join(errors))
245249

246250
# let pandas infer the dtypes of these columns, if the inference is
247251
# not correct, then we have to raise an error

0 commit comments

Comments
 (0)