Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion qiita_db/metadata_template/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
import pandas as pd
from pandas.util.testing import assert_frame_equal

from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
QiitaDBError)
from qiita_db.metadata_template.util import (
get_datatypes, as_python_types, prefix_sample_names_with_id,
load_template_to_dataframe, get_invalid_sample_names)
Expand Down Expand Up @@ -153,6 +154,11 @@ def test_load_template_to_dataframe_lowercase(self):
exp.rename(columns={"str_column": "str_CoLumn"}, inplace=True)
assert_frame_equal(obs, exp)

def test_load_template_to_dataframe_non_utf8(self):
    """Loading a template with a non UTF-8 character raises QiitaDBError."""
    # Inject \x96 into one sample name; it is meant to stand in for a
    # character that cannot be decoded as UTF-8.
    corrupted_template = EXP_SAMPLE_TEMPLATE.replace(
        'Test Sample 2', 'Test Sample\x962')
    template_fh = StringIO(corrupted_template)
    with self.assertRaises(QiitaDBError):
        load_template_to_dataframe(template_fh)

def test_load_template_to_dataframe_typechecking(self):
obs = load_template_to_dataframe(
StringIO(EXP_SAMPLE_TEMPLATE_LAT_ALL_INT))
Expand Down
48 changes: 32 additions & 16 deletions qiita_db/metadata_template/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import warnings
from skbio.io.util import open_file

from qiita_db.exceptions import QiitaDBColumnError, QiitaDBWarning
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBWarning,
QiitaDBError)
from .constants import CONTROLLED_COLS

if PY3:
Expand Down Expand Up @@ -147,6 +148,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
to the needed type.
QiitaDBWarning
When columns are dropped because they have no content for any sample.
QiitaDBError
When non UTF-8 characters are found in the file.

Notes
-----
Expand Down Expand Up @@ -215,21 +218,34 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
# comment:
# using the tab character as the "comment" marker, we remove rows that
# consist only of delimiters, i.e. empty rows.
template = pd.read_csv(StringIO(''.join(holdfile)), sep='\t',
infer_datetime_format=True,
keep_default_na=False, na_values=[''],
parse_dates=True, index_col=False, comment='\t',
mangle_dupe_cols=False, converters={
'sample_name': lambda x: str(x).strip(),
# required sample template information
'physical_location': str,
'sample_type': str,
# collection_timestamp is not added here
'host_subject_id': str,
'description': str,
# common prep template information
'center_name': str,
'center_projct_name': str})
try:
template = pd.read_csv(StringIO(''.join(holdfile)), sep='\t',
encoding='utf-8', infer_datetime_format=True,
keep_default_na=False, na_values=[''],
parse_dates=True, index_col=False, comment='\t',
mangle_dupe_cols=False, converters={
'sample_name': lambda x: str(x).strip(),
# required sample template information
'physical_location': str,
'sample_type': str,
# collection_timestamp is not added here
'host_subject_id': str,
'description': str,
# common prep template information
'center_name': str,
'center_projct_name': str})
except UnicodeDecodeError:
# Find row number and col number for utf-8 encoding errors
headers = holdfile[0].strip().split('\t')
errors = []
for row, line in enumerate(holdfile, 1):
for col, cell in enumerate(line.split('\t')):
try:
cell.encode('utf-8')
except UnicodeError:
errors.append('row %d, header %s' % (row, headers[col]))
raise QiitaDBError('Non UTF-8 characters found at ' +
'; '.join(errors))

# let pandas infer the dtypes of these columns, if the inference is
# not correct, then we have to raise an error
Expand Down
8 changes: 4 additions & 4 deletions qiita_pet/handlers/study_handlers/description_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def process_sample_template(self, study, user, callback):

except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
QiitaDBDuplicateError, IOError, ValueError, KeyError,
CParserError, QiitaDBDuplicateHeaderError) as e:
CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
# Some error occurred while processing the sample template
# Show the error to the user so they can fix the template
msg = html_error_message % ('parsing the sample template:',
Expand Down Expand Up @@ -422,9 +422,9 @@ def add_prep_template(self, study, user, callback):
if warns:
msg = '; '.join([str(w.message) for w in warns])
msg_level = 'warning'
except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
QiitaDBDuplicateError, IOError, ValueError,
CParserError) as e:
except (TypeError, QiitaDBError, QiitaDBColumnError,
QiitaDBExecutionError, QiitaDBDuplicateError, IOError,
ValueError, CParserError) as e:
pt_id = None
# Some error occurred while processing the prep template
# Show the error to the user so they can fix the template
Expand Down