diff --git a/qiita_db/metadata_template/__init__.py b/qiita_db/metadata_template/__init__.py
index ece9a5ff0..193ad1ce4 100644
--- a/qiita_db/metadata_template/__init__.py
+++ b/qiita_db/metadata_template/__init__.py
@@ -8,12 +8,13 @@
from .sample_template import SampleTemplate
from .prep_template import PrepTemplate
-from .util import load_template_to_dataframe
+from .util import load_template_to_dataframe, looks_like_qiime_mapping_file
from .constants import (TARGET_GENE_DATA_TYPES, SAMPLE_TEMPLATE_COLUMNS,
PREP_TEMPLATE_COLUMNS,
- PREP_TEMPLATE_COLUMNS_TARGET_GENE)
+ PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS)
__all__ = ['SampleTemplate', 'PrepTemplate', 'load_template_to_dataframe',
'TARGET_GENE_DATA_TYPES', 'SAMPLE_TEMPLATE_COLUMNS',
- 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE']
+ 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE',
+ 'CONTROLLED_COLS', 'looks_like_qiime_mapping_file']
diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py
index 199269495..95a61701c 100644
--- a/qiita_db/metadata_template/test/test_util.py
+++ b/qiita_db/metadata_template/test/test_util.py
@@ -17,7 +17,8 @@
QiitaDBError)
from qiita_db.metadata_template.util import (
get_datatypes, as_python_types, prefix_sample_names_with_id,
- load_template_to_dataframe, get_invalid_sample_names)
+ load_template_to_dataframe, get_invalid_sample_names,
+ looks_like_qiime_mapping_file, _parse_mapping_file)
class TestUtil(TestCase):
@@ -64,6 +65,17 @@ def test_load_template_to_dataframe(self):
exp.index.name = 'sample_name'
assert_frame_equal(obs, exp)
+    def test_load_template_to_dataframe_qiime_map(self):
+        # Load the mapping file using the QIIME sample id column as index
+        mapping_file = StringIO(QIIME_TUTORIAL_MAP_SUBSET)
+        observed = load_template_to_dataframe(mapping_file, index='#SampleID')
+
+        expected = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM)
+        expected.index.name = 'SampleID'
+
+        # Sort rows and columns of both frames so ordering cannot make the
+        # comparison fail
+        for frame in (observed, expected):
+            for axis in (0, 1):
+                frame.sort_index(axis=axis, inplace=True)
+
+        assert_frame_equal(observed, expected)
+
def test_load_template_to_dataframe_duplicate_cols(self):
obs = load_template_to_dataframe(
StringIO(EXP_SAMPLE_TEMPLATE_DUPE_COLS))
@@ -218,6 +230,43 @@ def test_invalid_lat_long(self):
# prevent flake8 from complaining
str(obs)
+    def test_looks_like_qiime_mapping_file(self):
+        # A regular sample template is not a QIIME mapping file
+        self.assertFalse(
+            looks_like_qiime_mapping_file(StringIO(EXP_SAMPLE_TEMPLATE)))
+
+        # A file whose first column header is '#SampleID' is a mapping file
+        self.assertTrue(
+            looks_like_qiime_mapping_file(
+                StringIO(QIIME_TUTORIAL_MAP_SUBSET)))
+
+        # An empty file is not a mapping file
+        self.assertFalse(looks_like_qiime_mapping_file(StringIO()))
+
+    def test_parse_mapping_file(self):
+        # Tests ported over from QIIME
+        # Both inputs below must parse to the same (data, header, comments)
+        expected = ([['x', 'y', 'z'], ['i', 'j', 'k']],
+                    ['sample', 'a', 'b'],
+                    ['comment line to skip', 'more skip'])
+
+        plain_lines = ['#sample\ta\tb', '#comment line to skip',
+                       'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
+        self.assertEqual(_parse_mapping_file(plain_lines), expected)
+
+        # check that we strip double quotes by default
+        quoted_lines = ['#sample\ta\tb', '#comment line to skip',
+                        '"x "\t" y "\t z ', ' ', '"#more skip"',
+                        'i\t"j"\tk']
+        self.assertEqual(_parse_mapping_file(quoted_lines), expected)
+
+
+# QIIME mapping file fixture: a tab-separated '#SampleID' header row followed
+# by two sample rows (PC.354 and PC.607)
+QIIME_TUTORIAL_MAP_SUBSET = (
+    "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t"
+    "Description\n"
+    "PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t"
+    "Control_mouse_I.D._354\n"
+    "PC.607\tAACTGTGCGTAC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20071112\t"
+    "Fasting_mouse_I.D._607\n"
+)
EXP_SAMPLE_TEMPLATE = (
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
@@ -685,6 +734,19 @@ def test_invalid_lat_long(self):
'2.Sample3': 'type1'},
'str_column': {'2.Sample1': 'NA', '2.Sample2': 'NA', '2.Sample3': 'NA'}}
+# Expected contents of QIIME_TUTORIAL_MAP_SUBSET once loaded, in the
+# {column: {sample id: value}} form accepted by pd.DataFrame.from_dict.
+# The DOB values are integers; every other value is a string.
+QIIME_TUTORIAL_MAP_DICT_FORM = {
+    'BarcodeSequence': {'PC.354': 'AGCACGAGCCTA',
+                        'PC.607': 'AACTGTGCGTAC'},
+    'LinkerPrimerSequence': {'PC.354': 'YATGCTGCCTCCCGTAGGAGT',
+                             'PC.607': 'YATGCTGCCTCCCGTAGGAGT'},
+    'Treatment': {'PC.354': 'Control',
+                  'PC.607': 'Fast'},
+    'DOB': {'PC.354': 20061218,
+            'PC.607': 20071112},
+    'Description': {'PC.354': 'Control_mouse_I.D._354',
+                    'PC.607': 'Fasting_mouse_I.D._607'}
+}
+
EXP_PREP_TEMPLATE = (
'sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t'
'ebi_submission_accession\temp_status\texperiment_design_description\t'
diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
index b6a6521fc..25b6e8b5e 100644
--- a/qiita_db/metadata_template/util.py
+++ b/qiita_db/metadata_template/util.py
@@ -122,8 +122,8 @@ def prefix_sample_names_with_id(md_template, study_id):
md_template.index.name = None
-def load_template_to_dataframe(fn, strip_whitespace=True):
- """Load a sample or a prep template into a data frame
+def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
+ """Load a sample/prep template or a QIIME mapping file into a data frame
Parameters
----------
@@ -132,6 +132,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
strip_whitespace : bool, optional
Defaults to True. Whether or not to strip whitespace from values in the
input file
+ index : str, optional
+ Defaults to 'sample_name'. The index to use in the loaded information
Returns
-------
@@ -167,6 +169,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
+=======================+==============+
| sample_name | str |
+-----------------------+--------------+
+ | #SampleID | str |
+ +-----------------------+--------------+
| physical_location | str |
+-----------------------+--------------+
| has_physical_specimen | bool |
@@ -203,6 +207,17 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
controlled_cols.update(CONTROLLED_COLS)
holdfile[0] = '\t'.join(c.lower() if c.lower() in controlled_cols else c
for c in cols)
+
+ if index == "#SampleID":
+ # We're going to parse a QIIME mapping file. We are going to first
+ # parse it with the QIIME function so we can remove the comments
+ # easily and make sure that QIIME will accept this as a mapping file
+ data, headers, comments = _parse_mapping_file(holdfile)
+ holdfile = ["%s\n" % '\t'.join(d) for d in data]
+ holdfile.insert(0, "%s\n" % '\t'.join(headers))
+ # The QIIME parser fixes the index and removes the #
+ index = 'SampleID'
+
# index_col:
# is set as False, otherwise it is cast as a float and we want a string
# keep_default:
@@ -224,7 +239,7 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
keep_default_na=False, na_values=[''],
parse_dates=True, index_col=False, comment='\t',
mangle_dupe_cols=False, converters={
- 'sample_name': lambda x: str(x).strip(),
+ index: lambda x: str(x).strip(),
# required sample template information
'physical_location': str,
'sample_type': str,
@@ -263,21 +278,22 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
initial_columns = set(template.columns)
- if 'sample_name' not in template.columns:
- raise QiitaDBColumnError("The 'sample_name' column is missing from "
- "your template, this file cannot be parsed.")
+ if index not in template.columns:
+ raise QiitaDBColumnError("The '%s' column is missing from "
+ "your template, this file cannot be parsed."
+ % index)
# remove rows that have no sample identifier but that may have other data
# in the rest of the columns
- template.dropna(subset=['sample_name'], how='all', inplace=True)
+ template.dropna(subset=[index], how='all', inplace=True)
# set the sample name as the index
- template.set_index('sample_name', inplace=True)
+ template.set_index(index, inplace=True)
# it is not uncommon to find templates that have empty columns
template.dropna(how='all', axis=1, inplace=True)
- initial_columns.remove('sample_name')
+ initial_columns.remove(index)
dropped_cols = initial_columns - set(template.columns)
if dropped_cols:
warnings.warn('The following column(s) were removed from the template '
@@ -315,3 +331,119 @@ def get_invalid_sample_names(sample_names):
inv.append(s)
return inv
+
+
+def looks_like_qiime_mapping_file(fp):
+    """Checks if the file looks like a QIIME mapping file
+
+    Parameters
+    ----------
+    fp : str or file-like object
+        filepath to check if it looks like a QIIME mapping file
+
+    Returns
+    -------
+    bool
+        True if fp looks like a QIIME mapping file, false otherwise.
+
+    Notes
+    -----
+    This is not doing a validation of the QIIME mapping file. It simply checks
+    the first line in the file and it returns true if the first
+    whitespace-delimited token of that line is '#SampleID', since a
+    sample/prep template will start with 'sample_name' or some other different
+    column. An empty file, or a file whose first line only contains
+    whitespace, does not look like a QIIME mapping file.
+    """
+    with open_file(fp, mode='U') as f:
+        first_line = f.readline()
+
+    # str.split() with no arguments returns an empty list for an empty or
+    # whitespace-only line, so check it before indexing to avoid IndexError
+    columns = first_line.split()
+    return bool(columns) and columns[0] == '#SampleID'
+
+
+def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False):
+    """Parser for map file that relates samples to metadata.
+
+    Format: header line with fields
+            optionally other comment lines starting with #
+            tab-delimited fields
+
+    The first line starting with '#' is taken as the header; any later line
+    starting with '#' is stored as a comment. Data rows with fewer fields
+    than the header are padded with empty strings.
+
+    Parameters
+    ----------
+    lines : iterable of str
+        The contents of the QIIME mapping file
+    strip_quotes : bool, optional
+        Defaults to true. If true, quotes are removed from the data
+    suppress_stripping : bool, optional
+        Defaults to false. If true, spaces are not stripped
+
+    Returns
+    -------
+    list of lists, list of str, list of str
+        The data in the mapping file, the headers and the comments
+
+    Raises
+    ------
+    QiitaDBError
+        If no header line or no data line is found in the mapping file
+
+    Notes
+    -----
+    This code has been ported from QIIME.
+    """
+    def strip_f(x):
+        # Cleans a whole line or a single field according to the flags
+        if strip_quotes:
+            x = x.replace('"', '')
+        return x if suppress_stripping else x.strip()
+
+    # Create lists to store the results
+    mapping_data = []
+    header = []
+    comments = []
+
+    # Begin iterating over lines
+    for line in lines:
+        line = strip_f(line)
+        # When stripping is suppressed a whitespace-only line is still
+        # non-empty, so its stripped form is checked to skip blank lines
+        if not line or (suppress_stripping and not line.strip()):
+            continue
+
+        if line.startswith('#'):
+            line = line[1:]
+            if not header:
+                header = line.strip().split('\t')
+            else:
+                comments.append(line)
+        else:
+            # Build a real list (not a lazy map object) so that len() and
+            # extend() work regardless of the Python version
+            fields = [strip_f(field) for field in line.split('\t')]
+            # Will add empty string to empty fields; multiplying a list by
+            # a non-positive number yields [], so long rows are untouched
+            fields.extend([''] * (len(header) - len(fields)))
+            mapping_data.append(fields)
+
+    if not header:
+        raise QiitaDBError("No header line was found in mapping file.")
+    if not mapping_data:
+        raise QiitaDBError("No data found in mapping file.")
+
+    return mapping_data, header, comments
diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py
index ebdc85a1a..e5ea8c16a 100644
--- a/qiita_pet/handlers/study_handlers/description_handlers.py
+++ b/qiita_pet/handlers/study_handlers/description_handlers.py
@@ -23,11 +23,15 @@
from qiita_db.ontology import Ontology
from qiita_db.metadata_template import (PrepTemplate, SampleTemplate,
load_template_to_dataframe,
- SAMPLE_TEMPLATE_COLUMNS)
+ SAMPLE_TEMPLATE_COLUMNS,
+ looks_like_qiime_mapping_file)
from qiita_db.util import convert_to_id, get_mountpoint
from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError,
QiitaDBExecutionError, QiitaDBDuplicateError,
QiitaDBDuplicateHeaderError, QiitaDBError)
+from qiita_ware.metadata_pipeline import (
+ create_templates_from_qiime_mapping_file)
+from qiita_ware.exceptions import QiitaWareError
from qiita_pet.handlers.base_handlers import BaseHandler
from qiita_pet.handlers.util import check_access
from qiita_pet.handlers.study_handlers.listing_handlers import (
@@ -161,13 +165,11 @@ def process_sample_template(self, study, user, callback):
HTTPError
If the sample template file does not exists
"""
- # If we are on this function, the argument "sample_template" must
- # defined. If not, let tornado raise its error
+ # If we are on this function, the arguments "sample_template" and
+ # "data_type" must be defined. If not, let tornado raise its error
sample_template = self.get_argument('sample_template')
+ data_type = self.get_argument('data_type')
- # Define here the message and message level in case of success
- msg = "The sample template '%s' has been added" % sample_template
- msg_level = "success"
# Get the uploads folder
_, base_fp = get_mountpoint("uploads")[0]
# Get the path of the sample template in the uploads folder
@@ -177,25 +179,35 @@ def process_sample_template(self, study, user, callback):
# The file does not exist, fail nicely
raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)
+ # Define here the message and message level in case of success
+ msg = "The sample template '%s' has been added" % sample_template
+ msg_level = "success"
+ is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)
+
try:
with warnings.catch_warnings(record=True) as warns:
# deleting previous uploads and inserting new one
self.remove_add_study_template(study.raw_data, study.id,
- fp_rsp)
+ fp_rsp, data_type,
+ is_mapping_file)
- # join all the warning messages into one. Note that this info
- # will be ignored if an exception is raised
+ # join all the warning messages into one. Note that this
+ # info will be ignored if an exception is raised
if warns:
msg = '; '.join([str(w.message) for w in warns])
msg_level = 'warning'
except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
QiitaDBDuplicateError, IOError, ValueError, KeyError,
- CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
+ CParserError, QiitaDBDuplicateHeaderError,
+ QiitaDBError, QiitaWareError) as e:
# Some error occurred while processing the sample template
# Show the error to the user so they can fix the template
- msg = html_error_message % ('parsing the sample template:',
- basename(fp_rsp), str(e))
+ error_msg = ('parsing the QIIME mapping file'
+ if is_mapping_file
+ else 'parsing the sample template')
+ msg = html_error_message % (error_msg, basename(fp_rsp),
+ str(e))
msg_level = "danger"
callback((msg, msg_level, None, None, None))
@@ -564,9 +576,14 @@ def unspecified_action(self, study, user, callback):
msg_level = 'danger'
callback((msg, msg_level, 'study_information_tab', None, None))
- def remove_add_study_template(self, raw_data, study_id, fp_rsp):
+ def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type,
+ is_mapping_file):
"""Replace prep templates, raw data, and sample template with a new one
"""
+ if is_mapping_file and data_type == "":
+ raise ValueError("Please, choose a data type if uploading a QIIME "
+ "mapping file")
+
for rd in raw_data():
rd = RawData(rd)
for pt in rd.prep_templates:
@@ -575,8 +592,13 @@ def remove_add_study_template(self, raw_data, study_id, fp_rsp):
if SampleTemplate.exists(study_id):
SampleTemplate.delete(study_id)
- SampleTemplate.create(load_template_to_dataframe(fp_rsp),
- Study(study_id))
+ if is_mapping_file:
+ create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id),
+ int(data_type))
+ else:
+ SampleTemplate.create(load_template_to_dataframe(fp_rsp),
+ Study(study_id))
+
remove(fp_rsp)
def remove_add_prep_template(self, fp_rpt, study, data_type_id,
@@ -616,6 +638,9 @@ def display_template(self, study, user, msg, msg_level, full_access,
# modify the information of the study
show_edit_btn = study_status != 'public' or user_level == 'admin'
+ # Make the error message suitable for html
+        msg = msg.replace('\n', "<br/>")
+
self.render('study_description.html',
message=msg,
level=msg_level,
diff --git a/qiita_pet/templates/study_description.html b/qiita_pet/templates/study_description.html
index a9f564b45..c8d430ace 100644
--- a/qiita_pet/templates/study_description.html
+++ b/qiita_pet/templates/study_description.html
@@ -67,6 +67,10 @@
.attr("type", "hidden")
.attr("name", "sample_template")
.attr("value", $("#sample_template").val()))
+            .append($("<input>")
+ .attr("type", "hidden")
+ .attr("name", "data_type")
+ .attr("value", $("#qiime_data_type").val()))
.append($("<input>")
.attr("type", "hidden")
.attr("name", "action")
diff --git a/qiita_pet/templates/study_description_templates/study_information_tab.html b/qiita_pet/templates/study_description_templates/study_information_tab.html
index 227ce8d23..5290d338a 100644
--- a/qiita_pet/templates/study_description_templates/study_information_tab.html
+++ b/qiita_pet/templates/study_description_templates/study_information_tab.html
@@ -12,15 +12,25 @@