diff --git a/qiita_db/metadata_template/__init__.py b/qiita_db/metadata_template/__init__.py index ece9a5ff0..193ad1ce4 100644 --- a/qiita_db/metadata_template/__init__.py +++ b/qiita_db/metadata_template/__init__.py @@ -8,12 +8,13 @@ from .sample_template import SampleTemplate from .prep_template import PrepTemplate -from .util import load_template_to_dataframe +from .util import load_template_to_dataframe, looks_like_qiime_mapping_file from .constants import (TARGET_GENE_DATA_TYPES, SAMPLE_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS, - PREP_TEMPLATE_COLUMNS_TARGET_GENE) + PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS) __all__ = ['SampleTemplate', 'PrepTemplate', 'load_template_to_dataframe', 'TARGET_GENE_DATA_TYPES', 'SAMPLE_TEMPLATE_COLUMNS', - 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE'] + 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE', + 'CONTROLLED_COLS', 'looks_like_qiime_mapping_file'] diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 199269495..95a61701c 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -17,7 +17,8 @@ QiitaDBError) from qiita_db.metadata_template.util import ( get_datatypes, as_python_types, prefix_sample_names_with_id, - load_template_to_dataframe, get_invalid_sample_names) + load_template_to_dataframe, get_invalid_sample_names, + looks_like_qiime_mapping_file, _parse_mapping_file) class TestUtil(TestCase): @@ -64,6 +65,17 @@ def test_load_template_to_dataframe(self): exp.index.name = 'sample_name' assert_frame_equal(obs, exp) + def test_load_template_to_dataframe_qiime_map(self): + obs = load_template_to_dataframe(StringIO(QIIME_TUTORIAL_MAP_SUBSET), + index='#SampleID') + exp = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM) + exp.index.name = 'SampleID' + obs.sort_index(axis=0, inplace=True) + obs.sort_index(axis=1, inplace=True) + exp.sort_index(axis=0, inplace=True) + 
exp.sort_index(axis=1, inplace=True) + assert_frame_equal(obs, exp) + def test_load_template_to_dataframe_duplicate_cols(self): obs = load_template_to_dataframe( StringIO(EXP_SAMPLE_TEMPLATE_DUPE_COLS)) @@ -218,6 +230,43 @@ def test_invalid_lat_long(self): # prevent flake8 from complaining str(obs) + def test_looks_like_qiime_mapping_file(self): + obs = looks_like_qiime_mapping_file( + StringIO(EXP_SAMPLE_TEMPLATE)) + self.assertFalse(obs) + + obs = looks_like_qiime_mapping_file( + StringIO(QIIME_TUTORIAL_MAP_SUBSET)) + self.assertTrue(obs) + + obs = looks_like_qiime_mapping_file(StringIO()) + self.assertFalse(obs) + + def test_parse_mapping_file(self): + # Tests ported over from QIIME + s1 = ['#sample\ta\tb', '#comment line to skip', + 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk'] + exp = ([['x', 'y', 'z'], ['i', 'j', 'k']], + ['sample', 'a', 'b'], + ['comment line to skip', 'more skip']) + obs = _parse_mapping_file(s1) + self.assertEqual(obs, exp) + + # check that we strip double quotes by default + s2 = ['#sample\ta\tb', '#comment line to skip', + '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk'] + obs = _parse_mapping_file(s2) + self.assertEqual(obs, exp) + + +QIIME_TUTORIAL_MAP_SUBSET = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t" + "Description\n" + "PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t" + "Control_mouse_I.D._354\n" + "PC.607\tAACTGTGCGTAC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20071112\t" + "Fasting_mouse_I.D._607\n" +) EXP_SAMPLE_TEMPLATE = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -685,6 +734,19 @@ def test_invalid_lat_long(self): '2.Sample3': 'type1'}, 'str_column': {'2.Sample1': 'NA', '2.Sample2': 'NA', '2.Sample3': 'NA'}} +QIIME_TUTORIAL_MAP_DICT_FORM = { + 'BarcodeSequence': {'PC.354': 'AGCACGAGCCTA', + 'PC.607': 'AACTGTGCGTAC'}, + 'LinkerPrimerSequence': {'PC.354': 'YATGCTGCCTCCCGTAGGAGT', + 'PC.607': 'YATGCTGCCTCCCGTAGGAGT'}, + 'Treatment': {'PC.354': 'Control', + 
'PC.607': 'Fast'}, + 'DOB': {'PC.354': 20061218, + 'PC.607': 20071112}, + 'Description': {'PC.354': 'Control_mouse_I.D._354', + 'PC.607': 'Fasting_mouse_I.D._607'} +} + EXP_PREP_TEMPLATE = ( 'sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t' 'ebi_submission_accession\temp_status\texperiment_design_description\t' diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index b6a6521fc..25b6e8b5e 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -122,8 +122,8 @@ def prefix_sample_names_with_id(md_template, study_id): md_template.index.name = None -def load_template_to_dataframe(fn, strip_whitespace=True): - """Load a sample or a prep template into a data frame +def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'): + """Load a sample/prep template or a QIIME mapping file into a data frame Parameters ---------- @@ -132,6 +132,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True): strip_whitespace : bool, optional Defaults to True. Whether or not to strip whitespace from values in the input file + index : str, optional + Defaults to 'sample_name'. The index to use in the loaded information Returns ------- @@ -167,6 +169,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True): +=======================+==============+ | sample_name | str | +-----------------------+--------------+ + | #SampleID | str | + +-----------------------+--------------+ | physical_location | str | +-----------------------+--------------+ | has_physical_specimen | bool | @@ -203,6 +207,17 @@ def load_template_to_dataframe(fn, strip_whitespace=True): controlled_cols.update(CONTROLLED_COLS) holdfile[0] = '\t'.join(c.lower() if c.lower() in controlled_cols else c for c in cols) + + if index == "#SampleID": + # We're going to parse a QIIME mapping file. 
We are going to first + # parse it with the QIIME function so we can remove the comments + # easily and make sure that QIIME will accept this as a mapping file + data, headers, comments = _parse_mapping_file(holdfile) + holdfile = ["%s\n" % '\t'.join(d) for d in data] + holdfile.insert(0, "%s\n" % '\t'.join(headers)) + # The QIIME parser fixes the index and removes the # + index = 'SampleID' + # index_col: # is set as False, otherwise it is cast as a float and we want a string # keep_default: @@ -224,7 +239,7 @@ def load_template_to_dataframe(fn, strip_whitespace=True): keep_default_na=False, na_values=[''], parse_dates=True, index_col=False, comment='\t', mangle_dupe_cols=False, converters={ - 'sample_name': lambda x: str(x).strip(), + index: lambda x: str(x).strip(), # required sample template information 'physical_location': str, 'sample_type': str, @@ -263,21 +278,22 @@ def load_template_to_dataframe(fn, strip_whitespace=True): initial_columns = set(template.columns) - if 'sample_name' not in template.columns: - raise QiitaDBColumnError("The 'sample_name' column is missing from " - "your template, this file cannot be parsed.") + if index not in template.columns: + raise QiitaDBColumnError("The '%s' column is missing from " + "your template, this file cannot be parsed." 
+ % index) # remove rows that have no sample identifier but that may have other data # in the rest of the columns - template.dropna(subset=['sample_name'], how='all', inplace=True) + template.dropna(subset=[index], how='all', inplace=True) # set the sample name as the index - template.set_index('sample_name', inplace=True) + template.set_index(index, inplace=True) # it is not uncommon to find templates that have empty columns template.dropna(how='all', axis=1, inplace=True) - initial_columns.remove('sample_name') + initial_columns.remove(index) dropped_cols = initial_columns - set(template.columns) if dropped_cols: warnings.warn('The following column(s) were removed from the template ' @@ -315,3 +331,119 @@ def get_invalid_sample_names(sample_names): inv.append(s) return inv + + +def looks_like_qiime_mapping_file(fp): + """Checks if the file looks like a QIIME mapping file + + Parameters + ---------- + fp : str or file-like object + filepath to check if it looks like a QIIME mapping file + + Returns + ------- + bool + True if fp looks like a QIIME mapping file, false otherwise. + + + Notes + ----- + This is not doing a validation of the QIIME mapping file. It simply checks + the first line in the file and it returns true if the line starts with + '#SampleID', since a sample/prep template will start with 'sample_name' or + some other different column. + """ + first_line = None + with open_file(fp, mode='U') as f: + first_line = f.readline() + if not first_line: + return False + + first_col = first_line.split()[0] + return first_col == '#SampleID' + + +def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False): + """Parser for map file that relates samples to metadata. + + Format: header line with fields + optionally other comment lines starting with # + tab-delimited fields + + Parameters + ---------- + lines : iterable of str + The contents of the QIIME mapping file + strip_quotes : bool, optional + Defaults to true. 
If true, quotes are removed from the data + suppress_stripping : bool, optional + Defaults to false. If true, spaces are not stripped + + Returns + ------- + list of lists, list of str, list of str + The data in the mapping file, the headers and the comments + + Raises + ------ + QiitaDBError + If there is any error parsing the mapping file + + Notes + ----- + This code has been ported from QIIME. + """ + if strip_quotes: + if suppress_stripping: + # remove quotes but not spaces + + def strip_f(x): + return x.replace('"', '') + else: + # remove quotes and spaces + + def strip_f(x): + return x.replace('"', '').strip() + else: + if suppress_stripping: + # don't remove quotes or spaces + + def strip_f(x): + return x + else: + # remove spaces but not quotes + + def strip_f(x): + return x.strip() + + # Create lists to store the results + mapping_data = [] + header = [] + comments = [] + + # Begin iterating over lines + for line in lines: + line = strip_f(line) + if not line or (suppress_stripping and not line.strip()): + # skip blank lines when not stripping lines + continue + + if line.startswith('#'): + line = line[1:] + if not header: + header = line.strip().split('\t') + else: + comments.append(line) + else: + # Will add empty string to empty fields + tmp_line = map(strip_f, line.split('\t')) + if len(tmp_line) < len(header): + tmp_line.extend([''] * (len(header) - len(tmp_line))) + mapping_data.append(tmp_line) + if not header: + raise QiitaDBError("No header line was found in mapping file.") + if not mapping_data: + raise QiitaDBError("No data found in mapping file.") + + return mapping_data, header, comments diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py index ebdc85a1a..e5ea8c16a 100644 --- a/qiita_pet/handlers/study_handlers/description_handlers.py +++ b/qiita_pet/handlers/study_handlers/description_handlers.py @@ -23,11 +23,15 @@ from qiita_db.ontology import Ontology from 
qiita_db.metadata_template import (PrepTemplate, SampleTemplate, load_template_to_dataframe, - SAMPLE_TEMPLATE_COLUMNS) + SAMPLE_TEMPLATE_COLUMNS, + looks_like_qiime_mapping_file) from qiita_db.util import convert_to_id, get_mountpoint from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, QiitaDBDuplicateHeaderError, QiitaDBError) +from qiita_ware.metadata_pipeline import ( + create_templates_from_qiime_mapping_file) +from qiita_ware.exceptions import QiitaWareError from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import check_access from qiita_pet.handlers.study_handlers.listing_handlers import ( @@ -161,13 +165,11 @@ def process_sample_template(self, study, user, callback): HTTPError If the sample template file does not exists """ - # If we are on this function, the argument "sample_template" must - # defined. If not, let tornado raise its error + # If we are on this function, the arguments "sample_template" and + # "data_type" must be defined. 
If not, let tornado raise its error sample_template = self.get_argument('sample_template') + data_type = self.get_argument('data_type') - # Define here the message and message level in case of success - msg = "The sample template '%s' has been added" % sample_template - msg_level = "success" # Get the uploads folder _, base_fp = get_mountpoint("uploads")[0] # Get the path of the sample template in the uploads folder @@ -177,25 +179,35 @@ def process_sample_template(self, study, user, callback): # The file does not exist, fail nicely raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp) + # Define here the message and message level in case of success + msg = "The sample template '%s' has been added" % sample_template + msg_level = "success" + is_mapping_file = looks_like_qiime_mapping_file(fp_rsp) + try: with warnings.catch_warnings(record=True) as warns: # deleting previous uploads and inserting new one self.remove_add_study_template(study.raw_data, study.id, - fp_rsp) + fp_rsp, data_type, + is_mapping_file) - # join all the warning messages into one. Note that this info - # will be ignored if an exception is raised + # join all the warning messages into one. 
Note that this + # info will be ignored if an exception is raised if warns: msg = '; '.join([str(w.message) for w in warns]) msg_level = 'warning' except (TypeError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, IOError, ValueError, KeyError, - CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e: + CParserError, QiitaDBDuplicateHeaderError, + QiitaDBError, QiitaWareError) as e: # Some error occurred while processing the sample template # Show the error to the user so they can fix the template - msg = html_error_message % ('parsing the sample template:', - basename(fp_rsp), str(e)) + error_msg = ('parsing the QIIME mapping file' + if is_mapping_file + else 'parsing the sample template') + msg = html_error_message % (error_msg, basename(fp_rsp), + str(e)) msg_level = "danger" callback((msg, msg_level, None, None, None)) @@ -564,9 +576,14 @@ def unspecified_action(self, study, user, callback): msg_level = 'danger' callback((msg, msg_level, 'study_information_tab', None, None)) - def remove_add_study_template(self, raw_data, study_id, fp_rsp): + def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type, + is_mapping_file): """Replace prep templates, raw data, and sample template with a new one """ + if is_mapping_file and data_type == "": + raise ValueError("Please, choose a data type if uploading a QIIME " + "mapping file") + for rd in raw_data(): rd = RawData(rd) for pt in rd.prep_templates: @@ -575,8 +592,13 @@ def remove_add_study_template(self, raw_data, study_id, fp_rsp): if SampleTemplate.exists(study_id): SampleTemplate.delete(study_id) - SampleTemplate.create(load_template_to_dataframe(fp_rsp), - Study(study_id)) + if is_mapping_file: + create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id), + int(data_type)) + else: + SampleTemplate.create(load_template_to_dataframe(fp_rsp), + Study(study_id)) + remove(fp_rsp) def remove_add_prep_template(self, fp_rpt, study, data_type_id, @@ -616,6 +638,9 @@ def 
display_template(self, study, user, msg, msg_level, full_access, # modify the information of the study show_edit_btn = study_status != 'public' or user_level == 'admin' + # Make the error message suitable for html + msg = msg.replace('\n', "
") + self.render('study_description.html', message=msg, level=msg_level, diff --git a/qiita_pet/templates/study_description.html b/qiita_pet/templates/study_description.html index a9f564b45..c8d430ace 100644 --- a/qiita_pet/templates/study_description.html +++ b/qiita_pet/templates/study_description.html @@ -67,6 +67,10 @@ .attr("type", "hidden") .attr("name", "sample_template") .attr("value", $("#sample_template").val())) + .append($("") + .attr("type", "hidden") + .attr("name", "data_type") + .attr("value", $("#qiime_data_type").val())) .append($("") .attr("type", "hidden") .attr("name", "action") diff --git a/qiita_pet/templates/study_description_templates/study_information_tab.html b/qiita_pet/templates/study_description_templates/study_information_tab.html index 227ce8d23..5290d338a 100644 --- a/qiita_pet/templates/study_description_templates/study_information_tab.html +++ b/qiita_pet/templates/study_description_templates/study_information_tab.html @@ -12,15 +12,25 @@
{% if show_select_sample %} - Select your sample template
+ Select your sample template or, alternatively, a QIIME mapping file
(only files with the "txt" and "tsv" file extensions will be displayed here): + +
+ + {% if not sample_templates %} + If you are uploading a QIIME mapping file, please choose a data type: + + {% end %} {% end %}
diff --git a/qiita_pet/uimodules/study_information_tab.py b/qiita_pet/uimodules/study_information_tab.py index 5d2290269..b2e1c154e 100644 --- a/qiita_pet/uimodules/study_information_tab.py +++ b/qiita_pet/uimodules/study_information_tab.py @@ -7,8 +7,11 @@ # ----------------------------------------------------------------------------- from functools import partial +from operator import itemgetter -from qiita_db.util import get_files_from_uploads_folders +from future.utils import viewitems + +from qiita_db.util import get_files_from_uploads_folders, get_data_types from qiita_db.study import StudyPerson from qiita_db.metadata_template import SampleTemplate from qiita_pet.util import linkify @@ -34,10 +37,13 @@ def render(self, study): number_samples_promised = study_info['number_samples_promised'] number_samples_collected = study_info['number_samples_collected'] metadata_complete = study_info['metadata_complete'] + data_types = sorted(viewitems(get_data_types()), key=itemgetter(1)) # Retrieve the files from the uploads folder, so the user can choose - # the sample template of the study - files = [f for _, f in get_files_from_uploads_folders(str(study.id))] + # the sample template of the study. Filter them to only include the + # ones that ends with 'txt' or 'tsv'. 
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the QIIME mapping file does not contain the BarcodeSequence,
        LinkerPrimerSequence or Description columns
    """
    # Function-scope import so this block does not depend on a change to the
    # module-level import list
    from numbers import Integral

    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is optional: only rename it when it is present so
    # it is not reported as missing below
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # sorted() keeps the error message stable across runs
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        for restriction in viewvalues(restriction_set):
            for cols in viewkeys(restriction.columns):
                yield cols

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    # numbers.Integral covers int on Python 3 and int/long on Python 2; the
    # bare name `long` does not exist on Python 3
    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, Integral) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Split with a boolean mask over the existing columns: this keeps the
    # column order of the mapping file and avoids indexing with a set
    # through the deprecated DataFrame.ix accessor
    is_prep_col = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~is_prep_col]
    pt_md = qiime_map.loc[:, is_prep_col]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))
+# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from StringIO import StringIO +from unittest import TestCase, main +from os import remove +from os.path import exists + +from qiita_core.util import qiita_test_checker +from qiita_ware.exceptions import QiitaWareError +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import get_count +from qiita_ware.metadata_pipeline import ( + create_templates_from_qiime_mapping_file) + + +@qiita_test_checker() +class TestMetadataPipeline(TestCase): + def setUp(self): + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "portal_type_id": 3, + "study_alias": "FCM", + "study_description": "Microbiome of people who eat nothing but " + "fried chicken", + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + self.new_study = Study.create( + User('test@foo.bar'), "Fried Chicken Microbiome", [1], info) + self._clean_up_files = [] + + def tearDown(self): + for fp in self._clean_up_files: + if exists(fp): + remove(fp) + + def test_create_templates_from_qiime_mapping_file(self): + new_pt_id = get_count('qiita.prep_template') + 1 + obs_st, obs_pt = create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP), self.new_study, "16S") + + # Be green: clean the environment + for template in [obs_st, obs_pt]: + for _, fp in template.get_filepaths(): + self._clean_up_files.append(fp) + + self.assertEqual(obs_st.id, self.new_study.id) + self.assertEqual(obs_pt.id, new_pt_id) + + # Check that each template has the correct columns + exp = {"physical_specimen_location", "physical_specimen_remaining", + "dna_extracted", "sample_type", 
"host_subject_id", "latitude", + "longitude", "taxon_id", "scientific_name", + "collection_timestamp", "description"} + self.assertEqual(set(obs_st.categories()), exp) + + exp = {"barcode", "primer", "center_name", "run_prefix", "platform", + "library_construction_protocol", + "experiment_design_description"} + self.assertEqual(set(obs_pt.categories()), exp) + + def test_create_templates_from_qiime_mapping_file_reverse_linker(self): + new_pt_id = get_count('qiita.prep_template') + 1 + obs_st, obs_pt = create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER), + self.new_study, "16S") + + # Be green: clean the environment + for template in [obs_st, obs_pt]: + for _, fp in template.get_filepaths(): + self._clean_up_files.append(fp) + + self.assertEqual(obs_st.id, self.new_study.id) + self.assertEqual(obs_pt.id, new_pt_id) + + # Check that each template has the correct columns + exp = {"physical_specimen_location", "physical_specimen_remaining", + "dna_extracted", "sample_type", "host_subject_id", "latitude", + "longitude", "taxon_id", "scientific_name", + "collection_timestamp", "description"} + self.assertEqual(set(obs_st.categories()), exp) + + exp = {"barcode", "primer", "center_name", "run_prefix", "platform", + "library_construction_protocol", + "experiment_design_description", "reverselinkerprimer"} + self.assertEqual(set(obs_pt.categories()), exp) + + def test_create_templates_from_qiime_mapping_file_error(self): + with self.assertRaises(QiitaWareError): + create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP_ERROR), self.new_study, "16S") + + +QIIME_MAP = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\t" + "physical_specimen_location\tphysical_specimen_remaining\tdna_extracted\t" + "sample_type\thost_subject_id\tlatitude\tlongitude\ttaxon_id\t" + "scientific_name\tcenter_name\trun_prefix\tplatform\t" + "library_construction_protocol\texperiment_design_description\t" + "collection_timestamp\tDescription\n" + 
"Sample1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype1\t" + "NotIdentified\t4.1\t4.1\t9606\thomo sapiens\tANL\trp_1\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype2\t" + "NotIdentified\t4.2\t4.2\t9606\thomo sapiens\tANL\trp_1\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype3\t" + "NotIdentified\t4.3\t4.3\t9606\thomo sapiens\tANL\trp_2\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S3\n") + +QIIME_MAP_WITH_REVERSE_LINKER_PRIMER = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tReverseLinkerPrimer\t" + "physical_specimen_location\tphysical_specimen_remaining\tdna_extracted\t" + "sample_type\thost_subject_id\tlatitude\tlongitude\ttaxon_id\t" + "scientific_name\tcenter_name\trun_prefix\tplatform\t" + "library_construction_protocol\texperiment_design_description\t" + "collection_timestamp\tDescription\n" + "Sample1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype1\tNotIdentified\t4.1\t4.1\t9606\thomo sapiens\tANL\t" + "rp_1\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype2\tNotIdentified\t4.2\t4.2\t9606\thomo sapiens\tANL\t" + "rp_1\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype3\tNotIdentified\t4.3\t4.3\t9606\thomo sapiens\tANL\t" + "rp_2\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S3\n") + +QIIME_MAP_ERROR = ( + "#SampleID\tBarcodeSequence\tphysical_specimen_location\t" + "physical_specimen_remaining\tdna_extracted\tsample_type\t" + "host_subject_id\tlatitude\tlongitude\ttaxon_id\tscientific_name\t" + "center_name\trun_prefix\tplatform\tlibrary_construction_protocol\t" + 
"experiment_design_description\tcollection_timestamp\tDescription\n" + "Sample1\tGTCCGCAAGTTA\tUCSD\tTRUE\tTRUE\ttype1\tNotIdentified\t4.1\t4.1\t" + "9606\thomo sapiens\tANL\trp_1\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tUCSD\tTRUE\tTRUE\ttype2\tNotIdentified\t4.2\t4.2\t" + "9606\thomo sapiens\tANL\trp_1\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tUCSD\tTRUE\tTRUE\ttype3\tNotIdentified\t4.3\t4.3\t" + "9606\thomo sapiens\tANL\trp_2\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S3\n") + + +if __name__ == "__main__": + main()