From b76b85f578f168942b3019bf310de32eb4613313 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 25 May 2015 22:03:06 -0500 Subject: [PATCH 01/14] Creating looks like qiime map function --- qiita_db/metadata_template/util.py | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index b6a6521fc..98257791c 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -315,3 +315,38 @@ def get_invalid_sample_names(sample_names): inv.append(s) return inv + + +def looks_like_qiime_mapping_file(fp): + """Checks if the file looks like a QIIME mapping file + + Parameters + ---------- + fp : str + filepath to check if it looks like a QIIME mapping file + + Returns + ------- + bool + True if fp looks like a QIIME mapping file, false otherwise. + + Raises + ------ + QiitaDBError + If an empty file is passed + + Notes + ----- + This is not doing a validation of the QIIME mapping file. It simply checks + the first line in the file and it returns true if the line starts with + '#SampleID', since a sample/prep template will start with 'sample_name' or + some other different column. + """ + line = None + with open_file(fp, mode='U') as f: + first_line = f.readline() + if not first_line: + raise QiitaDBError('Empty file passed!') + + first_col = first_line.split()[0] + return first_col == '#SampleID' From 4eb1cbc9e8926ac5e8a34a4a3dd6dc4bdabdd0a4 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 25 May 2015 22:10:17 -0500 Subject: [PATCH 02/14] Adding tests --- qiita_db/metadata_template/test/test_util.py | 42 +++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 199269495..0998430a4 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -17,7 +17,8 @@ QiitaDBError) from qiita_db.metadata_template.util import ( get_datatypes, as_python_types, prefix_sample_names_with_id, - load_template_to_dataframe, get_invalid_sample_names) + load_template_to_dataframe, get_invalid_sample_names, + looks_like_qiime_mapping_file) class TestUtil(TestCase): @@ -218,6 +219,45 @@ def test_invalid_lat_long(self): # prevent flake8 from complaining str(obs) + def test_looks_like_qiime_mapping_file(self): + obs = looks_like_qiime_mapping_file( + StringIO(EXP_SAMPLE_TEMPLATE)) + self.assertFalse(obs) + + obs = looks_like_qiime_mapping_file( + StringIO(QIIME_TUTORIAL_MAP)) + self.assertTrue(obs) + + def test_looks_like_qiime_mmapping_file_error(self): + with self.assertRaises(QiitaDBError): + looks_like_qiime_mapping_file(StringIO()) + + +QIIME_TUTORIAL_MAP = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t" + "Description\n" + "#Example mapping file for the QIIME analysis package. These 9 samples " + "are from a study of the effects of exercise and diet on mouse cardiac " + "physiology (Crawford, et al, PNAS, 2009).\n" + "PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t" + "Control_mouse_I.D._354\n" + "PC.355\tAACTCGTCGATG\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t" + "Control_mouse_I.D._355\n" + "PC.356\tACAGACCACTCA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061126\t" + "Control_mouse_I.D._356\n" + "PC.481\tACCAGCGACTAG\tYATGCTGCCTCCCGTAGGAGT\tControl\t20070314\t" + "Control_mouse_I.D._481\n" + "PC.593\tAGCAGCACTTGT\tYATGCTGCCTCCCGTAGGAGT\tControl\t20071210\t" + "Control_mouse_I.D._593\n" + "PC.607\tAACTGTGCGTAC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20071112\t" + "Fasting_mouse_I.D._607\n" + "PC.634\tACAGAGTCGGCT\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" + "Fasting_mouse_I.D._634\n" + "PC.635\tACCGCAGAGTCA\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" + "Fasting_mouse_I.D._635\n" + "PC.636\tACGGTGAGTGTC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" + "Fasting_mouse_I.D._636\n" +) EXP_SAMPLE_TEMPLATE = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" From 2fbc2ee72b3e8d864b77c9254b8037ea1f46e3d3 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 26 May 2015 13:13:41 -0500 Subject: [PATCH 03/14] Making load_template_to_dataframe to parse qiime mapping files --- qiita_db/metadata_template/util.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 98257791c..5e8e82378 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -122,8 +122,8 @@ def prefix_sample_names_with_id(md_template, study_id): md_template.index.name = None -def load_template_to_dataframe(fn, strip_whitespace=True): - """Load a sample or a prep template into a data frame +def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'): + """Load a sample/prep template or a QIIME mapping file into a data frame Parameters ---------- @@ -132,6 +132,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True): strip_whitespace : bool, optional Defaults to True. Whether or not to strip whitespace from values in the input file + index : str, optional + Defaults to 'sample_name'. The index to use in the loaded information Returns ------- @@ -167,6 +169,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True): +=======================+==============+ | sample_name | str | +-----------------------+--------------+ + | #SampleID | str | + +-----------------------+--------------+ | physical_location | str | +-----------------------+--------------+ | has_physical_specimen | bool | @@ -224,7 +228,7 @@ def load_template_to_dataframe(fn, strip_whitespace=True): keep_default_na=False, na_values=[''], parse_dates=True, index_col=False, comment='\t', mangle_dupe_cols=False, converters={ - 'sample_name': lambda x: str(x).strip(), + index: lambda x: str(x).strip(), # required sample template information 'physical_location': str, 'sample_type': str, @@ -263,21 +267,22 @@ def load_template_to_dataframe(fn, strip_whitespace=True): initial_columns = set(template.columns) - if 'sample_name' not in template.columns: - raise QiitaDBColumnError("The 'sample_name' column is missing from " - "your template, this file cannot be parsed.") + if index not in template.columns: + raise QiitaDBColumnError("The '%s' column is missing from " + "your template, this file cannot be parsed." + % index) # remove rows that have no sample identifier but that may have other data # in the rest of the columns - template.dropna(subset=['sample_name'], how='all', inplace=True) + template.dropna(subset=[index], how='all', inplace=True) # set the sample name as the index - template.set_index('sample_name', inplace=True) + template.set_index(index, inplace=True) # it is not uncommon to find templates that have empty columns template.dropna(how='all', axis=1, inplace=True) - initial_columns.remove('sample_name') + initial_columns.remove(index) dropped_cols = initial_columns - set(template.columns) if dropped_cols: warnings.warn('The following column(s) were removed from the template ' @@ -322,7 +327,7 @@ def looks_like_qiime_mapping_file(fp): Parameters ---------- - fp : str + fp : str or file-like object filepath to check if it looks like a QIIME mapping file Returns From 8d0120aa280e2fa60ab6a3b1dfe1576915b5ec55 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 26 May 2015 13:17:21 -0500 Subject: [PATCH 04/14] Adding test for qiime parsing --- qiita_db/metadata_template/test/test_util.py | 45 +++++++++++--------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 0998430a4..746df3879 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -65,6 +65,17 @@ def test_load_template_to_dataframe(self): exp.index.name = 'sample_name' assert_frame_equal(obs, exp) + def test_load_template_to_dataframe_qiime_map(self): + obs = load_template_to_dataframe(StringIO(QIIME_TUTORIAL_MAP_SUBSET), + index='#SampleID') + exp = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM) + exp.index.name = '#SampleID' + obs.sort_index(axis=0, inplace=True) + obs.sort_index(axis=1, inplace=True) + exp.sort_index(axis=0, inplace=True) + exp.sort_index(axis=1, inplace=True) + assert_frame_equal(obs, exp) + def test_load_template_to_dataframe_duplicate_cols(self): obs = load_template_to_dataframe( StringIO(EXP_SAMPLE_TEMPLATE_DUPE_COLS)) @@ -225,7 +236,7 @@ def test_looks_like_qiime_mapping_file(self): self.assertFalse(obs) obs = looks_like_qiime_mapping_file( - StringIO(QIIME_TUTORIAL_MAP)) + StringIO(QIIME_TUTORIAL_MAP_SUBSET)) self.assertTrue(obs) def test_looks_like_qiime_mmapping_file_error(self): @@ -233,30 +244,13 @@ def test_looks_like_qiime_mmapping_file_error(self): looks_like_qiime_mapping_file(StringIO()) -QIIME_TUTORIAL_MAP = ( +QIIME_TUTORIAL_MAP_SUBSET = ( "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t" "Description\n" - "#Example mapping file for the QIIME analysis package. These 9 samples " - "are from a study of the effects of exercise and diet on mouse cardiac " - "physiology (Crawford, et al, PNAS, 2009).\n" "PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t" "Control_mouse_I.D._354\n" - "PC.355\tAACTCGTCGATG\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t" - "Control_mouse_I.D._355\n" - "PC.356\tACAGACCACTCA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061126\t" - "Control_mouse_I.D._356\n" - "PC.481\tACCAGCGACTAG\tYATGCTGCCTCCCGTAGGAGT\tControl\t20070314\t" - "Control_mouse_I.D._481\n" - "PC.593\tAGCAGCACTTGT\tYATGCTGCCTCCCGTAGGAGT\tControl\t20071210\t" - "Control_mouse_I.D._593\n" "PC.607\tAACTGTGCGTAC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20071112\t" "Fasting_mouse_I.D._607\n" - "PC.634\tACAGAGTCGGCT\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" - "Fasting_mouse_I.D._634\n" - "PC.635\tACCGCAGAGTCA\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" - "Fasting_mouse_I.D._635\n" - "PC.636\tACGGTGAGTGTC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20080116\t" - "Fasting_mouse_I.D._636\n" ) EXP_SAMPLE_TEMPLATE = ( @@ -725,6 +719,19 @@ def test_looks_like_qiime_mmapping_file_error(self): '2.Sample3': 'type1'}, 'str_column': {'2.Sample1': 'NA', '2.Sample2': 'NA', '2.Sample3': 'NA'}} +QIIME_TUTORIAL_MAP_DICT_FORM = { + 'BarcodeSequence': {'PC.354': 'AGCACGAGCCTA', + 'PC.607': 'AACTGTGCGTAC'}, + 'LinkerPrimerSequence': {'PC.354': 'YATGCTGCCTCCCGTAGGAGT', + 'PC.607': 'YATGCTGCCTCCCGTAGGAGT'}, + 'Treatment': {'PC.354': 'Control', + 'PC.607': 'Fast'}, + 'DOB': {'PC.354': 20061218, + 'PC.607': 20071112}, + 'Description': {'PC.354': 'Control_mouse_I.D._354', + 'PC.607': 'Fasting_mouse_I.D._607'} +} + EXP_PREP_TEMPLATE = ( 'sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t' 'ebi_submission_accession\temp_status\texperiment_design_description\t' From 4b0816f869b73fd5c3dc7655ec7628d0a15ff449 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 27 May 2015 17:54:01 -0500 Subject: [PATCH 05/14] Adding metadata pipeline --- qiita_ware/metadata_pipeline.py | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 qiita_ware/metadata_pipeline.py diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py new file mode 100644 index 000000000..cd97c4101 --- /dev/null +++ b/qiita_ware/metadata_pipeline.py @@ -0,0 +1,60 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from future.utils import viewvalues, viewkeys + +from qiita_db.metadata_template import (load_template_to_dataframe, + SampleTemplate, PrepTemplate, + PREP_TEMPLATE_COLUMNS, + PREP_TEMPLATE_COLUMNS_TARGET_GENE, + CONTROLLED_COLS) +from qiita_db.util import convert_to_id + + +def create_templates_from_qiime_mapping_file(fp, study, data_type): + """Creates a sample template and a prep template from qiime mapping file + + Parameters + ---------- + fp : str or file-like object + Path to the QIIME mapping file + study : Study + The study to which the sample template belongs to + data_type : str or int + The data_type of the prep_template + + Returns + ------- + (SampleTemplate, PrepTemplate) + The templates created from the QIIME mapping file + """ + qiime_map = load_template_to_dataframe(fp) + + # Fix the casing in the columns that we control + qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c + for c in qiime_map.columns] + + # Figure out which columns belong to the prep template + def _col_iterator(restriction_set): + for restriction in viewvalues(restriction_set): + for cols in viewkeys(restriction.columns): + yield cols + + pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS)) + + data_type_str = (convert_from_id(data_type, "data_type") + if isinstance(data_type, (int, long)) else data_type) + + if data_type_str in TARGET_GENE_DATA_TYPES: + pt_cols.update( + col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE)) + + st_md = TODO + pt_md = TODO + + return SampleTemplate(st_md, study), PrepTemplate(pt_md, data_type) From 233f3b73ef5e32866cd1161fb1bf9bb9b5478954 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 28 May 2015 10:58:43 -0700 Subject: [PATCH 06/14] Adding create_templates_from_qiime_mapping_file functionality --- qiita_db/metadata_template/__init__.py | 5 +++-- qiita_ware/metadata_pipeline.py | 26 +++++++++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/qiita_db/metadata_template/__init__.py b/qiita_db/metadata_template/__init__.py index ece9a5ff0..deb4124ef 100644 --- a/qiita_db/metadata_template/__init__.py +++ b/qiita_db/metadata_template/__init__.py @@ -11,9 +11,10 @@ from .util import load_template_to_dataframe from .constants import (TARGET_GENE_DATA_TYPES, SAMPLE_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS, - PREP_TEMPLATE_COLUMNS_TARGET_GENE) + PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS) __all__ = ['SampleTemplate', 'PrepTemplate', 'load_template_to_dataframe', 'TARGET_GENE_DATA_TYPES', 'SAMPLE_TEMPLATE_COLUMNS', - 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE'] + 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE', + 'CONTROLLED_COLS'] diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py index cd97c4101..f6a5f6cfd 100644 --- a/qiita_ware/metadata_pipeline.py +++ b/qiita_ware/metadata_pipeline.py @@ -12,7 +12,8 @@ SampleTemplate, PrepTemplate, PREP_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS_TARGET_GENE, - CONTROLLED_COLS) + CONTROLLED_COLS, + TARGET_GENE_DATA_TYPES) from qiita_db.util import convert_to_id @@ -33,7 +34,17 @@ def create_templates_from_qiime_mapping_file(fp, study, data_type): (SampleTemplate, PrepTemplate) The templates created from the QIIME mapping file """ - qiime_map = load_template_to_dataframe(fp) + qiime_map = load_template_to_dataframe(fp, index='#SampleID') + + # There are a few columns in the QIIME mapping file that are special and + # we know how to deal with them + rename_cols = { + 'BarcodeSequence': 'barcode', + 'LinkerPrimerSequence': 'primer', + 'ReverseLinkerPrimer': 'reverselinkerprimer', + 'Description': 'description', + } + qiime_map.rename(columns=rename_cols, inplace=True) # Fix the casing in the columns that we control qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c @@ -54,7 +65,12 @@ def _col_iterator(restriction_set): pt_cols.update( col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE)) - st_md = TODO - pt_md = TODO + qiime_cols = set(qiime_map.columns) + pt_cols = qiime_cols.intersection(pt_cols) + st_cols = qiime_cols.difference(pt_cols) + + st_md = qiime_map.ix[:, st_cols] + pt_md = qiime_map.ix[:, pt_cols] - return SampleTemplate(st_md, study), PrepTemplate(pt_md, data_type) + return (SampleTemplate.create(st_md, study), + PrepTemplate.create(pt_md, data_type)) From 360f7a7e6d0045fa0234d5133d84c916984237b3 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 28 May 2015 14:31:34 -0700 Subject: [PATCH 07/14] Adding test --- qiita_ware/metadata_pipeline.py | 2 +- qiita_ware/test/test_metadata_pipeline.py | 95 +++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 qiita_ware/test/test_metadata_pipeline.py diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py index f6a5f6cfd..eabef4a04 100644 --- a/qiita_ware/metadata_pipeline.py +++ b/qiita_ware/metadata_pipeline.py @@ -73,4 +73,4 @@ def _col_iterator(restriction_set): pt_md = qiime_map.ix[:, pt_cols] return (SampleTemplate.create(st_md, study), - PrepTemplate.create(pt_md, data_type)) + PrepTemplate.create(pt_md, study, data_type)) diff --git a/qiita_ware/test/test_metadata_pipeline.py b/qiita_ware/test/test_metadata_pipeline.py new file mode 100644 index 000000000..095371c8f --- /dev/null +++ b/qiita_ware/test/test_metadata_pipeline.py @@ -0,0 +1,95 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from StringIO import StringIO +from unittest import TestCase, main +from os import remove +from os.path import exists + +from qiita_core.util import qiita_test_checker +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import get_count +from qiita_ware.metadata_pipeline import ( + create_templates_from_qiime_mapping_file) + + +@qiita_test_checker() +class TestMetadataPipeline(TestCase): + def setUp(self): + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "portal_type_id": 3, + "study_alias": "FCM", + "study_description": "Microbiome of people who eat nothing but " + "fried chicken", + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + self.new_study = Study.create( + User('test@foo.bar'), "Fried Chicken Microbiome", [1], info) + self._clean_up_files = [] + + def tearDown(self): + for fp in self._clean_up_files: + if exists(fp): + remove(fp) + + def test_create_templates_from_qiime_mapping_file(self): + new_pt_id = get_count('qiita.prep_template') + 1 + obs_st, obs_pt = create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP), self.new_study, "16S") + + # Be green: clean the environment + for template in [obs_st, obs_pt]: + for _, fp in template.get_filepaths(): + self._clean_up_files.append(fp) + + self.assertEqual(obs_st.id, self.new_study.id) + self.assertEqual(obs_pt.id, new_pt_id) + + # Check that each template has the correct columns + exp = {"physical_specimen_location", "physical_specimen_remaining", + "dna_extracted", "sample_type", "host_subject_id", "latitude", + "longitude", "taxon_id", "scientific_name", + "collection_timestamp", "description"} + self.assertEqual(set(obs_st.categories()), exp) + + exp = {"barcode", "primer", "center_name", "run_prefix", "platform", + "library_construction_protocol", + "experiment_design_description"} + self.assertEqual(set(obs_pt.categories()), exp) + + +QIIME_MAP = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\t" + "physical_specimen_location\tphysical_specimen_remaining\tdna_extracted\t" + "sample_type\thost_subject_id\tlatitude\tlongitude\ttaxon_id\t" + "scientific_name\tcenter_name\trun_prefix\tplatform\t" + "library_construction_protocol\texperiment_design_description\t" + "collection_timestamp\tDescription\n" + "Sample1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype1\t" + "NotIdentified\t4.1\t4.1\t9606\thomo sapiens\tANL\trp_1\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype2\t" + "NotIdentified\t4.2\t4.2\t9606\thomo sapiens\tANL\trp_1\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tGTGCCAGCMGCCGCGGTAA\tUCSD\tTRUE\tTRUE\ttype3\t" + "NotIdentified\t4.3\t4.3\t9606\thomo sapiens\tANL\trp_2\tILLUMINA\t" + "protocol_1\tedd_1\t05/28/15 11:00\tDescription S3\n") + + +if __name__ == "__main__": + main() From 94560b14d6aa00bd6f1be6fbdc08f44d45665562 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 28 May 2015 15:11:39 -0700 Subject: [PATCH 08/14] Moving code to handlers instead that in the template --- .../study_description_templates/study_information_tab.html | 2 -- qiita_pet/uimodules/study_information_tab.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/qiita_pet/templates/study_description_templates/study_information_tab.html b/qiita_pet/templates/study_description_templates/study_information_tab.html index 227ce8d23..dd517ec34 100644 --- a/qiita_pet/templates/study_description_templates/study_information_tab.html +++ b/qiita_pet/templates/study_description_templates/study_information_tab.html @@ -16,9 +16,7 @@ (only files with the "txt" and "tsv" file extensions will be displayed here): {% end %} diff --git a/qiita_pet/uimodules/study_information_tab.py b/qiita_pet/uimodules/study_information_tab.py index 5d2290269..56ca6867e 100644 --- a/qiita_pet/uimodules/study_information_tab.py +++ b/qiita_pet/uimodules/study_information_tab.py @@ -37,7 +37,8 @@ def render(self, study): # Retrieve the files from the uploads folder, so the user can choose # the sample template of the study - files = [f for _, f in get_files_from_uploads_folders(str(study.id))] + files = [f for _, f in get_files_from_uploads_folders(str(study.id)) + if f.endswith(('txt', 'tsv'))] # If the sample template exists, retrieve all its filepaths if SampleTemplate.exists(study.id): From 9db35a54ed5fe5bf528ab3e239d2ff831a766d4c Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 28 May 2015 16:35:49 -0700 Subject: [PATCH 09/14] Modifying interface so we can upload a QIIME mapping file --- qiita_db/metadata_template/__init__.py | 4 +- qiita_db/metadata_template/util.py | 2 +- .../study_handlers/description_handlers.py | 49 +++++++++++++------ qiita_pet/templates/study_description.html | 4 ++ .../study_information_tab.html | 14 +++++- qiita_pet/uimodules/study_information_tab.py | 12 +++-- qiita_ware/metadata_pipeline.py | 2 +- 7 files changed, 64 insertions(+), 23 deletions(-) diff --git a/qiita_db/metadata_template/__init__.py b/qiita_db/metadata_template/__init__.py index deb4124ef..193ad1ce4 100644 --- a/qiita_db/metadata_template/__init__.py +++ b/qiita_db/metadata_template/__init__.py @@ -8,7 +8,7 @@ from .sample_template import SampleTemplate from .prep_template import PrepTemplate -from .util import load_template_to_dataframe +from .util import load_template_to_dataframe, looks_like_qiime_mapping_file from .constants import (TARGET_GENE_DATA_TYPES, SAMPLE_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS) @@ -17,4 +17,4 @@ __all__ = ['SampleTemplate', 'PrepTemplate', 'load_template_to_dataframe', 'TARGET_GENE_DATA_TYPES', 'SAMPLE_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE', - 'CONTROLLED_COLS'] + 'CONTROLLED_COLS', 'looks_like_qiime_mapping_file'] diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 5e8e82378..632e7a46c 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -347,7 +347,7 @@ def looks_like_qiime_mapping_file(fp): '#SampleID', since a sample/prep template will start with 'sample_name' or some other different column. """ - line = None + first_line = None with open_file(fp, mode='U') as f: first_line = f.readline() if not first_line: diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py index ebdc85a1a..9cc6cbb00 100644 --- a/qiita_pet/handlers/study_handlers/description_handlers.py +++ b/qiita_pet/handlers/study_handlers/description_handlers.py @@ -23,11 +23,14 @@ from qiita_db.ontology import Ontology from qiita_db.metadata_template import (PrepTemplate, SampleTemplate, load_template_to_dataframe, - SAMPLE_TEMPLATE_COLUMNS) + SAMPLE_TEMPLATE_COLUMNS, + looks_like_qiime_mapping_file) from qiita_db.util import convert_to_id, get_mountpoint from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, QiitaDBDuplicateHeaderError, QiitaDBError) +from qiita_ware.metadata_pipeline import ( + create_templates_from_qiime_mapping_file) from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import check_access from qiita_pet.handlers.study_handlers.listing_handlers import ( @@ -161,13 +164,11 @@ def process_sample_template(self, study, user, callback): HTTPError If the sample template file does not exists """ - # If we are on this function, the argument "sample_template" must - # defined. If not, let tornado raise its error + # If we are on this function, the arguments "sample_template" and + # "data_type" must be defined. If not, let tornado raise its error sample_template = self.get_argument('sample_template') + data_type = self.get_argument('data_type') - # Define here the message and message level in case of success - msg = "The sample template '%s' has been added" % sample_template - msg_level = "success" # Get the uploads folder _, base_fp = get_mountpoint("uploads")[0] # Get the path of the sample template in the uploads folder @@ -177,25 +178,33 @@ def process_sample_template(self, study, user, callback): # The file does not exist, fail nicely raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp) + # Define here the message and message level in case of success + is_mapping_file = looks_like_qiime_mapping_file(fp_rsp) + try: with warnings.catch_warnings(record=True) as warns: # deleting previous uploads and inserting new one self.remove_add_study_template(study.raw_data, study.id, - fp_rsp) + fp_rsp, data_type, + is_mapping_file) - # join all the warning messages into one. Note that this info - # will be ignored if an exception is raised + # join all the warning messages into one. Note that this + # info will be ignored if an exception is raised if warns: msg = '; '.join([str(w.message) for w in warns]) msg_level = 'warning' except (TypeError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, IOError, ValueError, KeyError, - CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e: + CParserError, QiitaDBDuplicateHeaderError, + QiitaDBError) as e: # Some error occurred while processing the sample template # Show the error to the user so they can fix the template - msg = html_error_message % ('parsing the sample template:', - basename(fp_rsp), str(e)) + error_msg = ('parsing the QIIME mapping file' + if is_mapping_file + else 'parsing the sample template') + msg = html_error_message % (error_msg, basename(fp_rsp), + str(e)) msg_level = "danger" callback((msg, msg_level, None, None, None)) @@ -564,9 +573,14 @@ def unspecified_action(self, study, user, callback): msg_level = 'danger' callback((msg, msg_level, 'study_information_tab', None, None)) - def remove_add_study_template(self, raw_data, study_id, fp_rsp): + def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type, + is_mapping_file): """Replace prep templates, raw data, and sample template with a new one """ + if is_mapping_file and data_type == "": + raise ValueError("Please, choose a data type if uploading a QIIME " + "mapping file") + for rd in raw_data(): rd = RawData(rd) for pt in rd.prep_templates: @@ -575,8 +589,13 @@ def remove_add_study_template(self, raw_data, study_id, fp_rsp): if SampleTemplate.exists(study_id): SampleTemplate.delete(study_id) - SampleTemplate.create(load_template_to_dataframe(fp_rsp), - Study(study_id)) + if is_mapping_file: + create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id), + int(data_type)) + else: + SampleTemplate.create(load_template_to_dataframe(fp_rsp), + Study(study_id)) + remove(fp_rsp) def remove_add_prep_template(self, fp_rpt, study, data_type_id, diff --git a/qiita_pet/templates/study_description.html b/qiita_pet/templates/study_description.html index c610750a4..0ca9b705a 100644 --- a/qiita_pet/templates/study_description.html +++ b/qiita_pet/templates/study_description.html @@ -67,6 +67,10 @@ .attr("type", "hidden") .attr("name", "sample_template") .attr("value", $("#sample_template").val())) + .append($("") + .attr("type", "hidden") + .attr("name", "data_type") + .attr("value", $("#qiime_data_type").val())) .append($("") .attr("type", "hidden") .attr("name", "action") diff --git a/qiita_pet/templates/study_description_templates/study_information_tab.html b/qiita_pet/templates/study_description_templates/study_information_tab.html index dd517ec34..5290d338a 100644 --- a/qiita_pet/templates/study_description_templates/study_information_tab.html +++ b/qiita_pet/templates/study_description_templates/study_information_tab.html @@ -12,13 +12,25 @@
{% if show_select_sample %} - Select your sample template
+ Select your sample template or, alternatively, a QIIME mapping file
(only files with the "txt" and "tsv" file extensions will be displayed here): + +
+ + {% if not sample_templates %} + If you are uploading a QIIME mapping file, please choose a data type: + + {% end %} {% end %}
diff --git a/qiita_pet/uimodules/study_information_tab.py b/qiita_pet/uimodules/study_information_tab.py index 56ca6867e..b2e1c154e 100644 --- a/qiita_pet/uimodules/study_information_tab.py +++ b/qiita_pet/uimodules/study_information_tab.py @@ -7,8 +7,11 @@ # ----------------------------------------------------------------------------- from functools import partial +from operator import itemgetter -from qiita_db.util import get_files_from_uploads_folders +from future.utils import viewitems + +from qiita_db.util import get_files_from_uploads_folders, get_data_types from qiita_db.study import StudyPerson from qiita_db.metadata_template import SampleTemplate from qiita_pet.util import linkify @@ -34,9 +37,11 @@ def render(self, study): number_samples_promised = study_info['number_samples_promised'] number_samples_collected = study_info['number_samples_collected'] metadata_complete = study_info['metadata_complete'] + data_types = sorted(viewitems(get_data_types()), key=itemgetter(1)) # Retrieve the files from the uploads folder, so the user can choose - # the sample template of the study + # the sample template of the study. Filter them to only include the + # ones that ends with 'txt' or 'tsv'. files = [f for _, f in get_files_from_uploads_folders(str(study.id)) if f.endswith(('txt', 'tsv'))] @@ -69,4 +74,5 @@ def render(self, study): files=files, study_id=study.id, sample_templates=sample_templates, - is_local_request=is_local_request) + is_local_request=is_local_request, + data_types=data_types) diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py index eabef4a04..07f599b2b 100644 --- a/qiita_ware/metadata_pipeline.py +++ b/qiita_ware/metadata_pipeline.py @@ -14,7 +14,7 @@ PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS, TARGET_GENE_DATA_TYPES) -from qiita_db.util import convert_to_id +from qiita_db.util import convert_from_id def create_templates_from_qiime_mapping_file(fp, study, data_type): From 555ce64b4fd80c7367ed6bf7dd4fd0f1dbde7759 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 29 May 2015 14:05:09 -0700 Subject: [PATCH 10/14] Adding QIIME parser for mapping files and fix load_template_to_datafrme --- qiita_db/metadata_template/test/test_util.py | 20 ++++- qiita_db/metadata_template/util.py | 88 ++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 746df3879..6fa5f970d 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -18,7 +18,7 @@ from qiita_db.metadata_template.util import ( get_datatypes, as_python_types, prefix_sample_names_with_id, load_template_to_dataframe, get_invalid_sample_names, - looks_like_qiime_mapping_file) + looks_like_qiime_mapping_file, _parse_mapping_file) class TestUtil(TestCase): @@ -69,7 +69,7 @@ def test_load_template_to_dataframe_qiime_map(self): obs = load_template_to_dataframe(StringIO(QIIME_TUTORIAL_MAP_SUBSET), index='#SampleID') exp = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM) - exp.index.name = '#SampleID' + exp.index.name = 'SampleID' obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -243,6 +243,22 @@ def test_looks_like_qiime_mmapping_file_error(self): with self.assertRaises(QiitaDBError): looks_like_qiime_mapping_file(StringIO()) + def test_parse_mapping_file(self): + # Tests ported over from QIIME + s1 = ['#sample\ta\tb', '#comment line to skip', + 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk'] + exp = ([['x', 'y', 'z'], ['i', 'j', 'k']], + ['sample', 'a', 'b'], + ['comment line to skip', 'more skip']) + obs = _parse_mapping_file(s1) + self.assertEqual(obs, exp) + + # check that we strip double quotes by default + s2 = ['#sample\ta\tb', '#comment line to skip', + '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk'] + obs = _parse_mapping_file(s2) + self.assertEqual(obs, exp) + QIIME_TUTORIAL_MAP_SUBSET = ( "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t" diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 632e7a46c..83a6bdd74 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -207,6 +207,17 @@ def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'): controlled_cols.update(CONTROLLED_COLS) holdfile[0] = '\t'.join(c.lower() if c.lower() in controlled_cols else c for c in cols) + + if index == "#SampleID": + # We're going to parse a QIIME mapping file. We are going to first + # parse it with the QIIME function so we can remove the comments + # easily and make sure that QIIME will accept this as a mapping file + data, headers, comments = _parse_mapping_file(holdfile) + holdfile = ["%s\n" % '\t'.join(d) for d in data] + holdfile.insert(0, "%s\n" % '\t'.join(headers)) + # The QIIME parser fixes the index and removes the # + index = 'SampleID' + # index_col: # is set as False, otherwise it is cast as a float and we want a string # keep_default: @@ -355,3 +366,80 @@ def looks_like_qiime_mapping_file(fp): first_col = first_line.split()[0] return first_col == '#SampleID' + + +def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False): + """Parser for map file that relates samples to metadata. + + Format: header line with fields + optionally other comment lines starting with # + tab-delimited fields + + Parameters + ---------- + lines : iterable of str + The contents of the QIIME mapping file + strip_quotes : bool, optional + Defaults to true. If true, quotes are removed from the data + suppress_stripping : bool, optional + Defaults to false. If true, spaces are not stripped + + Returns + ------- + list of lists, list of str, list of str + The data in the mapping file, the headers and the comments + + Raises + ------ + QiitaDBError + If there is any error parsing the mapping file + + Notes + ----- + This code has been ported from QIIME. + """ + if strip_quotes: + if suppress_stripping: + # remove quotes but not spaces + strip_f = lambda x: x.replace('"', '') + else: + # remove quotes and spaces + strip_f = lambda x: x.replace('"', '').strip() + else: + if suppress_stripping: + # don't remove quotes or spaces + strip_f = lambda x: x + else: + # remove spaces but not quotes + strip_f = lambda x: x.strip() + + # Create lists to store the results + mapping_data = [] + header = [] + comments = [] + + # Begin iterating over lines + for line in lines: + line = strip_f(line) + if not line or (suppress_stripping and not line.strip()): + # skip blank lines when not stripping lines + continue + + if line.startswith('#'): + line = line[1:] + if not header: + header = line.strip().split('\t') + else: + comments.append(line) + else: + # Will add empty string to empty fields + tmp_line = map(strip_f, line.split('\t')) + if len(tmp_line) < len(header): + tmp_line.extend([''] * (len(header) - len(tmp_line))) + mapping_data.append(tmp_line) + if not header: + raise QiitaDBError("No header line was found in mapping file.") + if not mapping_data: + raise QiitaDBError("No data found in mapping file.") + + return mapping_data, header, comments From d9b9bc4bac1df7bf28762f4c260ec8f921787c68 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 29 May 2015 14:13:30 -0700 Subject: [PATCH 11/14] Making error messages prettier --- qiita_pet/handlers/study_handlers/description_handlers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py index 9cc6cbb00..3f9334c4d 100644 --- a/qiita_pet/handlers/study_handlers/description_handlers.py +++ b/qiita_pet/handlers/study_handlers/description_handlers.py @@ -635,6 +635,9 @@ def display_template(self, study, user, msg, msg_level, full_access, # modify the information of the study show_edit_btn = study_status != 'public' or user_level == 'admin' + # Make the error message suitable for html + msg = msg.replace('\n', "
") + self.render('study_description.html', message=msg, level=msg_level, From df108b9c0bcfde9db24f3a14e35f7b9ccd07c90f Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 29 May 2015 16:04:41 -0700 Subject: [PATCH 12/14] Adding check for missing QIIME mapping columns plus specially handle ReverseLinkerPrimer --- .../study_handlers/description_handlers.py | 2 + qiita_ware/metadata_pipeline.py | 13 +++- qiita_ware/test/test_metadata_pipeline.py | 65 +++++++++++++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py index 3f9334c4d..1ecfd4a2e 100644 --- a/qiita_pet/handlers/study_handlers/description_handlers.py +++ b/qiita_pet/handlers/study_handlers/description_handlers.py @@ -179,6 +179,8 @@ def process_sample_template(self, study, user, callback): raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp) # Define here the message and message level in case of success + msg = "The sample template '%s' has been added" % sample_template + msg_level = "success" is_mapping_file = looks_like_qiime_mapping_file(fp_rsp) try: diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py index 07f599b2b..cbe57e243 100644 --- a/qiita_ware/metadata_pipeline.py +++ b/qiita_ware/metadata_pipeline.py @@ -15,6 +15,7 @@ CONTROLLED_COLS, TARGET_GENE_DATA_TYPES) from qiita_db.util import convert_from_id +from qiita_ware.exceptions import QiitaWareError def create_templates_from_qiime_mapping_file(fp, study, data_type): @@ -41,9 +42,18 @@ def create_templates_from_qiime_mapping_file(fp, study, data_type): rename_cols = { 'BarcodeSequence': 'barcode', 'LinkerPrimerSequence': 'primer', - 'ReverseLinkerPrimer': 'reverselinkerprimer', 'Description': 'description', } + + if 'ReverseLinkerPrimer' in qiime_map: + rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer' + + missing = set(rename_cols).difference(qiime_map.columns) + if missing: + raise QiitaWareError( + "Error generating the templates from the QIIME mapping file. " + "Missing QIIME mapping file columns: %s" % ', '.join(missing)) + qiime_map.rename(columns=rename_cols, inplace=True) # Fix the casing in the columns that we control @@ -64,6 +74,7 @@ def _col_iterator(restriction_set): if data_type_str in TARGET_GENE_DATA_TYPES: pt_cols.update( col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE)) + pt_cols.add('reverselinkerprimer') qiime_cols = set(qiime_map.columns) pt_cols = qiime_cols.intersection(pt_cols) diff --git a/qiita_ware/test/test_metadata_pipeline.py b/qiita_ware/test/test_metadata_pipeline.py index 095371c8f..060b2b103 100644 --- a/qiita_ware/test/test_metadata_pipeline.py +++ b/qiita_ware/test/test_metadata_pipeline.py @@ -12,6 +12,7 @@ from os.path import exists from qiita_core.util import qiita_test_checker +from qiita_ware.exceptions import QiitaWareError from qiita_db.study import Study, StudyPerson from qiita_db.user import User from qiita_db.util import get_count @@ -72,6 +73,37 @@ def test_create_templates_from_qiime_mapping_file(self): "experiment_design_description"} self.assertEqual(set(obs_pt.categories()), exp) + def test_create_templates_from_qiime_mapping_file_reverse_linker(self): + new_pt_id = get_count('qiita.prep_template') + 1 + obs_st, obs_pt = create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP_WITH_REVERSE_LINKER_PRIMER), + self.new_study, "16S") + + # Be green: clean the environment + for template in [obs_st, obs_pt]: + for _, fp in template.get_filepaths(): + self._clean_up_files.append(fp) + + self.assertEqual(obs_st.id, self.new_study.id) + self.assertEqual(obs_pt.id, new_pt_id) + + # Check that each template has the correct columns + exp = {"physical_specimen_location", "physical_specimen_remaining", + "dna_extracted", "sample_type", "host_subject_id", "latitude", + "longitude", "taxon_id", "scientific_name", + "collection_timestamp", "description"} + self.assertEqual(set(obs_st.categories()), exp) + + exp = {"barcode", "primer", "center_name", "run_prefix", "platform", + "library_construction_protocol", + "experiment_design_description", "reverselinkerprimer"} + self.assertEqual(set(obs_pt.categories()), exp) + + def test_create_templates_from_qiime_mapping_file_error(self): + with self.assertRaises(QiitaWareError): + create_templates_from_qiime_mapping_file( + StringIO(QIIME_MAP_ERROR), self.new_study, "16S") + QIIME_MAP = ( "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\t" @@ -90,6 +122,39 @@ def test_create_templates_from_qiime_mapping_file(self): "NotIdentified\t4.3\t4.3\t9606\thomo sapiens\tANL\trp_2\tILLUMINA\t" "protocol_1\tedd_1\t05/28/15 11:00\tDescription S3\n") +QIIME_MAP_WITH_REVERSE_LINKER_PRIMER = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tReverseLinkerPrimer\t" + "physical_specimen_location\tphysical_specimen_remaining\tdna_extracted\t" + "sample_type\thost_subject_id\tlatitude\tlongitude\ttaxon_id\t" + "scientific_name\tcenter_name\trun_prefix\tplatform\t" + "library_construction_protocol\texperiment_design_description\t" + "collection_timestamp\tDescription\n" + "Sample1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype1\tNotIdentified\t4.1\t4.1\t9606\thomo sapiens\tANL\t" + "rp_1\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype2\tNotIdentified\t4.2\t4.2\t9606\thomo sapiens\tANL\t" + "rp_1\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCMGCCGCGGTAA\tUCSD\t" + "TRUE\tTRUE\ttype3\tNotIdentified\t4.3\t4.3\t9606\thomo sapiens\tANL\t" + "rp_2\tILLUMINA\tprotocol_1\tedd_1\t05/28/15 11:00\tDescription S3\n") + +QIIME_MAP_ERROR = ( + "#SampleID\tBarcodeSequence\tphysical_specimen_location\t" + "physical_specimen_remaining\tdna_extracted\tsample_type\t" + "host_subject_id\tlatitude\tlongitude\ttaxon_id\tscientific_name\t" + "center_name\trun_prefix\tplatform\tlibrary_construction_protocol\t" + "experiment_design_description\tcollection_timestamp\tDescription\n" + "Sample1\tGTCCGCAAGTTA\tUCSD\tTRUE\tTRUE\ttype1\tNotIdentified\t4.1\t4.1\t" + "9606\thomo sapiens\tANL\trp_1\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S1\n" + "Sample2\tCGTAGAGCTCTC\tUCSD\tTRUE\tTRUE\ttype2\tNotIdentified\t4.2\t4.2\t" + "9606\thomo sapiens\tANL\trp_1\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S2\n" + "Sample3\tCCTCTGAGAGCT\tUCSD\tTRUE\tTRUE\ttype3\tNotIdentified\t4.3\t4.3\t" + "9606\thomo sapiens\tANL\trp_2\tILLUMINA\tprotocol_1\tedd_1\t" + "05/28/15 11:00\tDescription S3\n") + if __name__ == "__main__": main() From 3995ae81e4166e1271a08839a015b9f038c45d1a Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 29 May 2015 16:25:23 -0700 Subject: [PATCH 13/14] Addressing @adamrp's comments --- qiita_db/metadata_template/test/test_util.py | 5 ++--- qiita_db/metadata_template/util.py | 6 +----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 6fa5f970d..95a61701c 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -239,9 +239,8 @@ def test_looks_like_qiime_mapping_file(self): StringIO(QIIME_TUTORIAL_MAP_SUBSET)) self.assertTrue(obs) - def test_looks_like_qiime_mmapping_file_error(self): - with self.assertRaises(QiitaDBError): - looks_like_qiime_mapping_file(StringIO()) + obs = looks_like_qiime_mapping_file(StringIO()) + self.assertFalse(obs) def test_parse_mapping_file(self): # Tests ported over from QIIME diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 83a6bdd74..6048b5b66 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -346,10 +346,6 @@ def looks_like_qiime_mapping_file(fp): bool True if fp looks like a QIIME mapping file, false otherwise. - Raises - ------ - QiitaDBError - If an empty file is passed Notes ----- @@ -362,7 +358,7 @@ def looks_like_qiime_mapping_file(fp): with open_file(fp, mode='U') as f: first_line = f.readline() if not first_line: - raise QiitaDBError('Empty file passed!') + return False first_col = first_line.split()[0] return first_col == '#SampleID' From 8b6a73eee561bb217d2fe4ca6980bb82c9d7a7d8 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 29 May 2015 17:53:21 -0700 Subject: [PATCH 14/14] Solving comments and pep8 --- qiita_db/metadata_template/util.py | 16 ++++++++++++---- .../study_handlers/description_handlers.py | 3 ++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 6048b5b66..25b6e8b5e 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -397,17 +397,25 @@ def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False): if strip_quotes: if suppress_stripping: # remove quotes but not spaces - strip_f = lambda x: x.replace('"', '') + + def strip_f(x): + return x.replace('"', '') else: # remove quotes and spaces - strip_f = lambda x: x.replace('"', '').strip() + + def strip_f(x): + return x.replace('"', '').strip() else: if suppress_stripping: # don't remove quotes or spaces - strip_f = lambda x: x + + def strip_f(x): + return x else: # remove spaces but not quotes - strip_f = lambda x: x.strip() + + def strip_f(x): + return x.strip() # Create lists to store the results mapping_data = [] diff --git a/qiita_pet/handlers/study_handlers/description_handlers.py b/qiita_pet/handlers/study_handlers/description_handlers.py index 1ecfd4a2e..e5ea8c16a 100644 --- a/qiita_pet/handlers/study_handlers/description_handlers.py +++ b/qiita_pet/handlers/study_handlers/description_handlers.py @@ -31,6 +31,7 @@ QiitaDBDuplicateHeaderError, QiitaDBError) from qiita_ware.metadata_pipeline import ( create_templates_from_qiime_mapping_file) +from qiita_ware.exceptions import QiitaWareError from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import check_access from qiita_pet.handlers.study_handlers.listing_handlers import ( @@ -199,7 +200,7 @@ def process_sample_template(self, study, user, callback): except (TypeError, QiitaDBColumnError, QiitaDBExecutionError, QiitaDBDuplicateError, IOError, ValueError, KeyError, CParserError, QiitaDBDuplicateHeaderError, - QiitaDBError) as e: + QiitaDBError, QiitaWareError) as e: # Some error occurred while processing the sample template # Show the error to the user so they can fix the template error_msg = ('parsing the QIIME mapping file'