From c5c38a68df9b7affac8b1042cade0c9a1b814806 Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Thu, 15 Dec 2016 16:14:36 -0700
Subject: [PATCH] fix #388 (#2024)

* fix #388

* fixing errors
---
 qiita_db/metadata_template/prep_template.py   | 31 ++++++++++++++++
 .../test/test_prep_template.py                | 12 +++++++
 qiita_db/support_files/patches/45.sql         |  4 +++
 .../patches/python_patches/45.py              | 36 +++++++++++++++++++
 qiita_db/test/test_commands.py                |  2 +-
 5 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 qiita_db/support_files/patches/45.sql
 create mode 100644 qiita_db/support_files/patches/python_patches/45.py

diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py
index e26933c1b..d24e02c76 100644
--- a/qiita_db/metadata_template/prep_template.py
+++ b/qiita_db/metadata_template/prep_template.py
@@ -13,6 +13,7 @@
 from time import strftime
 from copy import deepcopy
 import warnings
+from skbio.util import find_duplicates
 
 import pandas as pd
 
@@ -23,6 +24,30 @@
 from .base_metadata_template import BaseSample, MetadataTemplate
 
 
+def _check_duplicated_columns(prep_cols, sample_cols):
+    r"""Check for duplicated columns in the prep_cols and sample_cols
+
+    Parameters
+    ----------
+    prep_cols : list of str
+        Column names in the prep info file
+    sample_cols : list of str
+        Column names in the sample info file
+
+    Raises
+    ------
+    QiitaDBColumnError
+        If there are duplicated column names in the sample and the prep
+    """
+    # concatenate into a new list so the caller's `prep_cols` is not mutated
+    dups = find_duplicates(list(prep_cols) + list(sample_cols))
+    if dups:
+        raise qdb.exceptions.QiitaDBColumnError(
+            'Duplicated column names in the sample and prep info '
+            'files: %s. You need to delete that duplicated field' %
+            ','.join(dups))
+
+
 class PrepSample(BaseSample):
     r"""Class that models a sample present in a PrepTemplate.
 
@@ -113,6 +138,8 @@ def create(cls, md_template, study, data_type, investigation_type=None):
                 pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
 
             md_template = cls._clean_validate_template(md_template, study.id)
+            _check_duplicated_columns(list(md_template.columns),
+                                      study.sample_template.categories())
 
             # Insert the metadata template
             sql = """INSERT INTO qiita.prep_template
@@ -358,6 +385,10 @@ def can_be_extended(self, new_samples, new_columns):
                                        "template has already been processed. "
                                        "No new samples can be added to the "
                                        "prep template")
+
+            _check_duplicated_columns(list(new_columns), qdb.study.Study(
+                self.study_id).sample_template.categories())
+
             return True, ""
 
     @property
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
index 64fed7e55..254fdc125 100644
--- a/qiita_db/metadata_template/test/test_prep_template.py
+++ b/qiita_db/metadata_template/test/test_prep_template.py
@@ -243,6 +243,11 @@ def test_can_be_extended(self):
                    "template")
         self.assertEqual(obs_msg, exp_msg)
 
+    def test_can_be_extended_duplicated_column(self):
+        """can_be_extended raises an error on a duplicated column name"""
+        with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
+            self.prep_template.can_be_extended([], ["season_environment"])
+
     def test_metadata_headers(self):
         PT = qdb.metadata_template.prep_template.PrepTemplate
         obs = PT.metadata_headers()
@@ -1003,6 +1008,13 @@ def test_create_investigation_type_error(self):
                 self.metadata, self.test_study, self.data_type_id,
                 'Not a term')
 
+    def test_create_duplicated_column_error(self):
+        """Create raises an error if the prep has a duplicated column name"""
+        self.metadata['season_environment'] = self.metadata['primer']
+        with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
+            qdb.metadata_template.prep_template.PrepTemplate.create(
+                self.metadata, self.test_study, self.data_type_id)
+
     def test_delete_error(self):
         """Try to delete a prep template that already has preprocessed data"""
         with self.assertRaises(qdb.exceptions.QiitaDBExecutionError):
diff --git a/qiita_db/support_files/patches/45.sql b/qiita_db/support_files/patches/45.sql
new file mode 100644
index 000000000..18efd4b25
--- /dev/null
+++ b/qiita_db/support_files/patches/45.sql
@@ -0,0 +1,4 @@
+-- Dec 15, 2016
+-- Making sure there are no duplicated columns, much easier via python
+
+SELECT 42;
diff --git a/qiita_db/support_files/patches/python_patches/45.py b/qiita_db/support_files/patches/python_patches/45.py
new file mode 100644
index 000000000..7836f74f7
--- /dev/null
+++ b/qiita_db/support_files/patches/python_patches/45.py
@@ -0,0 +1,36 @@
+from future.utils import viewitems
+
+from qiita_db.metadata_template.sample_template import SampleTemplate
+from qiita_db.metadata_template.prep_template import PrepTemplate
+from qiita_db.sql_connection import TRN
+
+with TRN:
+    # get the prep tables that have columns whose names also appear in the
+    # sample info headers; ignoring column 'sample_id' and tables
+    # 'prep_template' and 'prep_template_sample'
+    sql = """SELECT table_name, array_agg(column_name::text)
+                FROM information_schema.columns
+                WHERE column_name IN %s
+                    AND column_name != 'sample_id'
+                    AND table_name LIKE 'prep_%%'
+                    AND table_name NOT IN (
+                        'prep_template', 'prep_template_sample')
+                GROUP BY table_name"""
+    shared = tuple(set(PrepTemplate.metadata_headers()) &
+                   set(SampleTemplate.metadata_headers()))
+    overlapping = {}
+    if shared:  # an empty tuple renders as `IN ()`, which is invalid SQL
+        TRN.add(sql, [shared])
+        overlapping = dict(TRN.execute_fetchindex())
+
+# finding actual duplicates
+for table_name, cols in viewitems(overlapping):
+    # leaving print so when we patch in the main system we know that
+    # nothing was renamed or deal with that
+    print(table_name)
+    with TRN:
+        for c in cols:
+            sql = 'ALTER TABLE qiita.%s RENAME COLUMN %s TO %s_renamed' % (
+                table_name, c, c)
+            TRN.add(sql)
+            TRN.execute()
diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py
index 31a4b712d..c1853f178 100644
--- a/qiita_db/test/test_commands.py
+++ b/qiita_db/test/test_commands.py
@@ -457,7 +457,7 @@ def test_update_artifact_from_cmd(self):
 
 PREP_TEMPLATE = (
     'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
-    'description\tebi_submission_accession\temp_status\tprimer\t'
+    'description_prep\tebi_submission_accession\temp_status\tprimer\t'
     'run_prefix\tstr_column\tplatform\tlibrary_construction_protocol\t'
     'experiment_design_description\tinstrument_model\n'
     'SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tskb7\tNone\tEMP\t'