Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #388 #2024

Merged
merged 2 commits into from
Dec 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from time import strftime
from copy import deepcopy
import warnings
from skbio.util import find_duplicates

import pandas as pd

Expand All @@ -23,6 +24,30 @@
from .base_metadata_template import BaseSample, MetadataTemplate


def _check_duplicated_columns(prep_cols, sample_cols):
    r"""Check for duplicated columns in the prep_cols and sample_cols

    Parameters
    ----------
    prep_cols : list of str
        Column names in the prep info file
    sample_cols : list of str
        Column names in the sample info file

    Raises
    ------
    QiitaDBColumnError
        If there are duplicated column names in the sample and the prep

    Notes
    -----
    Neither input is modified: the check runs on a combined copy. A name
    that is repeated within one of the inputs is reported as well, since
    the duplicates are searched for in the concatenation of both.
    """
    # Build a new list instead of extending prep_cols in place, so the
    # caller's list is left untouched.
    all_cols = list(prep_cols) + list(sample_cols)
    dups = find_duplicates(all_cols)
    if dups:
        # Sorted so the error message is the same from run to run.
        raise qdb.exceptions.QiitaDBColumnError(
            'Duplicated column names in the sample and prep info '
            'files: %s. You need to delete that duplicated field' %
            ','.join(sorted(dups)))


class PrepSample(BaseSample):
r"""Class that models a sample present in a PrepTemplate.

Expand Down Expand Up @@ -113,6 +138,8 @@ def create(cls, md_template, study, data_type, investigation_type=None):
pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

md_template = cls._clean_validate_template(md_template, study.id)
_check_duplicated_columns(list(md_template.columns),
study.sample_template.categories())

# Insert the metadata template
sql = """INSERT INTO qiita.prep_template
Expand Down Expand Up @@ -358,6 +385,10 @@ def can_be_extended(self, new_samples, new_columns):
"template has already been processed. "
"No new samples can be added to the "
"prep template")

_check_duplicated_columns(list(new_columns), qdb.study.Study(
self.study_id).sample_template.categories())

return True, ""

@property
Expand Down
12 changes: 12 additions & 0 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,11 @@ def test_can_be_extended(self):
"template")
self.assertEqual(obs_msg, exp_msg)

def test_can_be_extended_duplicated_column(self):
    """can_be_extended raises QiitaDBColumnError on a duplicated column"""
    no_new_samples = []
    clashing_columns = ["season_environment"]
    expected_error = qdb.exceptions.QiitaDBColumnError
    with self.assertRaises(expected_error):
        self.prep_template.can_be_extended(no_new_samples, clashing_columns)

def test_metadata_headers(self):
PT = qdb.metadata_template.prep_template.PrepTemplate
obs = PT.metadata_headers()
Expand Down Expand Up @@ -1003,6 +1008,13 @@ def test_create_investigation_type_error(self):
self.metadata, self.test_study, self.data_type_id,
'Not a term')

def test_create_duplicated_column_error(self):
    """Creating a prep template with a duplicated column name must fail"""
    PT = qdb.metadata_template.prep_template.PrepTemplate
    # Reuse the values of an existing column under the clashing name.
    self.metadata['season_environment'] = self.metadata['primer']
    creation_args = (self.metadata, self.test_study, self.data_type_id)
    with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
        PT.create(*creation_args)

def test_delete_error(self):
"""Try to delete a prep template that already has preprocessed data"""
with self.assertRaises(qdb.exceptions.QiitaDBExecutionError):
Expand Down
4 changes: 4 additions & 0 deletions qiita_db/support_files/patches/45.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Dec 15, 2016
-- Making sure there are no duplicated columns, much easier via python
-- The column renaming itself is done by python_patches/45.py; the statement
-- below changes nothing in the database.

SELECT 42;
36 changes: 36 additions & 0 deletions qiita_db/support_files/patches/python_patches/45.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from future.utils import viewitems

from qiita_db.metadata_template.sample_template import SampleTemplate
from qiita_db.metadata_template.prep_template import PrepTemplate
from qiita_db.sql_connection import TRN

with TRN:
    # Look only at column names that appear in both the prep and the sample
    # info headers. The 'sample_id' column is skipped, and only tables whose
    # name matches 'prep_%' are searched, leaving out 'prep_template' and
    # 'prep_template_sample'.
    sql = """SELECT table_name, array_agg(column_name::text)
                FROM information_schema.columns
                WHERE column_name IN %s
                    AND column_name != 'sample_id'
                    AND table_name LIKE 'prep_%%'
                    AND table_name NOT IN (
                        'prep_template', 'prep_template_sample')
                GROUP BY table_name"""
    shared_headers = tuple(
        set(PrepTemplate.metadata_headers()) &
        set(SampleTemplate.metadata_headers()))
    if shared_headers:
        TRN.add(sql, [shared_headers])
        overlapping = dict(TRN.execute_fetchindex())
    else:
        # NOTE(review): presumably the driver renders an empty tuple as
        # "IN ()", which is not valid SQL, so the query is skipped when
        # there are no shared header names -- confirm against TRN.
        overlapping = {}

# Rename every clashing column, one table at a time
for table_name, cols in viewitems(overlapping):
    # The print is kept on purpose: when the patch runs on the main system
    # the output shows which tables were touched (or that none were).
    # Written as a call so it is valid on both Python 2 and Python 3.
    print(table_name)
    with TRN:
        for c in cols:
            sql = 'ALTER TABLE qiita.%s RENAME COLUMN %s TO %s_renamed' % (
                table_name, c, c)
            TRN.add(sql)
        TRN.execute()
2 changes: 1 addition & 1 deletion qiita_db/test/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def test_update_artifact_from_cmd(self):

PREP_TEMPLATE = (
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
'description\tebi_submission_accession\temp_status\tprimer\t'
'description_prep\tebi_submission_accession\temp_status\tprimer\t'
'run_prefix\tstr_column\tplatform\tlibrary_construction_protocol\t'
'experiment_design_description\tinstrument_model\n'
'SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tskb7\tNone\tEMP\t'
Expand Down