Merge branch 'master' of https://github.com/biocore/qiita into improve-sql-queues
qiita-spots · Jun 19, 2015 · ff9f304 · ff9f304
2 parents 00244b8 + 1f9308e
commit ff9f304
Show file tree

Hide file tree

Showing 22 changed files with 686 additions and 129 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,7 @@ http://qiime.org/documentation/file_formats.html#mapping-file-overview.
  * `qiita webserver` has been moved to `qiita pet webserver`
 * Cluster names now use dashes instead of underscores (e.g., `qiita_general` is now `qiita-general`)
 * `qiita-general` is now used as a default argument to `qiita-env start_cluster` and `qiita-env stop_cluster` if no cluster name is specified
+* Qiita now allows processing of already demultiplexed data that lacks the technical (barcode and primer) section of the read.
 
 Version 0.1.0 (2015-04-30)
 --------------------------

diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
@@ -474,6 +474,7 @@ class MetadataTemplate(QiitaObject):
     get
     to_file
     add_filepath
+    update
 
     See Also
     --------
@@ -1092,6 +1093,66 @@ def categories(self):
 
         return cols
 
+    def update(self, md_template):
+        r"""Update values in the template
+
+        Parameters
+        ----------
+        md_template : DataFrame
+            The metadata template file contents indexed by sample ids
+
+        Raises
+        ------
+        QiitaDBError
+            If md_template and db do not have the same sample ids
+            If md_template and db do not have the same column headers
+            If self.can_be_updated returns False for the changed columns
+        """
+        conn_handler = SQLConnectionHandler()
+
+        # Clean and validate the metadata template given
+        new_map = self._clean_validate_template(md_template, self.study_id,
+                                                self.columns_restrictions)
+        # Retrieving current metadata
+        current_map = self._transform_to_dict(conn_handler.execute_fetchall(
+            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
+        current_map = pd.DataFrame.from_dict(current_map, orient='index')
+
+        # simple validations of sample ids and column names
+        samples_diff = set(new_map.index).difference(current_map.index)
+        if samples_diff:
+            raise QiitaDBError('The new template differs from what is stored '
+                               'in database by these samples names: %s'
+                               % ', '.join(samples_diff))
+        columns_diff = set(new_map.columns).difference(current_map.columns)
+        if columns_diff:
+            raise QiitaDBError('The new template differs from what is stored '
+                               'in database by these columns names: %s'
+                               % ', '.join(columns_diff))
+
+        # here we are comparing two dataframes following:
+        # http://stackoverflow.com/a/17095620/4228285
+        current_map.sort(axis=0, inplace=True)
+        current_map.sort(axis=1, inplace=True)
+        new_map.sort(axis=0, inplace=True)
+        new_map.sort(axis=1, inplace=True)
+        map_diff = (current_map != new_map).stack()
+        map_diff = map_diff[map_diff]
+        map_diff.index.names = ['id', 'column']
+        changed_cols = map_diff.index.get_level_values('column').unique()
+
+        if not self.can_be_updated(columns=set(changed_cols)):
+            raise QiitaDBError('The new template is modifying fields that '
+                               'cannot be modified. Try removing the target '
+                               'gene fields or deleting the processed data. '
+                               'You are trying to modify: %s'
+                               % ', '.join(changed_cols))
+
+        for col in changed_cols:
+            self.update_category(col, new_map[col].to_dict())
+
+        self.generate_files()
+
     def update_category(self, category, samples_and_values):
         """Update an existing column
 

diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py
@@ -8,6 +8,7 @@
 
 from __future__ import division
 from future.utils import viewvalues
+from itertools import chain
 from os.path import join
 from time import strftime
 from copy import deepcopy
@@ -288,6 +289,55 @@ def data_type(self, ret_id=False):
             "qiita.prep_template p ON p.data_type_id = d.data_type_id WHERE "
             "p.prep_template_id=%s".format(ret), (self.id,))[0]
 
+    @property
+    def columns_restrictions(self):
+        """Gets the dictionary of columns required based on data_type
+
+        Returns
+        -------
+        dict
+            The dict of restrictions based on the data_type
+        """
+        pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
+        if self.data_type() in TARGET_GENE_DATA_TYPES:
+            pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
+
+        return pt_cols
+
+    def can_be_updated(self, columns):
+        """Gets if the template can be updated
+
+        Parameters
+        ----------
+        columns : set
+            A set of the names of the columns to be updated
+
+        Returns
+        -------
+        bool
+            If the template can be updated
+
+        Notes
+        -----
+        The prep template can be updated when (1) it has no preprocessed data
+        or the prep template data-type is not part of TARGET_GENE_DATA_TYPES,
+        (2) if it is part of TARGET_GENE_DATA_TYPES then we will only update if
+        the columns being updated are not part of
+        PREP_TEMPLATE_COLUMNS_TARGET_GENE
+        """
+        if (not self.preprocessed_data or
+           self.data_type() not in TARGET_GENE_DATA_TYPES):
+            return True
+
+        tg_columns = set(chain.from_iterable(
+            [v.columns for v in
+             viewvalues(PREP_TEMPLATE_COLUMNS_TARGET_GENE)]))
+
+        if not columns & tg_columns:
+            return True
+
+        return False
+
     @property
     def raw_data(self):
         conn_handler = SQLConnectionHandler()

diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py
@@ -10,8 +10,6 @@
 from os.path import join
 from time import strftime
 
-import pandas as pd
-
 from qiita_core.exceptions import IncompetentQiitaDeveloperError
 from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBError,
                                  QiitaDBUnknownIDError)
@@ -189,6 +187,41 @@ def study_id(self):
         """
         return self._id
 
+    @property
+    def columns_restrictions(self):
+        """Gets the dictionary of columns required
+
+        Returns
+        -------
+        dict
+            The dict of restrictions
+        """
+        return SAMPLE_TEMPLATE_COLUMNS
+
+    def can_be_updated(self, **kwargs):
+        """Gets if the template can be updated
+
+        Parameters
+        ----------
+        kwargs : ignored
+            Necessary to have in parameters to support other objects.
+
+        Returns
+        -------
+        bool
+            As this is the sample template, it will always return True. See the
+            notes.
+
+        Notes
+        -----
+            The prep template can't be updated in certain situations; see
+            its documentation for more info. However, the sample template
+            doesn't have those restrictions. Thus, to be able to use the same
+            update code in the base class, we need to have this method and it
+            should always return True.
+        """
+        return True
+
     def generate_files(self):
         r"""Generates all the files that contain data from this template
         """
@@ -226,66 +259,3 @@ def extend(self, md_template):
         conn_handler.execute_queue(queue_name)
 
         self.generate_files()
-
-    def update(self, md_template):
-        r"""Update values in the sample template
-
-        Parameters
-        ----------
-        md_template : DataFrame
-            The metadata template file contents indexed by samples Ids
-
-        Raises
-        ------
-        QiitaDBError
-            If md_template and db do not have the same sample ids
-            If md_template and db do not have the same column headers
-        """
-        conn_handler = SQLConnectionHandler()
-
-        # Clean and validate the metadata template given
-        new_map = self._clean_validate_template(md_template, self.id,
-                                                SAMPLE_TEMPLATE_COLUMNS)
-        # Retrieving current metadata
-        current_map = self._transform_to_dict(conn_handler.execute_fetchall(
-            "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
-                                                          self._id_column),
-            (self.id,)))
-        dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
-            "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
-
-        for k in current_map:
-            current_map[k].update(dyn_vals[k])
-            current_map[k].pop('study_id', None)
-
-        # converting sql results to dataframe
-        current_map = pd.DataFrame.from_dict(current_map, orient='index')
-
-        # simple validations of sample ids and column names
-        samples_diff = set(
-            new_map.index.tolist()) - set(current_map.index.tolist())
-        if samples_diff:
-            raise QiitaDBError('The new sample template differs from what is '
-                               'stored in database by these samples names: %s'
-                               % ', '.join(samples_diff))
-        columns_diff = set(new_map.columns) - set(current_map.columns)
-        if columns_diff:
-            raise QiitaDBError('The new sample template differs from what is '
-                               'stored in database by these columns names: %s'
-                               % ', '.join(columns_diff))
-
-        # here we are comparing two dataframes following:
-        # http://stackoverflow.com/a/17095620/4228285
-        current_map.sort(axis=0, inplace=True)
-        current_map.sort(axis=1, inplace=True)
-        new_map.sort(axis=0, inplace=True)
-        new_map.sort(axis=1, inplace=True)
-        map_diff = (current_map != new_map).stack()
-        map_diff = map_diff[map_diff]
-        map_diff.index.names = ['id', 'column']
-        changed_cols = map_diff.index.get_level_values('column').unique()
-
-        for col in changed_cols:
-            self.update_category(col, new_map[col].to_dict())
-
-        self.generate_files()
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
@@ -12,6 +12,7 @@
 from os import close, remove
 from os.path import join
 from collections import Iterable
+from copy import deepcopy
 
 import numpy.testing as npt
 import pandas as pd
@@ -255,6 +256,19 @@ def test_get_none(self):
         """get returns none if the sample id is not present"""
         self.assertTrue(self.tester.get('Not_a_Category') is None)
 
+    def test_columns_restrictions(self):
+        """that it returns the prep template column restrictions"""
+        exp = deepcopy(PREP_TEMPLATE_COLUMNS)
+        exp.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
+        self.assertEqual(self.prep_template.columns_restrictions, exp)
+
+    def test_can_be_updated(self):
+        """test if the template can be updated"""
+        # you can't update restricted columns in a pt with data
+        self.assertFalse(self.prep_template.can_be_updated({'barcode'}))
+        # but you can if not restricted
+        self.assertTrue(self.prep_template.can_be_updated({'center_name'}))
+
 
 @qiita_test_checker()
 class TestPrepSampleReadWrite(BaseTestPrepSample):
@@ -1343,6 +1357,13 @@ def test_raw_data_setter(self):
         pt.raw_data = rd
         self.assertEqual(pt.raw_data, rd.id)
 
+    def test_can_be_updated_on_new(self):
+        """test if the template can be updated"""
+        # you can update a newly created pt
+        pt = PrepTemplate.create(self.metadata, self.test_study,
+                                 self.data_type)
+        self.assertTrue(pt.can_be_updated({'barcode'}))
+
 
 EXP_PREP_TEMPLATE = (
     'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'

diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py
@@ -237,6 +237,15 @@ def test_get_none(self):
         """get returns none if the sample id is not present"""
         self.assertTrue(self.tester.get('Not_a_Category') is None)
 
+    def test_columns_restrictions(self):
+        """that it returns SAMPLE_TEMPLATE_COLUMNS"""
+        self.assertEqual(self.sample_template.columns_restrictions,
+                         SAMPLE_TEMPLATE_COLUMNS)
+
+    def test_can_be_updated(self):
+        """test that the sample template reports it can always be updated"""
+        self.assertTrue(self.sample_template.can_be_updated())
+
 
 @qiita_test_checker()
 class TestSampleReadWrite(BaseTestSample):

diff --git a/qiita_db/support_files/patches/26.sql b/qiita_db/support_files/patches/26.sql
@@ -0,0 +1,15 @@
+-- Jun 11, 2015
+
+-- Updating FASTA-Sanger -> FASTA_Sanger, needed so we can put restrictions on
+-- what kind of files the user can select in the GUI
+UPDATE qiita.filetype SET type='FASTA_Sanger' WHERE type = 'FASTA-Sanger';
+
+-- Adding new filetype
+INSERT INTO qiita.filetype (type) VALUES ('per_sample_FASTQ');
+
+
+-- Adding new illumina processing params if they do not exist
+-- adapted from: http://stackoverflow.com/a/13902402
+INSERT INTO qiita.preprocessed_sequence_illumina_params (param_set_name, barcode_type)
+  SELECT DISTINCT 'per sample FASTQ defaults', 'not-barcoded' FROM qiita.preprocessed_sequence_illumina_params
+  WHERE NOT EXISTS (SELECT 1 FROM qiita.preprocessed_sequence_illumina_params WHERE barcode_type = 'not-barcoded');
diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py
@@ -88,7 +88,7 @@ def test_preprocessed_filepath(self):
 
     def test_preprocessed_sequence_illumina_params(self):
         self.assertEqual(
-            get_count("qiita.preprocessed_sequence_illumina_params"), 6)
+            get_count("qiita.preprocessed_sequence_illumina_params"), 7)
 
     def test_processed_data(self):
         self.assertEqual(get_count("qiita.processed_data"), 1)

diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
@@ -181,6 +181,9 @@ def test_exists_dynamic_table(self):
     def test_convert_to_id(self):
         """Tests that ids are returned correctly"""
         self.assertEqual(convert_to_id("directory", "filepath_type"), 8)
+        self.assertEqual(convert_to_id("running", "analysis_status",
+                                       "status"), 3)
+        self.assertEqual(convert_to_id("EMP", "portal_type", "portal"), 2)
 
     def test_convert_to_id_bad_value(self):
         """Tests that ids are returned correctly"""
@@ -191,7 +194,8 @@ def test_get_filetypes(self):
         """Tests that get_filetypes works with valid arguments"""
 
         obs = get_filetypes()
-        exp = {'SFF': 1, 'FASTA-Sanger': 2, 'FASTQ': 3, 'FASTA': 4}
+        exp = {'SFF': 1, 'FASTA_Sanger': 2, 'FASTQ': 3, 'FASTA': 4,
+               'per_sample_FASTQ': 5}
         self.assertEqual(obs, exp)
 
         obs = get_filetypes(key='filetype_id')

diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -809,7 +809,7 @@ def filepath_ids_to_rel_paths(filepath_ids):
         return {}
 
 
-def convert_to_id(value, table):
+def convert_to_id(value, table, text_col=None):
     """Converts a string value to its corresponding table identifier
 
     Parameters
@@ -818,6 +818,8 @@ def convert_to_id(value, table):
         The string value to convert
     table : str
         The table that has the conversion
+    text_col : str, optional
+        Column holding the string value. Defaults to same as table name.
 
     Returns
     -------
@@ -829,8 +831,9 @@ def convert_to_id(value, table):
     IncompetentQiitaDeveloperError
         The passed string has no associated id
     """
+    text_col = table if text_col is None else text_col
     conn_handler = SQLConnectionHandler()
-    sql = "SELECT {0}_id FROM qiita.{0} WHERE {0} = %s".format(table)
+    sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format(table, text_col)
     _id = conn_handler.execute_fetchone(sql, (value, ))
     if _id is None:
         raise IncompetentQiitaDeveloperError("%s not valid for table %s"