Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data_type_id column to preprocessed and processed data tables #194

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 115 additions & 23 deletions qiita_db/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
from .base import QiitaObject
from .sql_connection import SQLConnectionHandler
from .util import (exists_dynamic_table, get_db_files_base_dir,
insert_filepaths)
insert_filepaths, convert_to_id)


class BaseData(QiitaObject):
Expand Down Expand Up @@ -190,6 +190,7 @@ class RawData(BaseData):
Methods
-------
create
data_type

See Also
--------
Expand Down Expand Up @@ -253,6 +254,27 @@ def studies(self):
[self._id])
return [id[0] for id in ids]

def data_type(self, ret_id=False):
    """Returns the data_type or data_type_id of this raw data

    The value is looked up through qiita.common_prep_info, which links
    this raw data to its data_type row.

    Parameters
    ----------
    ret_id : bool, optional
        If True, return the integer data_type_id instead of the
        data_type string. Default False.

    Returns
    -------
    str or int
        The data_type as a string when `ret_id` is False (the default);
        the data_type_id as an integer when `ret_id` is True.
    """
    # Selecting "data_type" vs "data_type_id" is done by suffixing the
    # column name; the row id itself is passed as a bound SQL parameter.
    ret = "_id" if ret_id else ""
    conn_handler = SQLConnectionHandler()
    data_type = conn_handler.execute_fetchone(
        "SELECT d.data_type{0} FROM qiita.data_type d JOIN "
        "qiita.common_prep_info c ON c.data_type_id = d.data_type_id WHERE"
        " c.raw_data_id = %s".format(ret), (self._id, ))
    return data_type[0]


class PreprocessedData(BaseData):
r"""Object for dealing with preprocessed data
Expand All @@ -266,6 +288,7 @@ class PreprocessedData(BaseData):
-------
create
is_submitted_to_insdc
data_type

See Also
--------
Expand All @@ -280,7 +303,8 @@ class PreprocessedData(BaseData):

@classmethod
def create(cls, study, preprocessed_params_table, preprocessed_params_id,
filepaths, raw_data=None, submitted_to_insdc=False):
filepaths, raw_data=None, data_type=None,
submitted_to_insdc=False):
r"""Creates a new object with a new id on the storage system

Parameters
Expand All @@ -299,14 +323,31 @@ def create(cls, study, preprocessed_params_table, preprocessed_params_id,
If true, the raw data files have been submitted to insdc
raw_data : RawData, optional
The RawData object used as base to this preprocessed data
data_type : str, optional
The data_type of the preprocessed_data


Raises
------
IncompetentQiitaDeveloperError
If the table `preprocessed_params_table` does not exists
IncompetentQiitaDeveloperError
If data_type does not match that of raw_data passed
"""
conn_handler = SQLConnectionHandler()
# We first check that the preprocessed_params_table exists
if (data_type and raw_data) and data_type != raw_data.data_type:
raise IncompetentQiitaDeveloperError(
"data_type passed does not match raw_data data_type!")
elif data_type is None and raw_data is None:
raise IncompetentQiitaDeveloperError("Neither data_type nor "
"raw_data passed!")
elif raw_data:
# raw_data passed but no data_type, so set to raw data data_type
data_type = raw_data.data_type(ret_id=True)
else:
# only data_type, so need id from the text
data_type = convert_to_id(data_type, "data_type", conn_handler)
# Check that the preprocessed_params_table exists
if not exists_dynamic_table(preprocessed_params_table, "preprocessed_",
"_params", conn_handler):
raise IncompetentQiitaDeveloperError(
Expand All @@ -316,12 +357,13 @@ def create(cls, study, preprocessed_params_table, preprocessed_params_id,
# and get the preprocessed data id back
ppd_id = conn_handler.execute_fetchone(
"INSERT INTO qiita.{0} (preprocessed_params_table, "
"preprocessed_params_id, submitted_to_insdc) VALUES "
"(%(param_table)s, %(param_id)s, %(insdc)s) "
"preprocessed_params_id, submitted_to_insdc, data_type_id) VALUES "
"(%(param_table)s, %(param_id)s, %(insdc)s, %(data_type)s) "
"RETURNING preprocessed_data_id".format(cls._table),
{'param_table': preprocessed_params_table,
'param_id': preprocessed_params_id,
'insdc': submitted_to_insdc})[0]
'insdc': submitted_to_insdc,
'data_type': data_type})[0]
ppd = cls(ppd_id)

# Connect the preprocessed data with its study
Expand Down Expand Up @@ -363,6 +405,28 @@ def study(self):
"preprocessed_data_id=%s".format(self._study_preprocessed_table),
[self._id])[0]

def data_type(self, ret_id=False):
    """Returns the data_type or data_type_id of this preprocessed data

    The value comes from the data_type_id column stored directly on the
    preprocessed data table (self._table).

    Parameters
    ----------
    ret_id : bool, optional
        If True, return the integer data_type_id instead of the
        data_type string. Default False.

    Returns
    -------
    str or int
        The data_type as a string when `ret_id` is False (the default);
        the data_type_id as an integer when `ret_id` is True.
    """
    conn_handler = SQLConnectionHandler()
    # Column suffix chooses "data_type" vs "data_type_id"; the row id is
    # passed as a bound SQL parameter, never interpolated.
    ret = "_id" if ret_id else ""
    data_type = conn_handler.execute_fetchone(
        "SELECT d.data_type{0} FROM qiita.data_type d JOIN "
        "qiita.{1} p ON p.data_type_id = d.data_type_id WHERE"
        " p.preprocessed_data_id = %s".format(ret, self._table),
        (self._id, ))
    return data_type[0]

def is_submitted_to_insdc(self):
r"""Tells if the raw data has been submitted to insdc

Expand All @@ -387,6 +451,7 @@ class ProcessedData(BaseData):
Methods
-------
create
data_type

See Also
--------
Expand All @@ -401,7 +466,8 @@ class ProcessedData(BaseData):

@classmethod
def create(cls, processed_params_table, processed_params_id, filepaths,
preprocessed_data=None, study=None, processed_date=None):
preprocessed_data=None, study=None, processed_date=None,
data_type=None):
r"""
Parameters
----------
Expand All @@ -420,6 +486,9 @@ def create(cls, processed_params_table, processed_params_id, filepaths,
belongs to
processed_date : datetime, optional
Date in which the data have been processed. Default: now
data_type : str, optional
data_type of the processed_data. Otherwise taken from passed
preprocessed_data.

Raises
------
Expand All @@ -428,17 +497,30 @@ def create(cls, processed_params_table, processed_params_id, filepaths,
If `preprocessed_data` and `study` are provided at the same time
If `preprocessed_data` and `study` are not provided
"""
conn_handler = SQLConnectionHandler()
if preprocessed_data is not None:
if study is not None:
raise IncompetentQiitaDeveloperError(
"You should provide either preprocessed_data or study, "
"but not both")
elif data_type is not None and \
data_type != preprocessed_data.data_type():
raise IncompetentQiitaDeveloperError(
"data_type passed does not match preprocessed_data "
"data_type!")
else:
data_type = preprocessed_data.data_type(ret_id=True)
else:
if study is None:
raise IncompetentQiitaDeveloperError(
"You should provide either a preprocessed_data or a study")
if data_type is None:
raise IncompetentQiitaDeveloperError(
"You must provide either a preprocessed_data, a "
"data_type, or both")
else:
data_type = convert_to_id(data_type, "data_type", conn_handler)

conn_handler = SQLConnectionHandler()
# We first check that the processed_params_table exists
if not exists_dynamic_table(processed_params_table,
"processed_params_", "", conn_handler):
Expand All @@ -454,12 +536,13 @@ def create(cls, processed_params_table, processed_params_id, filepaths,
# and get the processed data id back
pd_id = conn_handler.execute_fetchone(
"INSERT INTO qiita.{0} (processed_params_table, "
"processed_params_id, processed_date) VALUES (%(param_table)s, "
"%(param_id)s, %(date)s) RETURNING "
"processed_data_id".format(cls._table),
"processed_params_id, processed_date, data_type_id) VALUES ("
"%(param_table)s, %(param_id)s, %(date)s, %(data_type)s) RETURNING"
" processed_data_id".format(cls._table),
{'param_table': processed_params_table,
'param_id': processed_params_id,
'date': processed_date})[0]
'date': processed_date,
'data_type': data_type})[0]

pd = cls(pd_id)

Expand Down Expand Up @@ -491,15 +574,24 @@ def preprocessed_data(self):
"processed_data_id=%s".format(self._preprocessed_processed_table),
[self._id])[0]

def data_type(self, ret_id=False):
    """Returns the data_type or data_type_id of this processed data

    The value comes from the data_type_id column stored directly on the
    processed data table (self._table), replacing the previous multi-join
    lookup through the preprocessed/raw data tables.

    Parameters
    ----------
    ret_id : bool, optional
        If True, return the integer data_type_id instead of the
        data_type string. Default False.

    Returns
    -------
    str or int
        The data_type as a string when `ret_id` is False (the default);
        the data_type_id as an integer when `ret_id` is True.
    """
    conn_handler = SQLConnectionHandler()
    # Column suffix chooses "data_type" vs "data_type_id"; the row id is
    # passed as a bound SQL parameter, never interpolated.
    ret = "_id" if ret_id else ""
    data_type = conn_handler.execute_fetchone(
        "SELECT d.data_type{0} FROM qiita.data_type d JOIN "
        "qiita.{1} p ON p.data_type_id = d.data_type_id WHERE"
        " p.processed_data_id = %s".format(ret, self._table),
        (self._id, ))
    return data_type[0]
6 changes: 3 additions & 3 deletions qiita_db/sql_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ def _sql_executor(self, sql, sql_args=None, many=False):
self._connection.commit()
except PostgresError as e:
self._connection.rollback()
if sql_args and isinstance(sql_args[0], Iterable):
err_sql = cur.mogrify(sql, sql_args[0])
else:
try:
err_sql = cur.mogrify(sql, sql_args)
except ValueError:
err_sql = cur.mogrify(sql, sql_args[0])
raise QiitaDBExecutionError(("\nError running SQL query: %s"
"\nError: %s" % (err_sql, e)))

Expand Down
4 changes: 2 additions & 2 deletions qiita_db/support_files/populate_test_db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ INSERT INTO qiita.prep_1 (sample_id, BarcodeSequence, LIBRARY_CONSTRUCTION_PROTO
('SKM9.640192', 'AGCAGGCACGAA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME');

-- Insert preprocessed information for raw data 1
INSERT INTO qiita.preprocessed_data (preprocessed_params_table, preprocessed_params_id, submitted_to_insdc) VALUES ('preprocessed_sequence_illumina_params', 1, TRUE), ('preprocessed_sequence_illumina_params', 2, FALSE);
INSERT INTO qiita.preprocessed_data (preprocessed_params_table, preprocessed_params_id, submitted_to_insdc, data_type_id) VALUES ('preprocessed_sequence_illumina_params', 1, TRUE, 2), ('preprocessed_sequence_illumina_params', 2, FALSE, 2);

-- Link the new preprocessed data with the raw data
INSERT INTO qiita.raw_preprocessed_data (raw_data_id, preprocessed_data_id) VALUES (1, 1), (1, 2);
Expand All @@ -306,7 +306,7 @@ INSERT INTO qiita.preprocessed_filepath (preprocessed_data_id, filepath_id) VALU
INSERT INTO qiita.preprocessed_sequence_illumina_params (trim_length) VALUES (151), (100);

-- Insert processed information for study 0 and processed data 1
INSERT INTO qiita.processed_data (processed_params_table, processed_params_id, processed_date) VALUES ('processed_params_uclust', 1, 'Mon Oct 1 09:30:27 2012');
INSERT INTO qiita.processed_data (processed_params_table, processed_params_id, processed_date, data_type_id) VALUES ('processed_params_uclust', 1, 'Mon Oct 1 09:30:27 2012', 2);

-- Link the processed data with the preprocessed data
INSERT INTO qiita.preprocessed_processed_data (preprocessed_data_id, processed_data_id) VALUES (1, 1);
Expand Down
20 changes: 17 additions & 3 deletions qiita_db/support_files/qiita-db.dbs
Original file line number Diff line number Diff line change
Expand Up @@ -574,9 +574,16 @@ Linked by y being raw_data_id from raw data table.</comment>
</column>
<column name="preprocessed_params_id" type="bigint" jt="-5" mandatory="y" />
<column name="submitted_to_insdc" type="bool" jt="-7" mandatory="y" />
<column name="data_type_id" type="bigint" jt="-5" mandatory="y" />
<index name="pk_preprocessed_data" unique="PRIMARY_KEY" >
<column name="preprocessed_data_id" />
</index>
<index name="idx_preprocessed_data" unique="NORMAL" >
<column name="data_type_id" />
</index>
<fk name="fk_preprocessed_data" to_schema="qiita" to_table="data_type" >
<fk_column name="data_type_id" pk="data_type_id" />
</fk>
</table>
<table name="preprocessed_filepath" >
<column name="preprocessed_data_id" type="bigint" jt="-5" mandatory="y" />
Expand Down Expand Up @@ -660,9 +667,16 @@ Linked by y being raw_data_id from raw data table.</comment>
<comment><![CDATA[Link to a table with the parameters used to generate processed data]]></comment>
</column>
<column name="processed_date" type="timestamp" jt="93" mandatory="y" />
<column name="data_type_id" type="bigint" jt="-5" mandatory="y" />
<index name="pk_processed_data" unique="PRIMARY_KEY" >
<column name="processed_data_id" />
</index>
<index name="idx_processed_data" unique="NORMAL" >
<column name="data_type_id" />
</index>
<fk name="fk_processed_data" to_schema="qiita" to_table="data_type" >
<fk_column name="data_type_id" pk="data_type_id" />
</fk>
</table>
<table name="processed_filepath" >
<column name="processed_data_id" type="bigint" jt="-5" mandatory="y" />
Expand Down Expand Up @@ -1303,8 +1317,6 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1845" y="720" />
<entity schema="qiita" name="raw_data" color="d0def5" x="1230" y="480" />
<entity schema="qiita" name="raw_preprocessed_data" color="b2cdf7" x="1230" y="585" />
<entity schema="qiita" name="preprocessed_filepath" color="c0d4f3" x="990" y="705" />
<entity schema="qiita" name="preprocessed_data" color="c0d4f3" x="1200" y="690" />
<entity schema="qiita" name="processed_filepath" color="c0d4f3" x="1005" y="930" />
<entity schema="qiita" name="command" color="d0def5" x="210" y="1095" />
<entity schema="qiita" name="logging" color="c0d4f3" x="1365" y="1200" />
Expand All @@ -1313,9 +1325,11 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="preprocessed_spectra_params" color="d0def5" x="1830" y="825" />
<entity schema="qiita" name="preprocessed_sequence_454_params" color="c0d4f3" x="1815" y="915" />
<entity schema="qiita" name="preprocessed_sequence_illumina_params" color="d0def5" x="1800" y="1005" />
<entity schema="qiita" name="processed_data" color="d0def5" x="1215" y="930" />
<entity schema="qiita" name="study_processed_data" color="b2cdf7" x="1455" y="930" />
<entity schema="qiita" name="command_data_type" color="c0d4f3" x="390" y="1155" />
<entity schema="qiita" name="preprocessed_data" color="c0d4f3" x="1200" y="690" />
<entity schema="qiita" name="processed_data" color="d0def5" x="1215" y="930" />
<entity schema="qiita" name="preprocessed_filepath" color="c0d4f3" x="990" y="690" />
<group name="Group_analyses" color="c4e0f9" >
<comment>analysis tables</comment>
<entity schema="qiita" name="analysis" />
Expand Down
Loading