Merge pull request #1205 from josenavas/1084-qiita-db

ElDeveloper · ElDeveloper · commit 250ae6993bd5 · 2015-05-27T17:20:00.000-07:00
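1084 qiita db: link prep templates directly to studies (qiita.study_prep_template replaces qiita.study_raw_data) and attach raw data to prep templates instead of studies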
diff --git a/qiita_db/commands.py b/qiita_db/commands.py
@@ -153,27 +153,23 @@ def load_sample_template_from_cmd(sample_temp_path, study_id):
     return SampleTemplate.create(sample_temp, Study(study_id))
 
 
-def load_prep_template_from_cmd(prep_temp_path, raw_data_id, study_id,
-                                data_type):
+def load_prep_template_from_cmd(prep_temp_path, study_id, data_type):
     r"""Adds a prep template to the database
 
     Parameters
     ----------
     prep_temp_path : str
         Path to the prep template file
-    raw_data_id : int
-        The raw data id to which the prep template belongs
     study_id : int
         The study id to which the prep template belongs
     data_type : str
         The data type of the prep template
     """
     prep_temp = load_template_to_dataframe(prep_temp_path)
-    return PrepTemplate.create(prep_temp, RawData(raw_data_id),
-                               Study(study_id), data_type)
+    return PrepTemplate.create(prep_temp, Study(study_id), data_type)
 
 
-def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
+def load_raw_data_cmd(filepaths, filepath_types, filetype, prep_template_ids):
     """Add new raw data by populating the relevant tables
 
     Parameters
@@ -184,8 +180,8 @@ def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
         Describes the contents of the files.
     filetype : str
         The type of file being loaded
-    study_ids : iterable of int
-        The IDs of the studies with which to associate this raw data
+    prep_template_ids : iterable of int
+        The IDs of the prep templates with which to associate this raw data
 
     Returns
     -------
@@ -202,9 +198,9 @@ def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
     filepath_types_dict = get_filepath_types()
     filepath_types = [filepath_types_dict[x] for x in filepath_types]
 
-    studies = [Study(x) for x in study_ids]
+    prep_templates = [PrepTemplate(x) for x in prep_template_ids]
 
-    return RawData.create(filetype_id, studies,
+    return RawData.create(filetype_id, prep_templates,
                           filepaths=list(zip(filepaths, filepath_types)))
 
 
diff --git a/qiita_db/data.py b/qiita_db/data.py
@@ -87,9 +87,8 @@
 from .sql_connection import SQLConnectionHandler
 from .exceptions import QiitaDBError, QiitaDBUnknownIDError, QiitaDBStatusError
 from .util import (exists_dynamic_table, insert_filepaths, convert_to_id,
-                   convert_from_id, purge_filepaths, get_filepath_id,
-                   get_mountpoint, move_filepaths_to_upload_folder,
-                   infer_status)
+                   convert_from_id, get_filepath_id, get_mountpoint,
+                   move_filepaths_to_upload_folder, infer_status)
 
 
 class BaseData(QiitaObject):
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
@@ -10,7 +10,7 @@
 from unittest import TestCase, main
 from tempfile import mkstemp
 from os import close, remove
-from os.path import join, basename
+from os.path import join
 from collections import Iterable
 
 import numpy.testing as npt
@@ -29,8 +29,7 @@
 from qiita_db.sql_connection import SQLConnectionHandler
 from qiita_db.study import Study
 from qiita_db.data import RawData, ProcessedData
-from qiita_db.util import (exists_table, get_db_files_base_dir, get_mountpoint,
-                           get_count)
+from qiita_db.util import exists_table, get_mountpoint, get_count
 from qiita_db.metadata_template.prep_template import PrepTemplate, PrepSample
 from qiita_db.metadata_template.sample_template import SampleTemplate, Sample
 from qiita_db.metadata_template import (PREP_TEMPLATE_COLUMNS,
diff --git a/qiita_db/study.py b/qiita_db/study.py
@@ -642,10 +642,11 @@ def data_types(self):
         list of str
         """
         conn_handler = SQLConnectionHandler()
-        sql = ("SELECT DISTINCT DT.data_type FROM qiita.study_raw_data SRD "
-               "JOIN qiita.prep_template PT ON SRD.raw_data_id = "
-               "PT.raw_data_id JOIN qiita.data_type DT ON PT.data_type_id = "
-               "DT.data_type_id WHERE SRD.study_id = %s")
+        sql = """SELECT DISTINCT data_type
+                 FROM qiita.study_prep_template
+                    JOIN qiita.prep_template USING (prep_template_id)
+                    JOIN qiita.data_type USING (data_type_id)
+                 WHERE study_id = %s"""
         return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
 
     @property
@@ -759,36 +760,30 @@ def raw_data(self, data_type=None):
 
         return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
 
-    def add_raw_data(self, raw_data):
-        """ Adds raw_data to the current study
+    def prep_templates(self, data_type=None):
+        """Return list of prep template ids
 
         Parameters
         ----------
-        raw_data : list of RawData
-            The RawData objects to be added to the study
+        data_type : str, optional
+            If given, retrieve only prep templates for given datatype.
+            Default None.
 
-        Raises
-        ------
-        QiitaDBError
-            If the raw_data is already linked to the current study
+        Returns
+        -------
+        list of PrepTemplate ids
         """
-        conn_handler = SQLConnectionHandler()
-        self._lock_non_sandbox(conn_handler)
-        queue = "%d_add_raw_data" % self.id
-        sql = ("SELECT EXISTS(SELECT * FROM qiita.study_raw_data WHERE "
-               "study_id=%s AND raw_data_id=%s)")
-        conn_handler.create_queue(queue)
-        sql_args = [(self.id, rd.id) for rd in raw_data]
-        conn_handler.add_to_queue(queue, sql, sql_args, many=True)
-        linked = conn_handler.execute_queue(queue)
-
-        if any(linked):
-            raise QiitaDBError("Some of the passed raw datas have been already"
-                               " linked to the study %s" % self.id)
+        spec_data = ""
+        if data_type:
+            spec_data = " AND data_type_id = %s" % convert_to_id(data_type,
+                                                                 "data_type")
 
-        conn_handler.executemany(
-            "INSERT INTO qiita.study_raw_data (study_id, raw_data_id) "
-            "VALUES (%s, %s)", sql_args)
+        conn_handler = SQLConnectionHandler()
+        sql = """SELECT prep_template_id
+                 FROM qiita.study_prep_template
+                    JOIN qiita.prep_template USING (prep_template_id)
+                 WHERE study_id = %s{0}""".format(spec_data)
+        return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
 
     def preprocessed_data(self, data_type=None):
         """ Returns list of data ids for preprocessed data info
diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py
@@ -276,7 +276,7 @@ def test_retrieve_biom_tables(self):
         self.assertEqual(self.analysis.biom_tables, exp)
 
     def test_all_associated_filepaths(self):
-        exp = {12, 13, 14, 15}
+        exp = {10, 11, 12, 13}
         self.assertEqual(self.analysis.all_associated_filepath_ids, exp)
 
     def test_retrieve_biom_tables_none(self):
@@ -427,7 +427,7 @@ def test_build_mapping_file(self):
         obs = self.conn_handler.execute_fetchall(
             sql, ("%d_analysis_mapping.txt" % self.analysis.id,))
 
-        exp = [[15, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
+        exp = [[13, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
                [new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
         self.assertEqual(obs, exp)
 
diff --git a/qiita_db/test/test_base.py b/qiita_db/test/test_base.py
@@ -13,7 +13,7 @@
 from qiita_db.base import QiitaObject, QiitaStatusObject
 from qiita_db.exceptions import QiitaDBUnknownIDError
 from qiita_db.data import RawData
-from qiita_db.study import Study
+from qiita_db.study import Study, StudyPerson
 from qiita_db.analysis import Analysis
 
 
@@ -63,8 +63,9 @@ def test_equal(self):
 
     def test_not_equal(self):
         """Not equals works with object of the same type"""
-        new = RawData(2)
-        self.assertNotEqual(self.tester, new)
+        sp1 = StudyPerson(1)
+        sp2 = StudyPerson(2)
+        self.assertNotEqual(sp1, sp2)
 
     def test_not_equal_type(self):
         """Not equals works with object of different type"""
diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py
@@ -15,6 +15,8 @@
 from future import standard_library
 from functools import partial
 
+import pandas as pd
+
 from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
                                load_sample_template_from_cmd,
                                load_prep_template_from_cmd,
@@ -25,9 +27,10 @@
 from qiita_db.environment_manager import patch
 from qiita_db.study import Study, StudyPerson
 from qiita_db.user import User
-from qiita_db.data import RawData, PreprocessedData
+from qiita_db.data import PreprocessedData
 from qiita_db.util import (get_count, check_count, get_db_files_base_dir,
                            get_mountpoint)
+from qiita_db.metadata_template import PrepTemplate
 from qiita_core.util import qiita_test_checker
 from qiita_ware.processing_pipeline import generate_demux_file
 
@@ -154,36 +157,12 @@ def test_load_sample_template_from_cmd(self):
 @qiita_test_checker()
 class TestLoadPrepTemplateFromCmd(TestCase):
     def setUp(self):
-        # Create a sample template file
-        fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
-        close(fd)
-        fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
-        close(fd)
-
-        with open(seqs_fp, "w") as f:
-            f.write("\n")
-        with open(barcodes_fp, "w") as f:
-            f.write("\n")
-
         self.pt_contents = PREP_TEMPLATE
 
-        self.raw_data = RawData.create(
-            2, [Study(1)], filepaths=[(seqs_fp, 1), (barcodes_fp, 2)])
-
-        join_f = partial(join, join(get_db_files_base_dir(), 'raw_data'))
-        self.files_to_remove = [
-            join_f("%s_%s" % (self.raw_data.id, basename(seqs_fp))),
-            join_f("%s_%s" % (self.raw_data.id, basename(barcodes_fp)))]
-
-    def tearDown(self):
-        for fp in self.files_to_remove:
-            if exists(fp):
-                remove(fp)
-
     def test_load_prep_template_from_cmd(self):
         """Correctly adds a prep template to the DB"""
         fh = StringIO(self.pt_contents)
-        st = load_prep_template_from_cmd(fh, self.raw_data.id, 1, '18S')
+        st = load_prep_template_from_cmd(fh, 1, '18S')
         self.assertEqual(st.id, 2)
 
 
@@ -222,14 +201,24 @@ def test_load_data_from_cmd(self):
                           'raw_barcodes']
 
         filetype = 'FASTQ'
-        study_ids = [1]
+        metadata_dict = {
+            'SKB8.640193': {'center_name': 'ANL',
+                            'primer': 'GTGCCAGCMGCCGCGGTAA',
+                            'barcode': 'GTCCGCAAGTTA',
+                            'run_prefix': "s_G1_L001_sequences",
+                            'platform': 'ILLUMINA',
+                            'library_construction_protocol': 'AAAA',
+                            'experiment_design_description': 'BBBB'}}
+        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
+        pt1 = PrepTemplate.create(metadata, Study(1), "16S")
+        prep_templates = [pt1.id]
 
         initial_raw_count = get_count('qiita.raw_data')
         initial_fp_count = get_count('qiita.filepath')
         initial_raw_fp_count = get_count('qiita.raw_filepath')
 
         new = load_raw_data_cmd(filepaths, filepath_types, filetype,
-                                study_ids)
+                                prep_templates)
         raw_data_id = new.id
         self.files_to_remove.append(
             join(self.db_test_raw_dir,
@@ -246,14 +235,12 @@ def test_load_data_from_cmd(self):
                                     initial_fp_count + 3))
         self.assertTrue(check_count('qiita.raw_filepath',
                                     initial_raw_fp_count + 3))
-        self.assertTrue(check_count('qiita.study_raw_data',
-                                    initial_raw_count + 1))
 
         # Ensure that the ValueError is raised when a filepath_type is not
         # provided for each and every filepath
         with self.assertRaises(ValueError):
             load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
-                              study_ids)
+                              prep_templates)
 
 
 @qiita_test_checker()
diff --git a/qiita_db/test/test_job.py b/qiita_db/test/test_job.py
@@ -123,7 +123,7 @@ def test_delete_files(self):
                 Job(1)
 
             obs = self.conn_handler.execute_fetchall(
-                "SELECT * FROM qiita.filepath WHERE filepath_id = 12")
+                "SELECT * FROM qiita.filepath WHERE filepath_id = 10")
             self.assertEqual(obs, [])
 
             obs = self.conn_handler.execute_fetchall(
@@ -149,7 +149,7 @@ def test_delete_folders(self):
                 Job(2)
 
             obs = self.conn_handler.execute_fetchall(
-                "SELECT * FROM qiita.filepath WHERE filepath_id = 13")
+                "SELECT * FROM qiita.filepath WHERE filepath_id = 11")
             self.assertEqual(obs, [])
 
             obs = self.conn_handler.execute_fetchall(
@@ -300,7 +300,7 @@ def test_add_results(self):
         obs = self.conn_handler.execute_fetchall(
             "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
 
-        self.assertEqual(obs, [[1, 12], [1, fp_count + 1]])
+        self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
 
     def test_add_results_dir(self):
         fp_count = get_count('qiita.filepath')
@@ -313,7 +313,7 @@ def test_add_results_dir(self):
         # make sure files attached to job properly
         obs = self.conn_handler.execute_fetchall(
             "SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
-        self.assertEqual(obs, [[1, 12], [1, fp_count + 1]])
+        self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
 
     def test_add_results_completed(self):
         self.job.status = "completed"
diff --git a/qiita_db/test/test_meta_util.py b/qiita_db/test/test_meta_util.py
@@ -36,13 +36,12 @@ def test_get_accessible_filepath_ids(self):
         # shared has access to all study files and analysis files
 
         obs = get_accessible_filepath_ids(User('shared@foo.bar'))
-        self.assertEqual(obs, set([1, 2, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
-                                   18, 19, 20]))
+        self.assertEqual(obs, {1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16})
 
         # Now shared should not have access to the study files
         self._unshare_studies()
         obs = get_accessible_filepath_ids(User('shared@foo.bar'))
-        self.assertEqual(obs, set([12, 13, 14, 15]))
+        self.assertEqual(obs, {10, 11, 12, 13})
 
         # Now shared should not have access to any files
         self._unshare_analyses()
@@ -52,10 +51,10 @@ def test_get_accessible_filepath_ids(self):
         # Now shared has access to public study files
         self._set_processed_data_public()
         obs = get_accessible_filepath_ids(User('shared@foo.bar'))
-        self.assertEqual(obs, set([1, 2, 5, 6, 7, 11, 16, 19, 20]))
+        self.assertEqual(obs, {1, 2, 3, 4, 5, 9, 14, 15, 16})
 
         # Test that it doesn't break: if the SampleTemplate hasn't been added
-        exp = set([1, 2, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])
+        exp = {1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16}
         obs = get_accessible_filepath_ids(User('test@foo.bar'))
         self.assertEqual(obs, exp)
 
diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py
@@ -39,19 +39,19 @@ def test_processed_data_status(self):
         self.assertEqual(get_count("qiita.processed_data_status"), 4)
 
     def test_filepath(self):
-        self.assertEqual(get_count("qiita.filepath"), 20)
+        self.assertEqual(get_count("qiita.filepath"), 16)
 
     def test_filepath_type(self):
         self.assertEqual(get_count("qiita.filepath_type"), 19)
 
     def test_raw_data(self):
-        self.assertEqual(get_count("qiita.raw_data"), 4)
+        self.assertEqual(get_count("qiita.raw_data"), 1)
 
     def test_raw_filepath(self):
-        self.assertEqual(get_count("qiita.raw_filepath"), 4)
+        self.assertEqual(get_count("qiita.raw_filepath"), 2)
 
-    def test_study_raw_data(self):
-        self.assertEqual(get_count("qiita.study_raw_data"), 4)
+    def test_study_prep_template(self):
+        self.assertEqual(get_count("qiita.study_prep_template"), 1)
 
     def test_required_sample_info(self):
         self.assertEqual(get_count("qiita.study_sample"), 27)
diff --git a/qiita_db/test/test_study.py b/qiita_db/test/test_study.py
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
diff --git a/scripts/qiita b/scripts/qiita