From ed12aa70e99f2d245e39c5a36d0588a76441b5b3 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 19 May 2015 20:07:04 -0700 Subject: [PATCH 01/19] Fixing RawData.create --- qiita_db/data.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 85f7114d4..bb876b7bd 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -288,21 +288,18 @@ class RawData(BaseData): _table = "raw_data" _data_filepath_table = "raw_filepath" _data_filepath_column = "raw_data_id" - # Define here the class name, so in case it changes in the database we - # only need to change it here - _study_raw_table = "study_raw_data" @classmethod - def create(cls, filetype, studies, filepaths=None): + def create(cls, filetype, prep_templates, filepaths): r"""Creates a new object with a new id on the storage system Parameters ---------- filetype : int The filetype identifier - studies : list of Study - The list of Study objects to which the raw data belongs to - filepaths : iterable of tuples (str, int), optional + prep_templates : list of PrepTemplates + The list of PrepTemplate objects to which the raw data is attached + filepaths : iterable of tuples (str, int) The list of paths to the raw files and its filepath type identifier Returns @@ -318,11 +315,11 @@ def create(cls, filetype, studies, filepaths=None): # Instantiate the object with the new id rd = cls(rd_id) - # Connect the raw data with its studies - values = [(study.id, rd_id) for study in studies] - conn_handler.executemany( - "INSERT INTO qiita.{0} (study_id, raw_data_id) VALUES " - "(%s, %s)".format(rd._study_raw_table), values) + # Connect the raw data with its prep templates + values = [(rd_id, pt.id) for pt in prep_templates] + sql = """UPDATE qiita.prep_template + SET raw_data_id = %s WHERE prep_template_id = %s""" + conn_handler.executemany(sql, values) # If file paths have been provided, add them to the raw data object if filepaths: From 
a10b8287c8d0bf819fd4bebbf7e6c0c70cf72d41 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 20 May 2015 09:01:47 -0700 Subject: [PATCH 02/19] Adding check on RawData creation --- qiita_db/data.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index bb876b7bd..221395c4e 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -305,9 +305,30 @@ def create(cls, filetype, prep_templates, filepaths): Returns ------- A new instance of `cls` to access to the RawData stored in the DB + + Raises + ------ + IncompetentQiitaDeveloperError + If any of the passed prep templates already have a raw data id """ - # Add the raw data to the database, and get the raw data id back conn_handler = SQLConnectionHandler() + # We first need to check if the passed prep templates doesn't have + # a raw data already attached to them + sql = """SELECT EXISTS( + SELECT raw_data_id + FROM qiita.prep_template + WHERE prep_template_id IN ({}) + AND raw_data_id IS NOT NULL)""".format( + ', '.join(['%s'] * len(prep_templates))) + exists = conn_handler.execute_fetchone( + sql, [pt.id for pt in prep_templates])[0] + if exists: + raise IncompetentQiitaDeveloperError( + "Cannot create raw data because on the passed prep templates " + "already has a raw data associated with it. 
Prep templates: %s" + % ', '.join([pt.id for pt in prep_templates])) + + # Add the raw data to the database, and get the raw data id back rd_id = conn_handler.execute_fetchone( "INSERT INTO qiita.{0} (filetype_id) VALUES (%s) " "RETURNING raw_data_id".format(cls._table), (filetype,))[0] From af0d31fbfe7a0a8ac4beac48cb787be405a56692 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 20 May 2015 09:02:02 -0700 Subject: [PATCH 03/19] Initial test fixing --- qiita_db/test/test_data.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 929987a54..267ad1f4f 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -20,7 +20,7 @@ QiitaDBStatusError) from qiita_db.study import Study, StudyPerson from qiita_db.user import User -from qiita_db.util import get_mountpoint +from qiita_db.util import get_mountpoint, get_count from qiita_db.data import BaseData, RawData, PreprocessedData, ProcessedData from qiita_db.metadata_template import PrepTemplate @@ -46,7 +46,6 @@ def setUp(self): close(fd) self.filetype = 2 self.filepaths = [(self.seqs_fp, 1), (self.barcodes_fp, 2)] - self.studies = [Study(1)] _, self.db_test_raw_dir = get_mountpoint('raw_data')[0] with open(self.seqs_fp, "w") as f: @@ -55,6 +54,10 @@ def setUp(self): f.write("\n") self._clean_up_files = [] + # Create some new PrepTemplates + + + # Create a new study info = { "timeseries_type_id": 1, @@ -81,9 +84,9 @@ def tearDown(self): def test_create(self): """Correctly creates all the rows in the DB for the raw data""" # Check that the returned object has the correct id - exp_id = 1 + self.conn_handler.execute_fetchone( - "SELECT count(1) from qiita.raw_data")[0] - obs = RawData.create(self.filetype, self.studies, self.filepaths) + exp_id = get_count("qiita.raw_data") + 1 + obs = RawData.create(self.filetype, self.prep_templates, + self.filepaths) self.assertEqual(obs.id, exp_id) # Check that the raw 
data have been correctly added to the DB @@ -92,10 +95,12 @@ def test_create(self): # raw_data_id, filetype, link_filepaths_status self.assertEqual(obs, [[exp_id, 2, 'idle']]) - # Check that the raw data have been correctly linked with the study - obs = self.conn_handler.execute_fetchall( - "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % exp_id) + # Check that the raw data has been correctly linked with the prep + # template + sql = "SELECT raw_data_id FROM qiita.prep_template WHERE prep_template_id=%s" + obs = self.conn_handler.execute_fetchall(sql, (exp_id,)) # study_id , raw_data_id + self.assertEqual(obs, [[self.pt.id]]) self.assertEqual(obs, [[1, exp_id]]) # Check that the files have been copied to right location From a98740390abd2491f0125d1f29c520cf19165e1c Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 09:47:56 -0700 Subject: [PATCH 04/19] Improving SQL in creation --- qiita_db/data.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 221395c4e..18bb2c812 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -315,7 +315,7 @@ def create(cls, filetype, prep_templates, filepaths): # We first need to check if the passed prep templates doesn't have # a raw data already attached to them sql = """SELECT EXISTS( - SELECT raw_data_id + SELECT * FROM qiita.prep_template WHERE prep_template_id IN ({}) AND raw_data_id IS NOT NULL)""".format( @@ -324,8 +324,9 @@ def create(cls, filetype, prep_templates, filepaths): sql, [pt.id for pt in prep_templates])[0] if exists: raise IncompetentQiitaDeveloperError( - "Cannot create raw data because on the passed prep templates " - "already has a raw data associated with it. Prep templates: %s" + "Cannot create raw data because the passed prep templates " + "already have a raw data associated with it. 
" + "Prep templates: %s" % ', '.join([pt.id for pt in prep_templates])) # Add the raw data to the database, and get the raw data id back @@ -349,23 +350,23 @@ def create(cls, filetype, prep_templates, filepaths): return rd @classmethod - def delete(cls, raw_data_id, study_id): + def delete(cls, raw_data_id, prep_template_id): """Removes the raw data with id raw_data_id Parameters ---------- raw_data_id : int The raw data id - study_id : int - The study id + prep_template_id : int + The prep_template_id Raises ------ QiitaDBUnknownIDError If the raw data id doesn't exist QiitaDBError - If the raw data is not linked to that study_id - If the raw data has prep templates associated + If the raw data is not linked to that prep_template_id + If the raw data has files linked """ conn_handler = SQLConnectionHandler() From 0e34bc480df85da3ef5126029b1eefe33735b5ed Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 12:26:40 -0700 Subject: [PATCH 05/19] Fixing creation tests --- qiita_db/test/test_data.py | 71 +++++++++++--------------------------- 1 file changed, 20 insertions(+), 51 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 267ad1f4f..06df925c4 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -14,6 +14,8 @@ from os.path import join, basename, exists from tempfile import mkstemp +import pandas as pd + from qiita_core.util import qiita_test_checker from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_db.exceptions import (QiitaDBError, QiitaDBUnknownIDError, @@ -55,27 +57,18 @@ def setUp(self): self._clean_up_files = [] # Create some new PrepTemplates - - - - # Create a new study - info = { - "timeseries_type_id": 1, - "metadata_complete": True, - "mixs_compliant": True, - "number_samples_collected": 25, - "number_samples_promised": 28, - "portal_type_id": 3, - "study_alias": "FCM", - "study_description": "Microbiome of people who eat nothing but " - "fried chicken", - 
"study_abstract": "Exploring how a high fat diet changes the " - "gut microbiome", - "emp_person_id": StudyPerson(2), - "principal_investigator_id": StudyPerson(3), - "lab_person_id": StudyPerson(1) - } - Study.create(User("test@foo.bar"), "Test study 2", [1], info) + metadata_dict = { + 'SKB8.640193': {'center_name': 'ANL', + 'primer': 'GTGCCAGCMGCCGCGGTAA', + 'barcode': 'GTCCGCAAGTTA', + 'run_prefix': "s_G1_L001_sequences", + 'platform': 'ILLUMINA', + 'library_construction_protocol': 'AAAA', + 'experiment_design_description': 'BBBB'}} + metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') + self.pt1 = PrepTemplate.create(metadata, Study(1), "16S") + self.pt2 = PrepTemplate.create(metadata, Study(1), "18S") + self.prep_templates = [self.pt1, self.pt2] def tearDown(self): for f in self._clean_up_files: @@ -96,12 +89,13 @@ def test_create(self): self.assertEqual(obs, [[exp_id, 2, 'idle']]) # Check that the raw data has been correctly linked with the prep - # template - sql = "SELECT raw_data_id FROM qiita.prep_template WHERE prep_template_id=%s" + # templates + sql = """SELECT prep_template_id + FROM qiita.prep_template + WHERE raw_data_id = %s + ORDER BY prep_template_id""" obs = self.conn_handler.execute_fetchall(sql, (exp_id,)) - # study_id , raw_data_id - self.assertEqual(obs, [[self.pt.id]]) - self.assertEqual(obs, [[1, exp_id]]) + self.assertEqual(obs, [[self.pt1.id], [self.pt2.id]]) # Check that the files have been copied to right location exp_seqs_fp = join(self.db_test_raw_dir, @@ -133,31 +127,6 @@ def test_create(self): # raw_data_id, filepath_id self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]]) - def test_create_no_filepaths(self): - """Correctly creates a raw data object with no filepaths attached""" - # Check that the returned object has the correct id - exp_id = 1 + self.conn_handler.execute_fetchone( - "SELECT count(1) from qiita.raw_data")[0] - obs = RawData.create(self.filetype, self.studies) - self.assertEqual(obs.id, 
exp_id) - - # Check that the raw data have been correctly added to the DB - obs = self.conn_handler.execute_fetchall( - "SELECT * FROM qiita.raw_data WHERE raw_data_id=%d" % exp_id) - # raw_data_id, filetype, link_filepaths_status - self.assertEqual(obs, [[exp_id, 2, 'idle']]) - - # Check that the raw data have been correctly linked with the study - obs = self.conn_handler.execute_fetchall( - "SELECT * FROM qiita.study_raw_data WHERE raw_data_id=%d" % exp_id) - # study_id , raw_data_id - self.assertEqual(obs, [[1, exp_id]]) - - # Check that no files have been linked with the filepaths - obs = self.conn_handler.execute_fetchall( - "SELECT * FROM qiita.raw_filepath WHERE raw_data_id=%d" % exp_id) - self.assertEqual(obs, []) - def test_get_filepaths(self): """Correctly returns the filepaths to the raw files""" rd = RawData(1) From 5abf76d38fc368fb78dc978ef46fba62388e44c6 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 12:27:00 -0700 Subject: [PATCH 06/19] Fixing rawdata deletion --- qiita_db/data.py | 67 +++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 18bb2c812..028b4fca3 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -374,55 +374,42 @@ def delete(cls, raw_data_id, prep_template_id): if not cls.exists(raw_data_id): raise QiitaDBUnknownIDError(raw_data_id, "raw data") - study_raw_data_exists = conn_handler.execute_fetchone( - "SELECT EXISTS(SELECT * FROM qiita.study_raw_data WHERE " - "study_id = {0} AND raw_data_id = {1})".format(study_id, - raw_data_id))[0] - if not study_raw_data_exists: - raise QiitaDBError( - "Raw data %d is not linked to study %d or the study " - "doesn't exist" % (raw_data_id, study_id)) - - # check if there are any prep templates for this study - prep_template_exists = conn_handler.execute_fetchone( - """ - SELECT EXISTS( - SELECT * FROM qiita.prep_template AS pt - LEFT JOIN qiita.prep_template_sample AS cpi ON - 
(pt.prep_template_id=cpi.prep_template_id) - LEFT JOIN qiita.study_sample AS rsi ON - (cpi.sample_id=rsi.sample_id) - WHERE raw_data_id = {0} and study_id = {1} - ) - """.format(raw_data_id, study_id))[0] - if prep_template_exists: + # Check if the raw data is linked to the prep template + sql = """SELECT EXISTS( + SELECT * FROM qiita.prep_template + WHERE prep_template_id = %s AND raw_data_id = %s)""" + pt_rd_exists = conn_handler.execute_fetchone( + sql, (prep_template_id, raw_data_id)) + if not pt_rd_exists: raise QiitaDBError( - "Raw data %d has prep template(s) associated so it can't be " - "erased." % raw_data_id) + "Raw data %d is not linked to prep template %d or the prep " + "template doesn't exist" % (raw_data_id, prep_template_id)) - # check how many raw data are left, if last one, check that there - # are no linked files + # Check to how many prep templates the raw data is still linked. + # If last one, check that are no linked files raw_data_count = conn_handler.execute_fetchone( - "SELECT COUNT(*) FROM qiita.study_raw_data WHERE " - "raw_data_id = {0}".format(raw_data_id))[0] + "SELECT COUNT(*) FROM qiita.prep_template WHERE " + "raw_data_id = %s", (raw_data_id,))[0] if raw_data_count == 1 and RawData(raw_data_id).get_filepath_ids(): raise QiitaDBError( "Raw data (%d) can't be remove because it has linked files. " "To remove it, first unlink files." 
% raw_data_id) # delete - conn_handler.execute("DELETE FROM qiita.study_raw_data WHERE " - "raw_data_id = {0} AND " - "study_id = {1}".format(raw_data_id, study_id)) - - # delete the connecting tables if there is no other study linked to - # the raw data - study_raw_data_count = conn_handler.execute_fetchone( - "SELECT COUNT(*) FROM qiita.study_raw_data WHERE " - "raw_data_id = {0}".format(raw_data_id))[0] - if study_raw_data_count == 0: - conn_handler.execute("DELETE FROM qiita.raw_data WHERE " - "raw_data_id = {0}".format(raw_data_id)) + queue = "DELETE_%d_%d" % (raw_data_id, prep_template_id) + conn_handler.create_queue(queue) + sql = """UPDATE qiita.prep_template + SET raw_data_id = %s + WHERE prep_template_id = %s""" + conn_handler.add_to_queue(sql, (None, prep_template_id)) + + # If there is no other prep template pointing to the raw data, it can + # be removed + if raw_data_count == 1: + sql = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s" + conn_handler.add_to_queue(queue, sql, (raw_data_id,)) + + conn_handler.execute_queue(queue) @property def studies(self): From 9f8a2707b996ed1e32c9d6bdca1450593a81f387 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 12:31:41 -0700 Subject: [PATCH 07/19] Partially addressing delete tests --- qiita_db/test/test_data.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 06df925c4..6cebcf788 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -247,27 +247,26 @@ def test_delete(self): with self.assertRaises(QiitaDBUnknownIDError): RawData.delete(1000, 1) - # the raw data and the study id are not linked or - # the study doesn't exits + # the raw data and the prep template id are not linked with self.assertRaises(QiitaDBError): - RawData.delete(1, 1000) + RawData.delete(1, self.pt2) - # the raw data has prep templates + # the prep template does not exist with 
self.assertRaises(QiitaDBError): - RawData.delete(1, 1) + RawData.delete(1, 1000) # the raw data has linked files with self.assertRaises(QiitaDBError): RawData.delete(3, 1) - # the raw data is linked to a study that has not prep templates - Study(2).add_raw_data([RawData(1)]) - RawData.delete(1, 2) + # # the raw data is linked to a study that has not prep templates + # Study(2).add_raw_data([RawData(1)]) + # RawData.delete(1, 2) - # delete raw data - self.assertTrue(RawData.exists(2)) - RawData.delete(2, 1) - self.assertFalse(RawData.exists(2)) + # # delete raw data + # self.assertTrue(RawData.exists(2)) + # RawData.delete(2, 1) + # self.assertFalse(RawData.exists(2)) def test_status(self): rd = RawData(1) From d5b582795390171d2a3588ba37d8215871e824cf Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 19:36:32 -0700 Subject: [PATCH 08/19] Fixing bug on delete --- qiita_db/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 028b4fca3..eee311526 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -401,7 +401,7 @@ def delete(cls, raw_data_id, prep_template_id): sql = """UPDATE qiita.prep_template SET raw_data_id = %s WHERE prep_template_id = %s""" - conn_handler.add_to_queue(sql, (None, prep_template_id)) + conn_handler.add_to_queue(queue, sql, (None, prep_template_id)) # If there is no other prep template pointing to the raw data, it can # be removed From d93dd9da37335280a32e4745eeab1bf497904737 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 19:36:51 -0700 Subject: [PATCH 09/19] Improve delete tests --- qiita_db/test/test_data.py | 56 +++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 6cebcf788..6a506e17e 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -242,31 +242,67 @@ def test_exists(self): self.assertTrue(RawData.exists(1)) 
self.assertFalse(RawData.exists(1000)) - def test_delete(self): + def test_delete_error_no_exists(self): # the raw data doesn't exist with self.assertRaises(QiitaDBUnknownIDError): - RawData.delete(1000, 1) + RawData.delete(1000, 0) + def test_delete_error_raw_data_not_linked(self): # the raw data and the prep template id are not linked with self.assertRaises(QiitaDBError): RawData.delete(1, self.pt2) + def test_delete_error_prep_template_no_exists(self): # the prep template does not exist with self.assertRaises(QiitaDBError): RawData.delete(1, 1000) + def test_delete_error_linked_files(self): # the raw data has linked files with self.assertRaises(QiitaDBError): - RawData.delete(3, 1) + RawData.delete(1, 1) + + def test_delete(self): + rd = RawData.create(self.filetype, self.prep_templates, + self.filepaths) + + sql_pt = """SELECT prep_template_id + FROM qiita.prep_template + WHERE raw_data_id = %s + ORDER BY prep_template_id""" + obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,)) + self.assertEqual(obs, [[self.pt1.id], [self.pt2.id]]) + + # This delete call will only unlink the raw data from the prep template + RawData.delete(rd.id, self.pt2.id) + + # Check that it successfully unlink the raw data from pt2 + obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,)) + self.assertEqual(obs, [[self.pt1.id]]) + self.assertEqual(self.pt2.raw_data, None) + + # If we try to remove the RawData now, it should raise an error + # because it still has files attached to it + with self.assertRaises(QiitaDBError): + RawData.delete(rd.id, self.pt1.id) + + # Clear the files so we can actually remove the RawData + rd.clear_filepaths() + + RawData.delete(rd.id, self.pt1.id) + obs = self.conn_handler.execute_fetchall(sql_pt, (rd.id,)) + self.assertEqual(obs, []) - # # the raw data is linked to a study that has not prep templates - # Study(2).add_raw_data([RawData(1)]) - # RawData.delete(1, 2) + # Check that all expected rows have been deleted + sql = """SELECT EXISTS( + 
SELECT * FROM qiita.raw_filepath + WHERE raw_data_id = %s""" + self.assertFalse(conn_handler.execute_fetchone(sql, (rd.id,))[0]) - # # delete raw data - # self.assertTrue(RawData.exists(2)) - # RawData.delete(2, 1) - # self.assertFalse(RawData.exists(2)) + sql = """SELECT EXISTS( + SELECT * FROM qiita.raw_data + WHERE raw_data_id=%s)""" + self.assertFalse(conn_handler.execute_fetchone(sql, (rd.id,))[0]) def test_status(self): rd = RawData(1) From 0cac9e881b8ba9c3ab3ec8ad5b55c8f16f33cb8d Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 20:35:59 -0700 Subject: [PATCH 10/19] Fixing studies property --- qiita_db/data.py | 9 +++++---- qiita_db/test/test_data.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index eee311526..a1b5b07ea 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -421,10 +421,11 @@ def studies(self): The list of study ids to which the raw data belongs to """ conn_handler = SQLConnectionHandler() - ids = conn_handler.execute_fetchall( - "SELECT study_id FROM qiita.{0} WHERE " - "raw_data_id=%s".format(self._study_raw_table), - [self._id]) + sql = """SELECT study_id + FROM qiita.study_prep_template + JOIN qiita.prep_template USING (prep_template_id) + WHERE raw_data_id = %s""" + ids = conn_handler.execute_fetchall(sql, (self.id,)) return [id[0] for id in ids] @property diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 6a506e17e..9c419b53f 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -296,13 +296,13 @@ def test_delete(self): # Check that all expected rows have been deleted sql = """SELECT EXISTS( SELECT * FROM qiita.raw_filepath - WHERE raw_data_id = %s""" - self.assertFalse(conn_handler.execute_fetchone(sql, (rd.id,))[0]) + WHERE raw_data_id = %s)""" + self.assertFalse(self.conn_handler.execute_fetchone(sql, (rd.id,))[0]) sql = """SELECT EXISTS( SELECT * FROM qiita.raw_data WHERE raw_data_id=%s)""" - 
self.assertFalse(conn_handler.execute_fetchone(sql, (rd.id,))[0]) + self.assertFalse(self.conn_handler.execute_fetchone(sql, (rd.id,))[0]) def test_status(self): rd = RawData(1) From 3c700ce86d53ebd7c617952e0d7d02f0f20cce00 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 20:39:13 -0700 Subject: [PATCH 11/19] Fix clear_filepaths test --- qiita_db/test/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 9c419b53f..60a9211ab 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -185,7 +185,7 @@ def test_is_preprocessed(self): self.assertFalse(RawData(2)._is_preprocessed()) def test_clear_filepaths(self): - rd = RawData.create(self.filetype, self.studies, self.filepaths) + rd = RawData.create(self.filetype, [self.pt1], self.filepaths) self.assertTrue(self.conn_handler.execute_fetchone( "SELECT EXISTS(SELECT * FROM qiita.raw_filepath " "WHERE raw_data_id=%s)", (rd.id,))[0]) From 2ea7a482d1728eee550113adb54ca3cf5653c38d Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 20:43:33 -0700 Subject: [PATCH 12/19] Fixing test_is_preprocessed --- qiita_db/test/test_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 60a9211ab..3fc83af45 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -182,7 +182,8 @@ def test_link_filepaths_status_setter_error(self): def test_is_preprocessed(self): self.assertTrue(RawData(1)._is_preprocessed()) - self.assertFalse(RawData(2)._is_preprocessed()) + rd = RawData.create(self.filetype, self.prep_templates, self.filepaths) + self.assertFalse(rd._is_preprocessed()) def test_clear_filepaths(self): rd = RawData.create(self.filetype, [self.pt1], self.filepaths) From f599b242dd2e0d17571d24593aadb8848c45845f Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 20:49:35 -0700 Subject: [PATCH 
13/19] Removing remove filepath because it is not used in the code base - also it was unsafe because it was using purge_filepaths --- qiita_db/data.py | 34 ---------------------------------- qiita_db/test/test_data.py | 31 ------------------------------- 2 files changed, 65 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index a1b5b07ea..5d3ab46aa 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -583,40 +583,6 @@ def clear_filepaths(self): # self.studies should only have one element, thus self.studies[0] move_filepaths_to_upload_folder(self.studies[0], filepaths) - def remove_filepath(self, fp): - """Removes the filepath from the RawData - - Parameters - ---------- - fp : str - The filepath to remove - """ - conn_handler = SQLConnectionHandler() - queue = "remove_fp_%s" % self.id - conn_handler.create_queue(queue) - - # Set the current status to unlinking - self._set_link_filepaths_status("unlinking") - - self._remove_filepath(fp, conn_handler, queue) - - try: - # Execute the queue - conn_handler.execute_queue(queue) - except Exception as e: - self._set_link_filepaths_status("failed: %s" % e) - LogEntry.create('Runtime', str(e), - info={self.__class__.__name__: self.id}) - raise e - - # We can already update the status to done, as the files have been - # unlinked, the move_filepaths_to_upload_folder call will not change - # the status of the raw data object - self._set_link_filepaths_status("idle") - - # Delete the files, if they are not used anywhere - purge_filepaths() - def status(self, study): """The status of the raw data within the given study diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 3fc83af45..660380d52 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -208,37 +208,6 @@ def test_clear_filepaths_error(self): with self.assertRaises(QiitaDBError): RawData(1).clear_filepaths() - def test_remove_filepath(self): - top_id = self.conn_handler.execute_fetchone( - "SELECT count(1) FROM 
qiita.raw_filepath")[0] - raw_id = self.conn_handler.execute_fetchone( - "SELECT count(1) FROM qiita.raw_data")[0] - rd = RawData.create(self.filetype, self.studies, self.filepaths) - fp = join(self.db_test_raw_dir, "%d_%s" % (raw_id + 1, - basename(self.seqs_fp))) - rd.remove_filepath(fp) - self.assertFalse(self.conn_handler.execute_fetchone( - "SELECT EXISTS(SELECT * FROM qiita.raw_filepath " - "WHERE filepath_id=%d)" % (top_id - 1))[0]) - self.assertTrue(self.conn_handler.execute_fetchone( - "SELECT EXISTS(SELECT * FROM qiita.raw_filepath " - "WHERE filepath_id=%d)" % (top_id - 2))[0]) - - def test_remove_filepath_errors(self): - fp = join(self.db_test_raw_dir, '1_s_G1_L001_sequences.fastq.gz') - with self.assertRaises(QiitaDBError): - RawData(1).remove_filepath(fp) - - # filepath doesn't belong to that raw data - with self.assertRaises(ValueError): - RawData(2).remove_filepath(fp) - - # the raw data has been linked to more than 1 study so it can't be - # unliked - Study(2).add_raw_data([RawData(2)]) - with self.assertRaises(QiitaDBError): - RawData(2).remove_filepath(fp) - def test_exists(self): self.assertTrue(RawData.exists(1)) self.assertFalse(RawData.exists(1000)) From dcc559746eab63f066cf06ccb56e0ead2d267f07 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 20:58:49 -0700 Subject: [PATCH 14/19] Fixing raw_data attribute from study so we can fix the status property of the raw data, and fix the status property and its tests --- qiita_db/data.py | 2 -- qiita_db/study.py | 8 ++++++-- qiita_db/test/test_data.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 5d3ab46aa..46c6144bd 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -626,8 +626,6 @@ def status(self, study): USING (preprocessed_data_id) JOIN qiita.prep_template pt USING (prep_template_id) - JOIN qiita.raw_data rd - USING (raw_data_id) JOIN qiita.study_processed_data spd USING (processed_data_id) WHERE 
pt.raw_data_id=%s AND spd.study_id=%s""" diff --git a/qiita_db/study.py b/qiita_db/study.py index b0e086b8b..d32019721 100644 --- a/qiita_db/study.py +++ b/qiita_db/study.py @@ -751,8 +751,12 @@ def raw_data(self, data_type=None): spec_data = " AND data_type_id = %d" % convert_to_id(data_type, "data_type") conn_handler = SQLConnectionHandler() - sql = ("SELECT raw_data_id FROM qiita.study_raw_data WHERE " - "study_id = %s{0}".format(spec_data)) + sql = """SELECT raw_data_id + FROM qiita.study_prep_template + JOIN qiita.prep_template USING (prep_template_id) + JOIN qiita.raw_data USING (raw_data_id) + WHERE study_id = %s{0}""".format(spec_data) + return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))] def add_raw_data(self, raw_data): diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 660380d52..84c96993b 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -288,7 +288,7 @@ def test_status(self): # Check that new raw data has sandbox as status since no # processed data exists for them - rd = RawData.create(self.filetype, self.studies, self.filepaths) + rd = RawData.create(self.filetype, self.prep_templates, self.filepaths) self.assertEqual(rd.status(s), 'sandbox') def test_status_error(self): From ed73979351014357661d8c41071ac217cc36e273 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 21:10:52 -0700 Subject: [PATCH 15/19] Fixing test for PreprocessedData --- qiita_db/test/test_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 84c96993b..b62db7970 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -535,11 +535,11 @@ def test_get_filepaths(self): """Correctly returns the filepaths to the preprocessed files""" ppd = PreprocessedData(1) obs = ppd.get_filepaths() - exp = [(5, join(self.db_test_ppd_dir, '1_seqs.fna'), + exp = [(3, join(self.db_test_ppd_dir, '1_seqs.fna'), 
"preprocessed_fasta"), - (6, join(self.db_test_ppd_dir, '1_seqs.qual'), + (4, join(self.db_test_ppd_dir, '1_seqs.qual'), "preprocessed_fastq"), - (7, join(self.db_test_ppd_dir, '1_seqs.demux'), + (5, join(self.db_test_ppd_dir, '1_seqs.demux'), "preprocessed_demux")] self.assertEqual(obs, exp) From 736199a863a904f5bd39a7af69c2c3b8d9731307 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 21:12:25 -0700 Subject: [PATCH 16/19] Fixing test for ProcessedData --- qiita_db/test/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index b62db7970..82e95c69b 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -939,13 +939,13 @@ def test_get_filepath(self): # check the test data pd = ProcessedData(1) obs = pd.get_filepaths() - exp = [(11, join(self.db_test_pd_dir, + exp = [(9, join(self.db_test_pd_dir, '1_study_1001_closed_reference_otu_table.biom'), "biom")] self.assertEqual(obs, exp) def test_get_filepath_ids(self): pd = ProcessedData(1) - self.assertEqual(pd.get_filepath_ids(), [11]) + self.assertEqual(pd.get_filepath_ids(), [9]) def test_preprocessed_data(self): """Correctly returns the preprocessed_data""" From 6d443660397199e46a9a3d7fde5717876984c927 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 21 May 2015 23:12:43 -0700 Subject: [PATCH 17/19] Fixing bug on error report and adding a test for it --- qiita_db/data.py | 2 +- qiita_db/test/test_data.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 46c6144bd..f44e62769 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -327,7 +327,7 @@ def create(cls, filetype, prep_templates, filepaths): "Cannot create raw data because the passed prep templates " "already have a raw data associated with it. 
" "Prep templates: %s" - % ', '.join([pt.id for pt in prep_templates])) + % ', '.join([str(pt.id) for pt in prep_templates])) # Add the raw data to the database, and get the raw data id back rd_id = conn_handler.execute_fetchone( diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index 82e95c69b..fca114608 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -127,6 +127,10 @@ def test_create(self): # raw_data_id, filepath_id self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]]) + def test_create_error(self): + with self.assertRaises(IncompetentQiitaDeveloperError): + RawData.create(self.filetype, [PrepTemplate(1)], self.filepaths) + def test_get_filepaths(self): """Correctly returns the filepaths to the raw files""" rd = RawData(1) From afc0d6814b4d0037913d61aa80bbf1f3b1224da7 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 25 May 2015 02:48:53 -0500 Subject: [PATCH 18/19] Fixing clear_filepaths error message --- qiita_db/data.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index f44e62769..9513dfa14 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -515,14 +515,14 @@ def _remove_filepath(self, fp, conn_handler, queue): self._set_link_filepaths_status("failed: %s" % msg) raise QiitaDBError(msg) - # The filepath belongs to one or more studies - studies_linked = self.studies - if len(studies_linked) > 1: + # The filepath belongs to one or more prep templates + prep_templates = self.prep_templates + if len(prep_templates) > 1: msg = ("Can't clear all the filepaths from raw data %s because " - "it has been shared with other studies: %s. If you want to " - "remove it, first remove the raw data from the other " - "studies." % (self._id, - ', '.join(map(str, studies_linked)))) + "it has been used with other prep templates: %s. If you " + "want to remove it, first remove the raw data from the " + "other prep templates." 
+ % (self._id, ', '.join(map(str, prep_templates)))) self._set_link_filepaths_status("failed: %s" % msg) raise QiitaDBError(msg) From 0b83bff769c2c934f4591194dc7ab4a3872ea67d Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 27 May 2015 18:02:39 -0500 Subject: [PATCH 19/19] Addressing comments --- qiita_db/data.py | 6 +++--- qiita_db/test/test_data.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qiita_db/data.py b/qiita_db/data.py index 9513dfa14..e9f3c6d98 100644 --- a/qiita_db/data.py +++ b/qiita_db/data.py @@ -308,11 +308,11 @@ def create(cls, filetype, prep_templates, filepaths): Raises ------ - IncompetentQiitaDeveloperError + QiitaDBError If any of the passed prep templates already have a raw data id """ conn_handler = SQLConnectionHandler() - # We first need to check if the passed prep templates doesn't have + # We first need to check if the passed prep templates don't have # a raw data already attached to them sql = """SELECT EXISTS( SELECT * @@ -323,7 +323,7 @@ def create(cls, filetype, prep_templates, filepaths): exists = conn_handler.execute_fetchone( sql, [pt.id for pt in prep_templates])[0] if exists: - raise IncompetentQiitaDeveloperError( + raise QiitaDBError( "Cannot create raw data because the passed prep templates " "already have a raw data associated with it. " "Prep templates: %s" diff --git a/qiita_db/test/test_data.py b/qiita_db/test/test_data.py index fca114608..66cbfae81 100644 --- a/qiita_db/test/test_data.py +++ b/qiita_db/test/test_data.py @@ -128,7 +128,7 @@ def test_create(self): self.assertEqual(obs, [[exp_id, top_id - 1], [exp_id, top_id]]) def test_create_error(self): - with self.assertRaises(IncompetentQiitaDeveloperError): + with self.assertRaises(QiitaDBError): RawData.create(self.filetype, [PrepTemplate(1)], self.filepaths) def test_get_filepaths(self):