Skip to content

Commit 7b227a3

Browse files
committed
Merge pull request #1204 from josenavas/1084-data
1084 data changes
2 parents dcdec1f + 0b83bff commit 7b227a3

File tree

3 files changed

+170
-212
lines changed

3 files changed

+170
-212
lines changed

qiita_db/data.py

Lines changed: 76 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -288,41 +288,60 @@ class RawData(BaseData):
288288
_table = "raw_data"
289289
_data_filepath_table = "raw_filepath"
290290
_data_filepath_column = "raw_data_id"
291-
# Define here the class name, so in case it changes in the database we
292-
# only need to change it here
293-
_study_raw_table = "study_raw_data"
294291

295292
@classmethod
296-
def create(cls, filetype, studies, filepaths=None):
293+
def create(cls, filetype, prep_templates, filepaths):
297294
r"""Creates a new object with a new id on the storage system
298295
299296
Parameters
300297
----------
301298
filetype : int
302299
The filetype identifier
303-
studies : list of Study
304-
The list of Study objects to which the raw data belongs to
305-
filepaths : iterable of tuples (str, int), optional
300+
prep_templates : list of PrepTemplates
301+
The list of PrepTemplate objects to which the raw data is attached
302+
filepaths : iterable of tuples (str, int)
306303
The list of paths to the raw files and its filepath type identifier
307304
308305
Returns
309306
-------
310307
A new instance of `cls` to access to the RawData stored in the DB
308+
309+
Raises
310+
------
311+
QiitaDBError
312+
If any of the passed prep templates already have a raw data id
311313
"""
312-
# Add the raw data to the database, and get the raw data id back
313314
conn_handler = SQLConnectionHandler()
315+
# We first need to check if the passed prep templates don't have
316+
# a raw data already attached to them
317+
sql = """SELECT EXISTS(
318+
SELECT *
319+
FROM qiita.prep_template
320+
WHERE prep_template_id IN ({})
321+
AND raw_data_id IS NOT NULL)""".format(
322+
', '.join(['%s'] * len(prep_templates)))
323+
exists = conn_handler.execute_fetchone(
324+
sql, [pt.id for pt in prep_templates])[0]
325+
if exists:
326+
raise QiitaDBError(
327+
"Cannot create raw data because the passed prep templates "
328+
"already have a raw data associated with it. "
329+
"Prep templates: %s"
330+
% ', '.join([str(pt.id) for pt in prep_templates]))
331+
332+
# Add the raw data to the database, and get the raw data id back
314333
rd_id = conn_handler.execute_fetchone(
315334
"INSERT INTO qiita.{0} (filetype_id) VALUES (%s) "
316335
"RETURNING raw_data_id".format(cls._table), (filetype,))[0]
317336

318337
# Instantiate the object with the new id
319338
rd = cls(rd_id)
320339

321-
# Connect the raw data with its studies
322-
values = [(study.id, rd_id) for study in studies]
323-
conn_handler.executemany(
324-
"INSERT INTO qiita.{0} (study_id, raw_data_id) VALUES "
325-
"(%s, %s)".format(rd._study_raw_table), values)
340+
# Connect the raw data with its prep templates
341+
values = [(rd_id, pt.id) for pt in prep_templates]
342+
sql = """UPDATE qiita.prep_template
343+
SET raw_data_id = %s WHERE prep_template_id = %s"""
344+
conn_handler.executemany(sql, values)
326345

327346
# If file paths have been provided, add them to the raw data object
328347
if filepaths:
@@ -331,79 +350,66 @@ def create(cls, filetype, studies, filepaths=None):
331350
return rd
332351

333352
@classmethod
def delete(cls, raw_data_id, prep_template_id):
    """Unlinks the raw data from a prep template, deleting it if orphaned

    The prep template's ``raw_data_id`` is set to NULL. If that prep
    template was the only one pointing to the raw data, the raw data row
    is deleted as well.

    Parameters
    ----------
    raw_data_id : int
        The raw data id
    prep_template_id : int
        The id of the prep template from which the raw data is unlinked

    Raises
    ------
    QiitaDBUnknownIDError
        If the raw data id doesn't exist
    QiitaDBError
        If the raw data is not linked to that prep_template_id
        If this is the last prep template linked to the raw data and the
        raw data still has files linked
    """
    conn_handler = SQLConnectionHandler()

    # Check if the raw data exists
    if not cls.exists(raw_data_id):
        raise QiitaDBUnknownIDError(raw_data_id, "raw data")

    # Check if the raw data is linked to the prep template.
    # execute_fetchone returns the result row, so the EXISTS flag has to be
    # extracted with [0] (as in the other lookups); testing the row itself
    # would always be truthy and this validation would never trigger.
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.prep_template
                WHERE prep_template_id = %s AND raw_data_id = %s)"""
    pt_rd_exists = conn_handler.execute_fetchone(
        sql, (prep_template_id, raw_data_id))[0]
    if not pt_rd_exists:
        raise QiitaDBError(
            "Raw data %d is not linked to prep template %d or the prep "
            "template doesn't exist" % (raw_data_id, prep_template_id))

    # Check to how many prep templates the raw data is still linked.
    # If this is the last one, check that there are no linked files
    raw_data_count = conn_handler.execute_fetchone(
        "SELECT COUNT(*) FROM qiita.prep_template WHERE "
        "raw_data_id = %s", (raw_data_id,))[0]
    if raw_data_count == 1 and RawData(raw_data_id).get_filepath_ids():
        raise QiitaDBError(
            "Raw data (%d) can't be remove because it has linked files. "
            "To remove it, first unlink files." % raw_data_id)

    # Queue the unlink (and, if needed, the delete) so both statements are
    # submitted together through a single execute_queue call
    queue = "DELETE_%d_%d" % (raw_data_id, prep_template_id)
    conn_handler.create_queue(queue)
    sql = """UPDATE qiita.prep_template
             SET raw_data_id = %s
             WHERE prep_template_id = %s"""
    conn_handler.add_to_queue(queue, sql, (None, prep_template_id))

    # If there is no other prep template pointing to the raw data, it can
    # be removed
    if raw_data_count == 1:
        sql = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
        conn_handler.add_to_queue(queue, sql, (raw_data_id,))

    conn_handler.execute_queue(queue)
407413

408414
@property
409415
def studies(self):
@@ -415,10 +421,11 @@ def studies(self):
415421
The list of study ids to which the raw data belongs to
416422
"""
417423
conn_handler = SQLConnectionHandler()
418-
ids = conn_handler.execute_fetchall(
419-
"SELECT study_id FROM qiita.{0} WHERE "
420-
"raw_data_id=%s".format(self._study_raw_table),
421-
[self._id])
424+
sql = """SELECT study_id
425+
FROM qiita.study_prep_template
426+
JOIN qiita.prep_template USING (prep_template_id)
427+
WHERE raw_data_id = %s"""
428+
ids = conn_handler.execute_fetchall(sql, (self.id,))
422429
return [id[0] for id in ids]
423430

424431
@property
@@ -508,14 +515,14 @@ def _remove_filepath(self, fp, conn_handler, queue):
508515
self._set_link_filepaths_status("failed: %s" % msg)
509516
raise QiitaDBError(msg)
510517

511-
# The filepath belongs to one or more studies
512-
studies_linked = self.studies
513-
if len(studies_linked) > 1:
518+
# The filepath belongs to one or more prep templates
519+
prep_templates = self.prep_templates
520+
if len(prep_templates) > 1:
514521
msg = ("Can't clear all the filepaths from raw data %s because "
515-
"it has been shared with other studies: %s. If you want to "
516-
"remove it, first remove the raw data from the other "
517-
"studies." % (self._id,
518-
', '.join(map(str, studies_linked))))
522+
"it has been used with other prep templates: %s. If you "
523+
"want to remove it, first remove the raw data from the "
524+
"other prep templates."
525+
% (self._id, ', '.join(map(str, prep_templates))))
519526
self._set_link_filepaths_status("failed: %s" % msg)
520527
raise QiitaDBError(msg)
521528

@@ -576,40 +583,6 @@ def clear_filepaths(self):
576583
# self.studies should only have one element, thus self.studies[0]
577584
move_filepaths_to_upload_folder(self.studies[0], filepaths)
578585

579-
def remove_filepath(self, fp):
580-
"""Removes the filepath from the RawData
581-
582-
Parameters
583-
----------
584-
fp : str
585-
The filepath to remove
586-
"""
587-
conn_handler = SQLConnectionHandler()
588-
queue = "remove_fp_%s" % self.id
589-
conn_handler.create_queue(queue)
590-
591-
# Set the current status to unlinking
592-
self._set_link_filepaths_status("unlinking")
593-
594-
self._remove_filepath(fp, conn_handler, queue)
595-
596-
try:
597-
# Execute the queue
598-
conn_handler.execute_queue(queue)
599-
except Exception as e:
600-
self._set_link_filepaths_status("failed: %s" % e)
601-
LogEntry.create('Runtime', str(e),
602-
info={self.__class__.__name__: self.id})
603-
raise e
604-
605-
# We can already update the status to done, as the files have been
606-
# unlinked, the move_filepaths_to_upload_folder call will not change
607-
# the status of the raw data object
608-
self._set_link_filepaths_status("idle")
609-
610-
# Delete the files, if they are not used anywhere
611-
purge_filepaths()
612-
613586
def status(self, study):
614587
"""The status of the raw data within the given study
615588
@@ -653,8 +626,6 @@ def status(self, study):
653626
USING (preprocessed_data_id)
654627
JOIN qiita.prep_template pt
655628
USING (prep_template_id)
656-
JOIN qiita.raw_data rd
657-
USING (raw_data_id)
658629
JOIN qiita.study_processed_data spd
659630
USING (processed_data_id)
660631
WHERE pt.raw_data_id=%s AND spd.study_id=%s"""

qiita_db/study.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,8 +751,12 @@ def raw_data(self, data_type=None):
751751
spec_data = " AND data_type_id = %d" % convert_to_id(data_type,
752752
"data_type")
753753
conn_handler = SQLConnectionHandler()
754-
sql = ("SELECT raw_data_id FROM qiita.study_raw_data WHERE "
755-
"study_id = %s{0}".format(spec_data))
754+
sql = """SELECT raw_data_id
755+
FROM qiita.study_prep_template
756+
JOIN qiita.prep_template USING (prep_template_id)
757+
JOIN qiita.raw_data USING (raw_data_id)
758+
WHERE study_id = %s{0}""".format(spec_data)
759+
756760
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
757761

758762
def add_raw_data(self, raw_data):

0 commit comments

Comments
 (0)