Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions qiita_db/support_files/patches/25.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- May 19, 2015

SELECT 42;
121 changes: 121 additions & 0 deletions qiita_db/support_files/patches/python_patches/25.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# May 19, 2015
# We attach the prep template directly to the study. The raw data is no longer
# attached to the study directly; the prep template points to it instead. This
# makes the RawData effectively just a container for the raw files,
# which is how it was acting previously.

from qiita_db.sql_connection import SQLConnectionHandler
from qiita_db.data import RawData
from qiita_db.util import move_filepaths_to_upload_folder

# Name of the queue that collects every DB statement issued by this patch;
# the statements are added to it further down and run with execute_queue.
queue = "PATCH_25"
conn_handler = SQLConnectionHandler()
conn_handler.create_queue(queue)

# Some raw data in the system may not be referenced by any prep template.
# Gather the ids of all of them.
sql = """SELECT raw_data_id
FROM qiita.raw_data
WHERE raw_data_id NOT IN (
SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
rd_ids = []
for row in conn_handler.execute_fetchall(sql):
    # Each fetched row carries a single column: the raw_data_id
    rd_ids.append(row[0])

# We will delete those RawData. However, if they have files attached, we should
# move them to the uploads folder of the study
sql_detach = """DELETE FROM qiita.study_raw_data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the already built unlink and delete functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason is that the patch will be executed with the new code. In the new code, the RawData does not know which studies it belongs to unless it is connected to a PrepTemplate. The RawData objects used here are "duplicating" the existing code, but once all the pieces are in place, that code will be different. Does this make sense to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is accounting for the changes that are coming in the other open PRs right? Such that the patch will succeed to execute with all the changes to the codebase.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly. That's why I didn't open the PR until I had the changes done in the codebase. I then just broke up the entire work into multiple PRs for easier review.

WHERE raw_data_id = %s AND study_id = %s"""
# Parameterized statements used to remove a raw data: the first one drops its
# rows from raw_filepath, the second one drops the raw_data row itself
sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
# Pending file moves, stored as (study id, filepaths) tuples. They are only
# processed after the queued DB changes have been executed
move_files = []
for rd_id in rd_ids:
rd = RawData(rd_id)
filepaths = rd.get_filepaths()
# rd.studies is used below as the ids of the studies this raw data is
# attached to
studies = sorted(rd.studies)
if filepaths:
# We need to move the files to a study. We choose the one with the lowest
# study id. Currently there is no case in the live database in which a
# RawData with no prep templates is attached to more than one study,
# but I think it is better to normalize this just in case.
# NOTE(review): min() raises ValueError on an empty list; per the PR
# discussion every raw data is attached to at least one study - confirm
move_files.append((min(studies), filepaths))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm, there's no possible way we would get to this line if there are no studies right? i.e. is there any way that studies could be an empty list? If so, then min would error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, there will be at least one study.


# To delete the RawData we first need to unlink all the files. Nothing is
# executed here: the statements are only added to the patch queue
conn_handler.add_to_queue(queue, sql_unlink, (rd_id,))

# Then, remove the raw data from all the studies (one statement per study)
for st_id in studies:
conn_handler.add_to_queue(queue, sql_detach, (rd_id, st_id))

# Finally, queue the removal of the raw data row itself
conn_handler.add_to_queue(queue, sql_delete, (rd_id,))

# We can now perform all changes in the DB. Although these changes can be
# done in an SQL patch, they are done here because we need to execute the
# previous clean up in the database before we can actually execute the SQL
# patch.
sql = """CREATE TABLE qiita.study_prep_template (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming the first part of this SQL string was generated by DBSchema?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're correct, I just copied here.

study_id bigint NOT NULL,
prep_template_id bigint NOT NULL,
CONSTRAINT idx_study_prep_template
PRIMARY KEY ( study_id, prep_template_id )
);

CREATE INDEX idx_study_prep_template_0
ON qiita.study_prep_template ( study_id );

CREATE INDEX idx_study_prep_template_1
ON qiita.study_prep_template ( prep_template_id );

COMMENT ON TABLE qiita.study_prep_template IS
'links study to its prep templates';

ALTER TABLE qiita.study_prep_template
ADD CONSTRAINT fk_study_prep_template_study
FOREIGN KEY ( study_id ) REFERENCES qiita.study( study_id );

ALTER TABLE qiita.study_prep_template
ADD CONSTRAINT fk_study_prep_template_pt
FOREIGN KEY ( prep_template_id )
REFERENCES qiita.prep_template( prep_template_id );

-- Connect the existing prep templates in the system with their studies
DO $do$
DECLARE
vals RECORD;
BEGIN
FOR vals IN
SELECT prep_template_id, study_id
FROM qiita.prep_template
JOIN qiita.study_raw_data USING (raw_data_id)
LOOP
INSERT INTO qiita.study_prep_template (study_id, prep_template_id)
VALUES (vals.study_id, vals.prep_template_id);
END LOOP;
END $do$;

--- Drop the study_raw__data table as it's not longer used
DROP TABLE qiita.study_raw_data;

-- The raw_data_id column now can be nullable
ALTER TABLE qiita.prep_template
ALTER COLUMN raw_data_id DROP NOT NULL;
"""
conn_handler.add_to_queue(queue, sql)
conn_handler.execute_queue(queue)

# After the changes in the database have been performed, move the files
# to the uploads folder. Each entry of move_files is a
# (study id, filepaths) tuple collected while cleaning up the raw data.
errors = []
for st_id, fps in move_files:
    try:
        move_filepaths_to_upload_folder(st_id, fps)
    except Exception as e:
        # An error here is unlikely. However, it's possible and there is no
        # clean way that we can unroll all the previous changes in the DB.
        # Record the failure and keep going so the remaining files are still
        # moved; everything recorded is reported once the loop is done.
        errors.append((st_id, fps, str(e)))

# Show the user any error that could have been generated during the files
# movement. The parenthesized single-argument print calls behave the same
# as the print statement on Python 2 and are also valid on Python 3.
if errors:
    print("The following errors where generated when trying to move files "
          "to the upload folder")
    for st_id, fps, e in errors:
        print("Study: %d, Filepaths: %s, Error: %s" % (st_id, fps, e))
64 changes: 32 additions & 32 deletions qiita_db/support_files/qiita-db.dbs
Original file line number Diff line number Diff line change
Expand Up @@ -747,9 +747,6 @@
<index name="idx_common_prep_info_1" unique="NORMAL" >
<column name="prep_template_id" />
</index>
<fk name="fk_common_prep_info" to_schema="qiita" to_table="study_sample" >
<fk_column name="sample_id" pk="sample_id" />
</fk>
<fk name="fk_prep_template" to_schema="qiita" to_table="prep_template" >
<fk_column name="prep_template_id" pk="prep_template_id" />
</fk>
Expand Down Expand Up @@ -1340,6 +1337,27 @@ Controlled Vocabulary]]></comment>
<fk_column name="study_id" pk="study_id" />
</fk>
</table>
<table name="study_prep_template" >
	<comment>links study to its prep templates</comment>
	<!-- Index and foreign key names match the ones created by the patch 25 SQL
	     (idx_study_prep_template, idx_study_prep_template_0/_1,
	     fk_study_prep_template_study, fk_study_prep_template_pt). -->
	<column name="study_id" type="bigint" jt="-5" mandatory="y" />
	<column name="prep_template_id" type="bigint" jt="-5" mandatory="y" />
	<index name="idx_study_prep_template" unique="PRIMARY_KEY" >
		<column name="study_id" />
		<column name="prep_template_id" />
	</index>
	<index name="idx_study_prep_template_0" unique="NORMAL" >
		<column name="study_id" />
	</index>
	<index name="idx_study_prep_template_1" unique="NORMAL" >
		<column name="prep_template_id" />
	</index>
	<fk name="fk_study_prep_template_study" to_schema="qiita" to_table="study" >
		<fk_column name="study_id" pk="study_id" />
	</fk>
	<fk name="fk_study_prep_template_pt" to_schema="qiita" to_table="prep_template" >
		<fk_column name="prep_template_id" pk="prep_template_id" />
	</fk>
</table>
<table name="study_preprocessed_data" >
<column name="study_id" type="bigint" jt="-5" mandatory="y" />
<column name="preprocessed_data_id" type="bigint" jt="-5" mandatory="y" />
Expand Down Expand Up @@ -1380,24 +1398,6 @@ Controlled Vocabulary]]></comment>
<fk_column name="processed_data_id" pk="processed_data_id" />
</fk>
</table>
<table name="study_raw_data" >
<comment>links study to its raw data</comment>
<column name="study_id" type="bigint" jt="-5" mandatory="y" />
<column name="raw_data_id" type="bigint" jt="-5" mandatory="y" />
<index name="idx_study_raw_data" unique="NORMAL" >
<column name="study_id" />
</index>
<index name="idx_study_raw_data_0" unique="PRIMARY_KEY" >
<column name="study_id" />
<column name="raw_data_id" />
</index>
<fk name="fk_study_raw_data_study" to_schema="qiita" to_table="study" >
<fk_column name="study_id" pk="study_id" />
</fk>
<fk name="fk_study_raw_data_raw_data" to_schema="qiita" to_table="raw_data" >
<fk_column name="raw_data_id" pk="raw_data_id" />
</fk>
</table>
<table name="study_sample" >
<comment>Required info for each sample. One row is one sample.</comment>
<column name="sample_id" type="varchar" jt="12" mandatory="y" />
Expand Down Expand Up @@ -1522,7 +1522,6 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="study_preprocessed_data" color="c0d4f3" x="1545" y="720" />
<entity schema="qiita" name="study_users" color="d0def5" x="1065" y="60" />
<entity schema="qiita" name="sample_x" color="d0def5" x="1635" y="165" />
<entity schema="qiita" name="study_raw_data" color="d0def5" x="1575" y="510" />
<entity schema="qiita" name="processed_filepath" color="c0d4f3" x="1065" y="945" />
<entity schema="qiita" name="command" color="d0def5" x="210" y="1110" />
<entity schema="qiita" name="logging" color="c0d4f3" x="1335" y="1290" />
Expand All @@ -1540,27 +1539,20 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="preprocessed_processed_data" color="b2cdf7" x="1275" y="870" />
<entity schema="qiita" name="qiita_user" color="d0def5" x="330" y="90" />
<entity schema="qiita" name="prep_y" color="d0def5" x="1230" y="195" />
<entity schema="qiita" name="prep_columns" color="b2cdf7" x="1275" y="345" />
<entity schema="qiita" name="raw_filepath" color="c0d4f3" x="1080" y="510" />
<entity schema="qiita" name="filetype" color="d0def5" x="1560" y="600" />
<entity schema="qiita" name="filepath_type" color="c0d4f3" x="585" y="885" />
<entity schema="qiita" name="checksum_algorithm" color="b2cdf7" x="735" y="885" />
<entity schema="qiita" name="data_type" color="d0def5" x="690" y="1020" />
<entity schema="qiita" name="user_level" color="d0def5" x="165" y="75" />
<entity schema="qiita" name="job_status" color="d0def5" x="210" y="1020" />
<entity schema="qiita" name="severity" color="c0d4f3" x="1470" y="1290" />
<entity schema="qiita" name="prep_template" color="b2cdf7" x="1065" y="360" />
<entity schema="qiita" name="raw_data" color="d0def5" x="1275" y="495" />
<entity schema="qiita" name="job" color="d0def5" x="405" y="1005" />
<entity schema="qiita" name="filepath" color="c0d4f3" x="645" y="675" />
<entity schema="qiita" name="data_directory" color="b2cdf7" x="840" y="585" />
<entity schema="qiita" name="term" color="d0def5" x="810" y="1650" />
<entity schema="qiita" name="environmental_package" color="b2cdf7" x="2250" y="150" />
<entity schema="qiita" name="study_environmental_package" color="b2cdf7" x="2250" y="45" />
<entity schema="qiita" name="timeseries_type" color="c0d4f3" x="1680" y="615" />
<entity schema="qiita" name="prep_template_filepath" color="b2cdf7" x="1035" y="600" />
<entity schema="qiita" name="sample_template_filepath" color="b2cdf7" x="1050" y="795" />
<entity schema="qiita" name="prep_template_preprocessed_data" color="b2cdf7" x="750" y="450" />
<entity schema="qiita" name="preprocessed_data" color="c0d4f3" x="1260" y="675" />
<entity schema="qiita" name="reference" color="c0d4f3" x="2280" y="960" />
<entity schema="qiita" name="preprocessed_sequence_454_params" color="c0d4f3" x="1740" y="960" />
Expand All @@ -1582,8 +1574,16 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1500" y="1050" />
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
<entity schema="qiita" name="analysis_sample" color="d0def5" x="45" y="1170" />
<entity schema="qiita" name="study_sample" color="d0def5" x="1410" y="120" />
<entity schema="qiita" name="prep_template_sample" color="d0def5" x="1050" y="165" />
<entity schema="qiita" name="study_sample" color="d0def5" x="1515" y="105" />
<entity schema="qiita" name="raw_data" color="d0def5" x="1020" y="300" />
<entity schema="qiita" name="filetype" color="d0def5" x="1035" y="180" />
<entity schema="qiita" name="prep_template_preprocessed_data" color="b2cdf7" x="1275" y="555" />
<entity schema="qiita" name="prep_template" color="b2cdf7" x="1305" y="375" />
<entity schema="qiita" name="raw_filepath" color="c0d4f3" x="1035" y="435" />
<entity schema="qiita" name="prep_template_filepath" color="b2cdf7" x="1035" y="525" />
<entity schema="qiita" name="study_prep_template" color="d0def5" x="1590" y="420" />
<entity schema="qiita" name="prep_columns" color="b2cdf7" x="1470" y="285" />
<entity schema="qiita" name="prep_template_sample" color="d0def5" x="1335" y="195" />
<group name="Group_analyses" color="c4e0f9" >
<comment>analysis tables</comment>
<entity schema="qiita" name="analysis" />
Expand All @@ -1610,7 +1610,7 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="study_sample_columns" />
<entity schema="qiita" name="raw_data" />
<entity schema="qiita" name="filetype" />
<entity schema="qiita" name="study_raw_data" />
<entity schema="qiita" name="study_prep_template" />
<entity schema="qiita" name="sample_x" />
<entity schema="qiita" name="preprocessed_spectra_params" />
<entity schema="qiita" name="preprocessed_sequence_illumina_params" />
Expand Down
Loading