qiita-spots · antgonza · May 26, 2015 · May 7, 2015 · May 19, 2015 · May 20, 2015
diff --git a/qiita_db/support_files/patches/25.sql b/qiita_db/support_files/patches/25.sql
@@ -0,0 +1,3 @@
+-- May 19, 2015
+-- No-op statement; the actual changes for patch 25 are in python_patches/25.py
+SELECT 42;
diff --git a/qiita_db/support_files/patches/python_patches/25.py b/qiita_db/support_files/patches/python_patches/25.py
@@ -0,0 +1,121 @@
+# May 19, 2015
+# We attach the prep template directly to the study. The raw data is no longer
+# attached to the study directly; the prep template points to it instead. This
+# makes the RawData effectively just a container for the raw files, which is
+# how it was acting previously.
+
+from qiita_db.sql_connection import SQLConnectionHandler
+from qiita_db.data import RawData
+from qiita_db.util import move_filepaths_to_upload_folder
+
+conn_handler = SQLConnectionHandler()
+queue = "PATCH_25"  # name of the queue that collects this patch's statements
+conn_handler.create_queue(queue)
+
+# The system may contain raw data with no prep template associated with it.
+# Retrieve the ids of all those raw data
+sql = """SELECT raw_data_id
+         FROM qiita.raw_data
+         WHERE raw_data_id NOT IN (
+            SELECT DISTINCT raw_data_id FROM qiita.prep_template);"""
+rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)]  # ids only
+
+# We will delete those RawData. However, if they have files attached, we should
+# move them to the uploads folder of the study
+sql_detach = """DELETE FROM qiita.study_raw_data
+                WHERE raw_data_id = %s AND study_id = %s"""
+sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s"
+sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s"
+move_files = []  # (study id, filepaths) pairs to move once the DB is updated
+for rd_id in rd_ids:
+    rd = RawData(rd_id)
+    filepaths = rd.get_filepaths()
+    studies = sorted(rd.studies)
+    if filepaths:
+        # We need to move the files to a study. We choose the one with the
+        # lowest study id. Currently there is no case in the live database in
+        # which a RawData with no prep templates is attached to more than one
+        # study, but it is better to normalize this just in case
+        move_files.append((min(studies), filepaths))
+
+    # To delete the RawData we first need to unlink all of its files
+    conn_handler.add_to_queue(queue, sql_unlink, (rd_id,))
+
+    # Then, detach the raw data from every study it is attached to
+    for st_id in studies:
+        conn_handler.add_to_queue(queue, sql_detach, (rd_id, st_id))
+
+    conn_handler.add_to_queue(queue, sql_delete, (rd_id,))  # finally, the row
+
+# We can now perform all the changes in the DB. Although these changes could
+# be done in an SQL patch, they are done here because the clean up queued
+# above must be executed in the database before these schema changes can be
+# applied.
+sql = """CREATE TABLE qiita.study_prep_template (
+    study_id             bigint  NOT NULL,
+    prep_template_id     bigint  NOT NULL,
+    CONSTRAINT idx_study_prep_template
+        PRIMARY KEY ( study_id, prep_template_id )
+ );
+
+CREATE INDEX idx_study_prep_template_0
+    ON qiita.study_prep_template ( study_id );
+
+CREATE INDEX idx_study_prep_template_1
+    ON qiita.study_prep_template ( prep_template_id );
+
+COMMENT ON TABLE qiita.study_prep_template IS
+    'links study to its prep templates';
+
+ALTER TABLE qiita.study_prep_template
+    ADD CONSTRAINT fk_study_prep_template_study
+    FOREIGN KEY ( study_id ) REFERENCES qiita.study( study_id );
+
+ALTER TABLE qiita.study_prep_template
+    ADD CONSTRAINT fk_study_prep_template_pt
+    FOREIGN KEY ( prep_template_id )
+    REFERENCES qiita.prep_template( prep_template_id );
+
+-- Connect the existing prep templates in the system with their studies
+DO $do$
+DECLARE
+    vals RECORD;
+BEGIN
+FOR vals IN
+    SELECT prep_template_id, study_id
+    FROM qiita.prep_template
+    JOIN qiita.study_raw_data USING (raw_data_id)
+LOOP
+    INSERT INTO qiita.study_prep_template (study_id, prep_template_id)
+    VALUES (vals.study_id, vals.prep_template_id);
+END LOOP;
+END $do$;
+
+--- Drop the study_raw__data table as it's not longer used
+DROP TABLE qiita.study_raw_data;
+
+-- The raw_data_id column now can be nullable
+ALTER TABLE qiita.prep_template
+    ALTER COLUMN raw_data_id DROP NOT NULL;
+"""
+conn_handler.add_to_queue(queue, sql)  # queued after all the deletions
+conn_handler.execute_queue(queue)  # presumably runs the queue in order
+
+# After the changes in the database have been performed, move the files
+# to the uploads folder
+errors = []
+for st_id, fps in move_files:
+    try:
+        move_filepaths_to_upload_folder(st_id, fps)
+    except Exception, e:
+        # An error here is unlikely. However, it's possible and there is no
+        # clean way that we can unroll all the previous changes in the DB.
+        errors.append((st_id, fps, str(e)))
+
+# Show the user any error that could have been generated during the files
+# movement
+if errors:
+    print ("The following errors where generated when trying to move files "
+           "to the upload folder")
+    for st_id, fps, e in errors:
+        print "Study: %d, Filepaths: %s, Error: %s" % (st_id, fps, e)
diff --git a/qiita_db/support_files/qiita-db.dbs b/qiita_db/support_files/qiita-db.dbs
@@ -747,9 +747,6 @@
 			<index name="idx_common_prep_info_1" unique="NORMAL" >
 				<column name="prep_template_id" />
 			</index>
-			<fk name="fk_common_prep_info" to_schema="qiita" to_table="study_sample" >
-				<fk_column name="sample_id" pk="sample_id" />
-			</fk>
 			<fk name="fk_prep_template" to_schema="qiita" to_table="prep_template" >
 				<fk_column name="prep_template_id" pk="prep_template_id" />
 			</fk>
@@ -1340,6 +1337,27 @@ Controlled Vocabulary]]></comment>
 				<fk_column name="study_id" pk="study_id" />
 			</fk>
 		</table>
+		<table name="study_prep_template" >
+			<comment>links study to its prep templates</comment>
+			<column name="study_id" type="bigint" jt="-5" mandatory="y" />
+			<column name="prep_template_id" type="bigint" jt="-5" mandatory="y" />
+			<index name="idx_study_prep_template" unique="PRIMARY_KEY" >
+				<column name="study_id" />
+				<column name="prep_template_id" />
+			</index>
+			<index name="idx_study_prep_template_0" unique="NORMAL" >
+				<column name="study_id" />
+			</index>
+			<index name="idx_study_prep_template_1" unique="NORMAL" >
+				<column name="prep_template_id" />
+			</index>
+			<fk name="fk_study_prep_template_study" to_schema="qiita" to_table="study" >
+				<fk_column name="study_id" pk="study_id" />
+			</fk>
+			<fk name="fk_study_prep_template_pt" to_schema="qiita" to_table="prep_template" >
+				<fk_column name="prep_template_id" pk="prep_template_id" />
+			</fk>
+		</table>
 		<table name="study_preprocessed_data" >
 			<column name="study_id" type="bigint" jt="-5" mandatory="y" />
 			<column name="preprocessed_data_id" type="bigint" jt="-5" mandatory="y" />
@@ -1380,24 +1398,6 @@ Controlled Vocabulary]]></comment>
 				<fk_column name="processed_data_id" pk="processed_data_id" />
 			</fk>
 		</table>
-		<table name="study_raw_data" >
-			<comment>links study to its raw data</comment>
-			<column name="study_id" type="bigint" jt="-5" mandatory="y" />
-			<column name="raw_data_id" type="bigint" jt="-5" mandatory="y" />
-			<index name="idx_study_raw_data" unique="NORMAL" >
-				<column name="study_id" />
-			</index>
-			<index name="idx_study_raw_data_0" unique="PRIMARY_KEY" >
-				<column name="study_id" />
-				<column name="raw_data_id" />
-			</index>
-			<fk name="fk_study_raw_data_study" to_schema="qiita" to_table="study" >
-				<fk_column name="study_id" pk="study_id" />
-			</fk>
-			<fk name="fk_study_raw_data_raw_data" to_schema="qiita" to_table="raw_data" >
-				<fk_column name="raw_data_id" pk="raw_data_id" />
-			</fk>
-		</table>
 		<table name="study_sample" >
 			<comment>Required info for each sample. One row is one sample.</comment>
 			<column name="sample_id" type="varchar" jt="12" mandatory="y" />
@@ -1522,7 +1522,6 @@ Controlled Vocabulary]]></comment>
 		<entity schema="qiita" name="study_preprocessed_data" color="c0d4f3" x="1545" y="720" />
 		<entity schema="qiita" name="study_users" color="d0def5" x="1065" y="60" />
 		<entity schema="qiita" name="sample_x" color="d0def5" x="1635" y="165" />
-		<entity schema="qiita" name="study_raw_data" color="d0def5" x="1575" y="510" />
 		<entity schema="qiita" name="processed_filepath" color="c0d4f3" x="1065" y="945" />
 		<entity schema="qiita" name="command" color="d0def5" x="210" y="1110" />
 		<entity schema="qiita" name="logging" color="c0d4f3" x="1335" y="1290" />
@@ -1540,27 +1539,20 @@ Controlled Vocabulary]]></comment>
 		<entity schema="qiita" name="preprocessed_processed_data" color="b2cdf7" x="1275" y="870" />
 		<entity schema="qiita" name="qiita_user" color="d0def5" x="330" y="90" />
 		<entity schema="qiita" name="prep_y" color="d0def5" x="1230" y="195" />
-		<entity schema="qiita" name="prep_columns" color="b2cdf7" x="1275" y="345" />
-		<entity schema="qiita" name="raw_filepath" color="c0d4f3" x="1080" y="510" />
-		<entity schema="qiita" name="filetype" color="d0def5" x="1560" y="600" />
 		<entity schema="qiita" name="filepath_type" color="c0d4f3" x="585" y="885" />
 		<entity schema="qiita" name="checksum_algorithm" color="b2cdf7" x="735" y="885" />
 		<entity schema="qiita" name="data_type" color="d0def5" x="690" y="1020" />
 		<entity schema="qiita" name="user_level" color="d0def5" x="165" y="75" />
 		<entity schema="qiita" name="job_status" color="d0def5" x="210" y="1020" />
 		<entity schema="qiita" name="severity" color="c0d4f3" x="1470" y="1290" />
-		<entity schema="qiita" name="prep_template" color="b2cdf7" x="1065" y="360" />
-		<entity schema="qiita" name="raw_data" color="d0def5" x="1275" y="495" />
 		<entity schema="qiita" name="job" color="d0def5" x="405" y="1005" />
 		<entity schema="qiita" name="filepath" color="c0d4f3" x="645" y="675" />
 		<entity schema="qiita" name="data_directory" color="b2cdf7" x="840" y="585" />
 		<entity schema="qiita" name="term" color="d0def5" x="810" y="1650" />
 		<entity schema="qiita" name="environmental_package" color="b2cdf7" x="2250" y="150" />
 		<entity schema="qiita" name="study_environmental_package" color="b2cdf7" x="2250" y="45" />
 		<entity schema="qiita" name="timeseries_type" color="c0d4f3" x="1680" y="615" />
-		<entity schema="qiita" name="prep_template_filepath" color="b2cdf7" x="1035" y="600" />
 		<entity schema="qiita" name="sample_template_filepath" color="b2cdf7" x="1050" y="795" />
-		<entity schema="qiita" name="prep_template_preprocessed_data" color="b2cdf7" x="750" y="450" />
 		<entity schema="qiita" name="preprocessed_data" color="c0d4f3" x="1260" y="675" />
 		<entity schema="qiita" name="reference" color="c0d4f3" x="2280" y="960" />
 		<entity schema="qiita" name="preprocessed_sequence_454_params" color="c0d4f3" x="1740" y="960" />
@@ -1582,8 +1574,16 @@ Controlled Vocabulary]]></comment>
 		<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1500" y="1050" />
 		<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
 		<entity schema="qiita" name="analysis_sample" color="d0def5" x="45" y="1170" />
-		<entity schema="qiita" name="study_sample" color="d0def5" x="1410" y="120" />
-		<entity schema="qiita" name="prep_template_sample" color="d0def5" x="1050" y="165" />
+		<entity schema="qiita" name="study_sample" color="d0def5" x="1515" y="105" />
+		<entity schema="qiita" name="raw_data" color="d0def5" x="1020" y="300" />
+		<entity schema="qiita" name="filetype" color="d0def5" x="1035" y="180" />
+		<entity schema="qiita" name="prep_template_preprocessed_data" color="b2cdf7" x="1275" y="555" />
+		<entity schema="qiita" name="prep_template" color="b2cdf7" x="1305" y="375" />
+		<entity schema="qiita" name="raw_filepath" color="c0d4f3" x="1035" y="435" />
+		<entity schema="qiita" name="prep_template_filepath" color="b2cdf7" x="1035" y="525" />
+		<entity schema="qiita" name="study_prep_template" color="d0def5" x="1590" y="420" />
+		<entity schema="qiita" name="prep_columns" color="b2cdf7" x="1470" y="285" />
+		<entity schema="qiita" name="prep_template_sample" color="d0def5" x="1335" y="195" />
 		<group name="Group_analyses" color="c4e0f9" >
 			<comment>analysis tables</comment>
 			<entity schema="qiita" name="analysis" />
@@ -1610,7 +1610,7 @@ Controlled Vocabulary]]></comment>
 			<entity schema="qiita" name="study_sample_columns" />
 			<entity schema="qiita" name="raw_data" />
 			<entity schema="qiita" name="filetype" />
-			<entity schema="qiita" name="study_raw_data" />
+			<entity schema="qiita" name="study_prep_template" />
 			<entity schema="qiita" name="sample_x" />
 			<entity schema="qiita" name="preprocessed_spectra_params" />
 			<entity schema="qiita" name="preprocessed_sequence_illumina_params" />