|
| 1 | +# May 19, 2015 |
| 2 | +# We attach the prep template directly to the study. The raw data is no longer |
| 3 | +# attached to the study directly, the prep template point to them. This will |
| 4 | +# make the RawData to be effectively just a container for the raw files, |
| 5 | +# which is how it was acting previously. |
| 6 | + |
| 7 | +from qiita_db.sql_connection import SQLConnectionHandler |
| 8 | +from qiita_db.data import RawData |
| 9 | +from qiita_db.util import move_filepaths_to_upload_folder |
| 10 | + |
| 11 | +conn_handler = SQLConnectionHandler() |
| 12 | +queue = "PATCH_25" |
| 13 | +conn_handler.create_queue(queue) |
| 14 | + |
| 15 | +# the system may contain raw data with no prep template associated to it. |
| 16 | +# Retrieve all those raw data ids |
| 17 | +sql = """SELECT raw_data_id |
| 18 | + FROM qiita.raw_data |
| 19 | + WHERE raw_data_id NOT IN ( |
| 20 | + SELECT DISTINCT raw_data_id FROM qiita.prep_template);""" |
| 21 | +rd_ids = [x[0] for x in conn_handler.execute_fetchall(sql)] |
| 22 | + |
| 23 | +# We will delete those RawData. However, if they have files attached, we should |
| 24 | +# move them to the uploads folder of the study |
| 25 | +sql_detach = """DELETE FROM qiita.study_raw_data |
| 26 | + WHERE raw_data_id = %s AND study_id = %s""" |
| 27 | +sql_unlink = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" |
| 28 | +sql_delete = "DELETE FROM qiita.raw_data WHERE raw_data_id = %s" |
| 29 | +move_files = [] |
| 30 | +for rd_id in rd_ids: |
| 31 | + rd = RawData(rd_id) |
| 32 | + filepaths = rd.get_filepaths() |
| 33 | + studies = sorted(rd.studies) |
| 34 | + if filepaths: |
| 35 | + # we need to move the files to a study. We chose the one with lower |
| 36 | + # study id. Currently there is no case in the live database in which a |
| 37 | + # RawData with no prep templates is attached to more than one study, |
| 38 | + # but I think it is better to normalize this just in case |
| 39 | + move_files.append((min(studies), filepaths)) |
| 40 | + |
| 41 | + # To delete the RawData we first need to unlink all the files |
| 42 | + conn_handler.add_to_queue(queue, sql_unlink, (rd_id,)) |
| 43 | + |
| 44 | + # Then, remove the raw data from all the studies |
| 45 | + for st_id in studies: |
| 46 | + conn_handler.add_to_queue(queue, sql_detach, (rd_id, st_id)) |
| 47 | + |
| 48 | + conn_handler.add_to_queue(queue, sql_delete, (rd_id,)) |
| 49 | + |
| 50 | +# We can now perform all changes in the DB. Although this changes can be |
| 51 | +# done in an SQL patch, they are done here because we need to execute the |
| 52 | +# previous clean up in the database before we can actually execute the SQL |
| 53 | +# patch. |
| 54 | +sql = """CREATE TABLE qiita.study_prep_template ( |
| 55 | + study_id bigint NOT NULL, |
| 56 | + prep_template_id bigint NOT NULL, |
| 57 | + CONSTRAINT idx_study_prep_template |
| 58 | + PRIMARY KEY ( study_id, prep_template_id ) |
| 59 | + ); |
| 60 | +
|
| 61 | +CREATE INDEX idx_study_prep_template_0 |
| 62 | + ON qiita.study_prep_template ( study_id ); |
| 63 | +
|
| 64 | +CREATE INDEX idx_study_prep_template_1 |
| 65 | + ON qiita.study_prep_template ( prep_template_id ); |
| 66 | +
|
| 67 | +COMMENT ON TABLE qiita.study_prep_template IS |
| 68 | + 'links study to its prep templates'; |
| 69 | +
|
| 70 | +ALTER TABLE qiita.study_prep_template |
| 71 | + ADD CONSTRAINT fk_study_prep_template_study |
| 72 | + FOREIGN KEY ( study_id ) REFERENCES qiita.study( study_id ); |
| 73 | +
|
| 74 | +ALTER TABLE qiita.study_prep_template |
| 75 | + ADD CONSTRAINT fk_study_prep_template_pt |
| 76 | + FOREIGN KEY ( prep_template_id ) |
| 77 | + REFERENCES qiita.prep_template( prep_template_id ); |
| 78 | +
|
| 79 | +-- Connect the existing prep templates in the system with their studies |
| 80 | +DO $do$ |
| 81 | +DECLARE |
| 82 | + vals RECORD; |
| 83 | +BEGIN |
| 84 | +FOR vals IN |
| 85 | + SELECT prep_template_id, study_id |
| 86 | + FROM qiita.prep_template |
| 87 | + JOIN qiita.study_raw_data USING (raw_data_id) |
| 88 | +LOOP |
| 89 | + INSERT INTO qiita.study_prep_template (study_id, prep_template_id) |
| 90 | + VALUES (vals.study_id, vals.prep_template_id); |
| 91 | +END LOOP; |
| 92 | +END $do$; |
| 93 | +
|
| 94 | +--- Drop the study_raw__data table as it's not longer used |
| 95 | +DROP TABLE qiita.study_raw_data; |
| 96 | +""" |
| 97 | +conn_handler.add_to_queue(queue, sql) |
| 98 | +conn_handler.execute_queue(queue) |
| 99 | + |
| 100 | +# After the changes in the database have been performed, move the files |
| 101 | +# to the uploads folder |
| 102 | +errors = [] |
| 103 | +for st_id, fps in move_files: |
| 104 | + try: |
| 105 | + move_filepaths_to_upload_folder(st_id, fps) |
| 106 | + except Exception, e: |
| 107 | + # An error here is unlikely. However, it's possible and there is no |
| 108 | + # clean way that we can unroll all the previous changes in the DB. |
| 109 | + errors.append((st_id, fps, str(e))) |
| 110 | + |
| 111 | +# Show the user any error that could have been generated during the files |
| 112 | +# movement |
| 113 | +if errors: |
| 114 | + print ("The following errors where generated when trying to move files " |
| 115 | + "to the upload folder") |
| 116 | + for st_id, fps, e in errors: |
| 117 | + print "Study: %d, Filepaths: %s, Error: %s" % (st_id, fps, e) |
0 commit comments