Skip to content

Commit 250ae69

Browse files
committed
Merge pull request #1205 from josenavas/1084-qiita-db
1084 qiita db
2 parents 7b227a3 + 7c9e1d2 commit 250ae69

File tree

13 files changed

+120
-128
lines changed

13 files changed

+120
-128
lines changed

qiita_db/commands.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -153,27 +153,23 @@ def load_sample_template_from_cmd(sample_temp_path, study_id):
153153
return SampleTemplate.create(sample_temp, Study(study_id))
154154

155155

156-
def load_prep_template_from_cmd(prep_temp_path, raw_data_id, study_id,
157-
data_type):
156+
def load_prep_template_from_cmd(prep_temp_path, study_id, data_type):
158157
r"""Adds a prep template to the database
159158
160159
Parameters
161160
----------
162161
prep_temp_path : str
163162
Path to the prep template file
164-
raw_data_id : int
165-
The raw data id to which the prep template belongs
166163
study_id : int
167164
The study id to which the prep template belongs
168165
data_type : str
169166
The data type of the prep template
170167
"""
171168
prep_temp = load_template_to_dataframe(prep_temp_path)
172-
return PrepTemplate.create(prep_temp, RawData(raw_data_id),
173-
Study(study_id), data_type)
169+
return PrepTemplate.create(prep_temp, Study(study_id), data_type)
174170

175171

176-
def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
172+
def load_raw_data_cmd(filepaths, filepath_types, filetype, prep_template_ids):
177173
"""Add new raw data by populating the relevant tables
178174
179175
Parameters
@@ -184,8 +180,8 @@ def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
184180
Describes the contents of the files.
185181
filetype : str
186182
The type of file being loaded
187-
study_ids : iterable of int
188-
The IDs of the studies with which to associate this raw data
183+
prep_template_ids : iterable of int
184+
The IDs of the prep templates with which to associate this raw data
189185
190186
Returns
191187
-------
@@ -202,9 +198,9 @@ def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
202198
filepath_types_dict = get_filepath_types()
203199
filepath_types = [filepath_types_dict[x] for x in filepath_types]
204200

205-
studies = [Study(x) for x in study_ids]
201+
prep_templates = [PrepTemplate(x) for x in prep_template_ids]
206202

207-
return RawData.create(filetype_id, studies,
203+
return RawData.create(filetype_id, prep_templates,
208204
filepaths=list(zip(filepaths, filepath_types)))
209205

210206

qiita_db/data.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,8 @@
8787
from .sql_connection import SQLConnectionHandler
8888
from .exceptions import QiitaDBError, QiitaDBUnknownIDError, QiitaDBStatusError
8989
from .util import (exists_dynamic_table, insert_filepaths, convert_to_id,
90-
convert_from_id, purge_filepaths, get_filepath_id,
91-
get_mountpoint, move_filepaths_to_upload_folder,
92-
infer_status)
90+
convert_from_id, get_filepath_id, get_mountpoint,
91+
move_filepaths_to_upload_folder, infer_status)
9392

9493

9594
class BaseData(QiitaObject):

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from unittest import TestCase, main
1111
from tempfile import mkstemp
1212
from os import close, remove
13-
from os.path import join, basename
13+
from os.path import join
1414
from collections import Iterable
1515

1616
import numpy.testing as npt
@@ -29,8 +29,7 @@
2929
from qiita_db.sql_connection import SQLConnectionHandler
3030
from qiita_db.study import Study
3131
from qiita_db.data import RawData, ProcessedData
32-
from qiita_db.util import (exists_table, get_db_files_base_dir, get_mountpoint,
33-
get_count)
32+
from qiita_db.util import exists_table, get_mountpoint, get_count
3433
from qiita_db.metadata_template.prep_template import PrepTemplate, PrepSample
3534
from qiita_db.metadata_template.sample_template import SampleTemplate, Sample
3635
from qiita_db.metadata_template import (PREP_TEMPLATE_COLUMNS,

qiita_db/study.py

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -642,10 +642,11 @@ def data_types(self):
642642
list of str
643643
"""
644644
conn_handler = SQLConnectionHandler()
645-
sql = ("SELECT DISTINCT DT.data_type FROM qiita.study_raw_data SRD "
646-
"JOIN qiita.prep_template PT ON SRD.raw_data_id = "
647-
"PT.raw_data_id JOIN qiita.data_type DT ON PT.data_type_id = "
648-
"DT.data_type_id WHERE SRD.study_id = %s")
645+
sql = """SELECT DISTINCT data_type
646+
FROM qiita.study_prep_template
647+
JOIN qiita.prep_template USING (prep_template_id)
648+
JOIN qiita.data_type USING (data_type_id)
649+
WHERE study_id = %s"""
649650
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
650651

651652
@property
@@ -759,36 +760,30 @@ def raw_data(self, data_type=None):
759760

760761
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
761762

762-
def add_raw_data(self, raw_data):
763-
""" Adds raw_data to the current study
763+
def prep_templates(self, data_type=None):
764+
"""Return list of prep template ids
764765
765766
Parameters
766767
----------
767-
raw_data : list of RawData
768-
The RawData objects to be added to the study
768+
data_type : str, optional
769+
If given, retrieve only prep templates for given datatype.
770+
Default None.
769771
770-
Raises
771-
------
772-
QiitaDBError
773-
If the raw_data is already linked to the current study
772+
Returns
773+
-------
774+
list of PrepTemplate ids
774775
"""
775-
conn_handler = SQLConnectionHandler()
776-
self._lock_non_sandbox(conn_handler)
777-
queue = "%d_add_raw_data" % self.id
778-
sql = ("SELECT EXISTS(SELECT * FROM qiita.study_raw_data WHERE "
779-
"study_id=%s AND raw_data_id=%s)")
780-
conn_handler.create_queue(queue)
781-
sql_args = [(self.id, rd.id) for rd in raw_data]
782-
conn_handler.add_to_queue(queue, sql, sql_args, many=True)
783-
linked = conn_handler.execute_queue(queue)
784-
785-
if any(linked):
786-
raise QiitaDBError("Some of the passed raw datas have been already"
787-
" linked to the study %s" % self.id)
776+
spec_data = ""
777+
if data_type:
778+
spec_data = " AND data_type_id = %s" % convert_to_id(data_type,
779+
"data_type")
788780

789-
conn_handler.executemany(
790-
"INSERT INTO qiita.study_raw_data (study_id, raw_data_id) "
791-
"VALUES (%s, %s)", sql_args)
781+
conn_handler = SQLConnectionHandler()
782+
sql = """SELECT prep_template_id
783+
FROM qiita.study_prep_template
784+
JOIN qiita.prep_template USING (prep_template_id)
785+
WHERE study_id = %s{0}""".format(spec_data)
786+
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
792787

793788
def preprocessed_data(self, data_type=None):
794789
""" Returns list of data ids for preprocessed data info

qiita_db/test/test_analysis.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def test_retrieve_biom_tables(self):
276276
self.assertEqual(self.analysis.biom_tables, exp)
277277

278278
def test_all_associated_filepaths(self):
279-
exp = {12, 13, 14, 15}
279+
exp = {10, 11, 12, 13}
280280
self.assertEqual(self.analysis.all_associated_filepath_ids, exp)
281281

282282
def test_retrieve_biom_tables_none(self):
@@ -427,7 +427,7 @@ def test_build_mapping_file(self):
427427
obs = self.conn_handler.execute_fetchall(
428428
sql, ("%d_analysis_mapping.txt" % self.analysis.id,))
429429

430-
exp = [[15, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
430+
exp = [[13, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
431431
[new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
432432
self.assertEqual(obs, exp)
433433

qiita_db/test/test_base.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from qiita_db.base import QiitaObject, QiitaStatusObject
1414
from qiita_db.exceptions import QiitaDBUnknownIDError
1515
from qiita_db.data import RawData
16-
from qiita_db.study import Study
16+
from qiita_db.study import Study, StudyPerson
1717
from qiita_db.analysis import Analysis
1818

1919

@@ -63,8 +63,9 @@ def test_equal(self):
6363

6464
def test_not_equal(self):
6565
"""Not equals works with objects of the same type"""
66-
new = RawData(2)
67-
self.assertNotEqual(self.tester, new)
66+
sp1 = StudyPerson(1)
67+
sp2 = StudyPerson(2)
68+
self.assertNotEqual(sp1, sp2)
6869

6970
def test_not_equal_type(self):
7071
"""Not equals works with objects of different types"""

qiita_db/test/test_commands.py

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from future import standard_library
1616
from functools import partial
1717

18+
import pandas as pd
19+
1820
from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
1921
load_sample_template_from_cmd,
2022
load_prep_template_from_cmd,
@@ -25,9 +27,10 @@
2527
from qiita_db.environment_manager import patch
2628
from qiita_db.study import Study, StudyPerson
2729
from qiita_db.user import User
28-
from qiita_db.data import RawData, PreprocessedData
30+
from qiita_db.data import PreprocessedData
2931
from qiita_db.util import (get_count, check_count, get_db_files_base_dir,
3032
get_mountpoint)
33+
from qiita_db.metadata_template import PrepTemplate
3134
from qiita_core.util import qiita_test_checker
3235
from qiita_ware.processing_pipeline import generate_demux_file
3336

@@ -154,36 +157,12 @@ def test_load_sample_template_from_cmd(self):
154157
@qiita_test_checker()
155158
class TestLoadPrepTemplateFromCmd(TestCase):
156159
def setUp(self):
157-
# Create a sample template file
158-
fd, seqs_fp = mkstemp(suffix='_seqs.fastq')
159-
close(fd)
160-
fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
161-
close(fd)
162-
163-
with open(seqs_fp, "w") as f:
164-
f.write("\n")
165-
with open(barcodes_fp, "w") as f:
166-
f.write("\n")
167-
168160
self.pt_contents = PREP_TEMPLATE
169161

170-
self.raw_data = RawData.create(
171-
2, [Study(1)], filepaths=[(seqs_fp, 1), (barcodes_fp, 2)])
172-
173-
join_f = partial(join, join(get_db_files_base_dir(), 'raw_data'))
174-
self.files_to_remove = [
175-
join_f("%s_%s" % (self.raw_data.id, basename(seqs_fp))),
176-
join_f("%s_%s" % (self.raw_data.id, basename(barcodes_fp)))]
177-
178-
def tearDown(self):
179-
for fp in self.files_to_remove:
180-
if exists(fp):
181-
remove(fp)
182-
183162
def test_load_prep_template_from_cmd(self):
184163
"""Correctly adds a prep template to the DB"""
185164
fh = StringIO(self.pt_contents)
186-
st = load_prep_template_from_cmd(fh, self.raw_data.id, 1, '18S')
165+
st = load_prep_template_from_cmd(fh, 1, '18S')
187166
self.assertEqual(st.id, 2)
188167

189168

@@ -222,14 +201,24 @@ def test_load_data_from_cmd(self):
222201
'raw_barcodes']
223202

224203
filetype = 'FASTQ'
225-
study_ids = [1]
204+
metadata_dict = {
205+
'SKB8.640193': {'center_name': 'ANL',
206+
'primer': 'GTGCCAGCMGCCGCGGTAA',
207+
'barcode': 'GTCCGCAAGTTA',
208+
'run_prefix': "s_G1_L001_sequences",
209+
'platform': 'ILLUMINA',
210+
'library_construction_protocol': 'AAAA',
211+
'experiment_design_description': 'BBBB'}}
212+
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
213+
pt1 = PrepTemplate.create(metadata, Study(1), "16S")
214+
prep_templates = [pt1.id]
226215

227216
initial_raw_count = get_count('qiita.raw_data')
228217
initial_fp_count = get_count('qiita.filepath')
229218
initial_raw_fp_count = get_count('qiita.raw_filepath')
230219

231220
new = load_raw_data_cmd(filepaths, filepath_types, filetype,
232-
study_ids)
221+
prep_templates)
233222
raw_data_id = new.id
234223
self.files_to_remove.append(
235224
join(self.db_test_raw_dir,
@@ -246,14 +235,12 @@ def test_load_data_from_cmd(self):
246235
initial_fp_count + 3))
247236
self.assertTrue(check_count('qiita.raw_filepath',
248237
initial_raw_fp_count + 3))
249-
self.assertTrue(check_count('qiita.study_raw_data',
250-
initial_raw_count + 1))
251238

252239
# Ensure that the ValueError is raised when a filepath_type is not
253240
# provided for each and every filepath
254241
with self.assertRaises(ValueError):
255242
load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
256-
study_ids)
243+
prep_templates)
257244

258245

259246
@qiita_test_checker()

qiita_db/test/test_job.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_delete_files(self):
123123
Job(1)
124124

125125
obs = self.conn_handler.execute_fetchall(
126-
"SELECT * FROM qiita.filepath WHERE filepath_id = 12")
126+
"SELECT * FROM qiita.filepath WHERE filepath_id = 10")
127127
self.assertEqual(obs, [])
128128

129129
obs = self.conn_handler.execute_fetchall(
@@ -149,7 +149,7 @@ def test_delete_folders(self):
149149
Job(2)
150150

151151
obs = self.conn_handler.execute_fetchall(
152-
"SELECT * FROM qiita.filepath WHERE filepath_id = 13")
152+
"SELECT * FROM qiita.filepath WHERE filepath_id = 11")
153153
self.assertEqual(obs, [])
154154

155155
obs = self.conn_handler.execute_fetchall(
@@ -300,7 +300,7 @@ def test_add_results(self):
300300
obs = self.conn_handler.execute_fetchall(
301301
"SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
302302

303-
self.assertEqual(obs, [[1, 12], [1, fp_count + 1]])
303+
self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
304304

305305
def test_add_results_dir(self):
306306
fp_count = get_count('qiita.filepath')
@@ -313,7 +313,7 @@ def test_add_results_dir(self):
313313
# make sure files attached to job properly
314314
obs = self.conn_handler.execute_fetchall(
315315
"SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
316-
self.assertEqual(obs, [[1, 12], [1, fp_count + 1]])
316+
self.assertEqual(obs, [[1, 10], [1, fp_count + 1]])
317317

318318
def test_add_results_completed(self):
319319
self.job.status = "completed"

qiita_db/test/test_meta_util.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,12 @@ def test_get_accessible_filepath_ids(self):
3636
# shared has access to all study files and analysis files
3737

3838
obs = get_accessible_filepath_ids(User('shared@foo.bar'))
39-
self.assertEqual(obs, set([1, 2, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
40-
18, 19, 20]))
39+
self.assertEqual(obs, {1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16})
4140

4241
# Now shared should not have access to the study files
4342
self._unshare_studies()
4443
obs = get_accessible_filepath_ids(User('shared@foo.bar'))
45-
self.assertEqual(obs, set([12, 13, 14, 15]))
44+
self.assertEqual(obs, {10, 11, 12, 13})
4645

4746
# Now shared should not have access to any files
4847
self._unshare_analyses()
@@ -52,10 +51,10 @@ def test_get_accessible_filepath_ids(self):
5251
# Now shared has access to public study files
5352
self._set_processed_data_public()
5453
obs = get_accessible_filepath_ids(User('shared@foo.bar'))
55-
self.assertEqual(obs, set([1, 2, 5, 6, 7, 11, 16, 19, 20]))
54+
self.assertEqual(obs, {1, 2, 3, 4, 5, 9, 14, 15, 16})
5655

5756
# Test that it doesn't break if the SampleTemplate hasn't been added
58-
exp = set([1, 2, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])
57+
exp = {1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16}
5958
obs = get_accessible_filepath_ids(User('test@foo.bar'))
6059
self.assertEqual(obs, exp)
6160

qiita_db/test/test_setup.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ def test_processed_data_status(self):
3939
self.assertEqual(get_count("qiita.processed_data_status"), 4)
4040

4141
def test_filepath(self):
42-
self.assertEqual(get_count("qiita.filepath"), 20)
42+
self.assertEqual(get_count("qiita.filepath"), 16)
4343

4444
def test_filepath_type(self):
4545
self.assertEqual(get_count("qiita.filepath_type"), 19)
4646

4747
def test_raw_data(self):
48-
self.assertEqual(get_count("qiita.raw_data"), 4)
48+
self.assertEqual(get_count("qiita.raw_data"), 1)
4949

5050
def test_raw_filepath(self):
51-
self.assertEqual(get_count("qiita.raw_filepath"), 4)
51+
self.assertEqual(get_count("qiita.raw_filepath"), 2)
5252

53-
def test_study_raw_data(self):
54-
self.assertEqual(get_count("qiita.study_raw_data"), 4)
53+
def test_study_prep_template(self):
54+
self.assertEqual(get_count("qiita.study_prep_template"), 1)
5555

5656
def test_required_sample_info(self):
5757
self.assertEqual(get_count("qiita.study_sample"), 27)

0 commit comments

Comments
 (0)