Skip to content

Commit 23b12be

Browse files
committed
Merge pull request #1206 from josenavas/1084-qiita-ware
1084 qiita ware
2 parents 250ae69 + 7fd6d51 commit 23b12be

File tree

1 file changed

+109
-82
lines changed

1 file changed

+109
-82
lines changed

qiita_ware/test/test_processing_pipeline.py

Lines changed: 109 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,73 @@
4141
class ProcessingPipelineTests(TestCase):
4242
def setUp(self):
4343
self.db_dir = get_db_files_base_dir()
44-
self.files_to_remove = []
45-
self.dirs_to_remove = []
44+
45+
# Create an SFF dataset: add a prep template and a RawData
46+
study = Study(1)
47+
md_dict = {
48+
'SKB8.640193': {'center_name': 'ANL',
49+
'primer': 'GTGCCAGCMGCCGCGGTAA',
50+
'barcode': 'GTCCGCAAGTTA',
51+
'run_prefix': "preprocess_test",
52+
'platform': 'ILLUMINA',
53+
'library_construction_protocol': 'AAAA',
54+
'experiment_design_description': 'BBBB'},
55+
'SKD8.640184': {'center_name': 'ANL',
56+
'primer': 'GTGCCAGCMGCCGCGGTAA',
57+
'barcode': 'CGTAGAGCTCTC',
58+
'run_prefix': "preprocess_test",
59+
'platform': 'ILLUMINA',
60+
'library_construction_protocol': 'AAAA',
61+
'experiment_design_description': 'BBBB'},
62+
'SKB7.640196': {'center_name': 'ANL',
63+
'primer': 'GTGCCAGCMGCCGCGGTAA',
64+
'barcode': 'CCTCTGAGAGCT',
65+
'run_prefix': "preprocess_test",
66+
'platform': 'ILLUMINA',
67+
'library_construction_protocol': 'AAAA',
68+
'experiment_design_description': 'BBBB'}
69+
}
70+
md = pd.DataFrame.from_dict(md_dict, orient='index')
71+
self.sff_prep_template = PrepTemplate.create(md, study, "16S")
72+
73+
tmp_dir = mkdtemp()
74+
self.path_builder = partial(join, tmp_dir)
75+
fp1 = self.path_builder('preprocess_test1.sff')
76+
with open(fp1, 'w') as f:
77+
f.write('\n')
78+
fp2 = self.path_builder('preprocess_test2.sff')
79+
with open(fp2, 'w') as f:
80+
f.write('\n')
81+
self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
82+
fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]
83+
84+
# Magic number 1 is the filetype id
85+
self.raw_data = RawData.create(1, [self.sff_prep_template], fps)
86+
87+
# Create an SFF dataset with multiple run prefixes:
88+
# add prep template and a RawData
89+
md_dict['SKD8.640184']['run_prefix'] = "new"
90+
md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
91+
self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")
92+
93+
rp_fp1 = self.path_builder('preprocess_test1.sff')
94+
with open(rp_fp1, 'w') as f:
95+
f.write('\n')
96+
rp_fp2 = self.path_builder('preprocess_test2.sff')
97+
with open(rp_fp2, 'w') as f:
98+
f.write('\n')
99+
fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]
100+
101+
# Magic number 1 is the filetype id
102+
self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)
103+
104+
# Make sure that we clean up all created files
105+
self.files_to_remove = [fp1, fp2, rp_fp1, rp_fp2]
106+
self.dirs_to_remove = [tmp_dir]
107+
108+
for pt in [self.sff_prep_template, self.sff_prep_template_rp]:
109+
for _, fp in pt.get_filepaths():
110+
self.files_to_remove.append(fp)
46111

47112
def tearDown(self):
48113
for fp in self.files_to_remove:
@@ -156,8 +221,9 @@ def test_get_qiime_minimal_mapping_multiple(self):
156221
'experiment_design_description': 'BBB'}
157222
}
158223
md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
159-
prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
160-
'16S')
224+
prep_template = PrepTemplate.create(md_template, Study(1), '16S')
225+
for _, fp in prep_template.get_filepaths():
226+
self.files_to_remove.append(fp)
161227

162228
out_dir = mkdtemp()
163229

@@ -206,15 +272,13 @@ def test_get_preprocess_fastq_cmd(self):
206272
self.assertEqual(obs_cmd_2, exp_cmd_2)
207273

208274
def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
209-
raw_data = RawData(3)
210275
params = Preprocessed454Params(1)
211-
prep_template = PrepTemplate(1)
212276
obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
213-
raw_data, prep_template, params)
277+
self.raw_data, self.sff_prep_template, params)
214278

215279
get_raw_path = partial(join, self.db_dir, 'raw_data')
216-
seqs_fp = [get_raw_path('1_preprocess_test1.sff'),
217-
get_raw_path('1_preprocess_test2.sff')]
280+
seqs_fp = [get_raw_path('%d_preprocess_test1.sff' % self.raw_data.id),
281+
get_raw_path('%d_preprocess_test2.sff' % self.raw_data.id)]
218282

219283
exp_cmd_1 = ' '.join(["process_sff.py",
220284
"-i %s" % seqs_fp[0],
@@ -224,11 +288,14 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
224288
"-o %s" % obs_output_dir])
225289

226290
fasta_files = ','.join([
227-
join(obs_output_dir, "1_preprocess_test1.fna"),
228-
join(obs_output_dir, "1_preprocess_test2.fna")])
291+
join(obs_output_dir, "%s_preprocess_test1.fna" % self.raw_data.id),
292+
join(obs_output_dir, "%s_preprocess_test2.fna" % self.raw_data.id)]
293+
)
229294
qual_files = ','.join([
230-
join(obs_output_dir, "1_preprocess_test1.qual"),
231-
join(obs_output_dir, "1_preprocess_test2.qual")])
295+
join(obs_output_dir,
296+
"%s_preprocess_test1.qual" % self.raw_data.id),
297+
join(obs_output_dir,
298+
"%s_preprocess_test2.qual" % self.raw_data.id)])
232299
exp_cmd_3a = ' '.join(["split_libraries.py",
233300
"-f %s" % fasta_files])
234301

@@ -257,20 +324,9 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
257324
self.assertEqual(obs_cmds[3], exp_cmd_4)
258325

259326
def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
260-
# Need to alter the run_prefix of one sample so we can test the
261-
# multiple values
262-
conn_handler = SQLConnectionHandler()
263-
sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
264-
"sample_id = '1.SKM9.640192'")
265-
conn_handler.execute(sql)
266-
267-
raw_data = RawData(3)
268327
params = Preprocessed454Params(1)
269-
prep_template = PrepTemplate(1)
270-
prep_template.generate_files()
271-
272328
obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
273-
raw_data, prep_template, params)
329+
self.raw_data_rp, self.sff_prep_template_rp, params)
274330

275331
obs_cmds = obs_cmd.split('; ')
276332
# assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -293,27 +349,17 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
293349
def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
294350
# Test that the run prefixes in the prep_template and the file names
295351
# actually match and raise an error if not
296-
new_fp_id = get_count('qiita.filepath') + 1
297-
conn_handler = SQLConnectionHandler()
298-
sql = ("""
299-
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
300-
checksum_algorithm_id, data_directory_id)
301-
VALUES ('1_new.sff', 17, 852952723, 1, 5);
302-
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
303-
VALUES (3, %s);
304-
UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
305-
UPDATE qiita.prep_1 SET run_prefix='new'
306-
WHERE sample_id = '1.SKB8.640193';
307-
""")
308-
conn_handler.execute(sql, (new_fp_id,))
309-
310-
raw_data = RawData(3)
352+
tmp_dir = mkdtemp()
353+
fp = join(tmp_dir, 'new.sff')
354+
with open(fp, 'w') as f:
355+
f.write('\n')
356+
self.files_to_remove.append(fp)
357+
self.dirs_to_remove.append(tmp_dir)
358+
self.raw_data_rp.add_filepaths([(fp, self.raw_sff_id)])
311359
params = Preprocessed454Params(1)
312-
prep_template = PrepTemplate(1)
313-
prep_template.generate_files()
314360

315361
obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
316-
raw_data, prep_template, params)
362+
self.raw_data_rp, self.sff_prep_template_rp, params)
317363

318364
obs_cmds = obs_cmd.split('; ')
319365
# assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -339,53 +385,34 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
339385
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
340386
# Test that the run prefixes in the prep_template and the file names
341387
# actually match and raise an error if not
342-
fp_count = get_count('qiita.filepath')
343-
conn_handler = SQLConnectionHandler()
344-
sql = ("""
345-
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
346-
checksum_algorithm_id, data_directory_id)
347-
VALUES ('1_new.sff', 17, 852952723, 1, 5);
348-
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
349-
VALUES (3, %s);
350-
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
351-
checksum_algorithm_id, data_directory_id)
352-
VALUES ('1_error.sff', 17, 852952723, 1, 5);
353-
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
354-
VALUES (3, %s);
355-
UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
356-
UPDATE qiita.prep_1 SET run_prefix='new' WHERE
357-
sample_id = '1.SKB8.640193';
358-
""")
359-
conn_handler.execute(
360-
sql, (fp_count + 1, fp_count + 2))
361-
362-
raw_data = RawData(3)
388+
fp = self.path_builder('new.sff')
389+
with open(fp, 'w') as f:
390+
f.write('\n')
391+
self.files_to_remove.append(fp)
392+
fp_error = self.path_builder('error.sff')
393+
with open(fp_error, 'w') as f:
394+
f.write('\n')
395+
self.files_to_remove.append(fp_error)
396+
self.raw_data_rp.add_filepaths(
397+
[(fp, self.raw_sff_id), (fp_error, self.raw_sff_id)])
363398
params = Preprocessed454Params(1)
364-
prep_template = PrepTemplate(1)
365-
prep_template.generate_files()
366-
367399
with self.assertRaises(ValueError):
368-
_get_preprocess_fasta_cmd(raw_data, prep_template, params)
400+
_get_preprocess_fasta_cmd(
401+
self.raw_data_rp, self.sff_prep_template_rp, params)
369402

370403
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
371404
# Should raise error
372-
conn_handler = SQLConnectionHandler()
373-
sql = ("""
374-
UPDATE qiita.prep_1 SET run_prefix='test1';
375-
UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
376-
sample_id = '1.SKB2.640194';
377-
UPDATE qiita.prep_1 SET run_prefix='error' WHERE
378-
sample_id = '1.SKB8.640193';
379-
""")
380-
conn_handler.execute(sql)
405+
self.sff_prep_template_rp['1.SKB8.640193']['run_prefix'] = 'test1'
406+
self.sff_prep_template_rp['1.SKD8.640184']['run_prefix'] = 'test2'
407+
self.sff_prep_template_rp['1.SKB7.640196']['run_prefix'] = 'error'
408+
self.sff_prep_template_rp.generate_files()
409+
for _, fp in self.sff_prep_template_rp.get_filepaths():
410+
self.files_to_remove.append(fp)
381411

382-
raw_data = RawData(3)
383412
params = Preprocessed454Params(1)
384-
prep_template = PrepTemplate(1)
385-
prep_template.generate_files()
386-
387413
with self.assertRaises(ValueError):
388-
_get_preprocess_fasta_cmd(raw_data, prep_template, params)
414+
_get_preprocess_fasta_cmd(
415+
self.raw_data_rp, self.sff_prep_template_rp, params)
389416

390417
def test_insert_preprocessed_data(self):
391418
study = Study(1)

0 commit comments

Comments
 (0)