4141class ProcessingPipelineTests (TestCase ):
4242 def setUp (self ):
4343 self .db_dir = get_db_files_base_dir ()
44- self .files_to_remove = []
45- self .dirs_to_remove = []
44+
45+ # Create a SFF dataset: add prep template and a RawData
46+ study = Study (1 )
47+ md_dict = {
48+ 'SKB8.640193' : {'center_name' : 'ANL' ,
49+ 'primer' : 'GTGCCAGCMGCCGCGGTAA' ,
50+ 'barcode' : 'GTCCGCAAGTTA' ,
51+ 'run_prefix' : "preprocess_test" ,
52+ 'platform' : 'ILLUMINA' ,
53+ 'library_construction_protocol' : 'AAAA' ,
54+ 'experiment_design_description' : 'BBBB' },
55+ 'SKD8.640184' : {'center_name' : 'ANL' ,
56+ 'primer' : 'GTGCCAGCMGCCGCGGTAA' ,
57+ 'barcode' : 'CGTAGAGCTCTC' ,
58+ 'run_prefix' : "preprocess_test" ,
59+ 'platform' : 'ILLUMINA' ,
60+ 'library_construction_protocol' : 'AAAA' ,
61+ 'experiment_design_description' : 'BBBB' },
62+ 'SKB7.640196' : {'center_name' : 'ANL' ,
63+ 'primer' : 'GTGCCAGCMGCCGCGGTAA' ,
64+ 'barcode' : 'CCTCTGAGAGCT' ,
65+ 'run_prefix' : "preprocess_test" ,
66+ 'platform' : 'ILLUMINA' ,
67+ 'library_construction_protocol' : 'AAAA' ,
68+ 'experiment_design_description' : 'BBBB' }
69+ }
70+ md = pd .DataFrame .from_dict (md_dict , orient = 'index' )
71+ self .sff_prep_template = PrepTemplate .create (md , study , "16S" )
72+
73+ tmp_dir = mkdtemp ()
74+ self .path_builder = partial (join , tmp_dir )
75+ fp1 = self .path_builder ('preprocess_test1.sff' )
76+ with open (fp1 , 'w' ) as f :
77+ f .write ('\n ' )
78+ fp2 = self .path_builder ('preprocess_test2.sff' )
79+ with open (fp2 , 'w' ) as f :
80+ f .write ('\n ' )
81+ self .raw_sff_id = convert_to_id ('raw_sff' , 'filepath_type' )
82+ fps = [(fp1 , self .raw_sff_id ), (fp2 , self .raw_sff_id )]
83+
84+ # Magic number 1: is the filetype id
85+ self .raw_data = RawData .create (1 , [self .sff_prep_template ], fps )
86+
87+ # Create a SFF dataset with multiple run prefix:
88+ # add prep template and a RawData
89+ md_dict ['SKD8.640184' ]['run_prefix' ] = "new"
90+ md_rp = pd .DataFrame .from_dict (md_dict , orient = 'index' )
91+ self .sff_prep_template_rp = PrepTemplate .create (md_rp , study , "16S" )
92+
93+ rp_fp1 = self .path_builder ('preprocess_test1.sff' )
94+ with open (rp_fp1 , 'w' ) as f :
95+ f .write ('\n ' )
96+ rp_fp2 = self .path_builder ('preprocess_test2.sff' )
97+ with open (rp_fp2 , 'w' ) as f :
98+ f .write ('\n ' )
99+ fps = [(rp_fp1 , self .raw_sff_id ), (rp_fp2 , self .raw_sff_id )]
100+
101+ # Magic number 1: is the filetype id
102+ self .raw_data_rp = RawData .create (1 , [self .sff_prep_template_rp ], fps )
103+
104+ # Make sure that we clean up all created files
105+ self .files_to_remove = [fp1 , fp2 , rp_fp1 , rp_fp2 ]
106+ self .dirs_to_remove = [tmp_dir ]
107+
108+ for pt in [self .sff_prep_template , self .sff_prep_template_rp ]:
109+ for _ , fp in pt .get_filepaths ():
110+ self .files_to_remove .append (fp )
46111
47112 def tearDown (self ):
48113 for fp in self .files_to_remove :
@@ -156,8 +221,9 @@ def test_get_qiime_minimal_mapping_multiple(self):
156221 'experiment_design_description' : 'BBB' }
157222 }
158223 md_template = pd .DataFrame .from_dict (metadata_dict , orient = 'index' )
159- prep_template = PrepTemplate .create (md_template , RawData (2 ), Study (1 ),
160- '16S' )
224+ prep_template = PrepTemplate .create (md_template , Study (1 ), '16S' )
225+ for _ , fp in prep_template .get_filepaths ():
226+ self .files_to_remove .append (fp )
161227
162228 out_dir = mkdtemp ()
163229
@@ -206,15 +272,13 @@ def test_get_preprocess_fastq_cmd(self):
206272 self .assertEqual (obs_cmd_2 , exp_cmd_2 )
207273
208274 def test_get_preprocess_fasta_cmd_sff_no_run_prefix (self ):
209- raw_data = RawData (3 )
210275 params = Preprocessed454Params (1 )
211- prep_template = PrepTemplate (1 )
212276 obs_cmd , obs_output_dir = _get_preprocess_fasta_cmd (
213- raw_data , prep_template , params )
277+ self . raw_data , self . sff_prep_template , params )
214278
215279 get_raw_path = partial (join , self .db_dir , 'raw_data' )
216- seqs_fp = [get_raw_path ('1_preprocess_test1 .sff' ),
217- get_raw_path ('1_preprocess_test2 .sff' )]
280+ seqs_fp = [get_raw_path ('%d_preprocess_test1 .sff' % self . raw_data . id ),
281+ get_raw_path ('%d_preprocess_test2 .sff' % self . raw_data . id )]
218282
219283 exp_cmd_1 = ' ' .join (["process_sff.py" ,
220284 "-i %s" % seqs_fp [0 ],
@@ -224,11 +288,14 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
224288 "-o %s" % obs_output_dir ])
225289
226290 fasta_files = ',' .join ([
227- join (obs_output_dir , "1_preprocess_test1.fna" ),
228- join (obs_output_dir , "1_preprocess_test2.fna" )])
291+ join (obs_output_dir , "%s_preprocess_test1.fna" % self .raw_data .id ),
292+ join (obs_output_dir , "%s_preprocess_test2.fna" % self .raw_data .id )]
293+ )
229294 qual_files = ',' .join ([
230- join (obs_output_dir , "1_preprocess_test1.qual" ),
231- join (obs_output_dir , "1_preprocess_test2.qual" )])
295+ join (obs_output_dir ,
296+ "%s_preprocess_test1.qual" % self .raw_data .id ),
297+ join (obs_output_dir ,
298+ "%s_preprocess_test2.qual" % self .raw_data .id )])
232299 exp_cmd_3a = ' ' .join (["split_libraries.py" ,
233300 "-f %s" % fasta_files ])
234301
@@ -257,20 +324,9 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
257324 self .assertEqual (obs_cmds [3 ], exp_cmd_4 )
258325
259326 def test_get_preprocess_fasta_cmd_sff_run_prefix (self ):
260- # Need to alter the run_prefix of one sample so we can test the
261- # multiple values
262- conn_handler = SQLConnectionHandler ()
263- sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
264- "sample_id = '1.SKM9.640192'" )
265- conn_handler .execute (sql )
266-
267- raw_data = RawData (3 )
268327 params = Preprocessed454Params (1 )
269- prep_template = PrepTemplate (1 )
270- prep_template .generate_files ()
271-
272328 obs_cmd , obs_output_dir = _get_preprocess_fasta_cmd (
273- raw_data , prep_template , params )
329+ self . raw_data_rp , self . sff_prep_template_rp , params )
274330
275331 obs_cmds = obs_cmd .split ('; ' )
276332 # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -293,27 +349,17 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
293349 def test_get_preprocess_fasta_cmd_sff_run_prefix_match (self ):
294350 # Test that the run prefixes in the prep_template and the file names
295351 # actually match and raise an error if not
296- new_fp_id = get_count ('qiita.filepath' ) + 1
297- conn_handler = SQLConnectionHandler ()
298- sql = ("""
299- INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
300- checksum_algorithm_id, data_directory_id)
301- VALUES ('1_new.sff', 17, 852952723, 1, 5);
302- INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
303- VALUES (3, %s);
304- UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
305- UPDATE qiita.prep_1 SET run_prefix='new'
306- WHERE sample_id = '1.SKB8.640193';
307- """ )
308- conn_handler .execute (sql , (new_fp_id ,))
309-
310- raw_data = RawData (3 )
352+ tmp_dir = mkdtemp ()
353+ fp = join (tmp_dir , 'new.sff' )
354+ with open (fp , 'w' ) as f :
355+ f .write ('\n ' )
356+ self .files_to_remove .append (fp )
357+ self .dirs_to_remove .append (tmp_dir )
358+ self .raw_data_rp .add_filepaths ([(fp , self .raw_sff_id )])
311359 params = Preprocessed454Params (1 )
312- prep_template = PrepTemplate (1 )
313- prep_template .generate_files ()
314360
315361 obs_cmd , obs_output_dir = _get_preprocess_fasta_cmd (
316- raw_data , prep_template , params )
362+ self . raw_data_rp , self . sff_prep_template_rp , params )
317363
318364 obs_cmds = obs_cmd .split ('; ' )
319365 # assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -339,53 +385,34 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
339385 def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1 (self ):
340386 # Test that the run prefixes in the prep_template and the file names
341387 # actually match and raise an error if not
342- fp_count = get_count ('qiita.filepath' )
343- conn_handler = SQLConnectionHandler ()
344- sql = ("""
345- INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
346- checksum_algorithm_id, data_directory_id)
347- VALUES ('1_new.sff', 17, 852952723, 1, 5);
348- INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
349- VALUES (3, %s);
350- INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
351- checksum_algorithm_id, data_directory_id)
352- VALUES ('1_error.sff', 17, 852952723, 1, 5);
353- INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
354- VALUES (3, %s);
355- UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
356- UPDATE qiita.prep_1 SET run_prefix='new' WHERE
357- sample_id = '1.SKB8.640193';
358- """ )
359- conn_handler .execute (
360- sql , (fp_count + 1 , fp_count + 2 ))
361-
362- raw_data = RawData (3 )
388+ fp = self .path_builder ('new.sff' )
389+ with open (fp , 'w' ) as f :
390+ f .write ('\n ' )
391+ self .files_to_remove .append (fp )
392+ fp_error = self .path_builder ('error.sff' )
393+ with open (fp_error , 'w' ) as f :
394+ f .write ('\n ' )
395+ self .files_to_remove .append (fp_error )
396+ self .raw_data_rp .add_filepaths (
397+ [(fp , self .raw_sff_id ), (fp_error , self .raw_sff_id )])
363398 params = Preprocessed454Params (1 )
364- prep_template = PrepTemplate (1 )
365- prep_template .generate_files ()
366-
367399 with self .assertRaises (ValueError ):
368- _get_preprocess_fasta_cmd (raw_data , prep_template , params )
400+ _get_preprocess_fasta_cmd (
401+ self .raw_data_rp , self .sff_prep_template_rp , params )
369402
370403 def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2 (self ):
371404 # Should raise error
372- conn_handler = SQLConnectionHandler ()
373- sql = ("""
374- UPDATE qiita.prep_1 SET run_prefix='test1';
375- UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
376- sample_id = '1.SKB2.640194';
377- UPDATE qiita.prep_1 SET run_prefix='error' WHERE
378- sample_id = '1.SKB8.640193';
379- """ )
380- conn_handler .execute (sql )
405+ self .sff_prep_template_rp ['1.SKB8.640193' ]['run_prefix' ] = 'test1'
406+ self .sff_prep_template_rp ['1.SKD8.640184' ]['run_prefix' ] = 'test2'
407+ self .sff_prep_template_rp ['1.SKB7.640196' ]['run_prefix' ] = 'error'
408+ self .sff_prep_template_rp .generate_files ()
409+ for _ , fp in self .sff_prep_template_rp .get_filepaths ():
410+ self .files_to_remove .append (fp )
381411
382- raw_data = RawData (3 )
383412 params = Preprocessed454Params (1 )
384- prep_template = PrepTemplate (1 )
385- prep_template .generate_files ()
386-
387413 with self .assertRaises (ValueError ):
388- _get_preprocess_fasta_cmd (raw_data , prep_template , params )
414+ _get_preprocess_fasta_cmd (
415+ self .raw_data_rp , self .sff_prep_template_rp , params )
389416
390417 def test_insert_preprocessed_data (self ):
391418 study = Study (1 )
0 commit comments