qiita-spots · antgonza · May 28, 2015 · May 22, 2015 · May 24, 2015 · May 28, 2015
diff --git a/qiita_ware/test/test_processing_pipeline.py b/qiita_ware/test/test_processing_pipeline.py
@@ -41,8 +41,73 @@
 class ProcessingPipelineTests(TestCase):
     def setUp(self):
         self.db_dir = get_db_files_base_dir()
-        self.files_to_remove = []
-        self.dirs_to_remove = []
+
+        # Create an SFF dataset: add a prep template and a RawData
+        study = Study(1)
+        md_dict = {
+            'SKB8.640193': {'center_name': 'ANL',
+                            'primer': 'GTGCCAGCMGCCGCGGTAA',
+                            'barcode': 'GTCCGCAAGTTA',
+                            'run_prefix': "preprocess_test",
+                            'platform': 'ILLUMINA',
+                            'library_construction_protocol': 'AAAA',
+                            'experiment_design_description': 'BBBB'},
+            'SKD8.640184': {'center_name': 'ANL',
+                            'primer': 'GTGCCAGCMGCCGCGGTAA',
+                            'barcode': 'CGTAGAGCTCTC',
+                            'run_prefix': "preprocess_test",
+                            'platform': 'ILLUMINA',
+                            'library_construction_protocol': 'AAAA',
+                            'experiment_design_description': 'BBBB'},
+            'SKB7.640196': {'center_name': 'ANL',
+                            'primer': 'GTGCCAGCMGCCGCGGTAA',
+                            'barcode': 'CCTCTGAGAGCT',
+                            'run_prefix': "preprocess_test",
+                            'platform': 'ILLUMINA',
+                            'library_construction_protocol': 'AAAA',
+                            'experiment_design_description': 'BBBB'}
+        }
+        md = pd.DataFrame.from_dict(md_dict, orient='index')
+        self.sff_prep_template = PrepTemplate.create(md, study, "16S")
+
+        tmp_dir = mkdtemp()
+        self.path_builder = partial(join, tmp_dir)
+        fp1 = self.path_builder('preprocess_test1.sff')
+        with open(fp1, 'w') as f:
+            f.write('\n')
+        fp2 = self.path_builder('preprocess_test2.sff')
+        with open(fp2, 'w') as f:
+            f.write('\n')
+        self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
+        fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]
+
+        # Magic number 1: the filetype id
+        self.raw_data = RawData.create(1, [self.sff_prep_template], fps)
+
+        # Create an SFF dataset with multiple run prefixes:
+        # add a prep template and a RawData
+        md_dict['SKD8.640184']['run_prefix'] = "new"
+        md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
+        self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")
+
+        rp_fp1 = self.path_builder('preprocess_test1.sff')
+        with open(rp_fp1, 'w') as f:
+            f.write('\n')
+        rp_fp2 = self.path_builder('preprocess_test2.sff')
+        with open(rp_fp2, 'w') as f:
+            f.write('\n')
+        fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]
+
+        # Magic number 1: the filetype id
+        self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)
+
+        # Make sure that we clean up all created files
+        self.files_to_remove = [fp1, fp2, rp_fp1, rp_fp2]
+        self.dirs_to_remove = [tmp_dir]
+
+        for pt in [self.sff_prep_template, self.sff_prep_template_rp]:
+            for _, fp in pt.get_filepaths():
+                self.files_to_remove.append(fp)
 
     def tearDown(self):
         for fp in self.files_to_remove:
@@ -156,8 +221,9 @@ def test_get_qiime_minimal_mapping_multiple(self):
                             'experiment_design_description': 'BBB'}
             }
         md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
-        prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
-                                            '16S')
+        prep_template = PrepTemplate.create(md_template, Study(1), '16S')
+        for _, fp in prep_template.get_filepaths():
+            self.files_to_remove.append(fp)
 
         out_dir = mkdtemp()
 
@@ -206,15 +272,13 @@ def test_get_preprocess_fastq_cmd(self):
         self.assertEqual(obs_cmd_2, exp_cmd_2)
 
     def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
-        raw_data = RawData(3)
         params = Preprocessed454Params(1)
-        prep_template = PrepTemplate(1)
         obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
-            raw_data, prep_template, params)
+            self.raw_data, self.sff_prep_template, params)
 
         get_raw_path = partial(join, self.db_dir, 'raw_data')
-        seqs_fp = [get_raw_path('1_preprocess_test1.sff'),
-                   get_raw_path('1_preprocess_test2.sff')]
+        seqs_fp = [get_raw_path('%d_preprocess_test1.sff' % self.raw_data.id),
+                   get_raw_path('%d_preprocess_test2.sff' % self.raw_data.id)]
 
         exp_cmd_1 = ' '.join(["process_sff.py",
                               "-i %s" % seqs_fp[0],
@@ -224,11 +288,14 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
                               "-o %s" % obs_output_dir])
 
         fasta_files = ','.join([
-            join(obs_output_dir, "1_preprocess_test1.fna"),
-            join(obs_output_dir, "1_preprocess_test2.fna")])
+            join(obs_output_dir, "%s_preprocess_test1.fna" % self.raw_data.id),
+            join(obs_output_dir, "%s_preprocess_test2.fna" % self.raw_data.id)]
+            )
         qual_files = ','.join([
-            join(obs_output_dir, "1_preprocess_test1.qual"),
-            join(obs_output_dir, "1_preprocess_test2.qual")])
+            join(obs_output_dir,
+                 "%s_preprocess_test1.qual" % self.raw_data.id),
+            join(obs_output_dir,
+                 "%s_preprocess_test2.qual" % self.raw_data.id)])
         exp_cmd_3a = ' '.join(["split_libraries.py",
                                "-f %s" % fasta_files])
 
@@ -257,20 +324,9 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
         self.assertEqual(obs_cmds[3], exp_cmd_4)
 
     def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
-        # Need to alter the run_prefix of one sample so we can test the
-        # multiple values
-        conn_handler = SQLConnectionHandler()
-        sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
-               "sample_id = '1.SKM9.640192'")
-        conn_handler.execute(sql)
-
-        raw_data = RawData(3)
         params = Preprocessed454Params(1)
-        prep_template = PrepTemplate(1)
-        prep_template.generate_files()
-
         obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
-            raw_data, prep_template, params)
+            self.raw_data_rp, self.sff_prep_template_rp, params)
 
         obs_cmds = obs_cmd.split('; ')
         # assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -293,27 +349,17 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
     def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
         # Test that the run prefixes in the prep_template and the file names
         # actually match and raise an error if not
-        new_fp_id = get_count('qiita.filepath') + 1
-        conn_handler = SQLConnectionHandler()
-        sql = ("""
-            INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
-                    checksum_algorithm_id, data_directory_id)
-                VALUES ('1_new.sff', 17, 852952723, 1, 5);
-            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
-                VALUES (3, %s);
-            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
-            UPDATE qiita.prep_1 SET run_prefix='new'
-                WHERE sample_id = '1.SKB8.640193';
-        """)
-        conn_handler.execute(sql, (new_fp_id,))
-
-        raw_data = RawData(3)
+        tmp_dir = mkdtemp()
+        fp = join(tmp_dir, 'new.sff')
+        with open(fp, 'w') as f:
+            f.write('\n')
+        self.files_to_remove.append(fp)
+        self.dirs_to_remove.append(tmp_dir)
+        self.raw_data_rp.add_filepaths([(fp, self.raw_sff_id)])
         params = Preprocessed454Params(1)
-        prep_template = PrepTemplate(1)
-        prep_template.generate_files()
 
         obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
-            raw_data, prep_template, params)
+            self.raw_data_rp, self.sff_prep_template_rp, params)
 
         obs_cmds = obs_cmd.split('; ')
         # assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
@@ -339,53 +385,34 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
     def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
         # Test that the run prefixes in the prep_template and the file names
         # actually match and raise an error if not
-        fp_count = get_count('qiita.filepath')
-        conn_handler = SQLConnectionHandler()
-        sql = ("""
-            INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
-                    checksum_algorithm_id, data_directory_id)
-                VALUES ('1_new.sff', 17, 852952723, 1, 5);
-            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
-                VALUES (3, %s);
-            INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
-                    checksum_algorithm_id, data_directory_id)
-                VALUES ('1_error.sff', 17, 852952723, 1, 5);
-            INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
-                VALUES (3, %s);
-            UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
-            UPDATE qiita.prep_1 SET run_prefix='new' WHERE
-                sample_id = '1.SKB8.640193';
-        """)
-        conn_handler.execute(
-            sql, (fp_count + 1, fp_count + 2))
-
-        raw_data = RawData(3)
+        fp = self.path_builder('new.sff')
+        with open(fp, 'w') as f:
+            f.write('\n')
+        self.files_to_remove.append(fp)
+        fp_error = self.path_builder('error.sff')
+        with open(fp_error, 'w') as f:
+            f.write('\n')
+        self.files_to_remove.append(fp_error)
+        self.raw_data_rp.add_filepaths(
+            [(fp, self.raw_sff_id), (fp_error, self.raw_sff_id)])
         params = Preprocessed454Params(1)
-        prep_template = PrepTemplate(1)
-        prep_template.generate_files()
-
         with self.assertRaises(ValueError):
-            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
+            _get_preprocess_fasta_cmd(
+                self.raw_data_rp, self.sff_prep_template_rp, params)
 
     def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
         # Should raise error
-        conn_handler = SQLConnectionHandler()
-        sql = ("""
-            UPDATE qiita.prep_1 SET run_prefix='test1';
-            UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
-                sample_id = '1.SKB2.640194';
-            UPDATE qiita.prep_1 SET run_prefix='error' WHERE
-                sample_id = '1.SKB8.640193';
-        """)
-        conn_handler.execute(sql)
+        self.sff_prep_template_rp['1.SKB8.640193']['run_prefix'] = 'test1'
+        self.sff_prep_template_rp['1.SKD8.640184']['run_prefix'] = 'test2'
+        self.sff_prep_template_rp['1.SKB7.640196']['run_prefix'] = 'error'
+        self.sff_prep_template_rp.generate_files()
+        for _, fp in self.sff_prep_template_rp.get_filepaths():
+            self.files_to_remove.append(fp)
 
-        raw_data = RawData(3)
         params = Preprocessed454Params(1)
-        prep_template = PrepTemplate(1)
-        prep_template.generate_files()
-
         with self.assertRaises(ValueError):
-            _get_preprocess_fasta_cmd(raw_data, prep_template, params)
+            _get_preprocess_fasta_cmd(
+                self.raw_data_rp, self.sff_prep_template_rp, params)
 
     def test_insert_preprocessed_data(self):
         study = Study(1)