Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 109 additions & 82 deletions qiita_ware/test/test_processing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,73 @@
class ProcessingPipelineTests(TestCase):
def setUp(self):
self.db_dir = get_db_files_base_dir()
self.files_to_remove = []
self.dirs_to_remove = []

# Create a SFF dataset: add prep template and a RawData
study = Study(1)
md_dict = {
'SKB8.640193': {'center_name': 'ANL',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'barcode': 'GTCCGCAAGTTA',
'run_prefix': "preprocess_test",
'platform': 'ILLUMINA',
'library_construction_protocol': 'AAAA',
'experiment_design_description': 'BBBB'},
'SKD8.640184': {'center_name': 'ANL',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'barcode': 'CGTAGAGCTCTC',
'run_prefix': "preprocess_test",
'platform': 'ILLUMINA',
'library_construction_protocol': 'AAAA',
'experiment_design_description': 'BBBB'},
'SKB7.640196': {'center_name': 'ANL',
'primer': 'GTGCCAGCMGCCGCGGTAA',
'barcode': 'CCTCTGAGAGCT',
'run_prefix': "preprocess_test",
'platform': 'ILLUMINA',
'library_construction_protocol': 'AAAA',
'experiment_design_description': 'BBBB'}
}
md = pd.DataFrame.from_dict(md_dict, orient='index')
self.sff_prep_template = PrepTemplate.create(md, study, "16S")

tmp_dir = mkdtemp()
self.path_builder = partial(join, tmp_dir)
fp1 = self.path_builder('preprocess_test1.sff')
with open(fp1, 'w') as f:
f.write('\n')
fp2 = self.path_builder('preprocess_test2.sff')
with open(fp2, 'w') as f:
f.write('\n')
self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

# Magic number 1: is the filetype id
self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

# Create a SFF dataset with multiple run prefix:
# add prep template and a RawData
md_dict['SKD8.640184']['run_prefix'] = "new"
md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")

rp_fp1 = self.path_builder('preprocess_test1.sff')
with open(rp_fp1, 'w') as f:
f.write('\n')
rp_fp2 = self.path_builder('preprocess_test2.sff')
with open(rp_fp2, 'w') as f:
f.write('\n')
fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]

# Magic number 1: is the filetype id
self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)

# Make sure that we clean up all created files
self.files_to_remove = [fp1, fp2, rp_fp1, rp_fp2]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are rp_fp1 and rp_fp2 missing from this list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nix that, just noticed 110.

self.dirs_to_remove = [tmp_dir]

for pt in [self.sff_prep_template, self.sff_prep_template_rp]:
for _, fp in pt.get_filepaths():
self.files_to_remove.append(fp)

def tearDown(self):
for fp in self.files_to_remove:
Expand Down Expand Up @@ -156,8 +221,9 @@ def test_get_qiime_minimal_mapping_multiple(self):
'experiment_design_description': 'BBB'}
}
md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
prep_template = PrepTemplate.create(md_template, RawData(2), Study(1),
'16S')
prep_template = PrepTemplate.create(md_template, Study(1), '16S')
for _, fp in prep_template.get_filepaths():
self.files_to_remove.append(fp)

out_dir = mkdtemp()

Expand Down Expand Up @@ -206,15 +272,13 @@ def test_get_preprocess_fastq_cmd(self):
self.assertEqual(obs_cmd_2, exp_cmd_2)

def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
raw_data = RawData(3)
params = Preprocessed454Params(1)
prep_template = PrepTemplate(1)
obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
raw_data, prep_template, params)
self.raw_data, self.sff_prep_template, params)

get_raw_path = partial(join, self.db_dir, 'raw_data')
seqs_fp = [get_raw_path('1_preprocess_test1.sff'),
get_raw_path('1_preprocess_test2.sff')]
seqs_fp = [get_raw_path('%d_preprocess_test1.sff' % self.raw_data.id),
get_raw_path('%d_preprocess_test2.sff' % self.raw_data.id)]

exp_cmd_1 = ' '.join(["process_sff.py",
"-i %s" % seqs_fp[0],
Expand All @@ -224,11 +288,14 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
"-o %s" % obs_output_dir])

fasta_files = ','.join([
join(obs_output_dir, "1_preprocess_test1.fna"),
join(obs_output_dir, "1_preprocess_test2.fna")])
join(obs_output_dir, "%s_preprocess_test1.fna" % self.raw_data.id),
join(obs_output_dir, "%s_preprocess_test2.fna" % self.raw_data.id)]
)
qual_files = ','.join([
join(obs_output_dir, "1_preprocess_test1.qual"),
join(obs_output_dir, "1_preprocess_test2.qual")])
join(obs_output_dir,
"%s_preprocess_test1.qual" % self.raw_data.id),
join(obs_output_dir,
"%s_preprocess_test2.qual" % self.raw_data.id)])
exp_cmd_3a = ' '.join(["split_libraries.py",
"-f %s" % fasta_files])

Expand Down Expand Up @@ -257,20 +324,9 @@ def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
self.assertEqual(obs_cmds[3], exp_cmd_4)

def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
# Need to alter the run_prefix of one sample so we can test the
# multiple values
conn_handler = SQLConnectionHandler()
sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
"sample_id = '1.SKM9.640192'")
conn_handler.execute(sql)

raw_data = RawData(3)
params = Preprocessed454Params(1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's 1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the id of the first Peprocessed454Params, I just need one instance of them to be able to call the preprocessing pipeline

prep_template = PrepTemplate(1)
prep_template.generate_files()

obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
raw_data, prep_template, params)
self.raw_data_rp, self.sff_prep_template_rp, params)

obs_cmds = obs_cmd.split('; ')
# assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
Expand All @@ -293,27 +349,17 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
# Test that the run prefixes in the prep_template and the file names
# actually match and raise an error if not
new_fp_id = get_count('qiita.filepath') + 1
conn_handler = SQLConnectionHandler()
sql = ("""
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
checksum_algorithm_id, data_directory_id)
VALUES ('1_new.sff', 17, 852952723, 1, 5);
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
VALUES (3, %s);
UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
UPDATE qiita.prep_1 SET run_prefix='new'
WHERE sample_id = '1.SKB8.640193';
""")
conn_handler.execute(sql, (new_fp_id,))

raw_data = RawData(3)
tmp_dir = mkdtemp()
fp = join(tmp_dir, 'new.sff')
with open(fp, 'w') as f:
f.write('\n')
self.files_to_remove.append(fp)
self.dirs_to_remove.append(tmp_dir)
self.raw_data_rp.add_filepaths([(fp, self.raw_sff_id)])
params = Preprocessed454Params(1)
prep_template = PrepTemplate(1)
prep_template.generate_files()

obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
raw_data, prep_template, params)
self.raw_data_rp, self.sff_prep_template_rp, params)

obs_cmds = obs_cmd.split('; ')
# assumming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
Expand All @@ -339,53 +385,34 @@ def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
# Test that the run prefixes in the prep_template and the file names
# actually match and raise an error if not
fp_count = get_count('qiita.filepath')
conn_handler = SQLConnectionHandler()
sql = ("""
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
checksum_algorithm_id, data_directory_id)
VALUES ('1_new.sff', 17, 852952723, 1, 5);
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
VALUES (3, %s);
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum,
checksum_algorithm_id, data_directory_id)
VALUES ('1_error.sff', 17, 852952723, 1, 5);
INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id)
VALUES (3, %s);
UPDATE qiita.prep_1 SET run_prefix='preprocess_test';
UPDATE qiita.prep_1 SET run_prefix='new' WHERE
sample_id = '1.SKB8.640193';
""")
conn_handler.execute(
sql, (fp_count + 1, fp_count + 2))

raw_data = RawData(3)
fp = self.path_builder('new.sff')
with open(fp, 'w') as f:
f.write('\n')
self.files_to_remove.append(fp)
fp_error = self.path_builder('error.sff')
with open(fp_error, 'w') as f:
f.write('\n')
self.files_to_remove.append(fp_error)
self.raw_data_rp.add_filepaths(
[(fp, self.raw_sff_id), (fp_error, self.raw_sff_id)])
params = Preprocessed454Params(1)
prep_template = PrepTemplate(1)
prep_template.generate_files()

with self.assertRaises(ValueError):
_get_preprocess_fasta_cmd(raw_data, prep_template, params)
_get_preprocess_fasta_cmd(
self.raw_data_rp, self.sff_prep_template_rp, params)

def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
# Should raise error
conn_handler = SQLConnectionHandler()
sql = ("""
UPDATE qiita.prep_1 SET run_prefix='test1';
UPDATE qiita.prep_1 SET run_prefix='test2' WHERE
sample_id = '1.SKB2.640194';
UPDATE qiita.prep_1 SET run_prefix='error' WHERE
sample_id = '1.SKB8.640193';
""")
conn_handler.execute(sql)
self.sff_prep_template_rp['1.SKB8.640193']['run_prefix'] = 'test1'
self.sff_prep_template_rp['1.SKD8.640184']['run_prefix'] = 'test2'
self.sff_prep_template_rp['1.SKB7.640196']['run_prefix'] = 'error'
self.sff_prep_template_rp.generate_files()
for _, fp in self.sff_prep_template_rp.get_filepaths():
self.files_to_remove.append(fp)

raw_data = RawData(3)
params = Preprocessed454Params(1)
prep_template = PrepTemplate(1)
prep_template.generate_files()

with self.assertRaises(ValueError):
_get_preprocess_fasta_cmd(raw_data, prep_template, params)
_get_preprocess_fasta_cmd(
self.raw_data_rp, self.sff_prep_template_rp, params)

def test_insert_preprocessed_data(self):
study = Study(1)
Expand Down