From d0eeb40c1d34b76ae9c079454c419db5a491aef7 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Tue, 14 Feb 2023 15:33:36 -0700 Subject: [PATCH] rm counts_match restrictions --- .gitignore | 3 +++ qtp_sequencing/tests/test_validate.py | 29 --------------------------- qtp_sequencing/validate.py | 18 +---------------- 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index 4210b63..2d60476 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,6 @@ ENV/ # redis dump.rdb + +# tests +qtp_sequencing/tests/test_data/ diff --git a/qtp_sequencing/tests/test_validate.py b/qtp_sequencing/tests/test_validate.py index d0b3708..9a8d90c 100644 --- a/qtp_sequencing/tests/test_validate.py +++ b/qtp_sequencing/tests/test_validate.py @@ -417,35 +417,6 @@ def test_validate_per_sample_FASTQ_error(self): "If preprocessed_fastq is provided, raw_reverse_seqs " "should not be provided") - # Count mismatch - files = {'raw_forward_seqs': ['/path/to/file1.fastq'], - 'raw_reverse_seqs': ['/path/to/file1.fastq', - '/path/to/file1.fastq']} - job_id, _ = self._create_template_and_job( - prep_info, files, "per_sample_FASTQ") - obs_success, obs_ainfo, obs_error = _validate_per_sample_FASTQ( - self.qclient, job_id, prep_info, files) - self.assertFalse(obs_success) - self.assertIsNone(obs_ainfo) - self.assertEqual(obs_error, - "The number of provided files doesn't match the " - "number of samples (3): 1 raw_forward_seqs, " - "2 raw_reverse_seqs (optional, 0 is ok)") - - # preprocessed_fastq count mismatch - files = {'preprocessed_fastq': ['/path/to/file1_R1.fastq', - '/path/to/file1_R2.fastq']} - job_id, _ = self._create_template_and_job( - prep_info, files, "per_sample_FASTQ") - obs_success, obs_ainfo, obs_error = _validate_per_sample_FASTQ( - self.qclient, job_id, prep_info, files) - self.assertFalse(obs_success) - self.assertIsNone(obs_ainfo) - self.assertEqual(obs_error, - "The number of provided files doesn't match the " - "number of samples (3): 2 raw_forward_seqs, " - "0 raw_reverse_seqs (optional, 0 is ok)") - # Run prefix mismatch files = {'raw_forward_seqs': ['/path/to/prefix1_fwd.fastq', '/path/to/prefix2_fwd.fastq', diff --git a/qtp_sequencing/validate.py b/qtp_sequencing/validate.py index 84df545..bd6684b 100644 --- a/qtp_sequencing/validate.py +++ b/qtp_sequencing/validate.py @@ -248,7 +248,6 @@ def _validate_per_sample_FASTQ(qclient, job_id, prep_info, files, test=False): job_id, "Step 2: Validating 'per_sample_FASTQ' files") samples = list(prep_info.keys()) - samples_count = len(samples) # Check if there is any filepath type that is not supported unsupported_fp_types = set(files) - {'raw_forward_seqs', @@ -268,19 +267,12 @@ def _validate_per_sample_FASTQ(qclient, job_id, prep_info, files, test=False): "should not be provided") return False, None, error_msg read_files = files['raw_forward_seqs'] - read_files_count = len(read_files) - counts_match = read_files_count == samples_count elif 'preprocessed_fastq' in files: if 'raw_reverse_seqs' in files: error_msg = ("If preprocessed_fastq is provided, raw_reverse_seqs " "should not be provided") return False, None, error_msg read_files = files['preprocessed_fastq'] - read_files_count = len(read_files) - # In the preprocessed_fastq case, we either have 1 file per sample - # or 4 files per sample - counts_match = ((read_files_count == samples_count) or - (read_files_count == 4 * samples_count)) else: error_msg = ("Missing required filepath type: raw_forward_seqs or " "preprocessed_fastq") @@ -289,17 +281,9 @@ def _validate_per_sample_FASTQ(qclient, job_id, prep_info, files, test=False): # Make sure that we hve the same number of files than samples if 'raw_reverse_seqs' in files: rev_count = len(files['raw_reverse_seqs']) - counts_match = counts_match and (rev_count == samples_count) else: rev_count = 0 - if not counts_match: - error_msg = ("The number of provided files doesn't match the " - "number of samples (%d): %d raw_forward_seqs, " - "%d raw_reverse_seqs (optional, 0 is ok)" - % (samples_count, read_files_count, rev_count)) - return False, None, error_msg - def _check_files(run_prefixes, read_files, rev_count, files): # Check that the provided files match the run prefixes fwd_fail = [basename(fp) for fp in read_files @@ -320,7 +304,7 @@ def _check_files(run_prefixes, read_files, rev_count, files): run_prefix_present = 'run_prefix' in prep_info[samples[0]] if (fwd_fail or rev_fail) and run_prefix_present: run_prefixes = [v['run_prefix'] for k, v in prep_info.items()] - if samples_count != len(set(run_prefixes)): + if len(samples) != len(set(run_prefixes)): repeated = ["%s (%d)" % (p, run_prefixes.count(p)) for p in set(run_prefixes) if run_prefixes.count(p) > 1]