Skip to content

Commit

Permalink
Let r1 and r2 be FastQC .zip output
Browse files: browse the repository at this point in the history
  • Loading branch information
brwnj committed May 17, 2019
1 parent dc8a722 commit 620c11f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
7 changes: 7 additions & 0 deletions fqc/fastqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def out_archive(self, fastq):
filename, ext = os.path.splitext(os.path.basename(fastq))
if ext == ".gz":
filename, ext = os.path.splitext(filename)
elif ext == ".zip":
return os.path.basename(fastq)
if ext in [".fastq", ".fq"]:
return filename + "_fastqc.zip"
else:
Expand Down Expand Up @@ -232,6 +234,11 @@ def run(self, keep_tmp=False, **kwargs):
contaminants (Optional[str]): path to contaminants file
kmers (Optional[int]): length of kmer for which to look
"""
if self.r1 and self.r1.endswith(".zip"):
logging.info("Extracting existing FastQC data from %s" % os.path.dirname(self.r1))
self.extract_data(os.path.dirname(self.r1))
return

if not find_executable(FASTQC_EXE):
sys.exit("`%s` was not found in your PATH." % FASTQC_EXE)

Expand Down
10 changes: 7 additions & 3 deletions fqc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def fastqs_from_dir(input_dir):
Returns:
dict of sample id to file path or list of file paths
"""
exts = ['.fastq', '.fq']
exts = ['.fastq', '.fq', '.zip']
input_dir = os.path.abspath(input_dir)
if not os.path.isdir(input_dir):
logging.debug("Changing input dir from %s to %s" % (input_dir, os.path.dirname(input_dir)))
Expand All @@ -63,12 +63,16 @@ def fastqs_from_dir(input_dir):
# sample name for files without _r1 or _r2 in the name
split_pattern = re.compile('(\\%s)' % '|\\'.join(exts))
# split file name based on _r1 and _r2
pattern = re.compile(r'((?:_[rR][12][^_]*))$')
pattern = re.compile(r'((?s:.*))_[rR][12]')

for f in os.listdir(input_dir):
if not any(ext in f for ext in exts): continue
toks = pattern.split(f)
sample_id = toks[0] if len(toks) > 1 else split_pattern.split(f)[0]
# found _R1 or _R2
if len(toks) == 3:
sample_id = toks[1]
else:
sample_id = split_pattern.split(f)[0]
if sample_id in pairs:
if isinstance(pairs[sample_id], list):
logging.warning("%s has more than 2 paired fastqs in %s" % (sample_id, input_dir))
Expand Down

0 comments on commit 620c11f

Please sign in to comment.