Skip to content

Commit

Permalink
Adds option to keep or split reads in discordant pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
clausmith committed Jan 4, 2018
1 parent f1b8d7c commit e462083
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 18 deletions.
51 changes: 34 additions & 17 deletions onecodex/scripts/filter_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def with_progress_bar(length, ix, *args, **kwargs):
if label:
bar_kwargs['label'] = label
with click.progressbar(**bar_kwargs) as bar:
return ix(*args, **kwargs, cb=bar.update)
return ix(cb=bar.update, *args, **kwargs)


def filter_rows_by_taxid(results, tax_ids, cb=None):
Expand All @@ -36,10 +36,11 @@ def get_record_count(fp):
return counter


def get_filtered_filename(filepath):
    """Build the output filename for a filtered FASTX file.

    The directory part of ``filepath`` is dropped, a trailing gzip suffix
    (``.gz`` or ``.gzip``) is removed, and ``.filtered`` is inserted before
    the remaining extension.

    Args:
        filepath: Path (or bare filename) of the input FASTX file.

    Returns:
        A ``(filtered_filename, ext)`` tuple, e.g.
        ``('reads.filtered.fastq', '.fastq')`` for ``'/tmp/reads.fastq.gz'``.
    """
    filename = os.path.split(filepath)[-1]

    # NOTE: str.rstrip() removes a *set of characters*, not a suffix, so
    # rstrip('.gz') would also eat the trailing "g" of e.g. "x.fastg".
    # Remove the compression suffix explicitly instead; check the longer
    # suffix first so ".gzip" is not mistaken for something else.
    for suffix in ('.gzip', '.gz'):
        if filename.endswith(suffix):
            filename = filename[:-len(suffix)]
            break

    prefix, ext = os.path.splitext(filename)
    return '{}.filtered{}'.format(prefix, ext), ext


def fetch_readlevel_results(api, classification_id):
Expand All @@ -65,12 +66,16 @@ def write_fastx_record(record, handler):
@click.command()
@click.argument('classification')
@click.argument('fastx', type=click.Path())
@click.option('-r', '--reverse', type=click.Path(), help='The reverse read '
'file, optionally')
@click.option('-t', '--tax-ids', required=True, help='A comma-delimited list '
'of tax ID\'s to retain')
@click.option('-r', '--reverse', type=click.Path(), help='The reverse (R2) '
'read file, optionally')
@click.option('--split-pairs/--keep-pairs', default=False, help='Keep only '
'the read pair member that matches the list of tax ID\'s')
@click.option('-o', '--out', default='.', type=click.Path(), help='Where '
'to put the filtered outputs')
@click.pass_context
def cli(ctx, classification, fastx, reverse, tax_ids):
def cli(ctx, classification, fastx, reverse, tax_ids, split_pairs, out):
tax_ids = tax_ids.split(',')
if not len(tax_ids):
raise OneCodexException('You must supply at least one tax ID')
Expand Down Expand Up @@ -112,8 +117,10 @@ def cli(ctx, classification, fastx, reverse, tax_ids):
)

filtered_filename = get_filtered_filename(fastx)[0]
filtered_filename = os.path.join(out, filtered_filename)
if reverse:
rev_filtered_filename, rev_ext = get_filtered_filename(reverse)
rev_filtered_filename = get_filtered_filename(reverse)[0]
rev_filtered_filename = os.path.join(out, rev_filtered_filename)

click.echo('Getting FASTX record count')
fastx_record_count = 0
Expand All @@ -138,15 +145,25 @@ def cli(ctx, classification, fastx, reverse, tax_ids):
open(reverse, 'rb') as reverse_file, \
open(filtered_filename, 'wb') as out_file, \
open(rev_filtered_filename, 'wb') as rev_out_file:
counter = 0
for fwd, rev in FASTXTranslator(fastx_file, reverse_file,
validate=False):
if counter in filtered_rows:
out_file.write(fwd)
counter += 1
if counter in filtered_rows:
rev_out_file.write(rev)
counter += 1
if split_pairs:
counter = 0
for fwd, rev in FASTXTranslator(fastx_file, reverse_file,
validate=False):
if counter in filtered_rows:
out_file.write(fwd)
if (counter + 1) in filtered_rows:
rev_out_file.write(rev)
counter += 2
else:
counter = 0
for fwd, rev in FASTXTranslator(fastx_file, reverse_file,
validate=False):
if counter in filtered_rows or \
(counter + 1) in filtered_rows:
out_file.write(fwd)
rev_out_file.write(rev)
counter += 2

else:
click.echo('Filtering {}'.format(fastx))
with open(fastx, 'rb') as fastx_file, \
Expand Down
1 change: 0 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ def test_paired_files(runner, upload_mocks):
# Check with only --forward, should fail
args = ['--api-key', '01234567890123456789012345678901', 'upload', '--forward', f]
result5 = runner.invoke(Cli, args)
print('You must specify both forward and reverse files' in result5.output)
assert 'You must specify both forward and reverse files' in result5.output
assert result5.exit_code != 0

Expand Down

0 comments on commit e462083

Please sign in to comment.