Skip to content

Commit

Permalink
Merge pull request #178 from JureZmrzlikar/fix_demultiplex
Browse files Browse the repository at this point in the history
demultiplex: Handle case of empty demultiplexed file
  • Loading branch information
tomazc committed Mar 22, 2018
2 parents 82c56cc + 75e89c6 commit 4930534
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 7 deletions.
5 changes: 5 additions & 0 deletions iCount/demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def _extract(reads, barcodes, mismatches=1, minimum_length=15):
all_barcodes = len(barcodes)
# Precompute barcode length for each barcode
barcode_len = {i: len(brc) for i, brc in enumerate(barcodes)}
longest_barcode = max(barcode_len.values())
# Precompute max possible votes for each barcode. This equals the number of valid (non-N) nucleotides in the barcode.
max_votes = {i: len(brc.replace('N', '')) for i, brc in enumerate(barcodes)}
# Precompute positions of randomer nucleotides for each barcode
Expand All @@ -156,6 +157,10 @@ def _extract(reads, barcodes, mismatches=1, minimum_length=15):
# `pos` there is nucleotide X. Check which experiments (=barcodes) have X
# at position `pos`. Increment votes for them.
for read in iCount.files.fastq.FastqFile(reads).read():

if len(read.seq) < longest_barcode:
continue

votes = [0] * all_barcodes
for pos, n2i in p2n2i.items():
for exp_id in n2i.get(read.seq[pos], []):
Expand Down
2 changes: 1 addition & 1 deletion iCount/externals/cutadapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def run(reads, reads_trimmed, adapter, qual_trim=None, minimum_length=None):
'--quiet',
'-a', adapter,
]
qual_base = ENCODING_TO_OFFSET[get_qual_encoding(reads)]
qual_base = ENCODING_TO_OFFSET.get(get_qual_encoding(reads), 33)
args.extend(['--quality-base={}'.format(qual_base)])

if qual_trim is not None:
Expand Down
4 changes: 2 additions & 2 deletions iCount/files/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def get_encoding(quals, count=0, check_count=False):
if encoding:
return encoding

encoding = get_encoding(quals, check_count=False)
return encoding
if quals:
return get_encoding(quals, check_count=False)


class FastqEntry:
Expand Down
11 changes: 7 additions & 4 deletions iCount/tests/test_demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_run_fail(self):

def test_run_ok_with_adapter(self):
adapter = 'GGAACC'
barcodes = ['NNAAAN', 'NNACTN']
barcodes = ['NNAAAN', 'NNACTN', 'NNTTTTTTTTN']
data = [
['@header/1', 'GGAAAG' + make_sequence(40, rnd_seed=0) + adapter, '+',
make_quality_scores(50, min_chr=33, max_chr=74, rnd_seed=1) + '!J'],
Expand Down Expand Up @@ -56,14 +56,17 @@ def test_run_ok_with_adapter(self):
]
self.assertEqual(fq2_list, expected2)

fq3_list = make_list_from_file('{}/demux_{}.fastq.gz'.format(self.dir, 'nomatch'))
expected3 = [
fq3_list = make_list_from_file('{}/demux_{}.fastq.gz'.format(self.dir, barcodes[2]))
self.assertEqual(fq3_list, [])

fq4_list = make_list_from_file('{}/demux_{}.fastq.gz'.format(self.dir, 'nomatch'))
expected4 = [
['@header3'],
[data[2][1]],
['+'],
[data[2][3]],
]
self.assertEqual(fq3_list, expected3)
self.assertEqual(fq4_list, expected4)

def test_run_ok_no_adapter(self):
barcodes = ['NNAAAN', 'NNACTN']
Expand Down
5 changes: 5 additions & 0 deletions iCount/tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def test_get_qual_encoding_none(self):
fq_file = make_file_from_list(data, bedtool=False, extension='.fq')
self.assertEqual(None, iCount.files.fastq.get_qual_encoding(fq_file))

def test_get_qual_encoding_empty(self):
    """Check that ``get_qual_encoding`` returns ``None`` for a file built from no entries."""
    # An empty entry list is written to a ``.fq`` file.
    empty_fastq = make_file_from_list([], bedtool=False, extension='.fq')
    detected = iCount.files.fastq.get_qual_encoding(empty_fastq)
    self.assertEqual(None, detected)

def test_fastq_entry(self):
fq_entry = iCount.files.fastq.FastqEntry('header12345', 'ACTGCTGCAT', '+', 'ABCDEFFBBA')
self.assertEqual(fq_entry.id, 'header12345')
Expand Down

0 comments on commit 4930534

Please sign in to comment.