Skip to content

Commit

Permalink
Have PrefixComparer and SuffixComparer take min_overlap into account
Browse files Browse the repository at this point in the history
The locate() methods should return None when the match is shorter than min_overlap.
PrefixComparer and SuffixComparer did not do so.

Closes #376
  • Loading branch information
marcelm committed Apr 17, 2019
1 parent bcc2052 commit 1451a0d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
6 changes: 4 additions & 2 deletions CHANGES.rst
Expand Up @@ -2,9 +2,11 @@
Changes
=======

v2.2 (2019-03-20)
-----------------
v2.2 (in development)
---------------------

* :issue:`376`: Fix a crash when using anchored 5' adapters together with
``--no-indels`` and trying to trim an empty read.
* :issue:`369`: Fix a crash when attempting to trim an empty read using a ``-g``
adapter with wildcards.

Expand Down
10 changes: 8 additions & 2 deletions src/cutadapt/_align.pyx
Expand Up @@ -549,6 +549,7 @@ cdef class PrefixComparer:
int m
int max_k # max. number of errors
readonly int effective_length
int min_overlap

# __init__ instead of __cinit__ because we need to override this in SuffixComparer
def __init__(
Expand All @@ -557,6 +558,7 @@ cdef class PrefixComparer:
double max_error_rate,
bint wildcard_ref=False,
bint wildcard_query=False,
int min_overlap=1,
):
self.wildcard_ref = wildcard_ref
self.wildcard_query = wildcard_query
Expand All @@ -570,6 +572,9 @@ cdef class PrefixComparer:
raise ValueError("max_error_rate must be between 0 and 1")
self.max_k = int(max_error_rate * self.effective_length)
self.reference = reference.encode('ascii').upper()
if min_overlap < 1:
raise ValueError("min_overlap must be at least 1")
self.min_overlap = min_overlap
if self.wildcard_ref:
self.reference = self.reference.translate(IUPAC_TABLE)
elif self.wildcard_query:
Expand Down Expand Up @@ -621,7 +626,7 @@ cdef class PrefixComparer:
matches += 1

errors = length - matches
if errors > self.max_k:
if errors > self.max_k or length < self.min_overlap:
return None
return (0, length, 0, length, matches, length - matches)

Expand All @@ -634,8 +639,9 @@ cdef class SuffixComparer(PrefixComparer):
double max_error_rate,
bint wildcard_ref=False,
bint wildcard_query=False,
int min_overlap=1,
):
super().__init__(reference[::-1], max_error_rate, wildcard_ref, wildcard_query)
super().__init__(reference[::-1], max_error_rate, wildcard_ref, wildcard_query, min_overlap)

def locate(self, str query):
cdef int n = len(query)
Expand Down
9 changes: 7 additions & 2 deletions src/cutadapt/adapters.py
Expand Up @@ -710,8 +710,13 @@ def __init__(self, sequence, where, remove=None, max_error_rate=0.1, min_overlap
self.indels = indels
if self.is_anchored and not self.indels:
aligner_class = align.PrefixComparer if self.where is Where.PREFIX else align.SuffixComparer
self.aligner = aligner_class(self.sequence, self.max_error_rate,
wildcard_ref=self.adapter_wildcards, wildcard_query=self.read_wildcards)
self.aligner = aligner_class(
self.sequence,
self.max_error_rate,
wildcard_ref=self.adapter_wildcards,
wildcard_query=self.read_wildcards,
min_overlap=self.min_overlap
)
else:
# TODO
# Indels are suppressed by setting their cost very high, but a different algorithm
Expand Down
8 changes: 8 additions & 0 deletions tests/test_adapters.py
Expand Up @@ -328,3 +328,11 @@ def test_anywhere_parameter():
cutter = AdapterCutter([adapter])
trimmed_read = cutter(read, [])
assert trimmed_read.sequence == ''


@pytest.mark.parametrize("where", [Where.PREFIX, Where.SUFFIX])
def test_no_indels_empty_read(where):
    """Regression test for issue #376: matching an empty read must not crash.

    For each parametrized ``where`` value, an adapter with indels disabled is
    matched against a read whose sequence is the empty string. The test
    passes as long as ``match_to`` returns without raising.
    """
    empty_read = Sequence('name', '')
    no_indel_adapter = Adapter('ACGT', where=where, indels=False)
    no_indel_adapter.match_to(empty_read)

0 comments on commit 1451a0d

Please sign in to comment.