Skip to content

Commit

Permalink
fix Levenshtein search parameter handling edge-case
Browse files Browse the repository at this point in the history
When max_l_dist was given but some of the other parameters
were not, those parameters were left as None, which caused
type errors in comparisons during searches.
  • Loading branch information
taleinat committed Apr 6, 2020
1 parent a19cd59 commit 9dab06d
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 11 deletions.
48 changes: 37 additions & 11 deletions src/fuzzysearch/common.py
Expand Up @@ -43,11 +43,21 @@ class LevenshteinSearchParams(object):

def __attrs_post_init__(self):
    """Validate the limits, then store their normalized values.

    Presumably invoked by attrs right after the generated ``__init__``
    (the name is the attrs post-init hook) -- confirm against the class
    decorator, which is outside this view.

    All four limits are normalized together, so that limits passed as
    ``None`` are filled in whenever ``max_l_dist`` was given.
    """
    self._check_params_valid()
    max_subs, max_ins, max_dels, max_l_dist = \
        self._normalize_params(*self.unpacked)
    # object.__setattr__ bypasses the instance's own __setattr__
    # (presumably because the class is frozen -- confirm).
    object.__setattr__(self, 'max_substitutions', max_subs)
    object.__setattr__(self, 'max_insertions', max_ins)
    object.__setattr__(self, 'max_deletions', max_dels)
    object.__setattr__(self, 'max_l_dist', max_l_dist)

@property
def unpacked(self):
    """Return the four limits as a tuple.

    The order is ``(max_substitutions, max_insertions, max_deletions,
    max_l_dist)``, which matches the positional parameters of
    ``_normalize_params``.
    """
    return (
        self.max_substitutions,
        self.max_insertions,
        self.max_deletions,
        self.max_l_dist,
    )

def _check_params_valid(self):
if not all(x is None or (isinstance(x, int) and x >= 0)
Expand Down Expand Up @@ -75,20 +85,36 @@ def _check_params_valid(self):
elif self.max_deletions is None:
raise ValueError('# deletions must be limited!')

@classmethod
def _normalize_params(cls,
                      max_substitutions, max_insertions,
                      max_deletions, max_l_dist):
    """Return the four limits with missing values filled in and clamped.

    Each argument is either a non-negative int or ``None`` (no limit given).

    * If ``max_l_dist`` is ``None``, the three per-operation limits are
      returned unchanged and ``max_l_dist`` becomes their sum.
    * Otherwise every per-operation limit is capped at ``max_l_dist``
      (a ``None`` limit becomes ``max_l_dist``), and ``max_l_dist`` itself
      is capped at the sum of the per-operation limits.

    Returns a tuple ``(max_substitutions, max_insertions, max_deletions,
    max_l_dist)``.
    """
    # Large stand-in for a missing (None) limit so the sum stays an int.
    unlimited = 1 << 29
    maxes_sum = sum(
        x if x is not None else unlimited
        for x in (max_substitutions, max_insertions, max_deletions)
    )

    if max_l_dist is None:
        # Replace max_l_dist with the sum of the other limits.
        # NOTE(review): presumably validation has already rejected the
        # case where one of the other limits is None as well -- confirm.
        return (
            max_substitutions,
            max_insertions,
            max_deletions,
            maxes_sum,
        )

    def _normalize(param):
        # No single kind of edit can be used more often than the total
        # distance allows; a missing limit defaults to that total.
        return min(param, max_l_dist) if param is not None else max_l_dist

    return (
        _normalize(max_substitutions),
        _normalize(max_insertions),
        _normalize(max_deletions),
        min(max_l_dist, maxes_sum),
    )


def count_differences_with_maximum(sequence1, sequence2, max_differences):
Expand Down
6 changes: 6 additions & 0 deletions tests/test_generic_search.py
Expand Up @@ -251,6 +251,12 @@ def test_only_max_l_dist_none(self):
[],
)

def test_no_deletion(self):
    """No match is reported when matching would require a deletion.

    'PATERN' is 'PATTERN' with one character removed. The limits are
    passed as (None, None, 0, 1) -- presumably (max_substitutions,
    max_insertions, max_deletions, max_l_dist); confirm against
    ``self.search`` -- so deletions are forbidden and the search must
    come back empty.
    """
    self.assertEqual(
        self.search(b('PATTERN'), b('---PATERN---'), None, None, 0, 1),
        []
    )

def test_invalid_none_arguments(self):
# check that an exception is raised when max_l_dist is None as well as
# at least one other limitation
Expand Down

0 comments on commit 9dab06d

Please sign in to comment.