Skip to content

Commit

Permalink
Merge pull request #76 from juliangilbey/jarowinkler-jellyfish
Browse files (browse the repository at this point in the history)
Modify JaroWinkler boosting to match behaviour of jellyfish algorithm
  • Loading branch information
orsinium committed Oct 27, 2021
2 parents b5ee3c3 + a5a6ae0 commit 5785d46
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion tests/test_edit/test_jaro_winkler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
@pytest.mark.parametrize('left, right, expected', [
('elephant', 'hippo', 0.44166666666666665),
('fly', 'ant', 0.0),
-('frog', 'fog', 0.916666666),
+('frog', 'fog', 0.925),
('MARTHA', 'MARHTA', 0.9611111111111111),
('DWAYNE', 'DUANE', 0.84),
('DIXON', 'DICKSONX', 0.8133333333333332),
Expand Down
7 changes: 4 additions & 3 deletions textdistance/algorithms/edit_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,9 @@ def __call__(self, s1, s2, prefix_weight=0.1):
if not s1_len or not s2_len:
return 0.0

-min_len = max(s1_len, s2_len)
-search_range = (min_len // 2) - 1
+min_len = min(s1_len, s2_len)
+search_range = max(s1_len, s2_len)
+search_range = (search_range // 2) - 1
if search_range < 0:
search_range = 0

Expand Down Expand Up @@ -294,7 +295,7 @@ def __call__(self, s1, s2, prefix_weight=0.1):
# stop to boost if strings are not similar
if not self.winklerize:
return weight
-if weight <= 0.7 or s1_len <= 3 or s2_len <= 3:
+if weight <= 0.7:
return weight

# winkler modification
Expand Down

0 comments on commit 5785d46

Please sign in to comment.