Skip to content
Permalink
Browse files
Fall back to difflib if Levenshtein is unavailable
  • Loading branch information
YoannQDQ authored and nyalldawson committed Mar 17, 2023
1 parent 04a8436 commit 53c7b80
Showing 1 changed file with 10 additions and 5 deletions.
@@ -58,19 +58,24 @@
def findMinimalDistanceIndex(source, target):
""" Find the source substring index that most closely matches the target string"""
index = min(len(source), len(target))

# Fall back to difflib if Levenshtein is not available
# Levenshtein is faster than difflib
try:
from Levenshtein import distance as lev
from Levenshtein import distance
except ImportError:
return index
d0 = lev(source[:index], target)
from difflib import SequenceMatcher
def distance(s, t): return 1 - SequenceMatcher(None, s, t).ratio()

d0 = distance(source[:index], target)
if d0 == 0:
return index

ref_dist_more = d0
ref_index_more = index
if index < len(source) - 1:
while True:
new_dist = lev(source[:ref_index_more + 1], target)
new_dist = distance(source[:ref_index_more + 1], target)
if new_dist <= ref_dist_more:
ref_dist_more = new_dist
ref_index_more = ref_index_more + 1
@@ -83,7 +88,7 @@ def findMinimalDistanceIndex(source, target):
ref_index_less = index
if index > 0:
while True:
new_dist = lev(source[:ref_index_less - 1], target)
new_dist = distance(source[:ref_index_less - 1], target)
if new_dist <= ref_dist_less:
ref_dist_less = new_dist
ref_index_less = ref_index_less - 1

0 comments on commit 53c7b80

Please sign in to comment.