Skip to content

Commit 33f944d

Browse files
committed
Merge: core: add Text::levenshtein_distance
Pull-Request: #1912 Reviewed-by: Alexandre Terrasa <alexandre@moz-code.org>
2 parents 5c7b7f9 + 4d5546d commit 33f944d

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

lib/core/text/abstract_text.nit

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,65 @@ abstract class Text
974974
return s.plain_to_s
975975
end
976976

977+
# Compute the Levenshtein (edit) distance between `self` and `other`
#
# The result is the smallest number of single-character insertions,
# deletions and substitutions that turn `self` into `other`.
#
# ~~~
# assert "abcd".levenshtein_distance("abcd") == 0
# assert "".levenshtein_distance("abcd") == 4
# assert "abcd".levenshtein_distance("") == 4
# assert "abcd".levenshtein_distance("xyz") == 4
# assert "abcd".levenshtein_distance("xbdy") == 3
# ~~~
fun levenshtein_distance(other: String): Int
do
	var self_len = length
	var other_len = other.length

	# Answers that need no distance table
	if self_len == 0 then return other_len
	if other_len == 0 then return self_len
	if self == other then return 0

	# Only two rows of the distance table are kept at any time.
	# While processing row `row`, `prev[col]` is the distance between the
	# first `row - 1` chars of `self` and the first `col` chars of `other`,
	# and `cur` receives the same information for the first `row` chars.
	var prev = new Array[Int].with_capacity(other_len + 1)
	var cur = new Array[Int].with_capacity(other_len + 1)

	# Row 0: reaching a prefix of `other` from the empty text
	# costs one insertion per character.
	for col in [0..other_len] do prev.add(col)

	for row in [1..self_len] do
		# Character of `self` handled by this row
		var c = self[row - 1]

		# Column 0: reaching the empty text from a prefix of `self`
		# costs one deletion per character.
		cur[0] = row

		for col in [1..other_len] do
			# Diagonal move: keep the char (+0) or substitute it (+1)
			var best = prev[col - 1]
			if c != other[col - 1] then best += 1

			# Move from the row above: delete the current char of `self`
			var deletion = prev[col] + 1
			if deletion < best then best = deletion

			# Move from the cell on the left: insert the current char of `other`
			var insertion = cur[col - 1] + 1
			if insertion < best then best = insertion

			cur[col] = best
		end

		# Swap the rows:
		# * the row just computed becomes the previous row of the next iteration
		# * the old previous row is recycled as storage for the next current row
		var finished = cur
		cur = prev
		prev = finished
	end

	# After the final swap, the last computed row is in `prev`
	return prev[other_len]
end
1035+
9771036
# Copies `n` bytes from `self` at `src_offset` into `dest` starting at `dest_offset`
9781037
#
9791038
# Basically a high-level synonym of NativeString::copy_to

0 commit comments

Comments
 (0)