Skip to content

Commit

Permalink
solve #30
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieugomez committed Jul 13, 2020
1 parent 6b5f858 commit 4df4bad
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
22 changes: 12 additions & 10 deletions src/edit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,24 +124,26 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
v = collect(1:(len2-k))
w = similar(v)
if max_dist !== nothing
i2_start = k + 1
i2_end = max_dist
i2_start = 1
i2_end = max_dist + 1
end
prevch1, prevch2 = first(s1), first(s2)
current = 0
for (i1, ch1) in enumerate(s1)
i1 <= k && continue
left = current = i1 - k - 1
left = i1 - k - 1
current = left + 1
nextTransCost = 0
if max_dist !== nothing
i2_start += (i1 > 1 + max_dist - (len2 - len1)) ? 1 : 0
i2_end += (i2_end < len2) ? 1 : 0
i2_start += (i1 - k - 1 > max_dist - (len2 - len1)) ? 1 : 0
i2_end += (i2_end <= len2) ? 1 : 0
end
for (i2, ch2) in enumerate(s2)
i2 <= k && continue
# no need to look beyond the window bounded by the lower-right diagonal - max_dist cells
# (the lower-right diagonal is i1 - (len2 - len1)) and the upper-left diagonal + max_dist cells (the upper-left diagonal is i1)
if (max_dist !== nothing) && ((i2 < i2_start) | (i2 > i2_end))
if i2 <= k
prevch2 = ch2
elseif (max_dist !== nothing) && ((i2 - k < i2_start) | (i2 - k >= i2_end))
# no need to look beyond the window bounded by the lower-right diagonal - max_dist cells
# (the lower-right diagonal is i1 - (len2 - len1)) and the upper-left diagonal + max_dist cells (the upper-left diagonal is i1)
prevch2 = ch2
else
above, current, left = current, left, v[i2 - k]
Expand All @@ -150,7 +152,7 @@ function (dist::DamerauLevenshtein)(s1, s2, max_dist::Union{Integer, Nothing} =
if ch1 != ch2
current = min(left, current, above) + 1
# never happens at i2 = k + 1 because then the two previous characters were equal
if (i1 > 1 + k) & (i2 > 1 + k) && (ch1 == prevch2) && (prevch1 == ch2)
if (i1 - k > 1) & (i2 - k > 1) && (ch1 == prevch2) && (prevch1 == ch2)
thisTransCost += 1
current = min(current, thisTransCost)
end
Expand Down
15 changes: 14 additions & 1 deletion test/distances.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ using StringDistances, Unicode, Test
@test evaluate(DamerauLevenshtein(), "cape sand recycling ", "edith ann graham") == 17
@test evaluate(DamerauLevenshtein(), "jellyifhs", "jellyfish") == 2
@test evaluate(DamerauLevenshtein(), "ifhs", "fish") == 2
@test DamerauLevenshtein()("abcdef", "abcxyf", 2) == 2

@test evaluate(DamerauLevenshtein(), [1, 2, 3], [1,2, 4]) == 1
@test evaluate(DamerauLevenshtein(), graphemes("alborgów"), graphemes("amoniak")) == evaluate(DamerauLevenshtein(), "alborgów", "amoniak")
@test DamerauLevenshtein()("bc", "abc") == 1
Expand Down Expand Up @@ -161,7 +163,7 @@ using StringDistances, Unicode, Test
# Test with R package StringDist
for x in solutions
t, solution = x
for i in 1:length(solution)
for i in eachindex(solution)
if isnan(evaluate(t, strings[i]...))
@test isnan(solution[i])
else
Expand All @@ -174,8 +176,19 @@ using StringDistances, Unicode, Test
for i in eachindex(strings)
@test round(Int, (1 - evaluate(RatcliffObershelp(), strings[i]...)) * 100) ≈ solution[i] atol = 1e-4
end

# test max_dist
for i in eachindex(strings)
d = Levenshtein()(strings[i]...)
@test Levenshtein()(strings[i]..., d) == d
d = DamerauLevenshtein()(strings[i]...)
@test DamerauLevenshtein()(strings[i]..., d) == d
end
end

d = DamerauLevenshtein()("abcdef", "abcxyf")
@test DamerauLevenshtein()("abcdef", "abcxyf", d) == d



#= R test
Expand Down

0 comments on commit 4df4bad

Please sign in to comment.