/
edit_distance.go
84 lines (71 loc) · 1.6 KB
/
edit_distance.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package similarity
// EditDistance scores how similar two strings are based on their edit
// distance; see the CompareAscii and CompareUtf8 methods.
type EditDistance struct {
// mixed holds the raw distance value written by the most recent
// CompareAscii or CompareUtf8 call. It is unexported and, per the
// original note, kept for test use.
mixed int
}
// CompareAscii returns the similarity of s1 and s2 in the range [0, 1],
// computed as 1 - d/max(len(s1), len(s2)), where d is the Levenshtein edit
// distance between the two strings compared byte by byte. Because the
// comparison is per byte, a multi-byte UTF-8 character counts as several
// positions; use CompareUtf8 for non-ASCII text.
//
// Two empty strings are treated as identical (1.0), and an empty string
// compared with a non-empty one yields 0.0. The raw distance d is also stored
// in e.mixed.
func (e *EditDistance) CompareAscii(s1, s2 string) float64 {
	longest := len(s1)
	if len(s2) > longest {
		longest = len(s2)
	}
	if longest == 0 {
		// Both inputs are empty: report a perfect match instead of dividing
		// zero by zero.
		e.mixed = 0
		return 1.0
	}

	// row[x] is the distance between the prefix of s1 processed so far and
	// s2[:x]. Slot 0 stands for the empty prefix of s2, so an empty s1 or an
	// empty s2 is handled by the same recurrence without indexing past the
	// slice.
	row := make([]int, len(s2)+1)
	for x := range row {
		row[x] = x
	}
	for y := 0; y < len(s1); y++ {
		diagonal := row[0] // distance(s1[:y], "") from the previous row
		row[0] = y + 1
		for x := 1; x <= len(s2); x++ {
			// Start from the diagonal move: free on a match, cost 1 on a
			// substitution.
			best := diagonal
			if s1[y] != s2[x-1] {
				best++
			}
			if del := row[x] + 1; del < best {
				best = del
			}
			if ins := row[x-1] + 1; ins < best {
				best = ins
			}
			// Keep the previous-row value before overwriting it; it is the
			// diagonal for the next column.
			diagonal = row[x]
			row[x] = best
		}
	}

	distance := row[len(s2)]
	e.mixed = distance
	return 1.0 - float64(distance)/float64(longest)
}
// CompareUtf8 returns the similarity of utf8Str1 and utf8Str2 in the range
// [0, 1], computed as 1 - d/max(n1, n2), where d is the Levenshtein edit
// distance between the two strings compared rune by rune and n1, n2 are their
// rune counts. Both inputs are converted to []rune first, so each character
// counts as one position regardless of its encoded byte length.
//
// Two empty strings are treated as identical (1.0), and an empty string
// compared with a non-empty one yields 0.0. The raw distance d is also stored
// in e.mixed.
func (e *EditDistance) CompareUtf8(utf8Str1, utf8Str2 string) float64 {
	r1 := []rune(utf8Str1)
	r2 := []rune(utf8Str2)

	longest := len(r1)
	if len(r2) > longest {
		longest = len(r2)
	}
	if longest == 0 {
		// Both inputs are empty: report a perfect match instead of dividing
		// zero by zero.
		e.mixed = 0
		return 1.0
	}

	// row[x] is the distance between the prefix of r1 processed so far and
	// r2[:x]. Slot 0 stands for the empty prefix of r2, so an empty r1 or an
	// empty r2 is handled by the same recurrence without indexing past the
	// slice.
	row := make([]int, len(r2)+1)
	for x := range row {
		row[x] = x
	}
	for y := 0; y < len(r1); y++ {
		diagonal := row[0] // distance(r1[:y], empty) from the previous row
		row[0] = y + 1
		for x := 1; x <= len(r2); x++ {
			// Start from the diagonal move: free on a match, cost 1 on a
			// substitution.
			best := diagonal
			if r1[y] != r2[x-1] {
				best++
			}
			if del := row[x] + 1; del < best {
				best = del
			}
			if ins := row[x-1] + 1; ins < best {
				best = ins
			}
			// Keep the previous-row value before overwriting it; it is the
			// diagonal for the next column.
			diagonal = row[x]
			row[x] = best
		}
	}

	distance := row[len(r2)]
	e.mixed = distance
	return 1.0 - float64(distance)/float64(longest)
}