Skip to content

Commit

Permalink
Merge pull request #85 from josharian/varia
Browse files Browse the repository at this point in the history
Assorted optimizations
  • Loading branch information
sergi committed Jan 25, 2018
2 parents 1744e29 + ded6142 commit f3948f6
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 69 deletions.
135 changes: 67 additions & 68 deletions diffmatchpatch/diff.go
Expand Up @@ -40,8 +40,41 @@ type Diff struct {
Text string
}

// splice removes amount elements from slice at index index, replacing them with elements.
func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
return append(slice[:index], append(elements, slice[index+amount:]...)...)
if len(elements) == amount {
// Easy case: overwrite the relevant items.
copy(slice[index:], elements)
return slice
}
if len(elements) < amount {
// Fewer new items than old.
// Copy in the new items.
copy(slice[index:], elements)
// Shift the remaining items left.
copy(slice[index+len(elements):], slice[index+amount:])
// Calculate the new end of the slice.
end := len(slice) - amount + len(elements)
// Zero stranded elements at end so that they can be garbage collected.
tail := slice[end:]
for i := range tail {
tail[i] = Diff{}
}
return slice[:end]
}
// More new items than old.
// Make room in slice for new elements.
// There's probably an even more efficient way to do this,
// but this is simple and clear.
need := len(slice) - amount + len(elements)
for len(slice) < need {
slice = append(slice, Diff{})
}
// Shift slice elements right to make room for new elements.
copy(slice[index+len(elements):], slice[index+amount:])
// Copy in new elements.
copy(slice[index:], elements)
return slice
}

// DiffMain finds the differences between two texts.
Expand Down Expand Up @@ -145,7 +178,10 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
// Merge the results.
return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...)
diffs := diffsA
diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
diffs = append(diffs, diffsB...)
return diffs
} else if checklines && len(text1) > 100 && len(text2) > 100 {
return dmp.diffLineMode(text1, text2, deadline)
}
Expand Down Expand Up @@ -247,7 +283,7 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
k2end := 0
for d := 0; d < maxD; d++ {
// Bail out if deadline is reached.
if !deadline.IsZero() && time.Now().After(deadline) {
if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
break
}

Expand Down Expand Up @@ -434,48 +470,29 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {

// commonPrefixLength returns the length of the common prefix of two rune slices.
func commonPrefixLength(text1, text2 []rune) int {
short, long := text1, text2
if len(short) > len(long) {
short, long = long, short
}
for i, r := range short {
if r != long[i] {
return i
// Linear search. See comment in commonSuffixLength.
n := 0
for ; n < len(text1) && n < len(text2); n++ {
if text1[n] != text2[n] {
return n
}
}
return len(short)
return n
}

// commonSuffixLength returns the length of the common suffix of two rune slices.
func commonSuffixLength(text1, text2 []rune) int {
n := min(len(text1), len(text2))
for i := 0; i < n; i++ {
if text1[len(text1)-i-1] != text2[len(text2)-i-1] {
return i
// Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
// See discussion at https://github.com/sergi/go-diff/issues/54.
i1 := len(text1)
i2 := len(text2)
for n := 0; ; n++ {
i1--
i2--
if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
return n
}
}
return n

// TODO research and benchmark this, why is it not activated? https://github.com/sergi/go-diff/issues/54
// Binary search.
// Performance analysis: http://neil.fraser.name/news/2007/10/09/
/*
pointermin := 0
pointermax := math.Min(len(text1), len(text2))
pointermid := pointermax
pointerend := 0
for pointermin < pointermid {
if text1[len(text1)-pointermid:len(text1)-pointerend] ==
text2[len(text2)-pointermid:len(text2)-pointerend] {
pointermin = pointermid
pointerend = pointermin
} else {
pointermax = pointermid
}
pointermid = math.Floor((pointermax-pointermin)/2 + pointermin)
}
return pointermid
*/
}

// DiffCommonOverlap determines if the suffix of one string is the prefix of another.
Expand Down Expand Up @@ -628,11 +645,7 @@ func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
changes := false
// Stack of indices where equalities are found.
type equality struct {
data int
next *equality
}
var equalities *equality
equalities := make([]int, 0, len(diffs))

var lastequality string
// Always equal to diffs[equalities[equalitiesLength - 1]][1]
Expand All @@ -645,11 +658,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
for pointer < len(diffs) {
if diffs[pointer].Type == DiffEqual {
// Equality found.

equalities = &equality{
data: pointer,
next: equalities,
}
equalities = append(equalities, pointer)
lengthInsertions1 = lengthInsertions2
lengthDeletions1 = lengthDeletions2
lengthInsertions2 = 0
Expand All @@ -670,23 +679,20 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
(len(lastequality) <= difference1) &&
(len(lastequality) <= difference2) {
// Duplicate record.
insPoint := equalities.data
diffs = append(
diffs[:insPoint],
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
insPoint := equalities[len(equalities)-1]
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})

// Change second copy to insert.
diffs[insPoint+1].Type = DiffInsert
// Throw away the equality we just deleted.
equalities = equalities.next
equalities = equalities[:len(equalities)-1]

if equalities != nil {
equalities = equalities.next
if len(equalities) > 0 {
equalities = equalities[:len(equalities)-1]
}
if equalities != nil {
pointer = equalities.data
} else {
pointer = -1
pointer = -1
if len(equalities) > 0 {
pointer = equalities[len(equalities)-1]
}

lengthInsertions1 = 0 // Reset the counters.
Expand Down Expand Up @@ -724,10 +730,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
float64(overlapLength1) >= float64(len(insertion))/2 {

// Overlap found. Insert an equality and trim the surrounding edits.
diffs = append(
diffs[:pointer],
append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...)

diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
diffs[pointer-1].Text =
deletion[0 : len(deletion)-overlapLength1]
diffs[pointer+1].Text = insertion[overlapLength1:]
Expand All @@ -738,10 +741,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
float64(overlapLength2) >= float64(len(insertion))/2 {
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
diffs = append(
diffs[:pointer],
append([]Diff{overlap}, diffs[pointer:]...)...)

diffs = splice(diffs, pointer, 0, overlap)
diffs[pointer-1].Type = DiffInsert
diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
diffs[pointer+1].Type = DiffDelete
Expand Down Expand Up @@ -954,8 +954,7 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
insPoint := equalities.data

// Duplicate record.
diffs = append(diffs[:insPoint],
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})

// Change second copy to insert.
diffs[insPoint+1].Type = DiffInsert
Expand Down
36 changes: 35 additions & 1 deletion diffmatchpatch/diff_test.go
Expand Up @@ -130,6 +130,8 @@ func TestDiffCommonSuffix(t *testing.T) {
}
}

var SinkInt int // exported sink var to avoid compiler optimizations in benchmarks

func BenchmarkDiffCommonSuffix(b *testing.B) {
s := "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ"

Expand All @@ -138,10 +140,42 @@ func BenchmarkDiffCommonSuffix(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
dmp.DiffCommonSuffix(s, s)
SinkInt = dmp.DiffCommonSuffix(s, s)
}
}

func BenchmarkCommonLength(b *testing.B) {
data := []struct {
name string
x, y []rune
}{
{name: "empty", x: nil, y: []rune{}},
{name: "short", x: []rune("AABCC"), y: []rune("AA-CC")},
{name: "long",
x: []rune(strings.Repeat("A", 1000) + "B" + strings.Repeat("C", 1000)),
y: []rune(strings.Repeat("A", 1000) + "-" + strings.Repeat("C", 1000)),
},
}
b.Run("prefix", func(b *testing.B) {
for _, d := range data {
b.Run(d.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
SinkInt = commonPrefixLength(d.x, d.y)
}
})
}
})
b.Run("suffix", func(b *testing.B) {
for _, d := range data {
b.Run(d.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
SinkInt = commonSuffixLength(d.x, d.y)
}
})
}
})
}

func TestCommonSuffixLength(t *testing.T) {
type TestCase struct {
Text1 string
Expand Down

0 comments on commit f3948f6

Please sign in to comment.