Merge pull request #85 from josharian/varia

Assorted optimizations
sergi · Jan 25, 2018 · f3948f6 · f3948f6
2 parents 1744e29 + ded6142
commit f3948f6
Show file tree

Hide file tree

Showing 2 changed files with 102 additions and 69 deletions.
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
@@ -40,8 +40,41 @@ type Diff struct {
 	Text string
 }
 
+// splice removes amount elements from slice at index index, replacing them with elements.
 func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
-	return append(slice[:index], append(elements, slice[index+amount:]...)...)
+	if len(elements) == amount {
+		// Easy case: overwrite the relevant items.
+		copy(slice[index:], elements)
+		return slice
+	}
+	if len(elements) < amount {
+		// Fewer new items than old.
+		// Copy in the new items.
+		copy(slice[index:], elements)
+		// Shift the remaining items left.
+		copy(slice[index+len(elements):], slice[index+amount:])
+		// Calculate the new end of the slice.
+		end := len(slice) - amount + len(elements)
+		// Zero stranded elements at end so that they can be garbage collected.
+		tail := slice[end:]
+		for i := range tail {
+			tail[i] = Diff{}
+		}
+		return slice[:end]
+	}
+	// More new items than old.
+	// Make room in slice for new elements.
+	// There's probably an even more efficient way to do this,
+	// but this is simple and clear.
+	need := len(slice) - amount + len(elements)
+	for len(slice) < need {
+		slice = append(slice, Diff{})
+	}
+	// Shift slice elements right to make room for new elements.
+	copy(slice[index+len(elements):], slice[index+amount:])
+	// Copy in new elements.
+	copy(slice[index:], elements)
+	return slice
 }
 
 // DiffMain finds the differences between two texts.
@@ -145,7 +178,10 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
 		diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
 		diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
 		// Merge the results.
-		return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...)
+		diffs := diffsA
+		diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
+		diffs = append(diffs, diffsB...)
+		return diffs
 	} else if checklines && len(text1) > 100 && len(text2) > 100 {
 		return dmp.diffLineMode(text1, text2, deadline)
 	}
@@ -247,7 +283,7 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
 	k2end := 0
 	for d := 0; d < maxD; d++ {
 		// Bail out if deadline is reached.
-		if !deadline.IsZero() && time.Now().After(deadline) {
+		if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
 			break
 		}
 
@@ -434,48 +470,29 @@ func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {
 
 // commonPrefixLength returns the length of the common prefix of two rune slices.
 func commonPrefixLength(text1, text2 []rune) int {
-	short, long := text1, text2
-	if len(short) > len(long) {
-		short, long = long, short
-	}
-	for i, r := range short {
-		if r != long[i] {
-			return i
+	// Linear search. See comment in commonSuffixLength.
+	n := 0
+	for ; n < len(text1) && n < len(text2); n++ {
+		if text1[n] != text2[n] {
+			return n
 		}
 	}
-	return len(short)
+	return n
 }
 
 // commonSuffixLength returns the length of the common suffix of two rune slices.
 func commonSuffixLength(text1, text2 []rune) int {
-	n := min(len(text1), len(text2))
-	for i := 0; i < n; i++ {
-		if text1[len(text1)-i-1] != text2[len(text2)-i-1] {
-			return i
+	// Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
+	// See discussion at https://github.com/sergi/go-diff/issues/54.
+	i1 := len(text1)
+	i2 := len(text2)
+	for n := 0; ; n++ {
+		i1--
+		i2--
+		if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
+			return n
 		}
 	}
-	return n
-
-	// TODO research and benchmark this, why is it not activated? https://github.com/sergi/go-diff/issues/54
-	// Binary search.
-	// Performance analysis: http://neil.fraser.name/news/2007/10/09/
-	/*
-	   pointermin := 0
-	   pointermax := math.Min(len(text1), len(text2))
-	   pointermid := pointermax
-	   pointerend := 0
-	   for pointermin < pointermid {
-	       if text1[len(text1)-pointermid:len(text1)-pointerend] ==
-	           text2[len(text2)-pointermid:len(text2)-pointerend] {
-	           pointermin = pointermid
-	           pointerend = pointermin
-	       } else {
-	           pointermax = pointermid
-	       }
-	       pointermid = math.Floor((pointermax-pointermin)/2 + pointermin)
-	   }
-	   return pointermid
-	*/
 }
 
 // DiffCommonOverlap determines if the suffix of one string is the prefix of another.
@@ -628,11 +645,7 @@ func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
 func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
 	changes := false
 	// Stack of indices where equalities are found.
-	type equality struct {
-		data int
-		next *equality
-	}
-	var equalities *equality
+	equalities := make([]int, 0, len(diffs))
 
 	var lastequality string
 	// Always equal to diffs[equalities[equalitiesLength - 1]][1]
@@ -645,11 +658,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
 	for pointer < len(diffs) {
 		if diffs[pointer].Type == DiffEqual {
 			// Equality found.
-
-			equalities = &equality{
-				data: pointer,
-				next: equalities,
-			}
+			equalities = append(equalities, pointer)
 			lengthInsertions1 = lengthInsertions2
 			lengthDeletions1 = lengthDeletions2
 			lengthInsertions2 = 0
@@ -670,23 +679,20 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
 				(len(lastequality) <= difference1) &&
 				(len(lastequality) <= difference2) {
 				// Duplicate record.
-				insPoint := equalities.data
-				diffs = append(
-					diffs[:insPoint],
-					append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
+				insPoint := equalities[len(equalities)-1]
+				diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
 
 				// Change second copy to insert.
 				diffs[insPoint+1].Type = DiffInsert
 				// Throw away the equality we just deleted.
-				equalities = equalities.next
+				equalities = equalities[:len(equalities)-1]
 
-				if equalities != nil {
-					equalities = equalities.next
+				if len(equalities) > 0 {
+					equalities = equalities[:len(equalities)-1]
 				}
-				if equalities != nil {
-					pointer = equalities.data
-				} else {
-					pointer = -1
+				pointer = -1
+				if len(equalities) > 0 {
+					pointer = equalities[len(equalities)-1]
 				}
 
 				lengthInsertions1 = 0 // Reset the counters.
@@ -724,10 +730,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
 					float64(overlapLength1) >= float64(len(insertion))/2 {
 
 					// Overlap found. Insert an equality and trim the surrounding edits.
-					diffs = append(
-						diffs[:pointer],
-						append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...)
-
+					diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
 					diffs[pointer-1].Text =
 						deletion[0 : len(deletion)-overlapLength1]
 					diffs[pointer+1].Text = insertion[overlapLength1:]
@@ -738,10 +741,7 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
 					float64(overlapLength2) >= float64(len(insertion))/2 {
 					// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
 					overlap := Diff{DiffEqual, deletion[:overlapLength2]}
-					diffs = append(
-						diffs[:pointer],
-						append([]Diff{overlap}, diffs[pointer:]...)...)
-
+					diffs = splice(diffs, pointer, 0, overlap)
 					diffs[pointer-1].Type = DiffInsert
 					diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
 					diffs[pointer+1].Type = DiffDelete
@@ -954,8 +954,7 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
 				insPoint := equalities.data
 
 				// Duplicate record.
-				diffs = append(diffs[:insPoint],
-					append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...)
+				diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
 
 				// Change second copy to insert.
 				diffs[insPoint+1].Type = DiffInsert

diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
@@ -130,6 +130,8 @@ func TestDiffCommonSuffix(t *testing.T) {
 	}
 }
 
+var SinkInt int // exported sink var to avoid compiler optimizations in benchmarks
+
 func BenchmarkDiffCommonSuffix(b *testing.B) {
 	s := "ABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ"
 
@@ -138,10 +140,42 @@ func BenchmarkDiffCommonSuffix(b *testing.B) {
 	b.ResetTimer()
 
 	for i := 0; i < b.N; i++ {
-		dmp.DiffCommonSuffix(s, s)
+		SinkInt = dmp.DiffCommonSuffix(s, s)
 	}
 }
 
+func BenchmarkCommonLength(b *testing.B) {
+	data := []struct {
+		name string
+		x, y []rune
+	}{
+		{name: "empty", x: nil, y: []rune{}},
+		{name: "short", x: []rune("AABCC"), y: []rune("AA-CC")},
+		{name: "long",
+			x: []rune(strings.Repeat("A", 1000) + "B" + strings.Repeat("C", 1000)),
+			y: []rune(strings.Repeat("A", 1000) + "-" + strings.Repeat("C", 1000)),
+		},
+	}
+	b.Run("prefix", func(b *testing.B) {
+		for _, d := range data {
+			b.Run(d.name, func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					SinkInt = commonPrefixLength(d.x, d.y)
+				}
+			})
+		}
+	})
+	b.Run("suffix", func(b *testing.B) {
+		for _, d := range data {
+			b.Run(d.name, func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					SinkInt = commonSuffixLength(d.x, d.y)
+				}
+			})
+		}
+	})
+}
+
 func TestCommonSuffixLength(t *testing.T) {
 	type TestCase struct {
 		Text1 string