Fix inconsistent tiebreak scores when --nth is used

Make sure to consistently calculate tiebreak scores based on the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculate length tiebreak only on the matched components instead of the entire line, the result can be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926
junegunn · Jun 2, 2017 · 2e3dc75 · 2e3dc75
1 parent 5d6eb5b
commit 2e3dc75
Show file tree

Hide file tree

Showing 8 changed files with 49 additions and 95 deletions.
diff --git a/src/core.go b/src/core.go
@@ -95,9 +95,10 @@ func Run(opts *Options) {
 			}
 			chars, colors := ansiProcessor(data)
 			return &Item{
-				index:  int32(index),
-				text:   chars,
-				colors: colors}
+				index:      int32(index),
+				trimLength: -1,
+				text:       chars,
+				colors:     colors}
 		})
 	} else {
 		chunkList = NewChunkList(func(data []byte, index int) *Item {
@@ -110,9 +111,10 @@ func Run(opts *Options) {
 			}
 			textRunes := joinTokens(trans)
 			item := Item{
-				index:    int32(index),
-				origText: &data,
-				colors:   nil}
+				index:      int32(index),
+				trimLength: -1,
+				origText:   &data,
+				colors:     nil}
 
 			trimmed, colors := ansiProcessorRunes(textRunes)
 			item.text = trimmed

diff --git a/src/item.go b/src/item.go
@@ -7,6 +7,7 @@ import (
 // Item represents each input line
 type Item struct {
 	index       int32
+	trimLength  int32
 	text        util.Chars
 	origText    *[]byte
 	colors      *[]ansiOffset
@@ -18,6 +19,14 @@ func (item *Item) Index() int32 {
 	return item.index
 }
 
+func (item *Item) TrimLength() int32 {
+	if item.trimLength >= 0 {
+		return item.trimLength
+	}
+	item.trimLength = int32(item.text.TrimLength())
+	return item.trimLength
+}
+
 // Colors returns ansiOffsets of the Item
 func (item *Item) Colors() []ansiOffset {
 	if item.colors == nil {

diff --git a/src/pattern.go b/src/pattern.go
@@ -299,49 +299,47 @@ func (p *Pattern) matchChunk(chunk *Chunk, space []*Result, slab *util.Slab) []*
 // MatchItem returns true if the Item is a match
 func (p *Pattern) MatchItem(item *Item, withPos bool, slab *util.Slab) (*Result, []Offset, *[]int) {
 	if p.extended {
-		if offsets, bonus, trimLen, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
-			return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		if offsets, bonus, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
+			return buildResult(item, offsets, bonus), offsets, pos
 		}
 		return nil, nil, nil
 	}
-	offset, bonus, trimLen, pos := p.basicMatch(item, withPos, slab)
+	offset, bonus, pos := p.basicMatch(item, withPos, slab)
 	if sidx := offset[0]; sidx >= 0 {
 		offsets := []Offset{offset}
-		return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		return buildResult(item, offsets, bonus), offsets, pos
 	}
 	return nil, nil, nil
 }
 
-func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	if p.fuzzy {
 		return p.iter(p.fuzzyAlgo, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
 	}
 	return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
 }
 
-func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, int, *[]int) {
+func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	offsets := []Offset{}
 	var totalScore int
-	var totalTrimLen int
 	var allPos *[]int
 	if withPos {
 		allPos = &[]int{}
 	}
 	for _, termSet := range p.termSets {
 		var offset Offset
 		var currentScore int
-		var trimLen int
 		matched := false
 		for _, term := range termSet {
 			pfun := p.procFun[term.typ]
-			off, score, tLen, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
+			off, score, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
 			if sidx := off[0]; sidx >= 0 {
 				if term.inv {
 					continue
 				}
-				offset, currentScore, trimLen = off, score, tLen
+				offset, currentScore = off, score
 				matched = true
 				if withPos {
 					if pos != nil {
@@ -354,18 +352,17 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 				}
 				break
 			} else if term.inv {
-				offset, currentScore, trimLen = Offset{0, 0}, 0, 0
+				offset, currentScore = Offset{0, 0}, 0
 				matched = true
 				continue
 			}
 		}
 		if matched {
 			offsets = append(offsets, offset)
 			totalScore += currentScore
-			totalTrimLen += trimLen
 		}
 	}
-	return offsets, totalScore, totalTrimLen, allPos
+	return offsets, totalScore, allPos
 }
 
 func (p *Pattern) prepareInput(item *Item) []Token {
@@ -375,7 +372,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 
 	var ret []Token
 	if len(p.nth) == 0 {
-		ret = []Token{Token{text: &item.text, prefixLength: 0, trimLength: int32(item.text.TrimLength())}}
+		ret = []Token{Token{text: &item.text, prefixLength: 0}}
 	} else {
 		tokens := Tokenize(item.text, p.delimiter)
 		ret = Transform(tokens, p.nth)
@@ -384,7 +381,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 	return ret
 }
 
-func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	for _, part := range tokens {
 		if res, pos := pfun(caseSensitive, normalize, forward, *part.text, pattern, withPos, slab); res.Start >= 0 {
 			sidx := int32(res.Start) + part.prefixLength
@@ -394,8 +391,8 @@ func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, norma
 					(*pos)[idx] += int(part.prefixLength)
 				}
 			}
-			return Offset{sidx, eidx}, res.Score, int(part.trimLength), pos
+			return Offset{sidx, eidx}, res.Score, pos
 		}
 	}
-	return Offset{-1, -1}, 0, -1, nil
+	return Offset{-1, -1}, 0, nil
 }
diff --git a/src/result.go b/src/result.go
@@ -29,7 +29,7 @@ type Result struct {
 	rank rank
 }
 
-func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
+func buildResult(item *Item, offsets []Offset, score int) *Result {
 	if len(offsets) > 1 {
 		sort.Sort(ByOrder(offsets))
 	}
@@ -57,8 +57,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
 			// Higher is better
 			val = math.MaxUint16 - util.AsUint16(score)
 		case byLength:
-			// If offsets is empty, trimLen will be 0, but we don't care
-			val = util.AsUint16(trimLen)
+			val = util.AsUint16(int(item.TrimLength()))
 		case byBegin, byEnd:
 			if validOffsetFound {
 				whitePrefixLen := 0
@@ -72,7 +71,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
 				if criterion == byBegin {
 					val = util.AsUint16(minEnd - whitePrefixLen)
 				} else {
-					val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/trimLen)
+					val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/int(item.TrimLength()))
 				}
 			}
 		}

diff --git a/src/result_test.go b/src/result_test.go
@@ -52,7 +52,7 @@ func TestResultRank(t *testing.T) {
 	sortCriteria = []criterion{byScore, byLength}
 
 	strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
-	item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1}, []Offset{}, 2, 3)
+	item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1, trimLength: -1}, []Offset{}, 2)
 	if item1.rank.points[0] != math.MaxUint16-2 || // Bonus
 		item1.rank.points[1] != 3 || // Length
 		item1.rank.points[2] != 0 || // Unused
@@ -61,7 +61,7 @@ func TestResultRank(t *testing.T) {
 		t.Error(item1.rank)
 	}
 	// Only differ in index
-	item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2, 3)
+	item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2)
 
 	items := []*Result{item1, item2}
 	sort.Sort(ByRelevance(items))
@@ -77,10 +77,10 @@ func TestResultRank(t *testing.T) {
 	}
 
 	// Sort by relevance
-	item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3, 0)
-	item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4, 0)
-	item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5, 0)
-	item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6, 0)
+	item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3)
+	item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4)
+	item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5)
+	item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6)
 	items = []*Result{item1, item2, item3, item4, item5, item6}
 	sort.Sort(ByRelevance(items))
 	if !(items[0] == item6 && items[1] == item5 &&

diff --git a/src/tokenizer.go b/src/tokenizer.go
@@ -20,7 +20,6 @@ type Range struct {
 type Token struct {
 	text         *util.Chars
 	prefixLength int32
-	trimLength   int32
 }
 
 // Delimiter for tokenizing the input
@@ -81,7 +80,7 @@ func withPrefixLengths(tokens []util.Chars, begin int) []Token {
 	prefixLength := begin
 	for idx, token := range tokens {
 		// NOTE: &tokens[idx] instead of &tokens
-		ret[idx] = Token{&tokens[idx], int32(prefixLength), int32(token.TrimLength())}
+		ret[idx] = Token{&tokens[idx], int32(prefixLength)}
 		prefixLength += token.Length()
 	}
 	return ret
@@ -242,7 +241,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
 		} else {
 			prefixLength = 0
 		}
-		transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())}
+		transTokens[idx] = Token{&merged, prefixLength}
 	}
 	return transTokens
 }
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
@@ -48,22 +48,22 @@ func TestTokenize(t *testing.T) {
 	// AWK-style
 	input := "  abc:  def:  ghi  "
 	tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
-	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
+	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
 		t.Errorf("%s", tokens)
 	}
 
 	// With delimiter
 	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
-	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
+	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
 		t.Errorf("%s", tokens)
 	}
 
 	// With delimiter regex
 	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
-	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
-		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
-		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
-		tokens[3].text.ToString() != "ghi  " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
+	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
+		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
+		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
+		tokens[3].text.ToString() != "ghi  " || tokens[3].prefixLength != 14 {
 		t.Errorf("%s", tokens)
 	}
 }

diff --git a/test/test_go.rb b/test/test_go.rb
@@ -683,62 +683,10 @@ def test_tiebreak_length_with_nth
     ]
     assert_equal output, `#{FZF} -fh < #{tempname}`.split($/)
 
-    output = %w[
-      1234567:h
-      12345:he
-      1:hell
-      123:hello
-    ]
+    # Since 0.16.8, --nth doesn't affect --tiebreak
     assert_equal output, `#{FZF} -fh -n2 -d: < #{tempname}`.split($/)
   end
 
-  def test_tiebreak_length_with_nth_trim_length
-    input = [
-      "apple juice   bottle 1",
-      "apple  ui     bottle 2",
-      "app     ice   bottle 3",
-      "app     ic    bottle 4",
-    ]
-    writelines tempname, input
-
-    # len(1)
-    output = [
-      "app     ice   bottle 3",
-      "app     ic    bottle 4",
-      "apple juice   bottle 1",
-      "apple  ui     bottle 2",
-    ]
-    assert_equal output, `#{FZF} -fa -n1 < #{tempname}`.split($/)
-
-    # len(1 ~ 2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -fai -n1..2 < #{tempname}`.split($/)
-
-    # len(1) + len(2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -x -f"a i" -n1,2 < #{tempname}`.split($/)
-
-    # len(2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -fi -n2 < #{tempname}`.split($/)
-    assert_equal output, `#{FZF} -fi -n2,1..2 < #{tempname}`.split($/)
-  end
-
   def test_invalid_cache
     tmux.send_keys "(echo d; echo D; echo x) | #{fzf '-q d'}", :Enter
     tmux.until { |lines| lines[-2].include? '2/3' }