Skip to content

Commit

Permalink
Experimental word-boundary ranking algorithm
Browse files Browse the repository at this point in the history
See the comment in algo/algo.go for a description.
  • Loading branch information
mjwestcott committed Apr 1, 2016
1 parent 8d6e13b commit 60c9d84
Show file tree
Hide file tree
Showing 16 changed files with 206 additions and 96 deletions.
130 changes: 106 additions & 24 deletions src/algo/algo.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"strings"
"unicode"

"github.com/junegunn/fzf/src/util"
"github.com/mjwestcott/fzf/src/util"
)

/*
Expand All @@ -22,10 +22,41 @@ func runeAt(runes []rune, index int, max int, forward bool) rune {
return runes[max-index-1]
}

// Result contains the outcome of running a match function.
type Result struct {
// Start and End are the rune offsets of the match within the input; End is
// exclusive. The match functions in this file set both to -1 when the
// pattern does not match.
Start int
End int

// Penalty measures how far the matched runes are from the beginning of
// their containing words. FuzzyMatch computes it; the other match
// functions in this file always report 0.
//
// The basic idea is to assign a value to each rune in the input text: a
// letter or number gets its 1-based distance from the start of its word,
// and any other rune resets the count (shown as '-' below). Then, add up
// the values of the runes matched by the pattern. The only nuance is that
// a match immediately following another match has no penalty.
//
//   input      "Hello, world! This is a test."
//   values      12345--12345--1234-12-1-1234-
//   pattern            wo     th        tes
//   penalties          10     10        100
//   total = 3
//
// Now an example that should be heavily penalized because many of the
// matches occur in the middle of words:
//
//   input      "/usr/jg/repos/go/src/github.com/junegunn"
//   values      -123-12-12345-12-123-123456-123-12345678
//   pattern       s       p   g      git            gunn
//   penalties     2       3   1      100            5000
//   total = 12
//
// We can then decide how to use that penalty when ranking items. One
// simple and effective idea is to rank according to matchlen + penalty.
Penalty int32
}

// FuzzyMatch performs fuzzy-match
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) {
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) *Result {
if len(pattern) == 0 {
return 0, 0
return &Result{0, 0, 0}
}

// 0. (FIXME) How to find the shortest match?
Expand All @@ -46,6 +77,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)

for index := range runes {
char := runeAt(runes, index, lenRunes, forward)

// This is considerably faster than blindly applying strings.ToLower to the
// whole string
if !caseSensitive {
Expand Down Expand Up @@ -90,12 +122,57 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
}
}
}

// Calculate the penalty. This can't be done at the same time as the
// pattern scan above because 'forward' may be false.
var fromBoundary int32
var totalPenalty int32
var consecutive bool
var pidx int

// We can think about how to start closer to sidx.
for index := 0; index < eidx; index++ {
var penalty int32

// Calculate current rune penalty.
char := runes[index]
if unicode.IsLetter(char) || unicode.IsNumber(char) {
fromBoundary++
penalty = fromBoundary
} else {
fromBoundary = 0
}

// Calculate totalPenalty of the match.
if index >= sidx {
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := pattern[pidx]
if pchar == char {
if !consecutive {
totalPenalty += penalty
}
if pidx++; pidx == lenPattern {
break
}
consecutive = true
} else {
consecutive = false
}
}
}

if forward {
return sidx, eidx
return &Result{sidx, eidx, totalPenalty}
}
return lenRunes - eidx, lenRunes - sidx
return &Result{lenRunes - eidx, lenRunes - sidx, totalPenalty}
}
return -1, -1
return &Result{-1, -1, 0}
}

// ExactMatchNaive is a basic string searching algorithm that handles case
Expand All @@ -105,16 +182,17 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) {
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) *Result {
// Note: ExactMatchNaive always returns a zero penalty.
if len(pattern) == 0 {
return 0, 0
return &Result{0, 0, 0}
}

lenRunes := len(runes)
lenPattern := len(pattern)

if lenRunes < lenPattern {
return -1, -1
return &Result{-1, -1, 0}
}

pidx := 0
Expand All @@ -132,22 +210,23 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
pidx++
if pidx == lenPattern {
if forward {
return index - lenPattern + 1, index + 1
return &Result{index - lenPattern + 1, index + 1, 0}
}
return lenRunes - (index + 1), lenRunes - (index - lenPattern + 1)
return &Result{lenRunes - (index + 1), lenRunes - (index - lenPattern + 1), 0}
}
} else {
index -= pidx
pidx = 0
}
}
return -1, -1
return &Result{-1, -1, 0}
}

// PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) {
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) *Result {
// Note: PrefixMatch always returns a zero penalty.
if len(runes) < len(pattern) {
return -1, -1
return &Result{-1, -1, 0}
}

for index, r := range pattern {
Expand All @@ -156,44 +235,47 @@ func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
char = unicode.ToLower(char)
}
if char != r {
return -1, -1
return &Result{-1, -1, 0}
}
}
return 0, len(pattern)
return &Result{0, len(pattern), 0}
}

// SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) (int, int) {
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) *Result {
// Note: SuffixMatch always returns a zero penalty.
runes := util.TrimRight(input)
trimmedLen := len(runes)
diff := trimmedLen - len(pattern)
if diff < 0 {
return -1, -1
return &Result{-1, -1, 0}
}

for index, r := range pattern {
char := runes[index+diff]

if !caseSensitive {
char = unicode.ToLower(char)
}
if char != r {
return -1, -1
return &Result{-1, -1, 0}
}
}
return trimmedLen - len(pattern), trimmedLen
return &Result{trimmedLen - len(pattern), trimmedLen, 0}
}

// EqualMatch performs equal-match
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) {
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) *Result {
// Note: EqualMatch always returns a zero penalty.
if len(runes) != len(pattern) {
return -1, -1
return &Result{-1, -1, 0}
}
runesStr := string(runes)
if !caseSensitive {
runesStr = strings.ToLower(runesStr)
}
if runesStr == string(pattern) {
return 0, len(pattern)
return &Result{0, len(pattern), 0}
}
return -1, -1
return &Result{-1, -1, 0}
}
71 changes: 44 additions & 27 deletions src/algo/algo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,65 +5,82 @@ import (
"testing"
)

func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) (int, int), caseSensitive bool, forward bool, input string, pattern string, sidx int, eidx int) {
func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) *Result, caseSensitive, forward bool, input, pattern string, sidx, eidx int, penalty int32) {
if !caseSensitive {
pattern = strings.ToLower(pattern)
}
s, e := fun(caseSensitive, forward, []rune(input), []rune(pattern))
if s != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern)
res := fun(caseSensitive, forward, []rune(input), []rune(pattern))
if res.Start != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern)
}
if e != eidx {
t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", e, eidx, input, pattern)
if res.End != eidx {
t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", res.End, eidx, input, pattern)
}
if res.Penalty != penalty {
t.Errorf("Invalid penalty: %d (expected: %d, %s / %s)", res.Penalty, penalty, input, pattern)
}
}

func TestFuzzyMatch(t *testing.T) {
assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "fooBarbazz", -1, -1)
assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9, 12)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1, 0)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9, 12)
assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "fooBarbazz", -1, -1, 0)

assertMatch(t, FuzzyMatch, false, true, "foo bar baz", "fbb", 0, 9, 3)
assertMatch(t, FuzzyMatch, false, true, "foo/bar/baz", "fbb", 0, 9, 3)
assertMatch(t, FuzzyMatch, false, true, "foo barbaz", "fbb", 0, 8, 6)
assertMatch(t, FuzzyMatch, false, true, "fooBar Baz", "foob", 0, 4, 1)
assertMatch(t, FuzzyMatch, true, true, "Foo Bar Baz", "fbb", -1, -1, 0)
assertMatch(t, FuzzyMatch, true, true, "Foo/Bar/Baz", "FBB", 0, 9, 3)
assertMatch(t, FuzzyMatch, true, true, "foo BarBaz", "fBB", 0, 8, 6)
assertMatch(t, FuzzyMatch, true, true, "FooBar Baz", "FooB", 0, 4, 1)
}

func TestFuzzyMatchBackward(t *testing.T) {
assertMatch(t, FuzzyMatch, false, true, "foobar fb", "fb", 0, 4)
assertMatch(t, FuzzyMatch, false, false, "foobar fb", "fb", 7, 9)
assertMatch(t, FuzzyMatch, false, true, "foobar fb", "fb", 0, 4, 5)
assertMatch(t, FuzzyMatch, false, false, "foobar fb", "fb", 7, 9, 1)
}

func TestExactMatchNaive(t *testing.T) {
for _, dir := range []bool{true, false} {
assertMatch(t, ExactMatchNaive, false, dir, "fooBarbaz", "oBA", 2, 5)
assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "oBA", -1, -1)
assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "fooBarbazz", -1, -1)
assertMatch(t, ExactMatchNaive, false, dir, "fooBarbaz", "oBA", 2, 5, 0)
assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "oBA", -1, -1, 0)
assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "fooBarbazz", -1, -1, 0)
}
}

func TestExactMatchNaiveBackward(t *testing.T) {
assertMatch(t, ExactMatchNaive, false, true, "foobar foob", "oo", 1, 3)
assertMatch(t, ExactMatchNaive, false, false, "foobar foob", "oo", 8, 10)
assertMatch(t, ExactMatchNaive, false, true, "foobar foob", "oo", 1, 3, 0)
assertMatch(t, ExactMatchNaive, false, false, "foobar foob", "oo", 8, 10, 0)
}

func TestPrefixMatch(t *testing.T) {
for _, dir := range []bool{true, false} {
assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "Foo", 0, 3)
assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1)
assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "baz", -1, -1)
assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "Foo", 0, 3, 0)
assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1, 0)
assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "baz", -1, -1, 0)
}
}

func TestSuffixMatch(t *testing.T) {
for _, dir := range []bool{true, false} {
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "Foo", -1, -1)
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "baz", 6, 9)
assertMatch(t, SuffixMatch, true, dir, "fooBarbaz", "Baz", -1, -1)
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "Foo", -1, -1, 0)
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "baz", 6, 9, 0)
assertMatch(t, SuffixMatch, true, dir, "fooBarbaz", "Baz", -1, -1, 0)
}
}

func TestEmptyPattern(t *testing.T) {
for _, dir := range []bool{true, false} {
assertMatch(t, FuzzyMatch, true, dir, "foobar", "", 0, 0)
assertMatch(t, ExactMatchNaive, true, dir, "foobar", "", 0, 0)
assertMatch(t, PrefixMatch, true, dir, "foobar", "", 0, 0)
assertMatch(t, SuffixMatch, true, dir, "foobar", "", 6, 6)
assertMatch(t, FuzzyMatch, true, dir, "foobar", "", 0, 0, 0)
assertMatch(t, ExactMatchNaive, true, dir, "foobar", "", 0, 0, 0)
assertMatch(t, PrefixMatch, true, dir, "foobar", "", 0, 0, 0)
assertMatch(t, SuffixMatch, true, dir, "foobar", "", 6, 6, 0)
}
}
2 changes: 1 addition & 1 deletion src/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package fzf
import (
"time"

"github.com/junegunn/fzf/src/util"
"github.com/mjwestcott/fzf/src/util"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion src/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
"runtime"
"time"

"github.com/junegunn/fzf/src/util"
"github.com/mjwestcott/fzf/src/util"
)

func initProcs() {
Expand Down
2 changes: 1 addition & 1 deletion src/fzf/main.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package main

import "github.com/junegunn/fzf/src"
import "github.com/mjwestcott/fzf/src"

func main() {
fzf.Run(fzf.ParseOptions())
Expand Down
6 changes: 4 additions & 2 deletions src/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package fzf
import (
"math"

"github.com/junegunn/fzf/src/curses"
"github.com/mjwestcott/fzf/src/curses"
)

// Offset holds three 32-bit integers denoting the offsets of a matched substring
Expand All @@ -21,6 +21,7 @@ type Item struct {
origText *[]rune
transformed []Token
offsets []Offset
penalty int32
colors []ansiOffset
rank [5]int32
}
Expand Down Expand Up @@ -81,7 +82,8 @@ func (item *Item) Rank(cache bool) [5]int32 {
var val int32
switch criterion {
case byMatchLen:
val = int32(matchlen)
// A simple and effective way to incorporate the penalty.
val = int32(matchlen) + item.penalty
case byLength:
// It is guaranteed that .transformed is not nil in normal execution
if item.transformed != nil {
Expand Down
Loading

0 comments on commit 60c9d84

Please sign in to comment.