Skip to content

Commit

Permalink
Merge pull request #75 from tw4452852/find_performance
Browse files Browse the repository at this point in the history
improve find performance
  • Loading branch information
monochromegane committed Jul 19, 2015
2 parents 51e7de8 + eaa84a8 commit 719cb42
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 17 deletions.
54 changes: 37 additions & 17 deletions find.go
Expand Up @@ -3,9 +3,10 @@ package the_platinum_searcher
import (
"io/ioutil"
"os"

"path/filepath"
"runtime"
"strings"
"sync"
)

type find struct {
Expand Down Expand Up @@ -62,7 +63,7 @@ func (f *find) findFile(root string, pattern *Pattern, done chan struct{}) {
}

ignores = append(ignores, genericIgnore(f.Option.Ignore))
Walk(root, ignores, f.Option.Follow, func(path string, info *FileInfo, depth int, ignores ignoreMatchers, err error) (error, ignoreMatchers) {
Walk(root, ignores, f.Option.Follow, f.Option.MultiFinder, func(path string, info *FileInfo, depth int, ignores ignoreMatchers, err error) (error, ignoreMatchers) {
if info.IsDir() {
if depth > f.Option.Depth+1 {
return filepath.SkipDir, ignores
Expand Down Expand Up @@ -111,22 +112,33 @@ func (f *find) findFile(root string, pattern *Pattern, done chan struct{}) {

// WalkFunc is the callback that Walk and walk invoke for each visited entry.
// It receives the entry's path and FileInfo, the current depth (Walk passes 1
// for the root), the ignore matchers inherited from the parent, and any error
// hit while stat-ing the root. It returns an error (filepath.SkipDir is
// checked by walk for directories) and the ignore matchers that walk hands
// down to the entry's children.
type WalkFunc func(path string, info *FileInfo, depth int, ignores ignoreMatchers, err error) (error, ignoreMatchers)

// Walk traverses the tree rooted at root, calling walkFn for each entry, and
// returns once the traversal and every goroutine it spawned have finished.
//
// NOTE(review): this is a rendered diff without +/- markers; the first
// signature line and the bare `return walk(...)` line below appear to be the
// removed (pre-change) versions — confirm against the commit.
func Walk(root string, ignores ignoreMatchers, follow bool, walkFn WalkFunc) error {
func Walk(root string, ignores ignoreMatchers, follow, multiFinder bool, walkFn WalkFunc) error {
info, err := os.Lstat(root)
fileInfo := newFileInfo(root, info, follow)
if err != nil {
// Report the Lstat failure to the callback and return whatever error it yields.
walkError, _ := walkFn(root, fileInfo, 1, nil, err)
return walkError
}
return walk(root, fileInfo, 1, ignores, walkFn)

// pool stays nil unless multiFinder is set; when set, its capacity
// (runtime.NumCPU()) bounds how many pool slots walk can take at once.
var pool chan struct{}
if multiFinder {
pool = make(chan struct{}, runtime.NumCPU())
}
// waiter counts the goroutines started by walk; each one calls Done in
// walkOnGoRoutine, so Wait blocks until all of them have finished.
waiter := &sync.WaitGroup{}
err = walk(root, fileInfo, 1, ignores, walkFn, waiter, pool)
waiter.Wait()
// Only the error from the top-level walk call is returned; results of walk
// calls made inside walkOnGoRoutine are not collected.
return err
}

// walkOnGoRoutine is the body run via `go` by walk: it walks the given entry,
// then signals completion to the caller.
//
// NOTE(review): this is a rendered diff without +/- markers; the first
// signature and the two lines after it (notify-channel version) appear to be
// the removed pre-change code — confirm against the commit.
func walkOnGoRoutine(path string, info *FileInfo, notify chan int, depth int, parentIgnore ignoreMatchers, walkFn WalkFunc) {
walk(path, info, depth, parentIgnore, walkFn)
notify <- 0
func walkOnGoRoutine(path string, info *FileInfo, depth int, parentIgnore ignoreMatchers, walkFn WalkFunc, waiter *sync.WaitGroup, pool chan struct{}) {
// The error returned by walk is discarded here.
walk(path, info, depth, parentIgnore, walkFn, waiter, pool)
if pool != nil {
// Give back the pool slot that walk took (`pool <- struct{}{}`) before
// starting this goroutine.
<-pool
}
// Pairs with the waiter.Add(1) done by walk just before the `go` statement.
waiter.Done()
}

func walk(path string, info *FileInfo, depth int, parentIgnores ignoreMatchers, walkFn WalkFunc) error {
func walk(path string, info *FileInfo, depth int, parentIgnores ignoreMatchers, walkFn WalkFunc, waiter *sync.WaitGroup, pool chan struct{}) error {
err, ig := walkFn(path, info, depth, parentIgnores, nil)
if err != nil {
if info.IsDir() && err == filepath.SkipDir {
Expand All @@ -146,21 +158,29 @@ func walk(path string, info *FileInfo, depth int, parentIgnores ignoreMatchers,
}

depth++
notify := make(chan int, len(list))
for _, l := range list {
fileInfo := newFileInfo(path, l, info.follow)
if isDirectRoot(depth) {
go walkOnGoRoutine(filepath.Join(path, fileInfo.Name()), fileInfo, notify, depth, ig, walkFn)

// normal mode(pool == nil): spawn goroutine on DirectRoot
// multiple finder mode(pool != nil): spawn goroutine as many as possible
if pool == nil {
if isDirectRoot(depth) {
waiter.Add(1)
go walkOnGoRoutine(filepath.Join(path, fileInfo.Name()), fileInfo, depth, ig, walkFn, waiter, pool)
} else {
walk(filepath.Join(path, fileInfo.Name()), fileInfo, depth, ig, walkFn, waiter, pool)
}
} else {
walk(filepath.Join(path, fileInfo.Name()), fileInfo, depth, ig, walkFn)
}
}
if isDirectRoot(depth) {
for i := 0; i < cap(notify); i++ {
<-notify
select {
case pool <- struct{}{}:
waiter.Add(1)
go walkOnGoRoutine(filepath.Join(path, fileInfo.Name()), fileInfo, depth, ig, walkFn, waiter, pool)
default:
walk(filepath.Join(path, fileInfo.Name()), fileInfo, depth, ig, walkFn, waiter, pool)
}
}
}

return nil
}

Expand Down
27 changes: 27 additions & 0 deletions find_test.go
Expand Up @@ -50,6 +50,33 @@ func TestFind(t *testing.T) {
}
}

// TestMultiFind runs the finder over the "files" directory with
// Option.MultiFinder enabled and checks which paths are reported: hidden and
// binary files must be absent, regular text files (including one in a nested
// directory) must be present.
func TestMultiFind(t *testing.T) {
	results := make(chan *GrepParams)
	options := defaultOpts()
	options.MultiFinder = true

	finder := find{results, options}
	go finder.Start([]string{"files"}, &Pattern{Pattern: "go"})

	// mkFoundPaths yields a lookup over the paths received on the channel
	// (presumably it drains the channel first — see its definition).
	found := mkFoundPaths(results)

	checks := []struct {
		path      string
		wantFound bool
		format    string // failure message; takes the path as its only argument
	}{
		// Paths that must not be returned.
		{".hidden/hidden.txt", false, "Found %s, It should not contains file under hidden directory."},
		{"binary/binary.bin", false, "%s should be text file."},
		// Paths that must be returned.
		{"files/ascii.txt", true, "Find failed to locate: %s"},
		{"files/depth/file_1.txt", true, "Find failed to locate: %s"},
	}

	for _, c := range checks {
		if found(c.path) != c.wantFound {
			t.Errorf(c.format, c.path)
		}
	}
}

func TestFindWithHidden(t *testing.T) {
out := make(chan *GrepParams)
find := find{out, &Option{Hidden: true}}
Expand Down
1 change: 1 addition & 0 deletions option.go
Expand Up @@ -30,6 +30,7 @@ type Option struct {
WordRegexp bool `short:"w" long:"word-regexp" description:"Only match whole words"`
Proc int // Number of goroutine. Not user option.
Stats bool `long:"stats" description:"Print stats about files scanned, time taken, etc"`
MultiFinder bool `long:"multi-finder" description:"Use as many concurrent finders as possible, this will lead the result disorder"`
Version bool `long:"version" description:"Show version"`
}

Expand Down

0 comments on commit 719cb42

Please sign in to comment.