Skip to content

Commit

Permalink
When unsorted, Walk uses Scanner to enumerate directories
Browse files Browse the repository at this point in the history
Slightly smaller memory footprint and fewer allocations. There is still some
room for improvement, but this is enough to capture what I have been hoping to
do with this library for the past few months.

```
go test -bench=. -benchmem
goos: linux
goarch: amd64
pkg: github.com/karrick/godirwalk
BenchmarkReadDirnamesStandardLibrary-8   	   50000	     29312 ns/op	   10240 B/op	      10 allocs/op
BenchmarkReadDirnamesThisLibrary-8       	  100000	     14611 ns/op	    4560 B/op	      12 allocs/op
BenchmarkFilepathWalk-8                  	      10	 207360158 ns/op	46235054 B/op	   86129 allocs/op
BenchmarkGodirwalk-8                     	      10	 138169018 ns/op	20777411 B/op	   93295 allocs/op
BenchmarkGodirwalkUnsorted-8             	      20	 130196966 ns/op	20293127 B/op	   78388 allocs/op
BenchmarkFlameGraphFilepathWalk-8        	       1	2456320457 ns/op	462351888 B/op	  861292 allocs/op
BenchmarkFlameGraphGodirwalk-8           	       1	1373471992 ns/op	207774112 B/op	  932952 allocs/op
PASS
ok  	github.com/karrick/godirwalk	13.829s
```
  • Loading branch information
karrick committed Sep 27, 2019
1 parent dc39e36 commit 74f3b4a
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 13 deletions.
14 changes: 14 additions & 0 deletions debug_development.go
@@ -0,0 +1,14 @@
// +build godirwalk_debug

package godirwalk

import (
"fmt"
"os"
)

// debug renders the arguments a according to the format string f, prefixes the
// result with "godirwalk: ", and writes it to standard error. This variant is
// the one compiled when the godirwalk_debug build tag is set. Any error from
// writing to stderr is ignored, since this is a best-effort diagnostic aid.
func debug(f string, a ...interface{}) {
	msg := fmt.Sprintf(f, a...)
	os.Stderr.WriteString("godirwalk: " + msg)
}
6 changes: 6 additions & 0 deletions debug_release.go
@@ -0,0 +1,6 @@
// +build !godirwalk_debug

package godirwalk

// debug discards its format string and arguments. This variant is the one
// compiled when the godirwalk_debug build tag is not set, so debug calls in
// the library do nothing in release builds.
func debug(_ string, _ ...interface{}) {
	// Intentionally empty.
}
27 changes: 27 additions & 0 deletions scanner.go
@@ -0,0 +1,27 @@
package godirwalk

// scanner is the iteration contract walk uses to visit the children of a
// directory one entry at a time. walk drives it by looping while Scan reports
// true, fetching the current entry with Dirent inside the loop, and consulting
// Err once the loop has finished.
type scanner interface {
	// Scan moves to the next entry and reports whether one is available.
	Scan() bool

	// Dirent returns the entry selected by the most recent call to Scan,
	// along with any error encountered while producing it.
	Dirent() (*Dirent, error)

	// Err is called after iteration ends and returns any error that stopped
	// the scan.
	Err() error
}

// dirents iterates over a slice of directory entries that has already been
// read into memory, handing them out one at a time through Scan and Dirent.
type dirents struct {
	dd []*Dirent // entries not yet handed out by Scan
	de *Dirent   // entry selected by the most recent successful Scan
}

// Err releases the references held by the iterator and always returns nil,
// because iterating over an in-memory slice cannot fail.
func (s *dirents) Err() error {
	s.dd = nil
	s.de = nil
	return nil
}

// Dirent returns the entry selected by the most recent successful Scan. The
// returned error is always nil.
func (s *dirents) Dirent() (*Dirent, error) {
	return s.de, nil
}

// Scan selects the next pending entry and reports true, or reports false once
// no entries remain. When it reports false the current entry is left as is.
func (s *dirents) Scan() bool {
	if len(s.dd) == 0 {
		return false
	}
	s.de = s.dd[0]
	s.dd = s.dd[1:]
	return true
}
53 changes: 40 additions & 13 deletions walk.go
Expand Up @@ -17,9 +17,9 @@ import (
const DefaultScratchBufferSize = 16 * 1024

// MinimumScratchBufferSize specifies the minimum size of the scratch buffer
// that Walk, ReadDirents, and ReadDirnames will use when reading file entries
// from the operating system. It is initialized to the result from calling
// `os.Getpagesize()` during program startup.
// that Walk, ReadDirents, ReadDirnames, and Scandir will use when reading file
// entries from the operating system. It is initialized to the result from
// calling `os.Getpagesize()` during program startup.
var MinimumScratchBufferSize = os.Getpagesize()

// Options provide parameters for how the Walk function operates.
Expand Down Expand Up @@ -259,21 +259,45 @@ func walk(osPathname string, dirent *Dirent, options *Options) error {

// If get here, then specified pathname refers to a directory or a
// symbolic link to a directory.
deChildren, err := ReadDirents(osPathname, options.ScratchBuffer)
if err != nil {
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
return nil
}
return err
}

if !options.Unsorted {
sort.Sort(deChildren) // sort children entries unless upstream says to leave unsorted
var ds scanner

if options.Unsorted {
// When upstream does not request a sorted iteration, it's more memory
// efficient to read a single child at a time from the file system.
ds, err = NewScanner(osPathname)
if err != nil {
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
return nil
}
return err
}
} else {
// When upstream wants a sorted iteration, we must read the entire
// directory and sort through the child names, and then iterate on each
// child.
deChildren, err := ReadDirents(osPathname, nil)
if err != nil {
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
return nil
}
return err
}
sort.Sort(deChildren)
ds = &dirents{dd: deChildren}
}

for _, deChild := range deChildren {
for ds.Scan() {
deChild, err := ds.Dirent()
osChildname := filepath.Join(osPathname, deChild.name)
if err != nil {
if action := options.ErrorCallback(osChildname, err); action == SkipNode {
return nil
}
return err
}
err = walk(osChildname, deChild, options)
debug("osChildname: %q; error: %v\n", osChildname, err)
if err == nil {
continue
}
Expand All @@ -296,6 +320,9 @@ func walk(osPathname string, dirent *Dirent, options *Options) error {
}
// continue processing remaining siblings
}
if err = ds.Err(); err != nil {
return err
}

if options.PostChildrenCallback == nil {
return nil
Expand Down
42 changes: 42 additions & 0 deletions walk_test.go
Expand Up @@ -3,6 +3,7 @@ package godirwalk
import (
"os"
"path/filepath"
"sort"
"testing"
)

Expand Down Expand Up @@ -40,12 +41,31 @@ func godirwalkWalk(tb testing.TB, osDirname string) []string {
return entries
}

func godirwalkWalkUnsorted(tb testing.TB, osDirname string) []string {
tb.Helper()
var entries []string
err := Walk(osDirname, &Options{
ScratchBuffer: testScratchBuffer,
Callback: func(osPathname string, dirent *Dirent) error {
if dirent.Name() == "skip" {
return filepath.SkipDir
}
entries = append(entries, filepath.FromSlash(osPathname))
return nil
},
Unsorted: true,
})
ensureError(tb, err)
return entries
}

// ensureSameAsStandardLibrary verifies that this library's Walk function
// visits the same pathnames under osDirname (taken relative to testRoot) as
// filepath.Walk does.
//
// Both result slices are sorted before they are compared. filepath.Walk orders
// entries by name within each directory, which is not always the same as
// ordering the complete pathnames as strings: with slash-separated paths,
// "a.txt" sorts before "a/b" as a string, yet a walk visits "a/b" before
// "a.txt". Sorting only one side could therefore report a spurious mismatch.
func ensureSameAsStandardLibrary(tb testing.TB, osDirname string) {
	tb.Helper()
	osDirname = filepath.Join(testRoot, osDirname)

	actual := godirwalkWalk(tb, osDirname)
	sort.Strings(actual)

	expected := filepathWalk(tb, osDirname)
	sort.Strings(expected)

	ensureStringSlicesMatch(tb, actual, expected)
}
Expand All @@ -60,6 +80,19 @@ func TestWalkCompatibleWithFilepathWalk(t *testing.T) {
t.Run("test root", func(t *testing.T) {
ensureSameAsStandardLibrary(t, "d0")
})
t.Run("ignore skips", func(t *testing.T) {
// When filepath.SkipDir is returned, the remainder of the children in a
// directory are not visited. This causes results to be different when
// visiting in lexicographical order or natural order. For this test, we
// want to ensure godirwalk can optimize traversals when unsorted using
// the Scanner, but recognize that we cannot test against standard
// library when we skip any nodes within it.
osDirname := filepath.Join(testRoot, "d0/d1")
actual := godirwalkWalkUnsorted(t, osDirname)
sort.Strings(actual)
expected := filepathWalk(t, osDirname)
ensureStringSlicesMatch(t, actual, expected)
})
}

// Test cases for encountering the filepath.SkipDir error at different
Expand Down Expand Up @@ -270,6 +303,15 @@ func BenchmarkGodirwalk(b *testing.B) {
}
}

// BenchmarkGodirwalkUnsorted measures an unsorted walk of goPrefix using
// godirwalkWalkUnsorted. It is skipped when tests run in short mode.
func BenchmarkGodirwalkUnsorted(b *testing.B) {
	if testing.Short() {
		b.Skip("Skipping benchmark using user's Go source directory")
	}
	for remaining := b.N; remaining > 0; remaining-- {
		// Only the cost of the walk matters here, so the result is discarded.
		_ = godirwalkWalkUnsorted(b, goPrefix)
	}
}

func BenchmarkFlameGraphFilepathWalk(b *testing.B) {
for i := 0; i < flameIterations; i++ {
_ = filepathWalk(b, goPrefix)
Expand Down

0 comments on commit 74f3b4a

Please sign in to comment.