diff --git a/prefix/match.go b/prefix/match.go deleted file mode 100644 index 57e745c..0000000 --- a/prefix/match.go +++ /dev/null @@ -1,45 +0,0 @@ -package prefix - -import ( - "path" - "path/filepath" - "strings" -) - -// Match matches a path against a pattern. It returns m = true if the path -// matches the pattern, and partial = true if the pattern has more separators -// than the path and the common components match (for example, name = foo and -// pattern = foo/bar/*). slashSeparator determines whether the path and pattern -// are '/' delimited (true) or use the native path separator (false). -func Match(pattern, name string, slashSeparator bool) (m bool, partial bool) { - separator := filepath.Separator - if slashSeparator { - separator = '/' - } - count := strings.Count(name, string(separator)) - if strings.Count(pattern, string(separator)) > count { - pattern = trimUntilIndex(pattern, string(separator), count) - partial = true - } - if slashSeparator { - m, _ = path.Match(pattern, name) - } else { - m, _ = filepath.Match(pattern, name) - } - return m, partial -} - -func trimUntilIndex(str, sep string, count int) string { - s := str - i := 0 - c := 0 - for { - idx := strings.Index(s, sep) - s = s[idx+len(sep):] - i += idx + len(sep) - c++ - if c > count { - return str[:i-len(sep)] - } - } -} diff --git a/prefix/match_test.go b/prefix/match_test.go deleted file mode 100644 index bbb253b..0000000 --- a/prefix/match_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package prefix - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestMatch(t *testing.T) { - ok, partial := Match("foo", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("foo/bar/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bar", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bax", false) - assert.Equal(t, false, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bar/baz", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("f*", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("foo/bar/*", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/*/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/*/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/bar/baz", "foo/bar", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/bar/baz", "foo/bax", false) - assert.Equal(t, false, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/*/baz", "foo/bar/baz", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) -} diff --git a/walker.go b/walker.go index 2eb1bc0..f93434d 100644 --- a/walker.go +++ b/walker.go @@ -10,7 +10,6 @@ import ( "github.com/docker/docker/pkg/fileutils" "github.com/pkg/errors" - "github.com/tonistiigi/fsutil/prefix" "github.com/tonistiigi/fsutil/types" ) @@ -36,20 +35,15 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return errors.WithStack(&os.PathError{Op: "walk", Path: root, Err: syscall.ENOTDIR}) } - var pm *fileutils.PatternMatcher - if opt != nil && opt.ExcludePatterns != nil { - pm, err = fileutils.NewPatternMatcher(opt.ExcludePatterns) - if err != nil { - return errors.Wrapf(err, "invalid excludepatterns: %s", opt.ExcludePatterns) - } - } + var ( + includePatterns []string + includeMatcher *fileutils.PatternMatcher + excludeMatcher *fileutils.PatternMatcher + ) - var includePatterns []string if opt != nil && opt.IncludePatterns != nil { includePatterns = make([]string, len(opt.IncludePatterns)) - for k := range opt.IncludePatterns { - includePatterns[k] = filepath.Clean(opt.IncludePatterns[k]) - } + copy(includePatterns, opt.IncludePatterns) } if opt != nil && opt.FollowPaths != nil { targets, err := FollowLinks(p, opt.FollowPaths) @@ -61,13 +55,32 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err includePatterns = dedupePaths(includePatterns) } } + if len(includePatterns) != 0 { + includeMatcher, err = fileutils.NewPatternMatcher(includePatterns) + if err != nil { + return errors.Wrapf(err, "invalid includepatterns: %s", opt.IncludePatterns) + } + } - var ( - lastIncludedDir string + if opt != nil && opt.ExcludePatterns != nil { + excludeMatcher, err = fileutils.NewPatternMatcher(opt.ExcludePatterns) + if err != nil { + return errors.Wrapf(err, "invalid excludepatterns: %s", opt.ExcludePatterns) + } + } - parentDirs []string // used only for exclude handling - parentMatchedExclude []bool - ) + type visitedDir struct { + fi os.FileInfo + path string + origpath string + pathWithSep string + matchedInclude bool + matchedExclude bool + calledFn bool + } + + // used only for include/exclude handling + var parentDirs []visitedDir seenFiles := make(map[uint64]string) return filepath.Walk(root, func(path string, fi os.FileInfo, err error) (retErr error) { @@ -90,87 +103,84 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return nil } - if opt != nil { - if includePatterns != nil { - skip := false - if lastIncludedDir != "" { - if strings.HasPrefix(path, lastIncludedDir+string(filepath.Separator)) { - skip = true - } - } + var dir visitedDir - if !skip { - matched := false - partial := true - for _, pattern := range includePatterns { - if ok, p := prefix.Match(pattern, path, false); ok { - matched = true - if !p { - partial = false - break - } - } - } - if !matched { - if fi.IsDir() { - return filepath.SkipDir - } - return nil - } - if !partial && fi.IsDir() { - lastIncludedDir = path - } + if includeMatcher != nil || excludeMatcher != nil { + for len(parentDirs) != 0 { + lastParentDir := parentDirs[len(parentDirs)-1].pathWithSep + if strings.HasPrefix(path, lastParentDir) { + break } + parentDirs = parentDirs[:len(parentDirs)-1] } - if pm != nil { - for len(parentMatchedExclude) != 0 { - lastParentDir := parentDirs[len(parentDirs)-1] - if strings.HasPrefix(path, lastParentDir) { - break - } - parentDirs = parentDirs[:len(parentDirs)-1] - parentMatchedExclude = parentMatchedExclude[:len(parentMatchedExclude)-1] - } - var m bool - if len(parentMatchedExclude) != 0 { - m, err = pm.MatchesUsingParentResult(path, parentMatchedExclude[len(parentMatchedExclude)-1]) - } else { - m, err = pm.MatchesOrParentMatches(path) + if fi.IsDir() { + dir = visitedDir{ + fi: fi, + path: path, + origpath: origpath, + pathWithSep: path + string(filepath.Separator), } - if err != nil { - return errors.Wrap(err, "failed to match excludepatterns") + } + } + + skip := false + + if includeMatcher != nil { + var parentMatchedInclude *bool + if len(parentDirs) != 0 { + parentMatchedInclude = &parentDirs[len(parentDirs)-1].matchedInclude + } + m, err := matchesPatterns(includeMatcher, path, parentMatchedInclude) + if err != nil { + return errors.Wrap(err, "failed to match includepatterns") + } + + if fi.IsDir() { + dir.matchedInclude = m + } + + if !m { + skip = true + } + } + + if excludeMatcher != nil { + var parentMatchedExclude *bool + if len(parentDirs) != 0 { + parentMatchedExclude = &parentDirs[len(parentDirs)-1].matchedExclude + } + m, err := matchesPatterns(excludeMatcher, path, parentMatchedExclude) + if err != nil { + return errors.Wrap(err, "failed to match excludepatterns") + } + + if fi.IsDir() { + dir.matchedExclude = m + } + + if m { + if fi.IsDir() && !excludeMatcher.Exclusions() { + return filepath.SkipDir } + skip = true + } + } - var dirSlash string + if includeMatcher != nil || excludeMatcher != nil { + defer func() { if fi.IsDir() { - dirSlash = path + string(filepath.Separator) - parentDirs = append(parentDirs, dirSlash) - parentMatchedExclude = append(parentMatchedExclude, m) + parentDirs = append(parentDirs, dir) } + }() + } - if m { - if fi.IsDir() { - if !pm.Exclusions() { - return filepath.SkipDir - } - for _, pat := range pm.Patterns() { - if !pat.Exclusion() { - continue - } - patStr := pat.String() + string(filepath.Separator) - if strings.HasPrefix(patStr, dirSlash) { - goto passedFilter - } - } - return filepath.SkipDir - } - return nil - } - } + if skip { + return nil } - passedFilter: + dir.calledFn = true + stat, err := mkstat(origpath, path, fi, seenFiles) if err != nil { return err @@ -185,6 +195,31 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return nil } } + for i, parentDir := range parentDirs { + if parentDir.calledFn { + continue + } + parentStat, err := mkstat(parentDir.origpath, parentDir.path, parentDir.fi, seenFiles) + if err != nil { + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + if opt != nil && opt.Map != nil { + if allowed := opt.Map(parentStat.Path, parentStat); !allowed { + continue + } + } + + if err := fn(parentStat.Path, &StatInfo{parentStat}, nil); err != nil { + return err + } + parentDirs[i].calledFn = true + } if err := fn(stat.Path, &StatInfo{stat}, nil); err != nil { return err } @@ -193,6 +228,23 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err }) } +func matchesPatterns(pm *fileutils.PatternMatcher, path string, parentMatched *bool) (bool, error) { + var ( + m bool + err error + ) + if parentMatched != nil { + m, err = pm.MatchesUsingParentResult(path, *parentMatched) + } else { + m, err = pm.MatchesOrParentMatches(path) + } + if err != nil { + return false, err + } + + return m, nil +} + type StatInfo struct { *types.Stat } diff --git a/walker_test.go b/walker_test.go index 8b9f2d3..2b71827 100644 --- a/walker_test.go +++ b/walker_test.go @@ -87,8 +87,7 @@ file bar/foo }, bufWalk(b)) assert.NoError(t, err) - assert.Equal(t, `dir bar -`, string(b.Bytes())) + assert.Empty(t, b.Bytes()) b.Reset() err = Walk(context.Background(), d, &WalkOpt{ @@ -305,3 +304,357 @@ func tmpDir(inp []*change) (dir string, retErr error) { } return tmpdir, nil } + +func BenchmarkWalker(b *testing.B) { + for _, scenario := range []struct { + maxDepth int + pattern string + expected int + }{{ + maxDepth: 1, + pattern: "target", + expected: 1, + }, { + maxDepth: 1, + pattern: "**/target", + expected: 1, + }, { + maxDepth: 2, + pattern: "*/target", + expected: 52, + }, { + maxDepth: 2, + pattern: "**/target", + expected: 52, + }, { + maxDepth: 3, + pattern: "*/*/target", + expected: 1378, + }, { + maxDepth: 3, + pattern: "**/target", + expected: 1378, + }, { + maxDepth: 4, + pattern: "*/*/*/target", + expected: 2794, + }, { + maxDepth: 4, + pattern: "**/target", + expected: 2794, + }, { + maxDepth: 5, + pattern: "*/*/*/*/target", + expected: 1405, + }, { + maxDepth: 5, + pattern: "**/target", + expected: 1405, + }, { + maxDepth: 6, + pattern: "*/*/*/*/*/target", + expected: 2388, + }, { + maxDepth: 6, + pattern: "**/target", + expected: 2388, + }} { + scenario := scenario // copy loop var + b.Run(fmt.Sprintf("[%d]-%s", scenario.maxDepth, scenario.pattern), func(b *testing.B) { + tmpdir, err := ioutil.TempDir("", "walk") + if err != nil { + b.Error(err) + } + defer func() { + b.StopTimer() + os.RemoveAll(tmpdir) + }() + mkBenchTree(tmpdir, scenario.maxDepth, 1) + + // don't include time to setup dirs in benchmark + b.ResetTimer() + + for i := 0; i < b.N; i++ { + count := 0 + err = Walk(context.Background(), tmpdir, &WalkOpt{ + IncludePatterns: []string{scenario.pattern}, + }, func(path string, fi os.FileInfo, err error) error { + count++ + return nil + }) + if err != nil { + b.Error(err) + } + if count != scenario.expected { + b.Errorf("Got count %d, expected %d", count, scenario.expected) + } + } + }) + } + +} + +func TestWalkerDoublestarInclude(t *testing.T) { + d, err := tmpDir(changeStream([]string{ + "ADD a dir", + "ADD a/b dir", + "ADD a/b/baz dir", + "ADD a/b/bar dir ", + "ADD a/b/bar/foo file", + "ADD a/b/bar/fop file", + "ADD bar dir", + "ADD bar/foo file", + "ADD baz dir", + "ADD foo2 file", + "ADD foo dir", + "ADD foo/bar dir", + "ADD foo/bar/bee file", + })) + + assert.NoError(t, err) + defer os.RemoveAll(d) + b := &bytes.Buffer{} + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir a/b/baz + dir bar + file bar/foo + dir baz + dir foo + dir foo/bar + file foo/bar/bee + file foo2 + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/foo"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir a/b/baz + dir bar + file bar/foo + dir baz + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/g*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ``, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + dir foo + dir foo/bar + file foo/bar/bee + file foo2 + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*/foo"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/foo/**"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/baz"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/baz + dir baz + `, string(b.Bytes())) +} + +func trimEqual(t assert.TestingT, expected, actual string, msgAndArgs ...interface{}) bool { + lines := []string{} + for _, line := range strings.Split(expected, "\n") { + line = strings.TrimSpace(line) + if line != "" { + lines = append(lines, line) + } + } + lines = append(lines, "") // we expect a trailing newline + expected = strings.Join(lines, "\n") + + return assert.Equal(t, expected, actual, msgAndArgs) +} + +// mkBenchTree will create directories named a-z recursively +// up to 3 layers deep. If maxDepth is > 3 we will shorten +// the last letter to prevent the generated inodes going over +// 25k. The final directory in the tree will contain only files. +// Additionally there is a single file named `target` +// in each leaf directory. +func mkBenchTree(dir string, maxDepth, depth int) error { + end := 'z' + switch maxDepth { + case 1, 2, 3: + end = 'z' // max 19682 inodes + case 4: + end = 'k' // max 19030 inodes + case 5: + end = 'e' // max 12438 inodes + case 6: + end = 'd' // max 8188 inodes + case 7, 8: + end = 'c' // max 16398 inodes + case 9, 10, 11, 12: + end = 'b' // max 16378 inodes + default: + panic("depth cannot be > 12, would create too many files") + } + + if depth == maxDepth { + fd, err := os.Create(filepath.Join(dir, "target")) + if err != nil { + return err + } + fd.Close() + } + for r := 'a'; r <= end; r++ { + p := filepath.Join(dir, string(r)) + if depth == maxDepth { + fd, err := os.Create(p) + if err != nil { + return err + } + fd.Close() + } else { + err := os.Mkdir(p, 0755) + if err != nil { + return err + } + err = mkBenchTree(p, maxDepth, depth+1) + if err != nil { + return err + } + } + } + return nil +}