Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: bring back delayed leaf detection in listing #10346

Merged
merged 1 commit into from
Aug 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/api-response.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import (
const (
// RFC3339 a subset of the ISO8601 timestamp format. e.g 2014-04-29T18:30:38Z
iso8601TimeFormat = "2006-01-02T15:04:05.000Z" // Reply date format with nanosecond precision.
maxObjectList = 10000 // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse.
maxObjectList = 1000 // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse.
maxDeleteList = 10000 // Limit number of objects deleted in a delete call.
maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse.
maxPartsList = 10000 // Limit number of parts in a listPartsResponse.
Expand Down
24 changes: 20 additions & 4 deletions cmd/erasure-sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,17 @@ func (f *FileInfoCh) Push(fi FileInfo) {
// if the caller wishes to list N entries to call lexicallySortedEntry
// N times until this boolean is 'false'.
func lexicallySortedEntry(entryChs []FileInfoCh, entries []FileInfo, entriesValid []bool) (FileInfo, int, bool) {
for i := range entryChs {
entries[i], entriesValid[i] = entryChs[i].Pop()
var wg sync.WaitGroup
for j := range entryChs {
j := j
wg.Add(1)
// Pop() entries in parallel for large drive setups.
go func() {
defer wg.Done()
entries[j], entriesValid[j] = entryChs[j].Pop()
}()
}
wg.Wait()

var isTruncated = false
for _, valid := range entriesValid {
Expand Down Expand Up @@ -902,9 +910,17 @@ func lexicallySortedEntry(entryChs []FileInfoCh, entries []FileInfo, entriesVali
// if the caller wishes to list N entries to call lexicallySortedEntry
// N times until this boolean is 'false'.
func lexicallySortedEntryVersions(entryChs []FileInfoVersionsCh, entries []FileInfoVersions, entriesValid []bool) (FileInfoVersions, int, bool) {
for i := range entryChs {
entries[i], entriesValid[i] = entryChs[i].Pop()
var wg sync.WaitGroup
for j := range entryChs {
j := j
wg.Add(1)
// Pop() entries in parallel for large drive setups.
go func() {
defer wg.Done()
entries[j], entriesValid[j] = entryChs[j].Pop()
}()
}
wg.Wait()

var isTruncated = false
for _, valid := range entriesValid {
Expand Down
22 changes: 15 additions & 7 deletions cmd/fs-v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -1334,23 +1334,31 @@ func (fs *FSObjects) DeleteObject(ctx context.Context, bucket, object string, op
return ObjectInfo{Bucket: bucket, Name: object}, nil
}

func (fs *FSObjects) isLeafDir(bucket string, leafPath string) bool {
return fs.isObjectDir(bucket, leafPath)
}

func (fs *FSObjects) isLeaf(bucket string, leafPath string) bool {
return !strings.HasSuffix(leafPath, slashSeparator)
}

// Returns function "listDir" of the type listDirFunc.
// isLeaf - is used by listDir function to check if an entry
// is a leaf or non-leaf entry.
func (fs *FSObjects) listDirFactory() ListDirFunc {
// listDir - lists all the entries at a given prefix and given entry in the prefix.
listDir := func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string) {
listDir := func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool) {
var err error
entries, err = readDir(pathJoin(fs.fsPath, bucket, prefixDir))
if err != nil && err != errFileNotFound {
logger.LogIf(GlobalContext, err)
return false, nil
return false, nil, false
}
if len(entries) == 0 {
return true, nil
return true, nil, false
}
sort.Strings(entries)
return false, filterMatchingPrefix(entries, prefixEntry)
entries, delayIsLeaf = filterListEntries(bucket, prefixDir, entries, prefixEntry, fs.isLeaf)
return false, entries, delayIsLeaf
}

// Return list factory instance.
Expand Down Expand Up @@ -1453,7 +1461,7 @@ func (fs *FSObjects) ListObjects(ctx context.Context, bucket, prefix, marker, de
}()

return listObjects(ctx, fs, bucket, prefix, marker, delimiter, maxKeys, fs.listPool,
fs.listDirFactory(), fs.getObjectInfoNoFSLock, fs.getObjectInfoNoFSLock)
fs.listDirFactory(), fs.isLeaf, fs.isLeafDir, fs.getObjectInfoNoFSLock, fs.getObjectInfoNoFSLock)
}

// GetObjectTags - get object tags from an existing object
Expand Down Expand Up @@ -1550,7 +1558,7 @@ func (fs *FSObjects) HealBucket(ctx context.Context, bucket string, dryRun, remo
// error walker returns error. Optionally if context.Done() is received
// then Walk() stops the walker.
func (fs *FSObjects) Walk(ctx context.Context, bucket, prefix string, results chan<- ObjectInfo, opts ObjectOptions) error {
return fsWalk(ctx, fs, bucket, prefix, fs.listDirFactory(), results, fs.getObjectInfoNoFSLock, fs.getObjectInfoNoFSLock)
return fsWalk(ctx, fs, bucket, prefix, fs.listDirFactory(), fs.isLeaf, fs.isLeafDir, results, fs.getObjectInfoNoFSLock, fs.getObjectInfoNoFSLock)
}

// HealObjects - no-op for fs. Valid only for Erasure.
Expand Down
4 changes: 2 additions & 2 deletions cmd/gateway-common.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ var (
// ListObjects function alias.
ListObjects = listObjects

// FilterMatchingPrefix function alias.
FilterMatchingPrefix = filterMatchingPrefix
// FilterListEntries function alias.
FilterListEntries = filterListEntries

// IsStringEqual is string equal.
IsStringEqual = isStringEqual
Expand Down
17 changes: 13 additions & 4 deletions cmd/gateway/hdfs/gateway-hdfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,17 @@ func (n *hdfsObjects) ListBuckets(ctx context.Context) (buckets []minio.BucketIn
return buckets, nil
}

func (n *hdfsObjects) isLeafDir(bucket, leafPath string) bool {
return n.isObjectDir(context.Background(), bucket, leafPath)
}

func (n *hdfsObjects) isLeaf(bucket, leafPath string) bool {
return !strings.HasSuffix(leafPath, hdfsSeparator)
}

func (n *hdfsObjects) listDirFactory() minio.ListDirFunc {
// listDir - lists all the entries at a given prefix and given entry in the prefix.
listDir := func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string) {
listDir := func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool) {
f, err := n.clnt.Open(n.hdfsPathJoin(bucket, prefixDir))
if err != nil {
if os.IsNotExist(err) {
Expand All @@ -366,7 +374,7 @@ func (n *hdfsObjects) listDirFactory() minio.ListDirFunc {
return
}
if len(fis) == 0 {
return true, nil
return true, nil, false
}
for _, fi := range fis {
if fi.IsDir() {
Expand All @@ -375,7 +383,8 @@ func (n *hdfsObjects) listDirFactory() minio.ListDirFunc {
entries = append(entries, fi.Name())
}
}
return false, minio.FilterMatchingPrefix(entries, prefixEntry)
entries, delayIsLeaf = minio.FilterListEntries(bucket, prefixDir, entries, prefixEntry, n.isLeaf)
return false, entries, delayIsLeaf
}

// Return list factory instance.
Expand Down Expand Up @@ -426,7 +435,7 @@ func (n *hdfsObjects) ListObjects(ctx context.Context, bucket, prefix, marker, d
return objectInfo, nil
}

return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), getObjectInfo, getObjectInfo)
return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), n.isLeaf, n.isLeafDir, getObjectInfo, getObjectInfo)
}

// Lists a path's direct, first-level entries and populates them in the `fileInfos` cache which maps
Expand Down
14 changes: 7 additions & 7 deletions cmd/object-api-common.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
return err
}

func listObjectsNonSlash(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int, tpool *TreeWalkPool, listDir ListDirFunc, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) (loi ListObjectsInfo, err error) {
func listObjectsNonSlash(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int, tpool *TreeWalkPool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) (loi ListObjectsInfo, err error) {
endWalkCh := make(chan struct{})
defer close(endWalkCh)
recursive := true
walkResultCh := startTreeWalk(ctx, bucket, prefix, "", recursive, listDir, endWalkCh)
walkResultCh := startTreeWalk(ctx, bucket, prefix, "", recursive, listDir, isLeaf, isLeafDir, endWalkCh)

var objInfos []ObjectInfo
var eof bool
Expand Down Expand Up @@ -227,14 +227,14 @@ func listObjectsNonSlash(ctx context.Context, bucket, prefix, marker, delimiter
// to allocate a receive channel for ObjectInfo, upon any unhandled
// error walker returns error. Optionally if context.Done() is received
// then Walk() stops the walker.
func fsWalk(ctx context.Context, obj ObjectLayer, bucket, prefix string, listDir ListDirFunc, results chan<- ObjectInfo, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) error {
func fsWalk(ctx context.Context, obj ObjectLayer, bucket, prefix string, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, results chan<- ObjectInfo, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) error {
if err := checkListObjsArgs(ctx, bucket, prefix, "", obj); err != nil {
// Upon error close the channel.
close(results)
return err
}

walkResultCh := startTreeWalk(ctx, bucket, prefix, "", true, listDir, ctx.Done())
walkResultCh := startTreeWalk(ctx, bucket, prefix, "", true, listDir, isLeaf, isLeafDir, ctx.Done())

go func() {
defer close(results)
Expand Down Expand Up @@ -277,9 +277,9 @@ func fsWalk(ctx context.Context, obj ObjectLayer, bucket, prefix string, listDir
return nil
}

func listObjects(ctx context.Context, obj ObjectLayer, bucket, prefix, marker, delimiter string, maxKeys int, tpool *TreeWalkPool, listDir ListDirFunc, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) (loi ListObjectsInfo, err error) {
func listObjects(ctx context.Context, obj ObjectLayer, bucket, prefix, marker, delimiter string, maxKeys int, tpool *TreeWalkPool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, getObjInfo func(context.Context, string, string) (ObjectInfo, error), getObjectInfoDirs ...func(context.Context, string, string) (ObjectInfo, error)) (loi ListObjectsInfo, err error) {
if delimiter != SlashSeparator && delimiter != "" {
return listObjectsNonSlash(ctx, bucket, prefix, marker, delimiter, maxKeys, tpool, listDir, getObjInfo, getObjectInfoDirs...)
return listObjectsNonSlash(ctx, bucket, prefix, marker, delimiter, maxKeys, tpool, listDir, isLeaf, isLeafDir, getObjInfo, getObjectInfoDirs...)
}

if err := checkListObjsArgs(ctx, bucket, prefix, marker, obj); err != nil {
Expand Down Expand Up @@ -322,7 +322,7 @@ func listObjects(ctx context.Context, obj ObjectLayer, bucket, prefix, marker, d
walkResultCh, endWalkCh := tpool.Release(listParams{bucket, recursive, marker, prefix})
if walkResultCh == nil {
endWalkCh = make(chan struct{})
walkResultCh = startTreeWalk(ctx, bucket, prefix, marker, recursive, listDir, endWalkCh)
walkResultCh = startTreeWalk(ctx, bucket, prefix, marker, recursive, listDir, isLeaf, isLeafDir, endWalkCh)
}

var objInfos []ObjectInfo
Expand Down
4 changes: 2 additions & 2 deletions cmd/os-readdir_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ import (
// refer https://github.com/golang/go/issues/24015
const blockSize = 8 << 10 // 8192

// By default atleast 1000 entries in single getdents call
// By default atleast 20 entries in single getdents call
harshavardhana marked this conversation as resolved.
Show resolved Hide resolved
var direntPool = sync.Pool{
New: func() interface{} {
buf := make([]byte, blockSize*1000)
buf := make([]byte, blockSize*20)
return &buf
},
}
Expand Down
111 changes: 100 additions & 11 deletions cmd/tree-walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,84 @@ func filterMatchingPrefix(entries []string, prefixEntry string) []string {
}
end--
}
sort.Strings(entries[start:end])
return entries[start:end]
}

// xl.ListDir returns entries with trailing "/" for directories. At the object layer
// we need to remove this trailing "/" for objects and retain "/" for prefixes before
// sorting because the trailing "/" can affect the sorting results for certain cases.
// Ex. lets say entries = ["a-b/", "a/"] and both are objects.
// sorting with out trailing "/" = ["a", "a-b"]
// sorting with trailing "/" = ["a-b/", "a/"]
// Hence if entries[] does not have a case like the above example then isLeaf() check
// can be delayed till the entry is pushed into the TreeWalkResult channel.
// delayIsLeafCheck() returns true if isLeaf can be delayed or false if
// isLeaf should be done in listDir()
func delayIsLeafCheck(entries []string) bool {
harshavardhana marked this conversation as resolved.
Show resolved Hide resolved
for i, entry := range entries {
if i == len(entries)-1 {
break
}
// If any byte in the "entry" string is less than '/' then the
// next "entry" should not contain '/' at the same same byte position.
for j := 0; j < len(entry); j++ {
if entry[j] < '/' {
if len(entries[i+1]) > j {
if entries[i+1][j] == '/' {
return false
}
}
}
}
}
return true
}

// ListDirFunc - "listDir" function of type listDirFunc returned by listDirFactory() - explained below.
type ListDirFunc func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string)
type ListDirFunc func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool)

// IsLeafFunc - A function isLeaf of type isLeafFunc is used to detect if an
// entry is a leaf entry. There are 2 scenarios where isLeaf should behave
// differently depending on the backend:
// 1. FS backend object listing - isLeaf is true if the entry
// has no trailing "/"
// 2. Erasure backend object listing - isLeaf is true if the entry
// is a directory and contains xl.meta
type IsLeafFunc func(string, string) bool

// IsLeafDirFunc - A function isLeafDir of type isLeafDirFunc is used to detect
// if an entry is empty directory.
type IsLeafDirFunc func(string, string) bool

func filterListEntries(bucket, prefixDir string, entries []string, prefixEntry string, isLeaf IsLeafFunc) ([]string, bool) {
// Listing needs to be sorted.
sort.Strings(entries)

// Filter entries that have the prefix prefixEntry.
entries = filterMatchingPrefix(entries, prefixEntry)

// Can isLeaf() check be delayed till when it has to be sent down the
// TreeWalkResult channel?
delayIsLeaf := delayIsLeafCheck(entries)
if delayIsLeaf {
return entries, true
}

// isLeaf() check has to happen here so that trailing "/" for objects can be removed.
for i, entry := range entries {
if isLeaf(bucket, pathJoin(prefixDir, entry)) {
entries[i] = strings.TrimSuffix(entry, slashSeparator)
}
}

// Sort again after removing trailing "/" for objects as the previous sort
// does not hold good anymore.
sort.Strings(entries)
return entries, false
}

// treeWalk walks directory tree recursively pushing TreeWalkResult into the channel as and when it encounters files.
func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, listDir ListDirFunc, resultCh chan TreeWalkResult, endWalkCh <-chan struct{}, isEnd bool) (emptyDir bool, treeErr error) {
func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, resultCh chan TreeWalkResult, endWalkCh <-chan struct{}, isEnd bool) (emptyDir bool, treeErr error) {
// Example:
// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
// called with prefixDir="one/two/three/four/" and marker="five.txt"
Expand All @@ -75,7 +144,12 @@ func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker
}
}

emptyDir, entries := listDir(bucket, prefixDir, entryPrefixMatch)
emptyDir, entries, delayIsLeaf := listDir(bucket, prefixDir, entryPrefixMatch)
// When isleaf check is delayed, make sure that it is set correctly here.
if delayIsLeaf && isLeaf == nil || isLeafDir == nil {
return false, errInvalidArgument
}

// For an empty list return right here.
if emptyDir {
return true, nil
Expand All @@ -94,8 +168,23 @@ func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker
}

for i, entry := range entries {
pentry := pathJoin(prefixDir, entry)
isDir := HasSuffix(pentry, SlashSeparator)
var leaf, leafDir bool

// Decision to do isLeaf check was pushed from listDir() to here.
if delayIsLeaf {
leaf = isLeaf(bucket, pathJoin(prefixDir, entry))
if leaf {
entry = strings.TrimSuffix(entry, slashSeparator)
}
} else {
leaf = !strings.HasSuffix(entry, slashSeparator)
}

if strings.HasSuffix(entry, slashSeparator) {
leafDir = isLeafDir(bucket, pathJoin(prefixDir, entry))
}

isDir := !leafDir && !leaf

if i == 0 && markerDir == entry {
if !recursive {
Expand Down Expand Up @@ -123,8 +212,8 @@ func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker
// markIsEnd is passed to this entry's treeWalk() so that treeWalker.end can be marked
// true at the end of the treeWalk stream.
markIsEnd := i == len(entries)-1 && isEnd
emptyDir, err := doTreeWalk(ctx, bucket, pentry, prefixMatch, markerArg, recursive,
listDir, resultCh, endWalkCh, markIsEnd)
emptyDir, err := doTreeWalk(ctx, bucket, pathJoin(prefixDir, entry), prefixMatch, markerArg, recursive,
listDir, isLeaf, isLeafDir, resultCh, endWalkCh, markIsEnd)
if err != nil {
return false, err
}
Expand All @@ -142,7 +231,7 @@ func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker
select {
case <-endWalkCh:
return false, errWalkAbort
case resultCh <- TreeWalkResult{entry: pentry, end: isEOF}:
case resultCh <- TreeWalkResult{entry: pathJoin(prefixDir, entry), end: isEOF}:
}
}

Expand All @@ -151,7 +240,7 @@ func doTreeWalk(ctx context.Context, bucket, prefixDir, entryPrefixMatch, marker
}

// Initiate a new treeWalk in a goroutine.
func startTreeWalk(ctx context.Context, bucket, prefix, marker string, recursive bool, listDir ListDirFunc, endWalkCh <-chan struct{}) chan TreeWalkResult {
func startTreeWalk(ctx context.Context, bucket, prefix, marker string, recursive bool, listDir ListDirFunc, isLeaf IsLeafFunc, isLeafDir IsLeafDirFunc, endWalkCh <-chan struct{}) chan TreeWalkResult {
// Example 1
// If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt"
// treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt"
Expand All @@ -173,7 +262,7 @@ func startTreeWalk(ctx context.Context, bucket, prefix, marker string, recursive
marker = strings.TrimPrefix(marker, prefixDir)
go func() {
isEnd := true // Indication to start walking the tree with end as true.
doTreeWalk(ctx, bucket, prefixDir, entryPrefixMatch, marker, recursive, listDir, resultCh, endWalkCh, isEnd)
doTreeWalk(ctx, bucket, prefixDir, entryPrefixMatch, marker, recursive, listDir, isLeaf, isLeafDir, resultCh, endWalkCh, isEnd)
close(resultCh)
}()
return resultCh
Expand Down
Loading