Skip to content

Commit

Permalink
satellite/metabase: improve listing queries
Browse files Browse the repository at this point in the history
This fixes some performance issues with listing objects with many
versions.

This also ends up fixing multiple corner cases with regard to how
prefixes work and how listing only the latest objects behaves.

To switch back to the old implementation, use this config option:

  # switch to iterator based implementation.
  metainfo.use-list-objects-iterator: false

Updates #6734

Change-Id: Ie74e3fa862010ef2286a2474c00145f250dbfc64
  • Loading branch information
egonelbre authored and Storj Robot committed Apr 1, 2024
1 parent 8a7b305 commit 8f2a8d0
Show file tree
Hide file tree
Showing 11 changed files with 1,686 additions and 341 deletions.
477 changes: 477 additions & 0 deletions cmd/tools/metabase-listing-performance/main.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions satellite/metabase/db.go
Expand Up @@ -37,6 +37,7 @@ type Config struct {
// TODO remove this flag when server-side copy implementation will be finished
ServerSideCopy bool
ServerSideCopyDisabled bool
UseListObjectsIterator bool

TestingUniqueUnversioned bool
}
Expand Down
72 changes: 72 additions & 0 deletions satellite/metabase/list.go
Expand Up @@ -5,8 +5,11 @@ package metabase

import (
"context"
"strings"
"time"

"github.com/zeebo/errs"

"storj.io/common/storj"
"storj.io/common/uuid"
)
Expand Down Expand Up @@ -130,3 +133,72 @@ func (opts *IterateObjectsWithStatus) Verify() error {
}
return nil
}

// ListObjectsWithIterator lists objects by walking the all-versions iterator
// and keeping at most one entry per object key.
//
// Pending and all-version listings are not supported and are rejected with a
// "not implemented" error. For every other request the first entry seen for a
// key decides the outcome for that key: it is returned unless it is a delete
// marker, and every later entry with the same key is dropped. Entries at the
// cursor key whose version does not exceed the cursor version are dropped too.
//
// NOTE(review): this relies on the iterator yielding all versions of a key
// consecutively, newest first — confirm against IterateObjectsAllVersionsWithStatus.
func (db *DB) ListObjectsWithIterator(ctx context.Context, opts ListObjects) (result ListObjectsResult, err error) {
	defer mon.Task()(&ctx)(&err)

	if err := opts.Verify(); err != nil {
		return ListObjectsResult{}, err
	}
	if opts.Pending || opts.AllVersions {
		return ListObjectsResult{}, errs.New("not implemented")
	}

	ListLimit.Ensure(&opts.Limit)

	iterateOpts := IterateObjectsWithStatus{
		ProjectID:  opts.ProjectID,
		BucketName: opts.BucketName,
		Prefix:     opts.Prefix,
		Cursor: IterateCursor{
			Key:     opts.Cursor.Key,
			Version: MaxVersion,
		},
		Recursive: opts.Recursive,
		// TODO we may need to increase batch size to optimize number
		// of DB calls for objects with multiple versions
		BatchSize:             opts.Limit + 1,
		Pending:               false,
		IncludeCustomMetadata: opts.IncludeCustomMetadata,
		IncludeSystemMetadata: opts.IncludeSystemMetadata,
	}

	collect := func(ctx context.Context, it ObjectsIterator) error {
		// The cursor key is compared against prefix + entry key, so the
		// prefix is normalised to end with a slash before it is prepended.
		prefix := opts.Prefix
		if prefix != "" && !strings.HasSuffix(string(prefix), "/") {
			prefix += "/"
		}

		var (
			current  ObjectEntry
			last     ObjectEntry // most recent entry that decided the outcome for its key
			haveLast bool
		)

		for len(result.Objects) < opts.Limit && it.Next(ctx, &current) {
			fullKey := prefix + current.ObjectKey
			atOrBeforeCursor := opts.Cursor.Key == fullKey && opts.Cursor.Version >= current.Version
			firstForKey := !haveLast || last.ObjectKey != current.ObjectKey

			switch {
			case atOrBeforeCursor:
				// Already covered by the cursor: remember it so the remaining
				// versions of this key are skipped as well.
				haveLast, last = true, current
			case !firstForKey:
				// A further version of a key that has already been decided.
			case current.Status.IsDeleteMarker():
				// The first entry for the key is a delete marker: hide the key.
				haveLast, last = true, current
			default:
				haveLast, last = true, current
				result.Objects = append(result.Objects, current)
			}
		}

		// More reflects whether the iterator can produce one further row.
		// NOTE(review): that row is not filtered here, so it may belong to a
		// key that was already handled — confirm callers tolerate this.
		result.More = it.Next(ctx, &current)
		return nil
	}

	if err := db.IterateObjectsAllVersionsWithStatus(ctx, iterateOpts, collect); err != nil {
		return ListObjectsResult{}, err
	}
	return result, nil
}

0 comments on commit 8f2a8d0

Please sign in to comment.