satellite/metainfo/piecedeletion: configurable threshold

The success threshold for deleting pieces from the nodes during CommitObject, when overwriting an existing object, seemed to cause a race condition in tests.

This change makes the threshold configurable so that tests can set it to the maximum, making CommitObject wait until all pieces are removed from the nodes.

Change-Id: Idf6b52e71d0082a1cd87ad99a2edded6892d02a8
Erikvv committed Oct 20, 2022
1 parent 56128b9 commit 2ed18a0
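
A minimal sketch (not part of this commit) of how a test can pin the new knob so CommitObject waits for every node before returning. The test name and body are hypothetical placeholders and the imports are the usual testplanet ones assumed here; the Reconfigure hook mirrors the endpoint_object_test.go change further down.

package metainfo_test

import (
	"testing"

	"go.uber.org/zap"

	"storj.io/common/testcontext"
	"storj.io/storj/private/testplanet"
	"storj.io/storj/satellite"
)

func TestCommitObjectWaitsForAllPieceDeletes(t *testing.T) {
	testplanet.Run(t, testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
		Reconfigure: testplanet.Reconfigure{
			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
				// 1 means every contacted node must confirm the delete
				// before the piece deletion service returns.
				config.Metainfo.PieceDeletion.DeleteSuccessThreshold = 1
			},
		},
	}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		// upload an object, overwrite it, then assert the old pieces are gone
	})
}
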
Showing 6 changed files with 25 additions and 15 deletions.
satellite/metainfo/endpoint.go (2 changes: 0 additions & 2 deletions)
@@ -36,8 +36,6 @@ import (
 
 const (
 	satIDExpiration = 48 * time.Hour
-
-	deleteObjectPiecesSuccessThreshold = 0.75
 )
 
 var (
satellite/metainfo/endpoint_object.go (2 changes: 1 addition & 1 deletion)
@@ -1486,7 +1486,7 @@ func (endpoint *Endpoint) deleteSegmentPieces(ctx context.Context, segments []me
 
 	// Only return an error if we failed to delete the objects. If we failed
 	// to delete pieces, let garbage collector take care of it.
-	err = endpoint.deletePieces.Delete(ctx, requests, deleteObjectPiecesSuccessThreshold)
+	err = endpoint.deletePieces.Delete(ctx, requests)
 	if err != nil {
 		endpoint.log.Error("failed to delete pieces", zap.Error(err))
 	}
satellite/metainfo/endpoint_object_test.go (1 change: 1 addition & 0 deletions)
@@ -1953,6 +1953,7 @@ func TestEndpoint_Object_MultipleVersions(t *testing.T) {
 		Reconfigure: testplanet.Reconfigure{
 			Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
 				config.Metainfo.MultipleVersions = true
+				config.Metainfo.PieceDeletion.DeleteSuccessThreshold = 1
 
 				testplanet.ReconfigureRS(2, 3, 4, 4)(log, index, config)
 			},
satellite/metainfo/piecedeletion/service.go (18 changes: 13 additions & 5 deletions)
@@ -25,9 +25,10 @@ type Config struct {
 	MaxPiecesPerBatch   int `help:"maximum number of pieces per batch" default:"5000" testDefault:"4000"`
 	MaxPiecesPerRequest int `help:"maximum number pieces per single request" default:"1000" testDefault:"2000"`
 
-	DialTimeout    time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"3s" testDefault:"2s"`
-	FailThreshold  time.Duration `help:"threshold for retrying a failed node" releaseDefault:"10m" devDefault:"2s"`
-	RequestTimeout time.Duration `help:"timeout for a single delete request" releaseDefault:"15s" devDefault:"2s"`
+	DialTimeout            time.Duration `help:"timeout for dialing nodes (0 means satellite default)" default:"3s" testDefault:"2s"`
+	FailThreshold          time.Duration `help:"threshold for retrying a failed node" releaseDefault:"10m" devDefault:"2s"`
+	RequestTimeout         time.Duration `help:"timeout for a single delete request" releaseDefault:"15s" devDefault:"2s"`
+	DeleteSuccessThreshold float64       `help:"Which fraction of nodes should be contacted successfully until the delete of a batch of pieces is considered completed" default:".75"`
 }
 
 const (
@@ -147,8 +148,9 @@ func (service *Service) Close() error {
 	return nil
 }
 
-// Delete deletes the pieces specified in the requests waiting until success threshold is reached.
-func (service *Service) Delete(ctx context.Context, requests []Request, successThreshold float64) (err error) {
+// DeleteWithCustomThreshold deletes the pieces specified in the requests,
+// returning when they have been deleted from the specified fraction of storage nodes.
+func (service *Service) DeleteWithCustomThreshold(ctx context.Context, requests []Request, successThreshold float64) (err error) {
 	defer mon.Task()(&ctx, len(requests), requestsPieceCount(requests), successThreshold)(&err)
 
 	if len(requests) == 0 {
@@ -226,6 +228,12 @@ func (service *Service) Delete(ctx context.Context, requests []Request, successT
 	return nil
 }
 
+// Delete deletes the pieces specified in the requests,
+// returning when they have been deleted from the default fraction of storage nodes.
+func (service *Service) Delete(ctx context.Context, requests []Request) (err error) {
+	return service.DeleteWithCustomThreshold(ctx, requests, service.config.DeleteSuccessThreshold)
+}
+
 // Request defines a deletion requests for a node.
 type Request struct {
 	Node storj.NodeURL
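
For clarity, a hypothetical caller sketch (not part of this commit): Delete now uses the configured DeleteSuccessThreshold, while DeleteWithCustomThreshold keeps the explicit-fraction behaviour the tests rely on. The helper names below are made up for illustration; the service types and methods come from the diff above.

package example

import (
	"context"

	"storj.io/storj/satellite/metainfo/piecedeletion"
)

// deletePiecesDefault is a hypothetical helper: it returns once the configured
// fraction of nodes (DeleteSuccessThreshold, default 0.75) has confirmed.
func deletePiecesDefault(ctx context.Context, service *piecedeletion.Service, requests []piecedeletion.Request) error {
	return service.Delete(ctx, requests)
}

// deletePiecesStrict is a hypothetical helper: it waits until every contacted
// node reports success, which is what the reconfigured tests need.
func deletePiecesStrict(ctx context.Context, service *piecedeletion.Service, requests []piecedeletion.Request) error {
	return service.DeleteWithCustomThreshold(ctx, requests, 1)
}
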
satellite/metainfo/piecedeletion/service_test.go (14 changes: 7 additions & 7 deletions)
@@ -126,7 +126,7 @@ func TestService_DeletePieces_AllNodesUp(t *testing.T) {
 		}
 
 		// ensure that no requests return an error
-		err := satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, nil, percentExp)
+		err := satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, nil, percentExp)
 		require.NoError(t, err)
 
 		var (
@@ -152,7 +152,7 @@ func TestService_DeletePieces_AllNodesUp(t *testing.T) {
 			requests = append(requests, nodePieces)
 		}
 
-		err = satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, requests, percentExp)
+		err = satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, requests, percentExp)
 		require.NoError(t, err)
 
 		planet.WaitForStorageNodeDeleters(ctx)
@@ -217,7 +217,7 @@ func TestService_DeletePieces_SomeNodesDown(t *testing.T) {
 			}
 		}
 
-		err := satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, requests, 0.9999)
+		err := satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, requests, 0.9999)
 		require.NoError(t, err)
 
 		planet.WaitForStorageNodeDeleters(ctx)
@@ -280,7 +280,7 @@ func TestService_DeletePieces_AllNodesDown(t *testing.T) {
 			require.NoError(t, planet.StopPeer(sn))
 		}
 
-		err := satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, requests, 0.9999)
+		err := satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, requests, 0.9999)
 		require.NoError(t, err)
 
 		planet.WaitForStorageNodeDeleters(ctx)
@@ -318,7 +318,7 @@ func TestService_DeletePieces_DisproportionateNumberOfRequestsAndNodes(t *testin
 			}
 		}
 
-		err := satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, requests, percentExp)
+		err := satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, requests, percentExp)
 		require.NoError(t, err)
 	})
}
@@ -333,7 +333,7 @@ func TestService_DeletePieces_Invalid(t *testing.T) {
 			{Pieces: make([]storj.PieceID, 1)},
 			{Pieces: make([]storj.PieceID, 1)},
 		}
-		err := service.Delete(ctx, nodesPieces, 1)
+		err := service.DeleteWithCustomThreshold(ctx, nodesPieces, 1)
 		require.Error(t, err)
 		assert.Contains(t, err.Error(), "request #0 is invalid")
 	})
@@ -392,7 +392,7 @@ func TestService_DeletePieces_Timeout(t *testing.T) {
 			storageNodeDB.SetLatency(delay)
 		}
 
-		err := satelliteSys.API.Metainfo.PieceDeletion.Delete(ctx, requests, 0.75)
+		err := satelliteSys.API.Metainfo.PieceDeletion.DeleteWithCustomThreshold(ctx, requests, 0.75)
 		require.NoError(t, err)
 		// A timeout error won't be propagated up to the service level
 		// but we'll know that the deletes didn't happen based on usedSpace
scripts/testdata/satellite-config.yaml.lock (3 changes: 3 additions & 0 deletions)
@@ -553,6 +553,9 @@ identity.key-path: /root/.local/share/storj/identity/satellite/identity.key
 # toggle flag if overlay is enabled
 # metainfo.overlay: true
 
+# Which fraction of nodes should be contacted successfully until the delete of a batch of pieces is considered completed
+# metainfo.piece-deletion.delete-success-threshold: 0.75
+
 # timeout for dialing nodes (0 means satellite default)
 # metainfo.piece-deletion.dial-timeout: 3s
 
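
A hypothetical override for a deployment or test environment that wants CommitObject to wait for every contacted node; the key name comes from the lock file above, and the value 1 corresponds to the maximum described in the commit message.

# wait for 100% of contacted nodes before a piece-deletion batch is considered completed
metainfo.piece-deletion.delete-success-threshold: 1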
