Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add normal/deep type of heal scanning #7251

Merged
merged 1 commit into from Mar 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/admin-heal-ops.go
Expand Up @@ -575,7 +575,7 @@ func (h *healSequence) healMinioSysMeta(metaPrefix string) func() error {
if h.isQuitting() {
return errHealStopSignalled
}
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
// The object might have been deleted by the time heal
// was attempted; we ignore this object and move on.
if isErrObjectNotFound(herr) {
Expand Down Expand Up @@ -718,7 +718,7 @@ func (h *healSequence) healObject(bucket, object string) error {
return errServerNotInitialized
}

hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
if isErrObjectNotFound(err) {
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/fs-v1.go
Expand Up @@ -1240,7 +1240,7 @@ func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealRe
}

// HealObject - no-op for fs. Valid only for XL.
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (
res madmin.HealResultItem, err error) {
logger.LogIf(ctx, NotImplemented{})
return res, NotImplemented{}
Expand Down
4 changes: 3 additions & 1 deletion cmd/fs-v1_test.go
Expand Up @@ -22,6 +22,8 @@ import (
"os"
"path/filepath"
"testing"

"github.com/minio/minio/pkg/madmin"
)

// Tests whether a parent directory is an object
Expand Down Expand Up @@ -390,7 +392,7 @@ func TestFSHealObject(t *testing.T) {
defer os.RemoveAll(disk)

obj := initFSObjects(disk, t)
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false)
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false, madmin.HealDeepScan)
if err == nil || !isSameType(err, NotImplemented{}) {
t.Fatalf("Heal Object should return NotImplemented error ")
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/gateway-unsupported.go
Expand Up @@ -102,7 +102,7 @@ func (a GatewayUnsupported) ListBucketsHeal(ctx context.Context) (buckets []Buck
}

// HealObject - Not implemented stub
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (h madmin.HealResultItem, e error) {
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (h madmin.HealResultItem, e error) {
return h, NotImplemented{}
}

Expand Down
2 changes: 1 addition & 1 deletion cmd/object-api-interface.go
Expand Up @@ -88,7 +88,7 @@ type ObjectLayer interface {
ReloadFormat(ctx context.Context, dryRun bool) error
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error)
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error)
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error

Expand Down
4 changes: 2 additions & 2 deletions cmd/xl-sets.go
Expand Up @@ -1296,8 +1296,8 @@ func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove b
}

// HealObject - heals inconsistent object on a hashedSet based on object name.
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) {
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove)
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error) {
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove, scanMode)
}

// Lists all buckets which need healing.
Expand Down
52 changes: 32 additions & 20 deletions cmd/xl-v1-healing-common.go
Expand Up @@ -22,6 +22,7 @@ import (
"time"

"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
)

// commonTime returns the most frequently occurring time from a list of times.
Expand Down Expand Up @@ -158,7 +159,7 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
// - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
object string) ([]StorageAPI, []error) {
object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
availableDisks := make([]StorageAPI, len(onlineDisks))
dataErrs := make([]error, len(onlineDisks))

Expand All @@ -168,27 +169,38 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
continue
}

erasureInfo := partsMetadata[i].Erasure
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
if err != nil {
dataErrs[i] = err
continue
}

// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
switch scanMode {
case madmin.HealDeepScan:
erasureInfo := partsMetadata[i].Erasure
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err
break
continue
}

// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err
break
}
}
case madmin.HealNormalScan:
for _, part := range partsMetadata[i].Parts {
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
if err != nil {
dataErrs[i] = err
harshavardhana marked this conversation as resolved.
Show resolved Hide resolved
break
}
}
}

Expand Down
8 changes: 5 additions & 3 deletions cmd/xl-v1-healing-common_test.go
Expand Up @@ -23,6 +23,8 @@ import (
"path/filepath"
"testing"
"time"

"github.com/minio/minio/pkg/madmin"
)

// validates functionality provided to find most common
Expand Down Expand Up @@ -239,7 +241,7 @@ func TestListOnlineDisks(t *testing.T) {
i+1, test.expectedTime, modTime)
}

availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
test.errs = newErrs

if test._tamperBackend != noTamper {
Expand Down Expand Up @@ -291,7 +293,7 @@ func TestDisksWithAllParts(t *testing.T) {
t.Fatalf("Failed to read xl meta data %v", err)
}

filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)

if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
Expand Down Expand Up @@ -328,7 +330,7 @@ func TestDisksWithAllParts(t *testing.T) {
}

errs = make([]error, len(xlDisks))
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)

if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
Expand Down
9 changes: 5 additions & 4 deletions cmd/xl-v1-healing.go
Expand Up @@ -208,7 +208,8 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t

// Heals an object by re-writing corrupt/missing erasure blocks.
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
quorum int, dryRun bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {

partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)

errCount := 0
Expand All @@ -232,7 +233,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)

// List of disks having all parts as per latest xl.json.
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)

// Initialize heal result object
result = madmin.HealResultItem{
Expand Down Expand Up @@ -621,7 +622,7 @@ func (xl xlObjects) isObjectDangling(metaArr []xlMetaV1, errs []error) (validMet
// FIXME: If an object was deleted and one disk was down,
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) {
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool, scanMode madmin.HealScanMode) (hr madmin.HealResultItem, err error) {
// Create context that also contains information about the object and bucket.
// The top level handler might not have this information.
reqInfo := logger.GetReqInfo(ctx)
Expand Down Expand Up @@ -670,5 +671,5 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
defer objectLock.RUnlock()

// Heal the object.
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun)
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun, scanMode)
}
6 changes: 4 additions & 2 deletions cmd/xl-v1-healing_test.go
Expand Up @@ -21,6 +21,8 @@ import (
"context"
"path/filepath"
"testing"

"github.com/minio/minio/pkg/madmin"
)

// Tests undoes and validates if the undoing completes successfully.
Expand Down Expand Up @@ -114,7 +116,7 @@ func TestHealObjectXL(t *testing.T) {
t.Fatalf("Failed to delete a file - %v", err)
}

_, err = obj.HealObject(context.Background(), bucket, object, false, false)
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil {
t.Fatalf("Failed to heal object - %v", err)
}
Expand All @@ -130,7 +132,7 @@ func TestHealObjectXL(t *testing.T) {
}

// Try healing now, expect to receive a read quorum error.
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
if _, ok := err.(InsufficientReadQuorum); !ok {
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)
Expand Down
5 changes: 3 additions & 2 deletions cmd/xl-v1-object_test.go
Expand Up @@ -28,6 +28,7 @@ import (
"time"

humanize "github.com/dustin/go-humanize"
"github.com/minio/minio/pkg/madmin"
)

func TestRepeatPutObjectPart(t *testing.T) {
Expand Down Expand Up @@ -308,7 +309,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}

_, err = xl.HealObject(context.Background(), bucket, object, false, false)
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -337,7 +338,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}

_, err = xl.HealObject(context.Background(), bucket, object, false, false)
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
if err != nil {
t.Fatal(err)
}
Expand Down
17 changes: 14 additions & 3 deletions pkg/madmin/heal-commands.go
Expand Up @@ -26,11 +26,22 @@ import (
"time"
)

// HealScanMode selects how thoroughly a heal operation inspects the
// parts of an object. HealOpts carries it under the JSON key
// "scanMode", so the numeric values are written out explicitly to
// keep them stable.
type HealScanMode int

// Supported heal scan modes.
const (
	// HealNormalScan checks that the parts are present and not outdated.
	HealNormalScan HealScanMode = 0
	// HealDeepScan verifies the bitrot checksums of the parts.
	HealDeepScan HealScanMode = 1
)

// HealOpts - collection of options for a heal sequence
type HealOpts struct {
Recursive bool `json:"recursive"`
DryRun bool `json:"dryRun"`
Remove bool `json:"remove"`
Recursive bool `json:"recursive"`
DryRun bool `json:"dryRun"`
Remove bool `json:"remove"`
ScanMode HealScanMode `json:"scanMode"`
}

// HealStartSuccess - holds information about a successfully started
Expand Down