Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-1.26] Fix s3 snapshot restore #8734

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/etcd/etcd.go
Expand Up @@ -354,10 +354,10 @@ func (e *ETCD) Reset(ctx context.Context, rebootstrap func() error) error {
// If asked to restore from a snapshot, do so
if e.config.ClusterResetRestorePath != "" {
if e.config.EtcdS3 {
logrus.Infof("Retrieving etcd snapshot %s from S3", e.config.ClusterResetRestorePath)
if err := e.initS3IfNil(ctx); err != nil {
return err
}
logrus.Infof("Retrieving etcd snapshot %s from S3", e.config.ClusterResetRestorePath)
if err := e.s3.Download(ctx); err != nil {
return err
}
Expand Down
50 changes: 30 additions & 20 deletions pkg/etcd/s3.go
Expand Up @@ -13,7 +13,6 @@ import (
"os"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand All @@ -28,6 +27,7 @@ import (
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
)

var (
Expand Down Expand Up @@ -100,30 +100,40 @@ func NewS3(ctx context.Context, config *config.Control) (*S3, error) {
}
logrus.Infof("S3 bucket %s exists", config.EtcdS3BucketName)

for config.Runtime.Core == nil {
runtime.Gosched()
s3 := &S3{
config: config,
client: c,
nodeName: os.Getenv("NODE_NAME"),
}

// cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ
var clusterID string
if ns, err := config.Runtime.Core.Core().V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{}); err != nil {
logrus.Warnf("Failed to set cluster ID: %v", err)
if config.ClusterReset {
logrus.Debug("Skip setting S3 snapshot cluster ID and token during cluster-reset")
} else {
clusterID = string(ns.UID)
}
if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) {
if config.Runtime.Core == nil {
return false, nil
}

tokenHash, err := util.GetTokenHash(config)
if err != nil {
return nil, errors.Wrap(err, "failed to get server token hash for etcd snapshot")
// cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ
ns, err := config.Runtime.Core.Core().V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{})
if err != nil {
return false, errors.Wrap(err, "failed to set S3 snapshot cluster ID")
}
s3.clusterID = string(ns.UID)

tokenHash, err := util.GetTokenHash(config)
if err != nil {
return false, errors.Wrap(err, "failed to set S3 snapshot server token hash")
}
s3.tokenHash = tokenHash

return true, nil
}); err != nil {
return nil, err
}
}

return &S3{
config: config,
client: c,
clusterID: clusterID,
tokenHash: tokenHash,
nodeName: os.Getenv("NODE_NAME"),
}, nil
return s3, nil
}

// upload uploads the given snapshot to the configured S3
Expand Down Expand Up @@ -229,7 +239,6 @@ func (s *S3) Download(ctx context.Context) error {
snapshotFile := filepath.Join(snapshotDir, s.config.ClusterResetRestorePath)
metadataFile := filepath.Join(snapshotDir, "..", metadataDir, s.config.ClusterResetRestorePath)

logrus.Debugf("Downloading snapshot from s3://%s/%s", s.config.EtcdS3BucketName, snapshotKey)
if err := s.downloadSnapshot(ctx, snapshotKey, snapshotFile); err != nil {
return err
}
Expand All @@ -243,6 +252,7 @@ func (s *S3) Download(ctx context.Context) error {

// downloadSnapshot downloads the snapshot file from S3 using the minio API.
func (s *S3) downloadSnapshot(ctx context.Context, key, file string) error {
logrus.Debugf("Downloading snapshot from s3://%s/%s", s.config.EtcdS3BucketName, key)
ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout)
defer cancel()
defer os.Chmod(file, 0600)
Expand Down