Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable recovery using existing bucket #28

Merged
merged 1 commit into from
Jan 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,13 @@ func main() {
os.Exit(1)
}

// Grab platform status to determine where OpenShift is installed
platformStatus, err := platform.GetPlatformStatus(startupClient)
// Get infrastructureStatus (which contains the platformStatus).
infraStatus, err := platform.GetInfrastructureStatus(startupClient)
if err != nil {
log.Error(err, "Failed to retrieve infrastructure status")
os.Exit(1)
}
platformStatus, err := platform.GetPlatformStatus(startupClient, infraStatus)
if err != nil {
log.Error(err, "Failed to retrieve platform status")
os.Exit(1)
Expand Down
7 changes: 4 additions & 3 deletions deploy/credential_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ spec:
- effect: Allow
action:
- s3:CreateBucket
- s3:DeleteObjectTagging
- s3:GetBucketTagging
- s3:ListAllMyBuckets
- s3:ListBucket
- s3:PutBucketAcl
- s3:PutBucketPublicAccessBlock
- s3:PutBucketTagging
- s3:PutEncryptionConfiguration
- s3:PutLifecycleConfiguration
- s3:PutBucketTagging
- s3:DeleteObjectTagging
- s3:GetBucketTagging
resource: "*"
13 changes: 10 additions & 3 deletions pkg/controller/velero/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,22 @@ func (r *ReconcileVelero) Reconcile(request reconcile.Request) (reconcile.Result
return reconcile.Result{}, err
}

// Grab platform status to determine where OpenShift is installed
// Grab platformStatus to determine where OpenShift is installed.
platformStatusClient, err := platform.GetPlatformStatusClient()
if err != nil {
return reconcile.Result{}, err
}
platformStatus, err := platform.GetPlatformStatus(platformStatusClient)
infraStatus, err := platform.GetInfrastructureStatus(platformStatusClient)
if err != nil {
return reconcile.Result{}, err
}
platformStatus, err := platform.GetPlatformStatus(platformStatusClient, infraStatus)
if err != nil {
return reconcile.Result{}, err
}

// Grab the unique identifier for this cluster's infrastructure.
infraName := infraStatus.InfrastructureName

// Verify that we have received an AWS region from the platform
if platformStatus.AWS == nil || len(platformStatus.AWS.Region) < 1 {
Expand All @@ -151,7 +158,7 @@ func (r *ReconcileVelero) Reconcile(request reconcile.Request) (reconcile.Result
if instance.S3BucketReconcileRequired(s3ReconcilePeriod) {
// Always directly return from this, as we will either update the
// timestamp when complete, or return an error.
return r.provisionS3(reqLogger, s3Client, instance)
return r.provisionS3(reqLogger, s3Client, instance, infraName)
}

// Now go provision Velero
Expand Down
22 changes: 18 additions & 4 deletions pkg/controller/velero/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,29 @@ const (
bucketPrefix = "managed-velero-backups-"
)

func (r *ReconcileVelero) provisionS3(reqLogger logr.Logger, s3Client *awss3.S3, instance *veleroCR.Velero) (reconcile.Result, error) {
func (r *ReconcileVelero) provisionS3(reqLogger logr.Logger, s3Client *awss3.S3, instance *veleroCR.Velero, infraName string) (reconcile.Result, error) {
var err error
bucketLog := reqLogger.WithValues("S3Bucket.Name", instance.Status.S3Bucket.Name, "S3Bucket.Region", s3Client.Client.Config.Region)

// This switch handles the provisioning steps/checks
switch {
// We don't yet have a bucket name selected
case instance.Status.S3Bucket.Name == "":
log.Info("No S3 bucket defined")

// Use an existing bucket, if it exists.
log.Info("No S3 bucket defined. Searching for existing bucket to use")
existingBucket, err := s3.FindExistingBucket(s3Client, infraName)
if err != nil {
return reconcile.Result{}, err
}
if existingBucket != "" {
log.Info(fmt.Sprintf("Recovered existing bucket: %s", existingBucket))
instance.Status.S3Bucket.Name = existingBucket
instance.Status.S3Bucket.Provisioned = true
return reconcile.Result{}, r.statusUpdate(reqLogger, instance)
}

// Prepare to create a new bucket, if none exist.
proposedName := generateBucketName(bucketPrefix)
proposedBucketExists, err := s3.DoesBucketExist(s3Client, proposedName)
if err != nil {
Expand Down Expand Up @@ -66,7 +80,7 @@ func (r *ReconcileVelero) provisionS3(reqLogger logr.Logger, s3Client *awss3.S3,
return reconcile.Result{}, fmt.Errorf("error occurred when creating bucket %v: %v", instance.Status.S3Bucket.Name, err.Error())
}
}
err = s3.TagBucket(s3Client, instance.Status.S3Bucket.Name, defaultBackupStorageLocation)
err = s3.TagBucket(s3Client, instance.Status.S3Bucket.Name, defaultBackupStorageLocation, infraName)
if err != nil {
return reconcile.Result{}, fmt.Errorf("error occurred when tagging bucket %v: %v", instance.Status.S3Bucket.Name, err.Error())
}
Expand Down Expand Up @@ -119,7 +133,7 @@ func (r *ReconcileVelero) provisionS3(reqLogger logr.Logger, s3Client *awss3.S3,

// Make sure that tags are applied to buckets
bucketLog.Info("Enforcing S3 Bucket tags on S3 Bucket")
err = s3.TagBucket(s3Client, instance.Status.S3Bucket.Name, defaultBackupStorageLocation)
err = s3.TagBucket(s3Client, instance.Status.S3Bucket.Name, defaultBackupStorageLocation, infraName)
if err != nil {
return reconcile.Result{}, fmt.Errorf("error occurred when tagging bucket %v: %v", instance.Status.S3Bucket.Name, err.Error())
}
Expand Down
61 changes: 58 additions & 3 deletions pkg/s3/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
)

const (
bucketTagKey = "velero.io/backup-location"
bucketTagBackupLocation = "velero.io/backup-location"
bucketTagInfraName = "velero.io/infrastructureName"
)

// CreateBucket creates a new S3 bucket.
func CreateBucket(s3Client *s3.S3, bucketName string) error {
createBucketInput := &s3.CreateBucketInput{
ACL: aws.String(s3.BucketCannedACLPrivate),
Expand All @@ -34,6 +36,7 @@ func CreateBucket(s3Client *s3.S3, bucketName string) error {
return err
}

// DoesBucketExist checks that the bucket exists, and that we have access to it.
func DoesBucketExist(s3Client *s3.S3, bucketName string) (bool, error) {
input := &s3.HeadBucketInput{
Bucket: aws.String(bucketName),
Expand All @@ -58,6 +61,7 @@ func DoesBucketExist(s3Client *s3.S3, bucketName string) (bool, error) {
return true, nil
}

// EncryptBucket sets the encryption configuration for the bucket.
func EncryptBucket(s3Client *s3.S3, bucketName string) error {
bucketEncryptionInput := &s3.PutBucketEncryptionInput{
Bucket: aws.String(bucketName),
Expand All @@ -81,6 +85,7 @@ func EncryptBucket(s3Client *s3.S3, bucketName string) error {
return err
}

// BlockBucketPublicAccess blocks public access to the bucket's contents.
cblecker marked this conversation as resolved.
Show resolved Hide resolved
func BlockBucketPublicAccess(s3Client *s3.S3, bucketName string) error {
publicAccessBlockInput := &s3.PutPublicAccessBlockInput{
Bucket: aws.String(bucketName),
Expand All @@ -101,6 +106,7 @@ func BlockBucketPublicAccess(s3Client *s3.S3, bucketName string) error {
return err
}

// SetBucketLifecycle sets a lifecycle on the specified bucket.
func SetBucketLifecycle(s3Client *s3.S3, bucketName string) error {
bucketLifecycleConfigurationInput := &s3.PutBucketLifecycleConfigurationInput{
Bucket: aws.String(bucketName),
Expand Down Expand Up @@ -146,22 +152,71 @@ func CreateBucketTaggingInput(bucketname string, tags map[string]string) *s3.Put
return putInput
}

// ClearBucketTags wipes all existing tags from a bucket so that velero-specific
// tags can be applied to the bucket instead.
func ClearBucketTags(s3Client *s3.S3, bucketName string) (err error) {
deleteInput := &s3.DeleteBucketTaggingInput{Bucket: aws.String(bucketName)}
_, err = s3Client.DeleteBucketTagging(deleteInput)
return err
}

func TagBucket(s3Client *s3.S3, bucketName string, backUpLocation string) error {
// TagBucket adds tags to an S3 bucket. The tags are used to indicate that velero backups
// are stored in the bucket, and to identify the associated cluster.
func TagBucket(s3Client *s3.S3, bucketName string, backUpLocation string, infraName string) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of scope for this PR, but I think we should probably refactor this eventually to be a generic function that gets called multiple times for each time we want to tag a bucket. Just a thought for future work.

err := ClearBucketTags(s3Client, bucketName)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm. I didn't realize we actually wiped tags every time we call this. This isn't really impotent, is it?

Is this intended? What were your thoughts behind this, @c-e-brumm?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was that if we always deleted the tags and reset the tags they will always be the tags we want. This way a new tag can be added or a tag can be changed in one place in the code and it will always take effect on existing and new clusters. We won't need additional logic to validate tags if they get reset every time. Heavy handed but simple was the thought.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gotcha. There are some potential issues I see with this in the future, so we may want to revisit at some point in the future to make this code more robust.

if err != nil {
return fmt.Errorf("unable to clear %v bucket tags: %v", bucketName, err)
}
input := CreateBucketTaggingInput(bucketName, map[string]string{bucketTagKey: backUpLocation})
input := CreateBucketTaggingInput(bucketName, map[string]string{
bucketTagBackupLocation: backUpLocation,
bucketTagInfraName: infraName,
})
_, err = s3Client.PutBucketTagging(input)
if err != nil {
fmt.Println(err.Error())
return err
}
return nil
}

// FindExistingBucket looks for an S3 bucket matching this cluster's velero tags
// and infrastructure tags. If a matching bucket is found, the bucket name is returned.
func FindExistingBucket(s3Client *s3.S3, infraName string) (string, error) {
// List all buckets associated with this cluster's AWS account.
input := &s3.ListBucketsInput{}
result, err := s3Client.ListBuckets(input)
if err != nil {
fmt.Println(err.Error())
return "", err
}

for _, bucket := range result.Buckets {
request := &s3.GetBucketTaggingInput{
Bucket: aws.String(*bucket.Name),
}
response, err := s3Client.GetBucketTagging(request)
if ec2err, ok := err.(awserr.Error); ok && ec2err.Code() == "NoSuchTagSet" {
// If there is no tag set, exit this function without error.
return "", nil
} else if err != nil {
return "", err
}

var tagMatchesCluster, tagMatchesVelero bool
for _, tag := range response.TagSet {
if *tag.Key == bucketTagInfraName && *tag.Value == infraName {
tagMatchesCluster = true
}
if *tag.Key == bucketTagBackupLocation {
tagMatchesVelero = true
}
}

if tagMatchesCluster && tagMatchesVelero {
return *bucket.Name, nil
}
}

// No matching buckets found.
return "", nil
}
3 changes: 1 addition & 2 deletions pkg/s3/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"fmt"

"github.com/openshift/managed-velero-operator/version"

"github.com/operator-framework/operator-sdk/pkg/k8sutil"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -30,7 +30,6 @@ func NewS3Client(kubeClient client.Client, region string) (*s3.S3, error) {
var err error

awsConfig := &aws.Config{Region: aws.String(region)}

namespace, err := k8sutil.GetOperatorNamespace()
if err != nil {
return nil, fmt.Errorf("failed to get operator namespace: %v", err)
Expand Down
20 changes: 13 additions & 7 deletions pkg/util/platform/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,8 @@ func GetPlatformStatusClient() (client.Client, error) {
return client.New(cfg, client.Options{Scheme: scheme})
}

// GetPlatformStatus provides a backwards-compatible way to look up platform
// status. AWS is the special case. 4.1 clusters on AWS expose the region config
// only through install-config. New AWS clusters and all other 4.2+ platforms
// are configured via platform status.
func GetPlatformStatus(client client.Client) (*configv1.PlatformStatus, error) {
// GetInfrastructureStatus fetches the InfrastructureStatus for the cluster.
func GetInfrastructureStatus(client client.Client) (*configv1.InfrastructureStatus, error) {
var err error

// Retrieve the cluster infrastructure config.
Expand All @@ -63,7 +60,16 @@ func GetPlatformStatus(client client.Client) (*configv1.PlatformStatus, error) {
return nil, err
}

if status := infra.Status.PlatformStatus; status != nil {
return &infra.Status, nil
}

// GetPlatformStatus provides a backwards-compatible way to look up platform
// status. AWS is the special case. 4.1 clusters on AWS expose the region config
// only through install-config. New AWS clusters and all other 4.2+ platforms
// are configured via platform status.
func GetPlatformStatus(client client.Client, infraStatus *configv1.InfrastructureStatus) (*configv1.PlatformStatus, error) {

if status := infraStatus.PlatformStatus; status != nil {
// Only AWS needs backwards compatibility with install-config
if status.Type != configv1.AWSPlatformType {
return status, nil
Expand Down Expand Up @@ -92,7 +98,7 @@ func GetPlatformStatus(client client.Client) (*configv1.PlatformStatus, error) {
return &configv1.PlatformStatus{
//lint:ignore SA1019 ignore deprecation, as this function is specifically designed for backwards compatibility
//nolint:staticcheck // ref https://github.com/golangci/golangci-lint/issues/741
Type: infra.Status.Platform,
Type: infraStatus.Platform,
AWS: &configv1.AWSPlatformStatus{
Region: ic.Platform.AWS.Region,
},
Expand Down
7 changes: 6 additions & 1 deletion pkg/util/platform/platform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,13 @@ func TestGetPlatformStatus(t *testing.T) {
t.Fatalf("unable to create fake configmap object: %v", err)
}

infraStatus, err := GetInfrastructureStatus(fc)
if err != nil {
t.Fatalf("unable to get fake infrastructureStatus object: %v", err)
}

// Run test and compare
ps, err := GetPlatformStatus(fc)
ps, err := GetPlatformStatus(fc, infraStatus)
if err != nil {
t.Errorf("error on retrieving platform status: %v", err)
}
Expand Down