Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Revert "Revert "Encryption Key Rotation Changes""
  • Loading branch information
ryansann committed Dec 14, 2020
1 parent a6327bb commit 49e158a
Show file tree
Hide file tree
Showing 8 changed files with 337 additions and 63 deletions.
229 changes: 185 additions & 44 deletions cluster/encryption.go
Expand Up @@ -6,26 +6,30 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"strings"
"sync"

ghodssyaml "github.com/ghodss/yaml"
"github.com/pkg/errors"
normantypes "github.com/rancher/norman/types"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
sigsyaml "sigs.k8s.io/yaml"

"github.com/rancher/rke/k8s"
"github.com/rancher/rke/log"
"github.com/rancher/rke/services"
"github.com/rancher/rke/templates"
v3 "github.com/rancher/rke/types"
"github.com/rancher/rke/util"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
apiserverconfig "k8s.io/apiserver/pkg/apis/config"
apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/retry"
sigsyaml "sigs.k8s.io/yaml"
)

const (
Expand Down Expand Up @@ -113,94 +117,214 @@ func (c *Cluster) DisableSecretsEncryption(ctx context.Context, currentCluster *
return nil
}

const (
rewriteSecretsOperation = "rewrite-secrets"
secretBatchSize = 250
)

// RewriteSecrets does the following:
// - retrieves all cluster secrets in batches with size of <secretBatchSize>
// - triggers rewrites with new encryption key by sending each secret over a channel consumed by workers that perform the rewrite
// - logs progress of rewrite operation
func (c *Cluster) RewriteSecrets(ctx context.Context) error {
log.Infof(ctx, "Rewriting cluster secrets")
var errgrp errgroup.Group
k8sClient, err := k8s.NewClient(c.LocalKubeConfigPath, c.K8sWrapTransport)
if err != nil {
return fmt.Errorf("failed to initialize new kubernetes client: %v", err)

k8sClient, cliErr := k8s.NewClient(c.LocalKubeConfigPath, c.K8sWrapTransport)
if cliErr != nil {
return fmt.Errorf("failed to initialize new kubernetes client: %v", cliErr)
}
secretsList, err := k8s.GetSecretsList(k8sClient, "")
if err != nil {
return err

rewrites := make(chan interface{}, secretBatchSize)
go func() {
defer close(rewrites) // exiting this go routine triggers workers to exit

retryErr := func(err error) bool { // all returned errors can be retried
return true
}

var continueToken string
var secrets []v1.Secret
var restart bool
for {
err := retry.OnError(retry.DefaultRetry, retryErr, func() error {
l, err := k8sClient.CoreV1().Secrets("").List(ctx, metav1.ListOptions{
Limit: secretBatchSize, // keep the per request secrets batch size small to avoid client timeouts
Continue: continueToken,
})
if err != nil {
if isExpiredTokenErr(err) { // restart list operation due to token expiration
logrus.Debugf("[%v] continue token expired, restarting list operation", rewriteSecretsOperation)
continueToken = ""
restart = true
return nil
}
return err
}

secrets = append(secrets, l.Items...)
continueToken = l.Continue

return nil
})
if err != nil {
cliErr = err
break
}

// send this batch to workers for rewrite
// duplicates are ok because we cache the names of secrets that have been rewritten, thus workers will only rewrite each secret once
for _, s := range secrets {
rewrites <- s
}
secrets = nil // reset secrets since they've been sent to workers

// if there's no continue token and the list operation doesn't need to be restarted, we've retrieved all secrets
if continueToken == "" && !restart {
break
}

restart = false
}

logrus.Debugf("[%v] All secrets retrieved and sent for rewrite", rewriteSecretsOperation)
}()

// NOTE: since we retrieve secrets in batches, we don't know total number of secrets up front.
// Telling the user how many we've rewritten so far is the best we can do
done := make(chan struct{}, SyncWorkers)
defer close(done)
go func() {
var rewritten int
for range done {
rewritten++
if rewritten%50 == 0 { // log a message every 50 secrets
log.Infof(ctx, "[%s] %v secrets rewritten", rewriteSecretsOperation, rewritten)
}
}
}()

getSecretID := func(s v1.Secret) string {
return strings.Join([]string{s.Namespace, s.Name}, "/")
}

secretsQueue := util.GetObjectQueue(secretsList.Items)
// track secrets that have been rewritten
// this is needed in case the continue token expires and the list secrets operation needs to be restarted
rewritten := make(map[string]struct{})
var rmtx sync.RWMutex

// spawn workers to perform secret rewrites
var errgrp errgroup.Group
for w := 0; w < SyncWorkers; w++ {
errgrp.Go(func() error {
var errList []error
for secret := range secretsQueue {
for secret := range rewrites {
s := secret.(v1.Secret)
err := rewriteSecret(k8sClient, &s)
if err != nil {
errList = append(errList, err)
id := getSecretID(s)

rmtx.RLock()
_, ok := rewritten[id]
rmtx.RUnlock()

if !ok {
err := rewriteSecret(k8sClient, &s)
if err != nil {
errList = append(errList, err)
}

rmtx.Lock()
rewritten[id] = struct{}{}
rmtx.Unlock()

done <- struct{}{}
}
}

return util.ErrList(errList)
})
}
if err := errgrp.Wait(); err != nil {
return err
logrus.Errorf("[%v] error: %v", rewriteSecretsOperation, err)
return err // worker error from rewrites
}
log.Infof(ctx, "Cluster secrets rewritten successfully")
return nil

if cliErr != nil {
log.Infof(ctx, "[%s] Operation encountered error: %v", rewriteSecretsOperation, cliErr)
} else {
log.Infof(ctx, "[%s] Operation completed", rewriteSecretsOperation)
}

return cliErr
}

func (c *Cluster) RotateEncryptionKey(ctx context.Context, fullState *FullState) error {
//generate new key
// generate new key
newKey, err := generateEncryptionKey()
if err != nil {
return err
}

oldKey, err := c.extractActiveKey(c.EncryptionConfig.EncryptionProviderFile)
if err != nil {
return err
}
// reverse the keys order in the file, making newKey the Active Key
initialKeyList := []*encryptionKey{ // order is critical here!
newKey,
oldKey,
}
initialProviderConfig, err := providerFileFromKeyList(keyList{KeyList: initialKeyList})

logrus.Debug("adding new encryption key, provider config: [newKey, oldKey]")

// Ensure encryption is done with newKey
err = c.updateEncryptionProvider(ctx, []*encryptionKey{newKey, oldKey}, fullState)
if err != nil {
return err
}
c.EncryptionConfig.EncryptionProviderFile = initialProviderConfig
if err := c.DeployEncryptionProviderFile(ctx); err != nil {
return err
}
// commit to state as soon as possible
logrus.Debugf("[%s] Updating cluster state", services.ControlRole)
if err := c.UpdateClusterCurrentState(ctx, fullState); err != nil {
return err
}
if err := services.RestartKubeAPIWithHealthcheck(ctx, c.ControlPlaneHosts, c.LocalConnDialerFactory, c.Certificates); err != nil {
return err
}
// rewrite secrets

// rewrite secrets via updates to secrets
if err := c.RewriteSecrets(ctx); err != nil {
// if there's a rewrite error, the cluster will need to be restored, so redeploy the initial encryption provider config
var updateErr error
for i := 0; i < 3; i++ { // up to 3 retries
updateErr = c.updateEncryptionProvider(ctx, []*encryptionKey{oldKey}, fullState)
if updateErr == nil {
break
}
}

if updateErr != nil {
err = errors.Wrap(err, updateErr.Error())
}

return err
}

// At this point, all secrets have been rewritten using the newKey, so we remove the old one.
finalKeyList := []*encryptionKey{
newKey,
logrus.Debug("removing old encryption key, provider config: [newKey]")

err = c.updateEncryptionProvider(ctx, []*encryptionKey{newKey}, fullState)
if err != nil {
return err
}
finalProviderConfig, err := providerFileFromKeyList(keyList{KeyList: finalKeyList})

return nil
}

// updateEncryptionProvider renders an encryption provider file from the given
// ordered key list, deploys it to the control plane, persists the cluster
// state, and restarts kube-apiserver (with a healthcheck) so the new provider
// config takes effect.
func (c *Cluster) updateEncryptionProvider(ctx context.Context, keys []*encryptionKey, fullState *FullState) error {
	providerConfig, err := providerFileFromKeyList(keyList{KeyList: keys})
	if err != nil {
		return err
	}

	c.EncryptionConfig.EncryptionProviderFile = providerConfig
	if err := c.DeployEncryptionProviderFile(ctx); err != nil {
		return err
	}

	// commit to state as soon as possible
	logrus.Debugf("[%s] Updating cluster state", services.ControlRole)
	if err := c.UpdateClusterCurrentState(ctx, fullState); err != nil {
		return err
	}
	if err := services.RestartKubeAPIWithHealthcheck(ctx, c.ControlPlaneHosts, c.LocalConnDialerFactory, c.Certificates); err != nil {
		return err
	}

	return nil
}

Expand Down Expand Up @@ -301,6 +425,18 @@ func (c *Cluster) generateDisabledEncryptionProviderFile() (string, error) {
return disabledProviderFileFromKey(key)
}

const (
	// errExpiredToken is the substring the apiserver includes in the error
	// returned when a list continue token has expired (HTTP 410 Gone).
	errExpiredToken = "The provided continue parameter is too old"
)

// isExpiredTokenErr returns true if the error passed in is due to a continue token expiring.
func isExpiredTokenErr(err error) bool {
	return strings.Contains(err.Error(), errExpiredToken)
}

func rewriteSecret(k8sClient *kubernetes.Clientset, secret *v1.Secret) error {
var err error
if err = k8s.UpdateSecret(k8sClient, secret); err == nil {
Expand All @@ -309,6 +445,10 @@ func rewriteSecret(k8sClient *kubernetes.Clientset, secret *v1.Secret) error {
if apierrors.IsConflict(err) {
secret, err = k8s.GetSecret(k8sClient, secret.Name, secret.Namespace)
if err != nil {
// if the secret no longer exists, we can skip it since it does not need to be rewritten
if apierrors.IsNotFound(err) {
return nil
}
return err
}
err = k8s.UpdateSecret(k8sClient, secret)
Expand All @@ -335,6 +475,7 @@ func isEncryptionEnabled(rkeConfig *v3.RancherKubernetesEngineConfig) bool {
}
return false
}

func isEncryptionCustomConfig(rkeConfig *v3.RancherKubernetesEngineConfig) bool {
if isEncryptionEnabled(rkeConfig) &&
rkeConfig.Services.KubeAPI.SecretsEncryptionConfig.CustomConfig != nil {
Expand Down

0 comments on commit 49e158a

Please sign in to comment.