Skip to content

Commit

Permalink
ETCD-565: add manual etcd signer cert rotation e2e test
Browse files Browse the repository at this point in the history
  • Loading branch information
tjungblu committed Apr 29, 2024
1 parent 4ded56b commit 9ece7d8
Show file tree
Hide file tree
Showing 10 changed files with 21,971 additions and 18,589 deletions.
2 changes: 1 addition & 1 deletion test/extended/dr/OWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ reviewers:
- tjungblu
approvers:
- deads2k
- soltysh
- hasbro17
- sttts
- dusk125
- Elbehery
- tjungblu
16 changes: 8 additions & 8 deletions test/extended/etcd/OWNERS
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
reviewers:
- csrwng
- dusk125
- hasbro17
- hexfusion
- ironcladlou
- smarterclayton
- Elbehery
- tjungblu
approvers:
- csrwng
- deads2k
- soltysh
- hasbro17
- hexfusion
- ironcladlou
- smarterclayton
- dusk125
- Elbehery
- tjungblu
173 changes: 173 additions & 0 deletions test/extended/etcd/cert_rotation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package etcd

import (
"context"
g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
"github.com/openshift/library-go/test/library"
exutil "github.com/openshift/origin/test/extended/util"
"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"math/rand"
"strings"
"time"
)

var _ = g.Describe("[sig-etcd][Feature:CertRotation] etcd", func() {
defer g.GinkgoRecover()

ctx := context.TODO()
oc := exutil.NewCLIWithoutNamespace("etcd-certs").AsAdmin()

g.It("can manually rotate signer certificates [Slow][Timeout:30m]", func() {
	// Namespaced clients for every resource this test reads or writes.
	apiServerSecrets := oc.AdminKubeClient().CoreV1().Secrets("openshift-kube-apiserver")
	apiServerPods := oc.AdminKubeClient().CoreV1().Pods("openshift-kube-apiserver")
	etcdSecrets := oc.AdminKubeClient().CoreV1().Secrets("openshift-etcd")
	etcdPods := oc.AdminKubeClient().CoreV1().Pods("openshift-etcd")
	configSecrets := oc.AdminKubeClient().CoreV1().Secrets("openshift-config")

	// Snapshot the leaf certificate secrets before the rotation so their
	// contents can be compared once the rollouts are done.
	clientCertBefore, err := apiServerSecrets.Get(ctx, "etcd-client", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	leafCertsBefore, err := etcdSecrets.Get(ctx, "etcd-all-certs", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	// As of 4.16 a manual signer rotation is effectively a copy of the signer
	// secret into openshift-config, comparable to:
	//   $ oc get secret etcd-signer -n openshift-etcd -ojson | \
	//       jq 'del(.metadata["namespace","creationTimestamp","resourceVersion","selfLink","uid"])' | \
	//       oc apply -n openshift-config -f -
	signer, err := etcdSecrets.Get(ctx, "etcd-signer", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	// Replace the metadata wholesale: only the name and the target namespace
	// are carried over to the copy.
	signer.ObjectMeta = v1.ObjectMeta{Name: "etcd-signer", Namespace: "openshift-config"}
	_, err = configSecrets.Update(ctx, signer, v1.UpdateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	g.GinkgoT().Log("waiting for etcd/apiserver to stabilize on the same revision")
	// Wait for the etcd pods first, then the apiserver pods, before looking
	// at the leaf certificates again.
	etcdRolloutErr := errors.Wrap(
		library.WaitForPodsToStabilizeOnTheSameRevision(g.GinkgoT(), etcdPods, "etcd=true",
			5, time.Minute, 30*time.Second, 30*time.Minute),
		"timed out waiting for etcd pods to stabilize on the same revision",
	)
	o.Expect(etcdRolloutErr).NotTo(o.HaveOccurred())

	apiServerRolloutErr := errors.Wrap(
		library.WaitForPodsToStabilizeOnTheSameRevision(g.GinkgoT(), apiServerPods, "apiserver=true",
			5, time.Minute, 30*time.Second, 30*time.Minute),
		"timed out waiting for APIServer pods to stabilize on the same revision",
	)
	o.Expect(apiServerRolloutErr).NotTo(o.HaveOccurred())

	// Both leaf certificate secrets must now differ from their snapshots.
	clientCertAfter, err := apiServerSecrets.Get(ctx, "etcd-client", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(clientCertAfter.Data).NotTo(o.Equal(clientCertBefore.Data))

	leafCertsAfter, err := etcdSecrets.Get(ctx, "etcd-all-certs", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(leafCertsAfter.Data).NotTo(o.Equal(leafCertsBefore.Data))
})

g.It("can manually rotate metrics signer certificates [Slow][Timeout:30m]", func() {
	// Namespaced clients for every resource this test reads or writes.
	etcdSecrets := oc.AdminKubeClient().CoreV1().Secrets("openshift-etcd")
	configSecrets := oc.AdminKubeClient().CoreV1().Secrets("openshift-config")
	etcdPods := oc.AdminKubeClient().CoreV1().Pods("openshift-etcd")

	// Snapshot the metrics client certificate before the rotation.
	metricCertBefore, err := etcdSecrets.Get(ctx, "etcd-metric-client", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	// As of 4.16 a manual signer rotation is effectively a copy of the signer
	// secret into openshift-config, comparable to:
	//   $ oc get secret etcd-metric-signer -n openshift-etcd -ojson | \
	//       jq 'del(.metadata["namespace","creationTimestamp","resourceVersion","selfLink","uid"])' | \
	//       oc apply -n openshift-config -f -
	metricSigner, err := etcdSecrets.Get(ctx, "etcd-metric-signer", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	// Replace the metadata wholesale: only the name and the target namespace
	// are carried over to the copy.
	metricSigner.ObjectMeta = v1.ObjectMeta{Name: "etcd-metric-signer", Namespace: "openshift-config"}
	_, err = configSecrets.Update(ctx, metricSigner, v1.UpdateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	g.GinkgoT().Log("waiting for etcd to stabilize on the same revision")
	// Wait for the etcd pods to settle before looking at the certificate again.
	rolloutErr := errors.Wrap(
		library.WaitForPodsToStabilizeOnTheSameRevision(g.GinkgoT(), etcdPods, "etcd=true",
			5, time.Minute, 30*time.Second, 30*time.Minute),
		"timed out waiting for etcd pods to stabilize on the same revision",
	)
	o.Expect(rolloutErr).NotTo(o.HaveOccurred())

	// The metrics client certificate must now differ from its snapshot.
	metricCertAfter, err := etcdSecrets.Get(ctx, "etcd-metric-client", v1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(metricCertAfter.Data).NotTo(o.Equal(metricCertBefore.Data))
	// TODO check whether we still have prometheus metrics
})

g.It("can recreate dynamic certificates [Slow][Timeout:15m]", func() {
	// Deletes one etcd peer certificate secret, waits for a secret with the
	// same name to exist again and then waits for the etcd pods to settle on
	// one revision.
	etcdSecretsClient := oc.AdminKubeClient().CoreV1().Secrets("openshift-etcd")
	etcdPodClient := oc.AdminKubeClient().CoreV1().Pods("openshift-etcd")

	allEtcdSecrets, err := etcdSecretsClient.List(ctx, v1.ListOptions{})
	o.Expect(err).ToNot(o.HaveOccurred())

	// shuffle first so that the peer cert picked below varies between runs
	rand.Shuffle(len(allEtcdSecrets.Items), func(i, j int) {
		allEtcdSecrets.Items[i], allEtcdSecrets.Items[j] = allEtcdSecrets.Items[j], allEtcdSecrets.Items[i]
	})

	// take the first secret whose name marks it as a peer cert
	var currentSecretName string
	for _, item := range allEtcdSecrets.Items {
		if strings.Contains(item.Name, "etcd-peer") {
			currentSecretName = item.Name
			break
		}
	}
	// fail early with a clear message when there is nothing to delete
	o.Expect(currentSecretName).ToNot(o.BeEmpty(), "no etcd-peer secret found in openshift-etcd")

	g.GinkgoT().Logf("Deleting secret %s...", currentSecretName)
	err = etcdSecretsClient.Delete(ctx, currentSecretName, v1.DeleteOptions{})
	o.Expect(err).ToNot(o.HaveOccurred())

	g.GinkgoT().Log("waiting for the secret to be recreated...")
	err = wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) {
		_, err := etcdSecretsClient.Get(ctx, currentSecretName, v1.GetOptions{})
		if apierrors.IsNotFound(err) {
			// not recreated yet: report "not done" WITHOUT an error, because
			// returning an error from the condition aborts the poll immediately
			return false, nil
		}
		if err != nil {
			// any other failure is unexpected and ends the wait
			return false, err
		}
		return true, nil
	})
	// errors.Wrap returns nil for a nil error, so success stays a nil error here
	err = errors.Wrap(err, "timed out waiting for secret to be recreated by CEO")
	o.Expect(err).ToNot(o.HaveOccurred())

	g.GinkgoT().Log("waiting for etcd to stabilize on the same revision")
	// wait until the etcd pods have settled on a single revision again
	// NOTE(review): the last duration passed below is 30 minutes while the test
	// name carries [Timeout:15m] - confirm which of the two is intended.
	err = library.WaitForPodsToStabilizeOnTheSameRevision(g.GinkgoT(), etcdPodClient, "etcd=true",
		5, 1*time.Minute, 30*time.Second, 30*time.Minute)
	err = errors.Wrap(err, "timed out waiting for etcd pods to stabilize on the same revision")
	o.Expect(err).ToNot(o.HaveOccurred())
})

g.It("can recreate trust bundle [Slow][Timeout:15m]", func() {
	// Deletes the etcd CA bundle config map, waits for a config map with the
	// same name to exist again and then waits for the etcd pods to settle on
	// one revision.
	etcdConfigMapClient := oc.AdminKubeClient().CoreV1().ConfigMaps("openshift-etcd")
	etcdPodClient := oc.AdminKubeClient().CoreV1().Pods("openshift-etcd")
	bundleName := "etcd-ca-bundle"

	err := etcdConfigMapClient.Delete(ctx, bundleName, v1.DeleteOptions{})
	err = errors.Wrap(err, "error while deleting etcd CA bundle")
	o.Expect(err).ToNot(o.HaveOccurred())

	g.GinkgoT().Log("waiting for the bundle to be recreated...")
	err = wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) {
		_, err := etcdConfigMapClient.Get(ctx, bundleName, v1.GetOptions{})
		if apierrors.IsNotFound(err) {
			// not recreated yet: report "not done" WITHOUT an error, because
			// returning an error from the condition aborts the poll immediately
			return false, nil
		}
		if err != nil {
			// any other failure is unexpected and ends the wait
			return false, err
		}
		return true, nil
	})
	// errors.Wrap returns nil for a nil error, so success stays a nil error here
	err = errors.Wrap(err, "timed out waiting for bundle to be recreated by CEO")
	o.Expect(err).ToNot(o.HaveOccurred())

	g.GinkgoT().Log("waiting for etcd to stabilize on the same revision")
	// wait until the etcd pods have settled on a single revision again
	// NOTE(review): the last duration passed below is 30 minutes while the test
	// name carries [Timeout:15m] - confirm which of the two is intended.
	err = library.WaitForPodsToStabilizeOnTheSameRevision(g.GinkgoT(), etcdPodClient, "etcd=true",
		5, 1*time.Minute, 30*time.Second, 30*time.Minute)
	err = errors.Wrap(err, "timed out waiting for etcd pods to stabilize on the same revision")
	o.Expect(err).ToNot(o.HaveOccurred())
})
})

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9ece7d8

Please sign in to comment.