Skip to content

Commit

Permalink
Merge pull request #1271 from tjungblu/ETCD-606-again
Browse files Browse the repository at this point in the history
ETCD-606: Batch bundle revision rollout
  • Loading branch information
openshift-merge-bot[bot] committed Jun 10, 2024
2 parents a0be76e + 7e71119 commit 472b0be
Show file tree
Hide file tree
Showing 11 changed files with 157 additions and 55 deletions.
10 changes: 5 additions & 5 deletions bindata/etcd/pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ ${COMPUTED_ENV_VARS}
# this has a non-zero return code if the command is non-zero. If you use an export first, it doesn't and you
# will succeed when you should fail.
ETCD_INITIAL_CLUSTER=$(discover-etcd-initial-cluster \
--cacert=/etc/kubernetes/static-pod-certs/configmaps/etcd-serving-ca/ca-bundle.crt \
--cacert=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--cert=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt \
--key=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key \
--endpoints=${ALL_ETCD_ENDPOINTS} \
Expand Down Expand Up @@ -175,11 +175,11 @@ ${COMPUTED_ENV_VARS}
--initial-advertise-peer-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2380 \
--cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.crt \
--key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.key \
--trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-serving-ca/ca-bundle.crt \
--trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--client-cert-auth=true \
--peer-cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt \
--peer-key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key \
--peer-trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-peer-client-ca/ca-bundle.crt \
--peer-trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--peer-client-cert-auth=true \
--advertise-client-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2379 \
--listen-client-urls=https://${LISTEN_ON_ALL_IPS}:2379,unixs://${NODE_NODE_ENVVAR_NAME_IP}:0 \
Expand Down Expand Up @@ -255,8 +255,8 @@ ${COMPUTED_ENV_VARS}
--key-file /etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-metrics-NODE_NAME.key \
--cert /etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt \
--cert-file /etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-metrics-NODE_NAME.crt \
--cacert /etc/kubernetes/static-pod-certs/configmaps/etcd-peer-client-ca/ca-bundle.crt \
--trusted-ca-file /etc/kubernetes/static-pod-certs/configmaps/etcd-metrics-proxy-serving-ca/ca-bundle.crt \
--cacert /etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--trusted-ca-file /etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/metrics-ca-bundle.crt \
--listen-cipher-suites TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_AES_128_GCM_SHA256,TLS_AES_256_GCM_SHA384,TLS_CHACHA20_POLY1305_SHA256
env:
${COMPUTED_ENV_VARS}
Expand Down
4 changes: 2 additions & 2 deletions bindata/etcd/restore-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@ spec:
--initial-advertise-peer-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2380 \
--cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.crt \
--key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-NODE_NAME.key \
--trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-serving-ca/ca-bundle.crt \
--trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--client-cert-auth=true \
--peer-cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt \
--peer-key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key \
--peer-trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-peer-client-ca/ca-bundle.crt \
--peer-trusted-ca-file=/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt \
--peer-client-cert-auth=true \
--advertise-client-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2379 \
--listen-client-urls=https://${LISTEN_ON_ALL_IPS}:2379 \
Expand Down
21 changes: 19 additions & 2 deletions pkg/cmd/operator/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ package operator
import (
"context"
"fmt"
"net/http"

"github.com/openshift/cluster-etcd-operator/pkg/operator"
"github.com/openshift/cluster-etcd-operator/pkg/version"
"github.com/openshift/library-go/pkg/controller/controllercmd"
"github.com/spf13/cobra"
corev1 "k8s.io/api/core/v1"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/client-go/tools/record"
"net/http"
"strings"
)

func NewOperator() *cobra.Command {
cmd := controllercmd.
NewControllerCommandConfig("openshift-cluster-etcd-operator", version.Get(), operator.RunOperator).
WithEventRecorderOptions(EtcdOperatorCorrelatorOptions()).
WithHealthChecks(healthz.NamedCheck("controller-aliveness", func(_ *http.Request) error {
if !operator.AlivenessChecker.Alive() {
return fmt.Errorf("found unhealthy aliveness check, returning error")
Expand All @@ -27,3 +30,17 @@ func NewOperator() *cobra.Command {

return cmd
}

// EtcdOperatorCorrelatorOptions is a very strict correlator policy to avoid spamming etcd/apiserver with duplicated events
func EtcdOperatorCorrelatorOptions() record.CorrelatorOptions {
return record.CorrelatorOptions{
// only allow the same event five times in 10m
MaxEvents: 5,
MaxIntervalInSeconds: 600,
BurstSize: 5, // default: 25 (change allows a single source to send 5 events about object per minute)
QPS: 1. / 300., // default: 1/300 (change allows refill rate to 1 new event every 300s)
KeyFunc: func(event *corev1.Event) (aggregateKey string, localKey string) {
return strings.Join([]string{event.Type, event.Reason, event.Message}, "_"), event.Message
},
}
}
46 changes: 30 additions & 16 deletions pkg/cmd/render/certs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func TestCertSingleNode(t *testing.T) {
require.NoError(t, err)

require.Equal(t, 11, len(secrets))
require.Equal(t, 9, len(bundles))
require.Equal(t, 10, len(bundles))

assertCertificateCorrectness(t, secrets)
assertBundleCorrectness(t, secrets, bundles)
Expand All @@ -40,7 +40,7 @@ func TestCertsMultiNode(t *testing.T) {
require.NoError(t, err)

require.Equal(t, 17, len(secrets))
require.Equal(t, 9, len(bundles))
require.Equal(t, 10, len(bundles))

assertCertificateCorrectness(t, secrets)
assertBundleCorrectness(t, secrets, bundles)
Expand Down Expand Up @@ -116,27 +116,41 @@ func assertBundleCorrectness(t *testing.T, secrets []corev1.Secret, bundles []co
}

for _, bundle := range bundles {
bundleCerts, err := cert.ParseCertsPEM([]byte(bundle.Data["ca-bundle.crt"]))
require.NoError(t, err)
if bundle.Name == tlshelpers.EtcdAllBundlesConfigMapName {
serverBundleCerts, err := cert.ParseCertsPEM([]byte(bundle.Data["server-ca-bundle.crt"]))
require.NoError(t, err)
assertMatchingBundles(t, signerMap[tlshelpers.EtcdSignerCertSecretName], serverBundleCerts)

var signers []*x509.Certificate
if strings.Contains(bundle.Name, "metric") {
signers = signerMap[tlshelpers.EtcdMetricsSignerCertSecretName]
metricsBundleCerts, err := cert.ParseCertsPEM([]byte(bundle.Data["metrics-ca-bundle.crt"]))
require.NoError(t, err)
assertMatchingBundles(t, signerMap[tlshelpers.EtcdMetricsSignerCertSecretName], metricsBundleCerts)
} else {
signers = signerMap[tlshelpers.EtcdSignerCertSecretName]
}
bundleCerts, err := cert.ParseCertsPEM([]byte(bundle.Data["ca-bundle.crt"]))
require.NoError(t, err)

sort.SliceStable(signers, func(i, j int) bool {
return bytes.Compare(signers[i].Raw, signers[j].Raw) < 0
})
sort.SliceStable(bundleCerts, func(i, j int) bool {
return bytes.Compare(bundleCerts[i].Raw, bundleCerts[j].Raw) < 0
})
var signers []*x509.Certificate
if strings.Contains(bundle.Name, "metric") {
signers = signerMap[tlshelpers.EtcdMetricsSignerCertSecretName]
} else {
signers = signerMap[tlshelpers.EtcdSignerCertSecretName]
}

require.Equal(t, signers, bundleCerts)
assertMatchingBundles(t, signers, bundleCerts)
}
}
}

func assertMatchingBundles(t *testing.T, expectedBundle []*x509.Certificate, actualBundle []*x509.Certificate) {
sort.SliceStable(expectedBundle, func(i, j int) bool {
return bytes.Compare(expectedBundle[i].Raw, expectedBundle[j].Raw) < 0
})
sort.SliceStable(actualBundle, func(i, j int) bool {
return bytes.Compare(actualBundle[i].Raw, actualBundle[j].Raw) < 0
})

require.Equal(t, expectedBundle, actualBundle)
}

// we only check that all certificates were generated, the correctness is verified above
func assertNodeSpecificCertificates(t *testing.T, node *corev1.Node, secrets []corev1.Secret) {
expectedSet := sets.NewString(fmt.Sprintf("etcd-peer-%s", node.Name),
Expand Down
2 changes: 1 addition & 1 deletion pkg/etcdenvvar/envvarcontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ var (
defaultEnvResult = map[string]string{
"ALL_ETCD_ENDPOINTS": "https://192.168.2.0:2379,https://192.168.2.1:2379,https://192.168.2.2:2379",
"ETCDCTL_API": "3",
"ETCDCTL_CACERT": "/etc/kubernetes/static-pod-certs/configmaps/etcd-serving-ca/ca-bundle.crt",
"ETCDCTL_CACERT": "/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt",
"ETCDCTL_CERT": "/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt",
"ETCDCTL_ENDPOINTS": "https://192.168.2.0:2379,https://192.168.2.1:2379,https://192.168.2.2:2379",
"ETCDCTL_KEY": "/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key",
Expand Down
2 changes: 1 addition & 1 deletion pkg/etcdenvvar/etcd_env.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func getEtcdctlEnvVars(envVarContext envVarContext) (map[string]string, error) {
}
return map[string]string{
"ETCDCTL_API": "3",
"ETCDCTL_CACERT": "/etc/kubernetes/static-pod-certs/configmaps/etcd-serving-ca/ca-bundle.crt",
"ETCDCTL_CACERT": "/etc/kubernetes/static-pod-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt",
"ETCDCTL_CERT": "/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.crt",
"ETCDCTL_KEY": "/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-peer-NODE_NAME.key",
"ETCDCTL_ENDPOINTS": endpoints,
Expand Down
14 changes: 7 additions & 7 deletions pkg/operator/etcd_assets/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 53 additions & 3 deletions pkg/operator/etcdcertsigner/etcdcertsignercontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type EtcdCertSignerController struct {
secretInformer corev1informers.SecretInformer
secretLister corev1listers.SecretLister
secretClient corev1client.SecretsGetter
configMapClient corev1client.ConfigMapsGetter
quorumChecker ceohelpers.QuorumChecker

certConfig *certConfig
Expand Down Expand Up @@ -151,6 +152,7 @@ func NewEtcdCertSignerController(
secretInformer: secretInformer,
secretLister: secretLister,
secretClient: secretClient,
configMapClient: cmGetter,
quorumChecker: quorumChecker,
certConfig: certCfg,
signerExpirationGauge: signerExpirationGauge,
Expand Down Expand Up @@ -231,7 +233,8 @@ func (c *EtcdCertSignerController) syncAllMasterCertificates(ctx context.Context
return fmt.Errorf("error on ensuring metrics-signer cert: %w", err)
}

signerBundle, metricsSignerBundle, err := c.ensureBundles(ctx, []*crypto.CA{signerCaPair, newSignerCaPair}, []*crypto.CA{metricsSignerCaPair, newMetricsSignerCaPair})
signerBundle, metricsSignerBundle, err := c.ensureBundles(ctx, recorder,
[]*crypto.CA{signerCaPair, newSignerCaPair}, []*crypto.CA{metricsSignerCaPair, newMetricsSignerCaPair})
if err != nil {
return fmt.Errorf("error on ensuring bundles: %w", err)
}
Expand Down Expand Up @@ -308,22 +311,69 @@ func (c *EtcdCertSignerController) syncAllMasterCertificates(ctx context.Context

// ensureBundles will take the metrics and server CA certificates and ensure the bundle is updated.
// This will cause a revision rollout if a change in the bundle was detected.
// TODO(thomas): that bundle rollout is not transactional: it's very likely the first update will already trigger a new revision, the metrics could come in a follow-up
func (c *EtcdCertSignerController) ensureBundles(ctx context.Context, serverCA []*crypto.CA, metricsCA []*crypto.CA) (serverBundle []*x509.Certificate, metricsBundle []*x509.Certificate, err error) {
func (c *EtcdCertSignerController) ensureBundles(ctx context.Context, recorder events.Recorder,
serverCA []*crypto.CA, metricsCA []*crypto.CA) (serverBundle []*x509.Certificate, metricsBundle []*x509.Certificate, err error) {
for _, ca := range serverCA {
serverBundle, err = c.certConfig.signerCaBundle.EnsureConfigMapCABundle(ctx, ca)
if err != nil {
return
}
}

serverCaBytes, err := crypto.EncodeCertificates(serverBundle...)
if err != nil {
return nil, nil, fmt.Errorf("could not encode server bundle: %w", err)
}

for _, ca := range metricsCA {
metricsBundle, err = c.certConfig.metricsSignerCaBundle.EnsureConfigMapCABundle(ctx, ca)
if err != nil {
return
}
}

metricsCaBytes, err := crypto.EncodeCertificates(metricsBundle...)
if err != nil {
return nil, nil, fmt.Errorf("could not encode metrics bundle: %w", err)
}

// Write a configmap that aggregates all certs for all nodes for the static
// pod controller to watch. A single configmap ensures that a bundle change
// (e.g. node addition or cert rotation) triggers at most one static pod
// rollout. If multiple configmaps were written, the static pod controller
// might initiate rollout before all configmaps had been updated.
configMap := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Namespace: operatorclient.TargetNamespace,
Name: tlshelpers.EtcdAllBundlesConfigMapName,
Annotations: map[string]string{
apiannotations.OpenShiftComponent: "Etcd",
},
},
Data: map[string]string{
"server-ca-bundle.crt": string(serverCaBytes),
"metrics-ca-bundle.crt": string(metricsCaBytes),
},
}

safe, err := c.quorumChecker.IsSafeToUpdateRevision()
if err != nil {
return nil, nil, fmt.Errorf("EtcdCertSignerController.ensureBundles can't evaluate whether quorum is safe: %w", err)
}

if !safe {
return nil, nil, fmt.Errorf("skipping EtcdCertSignerController.ensureBundles reconciliation due to insufficient quorum")
}
_, changed, err := resourceapply.ApplyConfigMap(ctx, c.configMapClient, recorder, configMap)
if err != nil {
return nil, nil, fmt.Errorf("could not apply bundle configmap: %w", err)
}

if changed {
// TODO(thomas): set the rollout bundle revision in the CEO status
// TODO(thomas): we need to notify the outside caller that it's not safe to rotate the leafs just yet
}

return
}

Expand Down
Loading

0 comments on commit 472b0be

Please sign in to comment.