Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1954121: [release-4.7] Improve cert controller detection and correction of invalid certs #577

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
261 changes: 171 additions & 90 deletions pkg/operator/etcdcertsigner/etcdcertsignercontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package etcdcertsigner
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"fmt"
"strings"
"time"

corev1 "k8s.io/api/core/v1"
Expand All @@ -14,7 +17,7 @@ import (
"k8s.io/client-go/kubernetes"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
corev1listers "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/util/cert"
"k8s.io/klog/v2"

operatorv1 "github.com/openshift/api/operator/v1"
configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
Expand All @@ -29,6 +32,39 @@ import (
"github.com/openshift/cluster-etcd-operator/pkg/tlshelpers"
)

// Annotation key used to associate a cert secret with a node uid. This allows
// cert regeneration if a node was deleted and added with the same name.
const nodeUIDAnnotation = "etcd-operator.alpha.openshift.io/cert-secret-node-uid"

// etcdCertConfig defines the configuration required to maintain a cert secret for an etcd member.
type etcdCertConfig struct {
// Name of the secret in namespace openshift-config that contains the CA used to issue the cert
caSecretName string
// Function that derives the name of the cert secret from the node name
secretNameFunc func(nodeName string) string
// Function that creates the key material for a new cert
newCertFunc func(caCert, caKey []byte, ipAddresses []string) (*bytes.Buffer, *bytes.Buffer, error)
}

// Define configuration for creating etcd cert secrets.
var certConfigMap = map[string]etcdCertConfig{
"peer": {
caSecretName: "etcd-signer",
secretNameFunc: tlshelpers.GetPeerClientSecretNameForNode,
newCertFunc: tlshelpers.CreatePeerCertKey,
},
"server": {
caSecretName: "etcd-signer",
secretNameFunc: tlshelpers.GetServingSecretNameForNode,
newCertFunc: tlshelpers.CreateServerCertKey,
},
"metric": {
caSecretName: "etcd-metric-signer",
secretNameFunc: tlshelpers.GetServingMetricsSecretNameForNode,
newCertFunc: tlshelpers.CreateMetricCertKey,
},
}

type EtcdCertSignerController struct {
kubeClient kubernetes.Interface
operatorClient v1helpers.OperatorClient
Expand Down Expand Up @@ -102,22 +138,11 @@ func (c *EtcdCertSignerController) syncAllMasters(recorder events.Recorder) erro

errs := []error{}
for _, node := range nodes {
nodeInternalIPs, err := dnshelpers.GetInternalIPAddressesForNodeName(node)
if err != nil {
errs = append(errs, err)
continue
}
err = c.ensureAndValidateSecrets(node, nodeInternalIPs, recorder)
if errors.IsNotFound(err) {
// if any of the secrets are not present, create all secrets for the node
err = c.createSecretsForNode(node, nodeInternalIPs, recorder)
}

if err != nil {
errs = append(errs, err)
certErrs := c.ensureCertSecrets(node, recorder)
if certErrs != nil {
errs = append(errs, certErrs...)
}
}

if len(errs) > 0 {
return utilerrors.NewAggregate(errs)
}
Expand Down Expand Up @@ -193,108 +218,164 @@ func (c *EtcdCertSignerController) syncAllMasters(recorder events.Recorder) erro
return utilerrors.NewAggregate(errs)
}

// validateCertificateSAN makes sure that a given certificate is valid
// at least for the SANs defined in the configuration.
// NOTE: certName argument is used only for forming the error message
func validateCertificateSAN(certData map[string][]byte, ipAddresses []string, certName string) error {
certificates, err := cert.ParseCertsPEM(certData["tls.crt"])
// ensureCertSecret attempts to ensure the existence of secrets containing the
// etcd cert (cert+key) pairs needed for an etcd member.
func (c *EtcdCertSignerController) ensureCertSecrets(node *corev1.Node, recorder events.Recorder) []error {
ipAddresses, err := dnshelpers.GetInternalIPAddressesForNodeName(node)
if err != nil {
return fmt.Errorf("could not parse TLS cert for %s: %w", certName, err)
return []error{err}
}
for _, ipAddress := range ipAddresses {
if err := certificates[0].VerifyHostname(ipAddress); err != nil {
return fmt.Errorf("SAN for the certificate %s does not include %s: %w", certName, ipAddress, err)

errs := []error{}
for _, certConfig := range certConfigMap {
err := c.ensureCertSecret(node, ipAddresses, certConfig, recorder)
if err != nil {
errs = append(errs, err)
continue
}
}
return nil
return errs
}

// Ensure that all secrets exist for the node and validate that the IP addresses are correct.
func (c *EtcdCertSignerController) ensureAndValidateSecrets(node *corev1.Node, nodeInternalIPs []string, recorder events.Recorder) error {
etcdPeerClientSecretName := tlshelpers.GetPeerClientSecretNameForNode(node.Name)
etcdServingSecretName := tlshelpers.GetServingSecretNameForNode(node.Name)
metricsServingSecretName := tlshelpers.GetServingMetricsSecretNameForNode(node.Name)
// ensureCertSecret attempts to ensure the existence of a secret containing an
// etcd cert (cert+key) pair. The secret will be created if it does not
// exist. If the secret exists but contains an invalid cert pair, it will be
// updated with a new cert pair.
func (c *EtcdCertSignerController) ensureCertSecret(node *corev1.Node, ipAddresses []string, certConfig etcdCertConfig, recorder events.Recorder) error {
secretName := certConfig.secretNameFunc(node.Name)

secretNames := []string{etcdPeerClientSecretName, etcdServingSecretName, metricsServingSecretName}
secret, err := c.secretLister.Secrets(operatorclient.TargetNamespace).Get(secretName)
if err != nil && !errors.IsNotFound(err) {
return err
}

errs := []error{}
for _, secretName := range secretNames {
secret, err := c.secretLister.Secrets(operatorclient.TargetNamespace).Get(secretName)
if errors.IsNotFound(err) {
storedUID := ""
if secret != nil {
storedUID := secret.Annotations[nodeUIDAnnotation]
invalidMsg, err := checkCertValidity(secret.Data["tls.crt"], secret.Data["tls.key"], ipAddresses, string(node.UID), storedUID)
if err != nil {
return err
}
if err != nil {
errs = append(errs, err)
continue
if len(invalidMsg) > 0 {
klog.V(4).Info("TLS cert %s is invalid and will be regenerated: %v", secretName, invalidMsg)
// A nil secret will prompt creation of a new keypair
secret = nil
}
if secret.Data == nil {
errs = append(errs, fmt.Errorf("secret data not found in %s", secretName))
continue
}

cert := []byte{}
key := []byte{}
if secret != nil {
if storedUID == string(node.UID) {
// Nothing to do: Cert pair is valid, node uid is current
return nil
}
if err := validateCertificateSAN(secret.Data, nodeInternalIPs, secretName); err != nil {
errs = append(errs, err)
cert = secret.Data["tls.crt"]
key = secret.Data["tls.key"]
} else {
// Generate a new cert pair. The secret is missing or its contents are invalid.
caSecret, err := c.secretLister.Secrets(operatorclient.GlobalUserSpecifiedConfigNamespace).Get(certConfig.caSecretName)
if err != nil {
return err
}
}
if len(errs) > 0 {
return utilerrors.NewAggregate(errs)
certBuffer, keyBuffer, err := certConfig.newCertFunc(caSecret.Data["tls.crt"], caSecret.Data["tls.key"], ipAddresses)
if err != nil {
return err
}
cert = certBuffer.Bytes()
key = keyBuffer.Bytes()
}

return nil
return c.applySecret(secretName, string(node.UID), cert, key, recorder)
}

func (c *EtcdCertSignerController) createSecretsForNode(node *corev1.Node, nodeInternalIPs []string, recorder events.Recorder) error {
etcdPeerClientSecretName := tlshelpers.GetPeerClientSecretNameForNode(node.Name)
etcdServingSecretName := tlshelpers.GetServingSecretNameForNode(node.Name)
metricsServingSecretName := tlshelpers.GetServingMetricsSecretNameForNode(node.Name)
// get the signers
etcdCASecret, err := c.secretLister.Secrets(operatorclient.GlobalUserSpecifiedConfigNamespace).Get("etcd-signer")
if err != nil {
return err
}
etcdMetricCASecret, err := c.secretLister.Secrets(operatorclient.GlobalUserSpecifiedConfigNamespace).Get("etcd-metric-signer")
if err != nil {
return err
}
func (c *EtcdCertSignerController) applySecret(secretName, nodeUID string, cert, key []byte, recorder events.Recorder) error {
//TODO: Update annotations Not Before and Not After for Cert Rotation
secret := newCertSecret(secretName, nodeUID, cert, key)
_, _, err := resourceapply.ApplySecret(c.secretClient, recorder, secret)
return err
}

// create the certificates and update them in the API
pCert, pKey, err := tlshelpers.CreatePeerCertKey(etcdCASecret.Data["tls.crt"], etcdCASecret.Data["tls.key"], nodeInternalIPs)
if err != nil {
return err
// newCertSecret ensures consistency of secret creation between the controller
// and its tests.
func newCertSecret(secretName, nodeUID string, cert, key []byte) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: secretName,
Namespace: operatorclient.TargetNamespace,
Annotations: map[string]string{
nodeUIDAnnotation: nodeUID,
},
},
Type: corev1.SecretTypeTLS,
Data: map[string][]byte{
"tls.crt": cert,
"tls.key": key,
},
}
}

if err := c.createSecret(etcdPeerClientSecretName, pCert, pKey, recorder); err != nil {
return err
// checkCertValidity validates the provided cert bytes. If the cert needs to be
// regenerated, a message will be returned indicating why. An empty message
// indicates a valid cert. An error will be returned if the cert is not valid
// and should not be regenerated.
func checkCertValidity(certBytes, keyBytes []byte, ipAddresses []string, nodeUID, storedUID string) (string, error) {
// Loading the keypair without error indicates the key material is valid and
// the cert and private key are related.
keyPair, err := tls.X509KeyPair(certBytes, keyBytes)
if err != nil {
// Invalid keypair - regen required
return fmt.Sprintf("%v", err), nil
}
sCert, sKey, err := tlshelpers.CreateServerCertKey(etcdCASecret.Data["tls.crt"], etcdCASecret.Data["tls.key"], nodeInternalIPs)
leafCert, err := x509.ParseCertificate(keyPair.Certificate[0])
if err != nil {
return err
// Should never happen once x509KeyPair() succeeds
return fmt.Sprintf("%v", err), nil
}

if err := c.createSecret(etcdServingSecretName, sCert, sKey, recorder); err != nil {
return err
// Check that the cert is valid for the provided ip addresses
errs := []error{}
for _, ipAddress := range ipAddresses {
if err := leafCert.VerifyHostname(ipAddress); err != nil {
errs = append(errs, err)
}
}

metricCert, metricKey, err := tlshelpers.CreateMetricCertKey(etcdMetricCASecret.Data["tls.crt"], etcdMetricCASecret.Data["tls.key"], nodeInternalIPs)
if err != nil {
return err
if len(errs) > 0 {
// When a cluster is upgraded to 4.8 - the first release to annotate
// cert secrets with the node UID - previously created secrets will
// initially not have a node UID set. Assume that the lack of a stored
// node UID indicates that the certs were generated for the current node
// so that if the cert is not valid for the node's current ip addresses
// it will be flagged as an error.
//
// TODO(marun) The check for a missing stored uid can be removed in 4.9
// since by then all cert secrets will be guaranteed to have a stored
// node uid. That would change the conditional from ternary to binary
// and allow the use of a boolean indication of node uid change.
nodeUIDUnchanged := nodeUID == storedUID || len(storedUID) == 0

if nodeUIDUnchanged {
// This is an error condition. If the cert SAN doesn't include all
// of ip's for the node that it was generated for, an out-of-band
// etcd cluster membership change may be required. The operator
// doesn't currently handle this.
return "", utilerrors.NewAggregate(errs)
} else {
// A different node uid indicates node removal and addition with the
// same name. etcd on the node will be a new cluster member and
// needs a new certificate.
msgs := []string{}
for _, err := range errs {
msgs = append(msgs, fmt.Sprintf("%v", err))
}
return strings.Join(msgs, ", "), nil
}
}

if err := c.createSecret(metricsServingSecretName, metricCert, metricKey, recorder); err != nil {
return err
}
// TODO(marun) Check that the certificate was issued by the CA

return nil
}
// TODO(marun) Check that the certificate is not expired or due for replacement

func (c *EtcdCertSignerController) createSecret(secretName string, cert *bytes.Buffer, key *bytes.Buffer, recorder events.Recorder) error {
//TODO: Update annotations Not Before and Not After for Cert Rotation
_, _, err := resourceapply.ApplySecret(c.secretClient, recorder, &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: operatorclient.TargetNamespace},
Type: corev1.SecretTypeTLS,
Data: map[string][]byte{
"tls.crt": cert.Bytes(),
"tls.key": key.Bytes(),
},
})
return err
// Cert is valid
return "", nil
}