
Commit

Merge pull request #367 from ironcladlou/cleanup-bootstrap-endpoint
Bug 1832923: Forget bootstrap etcd member IP after bootstrap
openshift-merge-robot committed Jun 5, 2020
2 parents 0f6093f + c314c0b commit abe09ec
Showing 7 changed files with 859 additions and 26 deletions.
1 change: 1 addition & 0 deletions go.mod
@@ -24,4 +24,5 @@ require (
 	k8s.io/client-go v0.18.3
 	k8s.io/component-base v0.18.3
 	k8s.io/klog v1.0.0
+	k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89
 )
20 changes: 8 additions & 12 deletions pkg/etcdcli/etcdcli.go
@@ -74,28 +74,24 @@ func (g *etcdClientGetter) getEtcdClient() (*clientv3.Client, error) {

 	network, err := g.networkLister.Get("cluster")
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to list cluster network: %w", err)
 	}

 	etcdEndpoints := []string{}
 	nodes, err := g.nodeLister.List(labels.Set{"node-role.kubernetes.io/master": ""}.AsSelector())
 	for _, node := range nodes {
 		internalIP, err := dnshelpers.GetEscapedPreferredInternalIPAddressForNodeName(network, node)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("failed to get internal IP for node: %w", err)
 		}
 		etcdEndpoints = append(etcdEndpoints, fmt.Sprintf("https://%s:2379", internalIP))
 	}

-	hostEtcd, err := g.configmapsLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
+	configmap, err := g.configmapsLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
 	if err != nil {
-		return nil, err
-	}
-	bootstrapIP, ok := hostEtcd.Annotations[BootstrapIPAnnotationKey]
-	if !ok {
-		klog.V(2).Infof("configmaps/etcd-endpoints is missing annotation %s", BootstrapIPAnnotationKey)
+		return nil, fmt.Errorf("failed to list endpoints: %w", err)
 	}
-	if bootstrapIP != "" {
+	if bootstrapIP, ok := configmap.Annotations[BootstrapIPAnnotationKey]; ok && bootstrapIP != "" {
 		// escape if IPv6
 		if net.ParseIP(bootstrapIP).To4() == nil {
 			bootstrapIP = "[" + bootstrapIP + "]"
@@ -112,11 +108,11 @@ func (g *etcdClientGetter) getEtcdClient() (*clientv3.Client, error) {

 	c, err := getEtcdClient(etcdEndpoints)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to get etcd client: %w", err)
 	}
 	if g.cachedClient != nil {
 		if err := g.cachedClient.Close(); err != nil {
-			utilruntime.HandleError(err)
+			utilruntime.HandleError(fmt.Errorf("failed to close cached client: %w", err))
 		}
 	}
 	g.cachedClient = c
@@ -146,7 +142,7 @@ func getEtcdClient(endpoints []string) (*clientv3.Client, error) {

 	cli, err := clientv3.New(*cfg)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to make etcd client for endpoints %v: %w", endpoints, err)
 	}
 	return cli, err
 }
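All of these wraps use Go 1.13's %w verb, which preserves the wrapped error for errors.Is/errors.As instead of flattening it into a string. A small sketch of why that matters to callers (the function names here are illustrative, not from the diff):

package main

import (
	"context"
	"errors"
	"fmt"
)

// dial stands in for a failing clientv3.New call; the name is illustrative.
func dial() error {
	return context.DeadlineExceeded
}

func getClient() error {
	if err := dial(); err != nil {
		// %w wraps instead of flattening, so the cause stays matchable.
		return fmt.Errorf("failed to make etcd client: %w", err)
	}
	return nil
}

func main() {
	err := getClient()
	fmt.Println(err)                                      // failed to make etcd client: context deadline exceeded
	fmt.Println(errors.Is(err, context.DeadlineExceeded)) // true
}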
93 changes: 79 additions & 14 deletions pkg/operator/etcdendpointscontroller/etcdendpointscontroller.go
@@ -13,50 +13,50 @@ import (
 	"github.com/openshift/library-go/pkg/operator/v1helpers"
 	operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/kubernetes"
 	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
 	corev1listers "k8s.io/client-go/listers/core/v1"
+	"k8s.io/klog"

+	"github.com/openshift/cluster-etcd-operator/pkg/etcdcli"
 	"github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient"
 )

 // EtcdEndpointsController maintains a configmap resource with
 // IP addresses for etcd. It should never depend on DNS directly or transitively.
 type EtcdEndpointsController struct {
-	operatorClient  v1helpers.OperatorClient
+	operatorClient  v1helpers.StaticPodOperatorClient
 	nodeLister      corev1listers.NodeLister
 	configmapLister corev1listers.ConfigMapLister
 	configmapClient corev1client.ConfigMapsGetter
 }

 func NewEtcdEndpointsController(
-	operatorClient v1helpers.OperatorClient,
+	operatorClient v1helpers.StaticPodOperatorClient,
 	eventRecorder events.Recorder,
 	kubeClient kubernetes.Interface,
 	kubeInformers operatorv1helpers.KubeInformersForNamespaces,
 ) factory.Controller {
-	kubeInformersForTargetNamespace := kubeInformers.InformersFor(operatorclient.TargetNamespace)
-	configmapsInformer := kubeInformersForTargetNamespace.Core().V1().ConfigMaps()
-	kubeInformersForCluster := kubeInformers.InformersFor("")
-	nodeInformer := kubeInformersForCluster.Core().V1().Nodes()
+	nodeInformer := kubeInformers.InformersFor("").Core().V1().Nodes()

 	c := &EtcdEndpointsController{
 		operatorClient:  operatorClient,
 		nodeLister:      nodeInformer.Lister(),
-		configmapLister: configmapsInformer.Lister(),
+		configmapLister: kubeInformers.ConfigMapLister(),
 		configmapClient: kubeClient.CoreV1(),
 	}
 	return factory.New().ResyncEvery(time.Minute).WithInformers(
 		operatorClient.Informer(),
-		configmapsInformer.Informer(),
+		kubeInformers.InformersFor(operatorclient.TargetNamespace).Core().V1().ConfigMaps().Informer(),
 		nodeInformer.Informer(),
 	).WithSync(c.sync).ToController("EtcdEndpointsController", eventRecorder.WithComponentSuffix("etcd-endpoints-controller"))
 }

 func (c *EtcdEndpointsController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
-	err := c.syncConfigMap(ctx, syncCtx.Recorder())
+	err := c.syncConfigMap(syncCtx.Recorder())

 	if err != nil {
 		_, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{
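The condition update is truncated by the collapsed region above. For orientation, here is a sketch of the degraded-condition pattern that library-go operators typically use at this point; the condition type and reason strings are assumptions, not taken from the diff:

package sketch

import (
	operatorv1 "github.com/openshift/api/operator/v1"
	"github.com/openshift/library-go/pkg/operator/v1helpers"
)

// reportSyncError sets or clears a degraded-style condition based on the
// sync result. The Type and Reason values are assumed for illustration.
func reportSyncError(client v1helpers.OperatorClient, syncErr error) error {
	cond := operatorv1.OperatorCondition{
		Type:   "EtcdEndpointsDegraded", // assumed condition type
		Status: operatorv1.ConditionFalse,
	}
	if syncErr != nil {
		cond.Status = operatorv1.ConditionTrue
		cond.Reason = "ErrorUpdatingEtcdEndpoints" // assumed reason
		cond.Message = syncErr.Error()
	}
	// UpdateStatus applies the condition through the operator client, as in
	// the truncated block above.
	if _, _, updateErr := v1helpers.UpdateStatus(client, v1helpers.UpdateConditionFn(cond)); updateErr != nil {
		return updateErr
	}
	return syncErr
}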
@@ -83,9 +83,29 @@ func (c *EtcdEndpointsController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
 	return nil
 }

-func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder events.Recorder) error {
+func (c *EtcdEndpointsController) syncConfigMap(recorder events.Recorder) error {
+	bootstrapComplete, err := isBootstrapComplete(c.configmapLister, c.operatorClient)
+	if err != nil {
+		return fmt.Errorf("couldn't determine bootstrap status: %w", err)
+	}
+
 	required := configMapAsset()

+	// If the bootstrap IP is present on the existing configmap, either copy it
+	// forward or remove it if possible so clients can forget about it.
+	if existing, err := c.configmapLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints"); err == nil {
+		if existingIP, hasExistingIP := existing.Annotations[etcdcli.BootstrapIPAnnotationKey]; hasExistingIP {
+			if bootstrapComplete {
+				// remove the annotation: the trailing "-" marks it for
+				// deletion when the configmap is applied below
+				required.Annotations[etcdcli.BootstrapIPAnnotationKey+"-"] = existingIP
+			} else {
+				required.Annotations[etcdcli.BootstrapIPAnnotationKey] = existingIP
+			}
+		}
+	} else if !errors.IsNotFound(err) {
+		return fmt.Errorf("couldn't get configmap %s/%s: %w", operatorclient.TargetNamespace, "etcd-endpoints", err)
+	}

 	// create endpoint addresses for each node
 	nodes, err := c.nodeLister.List(labels.Set{"node-role.kubernetes.io/master": ""}.AsSelector())
 	if err != nil {
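The etcdcli.BootstrapIPAnnotationKey+"-" trick leans on the apply machinery's merge convention: as I understand library-go's resourcemerge behavior, a metadata key ending in "-" asks for that key to be deleted on apply. A self-contained sketch of that convention (my own illustration, not the library-go implementation; the key and value are hypothetical):

package main

import (
	"fmt"
	"strings"
)

// mergeAnnotations illustrates the merge convention: a required key with a
// trailing "-" deletes that key from the existing map; other keys are set.
func mergeAnnotations(existing, required map[string]string) map[string]string {
	out := map[string]string{}
	for k, v := range existing {
		out[k] = v
	}
	for k, v := range required {
		if strings.HasSuffix(k, "-") {
			delete(out, strings.TrimSuffix(k, "-"))
			continue
		}
		out[k] = v
	}
	return out
}

func main() {
	// Hypothetical key standing in for etcdcli.BootstrapIPAnnotationKey.
	const key = "example.io/etcd-bootstrap-ip"
	existing := map[string]string{key: "10.0.0.5"}
	required := map[string]string{key + "-": "10.0.0.5"} // the bootstrapComplete case
	fmt.Println(mergeAnnotations(existing, required))    // map[], the IP is forgotten
}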
@@ -112,15 +132,60 @@ func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder events.Recorder) error {

 	required.Data = endpointAddresses

-	_, _, err = resourceapply.ApplyConfigMap(c.configmapClient, recorder, required)
-	return err
+	// Apply endpoint updates
+	if _, _, err := resourceapply.ApplyConfigMap(c.configmapClient, recorder, required); err != nil {
+		return err
+	}
+
+	return nil
 }

 func configMapAsset() *corev1.ConfigMap {
 	return &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "etcd-endpoints",
-			Namespace: operatorclient.TargetNamespace,
+			Name:        "etcd-endpoints",
+			Namespace:   operatorclient.TargetNamespace,
+			Annotations: map[string]string{},
 		},
 	}
 }

+// isBootstrapComplete returns true if bootstrap has completed. This is used to
+// indicate whether it's safe for clients to forget about the bootstrap member IP.
+func isBootstrapComplete(configMapClient corev1listers.ConfigMapLister, staticPodClient v1helpers.StaticPodOperatorClient) (bool, error) {
+	// do a cheap check first to see whether bootstrapping has reported completion
+	bootstrapFinishedConfigMap, err := configMapClient.ConfigMaps("kube-system").Get("bootstrap")
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// If the resource was deleted (e.g. by an admin) after bootstrap is
+			// actually complete, this is a false negative.
+			klog.V(4).Infof("bootstrap considered incomplete because the kube-system/bootstrap configmap wasn't found")
+			return false, nil
+		}
+		// We don't know, give up quickly.
+		return false, fmt.Errorf("failed to get configmap %s/%s: %w", "kube-system", "bootstrap", err)
+	}
+
+	if status, ok := bootstrapFinishedConfigMap.Data["status"]; !ok || status != "complete" {
+		// bootstrap hasn't been torn down yet, do nothing
+		klog.V(4).Infof("bootstrap considered incomplete because status is %q", status)
+		return false, nil
+	}
+
+	// now check that the revision rollout has stabilized
+	_, status, _, err := staticPodClient.GetStaticPodOperatorState()
+	if err != nil {
+		return false, fmt.Errorf("failed to get static pod operator state: %w", err)
+	}
+	if status.LatestAvailableRevision == 0 {
+		return false, nil
+	}
+	for _, curr := range status.NodeStatuses {
+		if curr.CurrentRevision != status.LatestAvailableRevision {
+			klog.V(4).Infof("bootstrap considered incomplete because revision %d is still in progress", status.LatestAvailableRevision)
+			return false, nil
+		}
+	}
+	return true, nil
+}
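For reference, the first gate in isBootstrapComplete passes once kube-system/bootstrap reports a complete status. A sketch of a configmap that would satisfy the check, written as a Go literal (only the name, namespace, and status key/value come from the diff; everything else is scaffolding):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// The shape the first gate expects: kube-system/bootstrap with
	// Data["status"] == "complete", written when bootstrap is torn down.
	cm := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{Name: "bootstrap", Namespace: "kube-system"},
		Data:       map[string]string{"status": "complete"},
	}
	fmt.Println(cm.Data["status"] == "complete") // true, so the first gate passes
}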
