Skip to content

Commit

Permalink
Revert "Bug 1832923: remove bootstrap annotation after bootstrapping"
Browse files Browse the repository at this point in the history
  • Loading branch information
hexfusion committed May 28, 2020
1 parent 036c0b2 commit d32171c
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 107 deletions.
20 changes: 12 additions & 8 deletions pkg/etcdcli/etcdcli.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,28 @@ func (g *etcdClientGetter) getEtcdClient() (*clientv3.Client, error) {

network, err := g.networkLister.Get("cluster")
if err != nil {
return nil, fmt.Errorf("failed to list cluster network: %w", err)
return nil, err
}

etcdEndpoints := []string{}
nodes, err := g.nodeLister.List(labels.Set{"node-role.kubernetes.io/master": ""}.AsSelector())
for _, node := range nodes {
internalIP, err := dnshelpers.GetEscapedPreferredInternalIPAddressForNodeName(network, node)
if err != nil {
return nil, fmt.Errorf("failed to get internal IP for node: %w", err)
return nil, err
}
etcdEndpoints = append(etcdEndpoints, fmt.Sprintf("https://%s:2379", internalIP))
}

configmap, err := g.configmapsLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
hostEtcd, err := g.configmapsLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
if err != nil {
return nil, fmt.Errorf("failed to list endpoints: %w", err)
return nil, err
}
bootstrapIP, ok := hostEtcd.Annotations[BootstrapIPAnnotationKey]
if !ok {
klog.V(2).Infof("configmaps/etcd-endpoints is missing annotation %s", BootstrapIPAnnotationKey)
}
if bootstrapIP, ok := configmap.Annotations[BootstrapIPAnnotationKey]; ok && bootstrapIP != "" {
if bootstrapIP != "" {
// escape if IPv6
if net.ParseIP(bootstrapIP).To4() == nil {
bootstrapIP = "[" + bootstrapIP + "]"
Expand All @@ -108,11 +112,11 @@ func (g *etcdClientGetter) getEtcdClient() (*clientv3.Client, error) {

c, err := getEtcdClient(etcdEndpoints)
if err != nil {
return nil, fmt.Errorf("failed to get etcd client: %w", err)
return nil, err
}
if g.cachedClient != nil {
if err := g.cachedClient.Close(); err != nil {
utilruntime.HandleError(fmt.Errorf("failed to close cached client: %w", err))
utilruntime.HandleError(err)
}
}
g.cachedClient = c
Expand Down Expand Up @@ -142,7 +146,7 @@ func getEtcdClient(endpoints []string) (*clientv3.Client, error) {

cli, err := clientv3.New(*cfg)
if err != nil {
return nil, fmt.Errorf("failed to make etcd client for endpoints %v: %w", endpoints, err)
return nil, err
}
return cli, err
}
Expand Down
100 changes: 2 additions & 98 deletions pkg/operator/etcdendpointscontroller/etcdendpointscontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,24 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/kubernetes"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
corev1listers "k8s.io/client-go/listers/core/v1"

"github.com/openshift/cluster-etcd-operator/pkg/etcdcli"
"github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient"
)

// EtcdEndpointsController maintains a configmap resource with
// IP addresses for etcd. It should never depend on DNS directly or transitively.
type EtcdEndpointsController struct {
operatorClient v1helpers.OperatorClient
etcdClient etcdcli.EtcdClient
nodeLister corev1listers.NodeLister
configmapLister corev1listers.ConfigMapLister
configmapClient corev1client.ConfigMapsGetter
}

func NewEtcdEndpointsController(
operatorClient v1helpers.OperatorClient,
etcdClient etcdcli.EtcdClient,
eventRecorder events.Recorder,
kubeClient kubernetes.Interface,
kubeInformers operatorv1helpers.KubeInformersForNamespaces,
Expand All @@ -48,7 +44,6 @@ func NewEtcdEndpointsController(

c := &EtcdEndpointsController{
operatorClient: operatorClient,
etcdClient: etcdClient,
nodeLister: nodeInformer.Lister(),
configmapLister: configmapsInformer.Lister(),
configmapClient: kubeClient.CoreV1(),
Expand Down Expand Up @@ -89,29 +84,6 @@ func (c *EtcdEndpointsController) sync(ctx context.Context, syncCtx factory.Sync
}

func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder events.Recorder) error {
// This resource should have been created at installation and should never
// be deleted. Treat the inability to access it as fatal to smoke out any
// problems which could indicate the resource is gone.
existing, err := c.configmapLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
if err != nil {
return fmt.Errorf("couldn't get required configmap %s/%s: %w", operatorclient.TargetNamespace, "etcd-endpoints", err)
}
_, existingHasBootstrapIP := existing.Annotations[etcdcli.BootstrapIPAnnotationKey]

// If the bootstrap IP annotation appears to have been prematurely removed, generate a warning.
// This could be a transient condition depending on the state of caches at the time of sync,
// so a warning seems sufficient. This check may be overly paranoid, but a significant amount
// of time has been spent in the past tracing problems back to unexpected deletions and
// modifications of the endpoints structure (previously a literal endpoints resource).
isSafe, err := c.isSafeToRemoveBootstrapIP()
if err != nil {
return fmt.Errorf("failed to determine whether it's safe to remove the boostrap IP annotation: %w", err)
}
if !existingHasBootstrapIP && !isSafe {
recorder.Warningf("EtcdEndpointsMissingBootstrapIP", "the bootstrap IP annotation has been removed from the %s/%s "+
"configmap, but current cluster state indicates the annotation may still be required", existing.Namespace, existing.Name)
}

required := configMapAsset()

// create endpoint addresses for each node
Expand Down Expand Up @@ -140,22 +112,8 @@ func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder ev

required.Data = endpointAddresses

// Apply endpoint updates
if _, _, err := resourceapply.ApplyConfigMap(c.configmapClient, recorder, required); err != nil {
return err
}

// Try and remove the bootstrap annotation.
if existingHasBootstrapIP {
if err := c.tryRemoveBootstrapIP(ctx, recorder); err != nil {
// We can try again later without rippling out failures because the impact
// of the stale bootstrap endpoint is somewhat benign client errors which
// should resolve when we finally succeed.
utilruntime.HandleError(fmt.Errorf("failed to remove bootstrap IP annotation: %w", err))
}
}

return nil
_, _, err = resourceapply.ApplyConfigMap(c.configmapClient, recorder, required)
return err
}

func configMapAsset() *corev1.ConfigMap {
Expand All @@ -166,57 +124,3 @@ func configMapAsset() *corev1.ConfigMap {
},
}
}

// isSafeToRemoveBootstrapIP returns true if it's safe to remove the bootstrap
// member IP from the endpoints list, otherwise false. If the determination
// can't be made, returns false and an error.
func (c *EtcdEndpointsController) isSafeToRemoveBootstrapIP() (bool, error) {
// See if the etcd client knows about the bootstrap member.
members, err := c.etcdClient.MemberList()
if err != nil {
return false, fmt.Errorf("couldn't list etcd members: %w", err)
}
bootstrapFound := false
for _, member := range members {
if member.Name == "etcd-bootstrap" {
bootstrapFound = true
break
}
}
return !bootstrapFound && len(members) >= 3, nil
}

// tryRemoveBootstrapIP will remove the bootstrap IP annotation from the endpoints
// configmap. If bootstrapping is detected to still be in progress, the function
// does nothing and returns no error to facilitate quiet retries.
func (c *EtcdEndpointsController) tryRemoveBootstrapIP(ctx context.Context, recorder events.Recorder) error {
existing, err := c.configmapLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints")
if err != nil {
return fmt.Errorf("couldn't get required configmap %s/%s: %w", operatorclient.TargetNamespace, "etcd-endpoints", err)
}

// Only proceed if there's actually something to do.
if _, existingHasBootstrapIP := existing.Annotations[etcdcli.BootstrapIPAnnotationKey]; !existingHasBootstrapIP {
return nil
}

// See if the etcd client knows about the bootstrap member.
isSafe, err := c.isSafeToRemoveBootstrapIP()
if err != nil {
return fmt.Errorf("failed to determine whether it's safe to remove the boostrap IP annotation: %w", err)
}
// If it's not yet safe to remove the annotation, no-op and we'll try
// again during another sync.
if !isSafe {
return nil
}

// Bootstrap appears to be complete, so remove the bootstrap IP annotation.
updated := existing.DeepCopy()
delete(updated.Annotations, etcdcli.BootstrapIPAnnotationKey)
if _, err := c.configmapClient.ConfigMaps(operatorclient.TargetNamespace).Update(ctx, updated, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("failed to update configmap %s/%s: %w", updated.Namespace, updated.Name, err)
}
recorder.Eventf("BootstrapIPRemoved", "Removed etcd bootstrap member IP annotation from configmap %s/%s", updated.Namespace, updated.Name)
return nil
}
1 change: 0 additions & 1 deletion pkg/operator/starter.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)
etcdEndpointsController := etcdendpointscontroller.NewEtcdEndpointsController(
operatorClient,
etcdClient,
controllerContext.EventRecorder,
coreClient,
kubeInformersForNamespaces,
Expand Down

0 comments on commit d32171c

Please sign in to comment.