New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug 1832923: Forget bootstrap etcd member IP after bootstrap #367
Merged
openshift-merge-robot
merged 1 commit into
openshift:master
from
ironcladlou:cleanup-bootstrap-endpoint
Jun 5, 2020
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,50 +13,50 @@ import ( | |
"github.com/openshift/library-go/pkg/operator/v1helpers" | ||
operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/client-go/kubernetes" | ||
corev1client "k8s.io/client-go/kubernetes/typed/core/v1" | ||
corev1listers "k8s.io/client-go/listers/core/v1" | ||
"k8s.io/klog" | ||
|
||
"github.com/openshift/cluster-etcd-operator/pkg/etcdcli" | ||
"github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" | ||
) | ||
|
||
// EtcdEndpointsController maintains a configmap resource with | ||
// IP addresses for etcd. It should never depend on DNS directly or transitively. | ||
type EtcdEndpointsController struct { | ||
operatorClient v1helpers.OperatorClient | ||
operatorClient v1helpers.StaticPodOperatorClient | ||
nodeLister corev1listers.NodeLister | ||
configmapLister corev1listers.ConfigMapLister | ||
configmapClient corev1client.ConfigMapsGetter | ||
} | ||
|
||
func NewEtcdEndpointsController( | ||
operatorClient v1helpers.OperatorClient, | ||
operatorClient v1helpers.StaticPodOperatorClient, | ||
eventRecorder events.Recorder, | ||
kubeClient kubernetes.Interface, | ||
kubeInformers operatorv1helpers.KubeInformersForNamespaces, | ||
) factory.Controller { | ||
kubeInformersForTargetNamespace := kubeInformers.InformersFor(operatorclient.TargetNamespace) | ||
configmapsInformer := kubeInformersForTargetNamespace.Core().V1().ConfigMaps() | ||
kubeInformersForCluster := kubeInformers.InformersFor("") | ||
nodeInformer := kubeInformersForCluster.Core().V1().Nodes() | ||
nodeInformer := kubeInformers.InformersFor("").Core().V1().Nodes() | ||
|
||
c := &EtcdEndpointsController{ | ||
operatorClient: operatorClient, | ||
nodeLister: nodeInformer.Lister(), | ||
configmapLister: configmapsInformer.Lister(), | ||
configmapLister: kubeInformers.ConfigMapLister(), | ||
configmapClient: kubeClient.CoreV1(), | ||
} | ||
return factory.New().ResyncEvery(time.Minute).WithInformers( | ||
operatorClient.Informer(), | ||
configmapsInformer.Informer(), | ||
kubeInformers.InformersFor(operatorclient.TargetNamespace).Core().V1().ConfigMaps().Informer(), | ||
nodeInformer.Informer(), | ||
).WithSync(c.sync).ToController("EtcdEndpointsController", eventRecorder.WithComponentSuffix("etcd-endpoints-controller")) | ||
} | ||
|
||
func (c *EtcdEndpointsController) sync(ctx context.Context, syncCtx factory.SyncContext) error { | ||
err := c.syncConfigMap(ctx, syncCtx.Recorder()) | ||
err := c.syncConfigMap(syncCtx.Recorder()) | ||
|
||
if err != nil { | ||
_, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ | ||
|
@@ -83,9 +83,29 @@ func (c *EtcdEndpointsController) sync(ctx context.Context, syncCtx factory.Sync | |
return nil | ||
} | ||
|
||
func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder events.Recorder) error { | ||
func (c *EtcdEndpointsController) syncConfigMap(recorder events.Recorder) error { | ||
bootstrapComplete, err := isBootstrapComplete(c.configmapLister, c.operatorClient) | ||
if err != nil { | ||
return fmt.Errorf("couldn't determine bootstrap status: %w", err) | ||
} | ||
|
||
required := configMapAsset() | ||
|
||
// If the bootstrap IP is present on the existing configmap, either copy it | ||
// forward or remove it if possible so clients can forget about it. | ||
if existing, err := c.configmapLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints"); err == nil { | ||
hexfusion marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if existingIP, hasExistingIP := existing.Annotations[etcdcli.BootstrapIPAnnotationKey]; hasExistingIP { | ||
if bootstrapComplete { | ||
// remove the annotation | ||
required.Annotations[etcdcli.BootstrapIPAnnotationKey+"-"] = existingIP | ||
} else { | ||
required.Annotations[etcdcli.BootstrapIPAnnotationKey] = existingIP | ||
} | ||
} | ||
} else if !errors.IsNotFound(err) { | ||
return fmt.Errorf("couldn't get configmap %s/%s: %w", operatorclient.TargetNamespace, "etcd-endpoints", err) | ||
} | ||
|
||
// create endpoint addresses for each node | ||
nodes, err := c.nodeLister.List(labels.Set{"node-role.kubernetes.io/master": ""}.AsSelector()) | ||
if err != nil { | ||
|
@@ -112,15 +132,60 @@ func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder ev | |
|
||
required.Data = endpointAddresses | ||
|
||
_, _, err = resourceapply.ApplyConfigMap(c.configmapClient, recorder, required) | ||
return err | ||
// Apply endpoint updates | ||
if _, _, err := resourceapply.ApplyConfigMap(c.configmapClient, recorder, required); err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func configMapAsset() *corev1.ConfigMap { | ||
return &corev1.ConfigMap{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "etcd-endpoints", | ||
Namespace: operatorclient.TargetNamespace, | ||
Name: "etcd-endpoints", | ||
Namespace: operatorclient.TargetNamespace, | ||
Annotations: map[string]string{}, | ||
}, | ||
} | ||
} | ||
|
||
// isBootstrapComplete returns true if bootstrap has completed. This is used to | ||
// indicate whether it's safe for clients to forget about the bootstrap member IP. | ||
func isBootstrapComplete(configMapClient corev1listers.ConfigMapLister, staticPodClient v1helpers.StaticPodOperatorClient) (bool, error) { | ||
// do a cheap check to see if the annotation is already gone. | ||
// check to see if bootstrapping is complete | ||
bootstrapFinishedConfigMap, err := configMapClient.ConfigMaps("kube-system").Get("bootstrap") | ||
if err != nil { | ||
if errors.IsNotFound(err) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style: prefer if errors.IsNotFound(){
return
}
if err != nil{
different return
} |
||
// If the resource was deleted (e.g. by an admin) after bootstrap is actually complete, | ||
// this is a false negative. | ||
klog.V(4).Infof("bootstrap considered incomplete because the kube-system/bootstrap configmap wasn't found") | ||
return false, nil | ||
} | ||
// We don't know, give up quickly. | ||
return false, fmt.Errorf("failed to get configmap %s/%s: %w", "kube-system", "bootstrap", err) | ||
} | ||
|
||
if status, ok := bootstrapFinishedConfigMap.Data["status"]; !ok || status != "complete" { | ||
// do nothing, not torn down | ||
klog.V(4).Infof("bootstrap considered incomplete because status is %q", status) | ||
return false, nil | ||
} | ||
|
||
// now run check to stability of revisions | ||
_, status, _, err := staticPodClient.GetStaticPodOperatorState() | ||
if err != nil { | ||
return false, fmt.Errorf("failed to get static pod operator state: %w", err) | ||
} | ||
if status.LatestAvailableRevision == 0 { | ||
return false, nil | ||
} | ||
for _, curr := range status.NodeStatuses { | ||
if curr.CurrentRevision != status.LatestAvailableRevision { | ||
klog.V(4).Infof("bootstrap considered incomplete because revision %d is still in progress", status.LatestAvailableRevision) | ||
return false, nil | ||
} | ||
} | ||
return true, nil | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious what is the difference between the former and the latter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
functionally equivalent, both cause sync to trigger on configmap change in the openshift-etcd namespace
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK so why change :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because configmapsInformer was only used in one place (so what's the point of a local to begin with), and also had a misleading name (it isn't an informer for all configmaps, it's only for the openshift-etcd namespace).