Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

馃悰 Fix node deletion under managed control planes #3673

Merged
merged 1 commit into from Sep 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions controllers/machine_controller.go
Expand Up @@ -376,6 +376,30 @@ func (r *MachineReconciler) isDeleteNodeAllowed(ctx context.Context, cluster *cl
return errNilNodeRef
}

// controlPlaneRef is an optional field in the Cluster so skip the external
// managed control plane check if it is nil
if cluster.Spec.ControlPlaneRef != nil {
controlPlane, err := external.Get(ctx, r.Client, cluster.Spec.ControlPlaneRef, cluster.Spec.ControlPlaneRef.Namespace)
if apierrors.IsNotFound(err) {
// If control plane object in the reference does not exist, log and skip check for
// external managed control plane
r.Log.Error(err, "control plane object specified in cluster spec.controlPlaneRef does not exist", "kind", cluster.Spec.ControlPlaneRef.Kind, "name", cluster.Spec.ControlPlaneRef.Name)
} else {
if err != nil {
// If any other error occurs when trying to get the control plane object,
// return the error so we can retry
return err
}

// Check if the ControlPlane is externally managed (AKS, EKS, GKE, etc)
// and skip the following section if control plane is externally managed
// because there will be no control plane nodes registered
if util.IsExternalManagedControlPlane(controlPlane) {
return nil
}
}
}

// Get all of the machines that belong to this cluster.
machines, err := getActiveMachinesInCluster(ctx, r.Client, machine.Namespace, machine.Labels[clusterv1.ClusterLabelName])
if err != nil {
Expand Down
49 changes: 48 additions & 1 deletion controllers/machine_controller_test.go
Expand Up @@ -783,7 +783,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
expectedError: nil,
},
{
name: "has nodeRef and control plane is healthy",
name: "has nodeRef and cluster is being deleted",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
DeletionTimestamp: &deletionts,
Expand All @@ -792,6 +792,40 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
machine: &clusterv1.Machine{},
expectedError: errClusterIsBeingDeleted,
},
{
name: "has nodeRef and control plane is healthy and externally managed",
cluster: &clusterv1.Cluster{
Spec: clusterv1.ClusterSpec{
ControlPlaneRef: &corev1.ObjectReference{
APIVersion: "controlplane.cluster.x-k8s.io/v1alpha3",
Kind: "AWSManagedControlPlane",
Name: "test-cluster",
Namespace: "test-cluster",
},
},
},
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "created",
Namespace: "default",
Labels: map[string]string{
clusterv1.ClusterLabelName: "test",
},
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{Data: pointer.StringPtr("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test",
},
},
},
expectedError: nil,
},
}

for _, tc := range testCases {
Expand Down Expand Up @@ -844,13 +878,26 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
m2.Labels[clusterv1.MachineControlPlaneLabelName] = ""
}

emp := &unstructured.Unstructured{
Object: map[string]interface{}{
"status": map[string]interface{}{
"externalManagedControlPlane": true,
},
},
}
emp.SetAPIVersion("controlplane.cluster.x-k8s.io/v1alpha3")
emp.SetKind("AWSManagedControlPlane")
emp.SetName("test-cluster")
emp.SetNamespace("test-cluster")

mr := &MachineReconciler{
Client: helpers.NewFakeClientWithScheme(
scheme.Scheme,
tc.cluster,
tc.machine,
m1,
m2,
emp,
),
Log: log.Log,
scheme: scheme.Scheme,
Expand Down
Expand Up @@ -174,10 +174,13 @@ following fields defined:

#### Optional `status` fields

The `status` object **may** define several fields that do not affect functionality if missing:
The `status` object **may** define several fields:

* `failureReason` - is a string that explains why an error has occurred, if possible.
* `failureMessage` - is a string that holds the message contained by the error.
* `externalManagedControlPlane` - is a bool that should be set to true if the Node objects do not
exist in the cluster. For example, managed control plane providers for AKS, EKS, GKE, etc, should
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I think it should since we're using controlPlaneRef the same way)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@CecileRobertMichon Yes, as long as you reference the AzureManagedControlPlane in the cluster's controlPlaneRef field you should be able to use the status in your resource as well. Since the check here in CAPI is using the unstructured API, the underlying type is not important. Whatever object is referenced by controlPlaneRef just needs to have the status field externalManagedControlPlane if there are no control plane Machine resources in the cluster.

set this to `true`. Leaving the field undefined is equivalent to setting the value to `false`.

## Example usage

Expand Down
10 changes: 10 additions & 0 deletions util/util.go
Expand Up @@ -197,6 +197,16 @@ func GetControlPlaneMachinesFromList(machineList *clusterv1.MachineList) (res []
return
}

// IsExternalManagedControlPlane returns a bool indicating whether the control plane referenced
// in the passed Unstructured resource is an externally managed control plane such as AKS, EKS, GKE, etc.
func IsExternalManagedControlPlane(controlPlane *unstructured.Unstructured) bool {
managed, found, err := unstructured.NestedBool(controlPlane.Object, "status", "externalManagedControlPlane")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document somewhere that manage control plane implementation should have this field (might be in https://cluster-api.sigs.k8s.io/developer/architecture/controllers/control-plane.html#required-status-fields)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 this isn't part of the contract currently AFAIK

cc @alexeldeib

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 updating the documentation, just a note though the field is completely optional and there is a fallback to the current behavior during delete

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. What are thoughts on making this new field be an optional part of the contract to avoid a breaking change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unusual case in that the field is really required for managed control planes like AKS, EKS, GKE, etc otherwise the Machine controller will suffer this pretty bad bug (instance termination without draining nodes), but not required at all for KubeadmControlPlane. No other optional status fields currently appear to modify functionality and are purely informational. If we make it a new required status field though, I think this would need to wait for a new minor version to be released. I'd like to get the bugfix released sooner rather than later if possible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm 馃憤 to optional. Please update the control plane architecture doc linked above and add it to the list of optional status fields, assuming everyone else agrees.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if err != nil || !found {
return false
}
return managed
}

// GetMachineIfExists gets a machine from the API server if it exists.
func GetMachineIfExists(c client.Client, namespace, name string) (*clusterv1.Machine, error) {
if c == nil {
Expand Down
41 changes: 41 additions & 0 deletions util/util_test.go
Expand Up @@ -25,6 +25,7 @@ import (
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"

Expand Down Expand Up @@ -598,6 +599,46 @@ func TestGetMachinesForCluster(t *testing.T) {
g.Expect(machines.Items[0].Labels[clusterv1.ClusterLabelName]).To(Equal(cluster.Name))
}

func TestIsExternalManagedControlPlane(t *testing.T) {
g := NewWithT(t)

t.Run("should return true if control plane status externalManagedControlPlane is true", func(t *testing.T) {
controlPlane := &unstructured.Unstructured{
Object: map[string]interface{}{
"status": map[string]interface{}{
"externalManagedControlPlane": true,
},
},
}
result := IsExternalManagedControlPlane(controlPlane)
g.Expect(result).Should(Equal(true))
})

t.Run("should return false if control plane status externalManagedControlPlane is false", func(t *testing.T) {
controlPlane := &unstructured.Unstructured{
Object: map[string]interface{}{
"status": map[string]interface{}{
"externalManagedControlPlane": false,
},
},
}
result := IsExternalManagedControlPlane(controlPlane)
g.Expect(result).Should(Equal(false))
})

t.Run("should return false if control plane status externalManagedControlPlane is not set", func(t *testing.T) {
controlPlane := &unstructured.Unstructured{
Object: map[string]interface{}{
"status": map[string]interface{}{
"someOtherStatusField": "someValue",
},
},
}
result := IsExternalManagedControlPlane(controlPlane)
g.Expect(result).Should(Equal(false))
})
}

func TestEnsureOwnerRef(t *testing.T) {
g := NewWithT(t)

Expand Down