diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index dadaf09a8d8d..f56a8fa0e66d 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -200,6 +200,7 @@ func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") return ctrl.Result{RequeueAfter: time.Minute}, nil } + return ctrl.Result{}, err } @@ -253,41 +254,67 @@ func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv return res, kerrors.NewAggregate(errs) } -func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) error { - if ok, err := r.reconcileDeleteExternal(ctx, mp); !ok || err != nil { +// reconcileDelete delete machinePool related resources. +func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool) error { + if ok, err := r.reconcileDeleteExternal(ctx, machinePool); !ok || err != nil { // Return early and don't remove the finalizer if we got an error or // the external reconciliation deletion isn't ready. return err } - if err := r.reconcileDeleteNodes(ctx, cluster, mp); err != nil { - // Return early and don't remove the finalizer if we got an error. - return err + // check nodes delete timeout passed. + if !r.isMachinePoolNodeDeleteTimeoutPassed(machinePool) { + if err := r.reconcileDeleteNodes(ctx, cluster, machinePool); err != nil { + // Return early and don't remove the finalizer if we got an error. + return err + } + } else { + ctrl.LoggerFrom(ctx).Info("MachinePool %s NodeDeleteTimeout passed, force delete the machinePool", machinePool.Namespace, machinePool.Name) } - controllerutil.RemoveFinalizer(mp, expv1.MachinePoolFinalizer) + controllerutil.RemoveFinalizer(machinePool, expv1.MachinePoolFinalizer) return nil } -func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinepool *expv1.MachinePool) error { - if len(machinepool.Status.NodeRefs) == 0 { +// reconcileDeleteNodes delete the cluster nodes. +func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool) error { + if len(machinePool.Status.NodeRefs) == 0 { return nil } + if r.Tracker == nil { + return errors.New("Cannot establish cluster client to delete nodes") + } + clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { return err } - return r.deleteRetiredNodes(ctx, clusterClient, machinepool.Status.NodeRefs, machinepool.Spec.ProviderIDList) + return r.deleteRetiredNodes(ctx, clusterClient, machinePool.Status.NodeRefs, machinePool.Spec.ProviderIDList) +} + +// isMachinePoolDeleteTimeoutPassed check the machinePool node delete time out. +func (r *MachinePoolReconciler) isMachinePoolNodeDeleteTimeoutPassed(machinePool *expv1.MachinePool) bool { + if !machinePool.DeletionTimestamp.IsZero() && machinePool.Spec.Template.Spec.NodeDeletionTimeout != nil { + if machinePool.Spec.Template.Spec.NodeDeletionTimeout.Duration.Nanoseconds() != 0 { + deleteTimePlusDuration := machinePool.DeletionTimestamp.Add(machinePool.Spec.Template.Spec.NodeDeletionTimeout.Duration) + return deleteTimePlusDuration.Before(time.Now()) + } + } + return false } // reconcileDeleteExternal tries to delete external references, returning true if it cannot find any. -func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *expv1.MachinePool) (bool, error) { +func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, machinePool *expv1.MachinePool) (bool, error) { objects := []*unstructured.Unstructured{} - references := []*corev1.ObjectReference{ - m.Spec.Template.Spec.Bootstrap.ConfigRef, - &m.Spec.Template.Spec.InfrastructureRef, + references := []*corev1.ObjectReference{} + // check for external ref + if machinePool.Spec.Template.Spec.Bootstrap.ConfigRef != nil { + references = append(references, machinePool.Spec.Template.Spec.Bootstrap.ConfigRef) + } + if machinePool.Spec.Template.Spec.InfrastructureRef != (corev1.ObjectReference{}) { + references = append(references, &machinePool.Spec.Template.Spec.InfrastructureRef) } // Loop over the references and try to retrieve it with the client. @@ -296,10 +323,10 @@ func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m * continue } - obj, err := external.Get(ctx, r.Client, ref, m.Namespace) + obj, err := external.Get(ctx, r.Client, ref, machinePool.Namespace) if err != nil && !apierrors.IsNotFound(errors.Cause(err)) { return false, errors.Wrapf(err, "failed to get %s %q for MachinePool %q in namespace %q", - ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace) + ref.GroupVersionKind(), ref.Name, machinePool.Name, machinePool.Namespace) } if obj != nil { objects = append(objects, obj) @@ -311,7 +338,7 @@ func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m * if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) { return false, errors.Wrapf(err, "failed to delete %v %q for MachinePool %q in namespace %q", - obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace) + obj.GroupVersionKind(), obj.GetName(), machinePool.Name, machinePool.Namespace) } } diff --git a/exp/internal/controllers/machinepool_controller_test.go b/exp/internal/controllers/machinepool_controller_test.go index 731ad55b0200..893bb922dbb6 100644 --- a/exp/internal/controllers/machinepool_controller_test.go +++ b/exp/internal/controllers/machinepool_controller_test.go @@ -17,7 +17,9 @@ limitations under the License. package controllers import ( + "context" "testing" + "time" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -31,6 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" "sigs.k8s.io/cluster-api/util" @@ -277,8 +280,6 @@ func TestReconcileMachinePoolRequest(t *testing.T) { }, } - time := metav1.Now() - testCluster := clusterv1.Cluster{ TypeMeta: metav1.TypeMeta{Kind: "Cluster", APIVersion: clusterv1.GroupVersion.String()}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault, Name: "test-cluster"}, @@ -295,20 +296,26 @@ func TestReconcileMachinePoolRequest(t *testing.T) { }, } + timeNow := metav1.Now() type expected struct { - result reconcile.Result - err bool + mpExist bool + result reconcile.Result + err bool } testCases := []struct { + name string machinePool expv1.MachinePool expected expected + withTracker bool }{ { + name: "Create machine Pool", machinePool: expv1.MachinePool{ ObjectMeta: metav1.ObjectMeta{ - Name: "created", - Namespace: metav1.NamespaceDefault, - Finalizers: []string{expv1.MachinePoolFinalizer}, + Name: "created", + Namespace: metav1.NamespaceDefault, + Finalizers: []string{expv1.MachinePoolFinalizer}, + CreationTimestamp: timeNow, }, Spec: expv1.MachinePoolSpec{ ClusterName: "test-cluster", @@ -316,7 +323,6 @@ func TestReconcileMachinePoolRequest(t *testing.T) { Replicas: ptr.To[int32](1), Template: clusterv1.MachineTemplateSpec{ Spec: clusterv1.MachineSpec{ - InfrastructureRef: corev1.ObjectReference{ APIVersion: builder.InfrastructureGroupVersion.String(), Kind: builder.TestInfrastructureMachineTemplateKind, @@ -336,21 +342,28 @@ func TestReconcileMachinePoolRequest(t *testing.T) { }, }, expected: expected{ - result: reconcile.Result{}, - err: false, + mpExist: true, + result: reconcile.Result{}, + err: false, }, + withTracker: true, }, { + name: "Delete machinePool with nodeDeleteTimeout not passed", machinePool: expv1.MachinePool{ ObjectMeta: metav1.ObjectMeta{ - Name: "updated", - Namespace: metav1.NamespaceDefault, - Finalizers: []string{expv1.MachinePoolFinalizer}, + Name: "deleted", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabel: "", + }, + Finalizers: []string{expv1.MachinePoolFinalizer}, + CreationTimestamp: timeNow, + DeletionTimestamp: &metav1.Time{Time: timeNow.Add(time.Hour * 1)}, }, Spec: expv1.MachinePoolSpec{ - ClusterName: "test-cluster", - ProviderIDList: []string{"test://id-1"}, - Replicas: ptr.To[int32](1), + ClusterName: "test-cluster", + Replicas: ptr.To[int32](1), Template: clusterv1.MachineTemplateSpec{ Spec: clusterv1.MachineSpec{ InfrastructureRef: corev1.ObjectReference{ @@ -358,60 +371,112 @@ func TestReconcileMachinePoolRequest(t *testing.T) { Kind: builder.TestInfrastructureMachineTemplateKind, Name: "infra-config1", }, - Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")}, + Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")}, + NodeDeletionTimeout: &metav1.Duration{Duration: 1 * time.Minute}, }, }, + ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"}, }, Status: expv1.MachinePoolStatus{ - Replicas: 1, - ReadyReplicas: 1, NodeRefs: []corev1.ObjectReference{ - {Name: "test"}, + { + APIVersion: "v1", + Kind: "Node", + Name: "test-node", + }, }, - ObservedGeneration: 1, }, }, expected: expected{ - result: reconcile.Result{}, - err: false, + mpExist: true, + result: reconcile.Result{}, + err: false, }, + withTracker: true, }, { + name: "Delete machinePool with nodeDeleteTimeout passed", machinePool: expv1.MachinePool{ ObjectMeta: metav1.ObjectMeta{ - Name: "deleted", + Name: "deleted-timeoutNode", Namespace: metav1.NamespaceDefault, Labels: map[string]string{ clusterv1.MachineControlPlaneLabel: "", }, Finalizers: []string{expv1.MachinePoolFinalizer}, - DeletionTimestamp: &time, + CreationTimestamp: metav1.Time{Time: timeNow.Add(time.Minute * -2)}, + DeletionTimestamp: &metav1.Time{Time: timeNow.Add(time.Minute * -1)}, }, Spec: expv1.MachinePoolSpec{ ClusterName: "test-cluster", Replicas: ptr.To[int32](1), Template: clusterv1.MachineTemplateSpec{ Spec: clusterv1.MachineSpec{ - InfrastructureRef: corev1.ObjectReference{ - APIVersion: builder.InfrastructureGroupVersion.String(), - Kind: builder.TestInfrastructureMachineTemplateKind, - Name: "infra-config1", - }, - Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")}, + NodeDeletionTimeout: &metav1.Duration{Duration: 10 * time.Second}, + }, + }, + ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"}, + }, + Status: expv1.MachinePoolStatus{ + NodeRefs: []corev1.ObjectReference{ + { + APIVersion: "v1", + Kind: "Node", + Name: "test-node", }, }, }, }, expected: expected{ - result: reconcile.Result{}, - err: false, + mpExist: false, + result: reconcile.Result{}, + err: true, + }, + }, + { + name: "Try delete machinePool with nodeDeleteTimeout set to zero", + machinePool: expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "try-delete", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabel: "", + }, + Finalizers: []string{expv1.MachinePoolFinalizer}, + CreationTimestamp: metav1.Time{Time: timeNow.Add(time.Minute * -2)}, + DeletionTimestamp: &metav1.Time{Time: timeNow.Add(time.Minute * -1)}, + }, + Spec: expv1.MachinePoolSpec{ + ClusterName: "test-cluster", + Replicas: ptr.To[int32](1), + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + NodeDeletionTimeout: &metav1.Duration{Duration: 0 * time.Second}, + }, + }, + ProviderIDList: []string{"aws:///us-test-2a/i-013ab00756982217f"}, + }, + Status: expv1.MachinePoolStatus{ + NodeRefs: []corev1.ObjectReference{ + { + APIVersion: "v1", + Kind: "Node", + Name: "test-node", + }, + }, + }, + }, + expected: expected{ + mpExist: true, + result: reconcile.Result{}, + err: true, }, }, } for i := range testCases { tc := testCases[i] - t.Run("machinePool should be "+tc.machinePool.Name, func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) clientFake := fake.NewClientBuilder().WithObjects( @@ -428,18 +493,64 @@ func TestReconcileMachinePoolRequest(t *testing.T) { APIReader: clientFake, } + // Set the tracker. + if tc.withTracker { + r.Tracker = remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(ctx), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}) + } + result, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&tc.machinePool)}) if tc.expected.err { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).ToNot(HaveOccurred()) } - g.Expect(result).To(BeComparableTo(tc.expected.result)) + + // Check machinePool test cases + key := client.ObjectKey{Namespace: tc.machinePool.Namespace, Name: tc.machinePool.Name} + if tc.expected.mpExist { + g.Expect(r.Client.Get(ctx, key, &expv1.MachinePool{})).ToNot(HaveOccurred()) + } else { + // We expect to fail retrieve the machinePool as it is deleted + g.Expect(r.Client.Get(ctx, key, &expv1.MachinePool{})).To(HaveOccurred()) + } }) } } +func TestMachinePoolNodeDeleteTimeoutPassed(t *testing.T) { + g := NewWithT(t) + r := &MachinePoolReconciler{} + timeNow := metav1.Now() + machinePool := expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machinepool", + Namespace: metav1.NamespaceDefault, + CreationTimestamp: timeNow, + }, + } + + // MachienPool not deleted expected false + g.Expect(r.isMachinePoolNodeDeleteTimeoutPassed(&machinePool)).To(BeFalse()) + + // MachienPool deleted and NodeDeleteTimeout not set expected false + machinePool.DeletionTimestamp = &timeNow + g.Expect(r.isMachinePoolNodeDeleteTimeoutPassed(&machinePool)).To(BeFalse()) + + // MachienPool deleted and NodeDeleteTimeout set to zero expected false + machinePool.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: 0 * time.Second} + g.Expect(r.isMachinePoolNodeDeleteTimeoutPassed(&machinePool)).To(BeFalse()) + + // MachienPool deleted (at timeNow) and NodeDeleteTimeout set to 1 min expected false + machinePool.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: 1 * time.Minute} + g.Expect(r.isMachinePoolNodeDeleteTimeoutPassed(&machinePool)).To(BeFalse()) + + // MachienPool deleted (at timeNow -1 min) and NodeDeleteTimeout set to 10 Second expected true + machinePool.DeletionTimestamp = &metav1.Time{Time: timeNow.Add(time.Minute * -1)} + machinePool.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: 10 * time.Second} + g.Expect(r.isMachinePoolNodeDeleteTimeoutPassed(&machinePool)).To(BeTrue()) +} + func TestReconcileMachinePoolDeleteExternal(t *testing.T) { testCluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault, Name: "test-cluster"}, @@ -591,8 +702,10 @@ func TestRemoveMachinePoolFinalizerAfterDeleteReconcile(t *testing.T) { }, } key := client.ObjectKey{Namespace: m.Namespace, Name: m.Name} + clientFake := fake.NewClientBuilder().WithObjects(testCluster, m).WithStatusSubresource(&expv1.MachinePool{}).Build() mr := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(testCluster, m).WithStatusSubresource(&expv1.MachinePool{}).Build(), + Client: clientFake, + Tracker: remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(ctx), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), } _, err := mr.Reconcile(ctx, reconcile.Request{NamespacedName: key}) g.Expect(err).ToNot(HaveOccurred()) @@ -864,6 +977,7 @@ func TestMachinePoolConditions(t *testing.T) { r := &MachinePoolReconciler{ Client: clientFake, APIReader: clientFake, + Tracker: remote.NewTestClusterCacheTracker(ctrl.LoggerFrom(context.TODO()), clientFake, clientFake, clientFake.Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), } _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(machinePool)}) diff --git a/exp/internal/webhooks/machinepool.go b/exp/internal/webhooks/machinepool.go index 5f52e091a1ee..af28fbbeae45 100644 --- a/exp/internal/webhooks/machinepool.go +++ b/exp/internal/webhooks/machinepool.go @@ -21,10 +21,12 @@ import ( "fmt" "strconv" "strings" + "time" "github.com/pkg/errors" v1 "k8s.io/api/admission/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/utils/ptr" @@ -38,6 +40,8 @@ import ( "sigs.k8s.io/cluster-api/util/version" ) +const defaultNodeDeletionTimeout = 10 * time.Second + func (webhook *MachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error { if webhook.decoder == nil { webhook.decoder = admission.NewDecoder(mgr.GetScheme()) @@ -108,6 +112,11 @@ func (webhook *MachinePool) Default(ctx context.Context, obj runtime.Object) err m.Spec.Template.Spec.InfrastructureRef.Namespace = m.Namespace } + // Set the default value for the node deletion timeout. + if m.Spec.Template.Spec.NodeDeletionTimeout == nil { + m.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: defaultNodeDeletionTimeout} + } + // tolerate version strings without a "v" prefix: prepend it if it's not there. if m.Spec.Template.Spec.Version != nil && !strings.HasPrefix(*m.Spec.Template.Spec.Version, "v") { normalizedVersion := "v" + *m.Spec.Template.Spec.Version diff --git a/exp/internal/webhooks/machinepool_test.go b/exp/internal/webhooks/machinepool_test.go index 61fbc53b75ed..e0f3c3522af4 100644 --- a/exp/internal/webhooks/machinepool_test.go +++ b/exp/internal/webhooks/machinepool_test.go @@ -62,6 +62,7 @@ func TestMachinePoolDefault(t *testing.T) { g.Expect(mp.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace).To(Equal(mp.Namespace)) g.Expect(mp.Spec.Template.Spec.InfrastructureRef.Namespace).To(Equal(mp.Namespace)) g.Expect(mp.Spec.Template.Spec.Version).To(Equal(ptr.To("v1.20.0"))) + g.Expect(mp.Spec.Template.Spec.NodeDeletionTimeout).To(Equal(&metav1.Duration{Duration: defaultNodeDeletionTimeout})) } func TestCalculateMachinePoolReplicas(t *testing.T) { diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index bdd250f2c97b..f70a6e163637 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -2661,6 +2661,8 @@ func TestReconcileMachinePools(t *testing.T) { if gotMachinePool.Spec.Template.Spec.Bootstrap.ConfigRef != nil { wantMachinePoolState.Object.Spec.Template.Spec.Bootstrap.ConfigRef.Name = gotMachinePool.Spec.Template.Spec.Bootstrap.ConfigRef.Name } + // expect default value for the node deletion timeout. + wantMachinePoolState.Object.Spec.Template.Spec.NodeDeletionTimeout = &metav1.Duration{Duration: 10 * time.Second} // Compare MachinePool. // Note: We're intentionally only comparing Spec as otherwise we would have to account for