From 5a6a389e88e3afd872551f18a0c54fecabbed701 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Tue, 6 Feb 2024 15:30:43 +0100 Subject: [PATCH] Make sure pending workloads are removed from the cache (#1687) --- pkg/controller/core/workload_controller.go | 5 +- .../core/clusterqueue_controller_test.go | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/pkg/controller/core/workload_controller.go b/pkg/controller/core/workload_controller.go index 9a9b275b0a..69038752df 100644 --- a/pkg/controller/core/workload_controller.go +++ b/pkg/controller/core/workload_controller.go @@ -373,9 +373,8 @@ func (r *WorkloadReconciler) Delete(e event.DeleteEvent) bool { // Even if the state is unknown, the last cached state tells us whether the // workload was in the queues and should be cleared from them. - if workload.HasQuotaReservation(wl) { - r.queues.DeleteWorkload(wl) - } + r.queues.DeleteWorkload(wl) + return true } diff --git a/test/integration/controller/core/clusterqueue_controller_test.go b/test/integration/controller/core/clusterqueue_controller_test.go index 21a8e9e9e7..562cfca885 100644 --- a/test/integration/controller/core/clusterqueue_controller_test.go +++ b/test/integration/controller/core/clusterqueue_controller_test.go @@ -50,6 +50,7 @@ const ( ) var ignoreConditionTimestamps = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") +var ignoreConditionMessage = cmpopts.IgnoreFields(metav1.Condition{}, "Message") var ignoreLastChangeTime = cmpopts.IgnoreFields(kueue.ClusterQueuePendingWorkloadsStatus{}, "LastChangeTime") var ignorePendingWorkloadsStatus = cmpopts.IgnoreFields(kueue.ClusterQueueStatus{}, "PendingWorkloadsStatus") @@ -362,6 +363,61 @@ var _ = ginkgo.Describe("ClusterQueue controller", ginkgo.Ordered, ginkgo.Contin util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 0) }) + ginkgo.It("Should update status and report metrics when a pending workload is deleted", func() { + workload := testing.MakeWorkload("one", ns.Name).Queue(localQueue.Name). + Request(corev1.ResourceCPU, "5").Obj() + + ginkgo.By("Creating a workload", func() { + gomega.Expect(k8sClient.Create(ctx, workload)).To(gomega.Succeed()) + }) + + // Pending workloads count is incremented as the workload is inadmissible + // because ResourceFlavors don't exist. + gomega.Eventually(func() kueue.ClusterQueueStatus { + var updatedCq kueue.ClusterQueue + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed()) + return updatedCq.Status + }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.ClusterQueueStatus{ + PendingWorkloads: 1, + FlavorsReservation: emptyUsedFlavors, + FlavorsUsage: emptyUsedFlavors, + Conditions: []metav1.Condition{ + { + Type: kueue.ClusterQueueActive, + Status: metav1.ConditionFalse, + Reason: "FlavorNotFound", + }, + }, + }, ignoreConditionMessage, ignoreConditionTimestamps, ignorePendingWorkloadsStatus)) + + util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 1) + util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 0) + + ginkgo.By("Deleting the pending workload", func() { + gomega.Expect(k8sClient.Delete(ctx, workload)).To(gomega.Succeed()) + }) + + // Pending workloads count is decrement as the deleted workload has been removed from the queue. + gomega.Eventually(func() kueue.ClusterQueueStatus { + var updatedCq kueue.ClusterQueue + gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(clusterQueue), &updatedCq)).To(gomega.Succeed()) + return updatedCq.Status + }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(kueue.ClusterQueueStatus{ + PendingWorkloads: 0, + FlavorsReservation: emptyUsedFlavors, + FlavorsUsage: emptyUsedFlavors, + Conditions: []metav1.Condition{ + { + Type: kueue.ClusterQueueActive, + Status: metav1.ConditionFalse, + Reason: "FlavorNotFound", + }, + }, + }, ignoreConditionMessage, ignoreConditionTimestamps, ignorePendingWorkloadsStatus)) + util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 0) + util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 0) + }) + ginkgo.It("Should update status when workloads have reclaimable pods", func() { ginkgo.By("Creating ResourceFlavors", func() {