diff --git a/apis/placement/v1beta1/stageupdate_types.go b/apis/placement/v1beta1/stageupdate_types.go index be25a3660..f7a58b300 100644 --- a/apis/placement/v1beta1/stageupdate_types.go +++ b/apis/placement/v1beta1/stageupdate_types.go @@ -127,7 +127,8 @@ type StageConfig struct { // LabelSelector is a label query over all the joined member clusters. Clusters matching the query are selected // for this stage. There cannot be overlapping clusters between stages when the stagedUpdateRun is created. - // If the label selector is absent, the stage includes all the selected clusters. + // If the label selector is empty, the stage includes all the selected clusters. + // If the label selector is nil, the stage does not include any selected clusters. // +kubebuilder:validation:Optional LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml index bbbe90add..7fb164af9 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdateruns.yaml @@ -1818,7 +1818,8 @@ spec: description: |- LabelSelector is a label query over all the joined member clusters. Clusters matching the query are selected for this stage. There cannot be overlapping clusters between stages when the stagedUpdateRun is created. - If the label selector is absent, the stage includes all the selected clusters. + If the label selector is empty, the stage includes all the selected clusters. + If the label selector is nil, the stage does not include any selected clusters. properties: matchExpressions: description: matchExpressions is a list of label selector diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdatestrategies.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdatestrategies.yaml index 652b1a13b..f1dc847c1 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdatestrategies.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterstagedupdatestrategies.yaml @@ -222,7 +222,8 @@ spec: description: |- LabelSelector is a label query over all the joined member clusters. Clusters matching the query are selected for this stage. There cannot be overlapping clusters between stages when the stagedUpdateRun is created. - If the label selector is absent, the stage includes all the selected clusters. + If the label selector is empty, the stage includes all the selected clusters. + If the label selector is nil, the stage does not include any selected clusters. properties: matchExpressions: description: matchExpressions is a list of label selector diff --git a/pkg/controllers/updaterun/initialization.go b/pkg/controllers/updaterun/initialization.go index 3428819ec..8789a3c09 100644 --- a/pkg/controllers/updaterun/initialization.go +++ b/pkg/controllers/updaterun/initialization.go @@ -22,6 +22,7 @@ import ( "fmt" "sort" "strconv" + "strings" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -380,13 +381,17 @@ func (r *Reconciler) computeRunStageStatus( // Check if the clusters are all placed. if len(allPlacedClusters) != len(allSelectedClusters) { missingErr := controller.NewUserError(fmt.Errorf("some clusters are not placed in any stage")) + missingClusters := make([]string, 0, len(allSelectedClusters)-len(allPlacedClusters)) for cluster := range allSelectedClusters { if _, ok := allPlacedClusters[cluster]; !ok { - klog.ErrorS(missingErr, "Cluster is missing in any stage", "cluster", cluster, "clusterStagedUpdateStrategy", updateStrategyName, "clusterStagedUpdateRun", updateRunRef) + missingClusters = append(missingClusters, cluster) } } - // no more retries here. - return fmt.Errorf("%w: %s", errInitializedFailed, missingErr.Error()) + // Sort the missing clusters by their names to generate a stable error message. + sort.Strings(missingClusters) + klog.ErrorS(missingErr, "Clusters are missing in any stage", "clusters", strings.Join(missingClusters, ", "), "clusterStagedUpdateStrategy", updateStrategyName, "clusterStagedUpdateRun", updateRunRef) + // no more retries here, only show the first 10 missing clusters in the CRP status. + return fmt.Errorf("%w: %s, total %d, showing up to 10: %s", errInitializedFailed, missingErr.Error(), len(missingClusters), strings.Join(missingClusters[:min(10, len(missingClusters))], ", ")) } return nil } diff --git a/pkg/controllers/updaterun/initialization_integration_test.go b/pkg/controllers/updaterun/initialization_integration_test.go index 825531f6b..834e21d29 100644 --- a/pkg/controllers/updaterun/initialization_integration_test.go +++ b/pkg/controllers/updaterun/initialization_integration_test.go @@ -549,7 +549,7 @@ var _ = Describe("Updaterun initialization tests", func() { validateFailedInitCondition(ctx, updateRun, "referenced clusterStagedUpdateStrategy not found") }) - Context("Test computeStagedStatus", func() { + Context("Test computeRunStageStatus", func() { Context("Test validateAfterStageTask", func() { It("Should fail to initialize if any after stage task has 2 same tasks", func() { By("Creating a clusterStagedUpdateStrategy with 2 same after stage tasks") @@ -617,7 +617,50 @@ var _ = Describe("Updaterun initialization tests", func() { Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) By("Validating the initialization failed") - validateFailedInitCondition(ctx, updateRun, "some clusters are not placed in any stage") + validateFailedInitCondition(ctx, updateRun, "some clusters are not placed in any stage, total 5, showing up to 10: cluster-0, cluster-2, cluster-4, cluster-6, cluster-8") + }) + + It("Should select all scheduled clusters if labelSelector is empty and select no clusters if labelSelector is nil", func() { + By("Creating a clusterStagedUpdateStrategy with two stages, using empty labelSelector and nil labelSelector respectively") + updateStrategy.Spec.Stages[0].LabelSelector = nil // no clusters selected + updateStrategy.Spec.Stages[1].LabelSelector = &metav1.LabelSelector{} // all clusters selected + Expect(k8sClient.Create(ctx, updateStrategy)).To(Succeed()) + + By("Creating a new clusterStagedUpdateRun") + Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) + + By("Validating the clusterStagedUpdateRun status") + Eventually(func() error { + if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil { + return err + } + + want := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) + // No clusters should be selected in the first stage. + want.StagesStatus[0].Clusters = []placementv1beta1.ClusterUpdatingStatus{} + // All clusters should be selected in the second stage and sorted by name. + want.StagesStatus[1].Clusters = []placementv1beta1.ClusterUpdatingStatus{ + {ClusterName: "cluster-0"}, + {ClusterName: "cluster-1"}, + {ClusterName: "cluster-2"}, + {ClusterName: "cluster-3"}, + {ClusterName: "cluster-4"}, + {ClusterName: "cluster-5"}, + {ClusterName: "cluster-6"}, + {ClusterName: "cluster-7"}, + {ClusterName: "cluster-8"}, + {ClusterName: "cluster-9"}, + } + // initialization should fail due to resourceSnapshot not found. + want.Conditions = []metav1.Condition{ + generateFalseCondition(updateRun, placementv1beta1.StagedUpdateRunConditionInitialized), + } + + if diff := cmp.Diff(*want, updateRun.Status, cmpOptions...); diff != "" { + return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) + } + return nil + }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun status") }) }) @@ -639,7 +682,7 @@ var _ = Describe("Updaterun initialization tests", func() { // Remove the CROs, as they are not added in this test. want.StagesStatus[0].Clusters[i].ClusterResourceOverrideSnapshots = nil } - // initialization should fail. + // initialization should fail due to resourceSnapshot not found. want.Conditions = []metav1.Condition{ generateFalseCondition(updateRun, placementv1beta1.StagedUpdateRunConditionInitialized), } @@ -648,7 +691,7 @@ var _ = Describe("Updaterun initialization tests", func() { return fmt.Errorf("status mismatch: (-want +got):\n%s", diff) } return nil - }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun in the status") + }, timeout, interval).Should(Succeed(), "failed to validate the clusterStagedUpdateRun status") }) }) diff --git a/pkg/utils/controller/metrics/metrics.go b/pkg/utils/controller/metrics/metrics.go index 327f3589a..9ec82e78a 100644 --- a/pkg/utils/controller/metrics/metrics.go +++ b/pkg/utils/controller/metrics/metrics.go @@ -73,6 +73,7 @@ var ( Name: "fleet_workload_eviction_complete", Help: "Eviction complete status ", }, []string{"name", "isCompleted", "isValid"}) + // FleetUpdateRunStatusLastTimestampSeconds is a prometheus metric which holds the // last update timestamp of update run status in seconds. FleetUpdateRunStatusLastTimestampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{