Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions pkg/controllers/updaterun/controller_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,17 @@ func generateTestClusterResourceBindingsAndClusters(policySnapshotIndex int) ([]
}

unscheduledClusters := make([]*clusterv1beta1.MemberCluster, numUnscheduledClusters)
for i := range unscheduledClusters {
// Half of the unscheduled clusters have old policy snapshot.
for i := range numUnscheduledClusters / 2 {
unscheduledClusters[i] = generateTestMemberCluster(i, "unscheduled-cluster-"+strconv.Itoa(i), map[string]string{"group": "staging"})
// update the policySnapshot name so that these clusters are considered to-be-deleted
// Update the policySnapshot name so that these clusters are considered to-be-deleted.
resourceBindings[numTargetClusters+i] = generateTestClusterResourceBinding(policySnapshotName+"a", unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled)
}
// The other half of the unscheduled clusters have latest policy snapshot but still unscheduled.
for i := numUnscheduledClusters / 2; i < numUnscheduledClusters; i++ {
unscheduledClusters[i] = generateTestMemberCluster(i, "unscheduled-cluster-"+strconv.Itoa(i), map[string]string{"group": "staging"})
resourceBindings[numTargetClusters+i] = generateTestClusterResourceBinding(policySnapshotName, unscheduledClusters[i].Name, placementv1beta1.BindingStateUnscheduled)
}
return resourceBindings, targetClusters, unscheduledClusters
}

Expand Down
27 changes: 15 additions & 12 deletions pkg/controllers/updaterun/initialization.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,22 +190,25 @@ func (r *Reconciler) collectScheduledClusters(
var toBeDeletedBindings, selectedBindings []*placementv1beta1.ClusterResourceBinding
for i, binding := range bindingList.Items {
if binding.Spec.SchedulingPolicySnapshotName == latestPolicySnapshot.Name {
if binding.Spec.State != placementv1beta1.BindingStateScheduled && binding.Spec.State != placementv1beta1.BindingStateBound {
stateErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("binding `%s`'s state %s is not scheduled or bound", binding.Name, binding.Spec.State))
klog.ErrorS(stateErr, "Failed to collect clusterResourceBindings", "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
// no more retries here.
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, stateErr.Error())
if binding.Spec.State == placementv1beta1.BindingStateUnscheduled {
klog.V(2).InfoS("Found an unscheduled binding with the latest policy snapshot, delete it", "binding", binding.Name, "clusterResourcePlacement", placementName,
"latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
toBeDeletedBindings = append(toBeDeletedBindings, &bindingList.Items[i])
} else {
klog.V(2).InfoS("Found a scheduled binding", "binding", binding.Name, "clusterResourcePlacement", placementName,
"latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
selectedBindings = append(selectedBindings, &bindingList.Items[i])
}
klog.V(2).InfoS("Found a scheduled binding", "binding", binding.Name, "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
selectedBindings = append(selectedBindings, &bindingList.Items[i])
} else {
if binding.Spec.State != placementv1beta1.BindingStateUnscheduled {
stateErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("binding `%s` with old policy snapshot %s has state %s, not unscheduled", binding.Name, binding.Spec.SchedulingPolicySnapshotName, binding.Spec.State))
klog.ErrorS(stateErr, "Failed to collect clusterResourceBindings", "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
// no more retries here.
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, stateErr.Error())
stateErr := fmt.Errorf("binding `%s` with old policy snapshot %s has state %s, we might observe a transient state, need retry", binding.Name, binding.Spec.SchedulingPolicySnapshotName, binding.Spec.State)
klog.V(2).InfoS("Found a not-unscheduled binding with old policy snapshot, retrying", "binding", binding.Name, "clusterResourcePlacement", placementName,
"latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
// Transient state can be retried.
return nil, nil, stateErr
}
klog.V(2).InfoS("Found a to-be-deleted binding", "binding", binding.Name, "cluster", binding.Spec.TargetCluster, "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
klog.V(2).InfoS("Found an unscheduled binding with old policy snapshot", "binding", binding.Name, "cluster", binding.Spec.TargetCluster,
"clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
toBeDeletedBindings = append(toBeDeletedBindings, &bindingList.Items[i])
}
}
Expand Down
28 changes: 22 additions & 6 deletions pkg/controllers/updaterun/initialization_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,31 +438,47 @@ var _ = Describe("Updaterun initialization tests", func() {
Expect(updateRun.Status.PolicyObservedClusterCount).To(Equal(1), "failed to update the updateRun PolicyObservedClusterCount status")
})

It("Should fail to initialize if the bindings with latest policy snapshots are not in Scheduled or Bound state", func() {
It("Should not fail to initialize if the bindings with latest policy snapshots are in Unscheduled state", func() {
By("Creating a not scheduled clusterResourceBinding")
binding := generateTestClusterResourceBinding(policySnapshot.Name, "cluster-1", placementv1beta1.BindingStateUnscheduled)
Expect(k8sClient.Create(ctx, binding)).To(Succeed())

By("Creating a new clusterStagedUpdateRun")
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())

By("Validating the initialization failed")
validateFailedInitCondition(ctx, updateRun, "state Unscheduled is not scheduled or bound")
By("Validating the initialization failed due to observedClusterCount mismatch, not binding state, and no selected clusters")
validateFailedInitCondition(ctx, updateRun, "the number of selected bindings 0 is not equal to the observed cluster count 10")

By("Deleting the clusterResourceBinding")
Expect(k8sClient.Delete(ctx, binding)).Should(Succeed())
})

It("Should fail to initialize if the bindings with old policy snapshots are not in Unscheduled state", func() {
It("Should retry to initialize if the bindings with old policy snapshots are not in Unscheduled state", func() {
By("Creating a scheduled clusterResourceBinding with old policy snapshot")
binding := generateTestClusterResourceBinding(policySnapshot.Name+"a", "cluster-0", placementv1beta1.BindingStateScheduled)
Expect(k8sClient.Create(ctx, binding)).To(Succeed())

By("Creating a new clusterStagedUpdateRun")
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())

By("Validating the initialization failed")
validateFailedInitCondition(ctx, updateRun, "has state Scheduled, not unscheduled")
By("Validating the initialization not failed consistently")
// Populate the cache first.
Eventually(func() error {
if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil {
return err
}
return nil
}, timeout, interval).Should(Succeed(), "failed to get the updateRun")
Consistently(func() error {
if err := k8sClient.Get(ctx, updateRunNamespacedName, updateRun); err != nil {
return err
}
initCond := meta.FindStatusCondition(updateRun.Status.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized))
if initCond != nil {
return fmt.Errorf("got initialization condition: %v, want nil", initCond)
}
return nil
}, duration, interval).Should(Succeed(), "the initialization should keep retrying, not failed")

By("Deleting the clusterResourceBinding")
Expect(k8sClient.Delete(ctx, binding)).Should(Succeed())
Expand Down
1 change: 1 addition & 0 deletions test/e2e/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ var (
}

updateRunStatusCmpOption = cmp.Options{
cmpopts.SortSlices(lessFuncCondition),
utils.IgnoreConditionLTTAndMessageFields,
cmpopts.IgnoreFields(placementv1beta1.StageUpdatingStatus{}, "StartTime", "EndTime"),
cmpopts.EquateEmpty(),
Expand Down
Loading
Loading