Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions apis/placement/v1beta1/stageupdate_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,14 @@ func (c *ClusterStagedUpdateRun) SetUpdateRunStatus(status UpdateRunStatus) {
type State string

const (
// StateNotStarted describes user intent to initialize but not execute the update run.
// StateInitialized describes user intent to initialize but not execute the update run.
// This is the default state when an update run is created.
StateNotStarted State = "Initialize"
// Users can subsequently set the state to Execute or Abandon.
StateInitialized State = "Initialize"

// StateStarted describes user intent to execute (or resume execution if paused).
// StateExecuted describes user intent to execute (or resume execution if paused).
// Users can subsequently set the state to Pause or Abandon.
StateStarted State = "Execute"
StateExecuted State = "Execute"

// StateStopped describes user intent to pause the update run.
// Users can subsequently set the state to Execute or Abandon.
Expand Down Expand Up @@ -426,7 +427,6 @@ const (
// Its condition status can be one of the following:
// - "True": The staged update run is initialized successfully.
// - "False": The staged update run encountered an error during initialization and aborted.
// - "Unknown": The staged update run initialization has started.
StagedUpdateRunConditionInitialized StagedUpdateRunConditionType = "Initialized"

// StagedUpdateRunConditionProgressing indicates whether the staged update run is making progress.
Expand Down
60 changes: 36 additions & 24 deletions pkg/controllers/updaterun/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,23 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
// Emit the update run status metric based on status conditions in the updateRun.
defer emitUpdateRunStatusMetric(updateRun)

state := updateRun.GetUpdateRunSpec().State

var updatingStageIndex int
var toBeUpdatedBindings, toBeDeletedBindings []placementv1beta1.BindingObj
updateRunStatus := updateRun.GetUpdateRunStatus()
initCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized))
if !condition.IsConditionStatusTrue(initCond, updateRun.GetGeneration()) {
if condition.IsConditionStatusFalse(initCond, updateRun.GetGeneration()) {
// Check if initialized regardless of generation.
// The updateRun spec fields are immutable except for the state field. When the state changes,
// the update run generation increments, but we don't need to reinitialize since initialization is a one-time setup.
if !(initCond != nil && initCond.Status == metav1.ConditionTrue) {
// Check if initialization has already failed (regardless of generation); a failed initialization is not retried.
if initCond != nil && initCond.Status == metav1.ConditionFalse {
klog.V(2).InfoS("The updateRun has failed to initialize", "errorMsg", initCond.Message, "updateRun", runObjRef)
return runtime.Result{}, nil
}

// Initialize the updateRun.
var initErr error
if toBeUpdatedBindings, toBeDeletedBindings, initErr = r.initialize(ctx, updateRun); initErr != nil {
klog.ErrorS(initErr, "Failed to initialize the updateRun", "updateRun", runObjRef)
Expand All @@ -122,10 +130,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
}
return runtime.Result{}, initErr
}
updatingStageIndex = 0 // start from the first stage.
klog.V(2).InfoS("Initialized the updateRun", "updateRun", runObjRef)
updatingStageIndex = 0 // start from the first stage (typically for Initialize or Execute states).
klog.V(2).InfoS("Initialized the updateRun", "state", state, "updateRun", runObjRef)
} else {
klog.V(2).InfoS("The updateRun is initialized", "updateRun", runObjRef)
klog.V(2).InfoS("The updateRun is initialized", "state", state, "updateRun", runObjRef)
// Check if the updateRun is finished.
finishedCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionSucceeded))
if condition.IsConditionStatusTrue(finishedCond, updateRun.GetGeneration()) || condition.IsConditionStatusFalse(finishedCond, updateRun.GetGeneration()) {
Expand All @@ -151,28 +159,32 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
}

// Execute the updateRun.
klog.V(2).InfoS("Continue to execute the updateRun", "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
if errors.Is(execErr, errStagedUpdatedAborted) {
// errStagedUpdatedAborted cannot be retried.
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
}
if state == placementv1beta1.StateExecuted {
klog.V(2).InfoS("Continue to execute the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
if errors.Is(execErr, errStagedUpdatedAborted) {
// errStagedUpdatedAborted cannot be retried.
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
}

if finished {
klog.V(2).InfoS("The updateRun is completed", "updateRun", runObjRef)
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
}
if finished {
klog.V(2).InfoS("The updateRun is completed", "updateRun", runObjRef)
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
}

// The execution is not finished yet or it encounters a retriable error.
// We need to record the status and requeue.
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
return runtime.Result{}, updateErr
}
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
if execErr != nil {
return runtime.Result{}, execErr
// The execution is not finished yet or it encounters a retriable error.
// We need to record the status and requeue.
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
return runtime.Result{}, updateErr
}
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
if execErr != nil {
return runtime.Result{}, execErr
}
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
}
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
klog.V(2).InfoS("The updateRun is initialized but not executed, waiting to execute", "state", state, "updateRun", runObjRef)
return runtime.Result{}, nil
}

// handleDelete handles the deletion of the updateRun object.
Expand Down
28 changes: 12 additions & 16 deletions pkg/controllers/updaterun/controller_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,16 @@ func generateMetricsLabels(
}
}

// generateInitializationSucceededMetric builds a Prometheus metric for the given update run.
// Its labels are generated for the Initialized condition type with status True and the
// UpdateRunInitializeSucceededReason reason. Its gauge value is the current wall-clock
// time expressed in seconds (nanoseconds since the Unix epoch divided by 1e9).
func generateInitializationSucceededMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
	labels := generateMetricsLabels(
		updateRun,
		string(placementv1beta1.StagedUpdateRunConditionInitialized),
		string(metav1.ConditionTrue),
		condition.UpdateRunInitializeSucceededReason,
	)
	nowSeconds := float64(time.Now().UnixNano()) / 1e9

	return &prometheusclientmodel.Metric{
		Label: labels,
		Gauge: &prometheusclientmodel.Gauge{Value: ptr.To(nowSeconds)},
	}
}

func generateInitializationFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
return &prometheusclientmodel.Metric{
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionInitialized),
Expand Down Expand Up @@ -341,6 +351,7 @@ func generateTestClusterStagedUpdateRun() *placementv1beta1.ClusterStagedUpdateR
PlacementName: testCRPName,
ResourceSnapshotIndex: testResourceSnapshotIndex,
StagedUpdateStrategyName: testUpdateStrategyName,
State: placementv1beta1.StateExecuted,
},
}
}
Expand Down Expand Up @@ -807,23 +818,8 @@ func generateFalseCondition(obj client.Object, condType any) metav1.Condition {
}
}

func generateFalseProgressingCondition(obj client.Object, condType any, succeeded bool) metav1.Condition {
func generateFalseProgressingCondition(obj client.Object, condType any, reason string) metav1.Condition {
falseCond := generateFalseCondition(obj, condType)
reason := ""
switch condType {
case placementv1beta1.StagedUpdateRunConditionProgressing:
if succeeded {
reason = condition.UpdateRunSucceededReason
} else {
reason = condition.UpdateRunFailedReason
}
case placementv1beta1.StageUpdatingConditionProgressing:
if succeeded {
reason = condition.StageUpdatingSucceededReason
} else {
reason = condition.StageUpdatingFailedReason
}
}
falseCond.Reason = reason
return falseCond
}
54 changes: 34 additions & 20 deletions pkg/controllers/updaterun/execution.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,31 +285,45 @@ func (r *Reconciler) executeUpdatingStage(
}

if finishedClusterCount == len(updatingStageStatus.Clusters) {
// All the clusters in the stage have been updated.
markUpdateRunWaiting(updateRun, fmt.Sprintf(condition.UpdateRunWaitingMessageFmt, "after-stage", updatingStageStatus.StageName))
markStageUpdatingWaiting(updatingStageStatus, updateRun.GetGeneration(), "All clusters in the stage are updated, waiting for after-stage tasks to complete")
klog.V(2).InfoS("The stage has finished all cluster updating", "stage", updatingStageStatus.StageName, "updateRun", updateRunRef)
// Check if the after stage tasks are ready.
approved, waitTime, err := r.checkAfterStageTasksStatus(ctx, updatingStageIndex, updateRun)
if err != nil {
return 0, err
}
if approved {
markUpdateRunProgressing(updateRun)
markStageUpdatingSucceeded(updatingStageStatus, updateRun.GetGeneration())
// No need to wait to get to the next stage.
return 0, nil
}
// The after stage tasks are not ready yet.
if waitTime < 0 {
waitTime = stageUpdatingWaitTime
}
return waitTime, nil
return r.handleStageCompletion(ctx, updatingStageIndex, updateRun, updatingStageStatus)
}

// Some clusters are still updating.
return clusterUpdatingWaitTime, nil
}

// handleStageCompletion is invoked once every cluster in the updating stage has finished.
// It first marks both the update run and the stage as waiting for after-stage tasks, then
// checks the status of those tasks:
//   - if the check fails, it returns a zero wait time together with the error;
//   - if the tasks are approved, it marks the update run as progressing and the stage as
//     succeeded, and returns a zero wait time so the next stage can be reached without waiting;
//   - otherwise it returns the wait time reported by the check, falling back to
//     stageUpdatingWaitTime when the reported value is negative.
func (r *Reconciler) handleStageCompletion(
	ctx context.Context,
	updatingStageIndex int,
	updateRun placementv1beta1.UpdateRunObj,
	updatingStageStatus *placementv1beta1.StageUpdatingStatus,
) (time.Duration, error) {
	runRef := klog.KObj(updateRun)

	// All clusters are done: record the waiting state on the run and on the stage before
	// looking at the after-stage tasks.
	waitingMsg := fmt.Sprintf(condition.UpdateRunWaitingMessageFmt, "after-stage", updatingStageStatus.StageName)
	markUpdateRunWaiting(updateRun, waitingMsg)
	markStageUpdatingWaiting(
		updatingStageStatus,
		updateRun.GetGeneration(),
		"All clusters in the stage are updated, waiting for after-stage tasks to complete",
	)
	klog.V(2).InfoS("The stage has finished all cluster updating", "stage", updatingStageStatus.StageName, "updateRun", runRef)

	tasksApproved, requeueAfter, checkErr := r.checkAfterStageTasksStatus(ctx, updatingStageIndex, updateRun)
	switch {
	case checkErr != nil:
		return 0, checkErr
	case tasksApproved:
		// After-stage tasks are complete; move on immediately.
		markUpdateRunProgressing(updateRun)
		markStageUpdatingSucceeded(updatingStageStatus, updateRun.GetGeneration())
		return 0, nil
	case requeueAfter < 0:
		// The check gave no usable wait time; use the default stage wait time.
		return stageUpdatingWaitTime, nil
	default:
		return requeueAfter, nil
	}
}

// executeDeleteStage executes the delete stage by deleting the bindings.
func (r *Reconciler) executeDeleteStage(
ctx context.Context,
Expand Down
Loading
Loading