Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pkg/cvo/internal/operatorstatus.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,20 @@ func checkOperatorHealth(ctx context.Context, client ClusterOperatorsGetter, exp
}
}

// during initialization, degraded conditions are reported but do not fail the update
if degraded && mode != resourcebuilder.InitializingMode {
if degraded {
if degradedCondition != nil && len(degradedCondition.Message) > 0 {
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s, %s", actual.Name, degradedCondition.Type, degradedCondition.Status, degradedCondition.Reason, degradedCondition.Message)
}
var updateEffect payload.UpdateEffectType

if mode == resourcebuilder.InitializingMode {
updateEffect = payload.UpdateEffectReport
} else {
updateEffect = payload.UpdateEffectFailAfterInterval
}
return &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectFailAfterInterval,
UpdateEffect: updateEffect,
Reason: "ClusterOperatorDegraded",
PluralReason: "ClusterOperatorsDegraded",
Message: fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
Expand Down
73 changes: 73 additions & 0 deletions pkg/cvo/internal/operatorstatus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,42 @@ func Test_checkOperatorHealth(t *testing.T) {
PluralMessageFormat: "Cluster operators %s are degraded",
Name: "test-co",
},
}, {
name: "cluster operator reporting available=true degraded=true in InitializingMode",
actual: &configv1.ClusterOperator{
ObjectMeta: metav1.ObjectMeta{Name: "test-co"},
Status: configv1.ClusterOperatorStatus{
Versions: []configv1.OperandVersion{{
Name: "operator", Version: "v1",
}, {
Name: "operand-1", Version: "v1",
}},
Conditions: []configv1.ClusterOperatorStatusCondition{
{Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue},
{Type: configv1.OperatorDegraded, Status: configv1.ConditionTrue, Reason: "RandomReason", Message: "random error"},
},
},
},
mode: resourcebuilder.InitializingMode,
exp: &configv1.ClusterOperator{
ObjectMeta: metav1.ObjectMeta{Name: "test-co"},
Status: configv1.ClusterOperatorStatus{
Versions: []configv1.OperandVersion{{
Name: "operator", Version: "v1",
}, {
Name: "operand-1", Version: "v1",
}},
},
},
expErr: &payload.UpdateError{
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
UpdateEffect: payload.UpdateEffectReport,
Reason: "ClusterOperatorDegraded",
PluralReason: "ClusterOperatorsDegraded",
Message: "Cluster operator test-co is degraded",
PluralMessageFormat: "Cluster operators %s are degraded",
Name: "test-co",
},
}, {
name: "cluster operator reporting available=true progressing=true degraded=true",
actual: &configv1.ClusterOperator{
Expand Down Expand Up @@ -475,6 +511,43 @@ func Test_checkOperatorHealth(t *testing.T) {
PluralMessageFormat: "Cluster operators %s are degraded",
Name: "test-co",
},
}, {
name: "cluster operator reporting available=true progressing=true degraded=true in InitializingMode",
actual: &configv1.ClusterOperator{
ObjectMeta: metav1.ObjectMeta{Name: "test-co"},
Status: configv1.ClusterOperatorStatus{
Versions: []configv1.OperandVersion{{
Name: "operator", Version: "v1",
}, {
Name: "operand-1", Version: "v1",
}},
Conditions: []configv1.ClusterOperatorStatusCondition{
{Type: configv1.OperatorAvailable, Status: configv1.ConditionTrue},
{Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue},
{Type: configv1.OperatorDegraded, Status: configv1.ConditionTrue, Reason: "RandomReason", Message: "random error"},
},
},
},
mode: resourcebuilder.InitializingMode,
exp: &configv1.ClusterOperator{
ObjectMeta: metav1.ObjectMeta{Name: "test-co"},
Status: configv1.ClusterOperatorStatus{
Versions: []configv1.OperandVersion{{
Name: "operator", Version: "v1",
}, {
Name: "operand-1", Version: "v1",
}},
},
},
expErr: &payload.UpdateError{
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
UpdateEffect: payload.UpdateEffectReport,
Reason: "ClusterOperatorDegraded",
PluralReason: "ClusterOperatorsDegraded",
Message: "Cluster operator test-co is degraded",
PluralMessageFormat: "Cluster operators %s are degraded",
Name: "test-co",
},
}, {
name: "cluster operator reporting available=true no progressing or degraded",
actual: &configv1.ClusterOperator{
Expand Down
7 changes: 5 additions & 2 deletions pkg/cvo/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,9 @@ func setDesiredReleaseAcceptedCondition(config *configv1.ClusterVersion, status
// convertErrorToProgressing returns true if the provided status indicates a failure condition can be interpreted as
// still making internal progress. The general error we try to suppress is an operator or operators still being
// progressing AND the general payload task making progress towards its goal. The error's UpdateEffect determines
// whether an error should be considered a failure and, if so, whether the operator should be given up to 40 minutes
// to recover from the error.
// how an update error is interpreted. An error may simply need to be reported but does not indicate the update is
// failing. An error may indicate the update is failing or that if the error continues for a defined interval the
// update is failing.
func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
if len(history) == 0 || status.Failure == nil || status.Reconciling {
return "", "", false
Expand All @@ -441,6 +442,8 @@ func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time,
return "", "", false
}
switch uErr.UpdateEffect {
case payload.UpdateEffectReport:
return uErr.Reason, uErr.Error(), false
case payload.UpdateEffectNone:
return uErr.Reason, fmt.Sprintf("waiting on %s", uErr.Name), true
case payload.UpdateEffectFail:
Expand Down
21 changes: 16 additions & 5 deletions pkg/cvo/sync_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,8 @@ func (w *SyncWorker) apply(ctx context.Context, work *SyncWork, maxWorkers int,
}
capabilities := capability.GetCapabilitiesStatus(work.Capabilities)

var reportEffectErrors []error

// update each object
errs := payload.RunGraph(ctx, graph, maxWorkers, func(ctx context.Context, tasks []*payload.Task) error {
// in specific modes, attempt to precreate a set of known types (currently ClusterOperator) without
Expand Down Expand Up @@ -967,7 +969,12 @@ func (w *SyncWorker) apply(ctx context.Context, work *SyncWork, maxWorkers int,
continue
}
if err := task.Run(ctx, payloadUpdate.Release.Version, w.builder, work.State); err != nil {
return err
if uErr, ok := err.(*payload.UpdateError); ok && uErr.UpdateEffect == payload.UpdateEffectReport {
// do not fail the task on this manifest; record the error so it can be reported after the sync completes
reportEffectErrors = append(reportEffectErrors, err)
} else {
return err
}
}
cr.Inc()
klog.V(2).Infof("Done syncing for %s", task)
Expand All @@ -983,7 +990,7 @@ func (w *SyncWorker) apply(ctx context.Context, work *SyncWork, maxWorkers int,

// update the status
cr.Complete()
return nil
return apierrors.NewAggregate(reportEffectErrors)
}

var (
Expand Down Expand Up @@ -1173,21 +1180,25 @@ func condenseClusterOperators(errs []error) []error {
}
nested := make([]error, 0, len(reasonErrors))
names := make([]string, 0, len(reasonErrors))
updateEffect := payload.UpdateEffectNone
updateEffect := payload.UpdateEffectReport
for _, err := range reasonErrors {
nested = append(nested, err)
if len(err.Name) > 0 {
names = append(names, err.Name)
}

switch err.UpdateEffect {
case payload.UpdateEffectReport:
case payload.UpdateEffectNone:
case payload.UpdateEffectFail:
updateEffect = payload.UpdateEffectFail
if updateEffect == payload.UpdateEffectReport {
updateEffect = payload.UpdateEffectNone
}
case payload.UpdateEffectFailAfterInterval:
if updateEffect != payload.UpdateEffectFail {
updateEffect = payload.UpdateEffectFailAfterInterval
}
case payload.UpdateEffectFail:
updateEffect = payload.UpdateEffectFail
}
}
sort.Strings(names)
Expand Down
4 changes: 4 additions & 0 deletions pkg/payload/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ func (st *Task) Run(ctx context.Context, version string, builder ResourceBuilder
type UpdateEffectType string

const (
// UpdateEffectReport defines an error that requires reporting but does not
// block reconciliation from completing.
UpdateEffectReport UpdateEffectType = "Report"

// UpdateEffectNone defines an error as having no effect on the update state.
UpdateEffectNone UpdateEffectType = "None"

Expand Down