MCO: clear out failing status on success and add tests #442
Changes from all commits: 5b1ee9f, 146ea72, e1c97fd, 2a582a9
Combined diff, pkg/operator/status.go:
```diff
@@ -28,13 +28,12 @@ func (optr *Operator) syncAvailableStatus() error {
 	}

 	optrVersion, _ := optr.vStore.Get("operator")
+	progressing := cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorProgressing)
 	failing := cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorFailing)
 	message := fmt.Sprintf("Cluster has deployed %s", optrVersion)

 	available := configv1.ConditionTrue

+	if failing && !progressing {
```

Review thread on this condition:

> Hm, so failing now always means !available? OK, that looks like what the CVO does as well, I think.

> My understanding for the MCO is that if we fail the sync while progressing, we could have e.g. applied a new MCO or something else that is likely to misbehave because we failed at some point, so there is no reason to report available. @abhinavdahiya, what do you think?

> So, when looking at https://github.com/openshift/cluster-version-operator/blob/master/docs/dev/clusteroperator.md#conditions, there's an example. I'm not sure how to read that, if any of the […]

> Actually, maybe that […]

> This patch accounts for that case though, and the changed test accounts for my bad assumption (all the other tests are fine with this):

```diff
diff --git a/pkg/operator/status.go b/pkg/operator/status.go
index b71370e..3ce2262 100644
--- a/pkg/operator/status.go
+++ b/pkg/operator/status.go
@@ -29,11 +29,12 @@ func (optr *Operator) syncAvailableStatus() error {
optrVersion, _ := optr.vStore.Get("operator")
failing := cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorFailing)
+ progressing := cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorProgressing)
message := fmt.Sprintf("Cluster has deployed %s", optrVersion)
available := configv1.ConditionTrue
- if failing {
+ if (failing && !progressing) || (failing && optr.inClusterBringup) {
available = configv1.ConditionFalse
message = fmt.Sprintf("Cluster not available for %s", optrVersion)
}
diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go
index 1437769..9b24c97 100644
--- a/pkg/operator/status_test.go
+++ b/pkg/operator/status_test.go
@@ -350,8 +350,7 @@ func TestOperatorSyncStatus(t *testing.T) {
},
},
},
- // 3. test that if progressing fails, we report available=false because state of the operator
- // might have changed in the various sync calls
+ // 3. test that if progressing fails, we report available=true for the current version
{
syncs: []syncCase{
{
@@ -390,7 +389,7 @@ func TestOperatorSyncStatus(t *testing.T) {
},
{
Type: configv1.OperatorAvailable,
- Status: configv1.ConditionFalse,
+ Status: configv1.ConditionTrue,
},
{
Type: configv1.OperatorFailing,
@@ -405,6 +404,29 @@ func TestOperatorSyncStatus(t *testing.T) {
},
},
},
+ {
+ // we mock the fact that we are at operator=test-version-2 after the previous sync
+ cond: []configv1.ClusterOperatorStatusCondition{
+ {
+ Type: configv1.OperatorProgressing,
+ Status: configv1.ConditionFalse,
+ },
+ {
+ Type: configv1.OperatorAvailable,
+ Status: configv1.ConditionTrue,
+ },
+ {
+ Type: configv1.OperatorFailing,
+ Status: configv1.ConditionFalse,
+ },
+ },
+ syncFuncs: []syncFunc{
+ {
+ name: "fn1",
+ fn: func(config renderConfig) error { return nil },
+ },
+ },
+ },
},
},
// 4. test that if progressing fails during bringup, we still report failing and not available
@@ -601,4 +623,4 @@ func TestInClusterBringUpStayOnErr(t *testing.T) {
assert.Nil(t, err, "expected syncAll to pass")
assert.False(t, optr.inClusterBringup)
-}
\ No newline at end of file
+}
```

> The patch above effectively enables the MCO to report available=true, progressing=true, failing=true if we hit a failure while progressing but the MCO is still available (an assumption taken from the CVO doc).

> Opened #450 to further discuss this.
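To pin down the semantics the thread converged on, here is a minimal decision-table sketch of the Available computation. The helper name `availableForConditions` is hypothetical; the first clause mirrors the patch proposed above, while the combined diff itself keeps only the `failing && !progressing` check.

```go
package status

import configv1 "github.com/openshift/api/config/v1"

// availableForConditions is a hypothetical helper capturing the rule discussed
// in the review: a failure outside of a progressing phase, or a failure while
// the cluster is still bootstrapping, makes the operator unavailable.
func availableForConditions(failing, progressing, inClusterBringup bool) configv1.ConditionStatus {
	if (failing && !progressing) || (failing && inClusterBringup) {
		return configv1.ConditionFalse
	}
	// A failure during an ordinary upgrade keeps Available=True for the
	// currently deployed version, so available=true, progressing=true and
	// failing=true can all be reported at once.
	return configv1.ConditionTrue
}
```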
Back in the combined diff, the old condition is removed:

```diff
-	if failing {
 		available = configv1.ConditionFalse
 		message = fmt.Sprintf("Cluster not available for %s", optrVersion)
 	}

@@ -47,7 +46,7 @@ func (optr *Operator) syncAvailableStatus() error {

 	co.Status.Versions = optr.vStore.GetAll()
 	optr.setMachineConfigPoolStatuses(&co.Status)
-	_, err = optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(co)
+	_, err = optr.configClient.UpdateStatus(co)
 	return err
 }
```
```diff
@@ -67,6 +66,7 @@ func (optr *Operator) syncProgressingStatus() error {

 	if optr.vStore.Equal(co.Status.Versions) {
 		if optr.inClusterBringup {
+			message = fmt.Sprintf("Cluster is bootstrapping %s", optrVersion)
 			progressing = configv1.ConditionTrue
 		}
 	} else {
```
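For orientation, a condensed sketch of the Progressing decision this hunk feeds into, assuming the structure visible in the hunk. `progressingFor` is a hypothetical condensation: `versionsEqual` stands for `optr.vStore.Equal(co.Status.Versions)` and `bringup` for `optr.inClusterBringup`; only the bootstrapping branch is visible in the diff, so the other two messages are stand-ins.

```go
package status

import (
	"fmt"

	configv1 "github.com/openshift/api/config/v1"
)

// progressingFor sketches how the Progressing condition and its message are
// derived around this hunk.
func progressingFor(versionsEqual, bringup bool, optrVersion string) (configv1.ConditionStatus, string) {
	if versionsEqual {
		// Versions already match: only report progress while bootstrapping.
		if bringup {
			return configv1.ConditionTrue, fmt.Sprintf("Cluster is bootstrapping %s", optrVersion)
		}
		return configv1.ConditionFalse, fmt.Sprintf("Cluster version is %s", optrVersion) // hypothetical steady-state text
	}
	// Versions differ: the operator is moving to a new version.
	return configv1.ConditionTrue, fmt.Sprintf("Working towards %s", optrVersion) // hypothetical
}
```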
```diff
@@ -80,15 +80,12 @@ func (optr *Operator) syncProgressingStatus() error {
 	})

 	optr.setMachineConfigPoolStatuses(&co.Status)
-	_, err = optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(co)
+	_, err = optr.configClient.UpdateStatus(co)
 	return err
 }

 // syncFailingStatus applies the new condition to the mco's ClusterOperator object.
-func (optr *Operator) syncFailingStatus(ierr error) error {
-	if ierr == nil {
-		return nil
-	}
+func (optr *Operator) syncFailingStatus(ierr error) (err error) {
 	co, err := optr.fetchClusterOperator()
 	if err != nil {
 		return err
```
```diff
@@ -98,34 +95,40 @@ func (optr *Operator) syncFailingStatus(ierr error) error {
 	}

 	optrVersion, _ := optr.vStore.Get("operator")
-	var message string
-	if optr.vStore.Equal(co.Status.Versions) {
-		// syncing the state to exiting version.
-		message = fmt.Sprintf("Failed to resync %s because: %v", optrVersion, ierr.Error())
+	failing := configv1.ConditionTrue
+	var message, reason string
+	if ierr == nil {
+		failing = configv1.ConditionFalse
 	} else {
-		message = fmt.Sprintf("Unable to apply %s: %v", optrVersion, ierr.Error())
+		if optr.vStore.Equal(co.Status.Versions) {
+			// syncing the state to exiting version.
+			message = fmt.Sprintf("Failed to resync %s because: %v", optrVersion, ierr.Error())
+		} else {
+			message = fmt.Sprintf("Unable to apply %s: %v", optrVersion, ierr.Error())
+		}
+		reason = ierr.Error()
+
+		// set progressing
+		if cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorProgressing) {
+			cov1helpers.SetStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: fmt.Sprintf("Unable to apply %s", version.Version.String())})
+		} else {
+			cov1helpers.SetStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: fmt.Sprintf("Error while reconciling %s", version.Version.String())})
+		}
+	}
 	// set failing condition
 	cov1helpers.SetStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{
-		Type: configv1.OperatorFailing, Status: configv1.ConditionTrue,
+		Type: configv1.OperatorFailing, Status: failing,
 		Message: message,
-		Reason: ierr.Error(),
+		Reason: reason,
 	})

-	// set progressing
-	if cov1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorProgressing) {
-		cov1helpers.SetStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Message: fmt.Sprintf("Unable to apply %s", version.Version.String())})
-	} else {
-		cov1helpers.SetStatusCondition(&co.Status.Conditions, configv1.ClusterOperatorStatusCondition{Type: configv1.OperatorProgressing, Status: configv1.ConditionFalse, Message: fmt.Sprintf("Error while reconciling %s", version.Version.String())})
-	}

 	optr.setMachineConfigPoolStatuses(&co.Status)
-	_, err = optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(co)
+	_, err = optr.configClient.UpdateStatus(co)
 	return err
 }

 func (optr *Operator) fetchClusterOperator() (*configv1.ClusterOperator, error) {
-	co, err := optr.configClient.ConfigV1().ClusterOperators().Get(optr.name, metav1.GetOptions{})
+	co, err := optr.configClient.Get(optr.name, metav1.GetOptions{})
 	if meta.IsNoMatchError(err) {
 		return nil, nil
 	}
```
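The behavioral payoff of the new signature, sketched from the caller's side: because syncFailingStatus no longer early-returns on a nil error, invoking it after a successful sync actively resets Failing=False (with an empty Message and Reason) instead of leaving a stale Failing=True from an earlier pass. A hypothetical call site — the real sync loop is outside this diff, and `reportSyncResult` is an illustrative name only:

```go
// reportSyncResult is a hypothetical wrapper: err is whatever the sync pass
// produced, nil on success. Before this PR, syncFailingStatus(nil) was a
// no-op, so a cluster that failed once and then recovered kept reporting
// Failing=True; now a nil error clears the condition.
func (optr *Operator) reportSyncResult(err error) error {
	if statusErr := optr.syncFailingStatus(err); statusErr != nil {
		return statusErr // updating the ClusterOperator status itself failed
	}
	return err
}
```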
```diff
@@ -139,7 +142,7 @@ func (optr *Operator) fetchClusterOperator() (*configv1.ClusterOperator, error)
 }

 func (optr *Operator) initializeClusterOperator() (*configv1.ClusterOperator, error) {
-	co, err := optr.configClient.ConfigV1().ClusterOperators().Create(&configv1.ClusterOperator{
+	co, err := optr.configClient.Create(&configv1.ClusterOperator{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: optr.name,
 		},

@@ -154,7 +157,7 @@ func (optr *Operator) initializeClusterOperator() (*configv1.ClusterOperator, er
 	co.Status.RelatedObjects = []configv1.ObjectReference{
 		{Resource: "namespaces", Name: "openshift-machine-config-operator"},
 	}
-	return optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(co)
+	return optr.configClient.UpdateStatus(co)
 }

 func (optr *Operator) setMachineConfigPoolStatuses(status *configv1.ClusterOperatorStatus) {
```
Review comment on the narrowed configClient usage:

> This is important for us: it allows us to control what we use in the MCO itself and to create meaningful mocks for proper unit tests.
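A minimal sketch of what that seam could look like. The interface and fake names are hypothetical, but the three methods mirror the calls the diff makes on optr.configClient (Get, Create, UpdateStatus), so a unit test can swap in an in-memory fake instead of a real client-go client:

```go
package status

import (
	configv1 "github.com/openshift/api/config/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// clusterOperatorsClient is a hypothetical narrow seam matching the calls in
// the diff; both the real ClusterOperators client and a test fake satisfy it.
type clusterOperatorsClient interface {
	Get(name string, options metav1.GetOptions) (*configv1.ClusterOperator, error)
	Create(co *configv1.ClusterOperator) (*configv1.ClusterOperator, error)
	UpdateStatus(co *configv1.ClusterOperator) (*configv1.ClusterOperator, error)
}

// fakeClusterOperators is a toy in-memory implementation for unit tests.
type fakeClusterOperators struct {
	co *configv1.ClusterOperator
}

func (f *fakeClusterOperators) Get(name string, _ metav1.GetOptions) (*configv1.ClusterOperator, error) {
	return f.co, nil
}

func (f *fakeClusterOperators) Create(co *configv1.ClusterOperator) (*configv1.ClusterOperator, error) {
	f.co = co
	return co, nil
}

func (f *fakeClusterOperators) UpdateStatus(co *configv1.ClusterOperator) (*configv1.ClusterOperator, error) {
	f.co = co
	return co, nil
}
```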