Skip to content

Commit

Permalink
Create an alert when stale credentials are found
Browse files Browse the repository at this point in the history
  • Loading branch information
akhil-rane authored and openshift-cherrypick-robot committed Oct 20, 2021
1 parent c1cda68 commit 95eca1d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,11 @@ spec:
for: 5m
labels:
severity: warning
- alert: CloudCredentialOperatorStaleCredentials
annotations:
message: 1 or more credentials requests are stale and should be deleted. Check the status.conditions on CredentialsRequest CRs to identify the stale one(s).
expr: cco_credentials_requests_conditions{condition="StaleCredentials"}
> 0
for: 5m
labels:
severity: warning
14 changes: 12 additions & 2 deletions pkg/operator/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ func newAccumulator(client client.Client, logger log.FieldLogger) *credRequestAc
for _, c := range credreqv1.FailureConditionTypes {
acc.crConditions[c] = 0
}
acc.crConditions[credreqv1.StaleCredentials] = 0

return acc
}
Expand All @@ -252,10 +253,19 @@ func (a *credRequestAccumulator) processCR(cr *credreqv1.CredentialsRequest, cco
a.podIdentityCredentials++
}

// Skip reporting conditions if CCO is disabled, as we shouldn't be alerting in that case.
// Skip reporting conditions if CCO is disabled, as we shouldn't be alerting in that case, except for the stale
// credentials condition. Stale credentials are normally removed by the cleanup controller, but when CCO is disabled
// the only way to inform users to remove these credentials is through alerts.
if !ccoDisabled {
for _, cond := range cr.Status.Conditions {
if cond.Status == corev1.ConditionTrue {
// Do not report stale credentials when CCO is enabled, as they will be removed by the cleanup controller.
if cond.Status == corev1.ConditionTrue && cond.Type != credreqv1.StaleCredentials {
a.crConditions[cond.Type]++
}
}
} else {
for _, cond := range cr.Status.Conditions {
if cond.Status == corev1.ConditionTrue && cond.Type == credreqv1.StaleCredentials {
a.crConditions[cond.Type]++
}
}
Expand Down
16 changes: 14 additions & 2 deletions pkg/operator/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ var (
Type: credreqv1.InsufficientCloudCredentials,
Status: corev1.ConditionTrue,
}

staleCredsCond = credreqv1.CredentialsRequestCondition{
Type: credreqv1.StaleCredentials,
Status: corev1.ConditionTrue,
}
)

func TestSecretGetter(t *testing.T) {
Expand Down Expand Up @@ -165,6 +170,8 @@ func TestCredentialsRequests(t *testing.T) {
}(),
// insufficient cloud creds condition
testCredReqWithConditions(testAWSCredRequest("ainsufficientcreds"), []credreqv1.CredentialsRequestCondition{insufficientCredsCond}),
// stale creds condition
testCredReqWithConditions(testAWSCredRequest("astalecreds"), []credreqv1.CredentialsRequestCondition{staleCredsCond}),

// regular GCP credreq
testGCPCredRequest("gregular"),
Expand All @@ -174,14 +181,15 @@ func TestCredentialsRequests(t *testing.T) {
},
validate: func(t *testing.T, accumulator *credRequestAccumulator) {
// total cred requests
assert.Equal(t, 5, accumulator.crTotals["aws"])
assert.Equal(t, 6, accumulator.crTotals["aws"])
assert.Equal(t, 2, accumulator.crTotals["gcp"])

// conditions
assert.Equal(t, 1, accumulator.crConditions[credreqv1.MissingTargetNamespace])
assert.Equal(t, 1, accumulator.crConditions[credreqv1.CredentialsProvisionFailure])
assert.Equal(t, 1, accumulator.crConditions[credreqv1.Ignored])
assert.Equal(t, 1, accumulator.crConditions[credreqv1.InsufficientCloudCredentials])
assert.Equal(t, 0, accumulator.crConditions[credreqv1.StaleCredentials])
},
},
{
Expand All @@ -194,19 +202,23 @@ func TestCredentialsRequests(t *testing.T) {
testCredReqWithConditions(testAWSCredRequest("aprovisionfailed"), []credreqv1.CredentialsRequestCondition{provisionFailedCond}),
// insufficient cloud creds condition
testCredReqWithConditions(testAWSCredRequest("ainsufficientcreds"), []credreqv1.CredentialsRequestCondition{insufficientCredsCond}),
// stale creds condition
testCredReqWithConditions(testAWSCredRequest("astalecreds"), []credreqv1.CredentialsRequestCondition{staleCredsCond}),

// GCP credreq with condition set
testCredReqWithConditions(testGCPCredRequest("gignored"), []credreqv1.CredentialsRequestCondition{ignoredCond}),
},
validate: func(t *testing.T, accumulator *credRequestAccumulator) {
// total cred requests
assert.Equal(t, 3, accumulator.crTotals["aws"])
assert.Equal(t, 4, accumulator.crTotals["aws"])
assert.Equal(t, 1, accumulator.crTotals["gcp"])

// failure conditions should all be zero as CCO is disabled
for _, cond := range credreqv1.FailureConditionTypes {
assert.Equal(t, 0, accumulator.crConditions[cond])
}
// the stale credentials condition should be reported when CCO is disabled
assert.Equal(t, 1, accumulator.crConditions[credreqv1.StaleCredentials])
},
},
{
Expand Down

0 comments on commit 95eca1d

Please sign in to comment.