Skip to content

Commit

Permalink
Merge pull request #28543 from DennisPeriquet/trt-1452-namespaced-bac…
Browse files Browse the repository at this point in the history
…koffs

TRT-1452: Separate excessive Back-off restarting tests via namespace
  • Loading branch information
openshift-merge-bot[bot] committed Feb 1, 2024
2 parents 4e368eb + 44bf785 commit f533902
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 9 deletions.
Expand Up @@ -5,6 +5,7 @@ import (
"strings"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)

Expand Down Expand Up @@ -60,6 +61,56 @@ func (s *singleEventThresholdCheck) Test(events monitorapi.Intervals) []*junitap
return []*junitapi.JUnitTestCase{success}
}

// getNamespacedFailuresAndFlakes walks the given intervals and collects, per
// namespace, the events whose repeat count exceeds this check's thresholds.
//
// Only events for which s.matcher.Allows returns true are considered. An event
// is recorded as a failure when failThreshold is positive and the count is above
// it; otherwise it is recorded as a flake when the count is above flakeThreshold.
// Events in namespaces that are not in platformidentification.KnownNamespaces are
// grouped under the empty-string key. Namespaces with neither failures nor flakes
// have no entry in the returned map.
func (s *singleEventThresholdCheck) getNamespacedFailuresAndFlakes(events monitorapi.Intervals) map[string]*eventResult {
	resultsByNamespace := map[string]*eventResult{}

	for _, interval := range events {
		ns := interval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey]
		if !platformidentification.KnownNamespaces.Has(ns) {
			// junits are only produced for known namespaces; everything else
			// shares the "" bucket.
			ns = ""
		}

		if !s.matcher.Allows(interval, "") {
			continue
		}

		count := GetTimesAnEventHappened(interval.StructuredMessage)

		// A failure takes precedence: an event is never both a failure and a flake.
		isFailure := s.failThreshold > 0 && count > s.failThreshold
		isFlake := !isFailure && count > s.flakeThreshold
		if !isFailure && !isFlake {
			continue
		}

		// Create the namespace entry lazily so that clean namespaces stay out of the map.
		result, found := resultsByNamespace[ns]
		if !found {
			result = &eventResult{}
			resultsByNamespace[ns] = result
		}

		description := fmt.Sprintf("%s - %s", interval.Locator, interval.StructuredMessage.HumanMessage)
		entry := fmt.Sprintf("event [%s] happened %d times", description, count)
		if isFailure {
			result.failures = append(result.failures, entry)
		} else {
			result.flakes = append(result.flakes, entry)
		}
	}

	return resultsByNamespace
}

// NamespacedTest is similar to Test() except that it creates junits per namespace:
// it gathers the failures and flakes found in events, keyed by namespace, and
// passes them with the test name to generateJUnitTestCasesCoreNamespaces.
func (s *singleEventThresholdCheck) NamespacedTest(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
	return generateJUnitTestCasesCoreNamespaces(s.testName, s.getNamespacedFailuresAndFlakes(events))
}

func NewSingleEventThresholdCheck(testName string, matcher *SimplePathologicalEventMatcher, failThreshold, flakeThreshold int) *singleEventThresholdCheck {
return &singleEventThresholdCheck{
testName: testName,
Expand Down
@@ -0,0 +1,81 @@
package pathologicaleventlibrary

import (
"testing"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/stretchr/testify/assert"
)

// Test_singleEventThresholdCheck_getNamespacedFailuresAndFlakes checks how many
// namespace keys getNamespacedFailuresAndFlakes returns for a single duplicated
// "Back-off restarting failed container" event in a known namespace.
func Test_singleEventThresholdCheck_getNamespacedFailuresAndFlakes(t *testing.T) {
	ns := "openshift-etcd-operator"
	pod := "etcd-operator-6f9b4d9d4f-4q9q8"
	checkName := "[sig-cluster-lifecycle] pathological event should not see excessive Back-off restarting failed containers"

	// Built only to borrow the matcher that the real back-off check uses.
	reference := NewSingleEventThresholdCheck(checkName, AllowBackOffRestartingFailedContainer,
		DuplicateEventThreshold, BackoffRestartingFlakeThreshold)

	cases := []struct {
		name     string
		events   monitorapi.Intervals
		wantKeys int
	}{
		{
			// Repeat count of 5: expected to stay under both thresholds.
			name: "Successful test yields no keys",
			events: monitorapi.Intervals{
				BuildTestDupeKubeEvent(ns, pod,
					"BackOff",
					"Back-off restarting failed container",
					5),
			},
			wantKeys: 0,
		},
		{
			// Repeat count of 21: expected to cross a threshold and yield one namespace entry.
			name: "Failing test yields one key",
			events: monitorapi.Intervals{
				BuildTestDupeKubeEvent(ns, pod,
					"BackOff",
					"Back-off restarting failed container",
					21),
			},
			wantKeys: 1,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			check := &singleEventThresholdCheck{
				testName:       checkName,
				matcher:        reference.matcher,
				failThreshold:  DuplicateEventThreshold,
				flakeThreshold: BackoffRestartingFlakeThreshold,
			}
			results := check.getNamespacedFailuresAndFlakes(tc.events)
			assert.Equal(t, tc.wantKeys, len(results))
		})
	}
}
Expand Up @@ -60,7 +60,7 @@ func testBackoffStartingFailedContainer(events monitorapi.Intervals) []*junitapi

return pathologicaleventlibrary.NewSingleEventThresholdCheck(testName, pathologicaleventlibrary.AllowBackOffRestartingFailedContainer,
pathologicaleventlibrary.DuplicateEventThreshold, pathologicaleventlibrary.BackoffRestartingFlakeThreshold).
Test(events.Filter(monitorapi.Not(monitorapi.IsInE2ENamespace)))
NamespacedTest(events.Filter(monitorapi.Not(monitorapi.IsInE2ENamespace)))
}

func testConfigOperatorReadinessProbe(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
Expand Down
@@ -1,10 +1,12 @@
package legacynodemonitortests

import (
"strings"
"testing"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestlibrary/pathologicaleventlibrary"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -74,30 +76,33 @@ func Test_testBackoffPullingRegistryRedhatImage(t *testing.T) {
}

func Test_testBackoffStartingFailedContainer(t *testing.T) {
namespace := "openshift-etcd-operator"
samplePod := "etcd-operator-6f9b4d9d4f-4q9q8"

tests := []struct {
name string
interval monitorapi.Interval
kind string
}{
{
name: "Test pass case",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
"BackOff",
"Back-off restarting failed container",
5),
kind: "pass",
},
{
name: "Test failure case",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
"BackOff",
"Back-off restarting failed container",
56),
kind: "fail",
},
{
name: "Test flake case",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
"BackOff",
"Back-off restarting failed container",
11),
Expand All @@ -108,23 +113,34 @@ func Test_testBackoffStartingFailedContainer(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
e := monitorapi.Intervals{tt.interval}
junits := testBackoffStartingFailedContainer(e)

// Find the junits with the namespace openshift-etcd-operator in the test name
var testJunits []*junitapi.JUnitTestCase
for _, j := range junits {
if strings.Contains(j.Name, namespace) {
testJunits = append(testJunits, j)
}
}
if len(testJunits) == 0 {
t.Errorf("We should have at least one junit test for namespace openshift-etcd-operator")
}
switch tt.kind {
case "pass":
if len(junits) != 1 {
if len(testJunits) != 1 {
t.Errorf("This should've been a single passing Test, but got %d tests", len(junits))
}
if len(junits[0].SystemOut) != 0 {
if testJunits[0].FailureOutput != nil && len(testJunits[0].FailureOutput.Output) != 0 {
t.Errorf("This should've been a pass, but got %s", junits[0].SystemErr)
}
case "fail":
if len(junits) != 1 {
if len(testJunits) != 1 {
t.Errorf("This should've been a single failing Test, but got %d tests", len(junits))
}
if len(junits[0].SystemOut) == 0 {
if testJunits[0].FailureOutput != nil && len(testJunits[0].FailureOutput.Output) == 0 {
t.Error("This should've been a failure but got no output")
}
case "flake":
if len(junits) != 2 {
if len(testJunits) != 2 {
t.Errorf("This should've been a two tests as flake, but got %d tests", len(junits))
}
default:
Expand Down

0 comments on commit f533902

Please sign in to comment.