Separate excessive Back-off restarting tests via namespace

openshift · Jan 25, 2024 · 002374f · 002374f
1 parent 39182cb
commit 002374f
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 18 deletions.
diff --git a/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_events.go b/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_events.go
@@ -141,16 +141,20 @@ func generateJUnitTestCasesCoreNamespaces(testName string, nsResults map[string]
 	for namespace := range namespaces {
 		jUnitName := getJUnitName(testName, namespace)
 		if result, ok := nsResults[namespace]; ok {
-			output := generateFailureOutput(result.failures, result.flakes)
-			tests = append(tests, &junitapi.JUnitTestCase{
-				Name: jUnitName,
-				FailureOutput: &junitapi.FailureOutput{
-					Output: output,
-				},
-			})
-			// Add a success for flakes
-			if len(result.failures) == 0 && len(result.flakes) > 0 {
+			if len(result.failures) == 0 && len(result.flakes) == 0 {
 				tests = append(tests, &junitapi.JUnitTestCase{Name: jUnitName})
+			} else {
+				output := generateFailureOutput(result.failures, result.flakes)
+				tests = append(tests, &junitapi.JUnitTestCase{
+					Name: jUnitName,
+					FailureOutput: &junitapi.FailureOutput{
+						Output: output,
+					},
+				})
+				// Add a success for flakes
+				if len(result.failures) == 0 && len(result.flakes) > 0 {
+					tests = append(tests, &junitapi.JUnitTestCase{Name: jUnitName})
+				}
 			}
 		} else {
 			tests = append(tests, &junitapi.JUnitTestCase{Name: jUnitName})

diff --git a/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_events_special.go b/pkg/monitortestlibrary/pathologicaleventlibrary/duplicated_events_special.go
@@ -5,6 +5,7 @@ import (
 	"strings"
 
 	"github.com/openshift/origin/pkg/monitor/monitorapi"
+	"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
 	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
 )
 
@@ -60,6 +61,38 @@ func (s *singleEventThresholdCheck) Test(events monitorapi.Intervals) []*junitap
 	return []*junitapi.JUnitTestCase{success}
 }
 
+// NamespacedTest mirrors Test() but reports junit results per namespace rather than as one result.
+// Events whose namespace is not in platformidentification.KnownNamespaces are grouped under "".
+func (s *singleEventThresholdCheck) NamespacedTest(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
+	perNamespace := make(map[string]*eventResult)
+
+	for _, interval := range events {
+		ns := ""
+		if candidate := interval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey]; platformidentification.KnownNamespaces.Has(candidate) {
+			ns = candidate
+		}
+		// Register the namespace before matching, so it gets an entry even when nothing is recorded for it.
+		result, seen := perNamespace[ns]
+		if !seen {
+			result = &eventResult{}
+			perNamespace[ns] = result
+		}
+		if !s.matcher.Allows(interval, "") {
+			continue
+		}
+		description := fmt.Sprintf("%s - %s", interval.Locator, interval.StructuredMessage.HumanMessage)
+		count := GetTimesAnEventHappened(interval.StructuredMessage)
+		report := fmt.Sprintf("event [%s] happened %d times", description, count)
+		if s.failThreshold > 0 && count > s.failThreshold {
+			result.failures = append(result.failures, report)
+		} else if count > s.flakeThreshold {
+			result.flakes = append(result.flakes, report)
+		}
+	}
+
+	return generateJUnitTestCasesCoreNamespaces(s.testName, perNamespace)
+}
+
 func NewSingleEventThresholdCheck(testName string, matcher *SimplePathologicalEventMatcher, failThreshold, flakeThreshold int) *singleEventThresholdCheck {
 	return &singleEventThresholdCheck{
 		testName:       testName,

diff --git a/pkg/monitortests/node/legacynodemonitortests/pathological_events.go b/pkg/monitortests/node/legacynodemonitortests/pathological_events.go
@@ -60,7 +60,7 @@ func testBackoffStartingFailedContainer(events monitorapi.Intervals) []*junitapi
 
 	return pathologicaleventlibrary.NewSingleEventThresholdCheck(testName, pathologicaleventlibrary.AllowBackOffRestartingFailedContainer,
 		pathologicaleventlibrary.DuplicateEventThreshold, pathologicaleventlibrary.BackoffRestartingFlakeThreshold).
-		Test(events.Filter(monitorapi.Not(monitorapi.IsInE2ENamespace)))
+		NamespacedTest(events.Filter(monitorapi.Not(monitorapi.IsInE2ENamespace)))
 }
 
 func testConfigOperatorReadinessProbe(events monitorapi.Intervals) []*junitapi.JUnitTestCase {

diff --git a/pkg/monitortests/node/legacynodemonitortests/pathological_events_test.go b/pkg/monitortests/node/legacynodemonitortests/pathological_events_test.go
@@ -1,10 +1,12 @@
 package legacynodemonitortests
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/openshift/origin/pkg/monitor/monitorapi"
 	"github.com/openshift/origin/pkg/monitortestlibrary/pathologicaleventlibrary"
+	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -74,30 +76,33 @@ func Test_testBackoffPullingRegistryRedhatImage(t *testing.T) {
 }
 
 func Test_testBackoffStartingFailedContainer(t *testing.T) {
+	namespace := "openshift-etcd-operator"
+	samplePod := "etcd-operator-6f9b4d9d4f-4q9q8"
+
 	tests := []struct {
 		name     string
 		interval monitorapi.Interval
 		kind     string
 	}{
 		{
 			name: "Test pass case",
-			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
+			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
 				"BackOff",
 				"Back-off restarting failed container",
 				5),
 			kind: "pass",
 		},
 		{
 			name: "Test failure case",
-			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
+			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
 				"BackOff",
 				"Back-off restarting failed container",
 				56),
 			kind: "fail",
 		},
 		{
 			name: "Test flake case",
-			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent("", "",
+			interval: pathologicaleventlibrary.BuildTestDupeKubeEvent(namespace, samplePod,
 				"BackOff",
 				"Back-off restarting failed container",
 				11),
@@ -108,23 +113,34 @@ func Test_testBackoffStartingFailedContainer(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			e := monitorapi.Intervals{tt.interval}
 			junits := testBackoffStartingFailedContainer(e)
+
+			// Find the junits that have the openshift-etcd-operator namespace in the test name
+			var testJunits []*junitapi.JUnitTestCase
+			for _, j := range junits {
+				if strings.Contains(j.Name, namespace) {
+					testJunits = append(testJunits, j)
+				}
+			}
+			if len(testJunits) == 0 {
+				t.Errorf("We should have at least one junit test for namespace openshift-etcd-operator")
+			}
 			switch tt.kind {
 			case "pass":
-				if len(junits) != 1 {
+				if len(testJunits) != 1 {
 					t.Errorf("This should've been a single passing Test, but got %d tests", len(junits))
 				}
-				if len(junits[0].SystemOut) != 0 {
+				if testJunits[0].FailureOutput != nil && len(testJunits[0].FailureOutput.Output) != 0 {
 					t.Errorf("This should've been a pass, but got %s", junits[0].SystemErr)
 				}
 			case "fail":
-				if len(junits) != 1 {
+				if len(testJunits) != 1 {
 					t.Errorf("This should've been a single failing Test, but got %d tests", len(junits))
 				}
-				if len(junits[0].SystemOut) == 0 {
+				if testJunits[0].FailureOutput != nil && len(testJunits[0].FailureOutput.Output) == 0 {
 					t.Error("This should've been a failure but got no output")
 				}
 			case "flake":
-				if len(junits) != 2 {
+				if len(testJunits) != 2 {
 					t.Errorf("This should've been a two tests as flake, but got %d tests", len(junits))
 				}
 			default: