From ab482618aabb1a85c8607525ef70fc1eb221ddbd Mon Sep 17 00:00:00 2001
From: Kui Wang <kuiwang@redhat.com>
Date: Fri, 21 Nov 2025 10:11:42 +0800
Subject: [PATCH] Migrate OLM v0 stress test cases

---
 .../openshift_payload_olmv0.json              |  32 ++
 tests-extension/test/qe/specs/olmv0_allns.go  |   4 +-
 tests-extension/test/qe/specs/olmv0_common.go |   3 +-
 .../test/qe/specs/olmv0_defaultoption.go      |   3 +-
 .../test/qe/specs/olmv0_hypershiftmgmt.go     |   3 +-
 .../test/qe/specs/olmv0_multins.go            |   3 +-
 .../test/qe/specs/olmv0_nonallns.go           |  22 +-
 tests-extension/test/qe/specs/olmv0_opm.go    |   5 +-
 tests-extension/test/qe/specs/olmv0_stress.go | 282 ++++++++++++++++++
 tests-extension/test/qe/util/client.go        | 164 +++++++---
 tests-extension/test/qe/util/nodes.go         |  20 +-
 .../test/qe/util/olmv0util/catalog_source.go  |  17 +-
 .../test/qe/util/opmcli/opm_bin.go            |  22 +-
 .../config/pkg-ins/metrics-endpoint.yml       |   9 +
 .../metrics-profiles/metrics-aggregated.yml   |   4 +
 .../manifests/config/pkg-ins/pkg-ins.yml      |  72 +++++
 .../manifests/config/pkg-ins/templates/og.yml |   7 +
 .../config/pkg-ins/templates/sub.yml          |  10 +
 .../test/qe/util/stress/util/ma.py            |   6 +
 .../test/qe/util/stress/util/ma/__init__.py   |  10 +
 .../qe/util/stress/util/ma/cli/__init__.py    |   0
 .../qe/util/stress/util/ma/cli/__main__.py    |   8 +
 .../util/stress/util/ma/cli/cmd_check_ccpu.py |  98 ++++++
 .../qe/util/stress/util/ma/cli/cmd_group.py   |  47 +++
 .../qe/util/stress/util/ma/helper/__init__.py |   0
 .../qe/util/stress/util/ma/helper/algo.py     |  44 +++
 .../qe/util/stress/util/ma/helper/const.py    |   6 +
 .../stress/util/ma/helper/containercpu.py     | 173 +++++++++++
 .../util/stress/util/ma/helper/exceptions.py  |   2 +
 .../qe/util/stress/util/ma/helper/util.py     |  47 +++
 30 files changed, 1064 insertions(+), 59 deletions(-)
 create mode 100644 tests-extension/test/qe/specs/olmv0_stress.go
 create mode 100644 tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-endpoint.yml
 create mode 100644 tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-profiles/metrics-aggregated.yml
 create mode 100644 tests-extension/test/qe/util/stress/manifests/config/pkg-ins/pkg-ins.yml
 create mode 100644 tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/og.yml
 create mode 100644 tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/sub.yml
 create mode 100755 tests-extension/test/qe/util/stress/util/ma.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/__init__.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/cli/__init__.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/cli/__main__.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/cli/cmd_check_ccpu.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/cli/cmd_group.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/__init__.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/algo.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/const.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/containercpu.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/exceptions.py
 create mode 100644 tests-extension/test/qe/util/stress/util/ma/helper/util.py

diff --git a/tests-extension/.openshift-tests-extension/openshift_payload_olmv0.json b/tests-extension/.openshift-tests-extension/openshift_payload_olmv0.json
index b0dfa4e022..771841bb89 100644
--- a/tests-extension/.openshift-tests-extension/openshift_payload_olmv0.json
+++ b/tests-extension/.openshift-tests-extension/openshift_payload_olmv0.json
@@ -1741,5 +1741,37 @@
     "environmentSelector": {
       "exclude": "topology==\"External\""
     }
+  },
+  {
+    "name": "[sig-operator][Jira:OLM] OLM v0 for stress PolarionID:80299-[OTP][Skipped:Disconnected][OlmStress]create mass operator to see if they all are installed successfully with different ns [Slow][Timeout:180m]",
+    "labels": {
+      "Extended": {},
+      "NonHyperShiftHOST": {},
+      "StressTest": {}
+    },
+    "resources": {
+      "isolation": {}
+    },
+    "source": "openshift:payload:olmv0",
+    "lifecycle": "blocking",
+    "environmentSelector": {
+      "exclude": "topology==\"External\""
+    }
+  },
+  {
+    "name": "[sig-operator][Jira:OLM] OLM v0 for stress PolarionID:80413-[OTP][Skipped:Disconnected][OlmStress]install operator repeatedly serially with same ns [Slow][Timeout:180m]",
+    "labels": {
+      "Extended": {},
+      "NonHyperShiftHOST": {},
+      "StressTest": {}
+    },
+    "resources": {
+      "isolation": {}
+    },
+    "source": "openshift:payload:olmv0",
+    "lifecycle": "blocking",
+    "environmentSelector": {
+      "exclude": "topology==\"External\""
+    }
   }
 ]
diff --git a/tests-extension/test/qe/specs/olmv0_allns.go b/tests-extension/test/qe/specs/olmv0_allns.go
index 161ef5b9e6..490ca045dd 100644
--- a/tests-extension/test/qe/specs/olmv0_allns.go
+++ b/tests-extension/test/qe/specs/olmv0_allns.go
@@ -20,13 +20,13 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 within all namespace", func()
 	defer g.GinkgoRecover()
 
 	var (
-		oc = exutil.NewCLI("olm-all-"+exutil.GetRandomString(), exutil.KubeConfigPath())
-
+		oc = exutil.NewCLIWithoutNamespace("default")
 		dr = make(olmv0util.DescriberResrouce)
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 		exutil.SkipNoOLMCore(oc)
 		itName := g.CurrentSpecReport().FullText()
 		dr.AddIr(itName)
diff --git a/tests-extension/test/qe/specs/olmv0_common.go b/tests-extension/test/qe/specs/olmv0_common.go
index a8e90927e6..fb9d715333 100644
--- a/tests-extension/test/qe/specs/olmv0_common.go
+++ b/tests-extension/test/qe/specs/olmv0_common.go
@@ -28,13 +28,14 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 should", func() {
 	defer g.GinkgoRecover()
 
 	var (
-		oc = exutil.NewCLI("olm-common-"+exutil.GetRandomString(), exutil.KubeConfigPath())
+		oc = exutil.NewCLIWithoutNamespace("default")
 
 		dr = make(olmv0util.DescriberResrouce)
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 		exutil.SkipNoOLMCore(oc)
 		itName := g.CurrentSpecReport().FullText()
 		dr.AddIr(itName)
diff --git a/tests-extension/test/qe/specs/olmv0_defaultoption.go b/tests-extension/test/qe/specs/olmv0_defaultoption.go
index 7f9a033121..28394e4fa0 100644
--- a/tests-extension/test/qe/specs/olmv0_defaultoption.go
+++ b/tests-extension/test/qe/specs/olmv0_defaultoption.go
@@ -33,11 +33,12 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 optional should", func() {
 	defer g.GinkgoRecover()
 
 	var (
-		oc = exutil.NewCLI("default-"+exutil.GetRandomString(), exutil.KubeConfigPath())
+		oc = exutil.NewCLIWithoutNamespace("default")
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 		exutil.SkipNoOLMCore(oc)
 	})
 
diff --git a/tests-extension/test/qe/specs/olmv0_hypershiftmgmt.go b/tests-extension/test/qe/specs/olmv0_hypershiftmgmt.go
index 3eba3b3c99..19d45d2ce6 100644
--- a/tests-extension/test/qe/specs/olmv0_hypershiftmgmt.go
+++ b/tests-extension/test/qe/specs/olmv0_hypershiftmgmt.go
@@ -18,7 +18,7 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 on hypershift mgmt", g.Label(
 	defer g.GinkgoRecover()
 
 	var (
-		oc                                                  = exutil.NewCLIForKubeOpenShift("hypershiftmgmt-" + exutil.GetRandomString())
+		oc                                                  = exutil.NewCLIWithoutNamespace("default")
 		guestClusterName, guestClusterKube, hostedClusterNS string
 		isAKS                                               bool
 		errIsAKS                                            error
@@ -26,6 +26,7 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 on hypershift mgmt", g.Label(
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 		if !exutil.IsHypershiftMgmtCluster(oc) {
 			g.Skip("this is not a hypershift management cluster, skip test run")
 		}
diff --git a/tests-extension/test/qe/specs/olmv0_multins.go b/tests-extension/test/qe/specs/olmv0_multins.go
index 2c055703b6..6af212161a 100644
--- a/tests-extension/test/qe/specs/olmv0_multins.go
+++ b/tests-extension/test/qe/specs/olmv0_multins.go
@@ -19,13 +19,14 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 with multi ns", func() {
 	defer g.GinkgoRecover()
 
 	var (
-		oc = exutil.NewCLI("olm-multi-"+exutil.GetRandomString(), exutil.KubeConfigPath())
+		oc = exutil.NewCLIWithoutNamespace("default")
 
 		dr = make(olmv0util.DescriberResrouce)
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 
 		exutil.SkipNoOLMCore(oc)
 		itName := g.CurrentSpecReport().FullText()
diff --git a/tests-extension/test/qe/specs/olmv0_nonallns.go b/tests-extension/test/qe/specs/olmv0_nonallns.go
index 310b320ffb..7c3ffd83e1 100644
--- a/tests-extension/test/qe/specs/olmv0_nonallns.go
+++ b/tests-extension/test/qe/specs/olmv0_nonallns.go
@@ -26,13 +26,14 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 within a namespace", func() {
 	defer g.GinkgoRecover()
 
 	var (
-		oc = exutil.NewCLI("olm-a-"+exutil.GetRandomString(), exutil.KubeConfigPath())
+		oc = exutil.NewCLIWithoutNamespace("default")
 
 		dr = make(olmv0util.DescriberResrouce)
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
 		exutil.SkipNoOLMCore(oc)
 		itName := g.CurrentSpecReport().FullText()
 		dr.AddIr(itName)
@@ -1108,7 +1109,22 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 within a namespace", func() {
 		g.By("Delete sa of csv")
 		sa.GetDefinition(oc)
 		sa.Delete(oc)
-		olmv0util.NewCheck("expect", exutil.AsUser, exutil.WithNamespace, exutil.Compare, "RequirementsNotMet", exutil.Ok, []string{"csv", sub.InstalledCSV, "-o=jsonpath={.status.reason}"}).Check(oc)
+		var output string
+		var err error
+		errCsv := wait.PollUntilContextTimeout(context.TODO(), 10*time.Second, 300*time.Second, false, func(ctx context.Context) (bool, error) {
+			output, err = oc.WithoutNamespace().Run("get").Args("csv", sub.InstalledCSV, "-n", sub.Namespace, "-o=jsonpath={.status.reason}").Output()
+			if err != nil {
+				return false, err
+			}
+			if strings.Contains(output, "RequirementsNotMet") {
+				return true, nil
+			}
+			return false, nil
+		})
+		if strings.Contains(output, "InstallWaiting") {
+			g.Skip("skip because of slow installation")
+		}
+		exutil.AssertWaitPollNoErr(errCsv, fmt.Sprintf("csv status %v is not expected", output))
 
 		g.By("Recovery sa of csv")
 		sa.Reapply(oc)
@@ -1835,6 +1851,8 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 within a namespace", func() {
 
 		g.By("install operator")
 		sub.Create(oc, itName, dr)
+		defer sub.DeleteCSV(itName, dr)
+		defer sub.Delete(itName, dr)
 
 		g.By("check if dependent operator is installed")
 		olmv0util.NewCheck("expect", exutil.AsAdmin, exutil.WithoutNamespace, exutil.Compare, "Succeeded", exutil.Ok, []string{"csv", sub.InstalledCSV, "-n", sub.Namespace, "-o=jsonpath={.status.phase}"}).Check(oc)
diff --git a/tests-extension/test/qe/specs/olmv0_opm.go b/tests-extension/test/qe/specs/olmv0_opm.go
index c845460596..e5bf2a2234 100644
--- a/tests-extension/test/qe/specs/olmv0_opm.go
+++ b/tests-extension/test/qe/specs/olmv0_opm.go
@@ -19,13 +19,14 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 opm should", g.Label("NonHype
 	defer g.GinkgoRecover()
 
 	var (
-		oc     = exutil.NewCLIForKubeOpenShift("opm-" + exutil.GetRandomString())
+		oc     = exutil.NewCLIWithoutNamespace("default")
 		opmCLI = opmcli.NewOpmCLI()
 	)
 
 	g.BeforeEach(func() {
 		exutil.SkipMicroshift(oc)
 
+		oc.SetupProject()
 		err := opmcli.EnsureOPMBinary()
 		if err != nil {
 			g.Skip("Failed to setup opm binary: " + err.Error())
@@ -500,6 +501,8 @@ var _ = g.Describe("[sig-operator][Jira:OLM] OLMv0 opm should", g.Label("NonHype
 		if os.Getenv("HTTP_PROXY") != "" || os.Getenv("http_proxy") != "" {
 			g.Skip("HTTP_PROXY is not empty - skipping test ...")
 		}
+		errPolicy := opmcli.EnsureContainerPolicy()
+		o.Expect(errPolicy).NotTo(o.HaveOccurred())
 		opmBaseDir := exutil.FixturePath("testdata", "opm", "53871")
 		opmCLI.ExecCommandPath = opmBaseDir
 
diff --git a/tests-extension/test/qe/specs/olmv0_stress.go b/tests-extension/test/qe/specs/olmv0_stress.go
new file mode 100644
index 0000000000..3b7ae52283
--- /dev/null
+++ b/tests-extension/test/qe/specs/olmv0_stress.go
@@ -0,0 +1,282 @@
+package specs
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"path/filepath"
+	"strings"
+	"time"
+
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
+	"k8s.io/apimachinery/pkg/util/wait"
+	e2e "k8s.io/kubernetes/test/e2e/framework"
+
+	exutil "github.com/openshift/operator-framework-olm/tests-extension/test/qe/util"
+	olmv0util "github.com/openshift/operator-framework-olm/tests-extension/test/qe/util/olmv0util"
+)
+
+var _ = g.Describe("[sig-operator][Jira:OLM] OLM v0 for stress", func() {
+	defer g.GinkgoRecover()
+
+	var (
+		oc = exutil.NewCLIWithoutNamespace("default")
+		dr = make(olmv0util.DescriberResrouce)
+	)
+
+	g.BeforeEach(func() {
+		exutil.SkipMicroshift(oc)
+		oc.SetupProject()
+		exutil.SkipNoOLMCore(oc)
+		itName := g.CurrentSpecReport().FullText()
+		dr.AddIr(itName)
+	})
+
+	g.It("PolarionID:80299-[OTP][Skipped:Disconnected][OlmStress]create mass operator to see if they all are installed successfully with different ns [Slow][Timeout:180m]", g.Label("StressTest"), g.Label("NonHyperShiftHOST"), func() {
+		var (
+			itName              = g.CurrentSpecReport().FullText()
+			buildPruningBaseDir = exutil.FixturePath("testdata", "olm")
+			ogSingleTemplate    = filepath.Join(buildPruningBaseDir, "operatorgroup.yaml")
+			subTemplate         = filepath.Join(buildPruningBaseDir, "olm-subscription.yaml")
+			caseID              = "80299"
+			ns                  = "openshift-operator-lifecycle-manager"
+			catalogLabel        = "app=catalog-operator"
+			olmLabel            = "app=olm-operator"
+
+			og = olmv0util.OperatorGroupDescription{
+				Name:      "og-singlenamespace",
+				Namespace: "",
+				Template:  ogSingleTemplate,
+			}
+			sub = olmv0util.SubscriptionDescription{
+				SubName:                "local-storage-operator",
+				Namespace:              "",
+				Channel:                "stable",
+				IpApproval:             "Automatic",
+				OperatorPackage:        "local-storage-operator",
+				CatalogSourceName:      "redhat-operators",
+				CatalogSourceNamespace: "openshift-marketplace",
+				StartingCSV:            "",
+				CurrentCSV:             "",
+				InstalledCSV:           "",
+				Template:               subTemplate,
+				SingleNamespace:        true,
+			}
+		)
+		catsrcName := olmv0util.GetCatsrc(oc, "redhat-operators", "local-storage-operator")
+		if len(catsrcName) == 0 {
+			g.Skip("there is no package local-storage-operator")
+		}
+		e2e.Logf("the catsrc is %v", catsrcName)
+		sub.CatalogSourceName = catsrcName
+		startTime := time.Now().UTC()
+		e2e.Logf("Start time: %s", startTime.Format(time.RFC3339))
+
+		for i := 0; i < 45; i++ {
+			e2e.Logf("=================it is round %v=================", i)
+			func() {
+				seed := time.Now().UnixNano()
+				r := rand.New(rand.NewSource(seed))
+				randomNum := r.Intn(5) + 5
+				e2e.Logf("=================round %v has %v namespaces =================", i, randomNum)
+				namespaces := []string{}
+				for j := 0; j < randomNum; j++ {
+					namespaces = append(namespaces, "olm-stress-"+exutil.GetRandomString())
+				}
+
+				for _, nsName := range namespaces {
+					g.By(fmt.Sprintf("create ns %s, and then install og and sub", nsName))
+					err := oc.AsAdmin().WithoutNamespace().Run("create").Args("ns", nsName).Execute()
+					o.Expect(err).NotTo(o.HaveOccurred())
+					defer func(ns string) {
+						_ = oc.AsAdmin().WithoutNamespace().Run("delete").Args("ns", ns, "--force", "--grace-period=0", "--wait=false").Execute()
+					}(nsName)
+					og.Namespace = nsName
+					og.Create(oc, itName, dr)
+					sub.Namespace = nsName
+					sub.CreateWithoutCheckNoPrint(oc, itName, dr)
+				}
+				for _, nsName := range namespaces {
+					g.By(fmt.Sprintf("find the installed csv ns %s", nsName))
+					sub.Namespace = nsName
+					sub.FindInstalledCSV(oc, itName, dr)
+				}
+				for _, nsName := range namespaces {
+					g.By(fmt.Sprintf("check the installed csv is ok in %s", nsName))
+					sub.Namespace = nsName
+
+					errWait := wait.PollUntilContextTimeout(context.TODO(), 3*time.Second, 150*time.Second, false, func(ctx context.Context) (bool, error) {
+						phase, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("csv", sub.InstalledCSV, "-n", sub.Namespace, "-o=jsonpath={.status.phase}").Output()
+						if err != nil {
+							if strings.Contains(phase, "NotFound") || strings.Contains(phase, "No resources found") {
+								e2e.Logf("the existing csv does not exist, and try to get new csv")
+								sub.FindInstalledCSV(oc, itName, dr)
+							} else {
+								e2e.Logf("the error: %v, and try next", err)
+							}
+							return false, nil
+						}
+
+						e2e.Logf("---> we expect value: %s, in returned value: %s", "Succeeded+2+InstallSucceeded", phase)
+						if strings.Compare(phase, "Succeeded") == 0 || strings.Compare(phase, "InstallSucceeded") == 0 {
+							e2e.Logf("the output %s matches one of the content %s, expected", phase, "Succeeded+2+InstallSucceeded")
+							return true, nil
+						}
+						e2e.Logf("the output %s does not match one of the content %s, unexpected", phase, "Succeeded+2+InstallSucceeded")
+						return false, nil
+					})
+					if errWait != nil {
+						olmv0util.GetResource(oc, true, true, "pod", "-n", "openshift-marketplace")
+						olmv0util.GetResource(oc, true, true, "operatorgroup", "-n", sub.Namespace, "-o", "yaml")
+						olmv0util.GetResource(oc, true, true, "subscription", "-n", sub.Namespace, "-o", "yaml")
+						olmv0util.GetResource(oc, true, true, "installplan", "-n", sub.Namespace)
+						olmv0util.GetResource(oc, true, true, "csv", "-n", sub.Namespace)
+						olmv0util.GetResource(oc, true, true, "pods", "-n", sub.Namespace)
+					}
+					exutil.AssertWaitPollNoErr(errWait, fmt.Sprintf("expected content %s not found by %v", "Succeeded+2+InstallSucceeded", strings.Join([]string{"csv", sub.InstalledCSV, "-n", sub.Namespace, "-o=jsonpath={.status.phase}"}, " ")))
+
+				}
+
+			}()
+		}
+		endTime := time.Now().UTC()
+		e2e.Logf("End time:  %v", endTime.Format(time.RFC3339))
+
+		duration := endTime.Sub(startTime)
+		minutes := int(duration.Minutes())
+		if minutes < 1 {
+			minutes = 1
+		}
+
+		podName, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-l", catalogLabel, "-o=jsonpath={.items[0].metadata.name}", "-n", ns).Output()
+		if err == nil {
+			if !exutil.WriteErrToArtifactDir(oc, ns, podName, "error", "Unhandled|Reconciler error|try again|level=info|warning", caseID, minutes) {
+				e2e.Logf("no error log into artifact for pod %s in %s", podName, ns)
+			}
+		}
+		podName, err = oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-l", olmLabel, "-o=jsonpath={.items[0].metadata.name}", "-n", ns).Output()
+		if err == nil {
+			if !exutil.WriteErrToArtifactDir(oc, ns, podName, "error", "Unhandled|Reconciler error|level=info|warning|update Operator status|no errors|warning", caseID, minutes) {
+				e2e.Logf("no error log into artifact for pod %s in %s", podName, ns)
+			}
+		}
+
+		if !exutil.IsPodReady(oc, ns, catalogLabel) {
+			olmv0util.GetResource(oc, true, true, "pod", "-n", ns, "-l", catalogLabel, "-o", "yaml")
+			exutil.AssertWaitPollNoErr(fmt.Errorf("the pod with %s is not correct", catalogLabel), "the pod with app=catalog-operator is not correct")
+		}
+		if !exutil.IsPodReady(oc, ns, olmLabel) {
+			olmv0util.GetResource(oc, true, true, "pod", "-n", ns, "-l", olmLabel, "-o", "yaml")
+			exutil.AssertWaitPollNoErr(fmt.Errorf("the pod with %s is not correct", olmLabel), "the pod with app=olm-operator is not correct")
+		}
+	})
+
+	g.It("PolarionID:80413-[OTP][Skipped:Disconnected][OlmStress]install operator repeatedly serially with same ns [Slow][Timeout:180m]", g.Label("StressTest"), g.Label("NonHyperShiftHOST"), func() {
+		var (
+			itName              = g.CurrentSpecReport().FullText()
+			buildPruningBaseDir = exutil.FixturePath("testdata", "olm")
+			ogSingleTemplate    = filepath.Join(buildPruningBaseDir, "operatorgroup.yaml")
+			catsrcImageTemplate = filepath.Join(buildPruningBaseDir, "catalogsource-image.yaml")
+			subTemplate         = filepath.Join(buildPruningBaseDir, "olm-subscription.yaml")
+			caseID              = "80413"
+			ns                  = "openshift-must-gather-operator"
+			nsOlm               = "openshift-operator-lifecycle-manager"
+			catalogLabel        = "app=catalog-operator"
+			olmLabel            = "app=olm-operator"
+
+			catsrc = olmv0util.CatalogSourceDescription{
+				Name:        "catsrc-80413",
+				Namespace:   ns,
+				DisplayName: "Test 80413",
+				Publisher:   "OLM QE",
+				SourceType:  "grpc",
+				Address:     "quay.io/app-sre/must-gather-operator-registry@sha256:0a0610e37a016fb4eed1b000308d840795838c2306f305a151c64cf3b4fd6bb4",
+				Template:    catsrcImageTemplate,
+			}
+			og = olmv0util.OperatorGroupDescription{
+				Name:      "og",
+				Namespace: ns,
+				Template:  ogSingleTemplate,
+			}
+			sub = olmv0util.SubscriptionDescription{
+				SubName:                "must-gather-operator",
+				Namespace:              ns,
+				Channel:                "stable",
+				IpApproval:             "Automatic",
+				OperatorPackage:        "must-gather-operator",
+				CatalogSourceName:      "catsrc-80413",
+				CatalogSourceNamespace: ns,
+				StartingCSV:            "",
+				CurrentCSV:             "",
+				InstalledCSV:           "",
+				Template:               subTemplate,
+				SingleNamespace:        true,
+			}
+		)
+
+		startTime := time.Now().UTC()
+		e2e.Logf("Start time: %s", startTime.Format(time.RFC3339))
+
+		for i := 0; i < 150; i++ {
+			e2e.Logf("=================it is round %v=================", i)
+			func() {
+				g.By(fmt.Sprintf("create ns %s", ns))
+				err := oc.AsAdmin().WithoutNamespace().Run("create").Args("ns", ns).Execute()
+				o.Expect(err).NotTo(o.HaveOccurred())
+				defer func() {
+					_ = oc.AsAdmin().WithoutNamespace().Run("delete").Args("ns", ns).Execute()
+				}()
+
+				g.By(fmt.Sprintf("install catsrc in %s", ns))
+				defer catsrc.Delete(itName, dr)
+				catsrc.Create(oc, itName, dr)
+
+				g.By(fmt.Sprintf("install og in %s", ns))
+				og.Create(oc, itName, dr)
+
+				g.By(fmt.Sprintf("install sub in %s", ns))
+				sub.CreateWithoutCheckNoPrint(oc, itName, dr)
+
+				g.By(fmt.Sprintf("find the installed csv ns %s", ns))
+				sub.FindInstalledCSV(oc, itName, dr)
+
+				g.By(fmt.Sprintf("check the installed csv is ok in %s", ns))
+				olmv0util.NewCheck("expect", true, true, true, "Succeeded+2+InstallSucceeded", true, []string{"csv", sub.InstalledCSV, "-n", sub.Namespace, "-o=jsonpath={.status.phase}"}).Check(oc)
+
+			}()
+		}
+
+		endTime := time.Now().UTC()
+		e2e.Logf("End time:  %v", endTime.Format(time.RFC3339))
+
+		duration := endTime.Sub(startTime)
+		minutes := int(duration.Minutes())
+		if minutes < 1 {
+			minutes = 1
+		}
+
+		podName, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-l", catalogLabel, "-o=jsonpath={.items[0].metadata.name}", "-n", nsOlm).Output()
+		if err == nil {
+			if !exutil.WriteErrToArtifactDir(oc, nsOlm, podName, "error", "Unhandled|Reconciler error|try again|level=info|warning", caseID, minutes) {
+				e2e.Logf("no error log into artifact for pod %s in %s", podName, nsOlm)
+			}
+		}
+		podName, err = oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-l", olmLabel, "-o=jsonpath={.items[0].metadata.name}", "-n", nsOlm).Output()
+		if err == nil {
+			if !exutil.WriteErrToArtifactDir(oc, nsOlm, podName, "error", "Unhandled|Reconciler error|level=info|warning|update Operator status|no errors|warning", caseID, minutes) {
+				e2e.Logf("no error log into artifact for pod %s in %s", podName, nsOlm)
+			}
+		}
+
+		if !exutil.IsPodReady(oc, nsOlm, catalogLabel) {
+			olmv0util.GetResource(oc, true, true, "pod", "-n", nsOlm, "-l", catalogLabel, "-o", "yaml")
+			exutil.AssertWaitPollNoErr(fmt.Errorf("the pod with %s is not correct", catalogLabel), "the pod with app=catalog-operator is not correct")
+		}
+		if !exutil.IsPodReady(oc, nsOlm, olmLabel) {
+			olmv0util.GetResource(oc, true, true, "pod", "-n", nsOlm, "-l", olmLabel, "-o", "yaml")
+			exutil.AssertWaitPollNoErr(fmt.Errorf("the pod with %s is not correct", olmLabel), "the pod with app=olm-operator is not correct")
+		}
+	})
+
+})
diff --git a/tests-extension/test/qe/util/client.go b/tests-extension/test/qe/util/client.go
index 3a3b4a6ffa..3204b68f07 100644
--- a/tests-extension/test/qe/util/client.go
+++ b/tests-extension/test/qe/util/client.go
@@ -720,63 +720,153 @@ type ExitError struct {
 	*exec.ExitError
 }
 
+// isTransientKubeconfigError checks if the error is a transient kubeconfig-related error
+// that can be retried (typically caused by race conditions in parallel test execution)
+func isTransientKubeconfigError(output string) bool {
+	transientErrors := []string{
+		"context was not found",
+		"Error in configuration",
+		"cluster has no server defined",
+		"error loading config file",
+		"unable to read client-cert",
+		"unable to read client-key",
+		"unable to read certificate-authority",
+	}
+	for _, errMsg := range transientErrors {
+		if strings.Contains(output, errMsg) {
+			return true
+		}
+	}
+	return false
+}
+
 // Output executes the command and returns stdout/stderr combined into one string
+// Implements retry logic for transient kubeconfig errors caused by parallel test execution
 func (c *CLI) Output() (string, error) {
 	if c.verbose {
 		fmt.Printf("DEBUG: oc %s\n", c.printCmd())
 	}
-	cmd := exec.Command(c.execPath, c.finalArgs...) // nolint:gosec // G204: execPath is hardcoded to "oc" and finalArgs come from test code
-	cmd.Stdin = c.stdin
-	if c.showInfo {
-		e2e.Logf("Running '%s %s'", c.execPath, strings.Join(c.finalArgs, " "))
-	}
-	out, err := cmd.CombinedOutput()
-	trimmed := strings.TrimSpace(string(out))
-	if err == nil {
-		c.stdout = bytes.NewBuffer(out)
-		return trimmed, nil
+
+	// Retry configuration for handling race conditions in parallel tests
+	const maxRetries = 3
+	const retryDelay = 500 * time.Millisecond
+
+	var lastErr error
+	var lastOutput string
+
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		cmd := exec.Command(c.execPath, c.finalArgs...) // nolint:gosec // G204: execPath is hardcoded to "oc" and finalArgs come from test code
+		cmd.Stdin = c.stdin
+		if c.showInfo && attempt == 1 {
+			e2e.Logf("Running '%s %s'", c.execPath, strings.Join(c.finalArgs, " "))
+		}
+		out, err := cmd.CombinedOutput()
+		trimmed := strings.TrimSpace(string(out))
+
+		if err == nil {
+			c.stdout = bytes.NewBuffer(out)
+			if attempt > 1 {
+				e2e.Logf("Command succeeded on retry attempt %d/%d", attempt, maxRetries)
+			}
+			return trimmed, nil
+		}
+
+		lastOutput = trimmed
+		lastErr = err
+
+		// Check if this is a transient kubeconfig error that we should retry
+		if isTransientKubeconfigError(trimmed) {
+			if attempt < maxRetries {
+				e2e.Logf("Transient kubeconfig error detected (attempt %d/%d), retrying after %v: %s",
+					attempt, maxRetries, retryDelay, trimmed)
+				time.Sleep(retryDelay)
+				continue
+			}
+			e2e.Logf("Transient kubeconfig error persisted after %d attempts: %s", maxRetries, trimmed)
+		}
+
+		// Not a transient error or max retries reached, break and handle normally
+		break
 	}
+
+	// Handle the final error
 	var err1 *exec.ExitError
-	if errors.As(err, &err1) {
-		e2e.Logf("Error running %v:\n%s", cmd, trimmed)
-		return trimmed, &ExitError{ExitError: err1, Cmd: c.execPath + " " + strings.Join(c.finalArgs, " "), StdErr: trimmed}
+	if errors.As(lastErr, &err1) {
+		e2e.Logf("Error running %v:\n%s", c.execPath+" "+strings.Join(c.finalArgs, " "), lastOutput)
+		return lastOutput, &ExitError{ExitError: err1, Cmd: c.execPath + " " + strings.Join(c.finalArgs, " "), StdErr: lastOutput}
 	}
-	FatalErr(fmt.Errorf("unable to execute %q: %v", c.execPath, err))
+	FatalErr(fmt.Errorf("unable to execute %q: %v", c.execPath, lastErr))
 	return "", nil
 }
 
 // Outputs executes the command and returns the stdout/stderr output as separate strings
+// Implements retry logic for transient kubeconfig errors caused by parallel test execution
 func (c *CLI) Outputs() (string, string, error) {
 	if c.verbose {
 		fmt.Printf("DEBUG: oc %s\n", c.printCmd())
 	}
-	cmd := exec.Command(c.execPath, c.finalArgs...) // nolint:gosec // G204: execPath is hardcoded to "oc" and finalArgs come from test code
-	cmd.Stdin = c.stdin
-	e2e.Logf("showInfo is %v", c.showInfo)
-	if c.showInfo {
-		e2e.Logf("Running '%s %s'", c.execPath, strings.Join(c.finalArgs, " "))
-	}
-	//out, err := cmd.CombinedOutput()
-	var stdErrBuff, stdOutBuff bytes.Buffer
-	cmd.Stdout = &stdOutBuff
-	cmd.Stderr = &stdErrBuff
-	err := cmd.Run()
 
-	stdOutBytes := stdOutBuff.Bytes()
-	stdErrBytes := stdErrBuff.Bytes()
-	stdOut := strings.TrimSpace(string(stdOutBytes))
-	stdErr := strings.TrimSpace(string(stdErrBytes))
-	if err == nil {
-		c.stdout = bytes.NewBuffer(stdOutBytes)
-		c.stderr = bytes.NewBuffer(stdErrBytes)
-		return stdOut, stdErr, nil
+	// Retry configuration for handling race conditions in parallel tests
+	const maxRetries = 3
+	const retryDelay = 500 * time.Millisecond
+
+	var lastErr error
+	var lastStdOut, lastStdErr string
+
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		cmd := exec.Command(c.execPath, c.finalArgs...) // nolint:gosec // G204: execPath is hardcoded to "oc" and finalArgs come from test code
+		cmd.Stdin = c.stdin
+		if c.showInfo && attempt == 1 {
+			e2e.Logf("showInfo is %v", c.showInfo)
+			e2e.Logf("Running '%s %s'", c.execPath, strings.Join(c.finalArgs, " "))
+		}
+
+		var stdErrBuff, stdOutBuff bytes.Buffer
+		cmd.Stdout = &stdOutBuff
+		cmd.Stderr = &stdErrBuff
+		err := cmd.Run()
+
+		stdOutBytes := stdOutBuff.Bytes()
+		stdErrBytes := stdErrBuff.Bytes()
+		stdOut := strings.TrimSpace(string(stdOutBytes))
+		stdErr := strings.TrimSpace(string(stdErrBytes))
+
+		if err == nil {
+			c.stdout = bytes.NewBuffer(stdOutBytes)
+			c.stderr = bytes.NewBuffer(stdErrBytes)
+			if attempt > 1 {
+				e2e.Logf("Command succeeded on retry attempt %d/%d", attempt, maxRetries)
+			}
+			return stdOut, stdErr, nil
+		}
+
+		lastStdOut = stdOut
+		lastStdErr = stdErr
+		lastErr = err
+
+		// Check if this is a transient kubeconfig error in either stdout or stderr
+		combinedOutput := stdOut + stdErr
+		if isTransientKubeconfigError(combinedOutput) {
+			if attempt < maxRetries {
+				e2e.Logf("Transient kubeconfig error detected (attempt %d/%d), retrying after %v: %s",
+					attempt, maxRetries, retryDelay, combinedOutput)
+				time.Sleep(retryDelay)
+				continue
+			}
+			e2e.Logf("Transient kubeconfig error persisted after %d attempts: %s", maxRetries, combinedOutput)
+		}
+
+		// Not a transient error or max retries reached, break and handle normally
+		break
 	}
+
+	// Handle the final error
 	var err1 *exec.ExitError
-	if errors.As(err, &err1) {
-		e2e.Logf("Error running %v:\nStdOut>\n%s\nStdErr>\n%s\n", cmd, stdOut, stdErr)
-		return stdOut, stdErr, &ExitError{ExitError: err1, Cmd: c.execPath + " " + strings.Join(c.finalArgs, " "), StdErr: stdErr}
+	if errors.As(lastErr, &err1) {
+		e2e.Logf("Error running %v:\nStdOut>\n%s\nStdErr>\n%s\n", c.execPath+" "+strings.Join(c.finalArgs, " "), lastStdOut, lastStdErr)
+		return lastStdOut, lastStdErr, &ExitError{ExitError: err1, Cmd: c.execPath + " " + strings.Join(c.finalArgs, " "), StdErr: lastStdErr}
 	}
-	FatalErr(fmt.Errorf("unable to execute %q: %v", c.execPath, err))
+	FatalErr(fmt.Errorf("unable to execute %q: %v", c.execPath, lastErr))
 	return "", "", nil
 }
 
diff --git a/tests-extension/test/qe/util/nodes.go b/tests-extension/test/qe/util/nodes.go
index 0ee466eab4..0353a887e7 100644
--- a/tests-extension/test/qe/util/nodes.go
+++ b/tests-extension/test/qe/util/nodes.go
@@ -737,11 +737,23 @@ func GetNodeListByLabel(oc *CLI, labelKey string) []string {
 //   - bool: true if default node selector is configured, false otherwise
 func IsDefaultNodeSelectorEnabled(oc *CLI) bool {
 	defaultNodeSelector, getNodeSelectorErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("scheduler", "cluster", "-o=jsonpath={.spec.defaultNodeSelector}").Output()
-	if getNodeSelectorErr != nil && strings.Contains(defaultNodeSelector, `the server doesn't have a resource type`) {
-		e2e.Logf("WARNING: The scheduler API is not supported on the test cluster")
-		return false
+	if getNodeSelectorErr != nil {
+		// Check if the scheduler API is not supported (expected on some cluster types)
+		if strings.Contains(defaultNodeSelector, `the server doesn't have a resource type`) {
+			e2e.Logf("WARNING: The scheduler API is not supported on the test cluster")
+			return false
+		}
+		// Handle transient kubeconfig errors (race conditions during parallel test execution)
+		// These errors can occur when temp kubeconfig files have stale context references
+		if strings.Contains(defaultNodeSelector, "context was not found") ||
+			strings.Contains(defaultNodeSelector, "Error in configuration") ||
+			strings.Contains(defaultNodeSelector, "cluster has no server defined") {
+			e2e.Logf("WARNING: Transient kubeconfig error detected (likely race condition), assuming no default node selector: %v", getNodeSelectorErr)
+			return false
+		}
+		// For other unexpected errors, fail the test with detailed information
+		o.Expect(getNodeSelectorErr).NotTo(o.HaveOccurred(), "Fail to get cluster scheduler defaultNodeSelector got error: %v\n", getNodeSelectorErr)
 	}
-	o.Expect(getNodeSelectorErr).NotTo(o.HaveOccurred(), "Fail to get cluster scheduler defaultNodeSelector got error: %v\n", getNodeSelectorErr)
 	return !strings.EqualFold(defaultNodeSelector, "")
 }
 
diff --git a/tests-extension/test/qe/util/olmv0util/catalog_source.go b/tests-extension/test/qe/util/olmv0util/catalog_source.go
index 38a9c9ff6f..0d910103ab 100644
--- a/tests-extension/test/qe/util/olmv0util/catalog_source.go
+++ b/tests-extension/test/qe/util/olmv0util/catalog_source.go
@@ -93,9 +93,20 @@ func (catsrc *CatalogSourceDescription) SetSCCRestricted(oc *exutil.CLI) {
 	} else {
 		// Retrieve PSA enforcement level from cluster configuration
 		output, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("configmaps", "-n", "openshift-kube-apiserver", "config", `-o=jsonpath={.data.config\.yaml}`).Output()
-		o.Expect(err).NotTo(o.HaveOccurred())
-		psa = gjson.Get(output, "admission.pluginConfig.PodSecurity.configuration.defaults.enforce").String()
-		e2e.Logf("pod-security.kubernetes.io/enforce is %s", string(psa))
+		if err != nil {
+			// Check if this is a Forbidden error (permission denied)
+			if strings.Contains(err.Error(), "Forbidden") {
+				// In some CI environments, even with AsAdmin(), the service account may not have permission
+				// to access openshift-kube-apiserver namespace. Use default value "restricted" in such cases.
+				e2e.Logf("cannot get default PSA setting from kube-apiserver config (permission denied), use default value restricted: %v", err)
+			} else {
+				// For other errors (e.g., network issues, API server down), fail the test
+				o.Expect(err).NotTo(o.HaveOccurred())
+			}
+		} else {
+			psa = gjson.Get(output, "admission.pluginConfig.PodSecurity.configuration.defaults.enforce").String()
+			e2e.Logf("pod-security.kubernetes.io/enforce is %s", string(psa))
+		}
 	}
 	// Apply restricted security context if PSA enforcement requires it
 	if strings.Contains(string(psa), "restricted") {
diff --git a/tests-extension/test/qe/util/opmcli/opm_bin.go b/tests-extension/test/qe/util/opmcli/opm_bin.go
index cbff6f1604..94c82f9fc1 100644
--- a/tests-extension/test/qe/util/opmcli/opm_bin.go
+++ b/tests-extension/test/qe/util/opmcli/opm_bin.go
@@ -299,14 +299,24 @@ func downloadFile(url, filepath string) error {
 
 func setupOPMEnv(opmDir string) error {
 	currentPath := os.Getenv("PATH")
-	if !strings.Contains(currentPath, opmDir) {
-		newPath := opmDir + ":" + currentPath
-		err := os.Setenv("PATH", newPath)
-		if err != nil {
-			return err
+
+	// Split PATH and check if opmDir already exists as an exact entry
+	// This prevents false positives like "/tmp/google-cloud-sdk/bin" matching "/tmp"
+	paths := strings.Split(currentPath, ":")
+	for _, p := range paths {
+		if p == opmDir {
+			e2e.Logf("%s already in PATH", opmDir)
+			return nil
 		}
-		e2e.Logf("Added %s to PATH: %s", opmDir, newPath)
 	}
+
+	// Not in PATH, add it
+	newPath := opmDir + ":" + currentPath
+	err := os.Setenv("PATH", newPath)
+	if err != nil {
+		return err
+	}
+	e2e.Logf("Added %s to PATH: %s", opmDir, newPath)
 	return nil
 }
 
diff --git a/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-endpoint.yml b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-endpoint.yml
new file mode 100644
index 0000000000..b43d6e0021
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-endpoint.yml
@@ -0,0 +1,9 @@
+- endpoint: {{.PROMETHEUS_URL}}
+  token: {{.PROMETHEUS_TOKEN}}
+  step: 10s
+  skipTLSVerify: true
+  metrics:
+    - metrics-profiles/metrics-aggregated.yml
+  indexer:
+    type: local
+    metricsDirectory: collected-metrics-{{.UUID}}
diff --git a/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-profiles/metrics-aggregated.yml b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-profiles/metrics-aggregated.yml
new file mode 100644
index 0000000000..2827aa04cf
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/metrics-profiles/metrics-aggregated.yml
@@ -0,0 +1,4 @@
+# Containers & pod metrics
+
+- query: (sum(irate(container_cpu_usage_seconds_total{container="registry-server",namespace="openshift-marketplace"}[2m]) * 100) by (container, pod)) > 0
+  metricName: containerCPU-CatSrc
diff --git a/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/pkg-ins.yml b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/pkg-ins.yml
new file mode 100644
index 0000000000..316190d91d
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/pkg-ins.yml
@@ -0,0 +1,72 @@
+---
+global:
+  gc: {{.GC}}
+  gcMetrics: {{.GC_METRICS}}
+  measurements:
+    - name: podLatency
+
+
+jobs:
+  - name: {{.OPERATION}} # will be parameter
+    jobType: create
+    jobIterations: {{.JOB_ITERATIONS}}
+    namespace: {{.OPERATION}}
+    namespacedIterations: {{.NAMESPACED_ITERATIONS}}
+    iterationsPerNamespace: {{.ITERATIONS_PER_NAMESPACE}}
+    cleanup: true
+    podWait: true
+    waitWhenFinished: true
+    maxWaitTimeout: {{.MAX_WAIT_TIMEOUT}}
+    jobIterationDelay: {{.JOB_ITERATION_DELAY}}
+    jobPause: {{.JOB_PAUSE}}
+    qps: {{.QPS}}
+    burst: {{.BURST}}
+    executionMode: parallel
+    verifyObjects: true
+    errorOnVerify: true
+    skipIndexing: false
+    preLoadImages: true
+    preLoadPeriod: 15s
+    churn: {{.CHURN}}
+    churnCycles: {{.CHURN_CYCLES}}
+    churnDuration: {{.CHURN_DURATION}}
+    churnPercent: {{.CHURN_PERCENT}}
+    churnDelay: {{.CHURN_DELAY}}
+    churnDeletionStrategy: {{.CHURN_DELETION_STRATEGY}}
+    defaultMissingKeysWithZero: false
+    namespaceLabels:
+      security.openshift.io/scc.podSecurityLabelSync: false
+      pod-security.kubernetes.io/enforce: privileged
+      pod-security.kubernetes.io/audit: privileged
+      pod-security.kubernetes.io/warn: privileged
+    objects:
+
+      - objectTemplate: templates/og.yml
+        replicas: 1
+        inputVars:
+          prefixNamespace: {{.OPERATION}}
+
+
+      - objectTemplate: templates/sub.yml
+        replicas: 1
+        inputVars:
+          pkgName: {{.PKG_NAME}}
+          channelName: {{.CHANNEL_NAME}}
+          catsrcName: {{.CATALOGSOURCE_NAME}}
+          catsrcNamespace: {{.CATALOGSOURCE_NAMESPACE}}
+        # will add inputVars to make operator as parameters
+        waitOptions:
+          # only take one of customStatusPaths and kind, and both can use labelSelector
+          # 1, take customStatusPaths to check sub's status
+          customStatusPaths:
+            - key: ".state"
+              value: "AtLatestKnown"
+          # 2, take kind to check pod's status with label
+          # labelSelector: {app: migration}
+          # labelSelector: {control-plane: controller-manager}
+          # kind: Pod
+          # 3, take other field of sub and it does not work yet
+          # labelSelector: {kube-burner-job: operator-install-delete}
+          # customStatusPaths:
+          #   - key: ".installedCSV"
+          #     value: "oadp-operator.v1.4.2"
diff --git a/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/og.yml b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/og.yml
new file mode 100644
index 0000000000..d9d9ba3d1e
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/og.yml
@@ -0,0 +1,7 @@
+kind: OperatorGroup
+apiVersion: operators.coreos.com/v1
+metadata:
+  name: og
+spec:
+  targetNamespaces:
+  - "{{.prefixNamespace}}-{{.Iteration}}"
diff --git a/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/sub.yml b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/sub.yml
new file mode 100644
index 0000000000..4761d85fe7
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/manifests/config/pkg-ins/templates/sub.yml
@@ -0,0 +1,10 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: {{.pkgName}}
+spec:
+  channel: {{.channelName}}
+  installPlanApproval: automatic
+  name: {{.pkgName}}
+  source: {{.catsrcName}}
+  sourceNamespace: {{.catsrcNamespace}}
diff --git a/tests-extension/test/qe/util/stress/util/ma.py b/tests-extension/test/qe/util/stress/util/ma.py
new file mode 100755
index 0000000000..976a9fed83
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from ma.cli.__main__ import main
+
+if __name__ == "__main__":
+    main()
diff --git a/tests-extension/test/qe/util/stress/util/ma/__init__.py b/tests-extension/test/qe/util/stress/util/ma/__init__.py
new file mode 100644
index 0000000000..1898849ec8
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/__init__.py
@@ -0,0 +1,10 @@
+import os
+import sys
+
+if sys.version_info < (3, 9):
+    sys.exit("Sorry, Python < 3.9 is no longer supported.")
+
+sys.dont_write_bytecode = True
+
+def version():
+    return "0.1"
diff --git a/tests-extension/test/qe/util/stress/util/ma/cli/__init__.py b/tests-extension/test/qe/util/stress/util/ma/cli/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests-extension/test/qe/util/stress/util/ma/cli/__main__.py b/tests-extension/test/qe/util/stress/util/ma/cli/__main__.py
new file mode 100644
index 0000000000..f201dfbedb
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/cli/__main__.py
@@ -0,0 +1,8 @@
+from ma.cli.cmd_group import cli
+
+
+def main():
+    try:
+        cli(obj={})
+    except Exception as e:
+        raise SystemExit(e)
diff --git a/tests-extension/test/qe/util/stress/util/ma/cli/cmd_check_ccpu.py b/tests-extension/test/qe/util/stress/util/ma/cli/cmd_check_ccpu.py
new file mode 100644
index 0000000000..c18a63179d
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/cli/cmd_check_ccpu.py
@@ -0,0 +1,98 @@
+import click
+import logging
+from ma.helper.containercpu import ContainerCPU
+
+logger = logging.getLogger(__name__)
+
+
+@click.command()
+@click.pass_context
+@click.option(
+    "-i",
+    "--metrics_result_file",
+    type=click.Path(
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+        resolve_path=True
+    ),
+    required=True,
+    help="the result file of the metrics"
+)
+@click.option(
+    "-o",
+    "--output_dir",
+    default="./",
+    type=click.Path(
+        exists=True,
+        file_okay=False,
+        dir_okay=True,
+        readable=True,
+        resolve_path=True
+    ),
+    help="the directory of generated metrics figure"
+)
+@click.option(
+    "--zscore_threshold",
+    type=int,
+    default=3,
+    required=False,
+    help="the threshold for z-score"
+)
+@click.option(
+    "--window_size",
+    type=int,
+    default=18,
+    required=False,
+    help="the size of moving window"
+)
+@click.option(
+    "--window_threshold",
+    type=int,
+    default=3,
+    required=False,
+    help="the threshold of moving window"
+)
+@click.option(
+    "--watermark",
+    type=int,
+    default=20,
+    required=False,
+    help="the abnoarm cpu usage calucalted by 100"
+)
+@click.option(
+    "--anomalies_threshold",
+    type=int,
+    default=2,
+    required=False,
+    help="the anomalies threshold to determine if the checking fails or not"
+)
+def check_ccpu(ctx,
+    metrics_result_file,
+    output_dir,
+    zscore_threshold,
+    window_size,
+    window_threshold,
+    watermark,
+    anomalies_threshold):
+    """
+    Check if cpu usage is expected
+    """
+    try:
+        ccpu = ContainerCPU(metrics_result_file,
+            output_dir,
+            zscore_threshold,
+            window_size,
+            window_threshold,
+            watermark,
+            anomalies_threshold)
+        ccpu.handle()
+        ccpu.ok_or_not()
+        # print(ccpu.get_preliminary_screening())
+        # print(ccpu.get_refinement_screening())
+        # print(ccpu.get_final_screening())
+    except Exception as e:
+        logger.exception("checking container cpu failing")
+        raise
+
diff --git a/tests-extension/test/qe/util/stress/util/ma/cli/cmd_group.py b/tests-extension/test/qe/util/stress/util/ma/cli/cmd_group.py
new file mode 100644
index 0000000000..94bfc575ea
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/cli/cmd_group.py
@@ -0,0 +1,47 @@
+import click
+import logging
+import sys
+from ma.cli.cmd_check_ccpu import check_ccpu
+import ma.helper.util as util
+from ma import version
+from ma.helper.const import CONTEXT_SETTINGS
+
+logger = logging.getLogger(__name__)
+
+
+def print_version(ctx, param, value):
+    if not value or ctx.resilient_parsing:
+        return
+    click.echo("ma v{}".format(version()))
+    click.echo("python v{}".format(sys.version))
+    ctx.exit()
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.pass_context
+@click.option(
+    "-V",
+    "--version",
+    is_flag=True,
+    callback=print_version,
+    expose_value=False,
+    is_eager=True,
+)
+@click.option(
+    "-v",
+    "--debug",
+    help="enable debug logging",
+    is_flag=True,
+    default=False)
+def cli(ctx, debug):
+    util.init_logging(logging.DEBUG if debug else logging.INFO)
+    is_help = False
+    for k in CONTEXT_SETTINGS["help_option_names"]:
+        if k in sys.argv:
+            is_help = True
+            break
+    if not is_help:
+        logger.info("start to handle sub command")
+        pass
+
+cli.add_command(check_ccpu)
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/__init__.py b/tests-extension/test/qe/util/stress/util/ma/helper/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/algo.py b/tests-extension/test/qe/util/stress/util/ma/helper/algo.py
new file mode 100644
index 0000000000..13985f83ae
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/helper/algo.py
@@ -0,0 +1,44 @@
+import logging
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+def z_score(timestamps, values, threshold):
+    # (default threshold = 3)
+    preliminary_anomalies = []
+    mean = np.mean(values)
+    std = np.std(values)
+    logger.info(f"mean: {mean}, std: {std}")
+    for i, value in enumerate(values):
+        if std == 0:
+            preliminary_anomalies.append((timestamps[i], value))
+            continue
+        if value > mean + threshold * std:
+            preliminary_anomalies.append((timestamps[i], value))
+    return preliminary_anomalies
+
+
+def moving_window_statistics (preliminary_anomalies, timestamps, values, window_size, window_threshold):
+    refined_anomalies = []
+    # Refinement with moving window (default window_size = 30 points, default threshold = 3)
+    logger.info(f"window_size: {window_size}, window_threshold: {window_threshold}")
+    for ts, value in preliminary_anomalies:
+        index = timestamps.index(ts)
+        if index < window_size:
+            continue
+        window = values[index - window_size : index]
+        window_mean = np.mean(window)
+        window_std = np.std(window)
+        if window_std == 0:
+            refined_anomalies.append((ts, value))
+            continue
+        if value > window_mean + window_threshold * window_std:
+            refined_anomalies.append((ts, value))
+    return refined_anomalies
+
+def watermark(refined_anomalies, watermark):
+    anomalies = []
+    for ts, value in refined_anomalies:
+        if value > watermark:
+            anomalies.append((ts, value))
+    return anomalies
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/const.py b/tests-extension/test/qe/util/stress/util/ma/helper/const.py
new file mode 100644
index 0000000000..f220707d7a
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/helper/const.py
@@ -0,0 +1,6 @@
+CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
+
+# draw figure
+FIGURE_WIDTH = 40
+FIGURE_HEIGHT = 10
+
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/containercpu.py b/tests-extension/test/qe/util/stress/util/ma/helper/containercpu.py
new file mode 100644
index 0000000000..08c454a1f6
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/helper/containercpu.py
@@ -0,0 +1,173 @@
+import json
+import os
+import logging
+from datetime import datetime
+from pathlib import Path
+import ma.helper.util as util
+import ma.helper.algo as algo
+from ma.helper.exceptions import ContainerCPUException
+
+logger = logging.getLogger(__name__)
+
+
+class ContainerCPU:
+    """
+    Container CPU object is used to check if the Container CPU usage is expected.
+
+    """
+
+    def __init__(self, metrics_result_file, output_dir, zscore_threshold, window_size, window_threshold, watermark, anomalies_threshold):
+        self.mrf = metrics_result_file
+        self.odir = output_dir
+        self.zscore_threshold = zscore_threshold
+        self.window_size = window_size
+        self.window_threshold = window_threshold
+        self.watermark = watermark
+        self.anomalies_threshold = anomalies_threshold
+        self.preliminary_anomalies = []
+        self.refined_anomalies = []
+        self.final_anomalies = []
+
+        try:
+            self.base_name = os.path.basename(self.mrf)
+            self.base_name_wo_ext, self.base_name_ext = os.path.splitext(self.base_name)
+        except BaseException as re:
+            raise ContainerCPUException("parse the file path failed") from re
+
+        try:
+            with open(self.mrf) as f:
+                self.data = json.load(f)
+            self.values =     [d['value'] for d in self.data]
+            self.timestamps = [d['timestamp'] for d in self.data]
+            self.timestamps_format = [datetime.strptime(d['timestamp'], "%Y-%m-%dT%H:%M:%S.%fZ") for d in self.data]
+
+        except BaseException as re:
+            raise ContainerCPUException("load ccpu metric data failed") from re
+
+    def handle(self):
+        self.tendency_chart()
+        self.preliminary_screening()
+        self.refinement_screening()
+        self.final_screening()
+        self.convert_preliminary_screening()
+        self.convert_refinement_screening()
+        self.convert_final_screening()
+
+    def tendency_chart(self):
+        """
+        it draws the cpu usage figure and save it as pdf
+        """
+        try:
+            util.draw_figure(
+                self.timestamps_format,
+                self.values,
+                self.odir,
+                self.base_name_wo_ext
+            )
+        except BaseException as re:
+            raise ContainerCPUException("drawing pdf failed") from re
+
+
+    def preliminary_screening(self):
+        """
+        it uses a simple, fast method (e.g., Z-Score) to flag potential anomalies
+        """
+        try:
+            self.preliminary_anomalies = algo.z_score(
+                self.timestamps,
+                self.values,
+                self.zscore_threshold)
+        except BaseException as re:
+            raise ContainerCPUException("preliminary screening failed") from re
+
+    def refinement_screening(self):
+        """
+        it applyes stricter, context-aware rules (e.g., moving window statistics) to 
+        validate whether the candidates flagged in preliminary_screening are true anomalies
+        """
+        try:
+            self.refined_anomalies = algo.moving_window_statistics(
+                self.preliminary_anomalies,
+                self.timestamps,
+                self.values,
+                self.window_size,
+                self.window_threshold)
+        except BaseException as re:
+            raise ContainerCPUException("refinement screening failed") from re
+
+    def final_screening(self):
+        try:
+            self.final_anomalies = algo.watermark(
+                self.refined_anomalies,
+                self.watermark)
+        except BaseException as re:
+            raise ContainerCPUException("final screening failed") from re
+
+    def get_preliminary_screening(self):
+        """
+        it gets the result of a simple, fast method (e.g., Z-Score) to flag potential anomalies
+        """
+        return self.preliminary_anomalies
+
+    def get_refinement_screening(self):
+        """
+        it get results of refinement screening
+        """
+        return self.refined_anomalies
+
+    def get_final_screening(self):
+        """
+        it get results of final screening
+        """
+        return self.final_anomalies
+
+    def convert_preliminary_screening(self):
+        """
+        it converts the result of a simple, fast method (e.g., Z-Score) to flag potential anomalies
+        """
+        try:
+            util.convert_screening(
+                self.preliminary_anomalies,
+                os.path.join(self.odir, self.base_name_wo_ext) + "_prescr.json"
+            )
+        except BaseException as re:
+            raise ContainerCPUException("convert preliminary screening failed") from re
+
+
+    def convert_refinement_screening(self):
+        """
+        it converts results of refinement screening
+        """
+        try:
+            util.convert_screening(
+                self.refined_anomalies,
+                os.path.join(self.odir, self.base_name_wo_ext) + "_refscr.json"
+            )
+        except BaseException as re:
+            raise ContainerCPUException("convert refinement screening failed") from re
+
+    def convert_final_screening(self):
+        """
+        it converts results of final screening
+        """
+        try:
+            util.convert_screening(
+                self.final_anomalies,
+                os.path.join(self.odir, self.base_name_wo_ext) + "_finscr.json"
+            )
+        except BaseException as re:
+            raise ContainerCPUException("convert final screening failed") from re
+
+    def ok_or_not(self):
+        """
+        it reports if the result is ok
+        """
+        try:
+            base_path = os.path.join(self.odir, self.base_name_wo_ext)
+            result = "pass"
+            if len(self.final_anomalies) > self.anomalies_threshold:
+                result = "fail"
+            output_path = Path(base_path+"_result-"+result)
+            output_path.write_text(result)
+        except BaseException as re:
+            raise ContainerCPUException("check result failed") from re
\ No newline at end of file
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/exceptions.py b/tests-extension/test/qe/util/stress/util/ma/helper/exceptions.py
new file mode 100644
index 0000000000..17feb001fd
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/helper/exceptions.py
@@ -0,0 +1,2 @@
+class ContainerCPUException(BaseException):
+    """Exception class to raise error in ContainerCPU"""
diff --git a/tests-extension/test/qe/util/stress/util/ma/helper/util.py b/tests-extension/test/qe/util/stress/util/ma/helper/util.py
new file mode 100644
index 0000000000..29fc4bc560
--- /dev/null
+++ b/tests-extension/test/qe/util/stress/util/ma/helper/util.py
@@ -0,0 +1,47 @@
+import logging
+import json
+import os
+import matplotlib.pyplot as plt
+from ma.helper.const import *
+from pathlib import Path
+
+
+
+def init_logging(log_level=logging.INFO):
+    logging.basicConfig(
+        # format="%(module)s: %(asctime)s: %(levelname)s: %(message)s",
+        format="%(asctime)s: %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%dT%H:%M:%SZ",
+        level=log_level,
+    )
+
+    loggers = logging.Logger.manager.loggerDict
+    for k in loggers.keys():
+        if "requests" in k or "urllib3" in k or "gssapi" in k:
+            logger = logging.getLogger(k)
+            logger.setLevel(logging.WARNING)
+        if "requests_kerberos" in k:
+            logger = logging.getLogger(k)
+            logger.setLevel(logging.CRITICAL)
+
+def draw_figure(timestamps_format, values, odir, base_name_wo_ext):
+    plt.figure(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT))
+    plt.plot(timestamps_format, values, marker='o')
+    # plt.xticks(rotation=45)
+    plt.ylabel('CPU Usage')
+    plt.title('CPU Usage Over Time')
+    plt.tight_layout()
+    saved_file_wo_ext = os.path.join(odir, base_name_wo_ext)
+    plt.savefig(saved_file_wo_ext+"_figure.pdf")
+    plt.close()
+
+def convert_screening(anomalies, file):
+    formatted_data = [
+        {"timestamp": ts, "value": round(val, 15)}
+        for ts, val in anomalies
+    ]
+
+    output_path = Path(file)
+    output_path.write_text(
+        json.dumps(formatted_data, indent=2, ensure_ascii=False)
+    )