Skip to content

Commit

Permalink
Merge pull request #28747 from dgoodwin/revert-28741-1714144074991
Browse files Browse the repository at this point in the history
TRT-1625: Revert #28741 "OCPBUGS-32923: Fix allowed firing alerts not triggering"
  • Loading branch information
openshift-merge-bot[bot] committed Apr 26, 2024
2 parents d015548 + 817d444 commit b04fe1a
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 200 deletions.
54 changes: 26 additions & 28 deletions pkg/alerts/allowed_conformance.go
Expand Up @@ -2,63 +2,61 @@ package alerts

import (
configv1 "github.com/openshift/api/config/v1"
helper "github.com/openshift/origin/test/extended/util/prometheus"
)

// AllowedAlertsDuringConformance lists all alerts that are allowed to be pending or firing during
// conformance testing.
// WARNING: there is a parallel list for allowed alerts during upgrade in allowed_upgrade.go,
// ensure that alerts we want to allow in both are added to both.
func AllowedAlertsDuringConformance(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending MetricConditions) {
func AllowedAlertsDuringConformance(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending helper.MetricConditions) {

firingAlertsWithBugs := MetricConditions{}
allowedFiringAlerts := MetricConditions{
firingAlertsWithBugs := helper.MetricConditions{}
allowedFiringAlerts := helper.MetricConditions{
{
AlertName: "TargetDown",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "TargetDown", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
{
AlertName: "KubePodNotReady",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "KubePodNotReady", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
{
AlertName: "KubeDeploymentReplicasMismatch",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "KubeDeploymentReplicasMismatch", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
{
AlertName: "HighOverallControlPlaneCPU",
Text: "high CPU utilization during e2e runs is normal",
Selector: map[string]string{"alertname": "HighOverallControlPlaneCPU"},
Text: "high CPU utilization during e2e runs is normal",
},
{
AlertName: "ExtremelyHighIndividualControlPlaneCPU",
Text: "high CPU utilization during e2e runs is normal",
Selector: map[string]string{"alertname": "ExtremelyHighIndividualControlPlaneCPU"},
Text: "high CPU utilization during e2e runs is normal",
},
}
pendingAlertsWithBugs := MetricConditions{}
allowedPendingAlerts := MetricConditions{
pendingAlertsWithBugs := helper.MetricConditions{}
allowedPendingAlerts := helper.MetricConditions{
{
AlertName: "HighOverallControlPlaneCPU",
Text: "high CPU utilization during e2e runs is normal",
Selector: map[string]string{"alertname": "HighOverallControlPlaneCPU"},
Text: "high CPU utilization during e2e runs is normal",
},
{
AlertName: "ExtremelyHighIndividualControlPlaneCPU",
Text: "high CPU utilization during e2e runs is normal",
Selector: map[string]string{"alertname": "ExtremelyHighIndividualControlPlaneCPU"},
Text: "high CPU utilization during e2e runs is normal",
},
}

switch featureSet {
case configv1.TechPreviewNoUpgrade:
allowedFiringAlerts = append(
allowedFiringAlerts,
MetricCondition{
AlertName: "TechPreviewNoUpgrade",
Text: "Allow testing of TechPreviewNoUpgrade clusters, this will only fire when a FeatureGate has been enabled",
helper.MetricCondition{
Selector: map[string]string{"alertname": "TechPreviewNoUpgrade"},
Text: "Allow testing of TechPreviewNoUpgrade clusters, this will only fire when a FeatureGate has been enabled",
},
MetricCondition{
AlertName: "ClusterNotUpgradeable",
Text: "Allow testing of ClusterNotUpgradeable clusters, this will only fire when a FeatureGate has been enabled",
helper.MetricCondition{
Selector: map[string]string{"alertname": "ClusterNotUpgradeable"},
Text: "Allow testing of ClusterNotUpgradeable clusters, this will only fire when a FeatureGate has been enabled",
})
}

Expand Down
42 changes: 20 additions & 22 deletions pkg/alerts/allowed_upgrade.go
Expand Up @@ -2,52 +2,50 @@ package alerts

import (
configv1 "github.com/openshift/api/config/v1"
helper "github.com/openshift/origin/test/extended/util/prometheus"
)

// AllowedAlertsDuringUpgrade lists all alerts that are allowed to be pending or firing during
// upgrade.
// WARNING: there is a parallel list for allowed alerts during conformance in allowed_conformance.go,
// ensure that alerts we want to allow in both are added to both.
func AllowedAlertsDuringUpgrade(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending MetricConditions) {
func AllowedAlertsDuringUpgrade(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending helper.MetricConditions) {

firingAlertsWithBugs := MetricConditions{}
firingAlertsWithBugs := helper.MetricConditions{}

allowedFiringAlerts := MetricConditions{
allowedFiringAlerts := helper.MetricConditions{
{
AlertName: "TargetDown",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "TargetDown", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
{
AlertName: "KubePodNotReady",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "KubePodNotReady", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
{
AlertName: "KubeDeploymentReplicasMismatch",
AlertNamespace: "openshift-e2e-loki",
Text: "Loki is nice to have, but we can allow it to be down",
Selector: map[string]string{"alertname": "KubeDeploymentReplicasMismatch", "namespace": "openshift-e2e-loki"},
Text: "Loki is nice to have, but we can allow it to be down",
},
}
pendingAlertsWithBugs := MetricConditions{}
allowedPendingAlerts := MetricConditions{
pendingAlertsWithBugs := helper.MetricConditions{}
allowedPendingAlerts := helper.MetricConditions{
{
AlertName: "etcdMemberCommunicationSlow",
Text: "Excluded because it triggers during upgrade (detects ~5m of high latency immediately preceeding the end of the test), and we don't want to change the alert because it is correct",
Selector: map[string]string{"alertname": "etcdMemberCommunicationSlow"},
Text: "Excluded because it triggers during upgrade (detects ~5m of high latency immediately preceeding the end of the test), and we don't want to change the alert because it is correct",
},
}

switch featureSet {
case configv1.TechPreviewNoUpgrade:
allowedFiringAlerts = append(
allowedFiringAlerts,
MetricCondition{
AlertName: "TechPreviewNoUpgrade",
Text: "Allow testing of TechPreviewNoUpgrade clusters, this will only fire when a FeatureGate has been enabled",
helper.MetricCondition{
Selector: map[string]string{"alertname": "TechPreviewNoUpgrade"},
Text: "Allow testing of TechPreviewNoUpgrade clusters, this will only fire when a FeatureGate has been enabled",
},
MetricCondition{
AlertName: "ClusterNotUpgradeable",
Text: "Allow testing of ClusterNotUpgradeable clusters, this will only fire when a FeatureGate has been enabled",
helper.MetricCondition{
Selector: map[string]string{"alertname": "ClusterNotUpgradeable"},
Text: "Allow testing of ClusterNotUpgradeable clusters, this will only fire when a FeatureGate has been enabled",
})
}

Expand Down
45 changes: 0 additions & 45 deletions pkg/alerts/types.go

This file was deleted.

Expand Up @@ -6,7 +6,6 @@ import (
"strings"
"time"

"github.com/openshift/origin/pkg/alerts"
"github.com/openshift/origin/pkg/monitortestframework"
"github.com/openshift/origin/pkg/monitortestlibrary/allowedalerts"
"github.com/openshift/origin/pkg/monitortestlibrary/historicaldata"
Expand All @@ -15,9 +14,11 @@ import (
configv1 "github.com/openshift/api/config/v1"
"github.com/sirupsen/logrus"

configv1client "github.com/openshift/client-go/config/clientset/versioned"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
helper "github.com/openshift/origin/test/extended/util/prometheus"

configv1client "github.com/openshift/client-go/config/clientset/versioned"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
Expand All @@ -26,7 +27,7 @@ import (
"k8s.io/kubernetes/test/e2e/framework"
)

type AllowedAlertsFunc func(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending alerts.MetricConditions)
type AllowedAlertsFunc func(featureSet configv1.FeatureSet) (allowedFiringWithBugs, allowedFiring, allowedPendingWithBugs, allowedPending helper.MetricConditions)

func testAlerts(events monitorapi.Intervals,
allowancesFunc AllowedAlertsFunc,
Expand Down Expand Up @@ -142,23 +143,23 @@ func runBackstopTest(
case allowedalerts.AlertPending:
// a pending test covers pending and everything above (firing)
allowedPendingAlerts = append(allowedPendingAlerts,
alerts.MetricCondition{
AlertName: alertTest.AlertName(),
Text: "has a separate e2e test",
helper.MetricCondition{
Selector: map[string]string{"alertname": alertTest.AlertName()},
Text: "has a separate e2e test",
},
)
allowedFiringAlerts = append(allowedFiringAlerts,
alerts.MetricCondition{
AlertName: alertTest.AlertName(),
Text: "has a separate e2e test",
helper.MetricCondition{
Selector: map[string]string{"alertname": alertTest.AlertName()},
Text: "has a separate e2e test",
},
)
case allowedalerts.AlertInfo:
// an info test covers all firing
allowedFiringAlerts = append(allowedFiringAlerts,
alerts.MetricCondition{
AlertName: alertTest.AlertName(),
Text: "has a separate e2e test",
helper.MetricCondition{
Selector: map[string]string{"alertname": alertTest.AlertName()},
Text: "has a separate e2e test",
},
)
}
Expand Down
48 changes: 48 additions & 0 deletions test/extended/util/prometheus/helpers.go
Expand Up @@ -15,6 +15,7 @@ import (

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
"github.com/openshift/origin/pkg/monitor/monitorapi"
exutil "github.com/openshift/origin/test/extended/util"
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
Expand Down Expand Up @@ -150,6 +151,53 @@ func RequestPrometheusServiceAccountAPIToken(ctx context.Context, oc *exutil.CLI
return req.Status.Token, nil
}

type MetricCondition struct {
// TODO: Remove in favor of explicit fields
Selector map[string]string

AlertName string
AlertNamespace string
AlertLevel string

// Text is the description of why this alert condition matched.
Text string

Matches func(sample *model.Sample) bool
}

type MetricConditions []MetricCondition

func (c MetricConditions) Matches(sample *model.Sample) *MetricCondition {
for i, condition := range c {
matches := true
for name, value := range condition.Selector {
if sample.Metric[model.LabelName(name)] != model.LabelValue(value) {
matches = false
break
}
}
if matches && (condition.Matches == nil || condition.Matches(sample)) {
return &c[i]
}
}
return nil
}

func (c MetricConditions) MatchesInterval(alertInterval monitorapi.Interval) *MetricCondition {

// TODO: Source check for SourceAlert would be a good idea here.

checkAlertName := alertInterval.StructuredLocator.Keys[monitorapi.LocatorAlertKey]
checkAlertNamespace := alertInterval.StructuredLocator.Keys[monitorapi.LocatorNamespaceKey]

for _, condition := range c {
if checkAlertName == condition.AlertName && checkAlertNamespace == condition.AlertNamespace {
return &condition
}
}
return nil
}

func RunQuery(ctx context.Context, prometheusClient prometheusv1.API, query string) (*PrometheusResponse, error) {
return RunQueryAtTime(ctx, prometheusClient, query, time.Now())
}
Expand Down

0 comments on commit b04fe1a

Please sign in to comment.