diff --git a/hack/prom-rule-ci/prom-rules-tests.yaml b/hack/prom-rule-ci/prom-rules-tests.yaml index e9449cb7dcf2..01ef80b972f6 100644 --- a/hack/prom-rule-ci/prom-rules-tests.yaml +++ b/hack/prom-rule-ci/prom-rules-tests.yaml @@ -6,56 +6,6 @@ group_eval_order: - kubevirt.rules #information about this format can be found in: https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/ tests: - # Pod is using more CPU than expected - - interval: 1m - input_series: - - series: 'container_cpu_usage_seconds_total{namespace="ci",pod="virt-controller-8546c99968-x9jgg",node="node1"}' - values: '1+1x6' - - series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="cpu",pod="virt-controller-8546c99968-x9jgg",node="node1"}' - values: '0+0x6' - - alert_rule_test: - - eval_time: 6m - alertname: KubeVirtComponentExceedsRequestedCPU - exp_alerts: - - exp_annotations: - description: "Pod virt-controller-8546c99968-x9jgg cpu usage exceeds the CPU requested" - summary: "The containers in the pod are using more CPU than what is defined in the containers resource requests" - runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeVirtComponentExceedsRequestedCPU" - exp_labels: - severity: "warning" - operator_health_impact: "none" - kubernetes_operator_part_of: "kubevirt" - kubernetes_operator_component: "kubevirt" - pod: "virt-controller-8546c99968-x9jgg" - - # Pod is using more memory than expected - - interval: 1m - input_series: - - series: 'container_memory_working_set_bytes{namespace="ci",container="",pod="virt-controller-8546c99968-x9jgg",node="node1"}' - values: "157286400+0x5" - - series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="memory",pod="virt-controller-8546c99968-x9jgg",node="node1"}' - values: "118325248+0x5" - - alert_rule_test: - - eval_time: 5m - alertname: KubeVirtComponentExceedsRequestedMemory - exp_alerts: - - exp_annotations: - description: "Container virt-controller in pod virt-controller-8546c99968-x9jgg memory usage exceeds the memory requested" - summary: "The container is using more memory than what is defined in the containers resource requests" - runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeVirtComponentExceedsRequestedMemory" - exp_labels: - severity: "warning" - operator_health_impact: "none" - kubernetes_operator_part_of: "kubevirt" - kubernetes_operator_component: "kubevirt" - namespace: ci - node: "node1" - pod: "virt-controller-8546c99968-x9jgg" - resource: "memory" - container: virt-controller - # Alerts to test whether our operators are up or not - interval: 1m input_series: diff --git a/pkg/virt-operator/resource/generate/components/prometheus.go b/pkg/virt-operator/resource/generate/components/prometheus.go index e03eda77a3d9..3334a53133df 100644 --- a/pkg/virt-operator/resource/generate/components/prometheus.go +++ b/pkg/virt-operator/resource/generate/components/prometheus.go @@ -422,39 +422,6 @@ func NewPrometheusRuleSpec(ns string) *v1.PrometheusRuleSpec { operatorHealthImpactLabelKey: "none", }, }, - { - Alert: "KubeVirtComponentExceedsRequestedMemory", - Expr: intstr.FromString( - // In 'container_memory_working_set_bytes', 'container=""' filters the accumulated metric for the pod slice to measure total Memory usage for all containers within the pod - fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="memory"}) - on(pod) group_left(node) container_memory_working_set_bytes{container="",namespace="%s"}) < 0`, ns, ns)), - For: "5m", - Annotations: map[string]string{ - "description": "Container {{ $labels.container }} in pod {{ $labels.pod }} memory usage exceeds the memory requested", - "summary": "The container is using more memory than what is defined in the containers resource requests", - "runbook_url": fmt.Sprintf(runbookURLTemplate, "KubeVirtComponentExceedsRequestedMemory"), - }, - Labels: map[string]string{ - severityAlertLabelKey: "warning", - operatorHealthImpactLabelKey: "none", - }, - }, - { - Alert: "KubeVirtComponentExceedsRequestedCPU", - Expr: intstr.FromString( - // In 'container_cpu_usage_seconds_total', 'container=""' filters the accumulated metric for the pod slice to measure total CPU usage for all containers within the pod - fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="cpu"}) - on(pod) sum(rate(container_cpu_usage_seconds_total{container="",namespace="%s"}[5m])) by (pod)) < 0`, ns, ns), - ), - For: "5m", - Annotations: map[string]string{ - "description": "Pod {{ $labels.pod }} cpu usage exceeds the CPU requested", - "summary": "The containers in the pod are using more CPU than what is defined in the containers resource requests", - "runbook_url": fmt.Sprintf(runbookURLTemplate, "KubeVirtComponentExceedsRequestedCPU"), - }, - Labels: map[string]string{ - severityAlertLabelKey: "warning", - operatorHealthImpactLabelKey: "none", - }, - }, { Alert: "KubeVirtVMIExcessiveMigrations", Expr: intstr.FromString("sum by (vmi) (max_over_time(kubevirt_vmi_migration_succeeded[1d])) >= 12"), diff --git a/tests/monitoring/component_monitoring.go b/tests/monitoring/component_monitoring.go index 96680acfb288..d655b32e1d15 100644 --- a/tests/monitoring/component_monitoring.go +++ b/tests/monitoring/component_monitoring.go @@ -249,35 +249,6 @@ var _ = Describe("[Serial][sig-monitoring]Component Monitoring", Serial, decorat }, 5*time.Minute, 500*time.Millisecond).Should(Succeed()) }) }) - - Context("Resource metrics", func() { - var resourceAlerts = []string{ - "KubeVirtComponentExceedsRequestedCPU", - "KubeVirtComponentExceedsRequestedMemory", - } - - BeforeEach(func() { - virtClient = kubevirt.Client() - scales = NewScaling(virtClient, []string{virtOperator.deploymentName}) - scales.UpdateScale(virtOperator.deploymentName, int32(0)) - reduceAlertPendingTime(virtClient) - }) - - AfterEach(func() { - scales.RestoreAllScales() - time.Sleep(10 * time.Second) - waitUntilAlertDoesNotExist(virtClient, resourceAlerts...) - }) - - It("KubeVirtComponentExceedsRequestedCPU should be triggered when virt-api exceeds requested CPU", func() { - By("updating virt-api deployment CPU and Memory requests") - updateDeploymentResourcesRequest(virtClient, virtApi.deploymentName, resource.MustParse("0m"), resource.MustParse("0Mi")) - - By("waiting for KubeVirtComponentExceedsRequestedCPU and KubeVirtComponentExceedsRequestedMemory alerts") - verifyAlertExist(virtClient, "KubeVirtComponentExceedsRequestedCPU") - verifyAlertExist(virtClient, "KubeVirtComponentExceedsRequestedMemory") - }) - }) }) func updateDeploymentResourcesRequest(virtClient kubecli.KubevirtClient, deploymentName string, cpu, memory resource.Quantity) {