Skip to content

Commit

Permalink
Fix wrong threshold number
Browse files Browse the repository at this point in the history
  • Loading branch information
mtodor committed Apr 16, 2024
1 parent a81dd5e commit 87f12f7
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion resources/prometheus/prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ spec:
description: "During the last 15 minutes, the average memory request commitment on worker nodes was {{ $value | humanizePercentage }}. This could make pods unschedulable."
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md"
- alert: WorkerNodesCPUQuotaOverCommit
-expr: avg(availability_zone:acscs_worker_nodes:cpu_request_ratio) > 0.1
+expr: avg(availability_zone:acscs_worker_nodes:cpu_request_ratio) > 0.99
for: 15m
labels:
severity: warning
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@ tests:
- interval: 1m
input_series:
- series: kube_node_role{node="worker-1", role="acscs-worker"}
-values: "1+0x20"
+values: "1+0x40"
- series: kube_node_labels{node="worker-1", label_failure_domain_beta_kubernetes_io_zone="us-east-1a"}
-values: "1+0x20"
+values: "1+0x40"
- series: kube_node_status_allocatable{node="worker-1", resource="cpu", job="kube-state-metrics"}
-values: "200+0x20"
+values: "200+0x40"
- series: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{node="worker-1", resource="cpu", job="kube-state-metrics"}
-values: "199+0x20"
+values: "196+0x20 199+0x20"
alert_rule_test:
- eval_time: 1m
alertname: WorkerNodesCPUQuotaOverCommit
exp_alerts: []
- eval_time: 16m
alertname: WorkerNodesCPUQuotaOverCommit
exp_alerts: []
- eval_time: 36m
alertname: WorkerNodesCPUQuotaOverCommit
exp_alerts:
- exp_labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@ tests:
- interval: 1m
input_series:
- series: kube_node_role{node="worker-1", role="acscs-worker"}
-values: "1+0x20"
+values: "1+0x40"
- series: kube_node_labels{node="worker-1", label_failure_domain_beta_kubernetes_io_zone="us-east-1a"}
-values: "1+0x20"
+values: "1+0x40"
- series: kube_node_status_allocatable{node="worker-1", resource="memory", job="kube-state-metrics"}
-values: "200+0x20"
+values: "200+0x40"
- series: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{node="worker-1", resource="memory", job="kube-state-metrics"}
-values: "199+0x20"
+values: "196+0x20 199+0x20"
alert_rule_test:
- eval_time: 1m
alertname: WorkerNodesMemoryQuotaOverCommit
exp_alerts: []
- eval_time: 16m
alertname: WorkerNodesMemoryQuotaOverCommit
exp_alerts: []
- eval_time: 36m
alertname: WorkerNodesMemoryQuotaOverCommit
exp_alerts:
- exp_labels:
Expand Down

0 comments on commit 87f12f7

Please sign in to comment.