-
Notifications
You must be signed in to change notification settings - Fork 106
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RHOBS-995: Simplify cluster:capacity_effective_cpu_cores, add tests
Simplify by dividing all x86_64 CPU counts by 2. Note that this takes advantage of the way the SKUs are structured, where capacity is written as multiples of "2 cores or 4 vCPUs". One difference in how this simplification works is that nodes reporting more than 2 threads-per-core will be counted by CPUs, rather than by cores. When exactly 2 threads-per-core are reported, there is no functional difference, as node_role_os_version_machine:cpu_capacity_cores:sum already divides CPUs by 2. This adds testing similar to what's in cluster-monitoring-operator, covering only the `cluster:capacity_effective_cpu_cores` rule. I had to update the Prometheus version, as promtool was too old and was incorrectly flagging existing rules. I added a note about rule tests to the README. I did not update the prow config because I don't know where that change belongs, but I'm happy to make that update given some hints.
- Loading branch information
Showing
4 changed files
with
147 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
rule_files: | ||
- ../tmp/rules.yaml | ||
|
||
evaluation_interval: 1m | ||
|
||
tests: | ||
# cluster:capacity_effective_cpu_cores tests | ||
- input_series: | ||
# amd64 worker node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="amd64",_id="amd64_worker",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="amd64_worker",tenant_id="tenant_id"}' | ||
values: '0' | ||
# non-amd64 worker node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="arm64",_id="nonamd64_worker",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="nonamd64_worker",tenant_id="tenant_id"}' | ||
values: '0' | ||
# amd64 schedulable control plane node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="amd64",label_node_role_kubernetes_io="master",_id="amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '1' | ||
# non-amd64 schedulable control plane node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="arm64",label_node_role_kubernetes_io="master",_id="non-amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="non-amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="non-amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
values: '1' | ||
# amd64 non-schedulable control plane node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="amd64",label_node_role_kubernetes_io="master",_id="amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
# amd64 infra node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="amd64",label_node_role_kubernetes_io="infra",_id="amd64_infra",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="amd64_infra",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="amd64_infra",tenant_id="tenant_id"}' | ||
values: '1' | ||
# non-amd64 non-schedulable control plane node | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="arm64",label_node_role_kubernetes_io="master",_id="non-amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="non-amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="non-amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
values: '0' | ||
# non-amd64 infra node | ||
# Uses arch="arm64" like the other non-amd64 fixtures; with "amd64" this case | ||
# merely duplicated the amd64 infra fixture and the non-amd64 infra path went untested. | ||
- series: 'cluster:capacity_cpu_cores:sum{label_node_openshift_io_os_id="rhcos",label_kubernetes_io_arch="arm64",label_node_role_kubernetes_io="infra",_id="non-amd64_infra",tenant_id="tenant_id"}' | ||
values: '4' | ||
- series: 'cluster:cpu_capacity_cores:_id{_id="non-amd64_infra",tenant_id="tenant_id"}' | ||
values: '0' | ||
- series: 'cluster_master_schedulable{_id="non-amd64_infra",tenant_id="tenant_id"}' | ||
values: '1' | ||
promql_expr_test: | ||
# amd64 worker (divided by 2) | ||
- expr: cluster:capacity_effective_cpu_cores{_id="amd64_worker"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="amd64_worker",tenant_id="tenant_id"}' | ||
value: 2 | ||
# non-amd64 worker (not adjusted) | ||
- expr: cluster:capacity_effective_cpu_cores{_id="nonamd64_worker"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="nonamd64_worker",tenant_id="tenant_id"}' | ||
value: 4 | ||
# amd64 schedulable control plane node (divided by 2) | ||
- expr: cluster:capacity_effective_cpu_cores{_id="amd64_control_plane_schedulable"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
value: 2 | ||
# non-amd64 schedulable control plane node (not adjusted) | ||
- expr: cluster:capacity_effective_cpu_cores{_id="non-amd64_control_plane_schedulable"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="non-amd64_control_plane_schedulable",tenant_id="tenant_id"}' | ||
value: 4 | ||
# amd64 non-schedulable control plane node excluded | ||
- expr: cluster:capacity_effective_cpu_cores{_id="amd64_control_plane_non_schedulable"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
value: 0 | ||
# amd64 infra excluded | ||
- expr: cluster:capacity_effective_cpu_cores{_id="amd64_infra"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="amd64_infra",tenant_id="tenant_id"}' | ||
value: 0 | ||
# non-amd64 non-schedulable control plane node excluded | ||
- expr: cluster:capacity_effective_cpu_cores{_id="non-amd64_control_plane_non_schedulable"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="non-amd64_control_plane_non_schedulable",tenant_id="tenant_id"}' | ||
value: 0 | ||
# non-amd64 infra node excluded | ||
- expr: cluster:capacity_effective_cpu_cores{_id="non-amd64_infra"} | ||
eval_time: 0 | ||
exp_samples: | ||
- labels: 'cluster:capacity_effective_cpu_cores{_id="non-amd64_infra",tenant_id="tenant_id"}' | ||
value: 0 |