Skip to content

Commit

Permalink
Fix misleading domain memory metrics
Browse files Browse the repository at this point in the history
kubevirt_vmi_memory_used_total_bytes refers to the amount of
memory declared in the libvirt domain XML file, so its name is misleading.
We decided to rename it to "kubevirt_vmi_memory_domain_total_bytes".

We also think it is valuable to have a metric which gives
the amount of memory used in the VM. We define a new metric
"kubevirt_vmi_memory_used_bytes" for this purpose. It is computed as
"kubevirt_vmi_memory_available_bytes-kubevirt_vmi_memory_usable_bytes"

For all details, see https://bugzilla.redhat.com/show_bug.cgi?id=2018925

Signed-off-by: Erkan Erol <eerol@redhat.com>
  • Loading branch information
Erkan Erol committed Dec 20, 2021
1 parent 13d494f commit c868381
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 4 deletions.
7 changes: 5 additions & 2 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ Current balloon bytes.
### kubevirt_vmi_memory_available_bytes
Amount of `usable` memory as seen by the domain.

### kubevirt_vmi_memory_domain_total_bytes
The amount of memory in bytes declared for the domain in the libvirt domain XML file.

### kubevirt_vmi_memory_pgmajfault
The number of page faults when disk IO was required.

Expand All @@ -48,8 +51,8 @@ Amount of `unused` memory as seen by the domain.
### kubevirt_vmi_memory_usable_bytes
The amount of memory which can be reclaimed by balloon without causing host swapping in bytes.

### kubevirt_vmi_memory_used_total_bytes
The amount of memory in bytes used by the domain.
### kubevirt_vmi_memory_used_bytes
Amount of `used` memory as seen by the domain.

### kubevirt_vmi_network_receive_bytes_total
Network traffic received in bytes.
Expand Down
33 changes: 33 additions & 0 deletions hack/prom-rule-ci/prom-rules-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -432,3 +432,36 @@ tests:
- eval_time: 1m
alertname: VMCannotBeEvicted
exp_alerts: []

# Test recording rule
- interval: 1m
input_series:
# In reality these metrics carry many more labels; across VMs they are
# identical except for the ones that contain the VM name, such as "name" in the example below.
- series: 'kubevirt_vmi_memory_available_bytes{container="virt-handler", name="vm-example-1", namespace="default", node="node-1"}'
# time: 0 1 2 3
values: "1376882688 1376882688 1376882688 1376882688"
- series: 'kubevirt_vmi_memory_available_bytes{container="virt-handler", name="vm-example-2", namespace="default", node="node-1"}'
# time: 0 1 2 3
values: "2893266944 2893266944 2893266944 2893266944"
- series: 'kubevirt_vmi_memory_usable_bytes{container="virt-handler", name="vm-example-1", namespace="default", node="node-1"}'
# time: 0 1 2 3
values: "1073176576 1073176576 1073176576 1273176576"
- series: 'kubevirt_vmi_memory_usable_bytes{container="virt-handler", name="vm-example-2", namespace="default", node="node-1"}'
# time: 0 1 2 3
values: "2448936960 2448936960 2448936960 2658936964"
promql_expr_test:
- expr: 'kubevirt_vmi_memory_used_bytes'
eval_time: 1m
exp_samples:
- labels: 'kubevirt_vmi_memory_used_bytes{container="virt-handler", name="vm-example-1", namespace="default", node="node-1"}'
value: 303706112
- labels: 'kubevirt_vmi_memory_used_bytes{container="virt-handler", name="vm-example-2", namespace="default", node="node-1"}'
value: 444329984
- expr: 'kubevirt_vmi_memory_used_bytes'
eval_time: 3m
exp_samples:
- labels: 'kubevirt_vmi_memory_used_bytes{container="virt-handler", name="vm-example-1", namespace="default", node="node-1"}'
value: 103706112
- labels: 'kubevirt_vmi_memory_used_bytes{container="virt-handler", name="vm-example-2", namespace="default", node="node-1"}'
value: 234329980
2 changes: 1 addition & 1 deletion pkg/monitoring/domainstats/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {

if mem.TotalSet {
metrics.pushCommonMetric(
"kubevirt_vmi_memory_used_total_bytes",
"kubevirt_vmi_memory_domain_total_bytes",
"The amount of memory in bytes used by the domain.",
prometheus.GaugeValue,
float64(mem.Total)*1024,
Expand Down
2 changes: 1 addition & 1 deletion pkg/monitoring/domainstats/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ var _ = Describe("Prometheus", func() {
result.Write(dto)

Expect(result).ToNot(BeNil())
Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_used_total_bytes"))
Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_domain_total_bytes"))
Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
})

Expand Down
4 changes: 4 additions & 0 deletions pkg/virt-operator/resource/generate/components/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,10 @@ func NewPrometheusRuleSpec(ns string, workloadUpdatesEnabled bool) *v1.Prometheu
Record: "kubevirt_vm_container_free_memory_bytes",
Expr: intstr.FromString("sum by(pod, container) ( kube_pod_container_resource_limits_memory_bytes{pod=~'virt-launcher-.*', container='compute'} - on(pod,container) container_memory_working_set_bytes{pod=~'virt-launcher-.*', container='compute'})"),
},
{
Record: "kubevirt_vmi_memory_used_bytes",
Expr: intstr.FromString("kubevirt_vmi_memory_available_bytes-kubevirt_vmi_memory_usable_bytes"),
},
{
Alert: "KubevirtVmHighMemoryUsage",
Expr: intstr.FromString("kubevirt_vm_container_free_memory_bytes < 20971520"),
Expand Down
7 changes: 7 additions & 0 deletions tools/doc-generator/doc-generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ const (

vmiEvictionBlockerName = "kubevirt_vmi_non_evictable"
vmiEvictionBlockerDesc = "Indication for a VirtualMachine that its eviction strategy is set to Live Migration but is not migratable."

vmiMemoryUsedBytes = "kubevirt_vmi_memory_used_bytes"
vmiMemoryUsedBytesDesc = "Amount of `used` memory as seen by the domain."
)

func main() {
Expand Down Expand Up @@ -137,6 +140,10 @@ var (
name: vmiEvictionBlockerName,
description: vmiEvictionBlockerDesc,
},
{
name: vmiMemoryUsedBytes,
description: vmiMemoryUsedBytesDesc,
},
}
)

Expand Down

0 comments on commit c868381

Please sign in to comment.