Update metric names to fit metrics naming conventions

Signed-off-by: Shirly Radco <sradco@redhat.com>
kubevirt · Jul 23, 2023 · 550119b · 550119b
1 parent 998edbc
commit 550119b
Show file tree

Hide file tree

Showing 10 changed files with 132 additions and 132 deletions.
diff --git a/docs/metrics.md b/docs/metrics.md
@@ -12,7 +12,7 @@ All metrics documented here are auto-generated by the utility tool `tools/doc-ge
 ### kubevirt_info
 Version information.
 
-### kubevirt_allocatable_nodes_count
+### kubevirt_allocatable_nodes
 The number of nodes in the cluster that have the devices.kubevirt.io/kvm resource available. Type: Gauge.
 
 ### kubevirt_api_request_deprecated_total
@@ -21,7 +21,7 @@ The total number of requests to deprecated KubeVirt APIs. Type: Counter.
 ### kubevirt_configuration_emulation_enabled
 Indicates whether the Software Emulation is enabled in the configuration. Type: Gauge.
 
-### kubevirt_kvm_available_nodes_count
+### kubevirt_nodes_with_kvm
 The number of nodes in the cluster that have the devices.kubevirt.io/kvm resource available. Type: Gauge.
 
 ### kubevirt_migrate_vmi_data_processed_bytes
@@ -36,28 +36,28 @@ The rate of memory being dirty in the Guest OS. Type: Gauge.
 ### kubevirt_migrate_vmi_disk_transfer_rate_bytes
 The rate at which the disk is being transferred. Type: Gauge.
 
-### kubevirt_migrate_vmi_failed
+### kubevirt_vmi_migrations_failed
 Number of failed migrations. Type: Gauge.
 
 ### kubevirt_migrate_vmi_memory_transfer_rate_bytes
 The rate at which the memory is being transferred. Type: Gauge.
 
-### kubevirt_migrate_vmi_pending_count
+### kubevirt_vmi_migrations_pending
 Number of current pending migrations. Type: Gauge.
 
-### kubevirt_migrate_vmi_running_count
+### kubevirt_vmi_migrations_running
 Number of current running migrations. Type: Gauge.
 
-### kubevirt_migrate_vmi_scheduling_count
+### kubevirt_vmi_migrations_scheduling
 Number of current scheduling migrations. Type: Gauge.
 
-### kubevirt_migrate_vmi_succeeded
+### kubevirt_vmi_migrations_succeeded
 Number of migrations successfully executed. Type: Gauge.
 
 ### kubevirt_number_of_vms
 The number of VMs in the cluster by namespace. Type: Gauge.
 
-### kubevirt_virt_api_up_total
+### kubevirt_virt_api_up
 The number of virt-api pods that are up. Type: Gauge.
 
 ### kubevirt_virt_controller_leading
@@ -66,22 +66,22 @@ Indication for an operating virt-controller. Type: Gauge.
 ### kubevirt_virt_controller_ready
 Indication for a virt-controller that is ready to take the lead. Type: Gauge.
 
-### kubevirt_virt_controller_ready_total
+### kubevirt_virt_controller_ready
 The number of virt-controller pods that are ready. Type: Gauge.
 
-### kubevirt_virt_controller_up_total
+### kubevirt_virt_controller_up
 The number of virt-controller pods that are up. Type: Gauge.
 
-### kubevirt_virt_handler_up_total
+### kubevirt_virt_handler_up
 The number of virt-handler pods that are up. Type: Gauge.
 
-### kubevirt_virt_operator_leading_total
+### kubevirt_virt_operator_leading
 The number of virt-operator pods that are leading. Type: Gauge.
 
-### kubevirt_virt_operator_ready_total
+### kubevirt_virt_operator_ready
 The number of virt-operator pods that are ready. Type: Gauge.
 
-### kubevirt_virt_operator_up_total
+### kubevirt_virt_operator_up
 The number of virt-operator pods that are up. Type: Gauge.
 
 ### kubevirt_vm_container_free_memory_bytes_based_on_rss
@@ -117,7 +117,7 @@ Total CPU time spent in all modes (sum of both vcpu and hypervisor usage). Type:
 ### kubevirt_vmi_cpu_user_usage_seconds
 Total CPU time spent in user mode. Type: Gauge.
 
-### kubevirt_vmi_filesystem_capacity_bytes_total
+### kubevirt_vmi_filesystem_capacity_bytes
 Total VM filesystem capacity in bytes. Type: Gauge.
 
 ### kubevirt_vmi_filesystem_used_bytes
@@ -132,22 +132,22 @@ Amount of usable memory as seen by the domain. This value may not be accurate if
 ### kubevirt_vmi_memory_cached_bytes
 The amount of memory that is being used to cache I/O and is available to be reclaimed, corresponds to the sum of `Buffers` + `Cached` + `SwapCached` in `/proc/meminfo`. Type: Gauge.
 
-### kubevirt_vmi_memory_domain_bytes_total
+### kubevirt_vmi_memory_domain_bytes
 The amount of memory in bytes allocated to the domain. The `memory` value in domain xml file. Type: Gauge.
 
-### kubevirt_vmi_memory_pgmajfault
+### kubevirt_vmi_memory_pgmajfault_total
 The number of page faults when disk IO was required. Page faults occur when a process makes a valid access to virtual memory that is not available. When servicing the page fault, if disk IO is required, it is considered as major fault. Type: Counter.
 
-### kubevirt_vmi_memory_pgminfault
+### kubevirt_vmi_memory_pgminfault_total
 The number of other page faults, when disk IO was not required. Page faults occur when a process makes a valid access to virtual memory that is not available. When servicing the page fault, if disk IO is NOT required, it is considered as minor fault. Type: Counter.
 
 ### kubevirt_vmi_memory_resident_bytes
 Resident set size of the process running the domain. Type: Gauge.
 
-### kubevirt_vmi_memory_swap_in_traffic_bytes_total
+### kubevirt_vmi_memory_swap_in_traffic_bytes
 The total amount of data read from swap space of the guest in bytes. Type: Gauge.
 
-### kubevirt_vmi_memory_swap_out_traffic_bytes_total
+### kubevirt_vmi_memory_swap_out_traffic_bytes
 The total amount of memory written out to swap space of the guest in bytes. Type: Gauge.
 
 ### kubevirt_vmi_memory_unused_bytes
@@ -189,7 +189,7 @@ Total network traffic transmitted packets. Type: Counter.
 ### kubevirt_vmi_non_evictable
 Indication for a VirtualMachine that its eviction strategy is set to Live Migration but is not migratable. Type: Gauge.
 
-### kubevirt_vmi_outdated_count
+### kubevirt_vmi_number_of_outdated
 Indication for the total number of VirtualMachineInstance workloads that are not running within the most up-to-date version of the virt-launcher environment. Type: Gauge.
 
 ### kubevirt_vmi_phase_count
@@ -198,7 +198,7 @@ Sum of VMIs per phase and node. `phase` can be one of the following: [`Pending`,
 ### kubevirt_vmi_storage_flush_requests_total
 Total storage flush requests. Type: Counter.
 
-### kubevirt_vmi_storage_flush_times_ms_total
+### kubevirt_vmi_storage_flush_times_seconds_total
 Total time spent on cache flushing. Type: Counter.
 
 ### kubevirt_vmi_storage_iops_read_total
@@ -207,28 +207,28 @@ Total number of I/O read operations. Type: Counter.
 ### kubevirt_vmi_storage_iops_write_total
 Total number of I/O write operations. Type: Counter.
 
-### kubevirt_vmi_storage_read_times_ms_total
+### kubevirt_vmi_storage_read_times_seconds_total
 Total time spent on read operations. Type: Counter.
 
 ### kubevirt_vmi_storage_read_traffic_bytes_total
 Total number of bytes read from storage. Type: Counter.
 
-### kubevirt_vmi_storage_write_times_ms_total
+### kubevirt_vmi_storage_write_times_seconds_total
 Total time spent on write operations. Type: Counter.
 
 ### kubevirt_vmi_storage_write_traffic_bytes_total
 Total number of written bytes. Type: Counter.
 
-### kubevirt_vmi_vcpu_seconds
+### kubevirt_vmi_vcpu_seconds_total
 Total amount of time spent in each state by each vcpu (cpu_time excluding hypervisor time). Where `id` is the vcpu identifier and `state` can be one of the following: [`OFFLINE`, `RUNNING`, `BLOCKED`]. Type: Counter.
 
-### kubevirt_vmi_vcpu_wait_seconds
+### kubevirt_vmi_vcpu_wait_seconds_total
 Amount of time spent by each vcpu while waiting on I/O. Type: Counter.
 
 ### kubevirt_vmsnapshot_disks_restored_from_source_bytes
 Returns the amount of space in bytes restored from the source virtual machine. Type: Gauge.
 
-### kubevirt_vmsnapshot_disks_restored_from_source_total
+### kubevirt_vmsnapshot_disks_restored_from_source
 Returns the total number of virtual machine disks restored from the source virtual machine. Type: Gauge.
 
 ### kubevirt_vmsnapshot_persistentvolumeclaim_labels

diff --git a/hack/prom-rule-ci/prom-rules-tests.yaml b/hack/prom-rule-ci/prom-rules-tests.yaml
@@ -725,7 +725,7 @@ tests:
   # Excessive VMI Migrations in a period of time
   - interval: 1h
     input_series:
-      - series: 'kubevirt_migrate_vmi_succeeded{vmi="vmi-example-1"}'
+      - series: 'kubevirt_vmi_migrations_succeeded{vmi="vmi-example-1"}'
         # time:  0 1 2 3 4 5
         values: "_ _ _ 1 7 13"
 

diff --git a/pkg/monitoring/domainstats/prometheus/prometheus.go b/pkg/monitoring/domainstats/prometheus/prometheus.go
@@ -161,7 +161,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
 
 	if mem.SwapInSet {
 		metrics.pushCommonMetric(
-			"kubevirt_vmi_memory_swap_in_traffic_bytes_total",
+			"kubevirt_vmi_memory_swap_in_traffic_bytes",
 			"The total amount of data read from swap space of the guest in bytes.",
 			prometheus.GaugeValue,
 			float64(mem.SwapIn)*1024,
@@ -170,7 +170,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
 
 	if mem.SwapOutSet {
 		metrics.pushCommonMetric(
-			"kubevirt_vmi_memory_swap_out_traffic_bytes_total",
+			"kubevirt_vmi_memory_swap_out_traffic_bytes",
 			"The total amount of memory written out to swap space of the guest in bytes.",
 			prometheus.GaugeValue,
 			float64(mem.SwapOut)*1024,
@@ -179,7 +179,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
 
 	if mem.MajorFaultSet {
 		metrics.pushCommonMetric(
-			"kubevirt_vmi_memory_pgmajfault",
+			"kubevirt_vmi_memory_pgmajfault_total",
 			"The number of page faults when disk IO was required. Page faults occur when a process makes a valid access to virtual memory that is not available. When servicing the page fault, if disk IO is required, it is considered as major fault.",
 			prometheus.CounterValue,
 			float64(mem.MajorFault),
@@ -188,7 +188,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
 
 	if mem.MinorFaultSet {
 		metrics.pushCommonMetric(
-			"kubevirt_vmi_memory_pgminfault",
+			"kubevirt_vmi_memory_pgminfault_total",
 			"The number of other page faults, when disk IO was not required. Page faults occur when a process makes a valid access to virtual memory that is not available. When servicing the page fault, if disk IO is NOT required, it is considered as minor fault.",
 			prometheus.CounterValue,
 			float64(mem.MinorFault),
@@ -215,7 +215,7 @@ func (metrics *vmiMetrics) updateMemory(mem *stats.DomainStatsMemory) {
 
 	if mem.TotalSet {
 		metrics.pushCommonMetric(
-			"kubevirt_vmi_memory_domain_bytes_total",
+			"kubevirt_vmi_memory_domain_bytes",
 			"The amount of memory in bytes allocated to the domain. The `memory` value in domain xml file.",
 			prometheus.GaugeValue,
 			float64(mem.Total)*1024,
@@ -282,7 +282,7 @@ func (metrics *vmiMetrics) updateVcpu(vcpuStats []stats.DomainStatsVcpu) {
 
 		if vcpu.StateSet && vcpu.TimeSet {
 			metrics.pushCustomMetric(
-				"kubevirt_vmi_vcpu_seconds",
+				"kubevirt_vmi_vcpu_seconds_total",
 				"Total amount of time spent in each state by each vcpu (cpu_time excluding hypervisor time). Where `id` is the vcpu identifier and `state` can be one of the following: [`OFFLINE`, `RUNNING`, `BLOCKED`].",
 				prometheus.CounterValue,
 				float64(vcpu.Time/1000000000),
@@ -293,10 +293,10 @@ func (metrics *vmiMetrics) updateVcpu(vcpuStats []stats.DomainStatsVcpu) {
 
 		if vcpu.WaitSet {
 			metrics.pushCustomMetric(
-				"kubevirt_vmi_vcpu_wait_seconds",
+				"kubevirt_vmi_vcpu_wait_seconds_total",
 				"Amount of time spent by each vcpu while waiting on I/O.",
 				prometheus.CounterValue,
-				float64(vcpu.Wait)/float64(1000000),
+				float64(vcpu.Wait)/float64(1000000000),
 				[]string{"id"},
 				[]string{stringVcpuIdx},
 			)
@@ -364,21 +364,21 @@ func (metrics *vmiMetrics) updateBlock(blkStats []stats.DomainStatsBlock) {
 
 		if block.RdTimesSet {
 			metrics.pushCustomMetric(
-				"kubevirt_vmi_storage_read_times_ms_total",
-				"Total time (ms) spent on read operations.",
+				"kubevirt_vmi_storage_read_times_seconds_total",
+				"Total time spent on read operations.",
 				prometheus.CounterValue,
-				float64(block.RdTimes)/1000000,
+				float64(block.RdTimes)/1000000000,
 				blkLabels,
 				blkLabelValues,
 			)
 		}
 
 		if block.WrTimesSet {
 			metrics.pushCustomMetric(
-				"kubevirt_vmi_storage_write_times_ms_total",
-				"Total time (ms) spent on write operations.",
+				"kubevirt_vmi_storage_write_times_seconds_total",
+				"Total time spent on write operations.",
 				prometheus.CounterValue,
-				float64(block.WrTimes)/1000000,
+				float64(block.WrTimes)/1000000000,
 				blkLabels,
 				blkLabelValues,
 			)
@@ -397,10 +397,10 @@ func (metrics *vmiMetrics) updateBlock(blkStats []stats.DomainStatsBlock) {
 
 		if block.FlTimesSet {
 			metrics.pushCustomMetric(
-				"kubevirt_vmi_storage_flush_times_ms_total",
-				"Total time (ms) spent on cache flushing.",
+				"kubevirt_vmi_storage_flush_times_seconds_total",
+				"Total time spent on cache flushing.",
 				prometheus.CounterValue,
-				float64(block.FlTimes)/1000000,
+				float64(block.FlTimes)/1000000000,
 				blkLabels,
 				blkLabelValues,
 			)
@@ -533,7 +533,7 @@ func (metrics *vmiMetrics) updateFilesystem(vmFSStats k6tv1.VirtualMachineInstan
 		fsLabelValues := []string{fsStat.DiskName, fsStat.MountPoint, fsStat.FileSystemType}
 
 		metrics.pushCustomMetric(
-			"kubevirt_vmi_filesystem_capacity_bytes_total",
+			"kubevirt_vmi_filesystem_capacity_bytes",
 			"Total VM filesystem capacity in bytes.",
 			prometheus.GaugeValue,
 			float64(fsStat.TotalBytes),

diff --git a/pkg/monitoring/domainstats/prometheus/prometheus_test.go b/pkg/monitoring/domainstats/prometheus/prometheus_test.go
@@ -197,7 +197,7 @@ var _ = Describe("Prometheus", func() {
 			result.Write(dto)
 
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_swap_in_traffic_bytes_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_swap_in_traffic_bytes"))
 			Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
 		})
 
@@ -222,7 +222,7 @@ var _ = Describe("Prometheus", func() {
 			result.Write(dto)
 
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_swap_out_traffic_bytes_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_swap_out_traffic_bytes"))
 			Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
 		})
 
@@ -247,7 +247,7 @@ var _ = Describe("Prometheus", func() {
 			result.Write(dto)
 
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_pgmajfault"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_pgmajfault_total"))
 			Expect(dto.Counter.GetValue()).To(BeEquivalentTo(float64(1024)))
 		})
 
@@ -272,7 +272,7 @@ var _ = Describe("Prometheus", func() {
 			result.Write(dto)
 
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_pgminfault"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_pgminfault_total"))
 			Expect(dto.Counter.GetValue()).To(BeEquivalentTo(float64(1024)))
 		})
 
@@ -347,7 +347,7 @@ var _ = Describe("Prometheus", func() {
 			result.Write(dto)
 
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_domain_bytes_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_memory_domain_bytes"))
 			Expect(dto.Gauge.GetValue()).To(BeEquivalentTo(float64(1024)))
 		})
 
@@ -430,7 +430,7 @@ var _ = Describe("Prometheus", func() {
 
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_vcpu_seconds"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_vcpu_seconds_total"))
 		})
 
 		It("should not expose vcpu metrics for invalid DomainStats", func() {
@@ -676,7 +676,7 @@ var _ = Describe("Prometheus", func() {
 
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_read_times_ms_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_read_times_seconds_total"))
 		})
 
 		It("should handle block write time metrics", func() {
@@ -703,7 +703,7 @@ var _ = Describe("Prometheus", func() {
 
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_write_times_ms_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_write_times_seconds_total"))
 		})
 
 		It("should handle block flush requests metrics", func() {
@@ -757,7 +757,7 @@ var _ = Describe("Prometheus", func() {
 
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_flush_times_ms_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_storage_flush_times_seconds_total"))
 		})
 
 		It("should use alias when alias is not empty", func() {
@@ -1153,7 +1153,7 @@ var _ = Describe("Prometheus", func() {
 
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_vcpu_wait_seconds"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_vcpu_wait_seconds_total"))
 		})
 
 		It("should expose vcpu to cpu pinning metric", func() {
@@ -1216,7 +1216,7 @@ var _ = Describe("Prometheus", func() {
 			ps.Report("test", &vmi, newVmStats(domainStats, fsStats))
 			result := <-ch
 			Expect(result).ToNot(BeNil())
-			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_filesystem_capacity_bytes_total"))
+			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_filesystem_capacity_bytes"))
 			result = <-ch
 			Expect(result).ToNot(BeNil())
 			Expect(result.Desc().String()).To(ContainSubstring("kubevirt_vmi_filesystem_used_bytes"))