Skip to content

Commit

Permalink
Rename rest client metrics to include kubevirt prefix
Browse files Browse the repository at this point in the history
Signed-off-by: machadovilaca <machadovilaca@gmail.com>
  • Loading branch information
machadovilaca committed Apr 9, 2024
1 parent 9db1b13 commit 9207da7
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 40 deletions.
18 changes: 9 additions & 9 deletions docs/metrics.md
Expand Up @@ -33,6 +33,15 @@ The number of VMs in the cluster by namespace. Type: Gauge.
### kubevirt_portforward_active_tunnels
Amount of active portforward tunnels, broken down by namespace and vmi name. Type: Gauge.

### kubevirt_rest_client_rate_limiter_duration_seconds
Client side rate limiter latency in seconds. Broken down by verb and URL. Type: Histogram.

### kubevirt_rest_client_request_latency_seconds
Request latency in seconds. Broken down by verb and URL. Type: Histogram.

### kubevirt_rest_client_requests_total
Number of HTTP requests, partitioned by status code, method, and host. Type: Counter.

### kubevirt_usbredir_active_connections
Amount of active USB redirection connections, broken down by namespace and vmi name. Type: Gauge.

Expand Down Expand Up @@ -270,15 +279,6 @@ Returns the labels of the persistent volume claims that are used for restoring v
### kubevirt_vnc_active_connections
Amount of active VNC connections, broken down by namespace and vmi name. Type: Gauge.

### rest_client_rate_limiter_duration_seconds
Client side rate limiter latency in seconds. Broken down by verb and URL. Type: Histogram.

### rest_client_request_latency_seconds
Request latency in seconds. Broken down by verb and URL. Type: Histogram.

### rest_client_requests_total
Number of HTTP requests, partitioned by status code, method, and host. Type: Counter.

## Developing new metrics
After developing new metrics or changing old ones, please run `make generate` to regenerate this document.

Expand Down
32 changes: 16 additions & 16 deletions hack/prom-rule-ci/prom-rules-tests.yaml
Expand Up @@ -317,21 +317,21 @@ tests:
# values : `0+100x15 0+100x5` the same way because prometheus counters might reset
- interval: 1m
input_series:
- series: 'rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="200"}'
values: '0+10x20'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="400"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="400"}'
values: '0+100x15 0+100x5'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="200"}'
values: '0+10x20'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="400"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="400"}'
values: '0+100x15 0+100x5'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="200"}'
values: '0+10x20'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="500"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="500"}'
values: '0+100x15 0+100x5'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-api-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-api-1", code="200"}'
values: '0+10x20'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-api-1", code="500"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-api-1", code="500"}'
values: '0+100x15 0+100x5'

alert_rule_test:
Expand Down Expand Up @@ -390,21 +390,21 @@ tests:
# values : '0+5x90 0+5x10' the same way because prometheus counters might reset
- interval: 1m
input_series:
- series: 'rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="200"}'
values: '0+10x100'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="400"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-controller-1", code="400"}'
values: '0+5x90 0+5x10'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="200"}'
values: '0+10x100'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="400"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-operator-1", code="400"}'
values: '0+5x90 0+5x10'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="200"}'
values: '0+10x100'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="500"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-handler-1", code="500"}'
values: '0+5x90 0+5x10'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-api-1", code="200"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-api-1", code="200"}'
values: '0+10x100'
- series: 'rest_client_requests_total{namespace="ci", pod="virt-api-1", code="500"}'
- series: 'kubevirt_rest_client_requests_total{namespace="ci", pod="virt-api-1", code="500"}'
values: '0+5x90 0+5x10'

alert_rule_test:
Expand Down
6 changes: 3 additions & 3 deletions pkg/monitoring/metrics/common/client/rest_metrics.go
Expand Up @@ -34,7 +34,7 @@ var (
// "verb" and "url" labels. It is used for the rest client latency metrics.
requestLatency = operatormetrics.NewHistogramVec(
operatormetrics.MetricOpts{
Name: "rest_client_request_latency_seconds",
Name: "kubevirt_rest_client_request_latency_seconds",
Help: "Request latency in seconds. Broken down by verb and URL.",
},
prometheus.HistogramOpts{
Expand All @@ -50,7 +50,7 @@ var (

rateLimiterLatency = operatormetrics.NewHistogramVec(
operatormetrics.MetricOpts{
Name: "rest_client_rate_limiter_duration_seconds",
Name: "kubevirt_rest_client_rate_limiter_duration_seconds",
Help: "Client side rate limiter latency in seconds. Broken down by verb and URL.",
},
prometheus.HistogramOpts{
Expand All @@ -61,7 +61,7 @@ var (

requestResult = operatormetrics.NewCounterVec(
operatormetrics.MetricOpts{
Name: "rest_client_requests_total",
Name: "kubevirt_rest_client_requests_total",
Help: "Number of HTTP requests, partitioned by status code, method, and host.",
},
[]string{"code", "method", "host", "resource", "verb"},
Expand Down
2 changes: 1 addition & 1 deletion pkg/monitoring/rules/alerts/alerts.go
Expand Up @@ -79,7 +79,7 @@ func getRunbookURLTemplate() string {
}

// getErrorRatio returns a PromQL expression for the fraction of REST client
// requests whose "code" label matches errorCodeRegex.
//
// The numerator sums the rate of requests with a matching code; the
// denominator sums the rate of all requests. Both sides are restricted to
// namespace ns and to pods whose name matches "<podName>-.*", and both use a
// range window of durationInMinutes minutes.
//
// NOTE(review): this is a diff hunk, so the pre-rename and post-rename
// assignments both appear below. Presumably only the kubevirt_-prefixed one
// exists in the committed file — confirm against the repository.
func getErrorRatio(ns string, podName string, errorCodeRegex string, durationInMinutes int) string {
errorRatioQuery := "sum ( rate ( rest_client_requests_total{namespace=\"%s\",pod=~\"%s-.*\",code=~\"%s\"} [%dm] ) ) / sum ( rate ( rest_client_requests_total{namespace=\"%s\",pod=~\"%s-.*\"} [%dm] ) )"
errorRatioQuery := "sum ( rate ( kubevirt_rest_client_requests_total{namespace=\"%s\",pod=~\"%s-.*\",code=~\"%s\"} [%dm] ) ) / sum ( rate ( kubevirt_rest_client_requests_total{namespace=\"%s\",pod=~\"%s-.*\"} [%dm] ) )"
// ns, podName and durationInMinutes are passed twice: once for the numerator
// and once for the denominator, which has no code matcher.
return fmt.Sprintf(errorRatioQuery, ns, podName, errorCodeRegex, durationInMinutes, ns, podName, durationInMinutes)
}

Expand Down
6 changes: 0 additions & 6 deletions tests/monitoring/metrics.go
Expand Up @@ -74,12 +74,6 @@ var _ = Describe("[sig-monitoring]Metrics", decorators.SigMonitoring, func() {
"kubevirt_vmi_migrations_in_running_phase": true,
"kubevirt_vmi_migration_succeeded": true,
"kubevirt_vmi_migration_failed": true,

// name do not follow the convention to be prefixed with 'kubevirt_'
// TODO: @machadovilaca - refactor the metric names
"rest_client_request_latency_seconds": true,
"rest_client_rate_limiter_duration_seconds": true,
"rest_client_requests_total": true,
}

It("should contain virt components metrics", func() {
Expand Down
2 changes: 1 addition & 1 deletion tools/perfscale-audit/metric-client/metric-client.go
Expand Up @@ -42,7 +42,7 @@ const (
vmiCreationTimePercentileQuery = `histogram_quantile(0.%d, rate(kubevirt_vmi_phase_transition_time_from_creation_seconds_bucket{phase="Running"}[%ds] offset %ds))`
vmiDeletionToSucceededTimePercentileQuery = `histogram_quantile(0.%d, rate(kubevirt_vmi_phase_transition_time_from_deletion_seconds_bucket{phase="Succeeded"}[%ds] offset %ds))`
vmiDeletionToFailedTimePercentileQuery = `histogram_quantile(0.%d, rate(kubevirt_vmi_phase_transition_time_from_deletion_seconds_bucket{phase="Failed"}[%ds] offset %ds))`
resourceRequestCountsByOperation = `increase(rest_client_requests_total{pod=~"virt-controller.*|virt-handler.*|virt-operator.*|virt-api.*"}[%ds] offset %ds)`
resourceRequestCountsByOperation = `increase(kubevirt_rest_client_requests_total{pod=~"virt-controller.*|virt-handler.*|virt-operator.*|virt-api.*"}[%ds] offset %ds)`
)

// Gauge - Using a Gauge doesn't require using an offset because it holds the accurate count
Expand Down
5 changes: 1 addition & 4 deletions tools/prom-metrics-collector/metrics_collector.go
Expand Up @@ -33,10 +33,7 @@ import (
// https://sdk.operatorframework.io/docs/best-practices/observability-best-practices/#metrics-guidelines
// should be ignored.
var excludedMetrics = map[string]struct{}{
"kubevirt_vmi_phase_count": {},
"rest_client_rate_limiter_duration_seconds": {},
"rest_client_request_latency_seconds": {},
"rest_client_requests_total": {},
"kubevirt_vmi_phase_count": {},
}

// Extract the name, help, and type from the metrics doc file
Expand Down

0 comments on commit 9207da7

Please sign in to comment.