From cb25833d9211ed6e53dc8d8a8ca6d7c8cd5a5e6e Mon Sep 17 00:00:00 2001 From: willzgli Date: Mon, 8 May 2023 11:08:19 +0800 Subject: [PATCH] fix(monitor): fix bug in prometheusrule 1. The idelta range (2m) is poorly matched to the Prometheus scrape_interval (default 1m), which frequently causes gaps in the related series; widen the range to 4m. 2. (container_memory_usage_bytes - container_memory_cache) is not the container's real memory usage; use container_memory_working_set_bytes instead. --- pkg/monitor/controller/prometheus/yamls.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/monitor/controller/prometheus/yamls.go b/pkg/monitor/controller/prometheus/yamls.go index 8ec01b345d..6a46ca8402 100755 --- a/pkg/monitor/controller/prometheus/yamls.go +++ b/pkg/monitor/controller/prometheus/yamls.go @@ -503,7 +503,7 @@ groups: expr: container_memory_usage_bytes * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_container_mem_no_cache_bytes - expr: (container_memory_usage_bytes - container_memory_cache) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 + expr: container_memory_working_set_bytes * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_container_rate_mem_usage_request expr: k8s_container_mem_usage_bytes * 100 / on (pod_name,namespace,container_name) group_left kube_pod_container_resource_requests{resource="memory"} @@ -548,10 +548,10 @@ groups: expr: sum(rate(container_network_transmit_bytes_total[4m])) without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_container_network_receive_bytes - expr: sum(idelta(container_network_receive_bytes_total[2m])) without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 + expr: sum(idelta(container_network_receive_bytes_total[4m])) 
without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_container_network_transmit_bytes - expr: sum(idelta(container_network_transmit_bytes_total[2m])) without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 + expr: sum(idelta(container_network_transmit_bytes_total[4m])) without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_container_network_receive_packets expr: sum(rate(container_network_receive_packets_total[4m])) without(interface) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 @@ -671,7 +671,7 @@ groups: expr: sum(kube_pod_status_phase{phase=~"Running|Succeeded"}) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_pod_restart_total - expr: sum(idelta(kube_pod_container_status_restarts_total [2m])) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 + expr: sum(idelta(kube_pod_container_status_restarts_total [4m])) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2 - record: k8s_pod_restart_total_number expr: sum(kube_pod_container_status_restarts_total) by (namespace,pod_name) * on(namespace, pod_name) group_left(workload_kind,workload_name,node, node_role) __pod_info2