# onzack-namespace-monitoring-recording-rules.yaml
# 121 lines (121 loc) · 7.68 KB
# PrometheusRule holding recording rules that pre-aggregate per-container CPU,
# memory and filesystem metrics, plus per-pod network metrics.
#
# Conventions shared by every rule in this file:
#   * `* on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1)`
#     keeps only series whose pod has a Running-phase sample equal to 1; the
#     matched sample has value 1, so the multiplication leaves the left-hand
#     value unchanged.
#   * `* on(node) group_left(role) instance:kube_node_role` copies the `role`
#     label onto each series by joining on `node`.
#     NOTE(review): instance:kube_node_role is not defined in this file and its
#     sample value multiplies the result - presumably it is always 1; confirm.
#   * Rules that use label_replace build the `uid` label from the `id` label:
#     the five alphanumeric groups (8-4-4-4-12 characters) that follow "pod",
#     separated by "-" or "_", are joined with "-".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: standard-namespace-monitoring-recording-rules
spec:
  groups:
    # ---- CPU: usage rate, requests, limits and CFS throttling ---------------
    - name: cpu
      rules:
        # Per-container 5m rate of CPU seconds, excluding series with an empty
        # container label or container="POD".
        - record: container_cpu_usage_seconds_total:sum_rate5m
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            rate(container_cpu_usage_seconds_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
        # CPU requests of containers in Running pods. No label_replace here:
        # `uid` must already be present on the source series - TODO confirm.
        - record: kube_pod_container_resource_requests:cpu:running
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            kube_pod_container_resource_requests{resource="cpu"} * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # CPU limits of containers in Running pods (same shape as the requests
        # rule above).
        - record: kube_pod_container_resource_limits:cpu:running
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            kube_pod_container_resource_limits{resource="cpu"} * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # Share of CFS periods that were throttled over the last 5m, in percent.
        # `/` and `*` have equal precedence and are left-associative in PromQL,
        # so this evaluates as (100 / periods) * throttled_periods.
        # NOTE(review): a zero increase in periods makes 100 / 0 = +Inf, so the
        # recorded sample can be +Inf or NaN; confirm consumers tolerate that.
        - record: container_cpu_cfs_throttling:percent
          expr: |-
            100
            /
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            increase(container_cpu_cfs_periods_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
            *
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            increase(container_cpu_cfs_throttled_periods_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
    # ---- Memory: working set, requests and limits ---------------------------
    - name: memory
      rules:
        # Current working-set bytes per container (instant value, no rate).
        - record: container_memory_working_set_bytes:sum
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            container_memory_working_set_bytes{container!="", container!="POD"} * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
        # Memory requests of containers in Running pods.
        - record: kube_pod_container_resource_requests:memory:running
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            kube_pod_container_resource_requests{resource="memory"} * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # Memory limits of containers in Running pods.
        - record: kube_pod_container_resource_limits:memory:running
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            kube_pod_container_resource_limits{resource="memory"} * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
    # ---- Storage: per-container filesystem read/write rates -----------------
    - name: storage
      rules:
        # 5m rate of bytes read.
        - record: container_fs_reads_bytes_total:sum_rate5m
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            rate(container_fs_reads_bytes_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
        # 5m rate of the container_fs_reads_total counter.
        - record: container_fs_reads_total:sum_rate5m
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            rate(container_fs_reads_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
        # 5m rate of bytes written.
        - record: container_fs_writes_bytes_total:sum_rate5m
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            rate(container_fs_writes_bytes_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
        # 5m rate of the container_fs_writes_total counter.
        - record: container_fs_writes_total:sum_rate5m
          expr: |-
            sum by (uid,container,pod,namespace,node,role) (
            label_replace(
            rate(container_fs_writes_total{container!="", container!="POD"}[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            , "uid", "$1-$2-$3-$4-$5", "id", ".+pod([a-zA-Z0-9]{8})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{4})(?:-|_)([a-zA-Z0-9]{12}).+")
            )
    # ---- Network: per-pod receive/transmit rates ----------------------------
    # Unlike the groups above, these rules aggregate by pod only (no `uid` or
    # `container` label) and apply no container filter to the source series.
    - name: network
      rules:
        # 5m rate of bytes received.
        - record: container_network_receive_bytes_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_receive_bytes_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # 5m rate of packets received.
        - record: container_network_receive_packets_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_receive_packets_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # 5m rate of received packets that were dropped.
        - record: container_network_receive_packets_dropped_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_receive_packets_dropped_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # 5m rate of bytes transmitted.
        - record: container_network_transmit_bytes_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_transmit_bytes_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # 5m rate of packets transmitted.
        - record: container_network_transmit_packets_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_transmit_packets_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )
        # 5m rate of transmitted packets that were dropped.
        - record: container_network_transmit_packets_dropped_total:sum_rate5m
          expr: |-
            sum by (pod,namespace,node,role) (
            rate(container_network_transmit_packets_dropped_total[5m]) * on(pod, namespace) group_left() (kube_pod_status_phase{phase="Running"}==1) * on(node) group_left(role) instance:kube_node_role
            )