/
vms.go
95 lines (87 loc) · 3.8 KB
/
vms.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/*
Copyright 2023 The KubeVirt Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package alerts
import (
"fmt"
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
)
var (
	// fiftyMB is the free-memory threshold (50Mi) of the
	// KubevirtVmHighMemoryUsage alert. Both the alert expression and its
	// description are derived from this single value so they cannot drift.
	fiftyMB = resource.MustParse("50Mi")

	// vmsAlerts holds the alerting rules about virtual machines and virtual
	// machine instances declared in this file. Every rule carries a
	// "warning" severity label and a "none" operator-health-impact label.
	vmsAlerts = []promv1.Rule{
		// Fires when either free-memory metric (working-set based or RSS
		// based) stays below the fiftyMB threshold, in bytes, for one minute.
		{
			Alert: "KubevirtVmHighMemoryUsage",
			Expr:  intstr.FromString(fmt.Sprintf("kubevirt_vm_container_free_memory_bytes_based_on_working_set_bytes < %[1]d or kubevirt_vm_container_free_memory_bytes_based_on_rss < %[1]d", fiftyMB.Value())),
			For:   ptr.To(promv1.Duration("1m")),
			Annotations: map[string]string{
				"description": fmt.Sprintf("Container {{ $labels.container }} in pod {{ $labels.pod }} in namespace {{ $labels.namespace }} free memory is less than %s and it is close to requested memory", fiftyMB.String()),
				"summary":     "VM is at risk of being evicted and in serious cases of memory exhaustion being terminated by the runtime.",
			},
			Labels: map[string]string{
				severityAlertLabelKey:        "warning",
				operatorHealthImpactLabelKey: "none",
			},
		},
		// Selects nodes that host virt-launcher pods but have no virt-handler
		// pod reporting ready: nodes with a ready virt-handler evaluate to 1,
		// the remaining nodes with virt-launcher pods are filled in as 0 by
		// the "or" branch, and only the 0-valued nodes are kept. The condition
		// must hold for ten minutes.
		{
			Alert: "OrphanedVirtualMachineInstances",
			Expr:  intstr.FromString("(((max by (node) (kube_pod_status_ready{condition='true',pod=~'virt-handler.*'} * on(pod) group_left(node) max by(pod,node)(kube_pod_info{pod=~'virt-handler.*',node!=''})) ) == 1) or (count by (node)( kube_pod_info{pod=~'virt-launcher.*',node!=''})*0)) == 0"),
			For:   ptr.To(promv1.Duration("10m")),
			Annotations: map[string]string{
				"summary": "No ready virt-handler pod detected on node {{ $labels.node }} with running vmis for more than 10 minutes",
			},
			Labels: map[string]string{
				severityAlertLabelKey:        "warning",
				operatorHealthImpactLabelKey: "none",
			},
		},
		// Fires when kubevirt_vmi_non_evictable stays above zero for one
		// minute.
		{
			Alert: "VMCannotBeEvicted",
			Expr:  intstr.FromString("kubevirt_vmi_non_evictable > 0"),
			For:   ptr.To(promv1.Duration("1m")),
			Annotations: map[string]string{
				"description": "Eviction policy for {{ $labels.name }} (on node {{ $labels.node }}) is set to Live Migration but the VM is not migratable",
				"summary":     "The VM's eviction strategy is set to Live Migration but the VM is not migratable",
			},
			Labels: map[string]string{
				severityAlertLabelKey:        "warning",
				operatorHealthImpactLabelKey: "none",
			},
		},
		// Compares, per VMI, the summed one-day maximum of
		// kubevirt_vmi_migration_succeeded against 12. No For duration is set
		// on this rule.
		// NOTE(review): the expression uses ">= 12" while the description says
		// "more than 12 times"; confirm which threshold is intended.
		{
			Alert: "KubeVirtVMIExcessiveMigrations",
			Expr:  intstr.FromString("sum by (vmi) (max_over_time(kubevirt_vmi_migration_succeeded[1d])) >= 12"),
			Annotations: map[string]string{
				"description": "VirtualMachineInstance {{ $labels.vmi }} has been migrated more than 12 times during the last 24 hours",
				"summary":     "An excessive amount of migrations have been detected on a VirtualMachineInstance in the last 24 hours.",
			},
			Labels: map[string]string{
				severityAlertLabelKey:        "warning",
				operatorHealthImpactLabelKey: "none",
			},
		},
		// Fires when kubevirt_vmi_number_of_outdated has been non-zero for 24
		// hours.
		{
			Alert: "OutdatedVirtualMachineInstanceWorkloads",
			Expr:  intstr.FromString("kubevirt_vmi_number_of_outdated != 0"),
			For:   ptr.To(promv1.Duration("24h")),
			Annotations: map[string]string{
				"summary": "Some running VMIs are still active in outdated pods after KubeVirt control plane update has completed.",
			},
			Labels: map[string]string{
				severityAlertLabelKey:        "warning",
				operatorHealthImpactLabelKey: "none",
			},
		},
	}
)