add memory qos strategy

Signed-off-by: saintube <saintube@foxmail.com>
koordinator-sh · Apr 28, 2022 · 24fb108 · 24fb108
1 parent ccbad0c
commit 24fb108
Show file tree

Hide file tree

Showing 28 changed files with 4,107 additions and 139 deletions.
diff --git a/apis/extension/pod.go b/apis/extension/pod.go
@@ -25,6 +25,8 @@ import (
 
 const (
 	AnnotationPodCPUBurst = DomainPrefix + "cpuBurst"
+
+	AnnotationPodMemoryQoS = DomainPrefix + "memoryQoS"
 )
 
 func GetPodCPUBurstConfig(pod *corev1.Pod) (*slov1aplhpa1.CPUBurstConfig, error) {
@@ -43,3 +45,19 @@ func GetPodCPUBurstConfig(pod *corev1.Pod) (*slov1aplhpa1.CPUBurstConfig, error)
 	}
 	return &cpuBurst, nil
 }
+
+func GetPodMemoryQoSConfig(pod *corev1.Pod) (*slov1aplhpa1.PodMemoryQoSConfig, error) {
+	if pod == nil || pod.Annotations == nil {
+		return nil, nil
+	}
+	value, exist := pod.Annotations[AnnotationPodMemoryQoS]
+	if !exist {
+		return nil, nil
+	}
+	cfg := slov1aplhpa1.PodMemoryQoSConfig{}
+	err := json.Unmarshal([]byte(value), &cfg)
+	if err != nil {
+		return nil, err
+	}
+	return &cfg, nil
+}
diff --git a/apis/slo/v1alpha1/nodeslo_types.go b/apis/slo/v1alpha1/nodeslo_types.go
@@ -23,6 +23,107 @@ import (
 // EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
 // NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
 
+// MemoryQoS enables memory qos features.
+type MemoryQoS struct {
+	// memcg qos
+	// If enabled, memcg qos will be set by the agent, where some fields are implicitly calculated from pod spec.
+	// 1. `memory.min` := spec.requests.memory * minLimitFactor / 100 (use 0 if requests.memory is not set)
+	// 2. `memory.low` := spec.requests.memory * lowLimitFactor / 100 (use 0 if requests.memory is not set)
+	// 3. `memory.limit_in_bytes` := spec.limits.memory (set $node.allocatable.memory if limits.memory is not set)
+	// 4. `memory.high` := memory.limit_in_bytes * throttlingFactor / 100 (use "max" if memory.high <= memory.min)
+	// MinLimitPercent specifies the minLimitFactor percentage to calculate `memory.min`, which protects memory
+	// from global reclamation when memory usage does not exceed the min limit.
+	// Close: 0.
+	MinLimitPercent *int64 `json:"minLimitPercent,omitempty"`
+	// LowLimitPercent specifies the lowLimitFactor percentage to calculate `memory.low`, which TRIES BEST
+	// protecting memory from global reclamation when memory usage does not exceed the low limit unless no unprotected
+	// memcg can be reclaimed.
+	// NOTE: `memory.low` should be larger than `memory.min`. If spec.requests.memory == spec.limits.memory,
+	// pod `memory.low` and `memory.high` become invalid, while `memory.wmark_ratio` is still in effect.
+	// Close: 0.
+	LowLimitPercent *int64 `json:"lowLimitPercent,omitempty"`
+	// ThrottlingPercent specifies the throttlingFactor percentage to calculate `memory.high` with pod
+	// memory.limits or node allocatable memory, which triggers memcg direct reclamation when memory usage exceeds.
+	// Lower the factor brings more heavier reclaim pressure.
+	// Close: 0.
+	ThrottlingPercent *int64 `json:"throttlingPercent,omitempty"`
+
+	// wmark_ratio (Anolis OS required)
+	// Async memory reclamation is triggered when cgroup memory usage exceeds `memory.wmark_high` and the reclamation
+	// stops when usage is below `memory.wmark_low`. Basically,
+	// `memory.wmark_high` := min(memory.high, memory.limit_in_bytes) * memory.wmark_scale_factor
+	// `memory.wmark_low` := min(memory.high, memory.limit_in_bytes) * (memory.wmark_ratio - memory.wmark_scale_factor)
+	// WmarkRatio specifies `memory.wmark_ratio` that help calculate `memory.wmark_high`, which triggers async
+	// memory reclamation when memory usage exceeds.
+	// Close: 0. Recommended: 95.
+	WmarkRatio *int64 `json:"wmarkRatio,omitempty"`
+	// WmarkScalePermill specifies `memory.wmark_scale_factor` that helps calculate `memory.wmark_low`, which
+	// stops async memory reclamation when memory usage belows.
+	// Close: 50. Recommended: 20.
+	WmarkScalePermill *int64 `json:"wmarkScalePermill,omitempty"`
+
+	// wmark_min_adj (Anolis OS required)
+	// WmarkMinAdj specifies `memory.wmark_min_adj` which adjusts per-memcg threshold for global memory
+	// reclamation. Lower the factor brings later reclamation.
+	// The adjustment uses different formula for different value range.
+	// [-25, 0)：global_wmark_min' = global_wmark_min + (global_wmark_min - 0) * wmarkMinAdj
+	// (0, 50]：global_wmark_min' = global_wmark_min + (global_wmark_low - global_wmark_min) * wmarkMinAdj
+	// Close: [LSR:0, LS:0, BE:0]. Recommended: [LSR:-25, LS:-25, BE:50].
+	WmarkMinAdj *int64 `json:"wmarkMinAdj,omitempty"`
+
+	// TODO: enhance the usages of oom priority and oom kill group
+	PriorityEnable *int64 `json:"priorityEnable,omitempty"`
+	Priority       *int64 `json:"priority,omitempty"`
+	OomKillGroup   *int64 `json:"oomKillGroup,omitempty"`
+}
+
+type PodMemoryQoSPolicy string
+
+const (
+	// PodMemoryQoSPolicyDefault indicates pod inherits node-level config
+	PodMemoryQoSPolicyDefault PodMemoryQoSPolicy = "default"
+	// PodMemoryQoSPolicyNone indicates pod disables memory qos
+	PodMemoryQoSPolicyNone PodMemoryQoSPolicy = "none"
+	// PodMemoryQoSPolicyAuto indicates pod uses a recommended config
+	PodMemoryQoSPolicyAuto PodMemoryQoSPolicy = "auto"
+)
+
+type PodMemoryQoSConfig struct {
+	// Policy indicates the qos plan; use "default" if empty
+	Policy    PodMemoryQoSPolicy `json:"policy,omitempty"`
+	MemoryQoS `json:",inline"`
+}
+
+// MemoryQoSCfg stores node-level config of memory qos
+type MemoryQoSCfg struct {
+	// Enable indicates whether the memory qos is enabled (Cluster default: false).
+	// This field is used for node-level control, while pod-level configuration is done with MemoryQoS and `Policy`
+	// instead of an `Enable` option. Please view the differences between MemoryQoSCfg and PodMemoryQoSConfig structs.
+	Enable    *bool `json:"enable,omitempty"`
+	MemoryQoS `json:",inline"`
+}
+
+type ResourceQoS struct {
+	MemoryQoS *MemoryQoSCfg `json:"memoryQoS,omitempty"`
+}
+
+type ResourceQoSStrategy struct {
+	// ResourceQoS for LSR pods.
+	LSR *ResourceQoS `json:"lsr,omitempty"`
+
+	// ResourceQoS for LS pods.
+	LS *ResourceQoS `json:"ls,omitempty"`
+
+	// ResourceQoS for BE pods.
+	BE *ResourceQoS `json:"be,omitempty"`
+
+	// ResourceQoS for system pods
+	System *ResourceQoS `json:"system,omitempty"`
+
+	// ResourceQoS for root cgroup.
+	CgroupRoot *ResourceQoS `json:"cgroupRoot,omitempty"`
+}
+
 type CPUSuppressPolicy string
 
 const (
@@ -87,6 +188,8 @@ type CPUBurstStrategy struct {
 type NodeSLOSpec struct {
 	// BE pods will be limited if node resource usage overload
 	ResourceUsedThresholdWithBE *ResourceThresholdStrategy `json:"resourceUsedThresholdWithBE,omitempty"`
+	// QoS config strategy for pods of different qos-class
+	ResourceQoSStrategy *ResourceQoSStrategy `json:"resourceQoSStrategy,omitempty"`
 	// CPU Burst Strategy
 	CPUBurstStrategy *CPUBurstStrategy `json:"cpuBurstStrategy,omitempty"`
 }

diff --git a/apis/slo/v1alpha1/zz_generated.deepcopy.go b/apis/slo/v1alpha1/zz_generated.deepcopy.go