Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #41052 #41053

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion cluster/gce/config-default.sh
Expand Up @@ -105,7 +105,7 @@ fi
RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"

# Optional: set feature gates
FEATURE_GATES="${KUBE_FEATURE_GATES:-}"
FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"

# Optional: Install cluster DNS.
ENABLE_CLUSTER_DNS="${KUBE_ENABLE_CLUSTER_DNS:-true}"
Expand Down
2 changes: 1 addition & 1 deletion cluster/gce/config-test.sh
Expand Up @@ -65,7 +65,7 @@ MASTER_IP_RANGE="${MASTER_IP_RANGE:-10.246.0.0/24}"
RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"

# Optional: set feature gates
FEATURE_GATES="${KUBE_FEATURE_GATES:-}"
FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"

TERMINATED_POD_GC_THRESHOLD=${TERMINATED_POD_GC_THRESHOLD:-100}

Expand Down
5 changes: 3 additions & 2 deletions pkg/kubelet/eviction/eviction_manager.go
Expand Up @@ -31,6 +31,7 @@ import (
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util/clock"
utilconfig "k8s.io/kubernetes/pkg/util/config"
"k8s.io/kubernetes/pkg/util/wait"
)

Expand Down Expand Up @@ -103,7 +104,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
// the node has memory pressure, admit if not best-effort
if hasNodeCondition(m.nodeConditions, api.NodeMemoryPressure) {
notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
if notBestEffort || kubetypes.IsCriticalPod(attrs.Pod) {
if notBestEffort || (kubetypes.IsCriticalPod(attrs.Pod) && utilconfig.DefaultFeatureGate.ExperimentalCriticalPodAnnotation()) {
return lifecycle.PodAdmitResult{Admit: true}
}
}
Expand Down Expand Up @@ -248,7 +249,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// we kill at most a single pod during each eviction interval
for i := range activePods {
pod := activePods[i]
if kubepod.IsStaticPod(pod) {
if utilconfig.DefaultFeatureGate.ExperimentalCriticalPodAnnotation() && kubepod.IsStaticPod(pod) {
// The eviction manager doesn't evict static pods. To stop a static
// pod, the admin needs to remove the manifest from kubelet's
// --config directory.
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/eviction/eviction_manager_test.go
Expand Up @@ -28,6 +28,7 @@ import (
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/clock"
utilconfig "k8s.io/kubernetes/pkg/util/config"
)

// mockPodKiller is used for testing which pod is killed
Expand Down Expand Up @@ -159,6 +160,7 @@ func TestMemoryPressure(t *testing.T) {
thresholdsFirstObservedAt: thresholdsObservedAt{},
}

utilconfig.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
// create a best effort pod to test admission
bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi")
burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi")
Expand Down
28 changes: 17 additions & 11 deletions pkg/kubelet/kubelet.go
Expand Up @@ -2428,20 +2428,26 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *api.Pod, start time.Time) {
func (kl *Kubelet) HandlePodAdditions(pods []*api.Pod) {
start := kl.clock.Now()

// Pass critical pods through admission check first.
var criticalPods []*api.Pod
var nonCriticalPods []*api.Pod
for _, p := range pods {
if kubetypes.IsCriticalPod(p) {
criticalPods = append(criticalPods, p)
} else {
nonCriticalPods = append(nonCriticalPods, p)
if utilconfig.DefaultFeatureGate.ExperimentalCriticalPodAnnotation() {
// Pass critical pods through admission check first.
var criticalPods []*api.Pod
var nonCriticalPods []*api.Pod
for _, p := range pods {
if kubetypes.IsCriticalPod(p) {
criticalPods = append(criticalPods, p)
} else {
nonCriticalPods = append(nonCriticalPods, p)
}
}
sort.Sort(sliceutils.PodsByCreationTime(criticalPods))
sort.Sort(sliceutils.PodsByCreationTime(nonCriticalPods))
pods = append(criticalPods, nonCriticalPods...)

} else {
sort.Sort(sliceutils.PodsByCreationTime(pods))
}
sort.Sort(sliceutils.PodsByCreationTime(criticalPods))
sort.Sort(sliceutils.PodsByCreationTime(nonCriticalPods))

for _, pod := range append(criticalPods, nonCriticalPods...) {
for _, pod := range pods {

if kubepod.IsMirrorPod(pod) {
kl.podManager.AddPod(pod)
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/kubelet_test.go
Expand Up @@ -64,6 +64,7 @@ import (
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/clock"
utilconfig "k8s.io/kubernetes/pkg/util/config"
"k8s.io/kubernetes/pkg/util/flowcontrol"
"k8s.io/kubernetes/pkg/util/mount"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
Expand Down Expand Up @@ -1999,6 +2000,7 @@ func TestHandlePortConflicts(t *testing.T) {

// Tests that we sort pods based on criticality.
func TestCriticalPrioritySorting(t *testing.T) {
utilconfig.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
kl := testKubelet.kubelet
nodes := []api.Node{
Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/qos/policy.go
Expand Up @@ -19,6 +19,7 @@ package qos
import (
"k8s.io/kubernetes/pkg/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
utilconfig "k8s.io/kubernetes/pkg/util/config"
)

const (
Expand All @@ -44,7 +45,7 @@ const (
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
// See https://lwn.net/Articles/391222/ for more information.
func GetContainerOOMScoreAdjust(pod *api.Pod, container *api.Container, memoryCapacity int64) int {
if kubetypes.IsCriticalPod(pod) {
if utilconfig.DefaultFeatureGate.ExperimentalCriticalPodAnnotation() && kubetypes.IsCriticalPod(pod) {
return CriticalPodOOMAdj
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/qos/policy_test.go
Expand Up @@ -23,6 +23,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
utilconfig "k8s.io/kubernetes/pkg/util/config"
)

const (
Expand Down Expand Up @@ -215,6 +216,7 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) {
highOOMScoreAdj: -998,
},
}
utilconfig.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
for _, test := range oomTests {
oomScoreAdj := GetContainerOOMScoreAdjust(test.pod, &test.pod.Spec.Containers[0], test.memoryCapacity)
if oomScoreAdj < test.lowOOMScoreAdj || oomScoreAdj > test.highOOMScoreAdj {
Expand Down
23 changes: 18 additions & 5 deletions pkg/util/config/feature_gate.go
Expand Up @@ -42,17 +42,21 @@ const (
appArmor = "AppArmor"
dynamicKubeletConfig = "DynamicKubeletConfig"
dynamicVolumeProvisioning = "DynamicVolumeProvisioning"
// Ensures guaranteed scheduling of pods marked with a special pod annotation `scheduler.alpha.kubernetes.io/critical-pod`
// and also prevents them from being evicted from a node.
experimentalCriticalPodAnnotation = "ExperimentalCriticalPodAnnotation"
)

var (
// Default values for recorded features. Every new feature gate should be
// represented here.
knownFeatures = map[string]featureSpec{
allAlphaGate: {false, alpha},
externalTrafficLocalOnly: {false, alpha},
appArmor: {true, beta},
dynamicKubeletConfig: {false, alpha},
dynamicVolumeProvisioning: {true, alpha},
allAlphaGate: {false, alpha},
externalTrafficLocalOnly: {false, alpha},
appArmor: {true, beta},
dynamicKubeletConfig: {false, alpha},
dynamicVolumeProvisioning: {true, alpha},
experimentalCriticalPodAnnotation: {false, alpha},
}

// Special handling for a few gates.
Expand Down Expand Up @@ -107,6 +111,10 @@ type FeatureGate interface {
// owner: mtaufen
// alpha: v1.4
DynamicKubeletConfig() bool

// owner: @vishh
// alpha: v1.4
ExperimentalCriticalPodAnnotation() bool
}

// featureGate implements FeatureGate as well as pflag.Value for flag parsing.
Expand Down Expand Up @@ -195,6 +203,11 @@ func (f *featureGate) DynamicVolumeProvisioning() bool {
return f.lookup(dynamicVolumeProvisioning)
}

// ExperimentalCriticalPodAnnotation reports whether the
// ExperimentalCriticalPodAnnotation feature gate is currently enabled.
// The answer is resolved through the gate's lookup helper using the
// experimentalCriticalPodAnnotation key.
func (f *featureGate) ExperimentalCriticalPodAnnotation() bool {
	enabled := f.lookup(experimentalCriticalPodAnnotation)
	return enabled
}

func (f *featureGate) lookup(key string) bool {
defaultValue := f.known[key].enabled
if f.enabled != nil {
Expand Down