Skip to content

Commit

Permalink
Merge pull request #91863 from knabben/kubelet-memcg-notification
Browse files Browse the repository at this point in the history
Moving Kubelet kernel-memgc-notification to configuration file
  • Loading branch information
k8s-ci-robot committed Jun 25, 2020
2 parents c5b3dba + c39cf28 commit 4a91ecb
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 27 deletions.
4 changes: 2 additions & 2 deletions cluster/gce/config-test.sh
Expand Up @@ -228,10 +228,10 @@ TEST_CLUSTER_API_CONTENT_TYPE=${TEST_CLUSTER_API_CONTENT_TYPE:-}

KUBELET_TEST_ARGS="${KUBELET_TEST_ARGS:-} --serialize-image-pulls=false ${TEST_CLUSTER_API_CONTENT_TYPE}"
if [[ "${NODE_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'ubuntu' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'custom' ]]; then
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
fi
if [[ "${MASTER_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${MASTER_OS_DISTRIBUTION}" = 'ubuntu' ]]; then
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
fi
APISERVER_TEST_ARGS="${APISERVER_TEST_ARGS:-} --runtime-config=extensions/v1beta1,scheduling.k8s.io/v1alpha1,settings.k8s.io/v1alpha1 ${TEST_CLUSTER_DELETE_COLLECTION_WORKERS} ${TEST_CLUSTER_MAX_REQUESTS_INFLIGHT}"
CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_ARGS:-} ${TEST_CLUSTER_RESYNC_PERIOD} ${TEST_CLUSTER_API_CONTENT_TYPE}"
Expand Down
2 changes: 1 addition & 1 deletion cluster/gce/util.sh
Expand Up @@ -891,7 +891,7 @@ function construct-windows-kubelet-flags {
flags+=" --cgroups-per-qos=false --enforce-node-allocatable="

# Turn off kernel memory cgroup notification.
flags+=" --experimental-kernel-memcg-notification=false"
flags+=" --kernel-memcg-notification=false"

# TODO(#78628): Re-enable KubeletPodResources when the issue is fixed.
# Force disable KubeletPodResources feature on Windows until #78628 is fixed.
Expand Down
35 changes: 17 additions & 18 deletions cmd/kubelet/app/options/options.go
Expand Up @@ -117,9 +117,6 @@ type KubeletFlags struct {
RemoteImageEndpoint string
// experimentalMounterPath is the path of mounter binary. Leave empty to use the default mount path
ExperimentalMounterPath string
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
// +optional
ExperimentalKernelMemcgNotification bool
// This flag, if set, enables a check prior to mount operations to verify that the required components
// (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled
// and fails the mount operation fails.
Expand Down Expand Up @@ -180,20 +177,19 @@ func NewKubeletFlags() *KubeletFlags {
}

return &KubeletFlags{
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
CertDirectory: "/var/lib/kubelet/pki",
RootDirectory: defaultRootDir,
MasterServiceNamespace: metav1.NamespaceDefault,
MaxContainerCount: -1,
MaxPerPodContainerCount: 1,
MinimumGCAge: metav1.Duration{Duration: 0},
NonMasqueradeCIDR: "10.0.0.0/8",
RegisterSchedulable: true,
ExperimentalKernelMemcgNotification: false,
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
NodeLabels: make(map[string]string),
RegisterNode: true,
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
CertDirectory: "/var/lib/kubelet/pki",
RootDirectory: defaultRootDir,
MasterServiceNamespace: metav1.NamespaceDefault,
MaxContainerCount: -1,
MaxPerPodContainerCount: 1,
MinimumGCAge: metav1.Duration{Duration: 0},
NonMasqueradeCIDR: "10.0.0.0/8",
RegisterSchedulable: true,
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
NodeLabels: make(map[string]string),
RegisterNode: true,
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
// prior to the introduction of this flag, there was a hardcoded cap of 50 images
EnableCAdvisorJSONEndpoints: false,
}
Expand Down Expand Up @@ -345,7 +341,6 @@ func (f *KubeletFlags) AddFlags(mainfs *pflag.FlagSet) {
fs.Var(utiltaints.NewTaintsVar(&f.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma separated \"<key>=<value>:<effect>\"). No-op if register-node is false.")

// EXPERIMENTAL FLAGS
fs.BoolVar(&f.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", f.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.StringVar(&f.RemoteRuntimeEndpoint, "container-runtime-endpoint", f.RemoteRuntimeEndpoint, "[Experimental] The endpoint of remote runtime service. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
fs.StringVar(&f.RemoteImageEndpoint, "image-service-endpoint", f.RemoteImageEndpoint, "[Experimental] The endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
bindableNodeLabels := cliflag.ConfigurationMap(f.NodeLabels)
Expand Down Expand Up @@ -520,6 +515,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.StringSliceVar(&c.AllowedUnsafeSysctls, "allowed-unsafe-sysctls", c.AllowedUnsafeSysctls, "Comma-separated whitelist of unsafe sysctls or unsafe sysctl patterns (ending in *). Use these at your own risk.")

fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")

// Flags intended for testing, not recommended used in production environments.
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")
Expand All @@ -544,4 +540,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', and 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")

// Graduated experimental flags, kept for backward compatibility
fs.BoolVar(&c.KernelMemcgNotification, "experimental-kernel-memcg-notification", c.KernelMemcgNotification, "Use kernelMemcgNotification configuration, this flag will be removed in 1.23.")
}
6 changes: 3 additions & 3 deletions cmd/kubelet/app/server.go
Expand Up @@ -1104,7 +1104,7 @@ func RunKubelet(kubeServer *options.KubeletServer, kubeDeps *kubelet.Dependencie
kubeServer.RegisterWithTaints,
kubeServer.AllowedUnsafeSysctls,
kubeServer.ExperimentalMounterPath,
kubeServer.ExperimentalKernelMemcgNotification,
kubeServer.KernelMemcgNotification,
kubeServer.ExperimentalCheckNodeCapabilitiesBeforeMount,
kubeServer.ExperimentalNodeAllocatableIgnoreEvictionThreshold,
kubeServer.MinimumGCAge,
Expand Down Expand Up @@ -1178,7 +1178,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints []api.Taint,
allowedUnsafeSysctls []string,
experimentalMounterPath string,
experimentalKernelMemcgNotification bool,
kernelMemcgNotification bool,
experimentalCheckNodeCapabilitiesBeforeMount bool,
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
minimumGCAge metav1.Duration,
Expand Down Expand Up @@ -1210,7 +1210,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints,
allowedUnsafeSysctls,
experimentalMounterPath,
experimentalKernelMemcgNotification,
kernelMemcgNotification,
experimentalCheckNodeCapabilitiesBeforeMount,
experimentalNodeAllocatableIgnoreEvictionThreshold,
minimumGCAge,
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/fuzzer/fuzzer.go
Expand Up @@ -59,6 +59,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
obj.ImageMinimumGCAge = metav1.Duration{Duration: 2 * time.Minute}
obj.ImageGCHighThresholdPercent = 85
obj.ImageGCLowThresholdPercent = 80
obj.KernelMemcgNotification = false
obj.MaxOpenFiles = 1000000
obj.MaxPods = 110
obj.PodPidsLimit = -1
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Expand Up @@ -189,6 +189,7 @@ var (
"ImageGCHighThresholdPercent",
"ImageGCLowThresholdPercent",
"ImageMinimumGCAge.Duration",
"KernelMemcgNotification",
"KubeAPIBurst",
"KubeAPIQPS",
"KubeReservedCgroup",
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/apis/config/types.go
Expand Up @@ -321,6 +321,9 @@ type KubeletConfiguration struct {
// These sysctls are namespaced but not allowed by default. For example: "kernel.msg*,net.ipv4.route.min_pmtu"
// +optional
AllowedUnsafeSysctls []string
// kernelMemcgNotification if enabled, the kubelet will integrate with the kernel memcg
// notification to determine if memory eviction thresholds are crossed rather than polling.
KernelMemcgNotification bool

/* the following fields are meant for Node Allocatable */

Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/kubelet/kubelet.go
Expand Up @@ -365,7 +365,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints []api.Taint,
allowedUnsafeSysctls []string,
experimentalMounterPath string,
experimentalKernelMemcgNotification bool,
kernelMemcgNotification bool,
experimentalCheckNodeCapabilitiesBeforeMount bool,
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
minimumGCAge metav1.Duration,
Expand Down Expand Up @@ -434,7 +434,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
Thresholds: thresholds,
KernelMemcgNotification: experimentalKernelMemcgNotification,
KernelMemcgNotification: kernelMemcgNotification,
PodCgroupRoot: kubeDeps.ContainerManager.GetPodCgroupRoot(),
}

Expand Down
7 changes: 7 additions & 0 deletions staging/src/k8s.io/kubelet/config/v1beta1/types.go
Expand Up @@ -786,6 +786,13 @@ type KubeletConfiguration struct {
// Default: ""
// +optional
ProviderID string `json:"providerID,omitempty"`
// kernelMemcgNotification, if set, the kubelet will integrate with the kernel memcg notification
// to determine if memory eviction thresholds are crossed rather than polling.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the way Kubelet interacts with the kernel.
// Default: false
// +optional
KernelMemcgNotification bool `json:"kernelMemcgNotification,omitempty"`
}

type KubeletAuthorizationMode string
Expand Down
2 changes: 1 addition & 1 deletion test/e2e_node/remote/node_e2e.go
Expand Up @@ -98,7 +98,7 @@ func prependCOSMounterFlag(args, host, workspace string) (string, error) {
// prependMemcgNotificationFlag prepends the flag for enabling memcg
// notification to args and returns the result.
func prependMemcgNotificationFlag(args string) string {
return "--kubelet-flags=--experimental-kernel-memcg-notification=true " + args
return "--kubelet-flags=--kernel-memcg-notification=true " + args
}

// updateOSSpecificKubeletFlags updates the Kubelet args with OS specific
Expand Down

0 comments on commit 4a91ecb

Please sign in to comment.