Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving Kubelet kernel-memcg-notification to configuration file #91863

Merged
merged 1 commit into from Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions cluster/gce/config-test.sh
Expand Up @@ -220,10 +220,10 @@ TEST_CLUSTER_API_CONTENT_TYPE=${TEST_CLUSTER_API_CONTENT_TYPE:-}

KUBELET_TEST_ARGS="${KUBELET_TEST_ARGS:-} --serialize-image-pulls=false ${TEST_CLUSTER_API_CONTENT_TYPE}"
if [[ "${NODE_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'ubuntu' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'custom' ]]; then
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
fi
if [[ "${MASTER_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${MASTER_OS_DISTRIBUTION}" = 'ubuntu' ]]; then
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
fi
APISERVER_TEST_ARGS="${APISERVER_TEST_ARGS:-} --runtime-config=extensions/v1beta1,scheduling.k8s.io/v1alpha1,settings.k8s.io/v1alpha1 ${TEST_CLUSTER_DELETE_COLLECTION_WORKERS} ${TEST_CLUSTER_MAX_REQUESTS_INFLIGHT}"
CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_ARGS:-} ${TEST_CLUSTER_RESYNC_PERIOD} ${TEST_CLUSTER_API_CONTENT_TYPE}"
Expand Down
2 changes: 1 addition & 1 deletion cluster/gce/util.sh
Expand Up @@ -891,7 +891,7 @@ function construct-windows-kubelet-flags {
flags+=" --cgroups-per-qos=false --enforce-node-allocatable="

# Turn off kernel memory cgroup notification.
flags+=" --experimental-kernel-memcg-notification=false"
flags+=" --kernel-memcg-notification=false"

# TODO(#78628): Re-enable KubeletPodResources when the issue is fixed.
# Force disable KubeletPodResources feature on Windows until #78628 is fixed.
Expand Down
35 changes: 17 additions & 18 deletions cmd/kubelet/app/options/options.go
Expand Up @@ -117,9 +117,6 @@ type KubeletFlags struct {
RemoteImageEndpoint string
// experimentalMounterPath is the path of mounter binary. Leave empty to use the default mount path
ExperimentalMounterPath string
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
// +optional
ExperimentalKernelMemcgNotification bool
// This flag, if set, enables a check prior to mount operations to verify that the required components
// (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled
// and fails the mount operation fails.
Expand Down Expand Up @@ -180,20 +177,19 @@ func NewKubeletFlags() *KubeletFlags {
}

return &KubeletFlags{
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
CertDirectory: "/var/lib/kubelet/pki",
RootDirectory: defaultRootDir,
MasterServiceNamespace: metav1.NamespaceDefault,
MaxContainerCount: -1,
MaxPerPodContainerCount: 1,
MinimumGCAge: metav1.Duration{Duration: 0},
NonMasqueradeCIDR: "10.0.0.0/8",
RegisterSchedulable: true,
ExperimentalKernelMemcgNotification: false,
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
NodeLabels: make(map[string]string),
RegisterNode: true,
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
CertDirectory: "/var/lib/kubelet/pki",
RootDirectory: defaultRootDir,
MasterServiceNamespace: metav1.NamespaceDefault,
MaxContainerCount: -1,
MaxPerPodContainerCount: 1,
MinimumGCAge: metav1.Duration{Duration: 0},
NonMasqueradeCIDR: "10.0.0.0/8",
RegisterSchedulable: true,
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
NodeLabels: make(map[string]string),
RegisterNode: true,
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
// prior to the introduction of this flag, there was a hardcoded cap of 50 images
EnableCAdvisorJSONEndpoints: false,
}
Expand Down Expand Up @@ -345,7 +341,6 @@ func (f *KubeletFlags) AddFlags(mainfs *pflag.FlagSet) {
fs.Var(utiltaints.NewTaintsVar(&f.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma separated \"<key>=<value>:<effect>\"). No-op if register-node is false.")

// EXPERIMENTAL FLAGS
fs.BoolVar(&f.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", f.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.StringVar(&f.RemoteRuntimeEndpoint, "container-runtime-endpoint", f.RemoteRuntimeEndpoint, "[Experimental] The endpoint of remote runtime service. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
fs.StringVar(&f.RemoteImageEndpoint, "image-service-endpoint", f.RemoteImageEndpoint, "[Experimental] The endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
fs.BoolVar(&f.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-allocatable-ignore-eviction", f.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details. [default=false]")
Expand Down Expand Up @@ -519,6 +514,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.StringSliceVar(&c.AllowedUnsafeSysctls, "allowed-unsafe-sysctls", c.AllowedUnsafeSysctls, "Comma-separated whitelist of unsafe sysctls or unsafe sysctl patterns (ending in *). Use these at your own risk.")

fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should keep the flag name the same for backwards compatibility during the deprecation period.
One technique to manage this is to add a new flag called "kernel-memcg-notification", but still keep the old one and just mark the old one deprecated. BootstrapKubeconfig is one example of that (in this case both can point to the same field in the config, instead of flags struct, but the concept is the same).

Copy link
Member Author

@knabben knabben Jun 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mtaufen PTAL

--experimental-kernel-memcg-notification                   Use kernelMemcgNotification configuration. (DEPRECATED: This parameter should be set via the config file specified by the Kubelet's --config flag. See https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/ for more information.)
--kernel-memcg-notification                                If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling. (DEPRECATED: This parameter should be set via the config file specified by the Kubelet's --config flag. See https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/ for more information.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, why the name change (why remove the "experimental" part)?

Copy link
Member Author

@knabben knabben Jun 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically it "graduated", since it is not experimental anymore, we moved to the KubeletConfiguration file.

Keeping the --experimental-kernel-memcg-notification one only for backward compatibility reasons, it must be removed in a few releases. Only --kernel-memcg-notification will be left at this point.


// Flags intended for testing, not recommended used in production environments.
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")
Expand All @@ -543,4 +539,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', and 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")

// Graduated experimental flags, kept for backward compatibility
fs.BoolVar(&c.KernelMemcgNotification, "experimental-kernel-memcg-notification", c.KernelMemcgNotification, "Use kernelMemcgNotification configuration, this flag will be removed in 1.23.")
}
6 changes: 3 additions & 3 deletions cmd/kubelet/app/server.go
Expand Up @@ -1109,7 +1109,7 @@ func RunKubelet(kubeServer *options.KubeletServer, kubeDeps *kubelet.Dependencie
kubeServer.RegisterWithTaints,
kubeServer.AllowedUnsafeSysctls,
kubeServer.ExperimentalMounterPath,
kubeServer.ExperimentalKernelMemcgNotification,
kubeServer.KernelMemcgNotification,
kubeServer.ExperimentalCheckNodeCapabilitiesBeforeMount,
kubeServer.ExperimentalNodeAllocatableIgnoreEvictionThreshold,
kubeServer.MinimumGCAge,
Expand Down Expand Up @@ -1183,7 +1183,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints []api.Taint,
allowedUnsafeSysctls []string,
experimentalMounterPath string,
experimentalKernelMemcgNotification bool,
kernelMemcgNotification bool,
experimentalCheckNodeCapabilitiesBeforeMount bool,
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
minimumGCAge metav1.Duration,
Expand Down Expand Up @@ -1215,7 +1215,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints,
allowedUnsafeSysctls,
experimentalMounterPath,
experimentalKernelMemcgNotification,
kernelMemcgNotification,
experimentalCheckNodeCapabilitiesBeforeMount,
experimentalNodeAllocatableIgnoreEvictionThreshold,
minimumGCAge,
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/fuzzer/fuzzer.go
Expand Up @@ -59,6 +59,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
obj.ImageMinimumGCAge = metav1.Duration{Duration: 2 * time.Minute}
obj.ImageGCHighThresholdPercent = 85
obj.ImageGCLowThresholdPercent = 80
obj.KernelMemcgNotification = false
obj.MaxOpenFiles = 1000000
obj.MaxPods = 110
obj.PodPidsLimit = -1
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Expand Up @@ -189,6 +189,7 @@ var (
"ImageGCHighThresholdPercent",
"ImageGCLowThresholdPercent",
"ImageMinimumGCAge.Duration",
"KernelMemcgNotification",
"KubeAPIBurst",
"KubeAPIQPS",
"KubeReservedCgroup",
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/apis/config/types.go
Expand Up @@ -322,6 +322,9 @@ type KubeletConfiguration struct {
// These sysctls are namespaced but not allowed by default. For example: "kernel.msg*,net.ipv4.route.min_pmtu"
// +optional
AllowedUnsafeSysctls []string
// kernelMemcgNotification if enabled, the kubelet will integrate with the kernel memcg
// notification to determine if memory eviction thresholds are crossed rather than polling.
KernelMemcgNotification bool

/* the following fields are meant for Node Allocatable */

Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/kubelet/kubelet.go
Expand Up @@ -363,7 +363,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerWithTaints []api.Taint,
allowedUnsafeSysctls []string,
experimentalMounterPath string,
experimentalKernelMemcgNotification bool,
kernelMemcgNotification bool,
experimentalCheckNodeCapabilitiesBeforeMount bool,
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
minimumGCAge metav1.Duration,
Expand Down Expand Up @@ -432,7 +432,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
Thresholds: thresholds,
KernelMemcgNotification: experimentalKernelMemcgNotification,
KernelMemcgNotification: kernelMemcgNotification,
PodCgroupRoot: kubeDeps.ContainerManager.GetPodCgroupRoot(),
}

Expand Down
7 changes: 7 additions & 0 deletions staging/src/k8s.io/kubelet/config/v1beta1/types.go
Expand Up @@ -787,6 +787,13 @@ type KubeletConfiguration struct {
// Default: ""
// +optional
ProviderID string `json:"providerID,omitempty"`
// kernelMemcgNotification, if set, the kubelet will integrate with the kernel memcg notification
// to determine if memory eviction thresholds are crossed rather than polling.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the way Kubelet interacts with the kernel.
// Default: false
// +optional
KernelMemcgNotification bool `json:"kernelMemcgNotification,omitempty"`
}

type KubeletAuthorizationMode string
Expand Down
2 changes: 1 addition & 1 deletion test/e2e_node/remote/node_e2e.go
Expand Up @@ -98,7 +98,7 @@ func prependCOSMounterFlag(args, host, workspace string) (string, error) {
// prependMemcgNotificationFlag prepends the flag for enabling memcg
// notification to args and returns the result.
func prependMemcgNotificationFlag(args string) string {
return "--kubelet-flags=--experimental-kernel-memcg-notification=true " + args
return "--kubelet-flags=--kernel-memcg-notification=true " + args
}

// updateOSSpecificKubeletFlags updates the Kubelet args with OS specific
Expand Down