Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make container runtime's cgroup configurable. #20687

Merged
merged 4 commits into from
Feb 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion cluster/gce/trusty/master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ script
--cluster-domain=${DNS_DOMAIN} \
--configure-cbr0=${ALLOCATE_NODE_CIDRS} \
--cgroup-root=/ \
--system-container=/system \
--system-cgroups=/system \
--runtime-cgroups=/docker-daemon \
--kubelet-cgroups=/kubelet \
--nosystemd=true \
${ARGS}
end script
Expand Down
4 changes: 3 additions & 1 deletion cluster/gce/trusty/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ script
--cluster-domain=${DNS_DOMAIN} \
--configure-cbr0=true \
--cgroup-root=/ \
--system-container=/system \
--system-cgroups=/system \
--runtime-cgroups=/docker-daemon \
--kubelet-cgroups=/kubelet \
--nosystemd=true \
${ARGS}
end script
Expand Down
12 changes: 8 additions & 4 deletions cluster/saltbase/salt/kubelet/default
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,19 @@
{% set experimental_flannel_overlay = "--experimental-flannel-overlay=true" %}
{% endif -%}

# Run containers under the root cgroup and create a system container.
{% set system_container = "" -%}
# Set up cgroup hierarchies.
{% set cgroup_root = "" -%}
{% if grains['os_family'] == 'Debian' -%}
{% set system_container = "--system-container=/system" -%}
{% set system_container = "" -%}
{% set kubelet_container = "" -%}
{% set runtime_container = "" -%}
{% if grains['os_family'] == 'Debian' -%}
{% if pillar.get('is_systemd') %}
{% set cgroup_root = "--cgroup-root=docker" -%}
{% else %}
{% set cgroup_root = "--cgroup-root=/" -%}
{% set system_container = "--system-cgroups=/system" -%}
{% set runtime_container = "--runtime-cgroups=/docker-daemon" -%}
{% set kubelet_container= "--kubelet-cgroups=/kubelet" -%}
{% endif %}
{% endif -%}
{% if grains['oscodename'] == 'vivid' -%}
Expand Down
25 changes: 16 additions & 9 deletions cmd/kubelet/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ type KubeletServer struct {
KubeConfig util.StringFlag
APIServerList []string

DockerDaemonContainer string
RunOnce bool
RunOnce bool

// Insert a probability of random errors during calls to the master.
ChaosChance float64
Expand All @@ -61,9 +60,8 @@ type KubeletServer struct {
// NewKubeletServer will create a new KubeletServer with default values.
func NewKubeletServer() *KubeletServer {
return &KubeletServer{
AuthPath: util.NewStringFlag("/var/lib/kubelet/kubernetes_auth"), // deprecated
KubeConfig: util.NewStringFlag("/var/lib/kubelet/kubeconfig"),
DockerDaemonContainer: "/docker-daemon",
AuthPath: util.NewStringFlag("/var/lib/kubelet/kubernetes_auth"), // deprecated
KubeConfig: util.NewStringFlag("/var/lib/kubelet/kubeconfig"),

SystemReserved: make(util.ConfigurationMap),
KubeReserved: make(util.ConfigurationMap),
Expand Down Expand Up @@ -113,14 +111,15 @@ func NewKubeletServer() *KubeletServer {
RegisterSchedulable: true,
RegistryBurst: 10,
RegistryPullQPS: 5.0,
ResourceContainer: "/kubelet",
KubeletCgroups: "",
RktPath: "",
RktStage1Image: "",
RootDirectory: defaultRootDir,
RuntimeCgroups: "",
SerializeImagePulls: true,
StreamingConnectionIdleTimeout: unversioned.Duration{4 * time.Hour},
SyncFrequency: unversioned.Duration{1 * time.Minute},
SystemContainer: "",
SystemCgroups: "",
ReconcileCIDR: true,
KubeAPIQPS: 5.0,
KubeAPIBurst: 10,
Expand Down Expand Up @@ -192,13 +191,20 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.VolumePluginDir, "volume-plugin-dir", s.VolumePluginDir, "<Warning: Alpha feature> The full path of the directory in which to search for additional third party volume plugins")
fs.StringVar(&s.CloudProvider, "cloud-provider", s.CloudProvider, "The provider for cloud services. Empty string for no provider.")
fs.StringVar(&s.CloudConfigFile, "cloud-config", s.CloudConfigFile, "The path to the cloud provider configuration file. Empty string for no configuration file.")
fs.StringVar(&s.ResourceContainer, "resource-container", s.ResourceContainer, "Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).")

fs.StringVar(&s.KubeletCgroups, "resource-container", s.KubeletCgroups, "Optional absolute name of the resource-only container to create and run the Kubelet in.")
fs.MarkDeprecated("resource-container", "Use --kubelet-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.KubeletCgroups, "kubelet-cgroups", s.KubeletCgroups, "Optional absolute name of cgroups to create and run the Kubelet in.")

fs.StringVar(&s.SystemCgroups, "system-container", s.SystemCgroups, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.MarkDeprecated("system-container", "Use --system-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.SystemCgroups, "system-cgroups", s.SystemCgroups, "Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")

fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.StringVar(&s.LockFilePath, "lock-file", s.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'")
fs.StringVar(&s.RktStage1Image, "rkt-stage1-image", s.RktStage1Image, "image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used")
fs.StringVar(&s.SystemContainer, "system-container", s.SystemContainer, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.")
fs.BoolVar(&s.HairpinMode, "configure-hairpin-mode", s.HairpinMode, "If true, kubelet will set the hairpin mode flag on container interfaces. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service.")
fs.IntVar(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
Expand All @@ -223,4 +229,5 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.OutOfDiskTransitionFrequency.Duration, "outofdisk-transition-frequency", s.OutOfDiskTransitionFrequency.Duration, "Duration for which the kubelet has to wait before transitioning out of out-of-disk node condition status. Default: 5m0s")
fs.StringVar(&s.NodeIP, "node-ip", s.NodeIP, "IP address of the node. If set, kubelet will use this IP address for the node")
fs.BoolVar(&s.EnableCustomMetrics, "enable-custom-metrics", s.EnableCustomMetrics, "Support for gathering custom metrics.")
fs.StringVar(&s.RuntimeCgroups, "runtime-cgroups", s.RuntimeCgroups, "Optional absolute name of cgroups to create and run the runtime in.")
}
32 changes: 19 additions & 13 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
CPUCFSQuota: s.CPUCFSQuota,
DiskSpacePolicy: diskSpacePolicy,
DockerClient: dockertools.ConnectToDockerOrDie(s.DockerEndpoint),
DockerDaemonContainer: s.DockerDaemonContainer,
RuntimeCgroups: s.RuntimeCgroups,
DockerExecHandler: dockerExecHandler,
EnableCustomMetrics: s.EnableCustomMetrics,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
Expand Down Expand Up @@ -236,7 +236,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
RegistryPullQPS: s.RegistryPullQPS,
ResolverConfig: s.ResolverConfig,
Reservation: *reservation,
ResourceContainer: s.ResourceContainer,
KubeletCgroups: s.KubeletCgroups,
RktPath: s.RktPath,
RktStage1Image: s.RktStage1Image,
RootDirectory: s.RootDirectory,
Expand All @@ -245,7 +245,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
StandaloneMode: (len(s.APIServerList) == 0),
StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout.Duration,
SyncFrequency: s.SyncFrequency.Duration,
SystemContainer: s.SystemContainer,
SystemCgroups: s.SystemCgroups,
TLSOptions: tlsOptions,
Writer: writer,
VolumePlugins: ProbeVolumePlugins(s.VolumePluginDir),
Expand Down Expand Up @@ -306,7 +306,16 @@ func Run(s *options.KubeletServer, kcfg *KubeletConfig) error {
}

if kcfg.ContainerManager == nil {
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface)
if kcfg.SystemCgroups != "" && kcfg.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}

kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
})
if err != nil {
return err
}
Expand Down Expand Up @@ -501,7 +510,7 @@ func SimpleKubelet(client *clientset.Clientset,
CPUCFSQuota: true,
DiskSpacePolicy: diskSpacePolicy,
DockerClient: dockerClient,
DockerDaemonContainer: "/docker-daemon",
RuntimeCgroups: "",
DockerExecHandler: &dockertools.NativeExecHandler{},
EnableCustomMetrics: false,
EnableDebuggingHandlers: true,
Expand Down Expand Up @@ -530,11 +539,11 @@ func SimpleKubelet(client *clientset.Clientset,
RegistryBurst: 10,
RegistryPullQPS: 5.0,
ResolverConfig: kubetypes.ResolvConfDefault,
ResourceContainer: "/kubelet",
KubeletCgroups: "/kubelet",
RootDirectory: rootDir,
SerializeImagePulls: true,
SyncFrequency: syncFrequency,
SystemContainer: "",
SystemCgroups: "",
TLSOptions: tlsOptions,
VolumePlugins: volumePlugins,
Writer: &io.StdWriter{},
Expand Down Expand Up @@ -677,7 +686,7 @@ type KubeletConfig struct {
CPUCFSQuota bool
DiskSpacePolicy kubelet.DiskSpacePolicy
DockerClient dockertools.DockerInterface
DockerDaemonContainer string
RuntimeCgroups string
DockerExecHandler dockertools.ExecHandler
EnableCustomMetrics bool
EnableDebuggingHandlers bool
Expand Down Expand Up @@ -724,7 +733,7 @@ type KubeletConfig struct {
RegistryPullQPS float64
Reservation kubetypes.Reservation
ResolverConfig string
ResourceContainer string
KubeletCgroups string
RktPath string
RktStage1Image string
RootDirectory string
Expand All @@ -733,7 +742,7 @@ type KubeletConfig struct {
StandaloneMode bool
StreamingConnectionIdleTimeout time.Duration
SyncFrequency time.Duration
SystemContainer string
SystemCgroups string
TLSOptions *server.TLSOptions
Writer io.Writer
VolumePlugins []volume.VolumePlugin
Expand Down Expand Up @@ -802,16 +811,13 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.Cloud,
kc.NodeLabels,
kc.NodeStatusUpdateFrequency,
kc.ResourceContainer,
kc.OSInterface,
kc.CgroupRoot,
kc.ContainerRuntime,
kc.RktPath,
kc.RktStage1Image,
kc.Mounter,
kc.Writer,
kc.DockerDaemonContainer,
kc.SystemContainer,
kc.ConfigureCBR0,
kc.NonMasqueradeCIDR,
kc.PodCIDR,
Expand Down
11 changes: 8 additions & 3 deletions contrib/mesos/pkg/executor/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func (s *KubeletExecutorServer) runKubelet(

return decorated, pc, nil
}
kcfg.DockerDaemonContainer = "" // don't move the docker daemon into a cgroup
kcfg.RuntimeCgroups = "" // don't move the docker daemon into a cgroup
kcfg.Hostname = kcfg.HostnameOverride
kcfg.KubeClient = apiclient

Expand All @@ -201,7 +201,7 @@ func (s *KubeletExecutorServer) runKubelet(
kcfg.NodeName = kcfg.HostnameOverride
kcfg.PodConfig = kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kcfg.Recorder) // override the default pod source
kcfg.StandaloneMode = false
kcfg.SystemContainer = "" // don't take control over other system processes.
kcfg.SystemCgroups = "" // don't take control over other system processes.
if kcfg.Cloud != nil {
// fail early and hard because having the cloud provider loaded would go unnoticed,
// but break bigger cluster because accessing the state.json from every slave kills the master.
Expand All @@ -216,7 +216,12 @@ func (s *KubeletExecutorServer) runKubelet(
}

kcfg.CAdvisorInterface = cAdvisorInterface
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, cAdvisorInterface)
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, cAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
})
if err != nil {
return err
}
Expand Down
5 changes: 3 additions & 2 deletions docs/admin/kubelet.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ kubelet
--kube-api-qps=5: QPS to use while talking with kubernetes apiserver
--kube-reserved=: A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://releases.k8s.io/HEAD/docs/user-guide/compute-resources.html for more detail. [default=none]
--kubeconfig="/var/lib/kubelet/kubeconfig": Path to a kubeconfig file, specifying how to authenticate to API server (the master location is set by the api-servers flag).
--kubelet-cgroups="": Optional absolute name of cgroups to create and run the Kubelet in.
--lock-file="/var/run/lock/kubelet.lock": <Warning: Alpha feature> The path to file for kubelet to use as a lock file.
--log-flush-frequency=5s: Maximum number of seconds between log flushes
--low-diskspace-threshold-mb=256: The absolute free disk space, in MB, to maintain. When disk space falls below this threshold, new pods would be rejected. Default: 256
Expand Down Expand Up @@ -134,15 +135,15 @@ kubelet
--registry-burst=10: Maximum size of bursty pulls, temporarily allows pulls to burst to this number, while still not exceeding registry-qps. Only used if --registry-qps > 0
--registry-qps=5: If > 0, limit registry pull QPS to this value. If 0, unlimited. [default=5.0]
--resolv-conf="/etc/resolv.conf": Resolver configuration file used as the basis for the container DNS resolution configuration.
--resource-container="/kubelet": Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).
--rkt-path="": Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'
--rkt-stage1-image="": image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used
--root-dir="/var/lib/kubelet": Directory path for managing kubelet files (volume mounts, etc.).
--runonce[=false]: If true, exit after spawning pods from local manifests or remote urls. Exclusive with --api-servers, and --enable-server
--runtime-cgroups="": Optional absolute name of cgroups to create and run the runtime in.
--serialize-image-pulls[=true]: Pull images one at a time. We recommend *not* changing the default value on nodes that run docker daemon with version < 1.9 or an Aufs storage backend. Issue #10959 has more details. [default=true]
--streaming-connection-idle-timeout=4h0m0s: Maximum time a streaming connection can be idle before the connection is automatically closed. 0 indicates no timeout. Example: '5m'
--sync-frequency=1m0s: Max period between synchronizing running containers and config
--system-container="": Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: "").
--system-cgroups="": Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: "").
--system-reserved=: A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://releases.k8s.io/HEAD/docs/user-guide/compute-resources.html for more detail. [default=none]
--tls-cert-file="": File containing x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory passed to --cert-dir.
--tls-private-key-file="": File containing x509 private key matching --tls-cert-file.
Expand Down
3 changes: 3 additions & 0 deletions hack/verify-flags/known-flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ kubectl-path
kubelet-address
kubelet-cadvisor-port
kubelet-certificate-authority
kubelet-cgroups
kubelet-client-certificate
kubelet-client-key
kubelet-docker-endpoint
Expand Down Expand Up @@ -311,6 +312,7 @@ root-ca-file
root-dir
run-proxy
runtime-config
runtime-cgroups
save-config
scheduler-config
scheduler-name
Expand Down Expand Up @@ -347,6 +349,7 @@ storage-versions
streaming-connection-idle-timeout
suicide-timeout
sync-frequency
system-cgroups
system-container
system-reserved
target-port
Expand Down