Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a system container #8681

Merged
merged 3 commits into from
May 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions cluster/saltbase/salt/kubelet/default
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,12 @@
{% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%}
{% endif -%}

# Run containers under the root cgroup.
# Run containers under the root cgroup and create a system container.
{% set system_container = "" -%}
{% set cgroup_root = "" -%}
{% if grains['os_family'] == 'Debian' -%}
{% set system_container = "--system-container=/system" -%}
{% set cgroup_root = "--cgroup_root=/" -%}
{% endif -%}

DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{hostname_override}} {{cloud_provider}} {{config}} --allow_privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{configure_cbr0}} {{cgroup_root}}"
DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{hostname_override}} {{cloud_provider}} {{config}} --allow_privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}}"
8 changes: 7 additions & 1 deletion cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ type KubeletServer struct {
CgroupRoot string
ContainerRuntime string
DockerDaemonContainer string
SystemContainer string
ConfigureCBR0 bool
MaxPods int

Expand Down Expand Up @@ -170,6 +171,7 @@ func NewKubeletServer() *KubeletServer {
CgroupRoot: "",
ContainerRuntime: "docker",
DockerDaemonContainer: "/docker-daemon",
SystemContainer: "",
ConfigureCBR0: false,
}
}
Expand Down Expand Up @@ -228,7 +230,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.ResourceContainer, "resource-container", s.ResourceContainer, "Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).")
fs.StringVar(&s.CgroupRoot, "cgroup_root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container_runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.StringVar(&s.DockerDaemonContainer, "docker-daemon-container", s.DockerDaemonContainer, "Optional resource-only container in which to place the Docker Daemon. Empty for no container (Default: /docker-daemon).")
fs.StringVar(&s.SystemContainer, "system-container", s.SystemContainer, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed offline: We should tie this to CgroupRoot.

fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.")
fs.IntVar(&s.MaxPods, "max-pods", 100, "Number of Pods that can run on this Kubelet.")

Expand Down Expand Up @@ -347,6 +349,7 @@ func (s *KubeletServer) Run(_ []string) error {
ContainerRuntime: s.ContainerRuntime,
Mounter: mounter,
DockerDaemonContainer: s.DockerDaemonContainer,
SystemContainer: s.SystemContainer,
ConfigureCBR0: s.ConfigureCBR0,
MaxPods: s.MaxPods,
}
Expand Down Expand Up @@ -513,6 +516,7 @@ func SimpleKubelet(client *client.Client,
ContainerRuntime: "docker",
Mounter: mount.New(),
DockerDaemonContainer: "/docker-daemon",
SystemContainer: "",
MaxPods: 32,
}
return &kcfg
Expand Down Expand Up @@ -648,6 +652,7 @@ type KubeletConfig struct {
ContainerRuntime string
Mounter mount.Interface
DockerDaemonContainer string
SystemContainer string
ConfigureCBR0 bool
MaxPods int
}
Expand Down Expand Up @@ -701,6 +706,7 @@ func createAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.ContainerRuntime,
kc.Mounter,
kc.DockerDaemonContainer,
kc.SystemContainer,
kc.ConfigureCBR0,
kc.MaxPods)

Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/container_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ package kubelet
// containerManager is the Kubelet-internal abstraction over node-level
// container housekeeping. Implementations are selected at build time; a build
// without container manager support supplies one whose Start always fails.
type containerManager interface {
// Start runs the container manager's housekeeping and returns an error
// if that housekeeping could not be performed:
// - Ensures that the Docker daemon is in a container.
// - Creates the system container where all non-containerized processes run.
Start() error
}
112 changes: 98 additions & 14 deletions pkg/kubelet/container_manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,33 +35,60 @@ import (
)

type containerManagerImpl struct {
// Absolute name of the desired container that Docker should be in.
dockerContainerName string
// Whether to create and use the specified containers.
useDockerContainer bool
useSystemContainer bool

// The manager of the resource-only container Docker should be in.
manager fs.Manager
// OOM score for the Docker container.
dockerOomScoreAdj int

// Managers for containers.
dockerContainer fs.Manager
systemContainer fs.Manager
rootContainer fs.Manager
}

var _ containerManager = &containerManagerImpl{}

// Takes the absolute name that the Docker daemon should be in.
// Empty container name disables moving the Docker daemon.
func newContainerManager(dockerDaemonContainer string) (containerManager, error) {
// Takes the absolute name of the specified containers.
// Empty container name disables use of the specified container.
func newContainerManager(dockerDaemonContainer, systemContainer string) (containerManager, error) {
if systemContainer == "/" {
return nil, fmt.Errorf("system container cannot be root (\"/\")")
}

return &containerManagerImpl{
dockerContainerName: dockerDaemonContainer,
manager: fs.Manager{
useDockerContainer: dockerDaemonContainer != "",
useSystemContainer: systemContainer != "",
dockerOomScoreAdj: -900,
dockerContainer: fs.Manager{
Cgroups: &configs.Cgroup{
Name: dockerDaemonContainer,
AllowAllDevices: true,
},
},
dockerOomScoreAdj: -900,
systemContainer: fs.Manager{
Cgroups: &configs.Cgroup{
Name: systemContainer,
AllowAllDevices: true,
},
},
rootContainer: fs.Manager{
Cgroups: &configs.Cgroup{
Name: "/",
},
},
}, nil
}

func (cm *containerManagerImpl) Start() error {
if cm.dockerContainerName != "" {
if cm.useSystemContainer {
err := cm.ensureSystemContainer()
if err != nil {
return err
}
}
if cm.useDockerContainer {
go util.Until(func() {
err := cm.ensureDockerInContainer()
if err != nil {
Expand Down Expand Up @@ -99,10 +126,10 @@ func (cm *containerManagerImpl) ensureDockerInContainer() error {
errs = append(errs, fmt.Errorf("failed to find container of PID %q: %v", pid, err))
}

if cont != cm.dockerContainerName {
err = cm.manager.Apply(pid)
if cont != cm.dockerContainer.Cgroups.Name {
err = cm.dockerContainer.Apply(pid)
if err != nil {
errs = append(errs, fmt.Errorf("failed to move PID %q (in %q) to %q", pid, cont, cm.dockerContainerName))
errs = append(errs, fmt.Errorf("failed to move PID %q (in %q) to %q", pid, cont, cm.dockerContainer.Cgroups.Name))
}
}

Expand All @@ -125,3 +152,60 @@ func getContainer(pid int) (string, error) {

return cgroups.ParseCgroupFile("cpu", f)
}

// ensureSystemContainer makes sure every non-kernel process that is still in
// the root container is moved into the system container.
//
// Each pass re-lists the PIDs in the root container, filters out kernel PIDs
// (see isKernelPid) and applies the system container to the remainder. The
// function succeeds as soon as a pass finds no non-kernel PIDs left in root.
//
// Returns nil on success. If every pass is used up without reaching that
// state, the returned aggregate contains the errors recorded during the last
// pass plus a final "ran out of attempts" error.
func (cm *containerManagerImpl) ensureSystemContainer() error {
	// Upper bound on the number of passes over the root container.
	const maxAttempts = 11

	var errs []error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Only keep errors from the latest attempt.
		errs = nil

		allPids, err := cm.rootContainer.GetPids()
		if err != nil {
			errs = append(errs, fmt.Errorf("Failed to list PIDs for root: %v", err))
			continue
		}

		// Drop kernel PIDs; they cannot (and should not) be moved.
		pids := make([]int, 0, len(allPids))
		for _, pid := range allPids {
			if !isKernelPid(pid) {
				pids = append(pids, pid)
			}
		}
		glog.Infof("Found %d PIDs in root, %d of them are kernel related", len(allPids), len(allPids)-len(pids))

		// Nothing left to move: we are done. Returning here (rather than
		// breaking and inspecting the attempt counter afterwards) guarantees
		// that succeeding on the very last pass is not misreported as having
		// run out of attempts.
		if len(pids) == 0 {
			return nil
		}

		glog.Infof("Moving non-kernel threads: %v", pids)
		for _, pid := range pids {
			if err := cm.systemContainer.Apply(pid); err != nil {
				errs = append(errs, fmt.Errorf("failed to move PID %d into the system container %q: %v", pid, cm.systemContainer.Cgroups.Name, err))
			}
		}
	}

	// Every pass still saw non-kernel PIDs in root (or failed to list them).
	errs = append(errs, fmt.Errorf("ran out of attempts to create system containers %q", cm.systemContainer.Cgroups.Name))
	return errors.NewAggregate(errs)
}

// isKernelPid reports whether pid should be treated as a kernel PID.
// A PID is classified as kernel-owned whenever its /proc/<pid>/exe link
// cannot be read, since kernel threads have no associated executable.
func isKernelPid(pid int) bool {
	exeLink := fmt.Sprintf("/proc/%d/exe", pid)
	if _, err := os.Readlink(exeLink); err != nil {
		return true
	}
	return false
}
2 changes: 1 addition & 1 deletion pkg/kubelet/container_manager_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ func (unsupportedContainerManager) Start() error {
return fmt.Errorf("Container Manager is unsupported in this build")
}

func newContainerManager(dockerDaemonContainer string) (containerManager, error) {
func newContainerManager(dockerDaemonContainer, systemContainer string) (containerManager, error) {
return &unsupportedContainerManager{}, nil
}
8 changes: 7 additions & 1 deletion pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func NewMainKubelet(
containerRuntime string,
mounter mount.Interface,
dockerDaemonContainer string,
systemContainer string,
configureCBR0 bool,
pods int) (*Kubelet, error) {
if rootDirectory == "" {
Expand All @@ -146,6 +147,9 @@ func NewMainKubelet(
if resyncInterval <= 0 {
return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
}
if systemContainer != "" && cgroupRoot == "" {
return nil, fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
dockerClient = dockertools.NewInstrumentedDockerInterface(dockerClient)

serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
Expand Down Expand Up @@ -294,7 +298,9 @@ func NewMainKubelet(
return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
}

containerManager, err := newContainerManager(dockerDaemonContainer)
// Set up the container manager. This can fail if the devices hierarchy is not
// mounted (Docker, however, already requires it to be mounted).
containerManager, err := newContainerManager(dockerDaemonContainer, systemContainer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment that this can fail if devices hierarchy is not mounted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do, this makes it depend on #8586

if err != nil {
return nil, fmt.Errorf("failed to create the Container Manager: %v", err)
}
Expand Down