Skip to content

Commit

Permalink
Merge pull request moby#38377 from rgulewich/38332-cgroup-ns
Browse files Browse the repository at this point in the history
Start containers in their own cgroup namespaces
  • Loading branch information
yongtang authored May 12, 2019
2 parents 3998dff + 072400f commit 3042254
Show file tree
Hide file tree
Showing 25 changed files with 515 additions and 20 deletions.
5 changes: 5 additions & 0 deletions api/server/router/container/container_routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,11 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
if hostConfig.IpcMode.IsEmpty() {
hostConfig.IpcMode = container.IpcMode("shareable")
}

// Older clients expect the default to be "host"
if hostConfig.CgroupnsMode.IsEmpty() {
hostConfig.CgroupnsMode = container.CgroupnsMode("host")
}
}

if hostConfig != nil && hostConfig.PidsLimit != nil && *hostConfig.PidsLimit <= 0 {
Expand Down
13 changes: 13 additions & 0 deletions api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,19 @@ definitions:
description: "A list of kernel capabilities to drop from the container. Conflicts with option 'Capabilities'"
items:
type: "string"
CgroupnsMode:
type: "string"
enum:
- "private"
- "host"
description: |
cgroup namespace mode for the container. Possible values are:
- `"private"`: the container runs in its own private cgroup namespace
- `"host"`: use the host system's cgroup namespace
If not specified, the daemon default is used, which can either be `"private"`
or `"host"`, depending on daemon version, kernel support and configuration.
Dns:
type: "array"
description: "A list of DNS servers for the container to use."
Expand Down
30 changes: 27 additions & 3 deletions api/types/container/host_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,29 @@ import (
"github.com/docker/go-units"
)

// CgroupnsMode names the cgroup namespace mode requested for a container.
// The values recognised by its methods are "private", "host" and the empty
// string (meaning the mode was not set).
type CgroupnsMode string

// IsPrivate reports whether the mode asks for a cgroup namespace private to
// the container.
func (m CgroupnsMode) IsPrivate() bool {
	return string(m) == "private"
}

// IsHost reports whether the mode asks for the container to share the host's
// cgroup namespace.
func (m CgroupnsMode) IsHost() bool {
	return string(m) == "host"
}

// IsEmpty reports whether no cgroup namespace mode has been set.
func (m CgroupnsMode) IsEmpty() bool {
	return len(m) == 0
}

// Valid reports whether the mode is one of the recognised values; the unset
// (empty) mode counts as valid.
func (m CgroupnsMode) Valid() bool {
	switch m {
	case "", "private", "host":
		return true
	default:
		return false
	}
}

// Isolation represents the isolation technology of a container. The supported
// values are platform specific
type Isolation string
Expand Down Expand Up @@ -381,9 +404,10 @@ type HostConfig struct {
CapAdd strslice.StrSlice // List of kernel capabilities to add to the container
CapDrop strslice.StrSlice // List of kernel capabilities to remove from the container
Capabilities []string `json:"Capabilities"` // List of kernel capabilities to be available for container (this overrides the default set)
DNS []string `json:"Dns"` // List of DNS server to lookup
DNSOptions []string `json:"DnsOptions"` // List of DNSOption to look for
DNSSearch []string `json:"DnsSearch"` // List of DNSSearch to look for
CgroupnsMode CgroupnsMode // Cgroup namespace mode to use for the container
DNS []string `json:"Dns"` // List of DNS server to lookup
DNSOptions []string `json:"DnsOptions"` // List of DNSOption to look for
DNSSearch []string `json:"DnsSearch"` // List of DNSSearch to look for
ExtraHosts []string // List of extra hosts
GroupAdd []string // List of additional groups that the container process will run as
IpcMode IpcMode // IPC namespace to use for the container
Expand Down
1 change: 1 addition & 0 deletions cmd/dockerd/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,6 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
// rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
// Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", config.DefaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
return nil
}
18 changes: 17 additions & 1 deletion daemon/config/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
)

const (
	// DefaultCgroupNamespaceMode is the default for a container's CgroupnsMode,
	// if not set otherwise.
	DefaultCgroupNamespaceMode = "host" // TODO: change to private
	// DefaultIpcMode is the default for a container's IpcMode, if not set
	// otherwise.
	DefaultIpcMode = "private"
)
Expand All @@ -37,6 +39,7 @@ type Config struct {
ShmSize opts.MemBytes `json:"default-shm-size,omitempty"`
NoNewPrivileges bool `json:"no-new-privileges,omitempty"`
IpcMode string `json:"default-ipc-mode,omitempty"`
CgroupNamespaceMode string `json:"default-cgroupns-mode,omitempty"`
// ResolvConf is the path to the configuration of the host resolver
ResolvConf string `json:"resolv-conf,omitempty"`
Rootless bool `json:"rootless,omitempty"`
Expand Down Expand Up @@ -84,9 +87,22 @@ func verifyDefaultIpcMode(mode string) error {
return nil
}

// verifyDefaultCgroupNsMode checks that mode is an acceptable daemon-wide
// default cgroup namespace mode. It returns nil when the mode passes
// CgroupnsMode.Valid and a descriptive error otherwise.
func verifyDefaultCgroupNsMode(mode string) error {
	if cm := containertypes.CgroupnsMode(mode); !cm.Valid() {
		return fmt.Errorf("Default cgroup namespace mode (%v) is invalid. Use \"host\" or \"private\".", cm) // nolint: golint
	}
	return nil
}

// ValidatePlatformConfig checks if any platform-specific configuration settings are invalid.
//
// The default IPC mode is verified first, then the default cgroup namespace
// mode. The first error encountered is returned; nil means both settings are
// acceptable.
func (conf *Config) ValidatePlatformConfig() error {
	// Only bail out early on an IPC mode error; returning the IPC result
	// unconditionally would leave the cgroup namespace check unreachable.
	if err := verifyDefaultIpcMode(conf.IpcMode); err != nil {
		return err
	}

	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
}

// IsRootless returns conf.Rootless
Expand Down
22 changes: 22 additions & 0 deletions daemon/daemon_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,15 @@ func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConf
hostConfig.IpcMode = containertypes.IpcMode(m)
}

// Set default cgroup namespace mode, if unset for container
if hostConfig.CgroupnsMode.IsEmpty() {
m := config.DefaultCgroupNamespaceMode
if daemon.configStore != nil {
m = daemon.configStore.CgroupNamespaceMode
}
hostConfig.CgroupnsMode = containertypes.CgroupnsMode(m)
}

adaptSharedNamespaceContainer(daemon, hostConfig)

var err error
Expand Down Expand Up @@ -675,6 +684,19 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
}
}

if !hostConfig.CgroupnsMode.Valid() {
return warnings, fmt.Errorf("invalid cgroup namespace mode: %v", hostConfig.CgroupnsMode)
}
if hostConfig.CgroupnsMode.IsPrivate() {
if !sysInfo.CgroupNamespaces {
warnings = append(warnings, "Your kernel does not support cgroup namespaces. Cgroup namespace setting discarded.")
}

if hostConfig.Privileged {
return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode")
}
}

return warnings, nil
}

Expand Down
4 changes: 4 additions & 0 deletions daemon/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ func (daemon *Daemon) fillSecurityOptions(v *types.Info, sysInfo *sysinfo.SysInf
if daemon.Rootless() {
securityOptions = append(securityOptions, "name=rootless")
}
if daemon.cgroupNamespacesEnabled(sysInfo) {
securityOptions = append(securityOptions, "name=cgroupns")
}

v.SecurityOptions = securityOptions
}

Expand Down
5 changes: 5 additions & 0 deletions daemon/info_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"

"github.com/docker/docker/api/types"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/dockerversion"
"github.com/docker/docker/pkg/sysinfo"
"github.com/pkg/errors"
Expand Down Expand Up @@ -247,6 +248,10 @@ func parseRuncVersion(v string) (version string, commit string, err error) {
return version, commit, err
}

// cgroupNamespacesEnabled reports whether sysInfo indicates cgroup namespace
// support and the daemon's configured default cgroup namespace mode is
// "private".
func (daemon *Daemon) cgroupNamespacesEnabled(sysInfo *sysinfo.SysInfo) bool {
	// Check sysInfo first so the daemon configuration is only consulted when
	// cgroup namespaces are reported as available.
	if !sysInfo.CgroupNamespaces {
		return false
	}
	defaultMode := containertypes.CgroupnsMode(daemon.configStore.CgroupNamespaceMode)
	return defaultMode.IsPrivate()
}

// Rootless returns true if daemon is running in rootless mode
func (daemon *Daemon) Rootless() bool {
return daemon.configStore.Rootless
Expand Down
4 changes: 4 additions & 0 deletions daemon/info_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ func (daemon *Daemon) fillPlatformVersion(v *types.Version) {}
func fillDriverWarnings(v *types.Info) {
}

// cgroupNamespacesEnabled always reports false in the Windows build of the
// daemon; sysInfo is ignored.
func (daemon *Daemon) cgroupNamespacesEnabled(sysInfo *sysinfo.SysInfo) bool {
	return false
}

// Rootless returns true if daemon is running in rootless mode
func (daemon *Daemon) Rootless() bool {
return false
Expand Down
13 changes: 13 additions & 0 deletions daemon/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,19 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
s.Hostname = ""
}

// cgroup
if !c.HostConfig.CgroupnsMode.IsEmpty() {
cgroupNsMode := c.HostConfig.CgroupnsMode
if !cgroupNsMode.Valid() {
return fmt.Errorf("invalid cgroup namespace mode: %v", cgroupNsMode)
}

if cgroupNsMode.IsPrivate() && !c.HostConfig.Privileged {
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
setNamespace(s, nsCgroup)
}
}

return nil
}
}
Expand Down
5 changes: 5 additions & 0 deletions daemon/reload_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
daemon.configStore.ShmSize = conf.ShmSize
}

if conf.CgroupNamespaceMode != "" {
daemon.configStore.CgroupNamespaceMode = conf.CgroupNamespaceMode
}

if conf.IpcMode != "" {
daemon.configStore.IpcMode = conf.IpcMode
}
Expand All @@ -51,6 +55,7 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
attributes["default-runtime"] = daemon.configStore.DefaultRuntime
attributes["default-shm-size"] = fmt.Sprintf("%d", daemon.configStore.ShmSize)
attributes["default-ipc-mode"] = daemon.configStore.IpcMode
attributes["default-cgroupns-mode"] = daemon.configStore.CgroupNamespaceMode

return nil
}
5 changes: 5 additions & 0 deletions docs/api/version-history.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ keywords: "API, Docker, rcli, REST, documentation"
* `POST /containers/{id}/update` now accepts a `PidsLimit` field to tune a container's
PID limit. Set `0` or `-1` for unlimited. Leave `null` to not change the current value.
* `POST /build` now accepts `outputs` key for configuring build outputs when using BuildKit mode.
* `POST /containers/create` on Linux now accepts the `HostConfig.CgroupnsMode` property.
Set the property to `host` to create the container in the daemon's cgroup namespace, or
`private` to create the container in its own private cgroup namespace. The per-daemon
default is `host`, and can be changed by using the `CgroupNamespaceMode` daemon configuration
parameter.

## V1.39 API changes

Expand Down
92 changes: 92 additions & 0 deletions integration/build/build_cgroupns_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package build // import "github.com/docker/docker/integration/build"

import (
"context"
"encoding/json"
"io"
"strings"
"testing"

"github.com/docker/docker/api/types"
"github.com/docker/docker/integration/internal/requirement"
"github.com/docker/docker/internal/test/daemon"
"github.com/docker/docker/internal/test/fakecontext"
"github.com/docker/docker/pkg/jsonmessage"
"gotest.tools/assert"
"gotest.tools/skip"
)

// getCgroupFromBuildOutput finds the output of `readlink /proc/<pid>/ns/cgroup`
// in build output.
//
// buildOutput is decoded as a stream of JSON messages. The first message whose
// Stream field starts with "cgroup:" is returned with surrounding whitespace
// trimmed. A decode failure is returned unchanged. If the stream ends without
// such a message, io.ErrUnexpectedEOF is returned so callers cannot mistake
// missing output for a valid (empty) namespace identifier.
func getCgroupFromBuildOutput(buildOutput io.Reader) (string, error) {
	const prefix = "cgroup:"

	dec := json.NewDecoder(buildOutput)
	for {
		var m jsonmessage.JSONMessage
		if err := dec.Decode(&m); err != nil {
			if err == io.EOF {
				// The whole stream was consumed without seeing the
				// readlink output: report it instead of returning "".
				return "", io.ErrUnexpectedEOF
			}
			return "", err
		}
		if strings.HasPrefix(m.Stream, prefix) {
			return strings.TrimSpace(m.Stream), nil
		}
	}
}

// testBuildWithCgroupNs starts a test daemon whose default cgroup namespace
// mode is daemonNsMode, builds a small busybox image that prints its own
// cgroup namespace link, and stops the daemon again on return.
//
// It returns the cgroup namespace reported by the build container followed by
// the cgroup namespace reported for the daemon.
func testBuildWithCgroupNs(t *testing.T, daemonNsMode string) (string, string) {
	const dockerfile = `
FROM busybox
RUN readlink /proc/self/ns/cgroup
`

	testDaemon := daemon.New(t, daemon.WithDefaultCgroupNamespaceMode(daemonNsMode))
	testDaemon.StartWithBusybox(t)
	defer testDaemon.Stop(t)

	buildSource := fakecontext.New(t, "", fakecontext.WithDockerfile(dockerfile))
	defer buildSource.Close()

	buildOpts := types.ImageBuildOptions{
		Remove:      true,
		ForceRemove: true,
		Tags:        []string{"buildcgroupns"},
	}

	apiClient := testDaemon.NewClientT(t)
	resp, err := apiClient.ImageBuild(context.Background(), buildSource.AsTarReader(t), buildOpts)
	assert.NilError(t, err)
	defer resp.Body.Close()

	// The RUN step's readlink output is carried in the build response stream.
	containerCgroup, err := getCgroupFromBuildOutput(resp.Body)
	assert.NilError(t, err)

	return containerCgroup, testDaemon.CgroupNamespace(t)
}

// TestCgroupNamespacesBuild checks that a build run against a daemon whose
// default cgroup namespace mode is "private" reports a cgroup namespace that
// differs from the daemon's. The test is skipped unless the daemon is a local
// Linux daemon and requirement.CgroupNamespacesEnabled() reports true.
func TestCgroupNamespacesBuild(t *testing.T) {
	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
	skip.If(t, testEnv.IsRemoteDaemon())
	skip.If(t, !requirement.CgroupNamespacesEnabled())

	// When the daemon defaults to private cgroup namespaces, containers launched
	// should be in their own private cgroup namespace by default
	containerCgroup, daemonCgroup := testBuildWithCgroupNs(t, "private")
	assert.Assert(t, daemonCgroup != containerCgroup)
}

// TestCgroupNamespacesBuildDaemonHostMode checks that a build run against a
// daemon whose default cgroup namespace mode is "host" reports the same cgroup
// namespace as the daemon. The test is skipped unless the daemon is a local
// Linux daemon and requirement.CgroupNamespacesEnabled() reports true.
func TestCgroupNamespacesBuildDaemonHostMode(t *testing.T) {
	skip.If(t, testEnv.DaemonInfo.OSType != "linux")
	skip.If(t, testEnv.IsRemoteDaemon())
	skip.If(t, !requirement.CgroupNamespacesEnabled())

	// When the daemon defaults to host cgroup namespaces, containers
	// launched should not be inside their own cgroup namespaces
	containerCgroup, daemonCgroup := testBuildWithCgroupNs(t, "host")
	assert.Assert(t, daemonCgroup == containerCgroup)
}
Loading

0 comments on commit 3042254

Please sign in to comment.