Skip to content

Commit

Permalink
*: add support for rootless containers
Browse files Browse the repository at this point in the history
This enables the support for the rootless container mode. There are
certain restrictions on what non-root users can do, resulting in several
runC features not being used. There are no checks in place at the moment
to make this clear to users.

* All cgroup operations require having CAP_SYS_ADMIN in the root user
namespace. This means that we cannot set up *any* cgroups, or join
cgroups. This can be circumvented by having the user own the current
cgroup [this mode is currently not implemented].

* setgroups(2) cannot be used in a non-privileged user namespace setup,
so we have to hard fail if for some reason we decide that we need
setgroups(2) enabled.

* We cannot map any user other than ourselves in a rootless container,
which means that any user-related directives won't work. You can only be
"root".

Signed-off-by: Aleksa Sarai <asarai@suse.de>
  • Loading branch information
cyphar committed Apr 23, 2016
1 parent f36fc4c commit cef7834
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 24 deletions.
2 changes: 1 addition & 1 deletion exec.go
Expand Up @@ -24,7 +24,7 @@ Where "<container-id>" is the name for the instance of the container and
For example, if the container is configured to run the linux ps command the
following will output a list of processes running in the container:
# runc exec <container-id> ps`,
Flags: []cli.Flag{
cli.StringFlag{
Expand Down
26 changes: 16 additions & 10 deletions libcontainer/container_linux.go
Expand Up @@ -42,6 +42,7 @@ type linuxContainer struct {
criuVersion int
state containerState
created time.Time
notRoot bool
}

// State represents a running container's state
Expand Down Expand Up @@ -331,6 +332,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(),
NoNewPrivileges: c.config.NoNewPrivileges,
NotRoot: c.notRoot,
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
Expand Down Expand Up @@ -741,6 +743,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}

func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
if err := c.cgroupManager.Apply(pid); err != nil {
return err
}
Expand Down Expand Up @@ -1210,16 +1213,19 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Type: GidmapAttr,
Value: b,
})
// check if we have CAP_SETGID to setgroup properly
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
r.AddData(&Boolmsg{
Type: SetgroupAttr,
Value: true,
})
// The following only applies if we are root.
if !c.notRoot {
// check if we have CAP_SETGID to setgroup properly
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
r.AddData(&Boolmsg{
Type: SetgroupAttr,
Value: true,
})
}
}
}
}
Expand Down
49 changes: 49 additions & 0 deletions libcontainer/factory_linux.go
Expand Up @@ -148,6 +148,50 @@ type LinuxFactory struct {
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}

// haveMappingRights validates that the current process is allowed to use the
// ID mappings requested by config and reports whether the container will be
// run by a non-root user.
//
// When the effective UID is 0 no rootless restrictions are enforced and the
// result is (false, nil), provided config.HostUID() and config.HostGID()
// succeed. For any other effective UID the configuration must:
//
//   - enable a new user namespace,
//   - resolve (via config.HostUID / config.HostGID) to the caller's own
//     effective UID and GID, and
//   - contain exactly one UID mapping and one GID mapping, each of size 1.
//
// If all of those hold the result is (true, nil); otherwise an error
// describing the first violated restriction is returned.
func haveMappingRights(config *configs.Config) (bool, error) {
	hostUID, err := config.HostUID()
	if err != nil {
		return false, err
	}

	// Any non-zero effective UID means we will be running under an
	// unprivileged user namespace, which restricts what may be mapped.
	euid := os.Geteuid()
	rootless := euid != 0
	if rootless {
		switch {
		case !config.Namespaces.Contains(configs.NEWUSER):
			return false, fmt.Errorf("rootless containers require user namespaces")
		case hostUID != euid:
			return false, fmt.Errorf("rootless containers cannot map container root to a different host user")
		}
	}

	// The host GID is resolved (and its error surfaced) for root as well.
	hostGID, err := config.HostGID()
	if err != nil {
		return false, err
	}

	// None of the remaining restrictions apply when running as root.
	if !rootless {
		return false, nil
	}

	// As with the user check, we may only map to the group we already are.
	if hostGID != os.Getegid() {
		return false, fmt.Errorf("rootless containers cannot map container root to a different host group")
	}

	// Only a single user and a single group (our own) may be mapped.
	if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
		return false, fmt.Errorf("rootless containers cannot map more than one user")
	}
	if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
		return false, fmt.Errorf("rootless containers cannot map more than one group")
	}

	return true, nil
}

func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
Expand All @@ -158,6 +202,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
notroot, err := haveMappingRights(config)
if err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
Expand All @@ -175,6 +223,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
notRoot: notroot,
}
c.state = &stoppedState{c: c}
return c, nil
Expand Down
7 changes: 5 additions & 2 deletions libcontainer/init_linux.go
Expand Up @@ -51,6 +51,7 @@ type initConfig struct {
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
NotRoot bool `json:"not_root"`
User string `json:"user"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Expand Down Expand Up @@ -129,8 +130,10 @@ func finalizeNamespace(config *initConfig) error {
if err := system.SetKeepCaps(); err != nil {
return err
}
if err := setupUser(config); err != nil {
return err
if !config.NotRoot {
if err := setupUser(config); err != nil {
return err
}
}
if err := system.ClearKeepCaps(); err != nil {
return err
Expand Down
22 changes: 15 additions & 7 deletions libcontainer/process_linux.go
Expand Up @@ -247,13 +247,18 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
// Reading notRoot directly is safe here because .Start() already holds the
// mutex; a locking NotRoot() accessor could not be used from this path anyway.
notroot := p.container.notRoot
if !notroot {
// Do this before syncing with child so that no children can escape the
// cgroup. We can't do this if we're not running as root.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
}
defer func() {
if err != nil {
if err != nil && !notroot {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
Expand Down Expand Up @@ -282,8 +287,11 @@ loop:
}
switch procSync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
// We can't do any of this setup without being root.
if !notroot {
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
Expand Down
4 changes: 0 additions & 4 deletions start.go
Expand Up @@ -75,10 +75,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See
setupSdNotify(spec, notifySocket)
}

if os.Geteuid() != 0 {
fatalf("runc should be run as root")
}

status, err := startContainer(context, spec)
if err != nil {
fatal(err)
Expand Down

0 comments on commit cef7834

Please sign in to comment.