Skip to content

Commit

Permalink
Merge pull request #1569 from cyphar/delay-seccomp
Browse files Browse the repository at this point in the history
init: delay seccomp application as late as possible
  • Loading branch information
Mrunal Patel committed Sep 7, 2017
2 parents 7e036aa + 1f32fff commit deb9d7f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 17 deletions.
13 changes: 12 additions & 1 deletion libcontainer/setns_init_linux.go
Expand Up @@ -47,7 +47,10 @@ func (l *linuxSetnsInit) Init() error {
return err
}
}
if l.config.Config.Seccomp != nil {
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
Expand All @@ -61,5 +64,13 @@ func (l *linuxSetnsInit) Init() error {
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}
40 changes: 24 additions & 16 deletions libcontainer/standard_init_linux.go
Expand Up @@ -30,28 +30,28 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
var newperms uint32

if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
// with user ns we need 'other' search permissions
// With user ns we need 'other' search permissions.
newperms = 0x8
} else {
// without user ns we need 'UID' search permissions
// Without user ns we need 'UID' search permissions.
newperms = 0x80000
}

// create a unique per session container name that we can
// join in setns; however, other containers can also join it
// Create a unique per session container name that we can join in setns;
// However, other containers can also join it.
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
}

func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()

// do not inherit the parent's session keyring
// Do not inherit the parent's session keyring.
sessKeyId, err := keys.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
// Make session keyring searcheable.
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
Expand Down Expand Up @@ -150,39 +150,47 @@ func (l *linuxStandardInit) Init() error {
if err := pdeath.Restore(); err != nil {
return err
}
// compare the parent from the initial start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparented to something else so we should
// just kill ourself and not cause problems for someone else.
// Compare the parent from the initial start of the init process and make
// sure that it did not change. if the parent changes that means it died
// and we were reparented to something else so we should just kill ourself
// and not cause problems for someone else.
if unix.Getppid() != l.parentPid {
return unix.Kill(unix.Getpid(), unix.SIGKILL)
}
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
// Check for the arg before waiting to make sure it exists and it is
// returned as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// close the pipe to signal that we have completed our init.
// Close the pipe to signal that we have completed our init.
l.pipe.Close()
// Wait for the FIFO to be opened on the other side before exec-ing the
// user process. We open it through /proc/self/fd/$fd, because the fd that
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
return newSystemErrorWithCause(err, "open exec fifo")
}
if _, err := unix.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
// Close the O_PATH fifofd fd before exec because the kernel resets
// dumpable in the wrong order. This has been fixed in newer kernels, but
// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
// N.B. the core issue itself (passing dirfds to the host filesystem) has
// since been resolved.
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.fifoFd)
// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.fifoFd)
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
Expand Down

0 comments on commit deb9d7f

Please sign in to comment.