Skip to content

Commit

Permalink
Merge pull request from GHSA-xr7r-f8xq-vfvv
Browse files Browse the repository at this point in the history
runc: release 1.1.12
  • Loading branch information
cyphar committed Jan 31, 2024
2 parents 099ff69 + 29d6d87 commit a9833ff
Show file tree
Hide file tree
Showing 11 changed files with 200 additions and 36 deletions.
21 changes: 20 additions & 1 deletion CHANGELOG.md
Expand Up @@ -6,6 +6,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased 1.1.z]

## [1.1.12] - 2024-01-31

> Now you're thinking with Portals™!
### Security

* Fix [CVE-2024-21626][cve-2024-21626], a container breakout attack that took
advantage of a file descriptor that was leaked internally within runc (but
never leaked to the container process). In addition to fixing the leak,
several strict hardening measures were added to ensure that future internal
leaks could not be used to break out in this manner again. Based on our
research, while no other container runtime had a similar leak, none had any
of the hardening steps we've introduced (and some runtimes would not check
for any file descriptors that a calling process may have leaked to them,
allowing for container breakouts due to basic user error).

[cve-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv

## [1.1.11] - 2024-01-01

> Happy New Year!
Expand Down Expand Up @@ -493,7 +511,8 @@ implementation (libcontainer) is *not* covered by this policy.
[1.0.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.0.1

<!-- 1.1.z patch releases -->
[Unreleased 1.1.z]: https://github.com/opencontainers/runc/compare/v1.1.11...release-1.1
[Unreleased 1.1.z]: https://github.com/opencontainers/runc/compare/v1.1.12...release-1.1
[1.1.12]: https://github.com/opencontainers/runc/compare/v1.1.11...v1.1.12
[1.1.11]: https://github.com/opencontainers/runc/compare/v1.1.10...v1.1.11
[1.1.10]: https://github.com/opencontainers/runc/compare/v1.1.9...v1.1.10
[1.1.9]: https://github.com/opencontainers/runc/compare/v1.1.8...v1.1.9
Expand Down
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
1.1.11+dev
1.1.12+dev
31 changes: 16 additions & 15 deletions libcontainer/cgroups/file.go
Expand Up @@ -77,16 +77,16 @@ var (
// TestMode is set to true by unit tests that need "fake" cgroupfs.
TestMode bool

cgroupFd int = -1
prepOnce sync.Once
prepErr error
resolveFlags uint64
cgroupRootHandle *os.File
prepOnce sync.Once
prepErr error
resolveFlags uint64
)

func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
Expand All @@ -97,15 +97,16 @@ func prepareOpenat2() error {
}
return
}
file := os.NewFile(uintptr(fd), cgroupfsDir)

var st unix.Statfs_t
if err = unix.Fstatfs(fd, &st); err != nil {
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
logrus.Warnf("falling back to securejoin: %s", prepErr)
return
}

cgroupFd = fd

cgroupRootHandle = file
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
if st.Type == unix.CGROUP2_SUPER_MAGIC {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
Expand All @@ -132,28 +133,28 @@ func openFile(dir, file string, flags int) (*os.File, error) {
return openFallback(path, flags, mode)
}

fd, err := unix.Openat2(cgroupFd, relPath,
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
&unix.OpenHow{
Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC,
Mode: uint64(mode),
})
if err != nil {
err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir
// Check if cgroupRootHandle is still opened to cgroupfsDir
// (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted).
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2.
fdStr := strconv.Itoa(cgroupFd)
// to reopen cgroupRootHandle and retry openat2.
fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd
// Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
fdStr, fdDest, cgroupfsDir, err)
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
}
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions libcontainer/cgroups/fs/paths.go
Expand Up @@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string {
if err != nil {
return ""
}
defer dir.Close()
names, err := dir.Readdirnames(1)
if err != nil {
return ""
Expand Down
9 changes: 9 additions & 0 deletions libcontainer/container_linux.go
Expand Up @@ -353,6 +353,15 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
}()
}

// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
// to make sure we don't leak any files into "runc init". Any files to be
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
// runtime and thus their O_CLOEXEC flag will be cleared. This is some
// additional protection against attacks like CVE-2024-21626, by making
// sure we never leak files to "runc init" we didn't intend to.
if err := utils.CloseExecFrom(3); err != nil {
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
}
if err := parent.start(); err != nil {
return fmt.Errorf("unable to start container process: %w", err)
}
Expand Down
31 changes: 31 additions & 0 deletions libcontainer/init_linux.go
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"net"
"os"
"path/filepath"
"strings"
"unsafe"

Expand Down Expand Up @@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error {
return nil
}

// verifyCwd ensures that the current directory is actually inside the mount
// namespace root of the current process.
func verifyCwd() error {
// getcwd(2) on Linux detects if cwd is outside of the rootfs of the
// current mount namespace root, and in that case prefixes "(unreachable)"
// to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
// when this happens and return ENOENT rather than returning a non-absolute
// path. In both cases we can therefore easily detect if we have an invalid
// cwd by checking the return value of getcwd(3). See getcwd(3) for more
// details, and CVE-2024-21626 for the security issue that motivated this
// check.
//
// We have to use unix.Getwd() here because os.Getwd() has a workaround for
// $PWD which involves doing stat(.), which can fail if the current
// directory is inaccessible to the container process.
if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
} else if err != nil {
return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
} else if !filepath.IsAbs(wd) {
// We shouldn't ever hit this, but check just in case.
return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
}
return nil
}

// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
Expand Down Expand Up @@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
}
}
// Make sure our final working directory is inside the container.
if err := verifyCwd(); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return fmt.Errorf("unable to clear keep caps: %w", err)
}
Expand Down
20 changes: 10 additions & 10 deletions libcontainer/integration/seccomp_test.go
Expand Up @@ -13,7 +13,7 @@ import (
libseccomp "github.com/seccomp/libseccomp-golang"
)

func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
func TestSeccompDenySyslogWithErrno(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
ErrnoRet: &errnoRet,
},
Expand All @@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: No such process"
expected := "dmesg: klogctl: No such process"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}

func TestSeccompDenyGetcwd(t *testing.T) {
func TestSeccompDenySyslog(t *testing.T) {
if testing.Short() {
return
}
Expand All @@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Name: "syslog",
Action: configs.Errno,
},
},
Expand All @@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Cwd: "/",
Args: []string{"pwd"},
Args: []string{"dmesg"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Expand All @@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
}

if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
}

expected := "pwd: getcwd: Operation not permitted"
expected := "dmesg: klogctl: Operation not permitted"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
Expand Down
37 changes: 36 additions & 1 deletion libcontainer/setns_init_linux.go
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"strconv"

"github.com/opencontainers/selinux/go-selinux"
Expand All @@ -14,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

// linuxSetnsInit performs the container's initialization for running a new process
Expand Down Expand Up @@ -82,6 +84,21 @@ func (l *linuxSetnsInit) Init() error {
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}

// Check for the arg before waiting to make sure it exists and it is
// returned as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// exec.LookPath in Go < 1.20 might return no error for an executable
// residing on a file system mounted with noexec flag, so perform this
// extra check now while we can still return a proper error.
// TODO: remove this once go < 1.20 is not supported.
if err := eaccess(name); err != nil {
return &os.PathError{Op: "eaccess", Path: name, Err: err}
}

// Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles).
Expand All @@ -101,5 +118,23 @@ func (l *linuxSetnsInit) Init() error {
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
}

return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
// Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args[0:], os.Environ())
}
19 changes: 19 additions & 0 deletions libcontainer/standard_init_linux.go
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

type linuxStandardInit struct {
Expand Down Expand Up @@ -258,5 +259,23 @@ func (l *linuxStandardInit) Init() error {
return err
}

// Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args[0:], os.Environ())
}

0 comments on commit a9833ff

Please sign in to comment.