From 0d72adf96dda1b687815bf89bb245b937a2f603c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 16 Mar 2023 14:35:50 -0700 Subject: [PATCH] Prohibit /proc and /sys to be symlinks Commit 3291d66b9844 introduced a check for /proc and /sys, making sure the destination (dest) is a directory (and not e.g. a symlink). Later, a hunk from commit 0ca91f44f switched from using filepath.Join to SecureJoin for dest. As SecureJoin follows and resolves symlinks, the check whether dest is a symlink no longer works. To fix, do the check without/before using SecureJoin. Add integration tests to make sure we won't regress. Signed-off-by: Kir Kolyshkin --- libcontainer/rootfs_linux.go | 29 ++++++++++++++++++++--------- tests/integration/mask.bats | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 2a98372b561..2421566039c 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -375,32 +375,43 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { func mountToRootfs(m *configs.Mount, c *mountConfig) error { rootfs := c.root - mountLabel := c.label - mountFd := c.fd - dest, err := securejoin.SecureJoin(rootfs, m.Destination) - if err != nil { - return err - } + // procfs and sysfs are special because we need to ensure they are actually + // mounted on a specific path in a container without any funny business. switch m.Device { case "proc", "sysfs": // If the destination already exists and is not a directory, we bail - // out This is to avoid mounting through a symlink or similar -- which + // out. This is to avoid mounting through a symlink or similar -- which // has been a "fun" attack scenario in the past. // TODO: This won't be necessary once we switch to libpathrs and we can // stop all of these symlink-exchange attacks. + dest := filepath.Clean(m.Destination) + if !strings.HasPrefix(dest, rootfs) { + // Do not use securejoin as it resolves symlinks. + dest = filepath.Join(rootfs, dest) + } if fi, err := os.Lstat(dest); err != nil { if !os.IsNotExist(err) { return err } - } else if fi.Mode()&os.ModeDir == 0 { + } else if !fi.IsDir() { return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) } if err := os.MkdirAll(dest, 0o755); err != nil { return err } - // Selinux kernels do not support labeling of /proc or /sys + // Selinux kernels do not support labeling of /proc or /sys. return mountPropagate(m, rootfs, "", nil) + } + + mountLabel := c.label + mountFd := c.fd + dest, err := securejoin.SecureJoin(rootfs, m.Destination) + if err != nil { + return err + } + + switch m.Device { case "mqueue": if err := os.MkdirAll(dest, 0o755); err != nil { return err diff --git a/tests/integration/mask.bats b/tests/integration/mask.bats index 4bf834ef92f..d324eb32524 100644 --- a/tests/integration/mask.bats +++ b/tests/integration/mask.bats @@ -56,3 +56,22 @@ function teardown() { [ "$status" -eq 1 ] [[ "${output}" == *"Operation not permitted"* ]] } + +@test "mask paths [prohibit symlink /proc]" { + ln -s /symlink rootfs/proc + runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + [ "$status" -eq 1 ] + [[ "${output}" == *"must be mounted on ordinary directory"* ]] +} + +@test "mask paths [prohibit symlink /sys]" { + # In rootless containers, /sys is a bind mount not a real sysfs. + requires root + + ln -s /symlink rootfs/sys + runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + [ "$status" -eq 1 ] + # On cgroup v1, this may fail before checking if /sys is a symlink, + # so we merely check that it fails, and do not check the exact error + # message like for /proc above. +}