Skip to content

Commit

Permalink
rootfs: add mount destination validation
Browse files Browse the repository at this point in the history
NOTE: based on a similar fix on the OCI runc (see commit 0ca91f44f1664da834bc61115a849b56d22f595f).
Several adjustments for sysbox-runc done on top of that commit.

Original OCI runc commit:

rootfs: add mount destination validation

Because the target of a mount is inside a container (which may be a
volume that is shared with another container), there exists a race
condition where the target of the mount may change to a path containing
a symlink after we have sanitised the path -- resulting in us
inadvertently mounting the path outside of the container.

This is not immediately useful because we are in a mount namespace with
MS_SLAVE mount propagation applied to "/", so we cannot mount on top of
host paths in the host namespace. However, if any subsequent mountpoints
in the configuration use a subdirectory of that host path as a source,
those subsequent mounts will use an attacker-controlled source path
(resolved within the host rootfs) -- allowing the bind-mounting of "/"
into the container.

While arguably configuration issues like this are not entirely within
runc's threat model, within the context of Kubernetes (and possibly
other container managers that provide semi-arbitrary container creation
privileges to untrusted users) this is a legitimate issue. Since we
cannot block mounting from the host into the container, we need to block
the first stage of this attack (mounting onto a path outside the
container).

The long-term plan to solve this would be to migrate to libpathrs, but
as a stop-gap we implement libpathrs-like path verification through
readlink(/proc/self/fd/$n) and then do mount operations through the
procfd once it's been verified to be inside the container. The target
could move after we've checked it, but if it is inside the container
then we can assume that it is safe for the same reason that libpathrs
operations would be safe.

A slight wrinkle is the "copyup" functionality we provide for tmpfs,
which is the only case where we want to do a mount on the host
filesystem. To facilitate this, I split out the copy-up functionality
entirely so that the logic isn't interspersed with the regular tmpfs
logic. In addition, all dependencies on m.Destination being overwritten
have been removed since that pattern was just begging to be a source of
more mount-target bugs (we do still have to modify m.Destination for
tmpfs-copyup but we only do it temporarily).

Fixes: CVE-2021-30465
Reported-by: Etienne Champetier <champetier.etienne@gmail.com>
Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com>
Reviewed-by: Samuel Karp <skarp@amazon.com>
Reviewed-by: Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin)
Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

===

Signed-off-by: Cesar Talledo <ctalledo@nestybox.com>
  • Loading branch information
ctalledo committed May 11, 2022
1 parent 4f7419d commit a6023ce
Show file tree
Hide file tree
Showing 6 changed files with 297 additions and 139 deletions.
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2474,6 +2474,7 @@ func (c *linuxContainer) handleOp(op opReqType, childPid int, reqs []opReq) erro
case bind, chown, mkdir:
namespaces = append(namespaces,
fmt.Sprintf("mnt:/proc/%d/ns/mnt", childPid),
fmt.Sprintf("pid:/proc/%d/ns/pid", childPid),
)
case switchDockerDns:
namespaces = append(namespaces,
Expand Down
78 changes: 48 additions & 30 deletions libcontainer/rootfs_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

sh "github.com/nestybox/sysbox-libs/idShiftUtils"
utils "github.com/nestybox/sysbox-libs/utils"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
)

type linuxRootfsInit struct {
Expand Down Expand Up @@ -75,7 +76,7 @@ func iptablesRestoreHasWait() (bool, error) {
return verConstraint.Check(ver), nil
}

func doBindMount(m *configs.Mount) error {
func doBindMount(rootfs string, m *configs.Mount) error {

// sysbox-runc: For some reason, when the rootfs is on shiftfs, we
// need to do an Lstat() of the source path prior to doing the
Expand All @@ -91,26 +92,38 @@ func doBindMount(m *configs.Mount) error {
if !m.BindSrcInfo.IsDir {
src = filepath.Dir(m.Source)
}

os.Lstat(src)
if err := unix.Mount(m.Source, m.Destination, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {

// We've noticed that the lstat and/or mount syscall fails with EPERM when
// bind-mounting a source dir that is on a shiftfs mount on top of a tmpfs
// mount. For some reason the Linux "mount" command does not fail in this case,
// so let's try it.
// Bind-mount with procfd to mitigate symlink exchange attacks.

if err := libcontainerUtils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {

cmd := exec.Command("/bin/mount", "--rbind", m.Source, m.Destination)
err := cmd.Run()
if err != nil {
return fmt.Errorf("bind-mount of %s to %s failed: %v", m.Source, m.Destination, err)
// We've noticed that the lstat and/or mount syscall fails with EPERM when
// bind-mounting a source dir that is on a shiftfs mount on top of a tmpfs
// mount. For some reason the Linux "mount" command does not fail in this case,
// so let's try it.
cmd := exec.Command("/bin/mount", "--rbind", m.Source, procfd)
err := cmd.Run()
if err != nil {
realpath, _ := os.Readlink(procfd)
return fmt.Errorf("bind-mount of %s to %s failed: %v", m.Source, realpath, err)
}
}
return nil
}); err != nil {
return fmt.Errorf("bind mount through procfd of %s -> %s: %w", m.Source, m.Destination, err)
}

for _, pflag := range m.PropagationFlags {
if err := unix.Mount("", m.Destination, "", uintptr(pflag), ""); err != nil {
return err
if err := libcontainerUtils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
for _, pflag := range m.PropagationFlags {
if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
return err
}
}
return nil
}); err != nil {
return fmt.Errorf("change bind mount propagation through procfd: %w", err)
}

return nil
Expand Down Expand Up @@ -246,7 +259,6 @@ func (l *linuxRootfsInit) Init() error {

// If multiple requests are passed in the slice, they must all be
// of the same type.

switch l.reqs[0].Op {

case bind:
Expand All @@ -256,14 +268,21 @@ func (l *linuxRootfsInit) Init() error {
return newSystemErrorWithCausef(err, "chdir to rootfs %s", rootfs)
}

initPid := l.reqs[0].InitPid
usernsPath := fmt.Sprintf("/proc/%d/ns/user", initPid)
// We are in the pid and mount ns of the container's init process; remount
// /proc so that it picks up this fact.
os.Lstat("/proc")
if err := unix.Mount("proc", "/proc", "proc", 0, ""); err != nil {
return newSystemErrorWithCause(err, "re-mounting procfs")
}
defer unix.Unmount("/proc", unix.MNT_DETACH)

usernsPath := "/proc/1/ns/user"

for _, req := range l.reqs {
m := &req.Mount
mountLabel := req.Label

if err := doBindMount(m); err != nil {
if err := doBindMount(rootfs, m); err != nil {
return newSystemErrorWithCausef(err, "bind mounting %s to %s", m.Source, m.Destination)
}

Expand All @@ -290,18 +309,17 @@ func (l *linuxRootfsInit) Init() error {

// Set up the ID-mapping as needed
if m.IDMappedMount {

// Note: arguments to IDMapMount() must be absolute.
target, err := securejoin.SecureJoin(rootfs, m.Destination)
if err != nil {
return err
}

if err := sh.IDMapMount(usernsPath, target); err != nil {
fsName, _ := utils.GetFsName(target)
return newSystemErrorWithCausef(err,
"setting up ID-mapped mount on userns %s, path %s (likely means idmapped mounts are not supported on the filesystem at this path (%s))",
usernsPath, target, fsName)
if err := libcontainerUtils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
if err := sh.IDMapMount(usernsPath, procfd); err != nil {
fsName, _ := utils.GetFsName(procfd)
realpath, _ := os.Readlink(procfd)
return newSystemErrorWithCausef(err,
"setting up ID-mapped mount on path %s (likely means idmapped mounts are not supported on the filesystem at this path (%s))",
realpath, fsName)
}
return nil
}); err != nil {
return newSystemErrorWithCausef(err, "ID-map mount on %s", m.Destination)
}
}
}
Expand Down
Loading

0 comments on commit a6023ce

Please sign in to comment.