Skip to content

Commit

Permalink
conf: improve read-only /sys with read-write /sys/devices/virtual/net
Browse files Browse the repository at this point in the history
Some tools require /sys/devices/virtual/net to be read-write. At the
same time we want all other parts of /sys to be read-only. To do this we
created a layout where we hade a read-only instance of sysfs mounted on
top of a read-write instance of sysfs:

`-/sys                                  sysfs                                                        sysfs      rw,nosuid,nodev,noexec,relatime
  `-/sys                                sysfs                                                        sysfs      ro,nosuid,nodev,noexec,relatime
    |-/sys/devices/virtual/net          sysfs                                                        sysfs      rw,relatime
    | `-/sys/devices/virtual/net        sysfs[/devices/virtual/net]                                  sysfs      rw,nosuid,nodev,noexec,relatime

This causes issues for systemd services that create a separate mount
namespace as they get confused to what mount options need to be
respected.

Simplify our mounting logic so we end up with a single read-only mount
of sysfs on /sys and a read-write bind-mount of /sys/devices/virtual/net:

├─/sys                                sysfs                                                                                  sysfs         ro,nosuid,nodev,noexec,relatime
│ ├─/sys/devices/virtual/net          sysfs[/devices/virtual/net]                                                            sysfs         rw,nosuid,nodev,noexec,relatime

Link: systemd/systemd#20032
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
  • Loading branch information
Christian Brauner committed Jun 30, 2021
1 parent 804f3e2 commit cb4889a
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions src/lxc/conf.c
Expand Up @@ -699,7 +699,7 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
*/
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL, false },
/* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL, true },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL, true, },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL, false },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL, false },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL, true },
Expand All @@ -708,12 +708,9 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL, false },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL, false },
{ 0, 0, NULL, NULL, NULL, 0, NULL, false }
};
struct lxc_conf *conf = handler->conf;
Expand Down Expand Up @@ -769,7 +766,7 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
has_cap_net_admin = lxc_wants_cap(CAP_NET_ADMIN, conf);
for (i = 0; default_mounts[i].match_mask; i++) {
__do_free char *destination = NULL, *source = NULL;
unsigned long mflags;
unsigned long mflags = default_mounts[i].flags;

if ((flags & default_mounts[i].match_mask) != default_mounts[i].match_flag)
continue;
Expand All @@ -794,10 +791,10 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
if (!destination)
return syserror_set(-ENOMEM, "Failed to create target path");

mflags = add_required_remount_flags(source, destination,
default_mounts[i].flags);
ret = safe_mount(source, destination, default_mounts[i].fstype,
mflags, default_mounts[i].options,
ret = safe_mount(source, destination,
default_mounts[i].fstype,
mflags,
default_mounts[i].options,
rootfs->path ? rootfs->mount : NULL);
if (ret < 0) {
if (errno != ENOENT)
Expand All @@ -806,7 +803,11 @@ static int lxc_mount_auto_mounts(struct lxc_handler *handler, int flags)
INFO("Mount source or target for \"%s\" on \"%s\" does not exist. Skipping", source, destination);
continue;
}
TRACE("Mounted automount \"%s\" on \"%s\" with flags %lu", source, destination, mflags);

if (mflags & MS_REMOUNT)
TRACE("Remounted automount \"%s\" on \"%s\" %s with flags %lu", source, destination, (mflags & MS_RDONLY) ? "read-only" : "read-write", mflags);
else
TRACE("Mounted automount \"%s\" on \"%s\" %s with flags %lu", source, destination, (mflags & MS_RDONLY) ? "read-only" : "read-write", mflags);
}

if (flags & LXC_AUTO_CGROUP_MASK) {
Expand Down

0 comments on commit cb4889a

Please sign in to comment.