Skip to content

Commit

Permalink
Add Linux namespace delegation support
Browse files Browse the repository at this point in the history
This allows ZFS datasets to be delegated to a user/mount namespace.
Within that namespace, only the delegated datasets are visible.
This works very similarly to Zones/Jails on other ZFS OSes.

As a user:
```
 $ unshare -Um
 $ zfs list
no datasets available
 $ readlink /proc/self/ns/user
user:[4026532291]
```

As root:
```
 # zfs list
NAME                            ZONED  MOUNTPOINT
containers                      off    /containers
containers/host                 off    /containers/host
containers/host/child           off    /containers/host/child
containers/host/child/gchild    off    /containers/host/child/gchild
containers/unpriv               on     /unpriv
containers/unpriv/child         on     /unpriv/child
containers/unpriv/child/gchild  on     /unpriv/child/gchild

 # zfs zone 4026532291 containers/unpriv
```

Back to the user namespace:
```
 $ zfs list
NAME                             USED  AVAIL     REFER  MOUNTPOINT
containers                       129M  47.8G       24K  /containers
containers/unpriv                128M  47.8G       24K  /unpriv
containers/unpriv/child          128M  47.8G      128M  /unpriv/child
```

Signed-off-by: Will Andrews <will.andrews@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Sponsored-by: Buddy <https://buddy.works>
  • Loading branch information
Will Andrews authored and allanjude committed Nov 2, 2021
1 parent 71ec9e5 commit 80c3594
Show file tree
Hide file tree
Showing 27 changed files with 829 additions and 16 deletions.
67 changes: 67 additions & 0 deletions cmd/zfs/zfs_main.c
Expand Up @@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif

#ifdef __linux__
static int zfs_do_zone(int argc, char **argv);
static int zfs_do_unzone(int argc, char **argv);
#endif

/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
Expand Down Expand Up @@ -184,6 +189,8 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
HELP_ZONE,
HELP_UNZONE,
} zfs_help_t;

typedef struct zfs_command {
Expand Down Expand Up @@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif

#ifdef __linux__
{ "zone", zfs_do_zone, HELP_ZONE },
{ "unzone", zfs_do_unzone, HELP_UNZONE },
#endif
};

#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
Expand Down Expand Up @@ -414,6 +426,10 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
case HELP_ZONE:
return (gettext("\tzone <nsnum> <filesystem>\n"));
case HELP_UNZONE:
return (gettext("\tunzone <nsnum> <filesystem>\n"));
default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -8728,6 +8744,57 @@ main(int argc, char **argv)
return (ret);
}

/*
* zfs zone <nsnum> <filesystem>
* zfs unzone <nsnum> <filesystem>
*
* Attach the given dataset to, or detach it from, the user namespace
* identified by <nsnum>.
*/
#ifdef __linux__
/*
 * Shared implementation of "zfs zone" and "zfs unzone".
 *
 * Exactly two operands are accepted: argv[1] is the decimal user namespace
 * number and argv[2] is the name of the filesystem.  When attach is true the
 * filesystem is added to the namespace, otherwise it is removed from it.
 *
 * Returns 0 on success and 1 on failure.
 */
static int
zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
{
	zfs_handle_t *zhp;
	unsigned long long nsnum;
	char *end;
	int ret;

	/* NOTE(review): the checks below rely on usage() not returning. */
	if (argc < 3) {
		(void) fprintf(stderr, gettext("missing argument(s)\n"));
		usage(B_FALSE);
	}
	if (argc > 3) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	/*
	 * Accept only a plain decimal number.  Left to itself the strtou*()
	 * family skips leading whitespace, accepts a sign, ignores trailing
	 * garbage and turns non-numeric input into 0, any of which would
	 * silently select the wrong namespace.  Parsing as unsigned long long
	 * (at least 64 bits) makes an overflowed result exceed UINT_MAX even
	 * on platforms where long is 32 bits wide.
	 */
	end = argv[1];
	nsnum = 0;
	if (*end >= '0' && *end <= '9')
		nsnum = strtoull(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || nsnum > UINT_MAX) {
		(void) fprintf(stderr, gettext("invalid namespace number\n"));
		usage(B_FALSE);
	}

	/* The filesystem is the second operand; argv[3] is the terminator. */
	zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
	if (zhp == NULL)
		return (1);

	ret = (zfs_userns(zhp, (unsigned int)nsnum, attach) != 0);

	zfs_close(zhp);
	return (ret);
}

/*
 * "zfs zone" entry point: run the shared implementation in attach mode.
 */
static int
zfs_do_zone(int argc, char **argv)
{
	int rc;

	rc = zfs_do_zone_impl(argc, argv, 1);
	return (rc);
}

/*
 * "zfs unzone" entry point: run the shared implementation in detach mode.
 */
static int
zfs_do_unzone(int argc, char **argv)
{
	int rc;

	rc = zfs_do_zone_impl(argc, argv, 0);
	return (rc);
}
#endif

#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>
Expand Down
23 changes: 23 additions & 0 deletions config/kernel-user-ns-inum.m4
@@ -0,0 +1,23 @@
dnl #
dnl # 3.18 API change
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
dnl #
dnl # Register the "user_ns_common_inum" test program, which includes
dnl # <linux/user_namespace.h> and assigns to the .ns.inum member of a
dnl # struct user_namespace.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
#include <linux/user_namespace.h>
], [
struct user_namespace uns;
uns.ns.inum = 0;
])
])

dnl #
dnl # Report the outcome of the "user_ns_common_inum" test: on success
dnl # print "yes" and define HAVE_USER_NS_COMMON_INUM, otherwise print "no".
dnl #
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
[user_namespace->ns.inum exists])
],[
AC_MSG_RESULT(no)
])
])
2 changes: 2 additions & 0 deletions config/kernel.m4
Expand Up @@ -134,6 +134,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS
ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
Expand Down Expand Up @@ -241,6 +242,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_SET_SPECIAL_STATE
ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS
ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])

dnl #
Expand Down
9 changes: 9 additions & 0 deletions include/libzfs.h
Expand Up @@ -963,6 +963,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,

#endif /* __FreeBSD__ */

#ifdef __linux__

/*
* Add or delete the given filesystem to/from the given user namespace.
*
* zhp    - handle of the filesystem to delegate
* nsnum  - number identifying the user namespace
* attach - nonzero to add the filesystem to the namespace, zero to remove it
*
* Returns 0 on success and a nonzero value on failure.
*/
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach);

#endif

#ifdef __cplusplus
}
#endif
Expand Down
31 changes: 27 additions & 4 deletions include/os/linux/spl/sys/zone.h
Expand Up @@ -25,11 +25,34 @@
#define _SPL_ZONE_H

#include <sys/byteorder.h>
#include <sys/cred.h>

#define GLOBAL_ZONEID 0
#include <linux/cred.h>
#include <linux/user_namespace.h>

#define zone_dataset_visible(x, y) (1)
#define crgetzoneid(x) (GLOBAL_ZONEID)
#define INGLOBALZONE(z) (1)
/*
* Attach the given dataset to the given user namespace.
*/
extern int zone_dataset_attach(cred_t *, const char *, unsigned int);

/*
* Detach the given dataset from the given user namespace.
*/
extern int zone_dataset_detach(cred_t *, const char *, unsigned int);

/*
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *dataset, int *write);

int spl_zone_init(void);
void spl_zone_fini(void);

extern unsigned int crgetzoneid(const cred_t *);
extern unsigned int global_zoneid(void);
extern boolean_t inglobalzone(proc_t *);

#define INGLOBALZONE(x) inglobalzone(x)
#define GLOBAL_ZONEID global_zoneid()

#endif /* SPL_ZONE_H */
2 changes: 2 additions & 0 deletions include/sys/fs/zfs.h
Expand Up @@ -1372,7 +1372,9 @@ typedef enum zfs_ioc {
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
ZFS_IOC_SET_BOOTENV, /* 0x87 */
ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_LAST
Expand Down
2 changes: 1 addition & 1 deletion lib/libspl/include/sys/types.h
Expand Up @@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */

typedef int zoneid_t;
typedef uint_t zoneid_t;
typedef int projid_t;

/*
Expand Down
12 changes: 11 additions & 1 deletion lib/libspl/include/zone.h
Expand Up @@ -33,7 +33,17 @@
extern "C" {
#endif

#define GLOBAL_ZONEID 0
#ifdef __FreeBSD__
#define GLOBAL_ZONEID 0
#else
/*
* Hardcoded in the kernel's root user namespace. A "better" way to get
* this would be by using ioctl_ns(2), but this would need to be performed
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
* supported since Linux 4.9.
*/
#define GLOBAL_ZONEID 4026531837U
#endif

extern zoneid_t getzoneid(void);

Expand Down
32 changes: 31 additions & 1 deletion lib/libspl/os/linux/zone.c
Expand Up @@ -23,10 +23,40 @@
* Use is subject to license terms.
*/

#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#include <zone.h>

zoneid_t
getzoneid()
{
return (GLOBAL_ZONEID);
char path[PATH_MAX];
char buf[128] = { '\0' };
char *cp;

int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
/* This API doesn't have any error checking... */
if (c < 0)
return (0);

ssize_t r = readlink(path, buf, sizeof (buf) - 1);
if (r < 0)
return (0);

cp = strchr(buf, '[');
if (cp == NULL)
return (0);
cp++;

unsigned long n = strtoul(cp, NULL, 10);
if (n == ULONG_MAX && errno == ERANGE)
return (0);
zoneid_t z = (zoneid_t)n;

return (z);
}
2 changes: 1 addition & 1 deletion lib/libuutil/libuutil.abi
Expand Up @@ -1137,7 +1137,7 @@
<pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
</abi-instr>
<abi-instr version='1.0' address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
Expand Down
9 changes: 8 additions & 1 deletion lib/libzfs/libzfs.abi
Expand Up @@ -432,6 +432,7 @@
<elf-symbol name='zfs_unshareall_bytype' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall_nfs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall_smb' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -1672,7 +1673,7 @@
</class-decl>
</abi-instr>
<abi-instr version='1.0' address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
Expand Down Expand Up @@ -5679,6 +5680,12 @@
</data-member>
</class-decl>
<pointer-type-def type-id='a5559cdd' size-in-bits='64' id='e4ec4540'/>
<function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
<parameter type-id='9200a744' name='zhp'/>
<parameter type-id='f0981eeb' name='nsnum'/>
<parameter type-id='95e97e5e' name='attach'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<parameter type-id='26a90f95' name='version'/>
<parameter type-id='95e97e5e' name='len'/>
Expand Down
52 changes: 52 additions & 0 deletions lib/libzfs/os/linux/libzfs_util_os.c
Expand Up @@ -219,3 +219,55 @@ zfs_version_kernel(char *version, int len)

return (0);
}

/*
* Add or delete the given filesystem to/from the given user namespace.
*/
/*
 * Attach the filesystem behind zhp to the user namespace numbered nsnum
 * (attach != 0), or detach it from that namespace (attach == 0).
 *
 * Volumes, snapshots and bookmarks are rejected with EZFS_BADTYPE.  If the
 * ioctl fails, the error is reported through zfs_standard_error() and the
 * ioctl's return value is handed back; 0 means success.
 */
int
zfs_userns(zfs_handle_t *zhp, unsigned int nsnum, int attach)
{
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	zfs_cmd_t zc = {"\0"};
	char errbuf[1024];
	unsigned long cmd;
	int ret;

	/* Build the message prefix shared by every failure path below. */
	if (!attach) {
		(void) snprintf(errbuf, sizeof (errbuf),
		    dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
		    zhp->zfs_name);
	} else {
		(void) snprintf(errbuf, sizeof (errbuf),
		    dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
		    zhp->zfs_name);
	}

	/* Only filesystems can be delegated; turn away everything else. */
	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "volumes can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	} else if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "snapshots can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	} else if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "bookmarks can not be namespaced"));
		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
	}
	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);

	/* Describe the request: dataset name, objset type, namespace. */
	zc.zc_zoneid = nsnum;
	zc.zc_objset_type = DMU_OST_ZFS;
	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));

	if (attach)
		cmd = ZFS_IOC_USERNS_ATTACH;
	else
		cmd = ZFS_IOC_USERNS_DETACH;

	ret = zfs_ioctl(hdl, cmd, &zc);
	if (ret != 0)
		zfs_standard_error(hdl, errno, errbuf);

	return (ret);
}
2 changes: 1 addition & 1 deletion lib/libzfs_core/libzfs_core.abi
Expand Up @@ -956,7 +956,7 @@
<pointer-type-def type-id='a84c031d' size-in-bits='64' id='26a90f95'/>
</abi-instr>
<abi-instr version='1.0' address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
Expand Down
3 changes: 1 addition & 2 deletions man/man7/zfsprops.7
Expand Up @@ -1879,8 +1879,7 @@ feature and are not relevant on other platforms.
The default value is
.Sy off .
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
Controls whether the dataset is managed from a non-global zone.
Zones are a Solaris feature and are not relevant on other platforms.
Controls whether the dataset is managed from a non-global zone or namespace.
The default value is
.Sy off .
.El
Expand Down

0 comments on commit 80c3594

Please sign in to comment.