Skip to content

Commit

Permalink
Merge branch 'work.mount-syscalls' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/viro/vfs

Pull mount ABI updates from Al Viro:
 "The syscalls themselves, finally.

  That's not all there is to that stuff, but switching individual
  filesystems to new methods is fortunately independent from everything
  else, so e.g. NFS series can go through NFS tree, etc.

  As those conversions get done, we'll be finally able to get rid of a
  bunch of duplication in fs/super.c introduced in the beginning of the
  entire thing. I expect that to be finished in the next window..."

* 'work.mount-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  vfs: Add a sample program for the new mount API
  vfs: syscall: Add fspick() to select a superblock for reconfiguration
  vfs: syscall: Add fsmount() to create a mount for a superblock
  vfs: syscall: Add fsconfig() for configuring and managing a context
  vfs: Implement logging through fs_context
  vfs: syscall: Add fsopen() to prepare for superblock creation
  Make anon_inodes unconditional
  teach move_mount(2) to work with OPEN_TREE_CLONE
  vfs: syscall: Add move_mount(2) to move mounts around
  vfs: syscall: Add open_tree(2) to reference or clone a mount
  • Loading branch information
torvalds committed May 8, 2019
2 parents d27fb65 + f1b5618 commit 4009132
Show file tree
Hide file tree
Showing 22 changed files with 1,353 additions and 91 deletions.
7 changes: 6 additions & 1 deletion arch/x86/entry/syscalls/syscall_32.tbl
Expand Up @@ -398,7 +398,12 @@
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
# don't use numbers 387 through 392, add new calls at the end
387 i386 open_tree sys_open_tree __ia32_sys_open_tree
388 i386 move_mount sys_move_mount __ia32_sys_move_mount
389 i386 fsopen sys_fsopen __ia32_sys_fsopen
390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
391 i386 fsmount sys_fsmount __ia32_sys_fsmount
392 i386 fspick sys_fspick __ia32_sys_fspick
393 i386 semget sys_semget __ia32_sys_semget
394 i386 semctl sys_semctl __ia32_compat_sys_semctl
395 i386 shmget sys_shmget __ia32_sys_shmget
Expand Down
6 changes: 6 additions & 0 deletions arch/x86/entry/syscalls/syscall_64.tbl
Expand Up @@ -343,6 +343,12 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
335 common open_tree __x64_sys_open_tree
336 common move_mount __x64_sys_move_mount
337 common fsopen __x64_sys_fsopen
338 common fsconfig __x64_sys_fsconfig
339 common fsmount __x64_sys_fsmount
340 common fspick __x64_sys_fspick
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal __x64_sys_pidfd_send_signal
Expand Down
2 changes: 1 addition & 1 deletion fs/Makefile
Expand Up @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o fs_context.o fs_parser.o
fs_types.o fs_context.o fs_parser.o fsopen.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
Expand Down
9 changes: 6 additions & 3 deletions fs/file_table.c
Expand Up @@ -255,6 +255,7 @@ static void __fput(struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
struct inode *inode = file->f_inode;
fmode_t mode = file->f_mode;

if (unlikely(!(file->f_mode & FMODE_OPENED)))
goto out;
Expand All @@ -277,18 +278,20 @@ static void __fput(struct file *file)
if (file->f_op->release)
file->f_op->release(inode, file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
!(mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
}
fops_put(file->f_op);
put_pid(file->f_owner.pid);
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
if (file->f_mode & FMODE_WRITER) {
if (mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(mnt);
}
dput(dentry);
if (unlikely(mode & FMODE_NEED_UNMOUNT))
dissolve_on_fput(mnt);
mntput(mnt);
out:
file_free(file);
Expand Down
160 changes: 146 additions & 14 deletions fs/fs_context.c
Expand Up @@ -11,6 +11,7 @@
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fs.h>
Expand All @@ -23,6 +24,7 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <asm/sections.h>
#include "mount.h"
#include "internal.h"

Expand Down Expand Up @@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
fc->cred = get_current_cred();
fc->net_ns = get_net(current->nsproxy->net_ns);

mutex_init(&fc->uapi_mutex);

switch (purpose) {
case FS_CONTEXT_FOR_MOUNT:
fc->user_ns = get_user_ns(fc->cred->user_ns);
Expand Down Expand Up @@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
if (!fc)
return ERR_PTR(-ENOMEM);

mutex_init(&fc->uapi_mutex);

fc->fs_private = NULL;
fc->s_fs_info = NULL;
fc->source = NULL;
Expand All @@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
get_net(fc->net_ns);
get_user_ns(fc->user_ns);
get_cred(fc->cred);
if (fc->log)
refcount_inc(&fc->log->usage);

/* Can't call put until we've called ->dup */
ret = fc->ops->dup(fc, src_fc);
Expand All @@ -378,35 +386,107 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
}
EXPORT_SYMBOL(vfs_dup_fs_context);

#ifdef CONFIG_PRINTK
/**
 * logfc - Log a message to a filesystem context
 * @fc: The filesystem context to log to.  May be NULL, in which case (as
 *	when @fc has no log attached) the message is sent to printk() instead
 *	of being stored.
 * @fmt: The format of the buffer.  Its first character also selects the
 *	printk() level used when there is no log: 'w' gives KERN_WARNING,
 *	'e' gives KERN_ERR and anything else gives KERN_NOTICE.
 *
 * The message text is obtained in one of three ways: @fmt itself when it
 * contains no '%', the single string argument when @fmt is exactly "%s", or
 * a kvasprintf() allocation otherwise.  Unformatted strings that lie in
 * rodata or in the core of log->owner are used in place; other unformatted
 * strings are duplicated with kstrdup().  If an allocation fails, a static
 * "OOM" message is used instead.
 *
 * NOTE(review): as displayed here the body does not parse - see the note on
 * the switch statement below.
 */
void logfc(struct fs_context *fc, const char *fmt, ...)
{
static const char store_failure[] = "OOM: Can't store error string";
struct fc_log *log = fc ? fc->log : NULL;
const char *p;
va_list va;
char *q;
/* 1 if q was allocated here and must eventually be kfree()d, else 0 */
u8 freeable;

va_start(va, fmt);

/*
 * NOTE(review): this switch is never closed before the "if" that follows
 * it.  The lines from here down to the last "break;" (and the pr_cont()
 * call further down) look like lines of the previous implementation that
 * the diff view shows next to the new ones - confirm against the real file.
 */
switch (fmt[0]) {
case 'w':
vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
break;
case 'e':
vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
break;
default:
vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
break;
/* No conversion specifiers: the format string is the whole message */
if (!strchr(fmt, '%')) {
p = fmt;
goto unformatted_string;
}
/* Exactly "%s": the message is the one string argument */
if (strcmp(fmt, "%s") == 0) {
p = va_arg(va, const char *);
goto unformatted_string;
}

pr_cont("\n");
/* General case: format into a freshly allocated buffer */
q = kvasprintf(GFP_KERNEL, fmt, va);
copied_string:
/* Reached with q from kvasprintf() above or from kstrdup() below */
if (!q)
goto store_failure;
freeable = 1;
goto store_string;

unformatted_string:
/* Strings inside [__start_rodata, __end_rodata) are used without copying */
if ((unsigned long)p >= (unsigned long)__start_rodata &&
(unsigned long)p < (unsigned long)__end_rodata)
goto const_string;
/* So are strings that within_module_core() places in log->owner */
if (log && within_module_core((unsigned long)p, log->owner))
goto const_string;
/* Anything else is duplicated before being kept */
q = kstrdup(p, GFP_KERNEL);
goto copied_string;

store_failure:
/* Allocation failed: fall back to the static message, which is not freed */
p = store_failure;
const_string:
q = (char *)p;
freeable = 0;
store_string:
if (!log) {
/*
 * No log to store into: print the message at the level chosen by
 * fmt[0], starting two characters into the string.
 * NOTE(review): presumably the two skipped characters are a level
 * prefix such as "w " or "e " - confirm against the callers.  On the
 * store_failure path the same offset is applied to the static "OOM"
 * string, so its first two characters are not printed either.
 */
switch (fmt[0]) {
case 'w':
printk(KERN_WARNING "%s\n", q + 2);
break;
case 'e':
printk(KERN_ERR "%s\n", q + 2);
break;
default:
printk(KERN_NOTICE "%s\n", q + 2);
break;
}
if (freeable)
kfree(q);
} else {
unsigned int logsize = ARRAY_SIZE(log->buffer);
u8 index;

/*
 * Slot that the new message goes into.
 * NOTE(review): the mask only behaves as a modulo if the number of
 * buffer slots is a power of two - confirm against struct fc_log.
 */
index = log->head & (logsize - 1);
/* The (u8) subtraction below relies on head and tail being one byte */
BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
sizeof(log->tail) != sizeof(u8));
if ((u8)(log->head - log->tail) == logsize) {
/* The buffer is full, discard the oldest message */
if (log->need_free & (1 << index))
kfree(log->buffer[index]);
log->tail++;
}

/* Store the message and record in need_free whether it must be freed */
log->buffer[index] = q;
log->need_free &= ~(1 << index);
log->need_free |= freeable << index;
log->head++;
}
va_end(va);
}
EXPORT_SYMBOL(logfc);
#endif

/*
 * Drop a context's reference on its message log.
 *
 * Nothing happens if the context has no log or if other references remain.
 * Whoever drops the last reference detaches the log from the context,
 * frees each buffered message whose need_free bit is set and then frees
 * the log structure itself.
 */
static void put_fc_log(struct fs_context *fc)
{
	struct fc_log *log = fc->log;
	int slot;

	if (!log)
		return;
	if (!refcount_dec_and_test(&log->usage))
		return;

	fc->log = NULL;
	for (slot = 0; slot < 8; slot++) {
		/* Only messages flagged in need_free are owned by the log */
		if (!(log->need_free & (1 << slot)))
			continue;
		kfree(log->buffer[slot]);
	}
	kfree(log);
}

/**
* put_fs_context - Dispose of a superblock configuration context.
Expand All @@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc)
put_user_ns(fc->user_ns);
put_cred(fc->cred);
kfree(fc->subtype);
put_fc_log(fc);
put_filesystem(fc->fs_type);
kfree(fc->source);
kfree(fc);
Expand Down Expand Up @@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data)

return monolithic_mount_data(fc, data);
}

/*
 * Strip a context of the state left over from the action just performed on
 * it and leave it ready to be turned into a reconfiguration context.
 *
 * Only steps that cannot fail are taken here.  The remaining work, which
 * calls back into the filesystem's context initialisation and may return an
 * error, is done by finish_clean_context(); between the two calls the
 * context sits in the FS_CONTEXT_AWAITING_RECONF phase.  The split exists
 * because success has to be reported to userland once a mount or remount
 * has succeeded: attempting a full reinitialisation at that point (for the
 * sake of a possible later remount) and running out of memory would leave
 * no good way out.  So the old state is merely thrown away here, and
 * reinitialisation waits until a reconfiguration is actually attempted.
 */
void vfs_clean_context(struct fs_context *fc)
{
	/* Let the filesystem release its private state first, if it has any */
	if (fc->need_free) {
		if (fc->ops && fc->ops->free)
			fc->ops->free(fc);
	}

	/* Release what the context itself owns */
	security_free_mnt_opts(&fc->security);
	kfree(fc->source);
	kfree(fc->subtype);

	/* Reset the fields so that nothing stale is left behind */
	fc->source = NULL;
	fc->subtype = NULL;
	fc->s_fs_info = NULL;
	fc->fs_private = NULL;
	fc->sb_flags = 0;
	fc->need_free = false;

	/* From now on this is a reconfiguration context awaiting reinit */
	fc->phase = FS_CONTEXT_AWAITING_RECONF;
	fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
}

/*
 * Reinitialise a context that is in the FS_CONTEXT_AWAITING_RECONF phase.
 *
 * A context in any other phase is left untouched and 0 is returned.
 * Otherwise the filesystem type's ->init_fs_context() is called, or
 * legacy_init_fs_context() if the type does not provide one.  On failure
 * the context is moved to FS_CONTEXT_FAILED and the error is returned; on
 * success need_free is set, the phase becomes FS_CONTEXT_RECONF_PARAMS and
 * 0 is returned.
 */
int finish_clean_context(struct fs_context *fc)
{
	int ret;

	if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
		return 0;

	ret = fc->fs_type->init_fs_context ?
		fc->fs_type->init_fs_context(fc) :
		legacy_init_fs_context(fc);

	if (unlikely(ret)) {
		fc->phase = FS_CONTEXT_FAILED;
	} else {
		fc->need_free = true;
		fc->phase = FS_CONTEXT_RECONF_PARAMS;
	}
	return ret;
}

0 comments on commit 4009132

Please sign in to comment.