Skip to content

Commit

Permalink
Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Browse files Browse the repository at this point in the history

Pull vfs mount infrastructure updates from Al Viro:
 "The rest of core infrastructure; no new syscalls in that pile, but the
  old parts are switched to new infrastructure. At that point
  conversions of individual filesystems can happen independently; some
  are done here (afs, cgroup, procfs, etc.), there's also a large series
  outside of that pile dealing with NFS (quite a bit of option-parsing
  stuff is getting used there - it's one of the most convoluted
  filesystems in terms of mount-related logics), but NFS bits are the
  next cycle fodder.

  It got seriously simplified since the last cycle; documentation is
  probably the weakest bit at the moment - I considered dropping the
  commit introducing Documentation/filesystems/mount_api.txt (cutting
  the size increase by quarter ;-), but decided that it would be better
  to fix it up after -rc1 instead.

  That pile allows followup work to be done in independent branches, which
  should make life much easier for the next cycle. fs/super.c size
  increase is unpleasant; there's a followup series that allows
  shrinking it considerably, but I decided to leave that until the next
  cycle"

* 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits)
  afs: Use fs_context to pass parameters over automount
  afs: Add fs_context support
  vfs: Add some logging to the core users of the fs_context log
  vfs: Implement logging through fs_context
  vfs: Provide documentation for new mount API
  vfs: Remove kern_mount_data()
  hugetlbfs: Convert to fs_context
  cpuset: Use fs_context
  kernfs, sysfs, cgroup, intel_rdt: Support fs_context
  cgroup: store a reference to cgroup_ns into cgroup_fs_context
  cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper
  cgroup_do_mount(): massage calling conventions
  cgroup: stash cgroup_root reference into cgroup_fs_context
  cgroup2: switch to option-by-option parsing
  cgroup1: switch to option-by-option parsing
  cgroup: take options parsing into ->parse_monolithic()
  cgroup: fold cgroup1_mount() into cgroup1_get_tree()
  cgroup: start switching to fs_context
  ipc: Convert mqueue fs to fs_context
  proc: Add fs_context support to procfs
  ...
  • Loading branch information
torvalds committed Mar 12, 2019
2 parents dbc2fba + c99c217 commit 7b47a9e
Show file tree
Hide file tree
Showing 45 changed files with 4,357 additions and 1,332 deletions.
709 changes: 709 additions & 0 deletions Documentation/filesystems/mount_api.txt

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions arch/x86/kernel/cpu/resctrl/internal.h
Expand Up @@ -4,6 +4,7 @@

#include <linux/sched.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>

#define MSR_IA32_L3_QOS_CFG 0xc81
Expand Down Expand Up @@ -40,6 +41,21 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)


/*
 * Per-mount configuration for resctrl.  The flags are filled in while mount
 * parameters are parsed (rdt_parse_param()) and acted on afterwards by
 * rdt_enable_ctx().
 */
struct rdt_fs_context {
/* Embedded kernfs context; fc->fs_private is set to point at this member. */
struct kernfs_fs_context kfc;
/* Set when Opt_cdpl2 is parsed; makes rdt_enable_ctx() call cdpl2_enable(). */
bool enable_cdpl2;
/* Set when Opt_cdp is parsed; makes rdt_enable_ctx() call cdpl3_enable(). */
bool enable_cdpl3;
/* Set when Opt_mba_mpbs is parsed; makes rdt_enable_ctx() call set_mba_sc(true). */
bool enable_mba_mbps;
};

/*
 * Map a generic fs_context back to the resctrl context that embeds its
 * kernfs_fs_context.  fc->fs_private is read as a pointer to the 'kfc'
 * member of a struct rdt_fs_context.
 */
static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
{
	struct kernfs_fs_context *kernfs_ctx = fc->fs_private;

	return container_of(kernfs_ctx, struct rdt_fs_context, kfc);
}

DECLARE_STATIC_KEY_FALSE(rdt_enable_key);

/**
Expand Down
185 changes: 116 additions & 69 deletions arch/x86/kernel/cpu/resctrl/rdtgroup.c
Expand Up @@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
Expand All @@ -32,6 +33,7 @@
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
#include <linux/user_namespace.h>

#include <uapi/linux/magic.h>

Expand Down Expand Up @@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable();
}

/*
 * Parse the comma-separated resctrl mount option string in @data and apply
 * each option as soon as it is recognised.
 *
 * Recognised tokens:
 *   "cdp"      - calls cdpl3_enable()
 *   "cdpl2"    - calls cdpl2_enable()
 *   "mba_MBps" - calls set_mba_sc(true); rejected with -EINVAL unless the
 *                boot CPU vendor is Intel
 *
 * An empty token or any other token yields -EINVAL.  Options applied before
 * a failing token are not undone here.  @data is tokenised in place by
 * strsep().
 *
 * Returns 0 if every token was applied, otherwise the first error code after
 * logging the offending token.
 */
static int parse_rdtgroupfs_options(char *data)
{
char *token, *o = data;
int ret = 0;

while ((token = strsep(&o, ",")) != NULL) {
/* Reject empty tokens such as those produced by ",," or a trailing comma. */
if (!*token) {
ret = -EINVAL;
goto out;
}

if (!strcmp(token, "cdp")) {
ret = cdpl3_enable();
if (ret)
goto out;
} else if (!strcmp(token, "cdpl2")) {
ret = cdpl2_enable();
if (ret)
goto out;
} else if (!strcmp(token, "mba_MBps")) {
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
ret = set_mba_sc(true);
else
ret = -EINVAL;
if (ret)
goto out;
} else {
ret = -EINVAL;
goto out;
}
}

return 0;

out:
/* token still points at the option that caused the failure. */
pr_err("Invalid mount option \"%s\"\n", token);

return ret;
}

/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
Expand Down Expand Up @@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);

static struct dentry *rdt_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
/*
 * Apply the features requested in @ctx, in the order L2 CDP, L3 CDP, MBA
 * software controller.  Processing stops at the first step that reports a
 * non-zero result; nothing is rolled back here.
 *
 * Returns 0 if every requested feature was enabled (or none was requested),
 * otherwise the result of the step that failed.
 */
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
	int err;

	if (ctx->enable_cdpl2) {
		err = cdpl2_enable();
		if (err)
			return err;
	}

	if (ctx->enable_cdpl3) {
		err = cdpl3_enable();
		if (err)
			return err;
	}

	if (ctx->enable_mba_mbps)
		return set_mba_sc(true);

	return 0;
}

static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom;
struct rdt_resource *r;
struct dentry *dentry;
int ret;

cpus_read_lock();
Expand All @@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once.
*/
if (static_branch_unlikely(&rdt_enable_key)) {
dentry = ERR_PTR(-EBUSY);
ret = -EBUSY;
goto out;
}

ret = parse_rdtgroupfs_options(data);
if (ret) {
dentry = ERR_PTR(ret);
ret = rdt_enable_ctx(ctx);
if (ret < 0)
goto out_cdp;
}

closid_init();

ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret) {
dentry = ERR_PTR(ret);
goto out_cdp;
}
if (ret < 0)
goto out_mba;

if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
if (ret < 0)
goto out_info;
}
kernfs_get(kn_mongrp);

ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
if (ret) {
dentry = ERR_PTR(ret);
if (ret < 0)
goto out_mongrp;
}
kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}

ret = rdt_pseudo_lock_init();
if (ret) {
dentry = ERR_PTR(ret);
if (ret)
goto out_mondata;
}

dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
ret = kernfs_get_tree(fc);
if (ret < 0)
goto out_psl;

if (rdt_alloc_capable)
Expand Down Expand Up @@ -2059,14 +2024,95 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
out_mba:
if (ctx->enable_mba_mbps)
set_mba_sc(false);
out_cdp:
cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
return ret;
}

enum rdt_param {
Opt_cdp,
Opt_cdpl2,
Opt_mba_mpbs,
nr__rdt_params
};

/*
 * Mount parameters accepted by resctrl.  All three are plain flags that take
 * no value.
 *
 * The MBA software-controller flag is spelled "mba_MBps" (megabytes per
 * second).  The spelling is part of the userspace mount interface, so it
 * must not be altered; only the internal enumerator keeps its historical
 * name.
 */
static const struct fs_parameter_spec rdt_param_specs[] = {
	fsparam_flag("cdp",		Opt_cdp),
	fsparam_flag("cdpl2",		Opt_cdpl2),
	fsparam_flag("mba_MBps",	Opt_mba_mpbs),
	{}
};

/*
 * Parameter description handed to fs_parse() by rdt_parse_param(): names the
 * parser "rdt" and points it at the rdt_param_specs table.
 */
static const struct fs_parameter_description rdt_fs_parameters = {
.name = "rdt",
.specs = rdt_param_specs,
};

/*
 * Handle one mount parameter for resctrl.
 *
 * The parameter is looked up with fs_parse() against rdt_fs_parameters and
 * the matching request flag is recorded in the rdt_fs_context; nothing is
 * enabled in hardware at this point.
 *
 * Returns 0 when the parameter was recorded, the negative value from
 * fs_parse() when the lookup fails, or -EINVAL when the MBA
 * software-controller flag is given on a non-Intel boot CPU or the option
 * value is not handled by the switch.
 */
static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
int opt;

opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
if (opt < 0)
return opt;

/*
 * NOTE(review): the next line looks like a removed line of the old
 * rdt_mount() that the diff view interleaved here - confirm it is not part
 * of this function.
 */
return dentry;
switch (opt) {
case Opt_cdp:
ctx->enable_cdpl3 = true;
return 0;
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mpbs:
/* The MBA software controller is only accepted on Intel CPUs. */
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
return 0;
}

return -EINVAL;
}

/*
 * fs_context ->free handler for resctrl: hands the context to
 * kernfs_free_fs_context() and then frees the enclosing rdt_fs_context.
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	/* Look up the enclosing allocation before kernfs touches the context. */
	struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}

/*
 * fs_context operations for resctrl; installed on each new context by
 * rdt_init_fs_context().
 */
static const struct fs_context_operations rdt_fs_context_ops = {
/* Frees the rdt_fs_context. */
.free = rdt_fs_context_free,
/* Records one mount parameter in the rdt_fs_context. */
.parse_param = rdt_parse_param,
/* Applies the recorded parameters and sets up the mount. */
.get_tree = rdt_get_tree,
};

/*
 * Set up a fresh mount context for resctrl.
 *
 * Allocates a zeroed rdt_fs_context, fills in the kernfs root and superblock
 * magic, and attaches it to @fc together with the resctrl context
 * operations.  Any user namespace already held by @fc is dropped and
 * replaced with a reference on init_user_ns, and the context is marked
 * global.
 *
 * Returns 0 on success or -ENOMEM if the context cannot be allocated.
 */
static int rdt_init_fs_context(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx = kzalloc(sizeof(*rdt_ctx), GFP_KERNEL);

	if (!rdt_ctx)
		return -ENOMEM;

	rdt_ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
	rdt_ctx->kfc.root = rdt_root;

	fc->ops = &rdt_fs_context_ops;
	fc->fs_private = &rdt_ctx->kfc;

	/* Swap whatever namespace the context came with for init_user_ns. */
	if (fc->user_ns)
		put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(&init_user_ns);
	fc->global = true;

	return 0;
}

static int reset_all_ctrls(struct rdt_resource *r)
Expand Down Expand Up @@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
}

static struct file_system_type rdt_fs_type = {
.name = "resctrl",
.mount = rdt_mount,
.kill_sb = rdt_kill_sb,
.name = "resctrl",
.init_fs_context = rdt_init_fs_context,
.parameters = &rdt_fs_parameters,
.kill_sb = rdt_kill_sb,
};

static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
Expand Down
7 changes: 7 additions & 0 deletions fs/Kconfig
Expand Up @@ -8,6 +8,13 @@ menu "File systems"
config DCACHE_WORD_ACCESS
bool

config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.

if BLOCK

config FS_IOMAP
Expand Down
2 changes: 1 addition & 1 deletion fs/Makefile
Expand Up @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o
fs_types.o fs_context.o fs_parser.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
Expand Down
9 changes: 4 additions & 5 deletions fs/afs/internal.h
Expand Up @@ -36,15 +36,14 @@
struct pagevec;
struct afs_call;

struct afs_mount_params {
bool rwpath; /* T if the parent should be considered R/W */
struct afs_fs_context {
bool force; /* T to force cell type */
bool autocell; /* T if set auto mount operation */
bool dyn_root; /* T if dynamic root */
bool no_cell; /* T if the source is "none" (for dynroot) */
afs_voltype_t type; /* type of volume requested */
int volnamesz; /* size of volume name */
unsigned int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */
struct net *net_ns; /* Network namespace in effect */
struct afs_net *net; /* the AFS net namespace stuff */
struct afs_cell *cell; /* cell in which to find volume */
struct afs_volume *volume; /* volume record */
Expand Down Expand Up @@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
return volume;
}

extern struct afs_volume *afs_create_volume(struct afs_mount_params *);
extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern void afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
Expand Down

1 comment on commit 7b47a9e

@Fuehrerstand
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi together :)

I'm trying to understand the merges of the fs parts here. To my mind it's okay, conceptually, to use the mqueue. But did you ever try to test the mqueue under massive usage?

I know that looks like kidding, but what happens might risk a big stack overflow:

My test scenario is a simple one. I place a Perl script on a server and call it from clients. Up to this point all seems to be okay. The clients push the messages to the server, and they are pushed onto the holy list for delivery.

What happens if a connection of a client is broken?
Now we have a corpse in the holy list, but where is the hearse?

For a single one, it might be laughed off. But what happens at massive scale?
The list blows up and slows down any process that is involved with this list.

Where are the borders?
Sure, there are defined limits. But what then, if they are reached or exceeded?

I'd suggest adding a lifetime value that bounds how long each entry may remain. That way the holy list is self-cleaning. Affected processes could be informed by a signal and leave their loops.

@torvalds The IPC is a nice construct inside per mind. I'm not sure, if it replicated and extended up next stage, there will be a chance for optimizing big data processing massive. Returning the intention of reading here, so these words seems not to be alone.

Please sign in to comment.