Skip to content

Commit

Permalink
RCU'd vfsmounts
Browse files Browse the repository at this point in the history
* RCU-delayed freeing of vfsmounts
* vfsmount_lock replaced with a seqlock (mount_lock)
* sequence number from mount_lock is stored in nameidata->m_seq and
used when we exit RCU mode
* new vfsmount flag - MNT_SYNC_UMOUNT.  Set by umount_tree() when its
caller knows that vfsmount will have no surviving references.
* synchronize_rcu() is done between unlocking namespace_sem in namespace_unlock()
and doing the pending mntput().
* new helper: legitimize_mnt(mnt, seq).  Checks the mount_lock sequence
number against seq, then grabs a reference to mnt.  Then it rechecks mount_lock
to close the race and either returns success or drops the reference it
has acquired.  The subtle point is that in case of MNT_SYNC_UMOUNT we can
simply decrement the refcount and sod off - aforementioned synchronize_rcu()
makes sure that final mntput() won't come until we leave RCU mode.  We need
that, since we don't want to end up with some lazy pathwalk racing with
umount() and stealing the final mntput() from it - caller of umount() may
expect it to return only once the fs is shut down and we don't want to break
that.  In other cases (i.e. with MNT_SYNC_UMOUNT absent) we have to do
full-blown mntput() in case of mount_lock sequence number mismatch happening
just as we'd grabbed the reference, but in those cases we won't be stealing
the final mntput() from anything that would care.
* mntput_no_expire() doesn't lock anything on the fast path now.  Incidentally,
SMP and UP cases are handled the same way - no ifdefs there.
* normal pathname resolution does *not* do any writes to mount_lock.  It does,
of course, bump the refcounts of vfsmount and dentry at the very end, but that's
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Al Viro committed Nov 9, 2013
1 parent 42c3260 commit 48a066e
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 83 deletions.
20 changes: 14 additions & 6 deletions fs/dcache.c
Expand Up @@ -2887,24 +2887,28 @@ static int prepend_path(const struct path *path,
struct vfsmount *vfsmnt = path->mnt;
struct mount *mnt = real_mount(vfsmnt);
int error = 0;
unsigned seq = 0;
unsigned seq, m_seq = 0;
char *bptr;
int blen;

br_read_lock(&vfsmount_lock);
rcu_read_lock();
restart_mnt:
read_seqbegin_or_lock(&mount_lock, &m_seq);
seq = 0;
restart:
bptr = *buffer;
blen = *buflen;
error = 0;
read_seqbegin_or_lock(&rename_lock, &seq);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;

if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
/* Global root? */
if (mnt_has_parent(mnt)) {
dentry = mnt->mnt_mountpoint;
mnt = mnt->mnt_parent;
if (mnt != parent) {
dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
mnt = parent;
vfsmnt = &mnt->mnt;
continue;
}
Expand Down Expand Up @@ -2938,7 +2942,11 @@ static int prepend_path(const struct path *path,
goto restart;
}
done_seqretry(&rename_lock, seq);
br_read_unlock(&vfsmount_lock);
if (need_seqretry(&mount_lock, m_seq)) {
m_seq = 1;
goto restart_mnt;
}
done_seqretry(&mount_lock, m_seq);

if (error >= 0 && bptr == *buffer) {
if (--blen < 0)
Expand Down
10 changes: 6 additions & 4 deletions fs/mount.h
@@ -1,7 +1,6 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/lglock.h>

struct mnt_namespace {
atomic_t count;
Expand Down Expand Up @@ -30,6 +29,7 @@ struct mount {
struct mount *mnt_parent;
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
struct rcu_head mnt_rcu;
#ifdef CONFIG_SMP
struct mnt_pcp __percpu *mnt_pcp;
#else
Expand Down Expand Up @@ -80,21 +80,23 @@ static inline int is_mounted(struct vfsmount *mnt)
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);

extern bool legitimize_mnt(struct vfsmount *, unsigned);

/*
 * Atomically increment ns->count for the given mount namespace.
 * NOTE(review): count is presumably the namespace's reference count, making
 * this the "get" half of a get/put pair - confirm against the matching put.
 */
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
atomic_inc(&ns->count);
}

extern struct lglock vfsmount_lock;
extern seqlock_t mount_lock;

static inline void lock_mount_hash(void)
{
br_write_lock(&vfsmount_lock);
write_seqlock(&mount_lock);
}

static inline void unlock_mount_hash(void)
{
br_write_unlock(&vfsmount_lock);
write_sequnlock(&mount_lock);
}

struct proc_mounts {
Expand Down
50 changes: 26 additions & 24 deletions fs/namei.c
Expand Up @@ -484,14 +484,12 @@ EXPORT_SYMBOL(path_put);

static inline void lock_rcu_walk(void)
{
br_read_lock(&vfsmount_lock);
rcu_read_lock();
}

static inline void unlock_rcu_walk(void)
{
rcu_read_unlock();
br_read_unlock(&vfsmount_lock);
}

/**
Expand All @@ -512,26 +510,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
BUG_ON(!(nd->flags & LOOKUP_RCU));

/*
* Get a reference to the parent first: we're
* going to make "path_put(nd->path)" valid in
* non-RCU context for "terminate_walk()".
*
* If this doesn't work, return immediately with
* RCU walking still active (and then we will do
* the RCU walk cleanup in terminate_walk()).
* After legitimizing the bastards, terminate_walk()
* will do the right thing for non-RCU mode, and all our
* subsequent exit cases should rcu_read_unlock()
* before returning. Do vfsmount first; if dentry
* can't be legitimized, just set nd->path.dentry to NULL
* and rely on dput(NULL) being a no-op.
*/
if (!lockref_get_not_dead(&parent->d_lockref))
if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
return -ECHILD;

/*
* After the mntget(), we terminate_walk() will do
* the right thing for non-RCU mode, and all our
* subsequent exit cases should unlock_rcu_walk()
* before returning.
*/
mntget(nd->path.mnt);
nd->flags &= ~LOOKUP_RCU;

if (!lockref_get_not_dead(&parent->d_lockref)) {
nd->path.dentry = NULL;
unlock_rcu_walk();
return -ECHILD;
}

/*
* For a negative lookup, the lookup sequence point is the parents
* sequence point, and it only needs to revalidate the parent dentry.
Expand Down Expand Up @@ -608,16 +603,21 @@ static int complete_walk(struct nameidata *nd)
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;

if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
unlock_rcu_walk();
return -ECHILD;
}
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
unlock_rcu_walk();
mntput(nd->path.mnt);
return -ECHILD;
}
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
unlock_rcu_walk();
dput(dentry);
mntput(nd->path.mnt);
return -ECHILD;
}
mntget(nd->path.mnt);
unlock_rcu_walk();
}

Expand Down Expand Up @@ -909,15 +909,15 @@ int follow_up(struct path *path)
struct mount *parent;
struct dentry *mountpoint;

br_read_lock(&vfsmount_lock);
read_seqlock_excl(&mount_lock);
parent = mnt->mnt_parent;
if (parent == mnt) {
br_read_unlock(&vfsmount_lock);
read_sequnlock_excl(&mount_lock);
return 0;
}
mntget(&parent->mnt);
mountpoint = dget(mnt->mnt_mountpoint);
br_read_unlock(&vfsmount_lock);
read_sequnlock_excl(&mount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
Expand Down Expand Up @@ -1048,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags)

/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
* namespace got unmounted before we managed to get the
* vfsmount_lock */
* namespace got unmounted before lookup_mnt() could
* get it */
}

/* Handle an automount point */
Expand Down Expand Up @@ -1864,6 +1864,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (flags & LOOKUP_RCU) {
lock_rcu_walk();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
nd->m_seq = read_seqbegin(&mount_lock);
} else {
path_get(&nd->path);
}
Expand All @@ -1872,6 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,

nd->root.mnt = NULL;

nd->m_seq = read_seqbegin(&mount_lock);
if (*name=='/') {
if (flags & LOOKUP_RCU) {
lock_rcu_walk();
Expand Down

0 comments on commit 48a066e

Please sign in to comment.