Permalink
Browse files

fs: icache RCU free inodes

RCU free the struct inode. This will allow:

- Subsequent store-free path walking patch. The inode must be consulted for
  permissions when walking, so an RCU inode reference is a must.
- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
  to take i_lock no longer need to take sb_inode_list_lock to walk the list in
  the first place. This will simplify and optimize locking.
- Could remove some nested trylock loops in dcache code
- Could potentially simplify things a bit in VM land. Do not need to take the
  page lock to follow page->mapping.

The downside of this is the performance cost of using RCU. In a simple
creat/unlink microbenchmark, performance drops by about 10% due to inability to
reuse cache-hot slab objects. As iterations increase and RCU freeing starts
kicking over, this increases to about 20%.

In cases where inode lifetimes are longer (i.e. many inodes may be allocated
during the average life span of a single inode), a lot of this cache reuse is
not applicable, so the regression caused by this patch is smaller.

The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU;
however, this adds some complexity to list walking and store-free path walking,
so I prefer to implement this at a later date, if it is shown to be a win in
real situations. I haven't found a regression in any non-micro benchmark so I
doubt it will be a problem.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
  • Loading branch information...
Nick Piggin
Nick Piggin committed Jan 7, 2011
1 parent 77812a1 commit fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9
Showing with 490 additions and 68 deletions.
  1. +14 −0 Documentation/filesystems/porting
  2. +8 −2 arch/powerpc/platforms/cell/spufs/inode.c
  3. +8 −1 drivers/staging/pohmelfs/inode.c
  4. +8 −1 drivers/staging/smbfs/inode.c
  5. +8 −1 fs/9p/vfs_inode.c
  6. +8 −1 fs/adfs/super.c
  7. +8 −1 fs/affs/super.c
  8. +9 −1 fs/afs/super.c
  9. +8 −2 fs/befs/linuxvfs.c
  10. +8 −1 fs/bfs/inode.c
  11. +8 −1 fs/block_dev.c
  12. +8 −1 fs/btrfs/inode.c
  13. +10 −1 fs/ceph/inode.c
  14. +8 −1 fs/cifs/cifsfs.c
  15. +8 −1 fs/coda/inode.c
  16. +11 −1 fs/ecryptfs/super.c
  17. +8 −1 fs/efs/super.c
  18. +8 −1 fs/exofs/super.c
  19. +8 −1 fs/ext2/super.c
  20. +8 −1 fs/ext3/super.c
  21. +8 −1 fs/ext4/super.c
  22. +8 −1 fs/fat/inode.c
  23. +8 −1 fs/freevxfs/vxfs_inode.c
  24. +8 −1 fs/fuse/inode.c
  25. +8 −1 fs/gfs2/super.c
  26. +8 −1 fs/hfs/super.c
  27. +9 −1 fs/hfsplus/super.c
  28. +8 −1 fs/hostfs/hostfs_kern.c
  29. +8 −1 fs/hpfs/super.c
  30. +8 −1 fs/hppfs/hppfs.c
  31. +8 −1 fs/hugetlbfs/inode.c
  32. +9 −1 fs/inode.c
  33. +8 −1 fs/isofs/inode.c
  34. +8 −1 fs/jffs2/super.c
  35. +9 −1 fs/jfs/super.c
  36. +8 −1 fs/logfs/inode.c
  37. +8 −1 fs/minix/inode.c
  38. +8 −1 fs/ncpfs/inode.c
  39. +8 −1 fs/nfs/inode.c
  40. +9 −1 fs/nilfs2/super.c
  41. +8 −1 fs/ntfs/inode.c
  42. +8 −1 fs/ocfs2/dlmfs/dlmfs.c
  43. +8 −1 fs/ocfs2/super.c
  44. +8 −1 fs/openpromfs/inode.c
  45. +8 −1 fs/proc/inode.c
  46. +8 −1 fs/qnx4/inode.c
  47. +8 −1 fs/reiserfs/super.c
  48. +8 −1 fs/romfs/super.c
  49. +8 −1 fs/squashfs/super.c
  50. +8 −1 fs/sysv/inode.c
  51. +9 −1 fs/ubifs/super.c
  52. +8 −1 fs/udf/super.c
  53. +8 −1 fs/ufs/super.c
  54. +12 −1 fs/xfs/xfs_iget.c
  55. +4 −1 include/linux/fs.h
  56. +0 −1 include/linux/net.h
  57. +8 −1 ipc/mqueue.c
  58. +8 −1 mm/shmem.c
  59. +8 −8 net/socket.c
  60. +9 −1 net/sunrpc/rpc_pipe.c
@@ -346,3 +346,17 @@ look at examples of other filesystems) for guidance.
for details of what locks to replace dcache_lock with in order to protect
particular things. Most of the time, a filesystem only needs ->d_lock, which
protects *all* the dcache state of a given dentry.
+
+--
+[mandatory]
+
+ Filesystems must RCU-free their inodes, if they can have been accessed
+via rcu-walk path walk (basically, if the file can have had a path name in the
+vfs namespace).
+
+ i_dentry and i_rcu share storage in a union, and the vfs expects
+i_dentry to be reinitialized before it is freed, so an:
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+
+must be done in the RCU callback.
@@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void
-spufs_destroy_inode(struct inode *inode)
+static void spufs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
}
+static void spufs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
static void
spufs_init_once(void *p)
{
@@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = {
.set_page_dirty = __set_page_dirty_nobuffers,
};
+static void pohmelfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode));
+}
+
/*
* ->destroy_inode() callback. Deletes inode from the caches
* and frees private data.
@@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode)
dprintk("%s: pi: %p, inode: %p, ino: %llu.\n",
__func__, pi, &pi->vfs_inode, pi->ino);
- kmem_cache_free(pohmelfs_inode_cache, pi);
atomic_long_dec(&psb->total_inodes);
+ call_rcu(&inode->i_rcu, pohmelfs_i_callback);
}
/*
@@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void smb_destroy_inode(struct inode *inode)
+static void smb_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(smb_inode_cachep, SMB_I(inode));
}
+static void smb_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, smb_i_callback);
+}
+
static void init_once(void *foo)
{
struct smb_inode_info *ei = (struct smb_inode_info *) foo;
View
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
*
*/
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
}
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, v9fs_i_callback);
+}
#endif
/**
View
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
}
+static void adfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
View
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
return &i->vfs_inode;
}
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
+static void affs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
View
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
return &vnode->vfs_inode;
}
+static void afs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(afs_inode_cachep, vnode);
+}
+
/*
* destroy an AFS inode struct
*/
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
ASSERTCMP(vnode->server, ==, NULL);
- kmem_cache_free(afs_inode_cachep, vnode);
+ call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
}
View
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void
-befs_destroy_inode(struct inode *inode)
+static void befs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
+static void befs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, befs_i_callback);
+}
+
static void init_once(void *foo)
{
struct befs_inode_info *bi = (struct befs_inode_info *) foo;
View
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void bfs_destroy_inode(struct inode *inode)
+static void bfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
+static void bfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct bfs_inode_info *bi = foo;
View
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
View
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+static void btrfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
View
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
return &ci->vfs_inode;
}
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
void ceph_destroy_inode(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
if (ci->i_xattrs.prealloc_blob)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
- kmem_cache_free(ceph_inode_cachep, ci);
+ call_rcu(&inode->i_rcu, ceph_i_callback);
}
View
@@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb)
return &cifs_inode->vfs_inode;
}
+static void cifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+}
+
static void
cifs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+ call_rcu(&inode->i_rcu, cifs_i_callback);
}
static void
View
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void coda_destroy_inode(struct inode *inode)
+static void coda_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(coda_inode_cachep, ITOC(inode));
}
+static void coda_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, coda_i_callback);
+}
+
static void init_once(void *foo)
{
struct coda_inode_info *ei = (struct coda_inode_info *) foo;
View
@@ -62,6 +62,16 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
return inode;
}
+static void ecryptfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ecryptfs_inode_info *inode_info;
+ inode_info = ecryptfs_inode_to_private(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
/**
* ecryptfs_destroy_inode
* @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
}
}
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
- kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+ call_rcu(&inode->i_rcu, ecryptfs_i_callback);
}
/**
View
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void efs_destroy_inode(struct inode *inode)
+static void efs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
}
+static void efs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, efs_i_callback);
+}
+
static void init_once(void *foo)
{
struct efs_inode_info *ei = (struct efs_inode_info *) foo;
View
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
+static void exofs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
/*
* Remove an inode from the cache
*/
static void exofs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+ call_rcu(&inode->i_rcu, exofs_i_callback);
}
/*
Oops, something went wrong.

0 comments on commit fa0d7e3

Please sign in to comment.