
mm: cleancache core ops functions and config

This third patch of eight in this cleancache series provides
the core code for cleancache that interfaces between the hooks in
the VFS and individual filesystems and a cleancache backend.  It also
includes the build and config changes.

Two new files are added: mm/cleancache.c and include/linux/cleancache.h.

Note that CONFIG_CLEANCACHE could default to on: on systems that do
not provide a cleancache backend, every hook devolves to a simple
check of a global enable flag, so the performance impact should be
negligible, and it can be reduced to zero by configuring the option
off.  For this first commit, however, it defaults to off.

Details and a FAQ can be found in Documentation/vm/cleancache.txt

Credits: cleancache_ops design derived from Jeremy Fitzhardinge's
design for tmem

[v8: dan.magenheimer@oracle.com: fix exportfs call affecting btrfs]
[v8: akpm@linux-foundation.org: use static inline function, not macro]
[v7: dan.magenheimer@oracle.com: cleanup sysfs and remove cleancache prefix]
[v6: JBeulich@novell.com: robustly handle buggy fs encode_fh actor definition]
[v5: jeremy@goop.org: clean up global usage and static var names]
[v5: jeremy@goop.org: simplify init hook and any future fs init changes]
[v5: hch@infradead.org: cleaner non-global interface for ops registration]
[v4: adilger@sun.com: interface must support exportfs FS's]
[v4: hch@infradead.org: interface must support 64-bit FS on 32-bit kernel]
[v3: akpm@linux-foundation.org: use one ops struct to avoid pointer hops]
[v3: akpm@linux-foundation.org: document and ensure PageLocked reqts are met]
[v3: ngupta@vflare.org: fix success/fail codes, change funcs to void]
[v2: viro@ZenIV.linux.org.uk: use sane types]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Jeremy Fitzhardinge <jeremy@goop.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Nitin Gupta <ngupta@vflare.org>
Acked-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Acked-by: Jan Beulich <JBeulich@novell.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <joel.becker@oracle.com>
commit 077b1f83a69d94f2918630a882d74939baca0bce (1 parent: 9fdfdcf)
Author: Dan Magenheimer
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
new file mode 100644
--- /dev/null
+++ b/include/linux/cleancache.h
@@ -0,0 +1,122 @@
+#ifndef _LINUX_CLEANCACHE_H
+#define _LINUX_CLEANCACHE_H
+
+#include <linux/fs.h>
+#include <linux/exportfs.h>
+#include <linux/mm.h>
+
+#define CLEANCACHE_KEY_MAX 6
+
+/*
+ * cleancache requires every file with a page in cleancache to have a
+ * unique key unless/until the file is removed/truncated.  For some
+ * filesystems, the inode number is unique, but for "modern" filesystems
+ * an exportable filehandle is required (see exportfs.h)
+ */
+struct cleancache_filekey {
+	union {
+		ino_t ino;
+		__u32 fh[CLEANCACHE_KEY_MAX];
+		u32 key[CLEANCACHE_KEY_MAX];
+	} u;
+};
+
+struct cleancache_ops {
+	int (*init_fs)(size_t);
+	int (*init_shared_fs)(char *uuid, size_t);
+	int (*get_page)(int, struct cleancache_filekey,
+			pgoff_t, struct page *);
+	void (*put_page)(int, struct cleancache_filekey,
+			pgoff_t, struct page *);
+	void (*flush_page)(int, struct cleancache_filekey, pgoff_t);
+	void (*flush_inode)(int, struct cleancache_filekey);
+	void (*flush_fs)(int);
+};
+
+extern struct cleancache_ops
+	cleancache_register_ops(struct cleancache_ops *ops);
+extern void __cleancache_init_fs(struct super_block *);
+extern void __cleancache_init_shared_fs(char *, struct super_block *);
+extern int __cleancache_get_page(struct page *);
+extern void __cleancache_put_page(struct page *);
+extern void __cleancache_flush_page(struct address_space *, struct page *);
+extern void __cleancache_flush_inode(struct address_space *);
+extern void __cleancache_flush_fs(struct super_block *);
+extern int cleancache_enabled;
+
+#ifdef CONFIG_CLEANCACHE
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return page->mapping->host->i_sb->cleancache_poolid >= 0;
+}
+static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
+{
+	return mapping->host->i_sb->cleancache_poolid >= 0;
+}
+#else
+#define cleancache_enabled (0)
+#define cleancache_fs_enabled(_page) (0)
+#define cleancache_fs_enabled_mapping(_page) (0)
+#endif
+
+/*
+ * The shim layer provided by these inline functions allows the compiler
+ * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
+ * is disabled, to a single global variable check if CONFIG_CLEANCACHE
+ * is enabled but no cleancache "backend" has dynamically enabled it,
+ * and, for the most frequent cleancache ops, to a single global variable
+ * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
+ * and a cleancache backend has dynamically enabled cleancache, but the
+ * filesystem referenced by that cleancache op has not enabled cleancache.
+ * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
+ * no measurable performance impact.
+ */
+
+static inline void cleancache_init_fs(struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_init_fs(sb);
+}
+
+static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_init_shared_fs(uuid, sb);
+}
+
+static inline int cleancache_get_page(struct page *page)
+{
+	int ret = -1;
+
+	if (cleancache_enabled && cleancache_fs_enabled(page))
+		ret = __cleancache_get_page(page);
+	return ret;
+}
+
+static inline void cleancache_put_page(struct page *page)
+{
+	if (cleancache_enabled && cleancache_fs_enabled(page))
+		__cleancache_put_page(page);
+}
+
+static inline void cleancache_flush_page(struct address_space *mapping,
+					struct page *page)
+{
+	/* careful... page->mapping is NULL sometimes when this is called */
+	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
+		__cleancache_flush_page(mapping, page);
+}
+
+static inline void cleancache_flush_inode(struct address_space *mapping)
+{
+	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
+		__cleancache_flush_inode(mapping);
+}
+
+static inline void cleancache_flush_fs(struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_flush_fs(sb);
+}
+
+#endif /* _LINUX_CLEANCACHE_H */
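
To make the backend side of this interface concrete, here is a minimal
sketch of a cleancache backend written against cleancache.h as defined
above.  It is illustrative only: the dummy_* names and the
discard-everything behavior are hypothetical, invented for this note;
a real backend (e.g. zcache or the Xen tmem driver) would store and
retrieve page contents in transcendent memory instead.

	/* Hypothetical skeleton backend -- not part of this patch. */
	#include <linux/module.h>
	#include <linux/cleancache.h>

	static int dummy_pool_count;

	/* Hand out a pool id when a filesystem mounts with cleancache. */
	static int dummy_init_fs(size_t pagesize)
	{
		return dummy_pool_count++;	/* negative means "refused" */
	}

	static int dummy_init_shared_fs(char *uuid, size_t pagesize)
	{
		return dummy_pool_count++;
	}

	/* A get must fill the page and return 0, or return -1 and leave
	 * the pageframe unchanged. */
	static int dummy_get_page(int pool_id, struct cleancache_filekey key,
				  pgoff_t index, struct page *page)
	{
		return -1;			/* nothing cached here */
	}

	/* A put may silently discard the data; a later get then fails. */
	static void dummy_put_page(int pool_id, struct cleancache_filekey key,
				   pgoff_t index, struct page *page)
	{
	}

	static void dummy_flush_page(int pool_id, struct cleancache_filekey key,
				     pgoff_t index)
	{
	}

	static void dummy_flush_inode(int pool_id, struct cleancache_filekey key)
	{
	}

	static void dummy_flush_fs(int pool_id)
	{
	}

	static struct cleancache_ops dummy_ops = {
		.init_fs = dummy_init_fs,
		.init_shared_fs = dummy_init_shared_fs,
		.get_page = dummy_get_page,
		.put_page = dummy_put_page,
		.flush_page = dummy_flush_page,
		.flush_inode = dummy_flush_inode,
		.flush_fs = dummy_flush_fs,
	};

	static int __init dummy_cleancache_init(void)
	{
		/* The previous ops are returned so multiple/nested
		 * backends can be detected. */
		struct cleancache_ops old = cleancache_register_ops(&dummy_ops);

		if (old.init_fs != NULL)
			pr_warn("dummy: another cleancache backend was registered\n");
		return 0;
	}
	module_init(dummy_cleancache_init);

Registering flips cleancache_enabled on, after which the inline shims
above start forwarding hooks into these ops; until then, every hook
reduces to the global flag check described in the comment block.
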
diff --git a/mm/Kconfig b/mm/Kconfig
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM
 	depends on !SMP
 	bool
 	default y
+
+config CLEANCACHE
+	bool "Enable cleancache driver to cache clean pages if tmem is present"
+	default n
+	help
+	  Cleancache can be thought of as a page-granularity victim cache
+	  for clean pages that the kernel's pageframe replacement algorithm
+	  (PFRA) would like to keep around, but can't since there isn't enough
+	  memory.  So when the PFRA "evicts" a page, it first attempts to use
+	  cleancache code to put the data contained in that page into
+	  "transcendent memory", memory that is not directly accessible or
+	  addressable by the kernel and is of unknown and possibly
+	  time-varying size.  And when a cleancache-enabled
+	  filesystem wishes to access a page in a file on disk, it first
+	  checks cleancache to see if it already contains it; if it does,
+	  the page is copied into the kernel and a disk access is avoided.
+	  When a transcendent memory driver is available (such as zcache or
+	  Xen transcendent memory), a significant I/O reduction
+	  may be achieved.  When none is available, all cleancache calls
+	  are reduced to a single check of a global enable flag, resulting
+	  in a negligible performance hit.
+
+	  If unsure, say Y to enable cleancache
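
As a configuration note, enabling the option at build time is all that
is needed on the kernel side; a hypothetical .config fragment would
simply read:

	CONFIG_CLEANCACHE=y

Per-filesystem participation still happens at runtime through
cleancache_init_fs() at mount, and the hooks stay inert until a backend
calls cleancache_register_ops().
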
diff --git a/mm/Makefile b/mm/Makefile
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
+obj-$(CONFIG_CLEANCACHE) += cleancache.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
new file mode 100644
--- /dev/null
+++ b/mm/cleancache.c
@@ -0,0 +1,244 @@
+/*
+ * Cleancache frontend
+ *
+ * This code provides the generic "frontend" layer to call a matching
+ * "backend" driver implementation of cleancache.  See
+ * Documentation/vm/cleancache.txt for more information.
+ *
+ * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
+ * Author: Dan Magenheimer
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/exportfs.h>
+#include <linux/mm.h>
+#include <linux/cleancache.h>
+
+/*
+ * This global enablement flag may be read thousands of times per second
+ * by cleancache_get/put/flush even on systems where cleancache_ops
+ * is not claimed (e.g. cleancache is config'ed on but remains
+ * disabled), so is preferred to the slower alternative: a function
+ * call that checks a non-global.
+ */
+int cleancache_enabled;
+EXPORT_SYMBOL(cleancache_enabled);
+
+/*
+ * cleancache_ops is set by cleancache_register_ops to contain the pointers
+ * to the cleancache "backend" implementation functions.
+ */
+static struct cleancache_ops cleancache_ops;
+
+/* useful stats available in /sys/kernel/mm/cleancache */
+static unsigned long cleancache_succ_gets;
+static unsigned long cleancache_failed_gets;
+static unsigned long cleancache_puts;
+static unsigned long cleancache_flushes;
+
+/*
+ * register operations for cleancache, returning previous thus allowing
+ * detection of multiple backends and possible nesting
+ */
+struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
+{
+	struct cleancache_ops old = cleancache_ops;
+
+	cleancache_ops = *ops;
+	cleancache_enabled = 1;
+	return old;
+}
+EXPORT_SYMBOL(cleancache_register_ops);
+
+/* Called by a cleancache-enabled filesystem at time of mount */
+void __cleancache_init_fs(struct super_block *sb)
+{
+	sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
+}
+EXPORT_SYMBOL(__cleancache_init_fs);
+
+/* Called by a cleancache-enabled clustered filesystem at time of mount */
+void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
+{
+	sb->cleancache_poolid =
+		(*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
+}
+EXPORT_SYMBOL(__cleancache_init_shared_fs);
+
+/*
+ * If the filesystem uses exportable filehandles, use the filehandle as
+ * the key, else use the inode number.
+ */
+static int cleancache_get_key(struct inode *inode,
+			      struct cleancache_filekey *key)
+{
+	int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
+	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
+	struct super_block *sb = inode->i_sb;
+
+	key->u.ino = inode->i_ino;
+	if (sb->s_export_op != NULL) {
+		fhfn = sb->s_export_op->encode_fh;
+		if (fhfn) {
+			struct dentry d;
+			d.d_inode = inode;
+			len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
+			if (len <= 0 || len == 255)
+				return -1;
+			if (maxlen > CLEANCACHE_KEY_MAX)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * "Get" data from cleancache associated with the poolid/inode/index
+ * that were specified when the data was put to cleancache and, if
+ * successful, use it to fill the specified page with data and return 0.
+ * If the get fails, -1 is returned and the pageframe is unchanged.
+ * Page must be locked by caller.
+ */
+int __cleancache_get_page(struct page *page)
+{
+	int ret = -1;
+	int pool_id;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	VM_BUG_ON(!PageLocked(page));
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
+	if (pool_id < 0)
+		goto out;
+
+	if (cleancache_get_key(page->mapping->host, &key) < 0)
+		goto out;
+
+	ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
+	if (ret == 0)
+		cleancache_succ_gets++;
+	else
+		cleancache_failed_gets++;
+out:
+	return ret;
+}
+EXPORT_SYMBOL(__cleancache_get_page);
+
+/*
+ * "Put" data from a page to cleancache and associate it with the
+ * (previously-obtained per-filesystem) poolid and the page's
+ * inode and page index.  Page must be locked.  Note that a put_page
+ * always "succeeds", though a subsequent get_page may succeed or fail.
+ */
+void __cleancache_put_page(struct page *page)
+{
+	int pool_id;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	VM_BUG_ON(!PageLocked(page));
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
+	if (pool_id >= 0 &&
+	    cleancache_get_key(page->mapping->host, &key) >= 0) {
+		(*cleancache_ops.put_page)(pool_id, key, page->index, page);
+		cleancache_puts++;
+	}
+}
+EXPORT_SYMBOL(__cleancache_put_page);
+
+/*
+ * Flush any data from cleancache associated with the poolid and the
+ * page's inode and page index so that a subsequent "get" will fail.
+ */
+void __cleancache_flush_page(struct address_space *mapping, struct page *page)
+{
+	/* careful... page->mapping is NULL sometimes when this is called */
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (pool_id >= 0) {
+		VM_BUG_ON(!PageLocked(page));
+		if (cleancache_get_key(mapping->host, &key) >= 0) {
+			(*cleancache_ops.flush_page)(pool_id, key, page->index);
+			cleancache_flushes++;
+		}
+	}
+}
+EXPORT_SYMBOL(__cleancache_flush_page);
+
+/*
+ * Flush all data from cleancache associated with the poolid and the
+ * mapping's inode so that all subsequent gets to this poolid/inode
+ * will fail.
+ */
+void __cleancache_flush_inode(struct address_space *mapping)
+{
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
+		(*cleancache_ops.flush_inode)(pool_id, key);
+}
+EXPORT_SYMBOL(__cleancache_flush_inode);
+
+/*
+ * Called by any cleancache-enabled filesystem at time of unmount;
+ * note that pool_id is surrendered and may be returned by a subsequent
+ * cleancache_init_fs or cleancache_init_shared_fs
+ */
+void __cleancache_flush_fs(struct super_block *sb)
+{
+	if (sb->cleancache_poolid >= 0) {
+		int old_poolid = sb->cleancache_poolid;
+		sb->cleancache_poolid = -1;
+		(*cleancache_ops.flush_fs)(old_poolid);
+	}
+}
+EXPORT_SYMBOL(__cleancache_flush_fs);
+
+#ifdef CONFIG_SYSFS
+
+/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */
+
+#define CLEANCACHE_SYSFS_RO(_name) \
+	static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
+				struct kobj_attribute *attr, char *buf) \
+	{ \
+		return sprintf(buf, "%lu\n", cleancache_##_name); \
+	} \
+	static struct kobj_attribute cleancache_##_name##_attr = { \
+		.attr = { .name = __stringify(_name), .mode = 0444 }, \
+		.show = cleancache_##_name##_show, \
+	}
+
+CLEANCACHE_SYSFS_RO(succ_gets);
+CLEANCACHE_SYSFS_RO(failed_gets);
+CLEANCACHE_SYSFS_RO(puts);
+CLEANCACHE_SYSFS_RO(flushes);
+
+static struct attribute *cleancache_attrs[] = {
+	&cleancache_succ_gets_attr.attr,
+	&cleancache_failed_gets_attr.attr,
+	&cleancache_puts_attr.attr,
+	&cleancache_flushes_attr.attr,
+	NULL,
+};
+
+static struct attribute_group cleancache_attr_group = {
+	.attrs = cleancache_attrs,
+	.name = "cleancache",
+};
+
+#endif /* CONFIG_SYSFS */
+
+static int __init init_cleancache(void)
+{
+#ifdef CONFIG_SYSFS
+	int err;
+
+	err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
+	if (err)
+		return err;
+#endif /* CONFIG_SYSFS */
+	return 0;
+}
+module_init(init_cleancache)
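
For context on how the frontend above gets exercised, the following
sketch shows roughly where a cleancache-enabled filesystem would call
the wrappers from cleancache.h.  The hooks themselves are added by
other patches in this series; the myfs_* names, including
myfs_readpage_from_disk, are hypothetical and used only for
illustration.

	#include <linux/pagemap.h>
	#include <linux/cleancache.h>

	/* At mount time, ask the backend for a per-filesystem pool id. */
	static int myfs_fill_super(struct super_block *sb, void *data, int silent)
	{
		/* ... normal superblock setup ... */
		cleancache_init_fs(sb);		/* sets sb->cleancache_poolid */
		return 0;
	}

	/* In the read path, try cleancache before touching the disk. */
	static int myfs_readpage(struct file *file, struct page *page)
	{
		if (cleancache_get_page(page) == 0) {	/* hit: page now filled */
			SetPageUptodate(page);
			unlock_page(page);
			return 0;			/* disk access avoided */
		}
		return myfs_readpage_from_disk(file, page);	/* hypothetical */
	}

On the other side, page reclaim would call cleancache_put_page() as it
drops a clean page, and the truncate and unmount paths call the
cleancache_flush_*() wrappers so that stale gets fail.  The resulting
traffic is visible in the counters exported under
/sys/kernel/mm/cleancache/ (succ_gets, failed_gets, puts, flushes).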
