Skip to content

Commit

Permalink
vmscan: split LRU lists into anon & file sets
Browse files Browse the repository at this point in the history
Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Rik van Riel authored and torvalds committed Oct 20, 2008
1 parent b2e1853 commit 4f98a2f
Show file tree
Hide file tree
Showing 25 changed files with 562 additions and 367 deletions.
56 changes: 33 additions & 23 deletions drivers/base/node.c
Expand Up @@ -61,34 +61,44 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
si_meminfo_node(&i, nid);

n = sprintf(buf, "\n"
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
"Node %d MemUsed: %8lu kB\n"
"Node %d Active: %8lu kB\n"
"Node %d Inactive: %8lu kB\n"
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
"Node %d MemUsed: %8lu kB\n"
"Node %d Active: %8lu kB\n"
"Node %d Inactive: %8lu kB\n"
"Node %d Active(anon): %8lu kB\n"
"Node %d Inactive(anon): %8lu kB\n"
"Node %d Active(file): %8lu kB\n"
"Node %d Inactive(file): %8lu kB\n"
#ifdef CONFIG_HIGHMEM
"Node %d HighTotal: %8lu kB\n"
"Node %d HighFree: %8lu kB\n"
"Node %d LowTotal: %8lu kB\n"
"Node %d LowFree: %8lu kB\n"
"Node %d HighTotal: %8lu kB\n"
"Node %d HighFree: %8lu kB\n"
"Node %d LowTotal: %8lu kB\n"
"Node %d LowFree: %8lu kB\n"
#endif
"Node %d Dirty: %8lu kB\n"
"Node %d Writeback: %8lu kB\n"
"Node %d FilePages: %8lu kB\n"
"Node %d Mapped: %8lu kB\n"
"Node %d AnonPages: %8lu kB\n"
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d WritebackTmp: %8lu kB\n"
"Node %d Slab: %8lu kB\n"
"Node %d SReclaimable: %8lu kB\n"
"Node %d SUnreclaim: %8lu kB\n",
"Node %d Dirty: %8lu kB\n"
"Node %d Writeback: %8lu kB\n"
"Node %d FilePages: %8lu kB\n"
"Node %d Mapped: %8lu kB\n"
"Node %d AnonPages: %8lu kB\n"
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d WritebackTmp: %8lu kB\n"
"Node %d Slab: %8lu kB\n"
"Node %d SReclaimable: %8lu kB\n"
"Node %d SUnreclaim: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
nid, K(node_page_state(nid, NR_ACTIVE)),
nid, K(node_page_state(nid, NR_INACTIVE)),
nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
node_page_state(nid, NR_ACTIVE_FILE)),
nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
node_page_state(nid, NR_INACTIVE_FILE)),
nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
#ifdef CONFIG_HIGHMEM
nid, K(i.totalhigh),
nid, K(i.freehigh),
Expand Down
4 changes: 2 additions & 2 deletions fs/cifs/file.c
Expand Up @@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
if (!pagevec_add(plru_pvec, page))
__pagevec_lru_add(plru_pvec);
__pagevec_lru_add_file(plru_pvec);
data += PAGE_CACHE_SIZE;
}
return;
Expand Down Expand Up @@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
bytes_read = 0;
}

pagevec_lru_add(&lru_pvec);
pagevec_lru_add_file(&lru_pvec);

/* need to free smb_read_data buf before exit */
if (smb_read_data) {
Expand Down
2 changes: 1 addition & 1 deletion fs/nfs/dir.c
Expand Up @@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
GFP_KERNEL)) {
pagevec_add(&lru_pvec, page);
pagevec_lru_add(&lru_pvec);
pagevec_lru_add_file(&lru_pvec);
SetPageUptodate(page);
unlock_page(page);
} else
Expand Down
4 changes: 2 additions & 2 deletions fs/ntfs/file.c
Expand Up @@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
pages[nr] = *cached_page;
page_cache_get(*cached_page);
if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
__pagevec_lru_add(lru_pvec);
__pagevec_lru_add_file(lru_pvec);
*cached_page = NULL;
}
index++;
Expand Down Expand Up @@ -2084,7 +2084,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
OSYNC_METADATA|OSYNC_DATA);
}
}
pagevec_lru_add(&lru_pvec);
pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
(long)status);
Expand Down
77 changes: 45 additions & 32 deletions fs/proc/proc_misc.c
Expand Up @@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
unsigned long allowed;
struct vmalloc_info vmi;
long cached;
unsigned long pages[NR_LRU_LISTS];
int lru;

/*
* display in kilobytes.
Expand All @@ -154,51 +156,62 @@ static int meminfo_read_proc(char *page, char **start, off_t off,

get_vmalloc_info(&vmi);

for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
pages[lru] = global_page_state(NR_LRU_BASE + lru);

/*
* Tagged format, for easy grepping and expansion.
*/
len = sprintf(page,
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"SwapCached: %8lu kB\n"
"Active: %8lu kB\n"
"Inactive: %8lu kB\n"
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"SwapCached: %8lu kB\n"
"Active: %8lu kB\n"
"Inactive: %8lu kB\n"
"Active(anon): %8lu kB\n"
"Inactive(anon): %8lu kB\n"
"Active(file): %8lu kB\n"
"Inactive(file): %8lu kB\n"
#ifdef CONFIG_HIGHMEM
"HighTotal: %8lu kB\n"
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
"HighTotal: %8lu kB\n"
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
"Writeback: %8lu kB\n"
"AnonPages: %8lu kB\n"
"Mapped: %8lu kB\n"
"Slab: %8lu kB\n"
"SReclaimable: %8lu kB\n"
"SUnreclaim: %8lu kB\n"
"PageTables: %8lu kB\n"
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
"Writeback: %8lu kB\n"
"AnonPages: %8lu kB\n"
"Mapped: %8lu kB\n"
"Slab: %8lu kB\n"
"SReclaimable: %8lu kB\n"
"SUnreclaim: %8lu kB\n"
"PageTables: %8lu kB\n"
#ifdef CONFIG_QUICKLIST
"Quicklists: %8lu kB\n"
"Quicklists: %8lu kB\n"
#endif
"NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
"WritebackTmp: %8lu kB\n"
"CommitLimit: %8lu kB\n"
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n"
"VmallocChunk: %8lu kB\n",
"NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
"WritebackTmp: %8lu kB\n"
"CommitLimit: %8lu kB\n"
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n"
"VmallocChunk: %8lu kB\n",
K(i.totalram),
K(i.freeram),
K(i.bufferram),
K(cached),
K(total_swapcache_pages),
K(global_page_state(NR_ACTIVE)),
K(global_page_state(NR_INACTIVE)),
K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
K(pages[LRU_ACTIVE_ANON]),
K(pages[LRU_INACTIVE_ANON]),
K(pages[LRU_ACTIVE_FILE]),
K(pages[LRU_INACTIVE_FILE]),
#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
Expand Down
4 changes: 2 additions & 2 deletions fs/ramfs/file-nommu.c
Expand Up @@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
goto add_error;

if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
__pagevec_lru_add_file(&lru_pvec);

unlock_page(page);
}

pagevec_lru_add(&lru_pvec);
pagevec_lru_add_file(&lru_pvec);
return 0;

fsize_exceeded:
Expand Down
13 changes: 13 additions & 0 deletions include/linux/backing-dev.h
Expand Up @@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
* BDI_CAP_READ_MAP: Can be mapped for reading
* BDI_CAP_WRITE_MAP: Can be mapped for writing
* BDI_CAP_EXEC_MAP: Can be mapped for execution
*
* BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
*/
#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
#define BDI_CAP_NO_WRITEBACK 0x00000002
Expand All @@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_WRITE_MAP 0x00000020
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
#define BDI_CAP_SWAP_BACKED 0x00000100

#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
Expand Down Expand Up @@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
BDI_CAP_NO_WRITEBACK));
}

static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
{
return bdi->capabilities & BDI_CAP_SWAP_BACKED;
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
Expand All @@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
return bdi_cap_account_dirty(mapping->backing_dev_info);
}

static inline bool mapping_cap_swap_backed(struct address_space *mapping)
{
return bdi_cap_swap_backed(mapping->backing_dev_info);
}

#endif /* _LINUX_BACKING_DEV_H */
2 changes: 1 addition & 1 deletion include/linux/memcontrol.h
Expand Up @@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
int active);
int active, int file);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);

Expand Down
50 changes: 38 additions & 12 deletions include/linux/mm_inline.h
Expand Up @@ -5,7 +5,7 @@
* page_is_file_cache - should the page be on a file LRU or anon LRU?
* @page: the page to test
*
* Returns !0 if @page is page cache page backed by a regular filesystem,
* Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
* Used by functions that manipulate the LRU lists, to sort a page
* onto the right LRU list.
Expand All @@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
return 0;

/* The page is page cache backed by a normal filesystem. */
return 1;
return LRU_FILE;
}

static inline void
Expand All @@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
}

static inline void
add_page_to_active_list(struct zone *zone, struct page *page)
add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
{
add_page_to_lru_list(zone, page, LRU_ACTIVE);
add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
}

static inline void
add_page_to_inactive_list(struct zone *zone, struct page *page)
add_page_to_active_anon_list(struct zone *zone, struct page *page)
{
add_page_to_lru_list(zone, page, LRU_INACTIVE);
add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
}

static inline void
del_page_from_active_list(struct zone *zone, struct page *page)
add_page_to_inactive_file_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_ACTIVE);
add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
}

static inline void
del_page_from_inactive_list(struct zone *zone, struct page *page)
add_page_to_active_file_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_INACTIVE);
add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
}

static inline void
del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
}

static inline void
del_page_from_active_anon_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
}

static inline void
del_page_from_inactive_file_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
}

static inline void
del_page_from_active_file_list(struct zone *zone, struct page *page)
{
del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
}

static inline void
del_page_from_lru(struct zone *zone, struct page *page)
{
enum lru_list l = LRU_INACTIVE;
enum lru_list l = LRU_BASE;

list_del(&page->lru);
if (PageActive(page)) {
__ClearPageActive(page);
l = LRU_ACTIVE;
l += LRU_ACTIVE;
}
l += page_is_file_cache(page);
__dec_zone_state(zone, NR_LRU_BASE + l);
}

Expand All @@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)

if (PageActive(page))
lru += LRU_ACTIVE;
lru += page_is_file_cache(page);

return lru;
}
Expand Down

0 comments on commit 4f98a2f

Please sign in to comment.