Skip to content

Commit

Permalink
mm: embed the memcg pointer directly into struct page
Browse files Browse the repository at this point in the history
Memory cgroups used to have 5 per-page pointers.  To allow users to
disable that amount of overhead during runtime, those pointers were
allocated in a separate array, with a translation layer between them and
struct page.

There is now only one per-page pointer remaining: the memcg pointer, which
indicates which cgroup the page is associated with when charged.  The
complexity of runtime allocation and the runtime translation overhead are
no longer justified to save that *potential* 0.19% of memory.  With
CONFIG_SLUB, page->mem_cgroup actually sits in the doubleword padding
after the page->private member and doesn't even increase struct page,
and then this patch actually saves space.  Remaining users that care can
still compile their kernels without CONFIG_MEMCG.

     text    data     bss     dec     hex     filename
  8828345 1725264  983040 11536649 b00909  vmlinux.old
  8827425 1725264  966656 11519345 afc571  vmlinux.new

[mhocko@suse.cz: update Documentation/cgroups/memory.txt]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
hnaz authored and torvalds committed Dec 11, 2014
1 parent 22811c6 commit 1306a85
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 487 deletions.
5 changes: 5 additions & 0 deletions Documentation/cgroups/memory.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Memory Resource Controller

NOTE: This document is hopelessly outdated and needs a complete rewrite.
It still contains useful information, so we are keeping it here, but make
sure to check the current code if you need a deeper understanding.

NOTE: The Memory Resource Controller has generically been referred to as the
memory controller in this document. Do not confuse memory controller
used here with the memory controller that is used in hardware.
Expand Down
6 changes: 1 addition & 5 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <linux/jump_label.h>

struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
Expand Down Expand Up @@ -466,8 +465,6 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* memcg_kmem_uncharge_pages: uncharge pages from memcg
* @page: pointer to struct page being freed
* @order: allocation order.
*
* there is no need to specify memcg here, since it is embedded in page_cgroup
*/
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
Expand All @@ -484,8 +481,7 @@ memcg_kmem_uncharge_pages(struct page *page, int order)
*
* Needs to be called after memcg_kmem_newpage_charge, regardless of success or
* failure of the allocation. if @page is NULL, this function will revert the
* charges. Otherwise, it will commit the memcg given by @memcg to the
* corresponding page_cgroup.
* charges. Otherwise, it will commit @page to @memcg.
*/
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
Expand Down
5 changes: 5 additions & 0 deletions include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))

struct address_space;
struct mem_cgroup;

#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \
Expand Down Expand Up @@ -167,6 +168,10 @@ struct page {
struct page *first_page; /* Compound tail pages */
};

#ifdef CONFIG_MEMCG
struct mem_cgroup *mem_cgroup;
#endif

/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
Expand Down
12 changes: 0 additions & 12 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -722,9 +722,6 @@ typedef struct pglist_data {
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
#ifdef CONFIG_MEMCG
struct page_cgroup *node_page_cgroup;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
struct bootmem_data *bdata;
Expand Down Expand Up @@ -1078,7 +1075,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)

struct page;
struct page_cgroup;
struct mem_section {
/*
* This is, logically, a pointer to an array of struct
Expand All @@ -1096,14 +1092,6 @@ struct mem_section {

/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
#ifdef CONFIG_MEMCG
/*
* If SPARSEMEM, pgdat doesn't have a page_cgroup pointer. We use the
* section instead. (See memcontrol.h/page_cgroup.h about this.)
*/
struct page_cgroup *page_cgroup;
unsigned long pad;
#endif
/*
* WARNING: mem_section must be a power-of-2 in size for the
* calculation and use of SECTION_ROOT_MASK to make sense.
Expand Down
53 changes: 0 additions & 53 deletions include/linux/page_cgroup.h
Original file line number Diff line number Diff line change
@@ -1,59 +1,6 @@
#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H

struct pglist_data;

#ifdef CONFIG_MEMCG
struct mem_cgroup;

/*
* Page cgroup can be considered an extended mem_map: one struct
* page_cgroup is associated with every page descriptor and helps us
* identify information about the cgroup.
* All page_cgroups are allocated at boot or on a memory hotplug event,
* so the page_cgroup for a given pfn always exists.
*/
struct page_cgroup {
/* The memory cgroup the page is associated with when charged. */
struct mem_cgroup *mem_cgroup;
};

extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);

/*
* Two init hooks exist; which one is a real (extern) function depends on
* the memory model, and the other is an empty inline stub:
*   CONFIG_SPARSEMEM: page_cgroup_init() is extern,
*                     page_cgroup_init_flatmem() is a no-op.
*   otherwise:        page_cgroup_init_flatmem() is extern,
*                     page_cgroup_init() is a no-op.
*/
#ifdef CONFIG_SPARSEMEM
static inline void page_cgroup_init_flatmem(void)
{
}
extern void page_cgroup_init(void);
#else
extern void page_cgroup_init_flatmem(void);
static inline void page_cgroup_init(void)
{
}
#endif

struct page_cgroup *lookup_page_cgroup(struct page *page);

#else /* !CONFIG_MEMCG */
struct page_cgroup;

/* No-op stub used when CONFIG_MEMCG is not set; @pgdat is ignored. */
static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}

/*
* Stub used when CONFIG_MEMCG is not set: always returns NULL regardless
* of @page.
*/
static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
{
return NULL;
}

/* No-op stub used when CONFIG_MEMCG is not set. */
static inline void page_cgroup_init(void)
{
}

/* No-op stub used when CONFIG_MEMCG is not set. */
static inline void page_cgroup_init_flatmem(void)
{
}
#endif /* CONFIG_MEMCG */

#include <linux/swap.h>

#ifdef CONFIG_MEMCG_SWAP
Expand Down
7 changes: 0 additions & 7 deletions init/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
#include <linux/mempolicy.h>
#include <linux/key.h>
#include <linux/buffer_head.h>
#include <linux/page_cgroup.h>
#include <linux/debug_locks.h>
#include <linux/debugobjects.h>
#include <linux/lockdep.h>
Expand Down Expand Up @@ -485,11 +484,6 @@ void __init __weak thread_info_cache_init(void)
*/
static void __init mm_init(void)
{
/*
* page_cgroup requires contiguous pages,
* bigger than MAX_ORDER unless SPARSEMEM.
*/
page_cgroup_init_flatmem();
mem_init();
kmem_cache_init();
percpu_init_late();
Expand Down Expand Up @@ -627,7 +621,6 @@ asmlinkage __visible void __init start_kernel(void)
initrd_start = 0;
}
#endif
page_cgroup_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
Expand Down

0 comments on commit 1306a85

Please sign in to comment.