Skip to content

Commit

Permalink
mm: drop tail page refcounting
Browse files Browse the repository at this point in the history
Tail page refcounting is utterly complicated and painful to support.

It uses ->_mapcount on tail pages to store how many times this page is
pinned.  get_page() bumps ->_mapcount on tail page in addition to
->_count on head.  This information is required by split_huge_page() to
be able to distribute pins from head of compound page to tails during
the split.

We will need ->_mapcount to account PTE mappings of subpages of the
compound page.  We eliminate need in current meaning of ->_mapcount in
tail pages by forbidding split entirely if the page is pinned.

The only user of tail page refcounting is THP which is marked BROKEN for
now.

Let's drop all this mess.  It makes get_page() and put_page() much
simpler.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
kiryl authored and torvalds committed Jan 16, 2016
1 parent ad0bed2 commit ddc58f2
Show file tree
Hide file tree
Showing 12 changed files with 40 additions and 466 deletions.
4 changes: 0 additions & 4 deletions arch/mips/mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
if (PageTail(page))
get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
Expand Down Expand Up @@ -153,8 +151,6 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
if (PageTail(page))
get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
Expand Down
13 changes: 1 addition & 12 deletions arch/powerpc/mm/hugetlbpage.c
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
{
unsigned long mask;
unsigned long pte_end;
struct page *head, *page, *tail;
struct page *head, *page;
pte_t pte;
int refs;

Expand All @@ -1022,7 +1022,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
head = pte_page(pte);

page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
Expand All @@ -1044,15 +1043,5 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
return 0;
}

/*
* Any tail page need their mapcount reference taken before we
* return.
*/
while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}
13 changes: 1 addition & 12 deletions arch/s390/mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask, result;
struct page *head, *page, *tail;
struct page *head, *page;
int refs;

result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
Expand All @@ -67,7 +67,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
Expand All @@ -88,16 +87,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
return 0;
}

/*
* Any tail page need their mapcount reference taken before we
* return.
*/
while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}

Expand Down
14 changes: 1 addition & 13 deletions arch/sparc/mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
put_page(head);
return 0;
}
if (head != page)
get_huge_page_tail(page);

pages[*nr] = page;
(*nr)++;
Expand All @@ -70,7 +68,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages,
int *nr)
{
struct page *head, *page, *tail;
struct page *head, *page;
int refs;

if (!(pmd_val(pmd) & _PAGE_VALID))
Expand All @@ -82,7 +80,6 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
Expand All @@ -103,15 +100,6 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
return 0;
}

/* Any tail page need their mapcount reference taken before we
* return.
*/
while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}

Expand Down
4 changes: 0 additions & 4 deletions arch/x86/mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
if (PageTail(page))
get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
Expand Down Expand Up @@ -212,8 +210,6 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
if (PageTail(page))
get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
Expand Down
47 changes: 10 additions & 37 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,44 +466,9 @@ static inline int page_count(struct page *page)
return atomic_read(&compound_head(page)->_count);
}

static inline bool __compound_tail_refcounted(struct page *page)
{
return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
}

/*
* This takes a head page as parameter and tells if the
* tail page reference counting can be skipped.
*
* For this to be safe, PageSlab and PageHeadHuge must remain true on
* any given page where they return true here, until all tail pins
* have been released.
*/
static inline bool compound_tail_refcounted(struct page *page)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
return __compound_tail_refcounted(page);
}

static inline void get_huge_page_tail(struct page *page)
{
/*
* __split_huge_page_refcount() cannot run from under us.
*/
VM_BUG_ON_PAGE(!PageTail(page), page);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
if (compound_tail_refcounted(compound_head(page)))
atomic_inc(&page->_mapcount);
}

extern bool __get_page_tail(struct page *page);

static inline void get_page(struct page *page)
{
if (unlikely(PageTail(page)))
if (likely(__get_page_tail(page)))
return;
page = compound_head(page);
/*
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_count.
Expand All @@ -528,7 +493,15 @@ static inline void init_page_count(struct page *page)
atomic_set(&page->_count, 1);
}

void put_page(struct page *page);
void __put_page(struct page *page);

static inline void put_page(struct page *page)
{
page = compound_head(page);
if (put_page_testzero(page))
__put_page(page);
}

void put_pages_list(struct list_head *pages);

void split_page(struct page *page, unsigned int order);
Expand Down
17 changes: 3 additions & 14 deletions include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,9 @@ struct page {

union {
/*
* Count of ptes mapped in
* mms, to show when page is
* mapped & limit reverse map
* searches.
*
* Used also for tail pages
* refcounting instead of
* _count. Tail pages cannot
* be mapped and keeping the
* tail page _count zero at
* all times guarantees
* get_page_unless_zero() will
* never succeed on tail
* pages.
* Count of ptes mapped in mms, to show
* when page is mapped & limit reverse
* map searches.
*/
atomic_t _mapcount;

Expand Down
34 changes: 4 additions & 30 deletions mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
}

if (flags & FOLL_GET)
get_page_foll(page);
get_page(page);
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
Expand Down Expand Up @@ -1153,7 +1153,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page, *tail;
struct page *head, *page;
int refs;

if (write && !pmd_write(orig))
Expand All @@ -1162,7 +1162,6 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
refs = 0;
head = pmd_page(orig);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
Expand All @@ -1183,24 +1182,13 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 0;
}

/*
* Any tail pages need their mapcount reference taken before we
* return. (This allows the THP code to bump their ref count when
* they are split into base pages).
*/
while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}

static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page, *tail;
struct page *head, *page;
int refs;

if (write && !pud_write(orig))
Expand All @@ -1209,7 +1197,6 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
refs = 0;
head = pud_page(orig);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
Expand All @@ -1230,12 +1217,6 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}

while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}

Expand All @@ -1244,15 +1225,14 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
struct page **pages, int *nr)
{
int refs;
struct page *head, *page, *tail;
struct page *head, *page;

if (write && !pgd_write(orig))
return 0;

refs = 0;
head = pgd_page(orig);
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
tail = page;
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
Expand All @@ -1273,12 +1253,6 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
return 0;
}

while (refs--) {
if (PageTail(tail))
get_huge_page_tail(tail);
tail++;
}

return 1;
}

Expand Down
Loading

0 comments on commit ddc58f2

Please sign in to comment.