thp: allocate memory in khugepaged outside of mmap_sem write mode
This tries to be more friendly to filesystems in userland, with userland
backends that allocate memory in their I/O paths and that could deadlock
if khugepaged holds the userland backend's mmap_sem in write mode while
allocating memory.  The memory allocation may wait for writeback I/O
completion from the daemon, and the daemon may in turn be blocked waiting
for mmap_sem in read mode if a page fault happens and it wasn't using
mlock for the memory required for I/O submission and completion.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
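
To make the ordering change concrete, below is a minimal userspace sketch,
not kernel code: pthread_rwlock_t stands in for mmap_sem and malloc() stands
in for the hugepage allocation that may wait on I/O served by a userland
daemon.  Only the names taken from the patch (mmap_sem, collapse_huge_page)
are real; everything else is illustrative.

/*
 * Userspace model (build with: cc -pthread model.c) of the ordering
 * change in collapse_huge_page().  pthread_rwlock_t models mmap_sem;
 * malloc() models the hugepage allocation that may block on I/O
 * handled by a userland daemon which needs the read side of the lock
 * to service its own page faults.
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Old ordering: allocate with the write side held.  If the allocation
 * waits on the daemon and the daemon faults (needing the read side),
 * neither can make progress. */
static void *collapse_old(void)
{
	pthread_rwlock_wrlock(&mmap_sem);
	void *hpage = malloc(2UL << 20);	/* may wait on daemon I/O */
	/* ... copy pages and install the huge pmd ... */
	pthread_rwlock_unlock(&mmap_sem);
	return hpage;
}

/* New ordering: allocate under the read side only, drop it, then take
 * the write side just for the pagetable rewrite.  A faulting daemon
 * can still take the read side while the allocation waits. */
static void *collapse_new(void)
{
	pthread_rwlock_rdlock(&mmap_sem);
	void *hpage = malloc(2UL << 20);	/* daemon faults still succeed */
	pthread_rwlock_unlock(&mmap_sem);

	pthread_rwlock_wrlock(&mmap_sem);
	/* ... revalidate the vma, copy pages, install the huge pmd ... */
	pthread_rwlock_unlock(&mmap_sem);
	return hpage;
}

int main(void)
{
	free(collapse_old());
	free(collapse_new());
	return 0;
}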
aagit authored and torvalds committed Jan 14, 2011
1 parent 0bbbc0b commit ce83d21
56 changes: 34 additions & 22 deletions mm/huge_memory.c
@@ -1664,9 +1664,9 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
-			       struct page **hpage)
+			       struct page **hpage,
+			       struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
@@ -1680,9 +1680,34 @@ static void collapse_huge_page(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 #ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
+	new_page = *hpage;
 #else
 	VM_BUG_ON(*hpage);
+	/*
+	 * Allocate the page while the vma is still valid and under
+	 * the mmap_sem read mode so there is no memory allocation
+	 * later when we take the mmap_sem in write mode. This is more
+	 * friendly behavior (OTOH it may actually hide bugs) to
+	 * filesystems in userland with daemons allocating memory in
+	 * the userland I/O paths. Allocating memory with the
+	 * mmap_sem in read mode is good idea also to allow greater
+	 * scalability.
+	 */
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	if (unlikely(!new_page)) {
+		up_read(&mm->mmap_sem);
+		*hpage = ERR_PTR(-ENOMEM);
+		return;
+	}
 #endif
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		put_page(new_page);
+		return;
+	}
+
+	/* after allocating the hugepage upgrade to mmap_sem write mode */
+	up_read(&mm->mmap_sem);
 
 	/*
 	 * Prevent all access to pagetables with the exception of
@@ -1720,18 +1745,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-#ifndef CONFIG_NUMA
-	new_page = *hpage;
-#else
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
-	if (unlikely(!new_page)) {
-		*hpage = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-#endif
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
-		goto out_put_page;
-
 	anon_vma_lock(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
@@ -1759,7 +1772,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
 		mem_cgroup_uncharge_page(new_page);
-		goto out_put_page;
+		goto out;
 	}
 
 	/*
@@ -1798,15 +1811,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	*hpage = NULL;
 #endif
 	khugepaged_pages_collapsed++;
-out:
+out_up_write:
 	up_write(&mm->mmap_sem);
 	return;
 
-out_put_page:
+out:
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
-	goto out;
+	goto out_up_write;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -1865,10 +1878,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret) {
-		up_read(&mm->mmap_sem);
-		collapse_huge_page(mm, address, hpage);
-	}
+	if (ret)
+		/* collapse_huge_page will return with the mmap_sem released */
+		collapse_huge_page(mm, address, hpage, vma);
 out:
 	return ret;
 }
