Skip to content

Commit

Permalink
Merge branch 'page-refs' (page ref overflow)
Browse files Browse the repository at this point in the history
Merge page ref overflow branch.

Jann Horn reported that he can overflow the page ref count with
sufficient memory (and a filesystem that is intentionally extremely
slow).

Admittedly it's not exactly easy.  To have more than four billion
references to a page requires a minimum of 32GB of kernel memory just
for the pointers to the pages, much less any metadata to keep track of
those pointers.  Jann needed a total of 140GB of memory and a specially
crafted filesystem that leaves all reads pending (in order to not ever
free the page references and just keep adding more).

Still, we have a fairly straightforward way to limit the two obvious
user-controllable sources of page references: direct-IO like page
references gotten through get_user_pages(), and the splice pipe page
duplication.  So let's just do that.

* branch page-refs:
  fs: prevent page refcount overflow in pipe_buf_get
  mm: prevent get_user_pages() from overflowing page refcount
  mm: add 'try_get_page()' helper function
  mm: make page ref count overflow check tighter and more explicit
  • Loading branch information
torvalds committed Apr 14, 2019
2 parents 4443f8e + 15fab63 commit 6b3a707
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 28 deletions.
12 changes: 6 additions & 6 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2056,10 +2056,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;

ret = -EINVAL;
if (rem < len) {
pipe_unlock(pipe);
goto out;
}
if (rem < len)
goto out_free;

rem = len;
while (rem) {
Expand All @@ -2077,7 +2075,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
pipe_buf_get(pipe, ibuf);
if (!pipe_buf_get(pipe, ibuf))
goto out_free;

*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
Expand All @@ -2100,11 +2100,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);

pipe_lock(pipe);
out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
pipe_unlock(pipe);

out:
kvfree(bufs);
return ret;
}
Expand Down
4 changes: 2 additions & 2 deletions fs/pipe.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
get_page(buf->page);
return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

Expand Down
12 changes: 10 additions & 2 deletions fs/splice.c
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,11 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
pipe_buf_get(ipipe, ibuf);
if (!pipe_buf_get(ipipe, ibuf)) {
if (ret == 0)
ret = -EFAULT;
break;
}
*obuf = *ibuf;

/*
Expand Down Expand Up @@ -1667,7 +1671,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
pipe_buf_get(ipipe, ibuf);
if (!pipe_buf_get(ipipe, ibuf)) {
if (ret == 0)
ret = -EFAULT;
break;
}

obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
Expand Down
15 changes: 14 additions & 1 deletion include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -966,15 +966,28 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
#endif /* CONFIG_DEV_PAGEMAP_OPS */

/* 127: arbitrary random number, small enough to assemble well */
#define page_ref_zero_or_close_to_overflow(page) \
((unsigned int) page_ref_count(page) + 127u <= 127u)

static inline void get_page(struct page *page)
{
page = compound_head(page);
/*
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_refcount.
*/
VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
page_ref_inc(page);
}

static inline __must_check bool try_get_page(struct page *page)
{
page = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(page) <= 0))
return false;
page_ref_inc(page);
return true;
}

static inline void put_page(struct page *page)
Expand Down
10 changes: 6 additions & 4 deletions include/linux/pipe_fs_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,18 +101,20 @@ struct pipe_buf_operations {
/*
* Get a reference to the pipe buffer.
*/
void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
*
* Return: %true if the reference was successfully obtained.
*/
static inline void pipe_buf_get(struct pipe_inode_info *pipe,
static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
buf->ops->get(pipe, buf);
return buf->ops->get(pipe, buf);
}

/**
Expand Down Expand Up @@ -171,7 +173,7 @@ struct pipe_inode_info *alloc_pipe_info(void);
void free_pipe_info(struct pipe_inode_info *);

/* Generic pipe buffer ops functions */
void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
Expand Down
6 changes: 5 additions & 1 deletion kernel/trace/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -7041,12 +7041,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
buf->private = 0;
}

static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;

if (ref->ref > INT_MAX/2)
return false;

ref->ref++;
return true;
}

/* Pipe buffer operations for a buffer. */
Expand Down
48 changes: 36 additions & 12 deletions mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
goto retry;
}

if (flags & FOLL_GET)
get_page(page);
if (flags & FOLL_GET) {
if (unlikely(!try_get_page(page))) {
page = ERR_PTR(-ENOMEM);
goto out;
}
}
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
Expand Down Expand Up @@ -298,7 +302,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
get_page(page);
if (unlikely(!try_get_page(page))) {
spin_unlock(ptl);
return ERR_PTR(-ENOMEM);
}
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
Expand Down Expand Up @@ -500,7 +507,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
if (is_device_public_page(*page))
goto unmap;
}
get_page(*page);
if (unlikely(!try_get_page(*page))) {
ret = -ENOMEM;
goto unmap;
}
out:
ret = 0;
unmap:
Expand Down Expand Up @@ -1545,6 +1555,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
}

/*
* Return the compund head page with ref appropriately incremented,
* or NULL if that failed.
*/
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
struct page *head = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(head) < 0))
return NULL;
if (unlikely(!page_cache_add_speculative(head, refs)))
return NULL;
return head;
}

#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
Expand Down Expand Up @@ -1579,9 +1603,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,

VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
head = compound_head(page);

if (!page_cache_get_speculative(head))
head = try_get_compound_head(page, 1);
if (!head)
goto pte_unmap;

if (unlikely(pte_val(pte) != pte_val(*ptep))) {
Expand Down Expand Up @@ -1720,8 +1744,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pmd_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pmd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
Expand Down Expand Up @@ -1758,8 +1782,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pud_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pud_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
Expand Down Expand Up @@ -1795,8 +1819,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pgd_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pgd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
Expand Down
13 changes: 13 additions & 0 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -4299,6 +4299,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,

pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));

/*
* Instead of doing 'try_get_page()' below in the same_page
* loop, just check the count once here.
*/
if (unlikely(page_count(page) <= 0)) {
if (pages) {
spin_unlock(ptl);
remainder = 0;
err = -ENOMEM;
break;
}
}
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);
Expand Down

0 comments on commit 6b3a707

Please sign in to comment.