Skip to content
Permalink
Browse files

mm: Tighten x86 /dev/mem with zeroing reads

Under CONFIG_STRICT_DEVMEM, reading System RAM through /dev/mem is
disallowed. However, on x86, the first 1MB was always allowed for BIOS
and similar things, regardless of it actually being System RAM. It was
possible for heap to end up getting allocated in low 1MB RAM, and then
read by things like x86info or dd, which would trip hardened usercopy:

usercopy: kernel memory exposure attempt detected from ffff880000090000 (dma-kmalloc-256) (4096 bytes)

This changes the x86 exception for the low 1MB by reading back zeros for
System RAM areas instead of blindly allowing them. More work is needed to
extend this to mmap, but currently mmap doesn't go through usercopy, so
hardened usercopy won't Oops the kernel.

Reported-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Tested-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
  • Loading branch information...
kees committed Apr 5, 2017
1 parent b9b3322 commit a4866aa812518ed1a37d8ea0c881dc946409de94
Showing with 82 additions and 41 deletions.
  1. +30 −11 arch/x86/mm/init.c
  2. +52 −30 drivers/char/mem.c
@@ -643,21 +643,40 @@ void __init init_mem_mapping(void)
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
*
*
* On x86, access has to be given to the first megabyte of ram because that area
* contains BIOS code and data regions used by X and dosemu and similar apps.
* Access has to be given to non-kernel-ram areas as well, these contain the PCI
* mmio resources as well as potential bios/acpi data regions.
* On x86, access has to be given to the first megabyte of RAM because that
* area traditionally contains BIOS code and data regions used by X, dosemu,
* and similar apps. Since they map the entire memory range, the whole range
* must be allowed (for mapping), but any areas that would otherwise be
* disallowed are flagged as being "zero filled" instead of rejected.
* Access has to be given to non-kernel-ram areas as well, these contain the
* PCI mmio resources as well as potential bios/acpi data regions.
*/
int devmem_is_allowed(unsigned long pagenr)
{
if (pagenr < 256)
return 1;
if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
if (page_is_ram(pagenr)) {
/*
* For disallowed memory regions in the low 1MB range,
* request that the page be shown as all zeros.
*/
if (pagenr < 256)
return 2;

return 0;
}

/*
* This must follow RAM test, since System RAM is considered a
* restricted resource under CONFIG_STRICT_IOMEM.
*/
if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
/* Low 1MB bypasses iomem restrictions. */
if (pagenr < 256)
return 1;

return 0;
if (!page_is_ram(pagenr))
return 1;
return 0;
}

return 1;
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -60,6 +60,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
#endif

#ifdef CONFIG_STRICT_DEVMEM
static inline int page_is_allowed(unsigned long pfn)
{
return devmem_is_allowed(pfn);
}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
u64 from = ((u64)pfn) << PAGE_SHIFT;
@@ -75,6 +79,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
}
#else
static inline int page_is_allowed(unsigned long pfn)
{
return 1;
}
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
return 1;
@@ -122,23 +130,31 @@ static ssize_t read_mem(struct file *file, char __user *buf,

while (count > 0) {
unsigned long remaining;
int allowed;

sz = size_inside_page(p, count);

if (!range_is_allowed(p >> PAGE_SHIFT, count))
allowed = page_is_allowed(p >> PAGE_SHIFT);
if (!allowed)
return -EPERM;
if (allowed == 2) {
/* Show zeros for restricted memory. */
remaining = clear_user(buf, sz);
} else {
/*
* On ia64 if a page has been mapped somewhere as
* uncached, then it must also be accessed uncached
* by the kernel or data corruption may occur.
*/
ptr = xlate_dev_mem_ptr(p);
if (!ptr)
return -EFAULT;

/*
* On ia64 if a page has been mapped somewhere as uncached, then
* it must also be accessed uncached by the kernel or data
* corruption may occur.
*/
ptr = xlate_dev_mem_ptr(p);
if (!ptr)
return -EFAULT;
remaining = copy_to_user(buf, ptr, sz);

unxlate_dev_mem_ptr(p, ptr);
}

remaining = copy_to_user(buf, ptr, sz);
unxlate_dev_mem_ptr(p, ptr);
if (remaining)
return -EFAULT;

@@ -181,30 +197,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
#endif

while (count > 0) {
int allowed;

sz = size_inside_page(p, count);

if (!range_is_allowed(p >> PAGE_SHIFT, sz))
allowed = page_is_allowed(p >> PAGE_SHIFT);
if (!allowed)
return -EPERM;

/*
* On ia64 if a page has been mapped somewhere as uncached, then
* it must also be accessed uncached by the kernel or data
* corruption may occur.
*/
ptr = xlate_dev_mem_ptr(p);
if (!ptr) {
if (written)
break;
return -EFAULT;
}
/* Skip actual writing when a page is marked as restricted. */
if (allowed == 1) {
/*
* On ia64 if a page has been mapped somewhere as
* uncached, then it must also be accessed uncached
* by the kernel or data corruption may occur.
*/
ptr = xlate_dev_mem_ptr(p);
if (!ptr) {
if (written)
break;
return -EFAULT;
}

copied = copy_from_user(ptr, buf, sz);
unxlate_dev_mem_ptr(p, ptr);
if (copied) {
written += sz - copied;
if (written)
break;
return -EFAULT;
copied = copy_from_user(ptr, buf, sz);
unxlate_dev_mem_ptr(p, ptr);
if (copied) {
written += sz - copied;
if (written)
break;
return -EFAULT;
}
}

buf += sz;

1 comment on commit a4866aa

@stsp

This comment has been minimized.

Copy link
Contributor

commented on a4866aa Sep 26, 2017

So if I understand correctly, this will read
back zeros for any page_is_ram() below
1Mb, leaving only the iomem regions
allowed. But dosemu and likely other
mentioned apps like X also need an
access to BDA, which is in ram:
http://stanislavs.org/helppc/bios_data_area.html
We need it only for VGA pass-through
which is rarely used, but nevertheless
I wonder why not to simply disallow
the in-kernel allocators to ever touch
that region? As I understand the above
description, they currently can even
overwrite the BDA?

Please sign in to comment.
You can’t perform that action at this time.