Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
block: use DAX for partition table reads
Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
  • Loading branch information
djbw committed Jan 30, 2016
1 parent 9f4736f commit d1a5f2b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
18 changes: 15 additions & 3 deletions block/partition-generic.c
Expand Up @@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/dax.h>
#include <linux/blktrace_api.h>

#include "partitions/check.h"
Expand Down Expand Up @@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}

unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
/*
 * Fetch, through the block device's page cache, the page that contains
 * sector @n.
 *
 * The sector number is turned into a page index by shifting out the
 * (PAGE_CACHE_SHIFT - 9) low bits. Whatever read_mapping_page() returns
 * is handed back unchanged, so the caller is expected to check it with
 * IS_ERR().
 */
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
	pgoff_t index = (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9));

	return read_mapping_page(bdev->bd_inode->i_mapping, index, NULL);
}

unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct page *page;

page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
NULL);
/* don't populate page cache for dax capable devices */
if (IS_DAX(bdev->bd_inode))
page = read_dax_sector(bdev, n);
else
page = read_pagecache_sector(bdev, n);

if (!IS_ERR(page)) {
if (PageError(page))
goto fail;
Expand Down
20 changes: 20 additions & 0 deletions fs/dax.c
Expand Up @@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
blk_queue_exit(bdev->bd_queue);
}

/*
 * read_dax_sector - copy the page-sized region containing a sector
 * @bdev: block device to read from
 * @n: sector number; rounded down to a PAGE_SIZE boundary before mapping
 *
 * Allocates a fresh page and fills it with PAGE_SIZE bytes copied from the
 * device's DAX mapping, so the read does not go through the page cache.
 *
 * Returns the newly allocated page on success; the caller owns it and must
 * release it. On failure returns ERR_PTR(-ENOMEM) if no page could be
 * allocated, or ERR_PTR() of the negative value returned by
 * dax_map_atomic(). No page is leaked on either failure path.
 */
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
	struct page *page = alloc_pages(GFP_KERNEL, 0);
	struct blk_dax_ctl dax = {
		.size = PAGE_SIZE,
		/*
		 * Round down to the first sector of the page. The (int) cast
		 * makes the inverted mask negative so it sign-extends to the
		 * full width of sector_t instead of clearing the high bits.
		 */
		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
	};
	long rc;

	if (!page)
		return ERR_PTR(-ENOMEM);

	rc = dax_map_atomic(bdev, &dax);
	if (rc < 0) {
		/* Mapping failed: release the page allocated above. */
		__free_page(page);
		return ERR_PTR(rc);
	}
	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
	dax_unmap_atomic(bdev, &dax);
	return page;
}

/*
* dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
Expand Down
11 changes: 11 additions & 0 deletions include/linux/dax.h
Expand Up @@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
dax_iodone_t);

#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
#else
/*
 * Stub for builds without CONFIG_FS_DAX: there is no DAX path to read
 * from, so every call fails with ERR_PTR(-ENXIO).
 */
static inline struct page *read_dax_sector(struct block_device *bdev,
sector_t n)
{
return ERR_PTR(-ENXIO);
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
unsigned int flags, get_block_t, dax_iodone_t);
Expand Down

0 comments on commit d1a5f2b

Please sign in to comment.