Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deadlock between mm_sem and tx assign in zfs_write() and page fault #7939

Merged
merged 6 commits into from
Oct 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/spl/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef struct uio {
int uio_iovcnt;
offset_t uio_loffset;
uio_seg_t uio_segflg;
boolean_t uio_fault_disable;
uint16_t uio_fmode;
uint16_t uio_extflg;
offset_t uio_limit;
Expand Down
2 changes: 1 addition & 1 deletion include/sys/uio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
#include <sys/uio.h>

extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
extern void uio_prefaultpages(ssize_t, uio_t *);
extern int uio_prefaultpages(ssize_t, uio_t *);
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
extern void uioskip(uio_t *, size_t);

Expand Down
31 changes: 25 additions & 6 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include <sys/sysmacros.h>
#include <sys/strings.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>

/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
Expand Down Expand Up @@ -79,8 +80,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
if (copy_from_user(p, iov->iov_base+skip, cnt))
return (EFAULT);
if (uio->uio_fault_disable) {
if (!access_ok(VERIFY_READ,
(iov->iov_base + skip), cnt)) {
return (EFAULT);
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
}

pagefault_disable();
if (__copy_from_user_inatomic(p,
(iov->iov_base + skip), cnt)) {
pagefault_enable();
return (EFAULT);
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
}
pagefault_enable();
} else {
if (copy_from_user(p,
(iov->iov_base + skip), cnt))
return (EFAULT);
}
}
break;
case UIO_SYSSPACE:
Expand Down Expand Up @@ -158,7 +175,7 @@ EXPORT_SYMBOL(uiomove);
* error will terminate the process as this is only a best attempt to get
* the pages resident.
*/
void
int
uio_prefaultpages(ssize_t n, struct uio *uio)
{
const struct iovec *iov;
Expand All @@ -172,7 +189,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
return;
return (0);
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
Expand All @@ -196,7 +213,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
p = iov->iov_base + skip;
while (cnt) {
if (fuword8((uint8_t *)p, &tmp))
return;
return (EFAULT);
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
Expand All @@ -206,8 +223,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
*/
p--;
if (fuword8((uint8_t *)p, &tmp))
return;
return (EFAULT);
}

return (0);
}
EXPORT_SYMBOL(uio_prefaultpages);

Expand Down
24 changes: 21 additions & 3 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio = (xuio_t *)uio;
else
#endif
uio_prefaultpages(MIN(n, max_blksz), uio);
if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EFAULT));
wgqimut marked this conversation as resolved.
Show resolved Hide resolved
}

rl_t *rl;

Expand Down Expand Up @@ -809,8 +812,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
if (abuf == NULL) {
tx_bytes = uio->uio_resid;
uio->uio_fault_disable = B_TRUE;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
if (error == EFAULT) {
dmu_tx_commit(tx);
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
break;
}
continue;
} else if (error != 0) {
dmu_tx_commit(tx);
break;
wgqimut marked this conversation as resolved.
Show resolved Hide resolved
}
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
Expand Down Expand Up @@ -910,8 +924,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ASSERT(tx_bytes == nbytes);
n -= nbytes;

if (!xuio && n > 0)
uio_prefaultpages(MIN(n, max_blksz), uio);
if (!xuio && n > 0) {
if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
error = EFAULT;
break;
}
}
}

zfs_inode_update(zp);
Expand Down
140 changes: 99 additions & 41 deletions tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,74 +31,132 @@
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
#include <errno.h>
#include <err.h>

/*
* --------------------------------------------------------------------
* Bug Id: 5032643
* Bug Issue Id: #7512
* The bug time sequence:
* 1. context #1, zfs_write assign a txg "n".
* 2. In the same process, context #2, mmap page fault (which means the mm_sem
* is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous
* txg "n" completed.
* 3. context #1 call uiomove to write, however page fault is occurred in
* uiomove, which means it need mm_sem, but mm_sem is hold by
* context #2, so it stuck and can't complete, then txg "n" will not
* complete.
*
* Simply writing to a file and mmaping that file at the same time can
* result in deadlock. Nothing perverse like writing from the file's
* own mapping is required.
* So context #1 and context #2 trap into the "dead lock".
* --------------------------------------------------------------------
*/

#define NORMAL_WRITE_TH_NUM 2

static void *
mapper(void *fdp)
normal_writer(void *filename)
{
void *addr;
int fd = *(int *)fdp;
char *file_path = filename;
int fd = -1;
ssize_t write_num = 0;
int page_size = getpagesize();

if ((addr =
mmap(0, 8192, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
perror("mmap");
exit(1);
fd = open(file_path, O_RDWR | O_CREAT, 0777);
if (fd == -1) {
err(1, "failed to open %s", file_path);
}
for (;;) {
if (mmap(addr, 8192, PROT_READ,
MAP_SHARED|MAP_FIXED, fd, 0) == MAP_FAILED) {
perror("mmap");
exit(1);

char *buf = malloc(1);
while (1) {
write_num = write(fd, buf, 1);
if (write_num == 0) {
err(1, "write failed!");
break;
}
lseek(fd, page_size, SEEK_CUR);
}

if (buf) {
free(buf);
}
/* NOTREACHED */
return ((void *)1);
}

int
main(int argc, char **argv)
static void *
map_writer(void *filename)
{
int fd;
char buf[1024];
pthread_t tid;
int fd = -1;
int ret = 0;
char *buf = NULL;
int page_size = getpagesize();
int op_errno = 0;
char *file_path = filename;

memset(buf, 'a', sizeof (buf));
while (1) {
ret = access(file_path, F_OK);
if (ret) {
op_errno = errno;
if (op_errno == ENOENT) {
fd = open(file_path, O_RDWR | O_CREAT, 0777);
if (fd == -1) {
err(1, "open file failed");
}

if (argc != 2) {
(void) printf("usage: %s <file name>\n", argv[0]);
exit(1);
}
ret = ftruncate(fd, page_size);
if (ret == -1) {
err(1, "truncate file failed");
}
} else {
err(1, "access file failed!");
}
} else {
fd = open(file_path, O_RDWR, 0777);
if (fd == -1) {
err(1, "open file failed");
}
}

if ((fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666)) == -1) {
perror("open");
exit(1);
if ((buf = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0)) == MAP_FAILED) {
err(1, "map file failed");
}

if (fd != -1)
close(fd);

char s[10] = {0, };
memcpy(buf, s, 10);
ret = munmap(buf, page_size);
if (ret != 0) {
err(1, "unmap file failed");
}
}
}

(void) pthread_setconcurrency(2);
if (pthread_create(&tid, NULL, mapper, &fd) != 0) {
perror("pthread_create");
close(fd);
int
main(int argc, char **argv)
{
pthread_t map_write_tid;
pthread_t normal_write_tid[NORMAL_WRITE_TH_NUM];
int i = 0;

if (argc != 3) {
(void) printf("usage: %s <normal write file name>"
"<map write file name>\n", argv[0]);
exit(1);
}
for (;;) {
if (write(fd, buf, sizeof (buf)) == -1) {
perror("write");
close(fd);
exit(1);

for (i = 0; i < NORMAL_WRITE_TH_NUM; i++) {
if (pthread_create(&normal_write_tid[i], NULL, normal_writer,
argv[1])) {
err(1, "pthread_create normal_writer failed.");
}
}

close(fd);
if (pthread_create(&map_write_tid, NULL, map_writer, argv[2])) {
err(1, "pthread_create map_writer failed.");
}

/* NOTREACHED */
pthread_join(map_write_tid, NULL);
return (0);
}
8 changes: 5 additions & 3 deletions tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ if ! is_mp; then
fi

log_must chmod 777 $TESTDIR
mmapwrite $TESTDIR/test-write-file &
mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file &
PID_MMAPWRITE=$!
log_note "mmapwrite $TESTDIR/test-write-file pid: $PID_MMAPWRITE"
log_note "mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file"\
"pid: $PID_MMAPWRITE"
log_must sleep 30

log_must kill -9 $PID_MMAPWRITE
log_must ls -l $TESTDIR/test-write-file
log_must ls -l $TESTDIR/normal_write_file
log_must ls -l $TESTDIR/map_write_file

log_pass "write(2) a mmap(2)'ing file succeeded."