Skip to content

Commit

Permalink
Fix write I/O hang.
Browse files Browse the repository at this point in the history
The sequence of events leading to the bug:
1. Context #1: `zfs_write` assigns a txg "n".
2. Context #2, in the same process: an mmap page fault occurs (which
   means `mm_sem` is held); `zfs_dirty_inode` fails to open a txg
   and waits for the previous txg "n" to complete.
3. Context #1 calls `uiomove` to write, but a page fault occurs
   in `uiomove`, which means it needs `mm_sem`. Since `mm_sem` is held by
   context #2, context #1 is stuck and cannot complete, so txg "n" will
   never complete.

As a result, context #1 and context #2 are deadlocked, each waiting on the other.

Signed-off-by: Grady Wong <grady.w@xtaotech.com>
  • Loading branch information
Grady Wong committed Oct 8, 2018
1 parent dfbe267 commit cd06b1b
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
1 change: 1 addition & 0 deletions include/spl/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef struct uio {
int uio_iovcnt;
offset_t uio_loffset;
uio_seg_t uio_segflg;
boolean_t uio_fault_disable;
uint16_t uio_fmode;
uint16_t uio_extflg;
offset_t uio_limit;
Expand Down
21 changes: 19 additions & 2 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include <sys/sysmacros.h>
#include <sys/strings.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>

/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
Expand Down Expand Up @@ -79,8 +80,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
if (copy_from_user(p, iov->iov_base+skip, cnt))
return (EFAULT);
if (uio->uio_fault_disable) {
if (!access_ok(VERIFY_READ,
(iov->iov_base + skip), cnt)) {
return (SET_ERROR(EFAULT));
}

pagefault_disable();
if (__copy_from_user_inatomic(p,
(iov->iov_base + skip), cnt)) {
pagefault_enable();
return (SET_ERROR(EFAULT));
}
pagefault_enable();
} else {
if (copy_from_user(p,
(iov->iov_base + skip), cnt))
return (SET_ERROR(EFAULT));
}
}
break;
case UIO_SYSSPACE:
Expand Down
20 changes: 19 additions & 1 deletion module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -809,8 +809,17 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
if (abuf == NULL) {
tx_bytes = uio->uio_resid;
uio->uio_fault_disable = B_TRUE;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
if (error == EFAULT) {
dmu_tx_commit(tx);
uio_prefaultpages(MIN(n, max_blksz), uio);
continue;
} else if (error != 0) {
dmu_tx_abort(tx);
break;
}
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
Expand Down Expand Up @@ -4636,13 +4645,22 @@ zfs_dirty_inode(struct inode *ip, int flags)
}
#endif

top:
tx = dmu_tx_create(zfsvfs->z_os);

dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);

error = dmu_tx_assign(tx, TXG_WAIT);
boolean_t waited = B_FALSE;
error = dmu_tx_assign(tx,
waited ? (TXG_NOTHROTTLE | TXG_WAIT) : TXG_NOWAIT);
if (error) {
if (error == ERESTART && waited == B_FALSE) {
waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
goto out;
}
Expand Down

0 comments on commit cd06b1b

Please sign in to comment.